Encode duplication factor from loop vectorization and loop unrolling to discriminator.

Summary:
This patch starts the implementation as discussed in the following RFC: http://lists.llvm.org/pipermail/llvm-dev/2016-October/106532.html

When an optimization duplicates code in a way that scales down the execution count of a basic block, we record the duplication factor as part of the discriminator so that the offline processing tool can find the duplication factor and compute the accurate execution frequency of the corresponding source code. Two important optimizations that fall into this category are loop vectorization and loop unrolling. This patch records the duplication factor for these two optimizations.

The recording will be guarded by a flag encode-duplication-in-discriminators, which is off by default.

Reviewers: probinson, aprantl, davidxl, hfinkel, echristo

Reviewed By: hfinkel

Subscribers: mehdi_amini, anemet, mzolotukhin, llvm-commits

Differential Revision: https://reviews.llvm.org/D26420

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@294782 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Dehao Chen 2017-02-10 21:09:07 +00:00
parent 2c44e216a8
commit d0b28d942d
16 changed files with 236 additions and 32 deletions

View File

@ -4003,7 +4003,9 @@ DICompileUnit
``retainedTypes:``, ``subprograms:``, ``globals:``, ``imports:`` and ``macros:``
fields are tuples containing the debug info to be emitted along with the compile
unit, regardless of code optimizations (some nodes are only emitted if there are
references to them from instructions).
references to them from instructions). The ``debugInfoForProfiling:`` field is a
boolean indicating whether or not line-table discriminators are updated to
provide more-accurate debug info for profiling results.
.. code-block:: text

View File

@ -1315,10 +1315,48 @@ public:
///
/// DWARF discriminators distinguish identical file locations between
/// instructions that are on different basic blocks.
///
/// There are 3 components stored in discriminator, from lower bits:
///
/// Base discriminator: assigned by AddDiscriminators pass to identify IRs
/// that are defined by the same source line, but
/// different basic blocks.
/// Duplication factor: assigned by optimizations that will scale down
/// the execution frequency of the original IR.
/// Copy Identifier: assigned by optimizations that clone the IR.
/// Each copy of the IR will be assigned an identifier.
///
/// Encoding:
///
/// The above 3 components are encoded into a 32-bit unsigned integer in
/// order. If the lowest bit is 1, the current component is empty, and the
/// next component will start in the next bit. Otherwise, the current
/// component is non-empty, and its content starts in the next bit. The
/// length of each component is either 5 bits or 12 bits: if the 7th bit
/// is 0, bits 2~6 (5 bits) are used to represent the component; if the
/// 7th bit is 1, bits 2~6 (5 bits) and 8~14 (7 bits) are combined to
/// represent the component.
inline unsigned getDiscriminator() const;
/// Returns a new DILocation with updated \p Discriminator.
inline DILocation *cloneWithDiscriminator(unsigned Discriminator) const;
inline const DILocation *cloneWithDiscriminator(unsigned Discriminator) const;
/// Returns a new DILocation with updated base discriminator \p BD.
inline const DILocation *setBaseDiscriminator(unsigned BD) const;
/// Returns the duplication factor stored in the discriminator.
inline unsigned getDuplicationFactor() const;
/// Returns the copy identifier stored in the discriminator.
inline unsigned getCopyIdentifier() const;
/// Returns the base discriminator stored in the discriminator.
inline unsigned getBaseDiscriminator() const;
/// Returns a new DILocation with duplication factor \p DF encoded in the
/// discriminator.
inline const DILocation *cloneWithDuplicationFactor(unsigned DF) const;
/// When two instructions are combined into a single instruction we also
/// need to combine the original locations into a single location.
@ -1351,6 +1389,28 @@ public:
static bool classof(const Metadata *MD) {
return MD->getMetadataID() == DILocationKind;
}
/// Converts \p U into the prefix encoding used inside a discriminator.
///
/// Only the low 12 bits of \p U are kept. A value that fits in 5 bits is
/// returned unchanged. Otherwise the low 5 bits stay in place, bit 0x20 is
/// set as a "long form" marker, and the upper 7 bits are moved up by one
/// position so that they sit just above the marker (13 bits in total).
static unsigned getPrefixEncodingFromUnsigned(unsigned U) {
  const unsigned Value = U & 0xfff;
  if (Value <= 0x1f)
    return Value;
  const unsigned LowPart = Value & 0x1f;
  const unsigned HighPart = (Value & 0xfe0) << 1;
  return HighPart | LowPart | 0x20;
}
/// Decodes a value produced by getPrefixEncodingFromUnsigned.
///
/// The low 5 bits are always part of the result. When the marker bit 0x20 is
/// set, the 7 bits above the marker are shifted back down by one position and
/// combined with the low 5 bits; any bits above those are ignored.
static unsigned getUnsignedFromPrefixEncoding(unsigned U) {
  const unsigned LowPart = U & 0x1f;
  if ((U & 0x20) == 0)
    return LowPart;
  const unsigned HighPart = (U >> 1) & 0xfe0;
  return HighPart | LowPart;
}
/// Drops the first component of discriminator \p D and returns what follows.
///
/// A set lowest bit marks an empty component that occupies a single bit.
/// Otherwise the component occupies 7 bits, or 14 bits when bit 0x40 (the
/// long-form marker of the encoded value) is set.
static unsigned getNextComponentInDiscriminator(unsigned D) {
  if (D & 1)
    return D >> 1;
  const unsigned ComponentWidth = (D & 0x40) ? 14 : 7;
  return D >> ComponentWidth;
}
};
/// Subprogram description.
@ -1684,7 +1744,8 @@ unsigned DILocation::getDiscriminator() const {
return 0;
}
DILocation *DILocation::cloneWithDiscriminator(unsigned Discriminator) const {
const DILocation *
DILocation::cloneWithDiscriminator(unsigned Discriminator) const {
DIScope *Scope = getScope();
// Skip all parent DILexicalBlockFile that already have a discriminator
// assigned. We do not want to have nested DILexicalBlockFiles that have
@ -1700,6 +1761,52 @@ DILocation *DILocation::cloneWithDiscriminator(unsigned Discriminator) const {
getInlinedAt());
}
/// Returns the base discriminator, i.e. the first component of the
/// discriminator. A set lowest bit means the component is empty, in which
/// case 0 is returned; otherwise the bits above it are decoded.
unsigned DILocation::getBaseDiscriminator() const {
  const unsigned Raw = getDiscriminator();
  const bool IsEmpty = (Raw & 1) != 0;
  return IsEmpty ? 0 : getUnsignedFromPrefixEncoding(Raw >> 1);
}
/// Returns the duplication factor, i.e. the second component of the
/// discriminator. When nothing follows the base discriminator, or the
/// component is marked empty (lowest bit set), the factor is 1.
unsigned DILocation::getDuplicationFactor() const {
  const unsigned Rest = getNextComponentInDiscriminator(getDiscriminator());
  const bool HasFactor = Rest != 0 && (Rest & 1) == 0;
  if (!HasFactor)
    return 1;
  return getUnsignedFromPrefixEncoding(Rest >> 1);
}
/// Returns the copy identifier: the first two components of the
/// discriminator are skipped and the remaining bits are decoded directly.
unsigned DILocation::getCopyIdentifier() const {
  const unsigned AfterBase = getNextComponentInDiscriminator(getDiscriminator());
  const unsigned AfterFactor = getNextComponentInDiscriminator(AfterBase);
  return getUnsignedFromPrefixEncoding(AfterFactor);
}
/// Returns a location whose discriminator holds only the base discriminator
/// \p D (prefix-encoded, with a clear lowest bit marking it as non-empty).
/// A zero \p D leaves the location untouched and returns this.
const DILocation *DILocation::setBaseDiscriminator(unsigned D) const {
  if (D != 0)
    return cloneWithDiscriminator(getPrefixEncodingFromUnsigned(D) << 1);
  return this;
}
/// Returns a location whose discriminator records a duplication factor of
/// \p DF multiplied by the factor already stored in this location.
///
/// If the combined factor is 0 or 1 there is nothing to record and this is
/// returned. Otherwise the discriminator is rebuilt from the existing base
/// discriminator, the combined factor, and the existing copy identifier.
/// Note that getPrefixEncodingFromUnsigned keeps only the low 12 bits of the
/// values it encodes.
const DILocation *DILocation::cloneWithDuplicationFactor(unsigned DF) const {
  const unsigned Factor = DF * getDuplicationFactor();
  if (Factor <= 1)
    return this;

  const unsigned Base = getBaseDiscriminator();

  // Place the copy identifier above the slot the duplication factor needs
  // (14 bits for the long form, 7 bits otherwise), then add the factor.
  const unsigned FactorWidth = Factor > 0x1f ? 14 : 7;
  const unsigned Upper = (getCopyIdentifier() << FactorWidth) |
                         (getPrefixEncodingFromUnsigned(Factor) << 1);

  // An empty base discriminator takes a single marker bit.
  if (Base == 0)
    return cloneWithDiscriminator((Upper << 1) | 1);

  const unsigned BaseWidth = Base > 0x1f ? 14 : 7;
  return cloneWithDiscriminator((Upper << BaseWidth) |
                                (getPrefixEncodingFromUnsigned(Base) << 1));
}
class DINamespace : public DIScope {
friend class LLVMContextImpl;
friend class MDNode;

View File

@ -671,6 +671,9 @@ public:
/// to \a DISubprogram.
DISubprogram *getSubprogram() const;
/// Returns true if we should emit debug info for profiling.
bool isDebugInfoForProfiling() const;
private:
void allocHungoffUselist();
template<int Idx> void setHungoffOperand(Constant *C);

View File

@ -1459,6 +1459,15 @@ DISubprogram *Function::getSubprogram() const {
return cast_or_null<DISubprogram>(getMetadata(LLVMContext::MD_dbg));
}
/// Reports the debugInfoForProfiling setting of the compile unit reached
/// through this function's subprogram. Returns false when the function has
/// no subprogram or the subprogram has no compile unit.
bool Function::isDebugInfoForProfiling() const {
  DISubprogram *SP = getSubprogram();
  if (!SP)
    return false;
  DICompileUnit *CU = SP->getUnit();
  if (!CU)
    return false;
  return CU->getDebugInfoForProfiling();
}
/// Attaches \p GV to this global variable as metadata of kind
/// LLVMContext::MD_dbg. \p GV is dereferenced, so it must not be null.
void GlobalVariable::addDebugInfo(DIGlobalVariableExpression *GV) {
addMetadata(LLVMContext::MD_dbg, *GV);
}

View File

@ -190,8 +190,8 @@ static bool addDiscriminators(Function &F) {
// discriminator is needed to distinguish both instructions.
// Only the lowest 7 bits are used to represent a discriminator to fit
// it in 1 byte ULEB128 representation.
unsigned Discriminator = (R.second ? ++LDM[L] : LDM[L]) & 0x7f;
I.setDebugLoc(DIL->cloneWithDiscriminator(Discriminator));
unsigned Discriminator = R.second ? ++LDM[L] : LDM[L];
I.setDebugLoc(DIL->setBaseDiscriminator(Discriminator));
DEBUG(dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":"
<< DIL->getColumn() << ":" << Discriminator << " " << I
<< "\n");
@ -216,8 +216,8 @@ static bool addDiscriminators(Function &F) {
Location L =
std::make_pair(CurrentDIL->getFilename(), CurrentDIL->getLine());
if (!CallLocations.insert(L).second) {
Current->setDebugLoc(
CurrentDIL->cloneWithDiscriminator((++LDM[L]) & 0x7f));
unsigned Discriminator = ++LDM[L];
Current->setDebugLoc(CurrentDIL->setBaseDiscriminator(Discriminator));
Changed = true;
}
}

View File

@ -27,6 +27,7 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
@ -462,6 +463,12 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
for (Loop *SubLoop : *L)
LoopsToSimplify.insert(SubLoop);
if (Header->getParent()->isDebugInfoForProfiling())
for (BasicBlock *BB : L->getBlocks())
for (Instruction &I : *BB)
if (const DILocation *DIL = I.getDebugLoc())
I.setDebugLoc(DIL->cloneWithDuplicationFactor(Count));
for (unsigned It = 1; It != Count; ++It) {
std::vector<BasicBlock*> NewBlocks;
SmallDenseMap<const Loop *, Loop *, 4> NewLoops;

View File

@ -616,6 +616,10 @@ protected:
/// vector of instructions.
void addMetadata(ArrayRef<Value *> To, Instruction *From);
/// \brief Set the debug location in the builder using the debug location in
/// the instruction.
void setDebugLocFromInst(IRBuilder<> &B, const Value *Ptr);
/// This is a helper class for maintaining vectorization state. It's used for
/// mapping values from the original loop to their corresponding values in
/// the new loop. Two mappings are maintained: one for vectorized values and
@ -865,12 +869,14 @@ static Instruction *getDebugLocFromInstOrOperands(Instruction *I) {
return I;
}
/// \brief Set the debug location in the builder using the debug location in the
/// instruction.
static void setDebugLocFromInst(IRBuilder<> &B, const Value *Ptr) {
if (const Instruction *Inst = dyn_cast_or_null<Instruction>(Ptr))
B.SetCurrentDebugLocation(Inst->getDebugLoc());
else
/// Sets the current debug location of builder \p B from \p Ptr.
///
/// If \p Ptr is not an instruction (or is null) the location is cleared.
/// Otherwise the instruction's location is used; when the enclosing function
/// emits debug info for profiling, the location is first cloned with a
/// duplication factor of UF * VF.
void InnerLoopVectorizer::setDebugLocFromInst(IRBuilder<> &B, const Value *Ptr) {
  const Instruction *Inst = dyn_cast_or_null<Instruction>(Ptr);
  if (!Inst) {
    B.SetCurrentDebugLocation(DebugLoc());
    return;
  }

  const DILocation *DIL = Inst->getDebugLoc();
  if (!DIL || !Inst->getFunction()->isDebugInfoForProfiling()) {
    B.SetCurrentDebugLocation(DIL);
    return;
  }
  B.SetCurrentDebugLocation(DIL->cloneWithDuplicationFactor(UF * VF));
}

View File

@ -58,5 +58,5 @@ attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointe
; CHECK: ![[FOO:[0-9]+]] = distinct !DISubprogram(name: "foo"
; CHECK: ![[BLOCK:[0-9]+]] = distinct !DILexicalBlock(scope: ![[FOO]],{{.*}} line: 3)
; CHECK: ![[THEN]] = !DILocation(line: 3, scope: ![[BLOCKFILE:[0-9]+]])
; CHECK: ![[BLOCKFILE]] = !DILexicalBlockFile(scope: ![[BLOCK]],{{.*}} discriminator: 1)
; CHECK: ![[BLOCKFILE]] = !DILexicalBlockFile(scope: ![[BLOCK]],{{.*}} discriminator: 2)
; CHECK: ![[END]] = !DILocation(line: 4, scope: ![[FOO]])

View File

@ -47,4 +47,4 @@ attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-
!14 = !DILocation(line: 4, column: 3, scope: !4)
; CHECK: ![[CALL2]] = !DILocation(line: 4, column: 10, scope: ![[CALL2BLOCK:[0-9]+]])
; CHECK: ![[CALL2BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 1)
; CHECK: ![[CALL2BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 2)

View File

@ -5,7 +5,7 @@
; #1 void bar();
; #2
; #3 void foo() {
; #4 bar();bar()/*discriminator 1*/;bar()/*discriminator 2*/;
; #4 bar();bar()/*discriminator 2*/;bar()/*discriminator 4*/;
; #5 }
; Function Attrs: uwtable
@ -49,6 +49,6 @@ attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-
!13 = !DILocation(line: 5, column: 1, scope: !4)
; CHECK: ![[CALL1]] = !DILocation(line: 4, column: 9, scope: ![[CALL1BLOCK:[0-9]+]])
; CHECK: ![[CALL1BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 1)
; CHECK: ![[CALL1BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 2)
; CHECK: ![[CALL2]] = !DILocation(line: 4, column: 15, scope: ![[CALL2BLOCK:[0-9]+]])
; CHECK: ![[CALL2BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 2)
; CHECK: ![[CALL2BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 4)

View File

@ -10,7 +10,7 @@
; #6 }
; bar(5): discriminator 0
; bar(3): discriminator 1
; bar(3): discriminator 2
; Function Attrs: uwtable
define void @_Z3fooi(i32 %i) #0 !dbg !4 {
@ -69,4 +69,4 @@ attributes #2 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-
!20 = !DILocation(line: 6, column: 1, scope: !4)
; CHECK: ![[ELSE]] = !DILocation(line: 5, column: 18, scope: ![[ELSEBLOCK:[0-9]+]])
; CHECK: ![[ELSEBLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 1)
; CHECK: ![[ELSEBLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 2)

View File

@ -69,7 +69,7 @@ attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointe
!12 = !DILocation(line: 3, scope: !13)
!13 = distinct !DILexicalBlock(line: 3, column: 0, file: !1, scope: !11)
; CHECK: !DILexicalBlockFile(scope: ![[BLOCK2:[0-9]+]],{{.*}} discriminator: 1)
; CHECK: !DILexicalBlockFile(scope: ![[BLOCK2:[0-9]+]],{{.*}} discriminator: 2)
!14 = !DILocation(line: 4, scope: !13)
; CHECK: ![[BLOCK2]] = distinct !DILexicalBlock(scope: ![[BLOCK1]],{{.*}} line: 3)

View File

@ -62,8 +62,8 @@ attributes #3 = { nounwind readnone }
!12 = distinct !DISubprogram(name: "g", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: true, unit: !0, variables: !2)
!13 = distinct !DILocation(line: 1, column: 17, scope: !14)
; CHECK: ![[BF:.*]] = !DILexicalBlockFile(scope: ![[LB1:[0-9]+]],
; CHECK-SAME: discriminator: 1)
!14 = !DILexicalBlockFile(scope: !15, file: !1, discriminator: 1)
; CHECK-SAME: discriminator: 2)
!14 = !DILexicalBlockFile(scope: !15, file: !1, discriminator: 2)
; CHECK: ![[LB1]] = distinct !DILexicalBlock(scope: ![[LB2:[0-9]+]],
; CHECK-SAME: line: 1, column: 16)
!15 = distinct !DILexicalBlock(scope: !16, file: !1, line: 1, column: 16)

View File

@ -67,6 +67,6 @@ attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointe
!12 = !DILocation(line: 4, scope: !4)
; CHECK: ![[THEN]] = !DILocation(line: 3, scope: ![[THENBLOCK:[0-9]+]])
; CHECK: ![[THENBLOCK]] = !DILexicalBlockFile(scope: ![[SCOPE:[0-9]+]],{{.*}} discriminator: 1)
; CHECK: ![[THENBLOCK]] = !DILexicalBlockFile(scope: ![[SCOPE:[0-9]+]],{{.*}} discriminator: 2)
; CHECK: ![[ELSE]] = !DILocation(line: 3, scope: ![[ELSEBLOCK:[0-9]+]])
; CHECK: ![[ELSEBLOCK]] = !DILexicalBlockFile(scope: ![[SCOPE]],{{.*}} discriminator: 2)
; CHECK: ![[ELSEBLOCK]] = !DILexicalBlockFile(scope: ![[SCOPE]],{{.*}} discriminator: 4)

View File

@ -7,9 +7,9 @@
; #3 }
; i == 3: discriminator 0
; i == 5: discriminator 1
; return 100: discriminator 2
; return 99: discriminator 3
; i == 5: discriminator 2
; return 100: discriminator 4
; return 99: discriminator 6
define i32 @_Z3fooi(i32 %i) #0 !dbg !4 {
%1 = alloca i32, align 4
@ -91,11 +91,11 @@ attributes #1 = { nounwind readnone }
; CHECK: ![[F:.*]] = distinct !DISubprogram(name: "foo",
; CHECK: ![[IF:.*]] = distinct !DILexicalBlock(scope: ![[F]],{{.*}}line: 2, column: 7)
; CHECK: ![[THEN1]] = !DILocation(line: 2, column: 17, scope: ![[THENBLOCK:[0-9]+]])
; CHECK: ![[THENBLOCK]] = !DILexicalBlockFile(scope: ![[IF]],{{.*}} discriminator: 1)
; CHECK: ![[THENBLOCK]] = !DILexicalBlockFile(scope: ![[IF]],{{.*}} discriminator: 2)
; CHECK: ![[THEN2]] = !DILocation(line: 2, column: 19, scope: ![[THENBLOCK]])
; CHECK: ![[THEN3]] = !DILocation(line: 2, column: 7, scope: ![[BRBLOCK:[0-9]+]])
; CHECK: ![[BRBLOCK]] = !DILexicalBlockFile(scope: ![[F]],{{.*}} discriminator: 1)
; CHECK: ![[BRBLOCK]] = !DILexicalBlockFile(scope: ![[F]],{{.*}} discriminator: 2)
; CHECK: ![[ELSE]] = !DILocation(line: 2, column: 25, scope: ![[ELSEBLOCK:[0-9]+]])
; CHECK: ![[ELSEBLOCK]] = !DILexicalBlockFile(scope: ![[IF]],{{.*}} discriminator: 2)
; CHECK: ![[ELSEBLOCK]] = !DILexicalBlockFile(scope: ![[IF]],{{.*}} discriminator: 4)
; CHECK: ![[COMBINE]] = !DILocation(line: 2, column: 42, scope: ![[COMBINEBLOCK:[0-9]+]])
; CHECK: ![[COMBINEBLOCK]] = !DILexicalBlockFile(scope: ![[IF]],{{.*}} discriminator: 3)
; CHECK: ![[COMBINEBLOCK]] = !DILexicalBlockFile(scope: ![[IF]],{{.*}} discriminator: 6)

View File

@ -0,0 +1,70 @@
; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s | FileCheck --check-prefix=LOOPVEC_4_1 %s
; RUN: opt -S -loop-vectorize -force-vector-width=2 -force-vector-interleave=3 < %s | FileCheck --check-prefix=LOOPVEC_2_3 %s
; RUN: opt -S -loop-unroll -unroll-count=5 < %s | FileCheck --check-prefix=LOOPUNROLL_5 %s
; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=4 -loop-unroll -unroll-count=2 < %s | FileCheck --check-prefix=LOOPVEC_UNROLL %s
; Test if vectorization/unroll factor is recorded in discriminator.
;
; Original source code:
; 1 int *a;
; 2 int *b;
; 3
; 4 void foo() {
; 5 for (int i = 0; i < 4096; i++)
; 6 a[i] += b[i];
; 7 }
@a = local_unnamed_addr global i32* null, align 8
@b = local_unnamed_addr global i32* null, align 8
define void @_Z3foov() local_unnamed_addr #0 !dbg !6 {
%1 = load i32*, i32** @b, align 8, !dbg !8, !tbaa !9
%2 = load i32*, i32** @a, align 8, !dbg !13, !tbaa !9
br label %3, !dbg !14
; <label>:3: ; preds = %3, %0
%indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %3 ]
%4 = getelementptr inbounds i32, i32* %1, i64 %indvars.iv, !dbg !8
%5 = load i32, i32* %4, align 4, !dbg !8, !tbaa !15
%6 = getelementptr inbounds i32, i32* %2, i64 %indvars.iv, !dbg !13
%7 = load i32, i32* %6, align 4, !dbg !17, !tbaa !15
%8 = add nsw i32 %7, %5, !dbg !17
store i32 %8, i32* %6, align 4, !dbg !17, !tbaa !15
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !18
%exitcond = icmp eq i64 %indvars.iv.next, 4096, !dbg !19
br i1 %exitcond, label %9, label %3, !dbg !14, !llvm.loop !20
; <label>:9: ; preds = %3
ret void, !dbg !21
}
;LOOPVEC_4_1: discriminator: 17
;LOOPVEC_2_3: discriminator: 25
;LOOPUNROLL_5: discriminator: 21
; When unrolling after loop vectorize, both vec_body and remainder loop
; are unrolled.
;LOOPVEC_UNROLL: discriminator: 385
;LOOPVEC_UNROLL: discriminator: 9
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4}
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, debugInfoForProfiling: true)
!1 = !DIFile(filename: "a.cc", directory: "/")
!3 = !{i32 2, !"Dwarf Version", i32 4}
!4 = !{i32 2, !"Debug Info Version", i32 3}
!6 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 4, unit: !0)
!8 = !DILocation(line: 6, column: 13, scope: !6)
!9 = !{!10, !10, i64 0}
!10 = !{!"any pointer", !11, i64 0}
!11 = !{!"omnipotent char", !12, i64 0}
!12 = !{!"Simple C++ TBAA"}
!13 = !DILocation(line: 6, column: 5, scope: !6)
!14 = !DILocation(line: 5, column: 3, scope: !6)
!15 = !{!16, !16, i64 0}
!16 = !{!"int", !11, i64 0}
!17 = !DILocation(line: 6, column: 10, scope: !6)
!18 = !DILocation(line: 5, column: 30, scope: !6)
!19 = !DILocation(line: 5, column: 21, scope: !6)
!20 = distinct !{!20, !14}
!21 = !DILocation(line: 7, column: 1, scope: !6)