mirror of
https://github.com/RPCS3/llvm.git
synced 2024-11-27 13:40:43 +00:00
Encode duplication factor from loop vectorization and loop unrolling to discriminator.
Summary: This patch starts the implementation as discussed in the following RFC: http://lists.llvm.org/pipermail/llvm-dev/2016-October/106532.html When optimization duplicates code that will scale down the execution count of a basic block, we will record the duplication factor as part of the discriminator so that the offline process tool can find the duplication factor and collect the accurate execution frequency of the corresponding source code. Two important optimizations that fall into this category are loop vectorization and loop unrolling. This patch records the duplication factor for these 2 optimizations. The recording will be guarded by a flag encode-duplication-in-discriminators, which is off by default. Reviewers: probinson, aprantl, davidxl, hfinkel, echristo Reviewed By: hfinkel Subscribers: mehdi_amini, anemet, mzolotukhin, llvm-commits Differential Revision: https://reviews.llvm.org/D26420 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@294782 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
2c44e216a8
commit
d0b28d942d
@ -4003,7 +4003,9 @@ DICompileUnit
|
||||
``retainedTypes:``, ``subprograms:``, ``globals:``, ``imports:`` and ``macros:``
|
||||
fields are tuples containing the debug info to be emitted along with the compile
|
||||
unit, regardless of code optimizations (some nodes are only emitted if there are
|
||||
references to them from instructions).
|
||||
references to them from instructions). The ``debugInfoForProfiling:`` field is a
|
||||
boolean indicating whether or not line-table discriminators are updated to
|
||||
provide more-accurate debug info for profiling results.
|
||||
|
||||
.. code-block:: text
|
||||
|
||||
|
@ -1315,10 +1315,48 @@ public:
|
||||
///
|
||||
/// DWARF discriminators distinguish identical file locations between
|
||||
/// instructions that are on different basic blocks.
|
||||
///
|
||||
/// There are 3 components stored in discriminator, from lower bits:
|
||||
///
|
||||
/// Base discriminator: assigned by AddDiscriminators pass to identify IRs
|
||||
/// that are defined by the same source line, but
|
||||
/// different basic blocks.
|
||||
/// Duplication factor: assigned by optimizations that will scale down
|
||||
/// the execution frequency of the original IR.
|
||||
/// Copy Identifier: assigned by optimizations that clone the IR.
|
||||
/// Each copy of the IR will be assigned an identifier.
|
||||
///
|
||||
/// Encoding:
|
||||
///
|
||||
/// The above 3 components are encoded into a 32bit unsigned integer in
|
||||
/// order. If the lowest bit is 1, the current component is empty, and the
|
||||
/// next component will start in the next bit. Otherwise, the current
|
||||
/// component is non-empty, and its content starts in the next bit. The
|
||||
/// length of each component is either 5 bits or 12 bits: if the 7th bit
|
||||
/// is 0, the bit 2~6 (5 bits) are used to represent the component; if the
|
||||
/// 7th bit is 1, the bit 2~6 (5 bits) and 8~14 (7 bits) are combined to
|
||||
/// represent the component.
|
||||
|
||||
inline unsigned getDiscriminator() const;
|
||||
|
||||
/// Returns a new DILocation with updated \p Discriminator.
|
||||
inline DILocation *cloneWithDiscriminator(unsigned Discriminator) const;
|
||||
inline const DILocation *cloneWithDiscriminator(unsigned Discriminator) const;
|
||||
|
||||
/// Returns a new DILocation with updated base discriminator \p BD.
|
||||
inline const DILocation *setBaseDiscriminator(unsigned BD) const;
|
||||
|
||||
/// Returns the duplication factor stored in the discriminator.
|
||||
inline unsigned getDuplicationFactor() const;
|
||||
|
||||
/// Returns the copy identifier stored in the discriminator.
|
||||
inline unsigned getCopyIdentifier() const;
|
||||
|
||||
/// Returns the base discriminator stored in the discriminator.
|
||||
inline unsigned getBaseDiscriminator() const;
|
||||
|
||||
/// Returns a new DILocation with duplication factor \p DF encoded in the
|
||||
/// discriminator.
|
||||
inline const DILocation *cloneWithDuplicationFactor(unsigned DF) const;
|
||||
|
||||
/// When two instructions are combined into a single instruction we also
|
||||
/// need to combine the original locations into a single location.
|
||||
@ -1351,6 +1389,28 @@ public:
|
||||
static bool classof(const Metadata *MD) {
|
||||
return MD->getMetadataID() == DILocationKind;
|
||||
}
|
||||
|
||||
/// Packs the low 12 bits of \p U into a prefix encoding of at most 13 bits.
///   value bits 1~5   --> encoded bits 1~5
///   value bits 6~12  --> encoded bits 7~13
///   encoded bit 6 is a flag: it is set only when value bits 6~12 are in use.
/// Any bits of \p U above the 12th are discarded.
static unsigned getPrefixEncodingFromUnsigned(unsigned U) {
  const unsigned Value = U & 0xfff;
  // Small values fit in the 5-bit form and are stored verbatim.
  if (Value <= 0x1f)
    return Value;
  // Otherwise move the upper 7 bits one position left to make room for the
  // flag bit (0x20) that marks the long form.
  const unsigned Low = Value & 0x1f;
  const unsigned High = (Value & 0xfe0) << 1;
  return High | 0x20 | Low;
}
|
||||
|
||||
/// Inverse of getPrefixEncodingFromUnsigned: recovers the plain value from a
/// prefix-encoded component. Encoded bit 6 (0x20) selects the long form, in
/// which encoded bits 7~13 supply value bits 6~12.
static unsigned getUnsignedFromPrefixEncoding(unsigned U) {
  unsigned Value = U & 0x1f;
  if (U & 0x20)
    Value |= (U >> 1) & 0xfe0;
  return Value;
}
|
||||
|
||||
/// Strips the leading component from discriminator \p D and returns the
/// remaining bits, so that the next component starts at the lowest bit.
/// An empty component (lowest bit set) occupies 1 bit; a non-empty one
/// occupies 7 bits, or 14 bits when its long-form flag (0x40) is set.
static unsigned getNextComponentInDiscriminator(unsigned D) {
  if (D & 1)
    return D >> 1;
  const unsigned Width = (D & 0x40) ? 14 : 7;
  return D >> Width;
}
|
||||
};
|
||||
|
||||
/// Subprogram description.
|
||||
@ -1684,7 +1744,8 @@ unsigned DILocation::getDiscriminator() const {
|
||||
return 0;
|
||||
}
|
||||
|
||||
DILocation *DILocation::cloneWithDiscriminator(unsigned Discriminator) const {
|
||||
const DILocation *
|
||||
DILocation::cloneWithDiscriminator(unsigned Discriminator) const {
|
||||
DIScope *Scope = getScope();
|
||||
// Skip all parent DILexicalBlockFile that already have a discriminator
|
||||
// assigned. We do not want to have nested DILexicalBlockFiles that have
|
||||
@ -1700,6 +1761,52 @@ DILocation *DILocation::cloneWithDiscriminator(unsigned Discriminator) const {
|
||||
getInlinedAt());
|
||||
}
|
||||
|
||||
unsigned DILocation::getBaseDiscriminator() const {
|
||||
unsigned D = getDiscriminator();
|
||||
if ((D & 1) == 0)
|
||||
return getUnsignedFromPrefixEncoding(D >> 1);
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
unsigned DILocation::getDuplicationFactor() const {
|
||||
unsigned D = getDiscriminator();
|
||||
D = getNextComponentInDiscriminator(D);
|
||||
if (D == 0 || (D & 1))
|
||||
return 1;
|
||||
else
|
||||
return getUnsignedFromPrefixEncoding(D >> 1);
|
||||
}
|
||||
|
||||
unsigned DILocation::getCopyIdentifier() const {
|
||||
return getUnsignedFromPrefixEncoding(getNextComponentInDiscriminator(
|
||||
getNextComponentInDiscriminator(getDiscriminator())));
|
||||
}
|
||||
|
||||
// Returns a location whose discriminator holds only the encoded base
// discriminator \p D. A value of 0 leaves the location untouched and returns
// this object itself.
const DILocation *DILocation::setBaseDiscriminator(unsigned D) const {
  if (D != 0)
    return cloneWithDiscriminator(getPrefixEncodingFromUnsigned(D) << 1);
  return this;
}
|
||||
|
||||
// Returns a location whose discriminator records this location's current
// duplication factor multiplied by \p DF, keeping the base discriminator and
// the copy identifier.
//
// Returns this object unchanged when the combined factor is <= 1 (nothing to
// record) or when it cannot be represented: getPrefixEncodingFromUnsigned()
// keeps only the low 12 bits, so a larger factor would be silently truncated
// and stored as a wrong value.
const DILocation *DILocation::cloneWithDuplicationFactor(unsigned DF) const {
  const unsigned MaxComponentValue = 0xfff;
  const unsigned CurrentDF = getDuplicationFactor();
  // Range check done by division so the multiplication below cannot wrap.
  if (CurrentDF != 0 && DF > MaxComponentValue / CurrentDF)
    return this;

  DF *= CurrentDF;
  if (DF <= 1)
    return this;

  unsigned BD = getBaseDiscriminator();
  // getCopyIdentifier() returns the decoded value, so it has to be encoded
  // again before being placed back after the duplication-factor component
  // (which is 7 or 14 bits wide depending on the size of DF).
  unsigned CI = getPrefixEncodingFromUnsigned(getCopyIdentifier())
                << (DF > 0x1f ? 14 : 7);
  unsigned D = CI | (getPrefixEncodingFromUnsigned(DF) << 1);

  // Prepend the base discriminator: a single set bit when it is empty,
  // otherwise its 7- or 14-bit encoded form.
  if (BD == 0)
    D = (D << 1) | 1;
  else
    D = (D << (BD > 0x1f ? 14 : 7)) | (getPrefixEncodingFromUnsigned(BD) << 1);

  return cloneWithDiscriminator(D);
}
|
||||
|
||||
class DINamespace : public DIScope {
|
||||
friend class LLVMContextImpl;
|
||||
friend class MDNode;
|
||||
|
@ -671,6 +671,9 @@ public:
|
||||
/// to \a DISubprogram.
|
||||
DISubprogram *getSubprogram() const;
|
||||
|
||||
/// Returns true if we should emit debug info for profiling.
|
||||
bool isDebugInfoForProfiling() const;
|
||||
|
||||
private:
|
||||
void allocHungoffUselist();
|
||||
template<int Idx> void setHungoffOperand(Constant *C);
|
||||
|
@ -1459,6 +1459,15 @@ DISubprogram *Function::getSubprogram() const {
|
||||
return cast_or_null<DISubprogram>(getMetadata(LLVMContext::MD_dbg));
|
||||
}
|
||||
|
||||
bool Function::isDebugInfoForProfiling() const {
|
||||
if (DISubprogram *SP = getSubprogram()) {
|
||||
if (DICompileUnit *CU = SP->getUnit()) {
|
||||
return CU->getDebugInfoForProfiling();
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Attaches \p GV to this global variable as metadata of kind MD_dbg.
void GlobalVariable::addDebugInfo(DIGlobalVariableExpression *GV) {
|
||||
addMetadata(LLVMContext::MD_dbg, *GV);
|
||||
}
|
||||
|
@ -190,8 +190,8 @@ static bool addDiscriminators(Function &F) {
|
||||
// discriminator is needed to distinguish both instructions.
|
||||
// Only the lowest 7 bits are used to represent a discriminator to fit
|
||||
// it in 1 byte ULEB128 representation.
|
||||
unsigned Discriminator = (R.second ? ++LDM[L] : LDM[L]) & 0x7f;
|
||||
I.setDebugLoc(DIL->cloneWithDiscriminator(Discriminator));
|
||||
unsigned Discriminator = R.second ? ++LDM[L] : LDM[L];
|
||||
I.setDebugLoc(DIL->setBaseDiscriminator(Discriminator));
|
||||
DEBUG(dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":"
|
||||
<< DIL->getColumn() << ":" << Discriminator << " " << I
|
||||
<< "\n");
|
||||
@ -216,8 +216,8 @@ static bool addDiscriminators(Function &F) {
|
||||
Location L =
|
||||
std::make_pair(CurrentDIL->getFilename(), CurrentDIL->getLine());
|
||||
if (!CallLocations.insert(L).second) {
|
||||
Current->setDebugLoc(
|
||||
CurrentDIL->cloneWithDiscriminator((++LDM[L]) & 0x7f));
|
||||
unsigned Discriminator = ++LDM[L];
|
||||
Current->setDebugLoc(CurrentDIL->setBaseDiscriminator(Discriminator));
|
||||
Changed = true;
|
||||
}
|
||||
}
|
||||
|
@ -27,6 +27,7 @@
|
||||
#include "llvm/Analysis/ScalarEvolution.h"
|
||||
#include "llvm/IR/BasicBlock.h"
|
||||
#include "llvm/IR/DataLayout.h"
|
||||
#include "llvm/IR/DebugInfoMetadata.h"
|
||||
#include "llvm/IR/Dominators.h"
|
||||
#include "llvm/IR/IntrinsicInst.h"
|
||||
#include "llvm/IR/LLVMContext.h"
|
||||
@ -462,6 +463,12 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
|
||||
for (Loop *SubLoop : *L)
|
||||
LoopsToSimplify.insert(SubLoop);
|
||||
|
||||
if (Header->getParent()->isDebugInfoForProfiling())
|
||||
for (BasicBlock *BB : L->getBlocks())
|
||||
for (Instruction &I : *BB)
|
||||
if (const DILocation *DIL = I.getDebugLoc())
|
||||
I.setDebugLoc(DIL->cloneWithDuplicationFactor(Count));
|
||||
|
||||
for (unsigned It = 1; It != Count; ++It) {
|
||||
std::vector<BasicBlock*> NewBlocks;
|
||||
SmallDenseMap<const Loop *, Loop *, 4> NewLoops;
|
||||
|
@ -616,6 +616,10 @@ protected:
|
||||
/// vector of instructions.
|
||||
void addMetadata(ArrayRef<Value *> To, Instruction *From);
|
||||
|
||||
/// \brief Set the debug location in the builder using the debug location in
|
||||
/// the instruction.
|
||||
void setDebugLocFromInst(IRBuilder<> &B, const Value *Ptr);
|
||||
|
||||
/// This is a helper class for maintaining vectorization state. It's used for
|
||||
/// mapping values from the original loop to their corresponding values in
|
||||
/// the new loop. Two mappings are maintained: one for vectorized values and
|
||||
@ -865,12 +869,14 @@ static Instruction *getDebugLocFromInstOrOperands(Instruction *I) {
|
||||
return I;
|
||||
}
|
||||
|
||||
/// \brief Set the debug location in the builder using the debug location in the
|
||||
/// instruction.
|
||||
static void setDebugLocFromInst(IRBuilder<> &B, const Value *Ptr) {
|
||||
if (const Instruction *Inst = dyn_cast_or_null<Instruction>(Ptr))
|
||||
B.SetCurrentDebugLocation(Inst->getDebugLoc());
|
||||
else
|
||||
void InnerLoopVectorizer::setDebugLocFromInst(IRBuilder<> &B, const Value *Ptr) {
|
||||
if (const Instruction *Inst = dyn_cast_or_null<Instruction>(Ptr)) {
|
||||
const DILocation *DIL = Inst->getDebugLoc();
|
||||
if (DIL && Inst->getFunction()->isDebugInfoForProfiling())
|
||||
B.SetCurrentDebugLocation(DIL->cloneWithDuplicationFactor(UF * VF));
|
||||
else
|
||||
B.SetCurrentDebugLocation(DIL);
|
||||
} else
|
||||
B.SetCurrentDebugLocation(DebugLoc());
|
||||
}
|
||||
|
||||
|
@ -58,5 +58,5 @@ attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointe
|
||||
; CHECK: ![[FOO:[0-9]+]] = distinct !DISubprogram(name: "foo"
|
||||
; CHECK: ![[BLOCK:[0-9]+]] = distinct !DILexicalBlock(scope: ![[FOO]],{{.*}} line: 3)
|
||||
; CHECK: ![[THEN]] = !DILocation(line: 3, scope: ![[BLOCKFILE:[0-9]+]])
|
||||
; CHECK: ![[BLOCKFILE]] = !DILexicalBlockFile(scope: ![[BLOCK]],{{.*}} discriminator: 1)
|
||||
; CHECK: ![[BLOCKFILE]] = !DILexicalBlockFile(scope: ![[BLOCK]],{{.*}} discriminator: 2)
|
||||
; CHECK: ![[END]] = !DILocation(line: 4, scope: ![[FOO]])
|
||||
|
@ -47,4 +47,4 @@ attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-
|
||||
!14 = !DILocation(line: 4, column: 3, scope: !4)
|
||||
|
||||
; CHECK: ![[CALL2]] = !DILocation(line: 4, column: 10, scope: ![[CALL2BLOCK:[0-9]+]])
|
||||
; CHECK: ![[CALL2BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 1)
|
||||
; CHECK: ![[CALL2BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 2)
|
||||
|
@ -5,7 +5,7 @@
|
||||
; #1 void bar();
|
||||
; #2
|
||||
; #3 void foo() {
|
||||
; #4 bar();bar()/*discriminator 1*/;bar()/*discriminator 2*/;
|
||||
; #4 bar();bar()/*discriminator 2*/;bar()/*discriminator 4*/;
|
||||
; #5 }
|
||||
|
||||
; Function Attrs: uwtable
|
||||
@ -49,6 +49,6 @@ attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-
|
||||
!13 = !DILocation(line: 5, column: 1, scope: !4)
|
||||
|
||||
; CHECK: ![[CALL1]] = !DILocation(line: 4, column: 9, scope: ![[CALL1BLOCK:[0-9]+]])
|
||||
; CHECK: ![[CALL1BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 1)
|
||||
; CHECK: ![[CALL1BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 2)
|
||||
; CHECK: ![[CALL2]] = !DILocation(line: 4, column: 15, scope: ![[CALL2BLOCK:[0-9]+]])
|
||||
; CHECK: ![[CALL2BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 2)
|
||||
; CHECK: ![[CALL2BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 4)
|
||||
|
@ -10,7 +10,7 @@
|
||||
; #6 }
|
||||
|
||||
; bar(5): discriminator 0
|
||||
; bar(3): discriminator 1
|
||||
; bar(3): discriminator 2
|
||||
|
||||
; Function Attrs: uwtable
|
||||
define void @_Z3fooi(i32 %i) #0 !dbg !4 {
|
||||
@ -69,4 +69,4 @@ attributes #2 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-
|
||||
!20 = !DILocation(line: 6, column: 1, scope: !4)
|
||||
|
||||
; CHECK: ![[ELSE]] = !DILocation(line: 5, column: 18, scope: ![[ELSEBLOCK:[0-9]+]])
|
||||
; CHECK: ![[ELSEBLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 1)
|
||||
; CHECK: ![[ELSEBLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 2)
|
||||
|
@ -69,7 +69,7 @@ attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointe
|
||||
!12 = !DILocation(line: 3, scope: !13)
|
||||
|
||||
!13 = distinct !DILexicalBlock(line: 3, column: 0, file: !1, scope: !11)
|
||||
; CHECK: !DILexicalBlockFile(scope: ![[BLOCK2:[0-9]+]],{{.*}} discriminator: 1)
|
||||
; CHECK: !DILexicalBlockFile(scope: ![[BLOCK2:[0-9]+]],{{.*}} discriminator: 2)
|
||||
|
||||
!14 = !DILocation(line: 4, scope: !13)
|
||||
; CHECK: ![[BLOCK2]] = distinct !DILexicalBlock(scope: ![[BLOCK1]],{{.*}} line: 3)
|
||||
|
@ -62,8 +62,8 @@ attributes #3 = { nounwind readnone }
|
||||
!12 = distinct !DISubprogram(name: "g", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: true, unit: !0, variables: !2)
|
||||
!13 = distinct !DILocation(line: 1, column: 17, scope: !14)
|
||||
; CHECK: ![[BF:.*]] = !DILexicalBlockFile(scope: ![[LB1:[0-9]+]],
|
||||
; CHECK-SAME: discriminator: 1)
|
||||
!14 = !DILexicalBlockFile(scope: !15, file: !1, discriminator: 1)
|
||||
; CHECK-SAME: discriminator: 2)
|
||||
!14 = !DILexicalBlockFile(scope: !15, file: !1, discriminator: 2)
|
||||
; CHECK: ![[LB1]] = distinct !DILexicalBlock(scope: ![[LB2:[0-9]+]],
|
||||
; CHECK-SAME: line: 1, column: 16)
|
||||
!15 = distinct !DILexicalBlock(scope: !16, file: !1, line: 1, column: 16)
|
||||
|
@ -67,6 +67,6 @@ attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointe
|
||||
!12 = !DILocation(line: 4, scope: !4)
|
||||
|
||||
; CHECK: ![[THEN]] = !DILocation(line: 3, scope: ![[THENBLOCK:[0-9]+]])
|
||||
; CHECK: ![[THENBLOCK]] = !DILexicalBlockFile(scope: ![[SCOPE:[0-9]+]],{{.*}} discriminator: 1)
|
||||
; CHECK: ![[THENBLOCK]] = !DILexicalBlockFile(scope: ![[SCOPE:[0-9]+]],{{.*}} discriminator: 2)
|
||||
; CHECK: ![[ELSE]] = !DILocation(line: 3, scope: ![[ELSEBLOCK:[0-9]+]])
|
||||
; CHECK: ![[ELSEBLOCK]] = !DILexicalBlockFile(scope: ![[SCOPE]],{{.*}} discriminator: 2)
|
||||
; CHECK: ![[ELSEBLOCK]] = !DILexicalBlockFile(scope: ![[SCOPE]],{{.*}} discriminator: 4)
|
||||
|
@ -7,9 +7,9 @@
|
||||
; #3 }
|
||||
|
||||
; i == 3: discriminator 0
|
||||
; i == 5: discriminator 1
|
||||
; return 100: discriminator 2
|
||||
; return 99: discriminator 3
|
||||
; i == 5: discriminator 2
|
||||
; return 100: discriminator 4
|
||||
; return 99: discriminator 6
|
||||
|
||||
define i32 @_Z3fooi(i32 %i) #0 !dbg !4 {
|
||||
%1 = alloca i32, align 4
|
||||
@ -91,11 +91,11 @@ attributes #1 = { nounwind readnone }
|
||||
; CHECK: ![[F:.*]] = distinct !DISubprogram(name: "foo",
|
||||
; CHECK: ![[IF:.*]] = distinct !DILexicalBlock(scope: ![[F]],{{.*}}line: 2, column: 7)
|
||||
; CHECK: ![[THEN1]] = !DILocation(line: 2, column: 17, scope: ![[THENBLOCK:[0-9]+]])
|
||||
; CHECK: ![[THENBLOCK]] = !DILexicalBlockFile(scope: ![[IF]],{{.*}} discriminator: 1)
|
||||
; CHECK: ![[THENBLOCK]] = !DILexicalBlockFile(scope: ![[IF]],{{.*}} discriminator: 2)
|
||||
; CHECK: ![[THEN2]] = !DILocation(line: 2, column: 19, scope: ![[THENBLOCK]])
|
||||
; CHECK: ![[THEN3]] = !DILocation(line: 2, column: 7, scope: ![[BRBLOCK:[0-9]+]])
|
||||
; CHECK: ![[BRBLOCK]] = !DILexicalBlockFile(scope: ![[F]],{{.*}} discriminator: 1)
|
||||
; CHECK: ![[BRBLOCK]] = !DILexicalBlockFile(scope: ![[F]],{{.*}} discriminator: 2)
|
||||
; CHECK: ![[ELSE]] = !DILocation(line: 2, column: 25, scope: ![[ELSEBLOCK:[0-9]+]])
|
||||
; CHECK: ![[ELSEBLOCK]] = !DILexicalBlockFile(scope: ![[IF]],{{.*}} discriminator: 2)
|
||||
; CHECK: ![[ELSEBLOCK]] = !DILexicalBlockFile(scope: ![[IF]],{{.*}} discriminator: 4)
|
||||
; CHECK: ![[COMBINE]] = !DILocation(line: 2, column: 42, scope: ![[COMBINEBLOCK:[0-9]+]])
|
||||
; CHECK: ![[COMBINEBLOCK]] = !DILexicalBlockFile(scope: ![[IF]],{{.*}} discriminator: 3)
|
||||
; CHECK: ![[COMBINEBLOCK]] = !DILexicalBlockFile(scope: ![[IF]],{{.*}} discriminator: 6)
|
||||
|
70
test/Transforms/LoopVectorize/discriminator.ll
Normal file
70
test/Transforms/LoopVectorize/discriminator.ll
Normal file
@ -0,0 +1,70 @@
|
||||
; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s | FileCheck --check-prefix=LOOPVEC_4_1 %s
|
||||
; RUN: opt -S -loop-vectorize -force-vector-width=2 -force-vector-interleave=3 < %s | FileCheck --check-prefix=LOOPVEC_2_3 %s
|
||||
; RUN: opt -S -loop-unroll -unroll-count=5 < %s | FileCheck --check-prefix=LOOPUNROLL_5 %s
|
||||
; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=4 -loop-unroll -unroll-count=2 < %s | FileCheck --check-prefix=LOOPVEC_UNROLL %s
|
||||
|
||||
; Test if vectorization/unroll factor is recorded in discriminator.
|
||||
;
|
||||
; Original source code:
|
||||
; 1 int *a;
|
||||
; 2 int *b;
|
||||
; 3
|
||||
; 4 void foo() {
|
||||
; 5 for (int i = 0; i < 4096; i++)
|
||||
; 6 a[i] += b[i];
|
||||
; 7 }
|
||||
|
||||
@a = local_unnamed_addr global i32* null, align 8
|
||||
@b = local_unnamed_addr global i32* null, align 8
|
||||
|
||||
define void @_Z3foov() local_unnamed_addr #0 !dbg !6 {
|
||||
%1 = load i32*, i32** @b, align 8, !dbg !8, !tbaa !9
|
||||
%2 = load i32*, i32** @a, align 8, !dbg !13, !tbaa !9
|
||||
br label %3, !dbg !14
|
||||
|
||||
; <label>:3: ; preds = %3, %0
|
||||
%indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %3 ]
|
||||
%4 = getelementptr inbounds i32, i32* %1, i64 %indvars.iv, !dbg !8
|
||||
%5 = load i32, i32* %4, align 4, !dbg !8, !tbaa !15
|
||||
%6 = getelementptr inbounds i32, i32* %2, i64 %indvars.iv, !dbg !13
|
||||
%7 = load i32, i32* %6, align 4, !dbg !17, !tbaa !15
|
||||
%8 = add nsw i32 %7, %5, !dbg !17
|
||||
store i32 %8, i32* %6, align 4, !dbg !17, !tbaa !15
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !18
|
||||
%exitcond = icmp eq i64 %indvars.iv.next, 4096, !dbg !19
|
||||
br i1 %exitcond, label %9, label %3, !dbg !14, !llvm.loop !20
|
||||
|
||||
; <label>:9: ; preds = %3
|
||||
ret void, !dbg !21
|
||||
}
|
||||
|
||||
;LOOPVEC_4_1: discriminator: 17
|
||||
;LOOPVEC_2_3: discriminator: 25
|
||||
;LOOPUNROLL_5: discriminator: 21
|
||||
; When unrolling after loop vectorize, both vec_body and remainder loop
|
||||
; are unrolled.
|
||||
;LOOPVEC_UNROLL: discriminator: 385
|
||||
;LOOPVEC_UNROLL: discriminator: 9
|
||||
|
||||
!llvm.dbg.cu = !{!0}
|
||||
!llvm.module.flags = !{!3, !4}
|
||||
|
||||
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, debugInfoForProfiling: true)
|
||||
!1 = !DIFile(filename: "a.cc", directory: "/")
|
||||
!3 = !{i32 2, !"Dwarf Version", i32 4}
|
||||
!4 = !{i32 2, !"Debug Info Version", i32 3}
|
||||
!6 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 4, unit: !0)
|
||||
!8 = !DILocation(line: 6, column: 13, scope: !6)
|
||||
!9 = !{!10, !10, i64 0}
|
||||
!10 = !{!"any pointer", !11, i64 0}
|
||||
!11 = !{!"omnipotent char", !12, i64 0}
|
||||
!12 = !{!"Simple C++ TBAA"}
|
||||
!13 = !DILocation(line: 6, column: 5, scope: !6)
|
||||
!14 = !DILocation(line: 5, column: 3, scope: !6)
|
||||
!15 = !{!16, !16, i64 0}
|
||||
!16 = !{!"int", !11, i64 0}
|
||||
!17 = !DILocation(line: 6, column: 10, scope: !6)
|
||||
!18 = !DILocation(line: 5, column: 30, scope: !6)
|
||||
!19 = !DILocation(line: 5, column: 21, scope: !6)
|
||||
!20 = distinct !{!20, !14}
|
||||
!21 = !DILocation(line: 7, column: 1, scope: !6)
|
Loading…
Reference in New Issue
Block a user