mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-02-02 23:27:16 +00:00
[CodeGen] Track trip counts per-scop for performance measurement.
- Add a counter that is incremented once on exit from a scop.
- Split the test case into two: one that tests the cycle counts, and another one that tests the trip counts.
- Sample output:

```name=sample-output.txt
scop function, entry block name, exit block name, total time, trip count
warmup, %entry.split, %polly.merge_new_and_old, 5180, 1
f, %entry.split, %polly.merge_new_and_old, 409944, 500
g, %entry.split, %polly.merge_new_and_old, 1226, 1
```

Differential Revision: https://reviews.llvm.org/D33822
llvm-svn: 304543
This commit is contained in:
parent
01bf58d6ec
commit
726c28f8c4
@ -62,6 +62,9 @@ private:
|
||||
/// The total number of cycles spent in the current scop S.
|
||||
llvm::Value *CyclesInCurrentScopPtr;
|
||||
|
||||
/// The total number of times the current scop S is executed.
|
||||
llvm::Value *TripCountForCurrentScopPtr;
|
||||
|
||||
/// The total number of cycles spent within scops.
|
||||
llvm::Value *CyclesInScopsPtr;
|
||||
|
||||
|
@ -87,15 +87,18 @@ static std::string GetScopUniqueVarname(const Scop &S) {
|
||||
std::string EntryString, ExitString;
|
||||
std::tie(EntryString, ExitString) = S.getEntryExitStr();
|
||||
|
||||
Name << "__polly_perf_cycles_in_" << std::string(S.getFunction().getName())
|
||||
Name << "__polly_perf_in_" << std::string(S.getFunction().getName())
|
||||
<< "_from__" << EntryString << "__to__" << ExitString;
|
||||
return Name.str();
|
||||
}
|
||||
|
||||
void PerfMonitor::addScopCounter() {
|
||||
const std::string varname = GetScopUniqueVarname(S);
|
||||
TryRegisterGlobal(M, varname.c_str(), Builder.getInt64(0),
|
||||
TryRegisterGlobal(M, (varname + "_cycles").c_str(), Builder.getInt64(0),
|
||||
&CyclesInCurrentScopPtr);
|
||||
|
||||
TryRegisterGlobal(M, (varname + "_trip_count").c_str(), Builder.getInt64(0),
|
||||
&TripCountForCurrentScopPtr);
|
||||
}
|
||||
|
||||
void PerfMonitor::addGlobalVariables() {
|
||||
@ -160,7 +163,7 @@ Function *PerfMonitor::insertFinalReporting() {
|
||||
|
||||
RuntimeDebugBuilder::createCPUPrinter(
|
||||
Builder, "scop function, "
|
||||
"entry block name, exit block name, total time\n");
|
||||
"entry block name, exit block name, total time, trip count\n");
|
||||
ReturnFromFinal = Builder.CreateRetVoid();
|
||||
return ExitFn;
|
||||
}
|
||||
@ -179,13 +182,17 @@ void PerfMonitor::AppendScopReporting() {
|
||||
|
||||
Value *CyclesInCurrentScop =
|
||||
Builder.CreateLoad(this->CyclesInCurrentScopPtr, true);
|
||||
|
||||
Value *TripCountForCurrentScop =
|
||||
Builder.CreateLoad(this->TripCountForCurrentScopPtr, true);
|
||||
|
||||
std::string EntryName, ExitName;
|
||||
std::tie(EntryName, ExitName) = S.getEntryExitStr();
|
||||
|
||||
// print in CSV for easy parsing with other tools.
|
||||
RuntimeDebugBuilder::createCPUPrinter(Builder, S.getFunction().getName(),
|
||||
", ", EntryName, ", ", ExitName, ", ",
|
||||
CyclesInCurrentScop, "\n");
|
||||
RuntimeDebugBuilder::createCPUPrinter(
|
||||
Builder, S.getFunction().getName(), ", ", EntryName, ", ", ExitName, ", ",
|
||||
CyclesInCurrentScop, ", ", TripCountForCurrentScop, "\n");
|
||||
|
||||
ReturnFromFinal = Builder.CreateRetVoid();
|
||||
}
|
||||
@ -288,4 +295,11 @@ void PerfMonitor::insertRegionEnd(Instruction *InsertBefore) {
|
||||
Value *CyclesInCurrentScop = Builder.CreateLoad(CyclesInCurrentScopPtr, true);
|
||||
CyclesInCurrentScop = Builder.CreateAdd(CyclesInCurrentScop, CyclesInScop);
|
||||
Builder.CreateStore(CyclesInCurrentScop, CyclesInCurrentScopPtr, true);
|
||||
|
||||
Value *TripCountForCurrentScop =
|
||||
Builder.CreateLoad(TripCountForCurrentScopPtr, true);
|
||||
TripCountForCurrentScop =
|
||||
Builder.CreateAdd(TripCountForCurrentScop, Builder.getInt64(1));
|
||||
Builder.CreateStore(TripCountForCurrentScop, TripCountForCurrentScopPtr,
|
||||
true);
|
||||
}
|
||||
|
75
polly/test/Isl/CodeGen/perf_monitoring_cycles_per_scop.ll
Normal file
75
polly/test/Isl/CodeGen/perf_monitoring_cycles_per_scop.ll
Normal file
@ -0,0 +1,75 @@
|
||||
; RUN: opt %loadPolly -polly-codegen -polly-codegen-perf-monitoring \
|
||||
; RUN: -S < %s | FileCheck %s
|
||||
|
||||
; void f(long A[], long N) {
|
||||
; long i;
|
||||
; if (true)
|
||||
; for (i = 0; i < N; ++i)
|
||||
; A[i] = i;
|
||||
; }
|
||||
; void g(long A[], long N) {
|
||||
; long i;
|
||||
; if (true)
|
||||
; for (i = 0; i < N; ++i)
|
||||
; A[i] = i;
|
||||
; }
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @f(i64* %A, i64 %N) nounwind {
|
||||
entry:
|
||||
fence seq_cst
|
||||
br label %next
|
||||
|
||||
next:
|
||||
br i1 true, label %for.i, label %return
|
||||
|
||||
for.i:
|
||||
%indvar = phi i64 [ 0, %next], [ %indvar.next, %for.i ]
|
||||
%scevgep = getelementptr i64, i64* %A, i64 %indvar
|
||||
store i64 %indvar, i64* %scevgep
|
||||
%indvar.next = add nsw i64 %indvar, 1
|
||||
%exitcond = icmp eq i64 %indvar.next, %N
|
||||
br i1 %exitcond, label %return, label %for.i
|
||||
|
||||
return:
|
||||
fence seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
define void @g(i64* %A, i64 %N) nounwind {
|
||||
entry:
|
||||
fence seq_cst
|
||||
br label %next
|
||||
|
||||
next:
|
||||
br i1 true, label %for.i, label %return
|
||||
|
||||
for.i:
|
||||
%indvar = phi i64 [ 0, %next], [ %indvar.next, %for.i ]
|
||||
%scevgep = getelementptr i64, i64* %A, i64 %indvar
|
||||
store i64 %indvar, i64* %scevgep
|
||||
%indvar.next = add nsw i64 %indvar, 1
|
||||
%exitcond = icmp eq i64 %indvar.next, %N
|
||||
br i1 %exitcond, label %return, label %for.i
|
||||
|
||||
return:
|
||||
fence seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; Declaration of globals - Check for cycles declaration.
|
||||
; @"__polly_perf_in_f_from__%next__to__%polly.merge_new_and_old_cycles" = weak thread_local(initialexec) constant i64 0
|
||||
; @"__polly_perf_in_g_from__%next__to__%polly.merge_new_and_old_cycles" = weak thread_local(initialexec) constant i64 0
|
||||
|
||||
; Bumping up number of cycles in f
|
||||
; CHECK: %10 = load volatile i64, i64* @"__polly_perf_in_f_from__%next__to__%polly.merge_new_and_old_cycles"
|
||||
; CHECK-NEXT: %11 = add i64 %10, %7
|
||||
; CHECK-NEXT: store volatile i64 %11, i64* @"__polly_perf_in_f_from__%next__to__%polly.merge_new_and_old_cycles"
|
||||
|
||||
; Bumping up number of cycles in g
|
||||
; CHECK: %10 = load volatile i64, i64* @"__polly_perf_in_g_from__%next__to__%polly.merge_new_and_old_cycles"
|
||||
; CHECK-NEXT: %11 = add i64 %10, %7
|
||||
; CHECK-NEXT: store volatile i64 %11, i64* @"__polly_perf_in_g_from__%next__to__%polly.merge_new_and_old_cycles"
|
@ -1,98 +0,0 @@
|
||||
; RUN: opt %loadPolly -polly-codegen -polly-codegen-perf-monitoring \
|
||||
; RUN: -S < %s | FileCheck %s
|
||||
|
||||
; void f(long A[], long N) {
|
||||
; long i;
|
||||
; if (true)
|
||||
; for (i = 0; i < N; ++i)
|
||||
; A[i] = i;
|
||||
; }
|
||||
; void g(long A[], long N) {
|
||||
; long i;
|
||||
; if (true)
|
||||
; for (i = 0; i < N; ++i)
|
||||
; A[i] = i;
|
||||
; }
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @f(i64* %A, i64 %N) nounwind {
|
||||
entry:
|
||||
fence seq_cst
|
||||
br label %next
|
||||
|
||||
next:
|
||||
br i1 true, label %for.i, label %return
|
||||
|
||||
for.i:
|
||||
%indvar = phi i64 [ 0, %next], [ %indvar.next, %for.i ]
|
||||
%scevgep = getelementptr i64, i64* %A, i64 %indvar
|
||||
store i64 %indvar, i64* %scevgep
|
||||
%indvar.next = add nsw i64 %indvar, 1
|
||||
%exitcond = icmp eq i64 %indvar.next, %N
|
||||
br i1 %exitcond, label %return, label %for.i
|
||||
|
||||
return:
|
||||
fence seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
define void @g(i64* %A, i64 %N) nounwind {
|
||||
entry:
|
||||
fence seq_cst
|
||||
br label %next
|
||||
|
||||
next:
|
||||
br i1 true, label %for.i, label %return
|
||||
|
||||
for.i:
|
||||
%indvar = phi i64 [ 0, %next], [ %indvar.next, %for.i ]
|
||||
%scevgep = getelementptr i64, i64* %A, i64 %indvar
|
||||
store i64 %indvar, i64* %scevgep
|
||||
%indvar.next = add nsw i64 %indvar, 1
|
||||
%exitcond = icmp eq i64 %indvar.next, %N
|
||||
br i1 %exitcond, label %return, label %for.i
|
||||
|
||||
return:
|
||||
fence seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; Declaration of globals
|
||||
; CHECK: @"__polly_perf_cycles_in_f_from__%next__to__%polly.merge_new_and_old" = weak thread_local(initialexec) constant i64 0
|
||||
; CHECK: @"__polly_perf_cycles_in_g_from__%next__to__%polly.merge_new_and_old" = weak thread_local(initialexec) constant i64 0
|
||||
|
||||
; Bumping up counter in f
|
||||
; CHECK: polly.merge_new_and_old: ; preds = %polly.exiting, %return.region_exiting
|
||||
; CHECK-NEXT: %5 = load volatile i64, i64* @__polly_perf_cycles_in_scop_start
|
||||
; CHECK-NEXT: %6 = call i64 @llvm.x86.rdtscp(i8* bitcast (i32* @__polly_perf_write_loation to i8*))
|
||||
; CHECK-NEXT: %7 = sub i64 %6, %5
|
||||
; CHECK-NEXT: %8 = load volatile i64, i64* @__polly_perf_cycles_in_scops
|
||||
; CHECK-NEXT: %9 = add i64 %8, %7
|
||||
; CHECK-NEXT: store volatile i64 %9, i64* @__polly_perf_cycles_in_scops
|
||||
; CHECK-NEXT: %10 = load volatile i64, i64* @"__polly_perf_cycles_in_f_from__%next__to__%polly.merge_new_and_old"
|
||||
; CHECK-NEXT: %11 = add i64 %10, %7
|
||||
; CHECK-NEXT: store volatile i64 %11, i64* @"__polly_perf_cycles_in_f_from__%next__to__%polly.merge_new_and_old"
|
||||
; CHECK-NEXT: br label %return
|
||||
|
||||
; Bumping up counter in g
|
||||
; CHECK: polly.merge_new_and_old: ; preds = %polly.exiting, %return.region_exiting
|
||||
; CHECK-NEXT: %5 = load volatile i64, i64* @__polly_perf_cycles_in_scop_start
|
||||
; CHECK-NEXT: %6 = call i64 @llvm.x86.rdtscp(i8* bitcast (i32* @__polly_perf_write_loation to i8*))
|
||||
; CHECK-NEXT: %7 = sub i64 %6, %5
|
||||
; CHECK-NEXT: %8 = load volatile i64, i64* @__polly_perf_cycles_in_scops
|
||||
; CHECK-NEXT: %9 = add i64 %8, %7
|
||||
; CHECK-NEXT: store volatile i64 %9, i64* @__polly_perf_cycles_in_scops
|
||||
; CHECK-NEXT: %10 = load volatile i64, i64* @"__polly_perf_cycles_in_g_from__%next__to__%polly.merge_new_and_old"
|
||||
; CHECK-NEXT: %11 = add i64 %10, %7
|
||||
; CHECK-NEXT: store volatile i64 %11, i64* @"__polly_perf_cycles_in_g_from__%next__to__%polly.merge_new_and_old"
|
||||
; CHECK-NEXT: br label %return
|
||||
|
||||
; Final reporting prints
|
||||
; CHECK: %20 = load volatile i64, i64* @"__polly_perf_cycles_in_f_from__%next__to__%polly.merge_new_and_old"
|
||||
; CHECK-NEXT: %21 = call i32 (...) @printf(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @25, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @18, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([3 x i8], [3 x i8] addrspace(4)* @19, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(4)* @20, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([3 x i8], [3 x i8] addrspace(4)* @21, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([25 x i8], [25 x i8] addrspace(4)* @22, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([3 x i8], [3 x i8] addrspace(4)* @23, i32 0, i32 0), i64 %20, i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @24, i32 0, i32 0))
|
||||
; CHECK-NEXT: %22 = call i32 @fflush(i8* null)
|
||||
; CHECK-NEXT: %23 = load volatile i64, i64* @"__polly_perf_cycles_in_g_from__%next__to__%polly.merge_new_and_old"
|
||||
; CHECK-NEXT: %24 = call i32 (...) @printf(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @33, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @26, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([3 x i8], [3 x i8] addrspace(4)* @27, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(4)* @28, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([3 x i8], [3 x i8] addrspace(4)* @29, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([25 x i8], [25 x i8] addrspace(4)* @30, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([3 x i8], [3 x i8] addrspace(4)* @31, i32 0, i32 0), i64 %23, i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @32, i32 0, i32 0))
|
@ -0,0 +1,75 @@
|
||||
; RUN: opt %loadPolly -polly-codegen -polly-codegen-perf-monitoring \
|
||||
; RUN: -S < %s | FileCheck %s
|
||||
|
||||
; void f(long A[], long N) {
|
||||
; long i;
|
||||
; if (true)
|
||||
; for (i = 0; i < N; ++i)
|
||||
; A[i] = i;
|
||||
; }
|
||||
; void g(long A[], long N) {
|
||||
; long i;
|
||||
; if (true)
|
||||
; for (i = 0; i < N; ++i)
|
||||
; A[i] = i;
|
||||
; }
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @f(i64* %A, i64 %N) nounwind {
|
||||
entry:
|
||||
fence seq_cst
|
||||
br label %next
|
||||
|
||||
next:
|
||||
br i1 true, label %for.i, label %return
|
||||
|
||||
for.i:
|
||||
%indvar = phi i64 [ 0, %next], [ %indvar.next, %for.i ]
|
||||
%scevgep = getelementptr i64, i64* %A, i64 %indvar
|
||||
store i64 %indvar, i64* %scevgep
|
||||
%indvar.next = add nsw i64 %indvar, 1
|
||||
%exitcond = icmp eq i64 %indvar.next, %N
|
||||
br i1 %exitcond, label %return, label %for.i
|
||||
|
||||
return:
|
||||
fence seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
define void @g(i64* %A, i64 %N) nounwind {
|
||||
entry:
|
||||
fence seq_cst
|
||||
br label %next
|
||||
|
||||
next:
|
||||
br i1 true, label %for.i, label %return
|
||||
|
||||
for.i:
|
||||
%indvar = phi i64 [ 0, %next], [ %indvar.next, %for.i ]
|
||||
%scevgep = getelementptr i64, i64* %A, i64 %indvar
|
||||
store i64 %indvar, i64* %scevgep
|
||||
%indvar.next = add nsw i64 %indvar, 1
|
||||
%exitcond = icmp eq i64 %indvar.next, %N
|
||||
br i1 %exitcond, label %return, label %for.i
|
||||
|
||||
return:
|
||||
fence seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; Declaration of globals - Check for trip count declaration.
|
||||
; CHECK: @"__polly_perf_in_f_from__%next__to__%polly.merge_new_and_old_trip_count" = weak thread_local(initialexec) constant i64 0
|
||||
; CHECK: @"__polly_perf_in_g_from__%next__to__%polly.merge_new_and_old_trip_count" = weak thread_local(initialexec) constant i64 0
|
||||
|
||||
; Bumping up the trip count in f
|
||||
; CHECK: %12 = load volatile i64, i64* @"__polly_perf_in_f_from__%next__to__%polly.merge_new_and_old_trip_count"
|
||||
; CHECK-NEXT: %13 = add i64 %12, 1
|
||||
; CHECK-NEXT: store volatile i64 %13, i64* @"__polly_perf_in_f_from__%next__to__%polly.merge_new_and_old_trip_count"
|
||||
|
||||
; Bumping up the trip count in g
|
||||
; CHECK: %12 = load volatile i64, i64* @"__polly_perf_in_g_from__%next__to__%polly.merge_new_and_old_trip_count"
|
||||
; CHECK-NEXT: %13 = add i64 %12, 1
|
||||
; CHECK-NEXT: store volatile i64 %13, i64* @"__polly_perf_in_g_from__%next__to__%polly.merge_new_and_old_trip_count"
|
Loading…
x
Reference in New Issue
Block a user