mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-11-26 06:11:37 +00:00
Bug 653311 - Switch jprof from hand-rolled stackwalk code to glibc's backtrace() to work with modern x86 Linuxes, improve jprof output, update README - r=jim_nance (per bz)
This commit is contained in:
parent
04f1d9f037
commit
0c31687332
@ -6,7 +6,8 @@
|
||||
<center>
|
||||
<h1>The Jprof Profiler</h1>
|
||||
<font size="-1">
|
||||
<a href="mailto:jim_nance%yahoo.com">jim_nance@yahoo.com</a>
|
||||
<a href="mailto:jim_nance%yahoo.com">jim_nance@yahoo.com</a><p>
|
||||
Recent (4/2011) updates Randell Jesup (see bugzilla for contact info)
|
||||
</font>
|
||||
<hr>
|
||||
|
||||
@ -46,12 +47,26 @@ default pull. To do this do:
|
||||
<code>ac_add_options --enable-jprof</code> to your <code>.mozconfig</code>) and
|
||||
making sure that you do <strong>not</strong> have the
|
||||
<code>--enable-strip</code> configure option set -- jprof needs symbols to
|
||||
operate.</p>
|
||||
operate. On many architectures with GCC, you'll need to add
|
||||
<code>--enable-optimize="-O3 -fno-omit-frame-pointer"</code> or the
|
||||
equivalent to ensure frame pointer generation in the compiler you're using.</p>
|
||||
|
||||
<p>Finally, build mozilla with your new configuration. Now you can run jprof.</p>
|
||||
|
||||
<h3><a name="usage">Usage</a></h3>
|
||||
|
||||
Options:
|
||||
<ul>
|
||||
<li><b>-s depth</b> : Limit depth looked at from captured stack
|
||||
frames</li>
|
||||
<li><b>-v</b> : Output some information about the symbols, memory map, etc.</li>
|
||||
<li><b>-t</b> : Group output according to thread. Requires external
|
||||
LD_PRELOAD library to help force sampling of spawned threads; jprof
|
||||
normally captures the main thread only. See <a
|
||||
href="http://sam.zoy.org/writings/programming/gprof.html">gprof-helper</a>;
|
||||
it may need adaption for jprof.</li>
|
||||
<li><b>-e exclusion</b> : Allows excluding specific stack frames</li>
|
||||
<li><b>-i inclusion</b> : Allows including specific stack frames</li>
|
||||
</ul>
|
||||
The behavior of jprof is determined by the value of the JPROF_FLAGS environment
|
||||
variable. This environment variable can be composed of several substrings
|
||||
which have the following meanings:
|
||||
@ -67,7 +82,8 @@ which have the following meanings:
|
||||
<li> <b>JP_FIRST=x</b> : Wait x seconds before starting the timer
|
||||
|
||||
<li> <b>JP_PERIOD=y</b> : Set timer to interrupt every y seconds. Only
|
||||
values of y strictly greater than 0.001 are supported.
|
||||
values of y greater than or equal to 0.001 are supported. Default is
|
||||
0.050 (50ms).
|
||||
|
||||
<li> <b>JP_REALTIME</b> : Do the profiling in intervals of real time rather
|
||||
than intervals of time used by the mozilla process (and the kernel
|
||||
@ -103,10 +119,10 @@ being profiled
|
||||
<pre>
|
||||
setenv JPROF_FLAGS "JP_START JP_FIRST=3 JP_PERIOD=0.025" </pre>
|
||||
|
||||
<li>To make the timer start on your signal and fire every 1.5 milliseconds of
|
||||
<li>To make the timer start on your signal and fire every 1 millisecond of
|
||||
program time use:
|
||||
<pre>
|
||||
setenv JPROF_FLAGS "JP_DEFER JP_PERIOD=0.0015" </pre>
|
||||
setenv JPROF_FLAGS "JP_DEFER JP_PERIOD=0.001" </pre>
|
||||
|
||||
<li>To make the timer start on your signal and fire every 10 milliseconds of
|
||||
wall-clock time use:
|
||||
@ -189,82 +205,87 @@ corresponding to one function. A typical section looks something like
|
||||
this:
|
||||
|
||||
<blockquote><pre>
|
||||
<A href="#29355">141300 PL_ProcessPendingEvents</A>
|
||||
<A href="#29372"> 927 PL_ProcessEventsBeforeID</A>
|
||||
29358 0 <a name=29358> 142227</a> <b>PL_HandleEvent</b>
|
||||
<A href="#28546"> 92394 nsInputStreamReadyEvent::EventHandler(PLEvent*)</A>
|
||||
<A href="#41572"> 49181 HandlePLEvent(ReflowEvent*)</A>
|
||||
<A href="#29537"> 481 handleTimerEvent(TimerEventType*)</A>
|
||||
<A href="#34494"> 158 nsTransportStatusEvent::HandleEvent(PLEvent*)</A>
|
||||
<A href="#29359"> 9 PL_DestroyEvent</A>
|
||||
|
||||
<A href="#20319"> 4 __restore_rt</A>
|
||||
index Count Hits Function Name
|
||||
<A href="#72871"> 545 (46.4%) nsBlockFrame::ReflowInlineFrames(nsBlockReflowState&, nsLineList_iterator, int*)</A>
|
||||
<A href="#72873"> 100 (8.5%) nsBlockFrame::ReflowDirtyLines(nsBlockReflowState&)</A>
|
||||
72870 4 (0.3%) <a name=72870> 645 (54.9%)</a> <b>nsBlockFrame::DoReflowInlineFrames(nsBlockReflowState&, nsLineLayout&, nsLineList_iterator, nsFlowAreaRect&, int&, nsFloatManager::SavedState*, int*, LineReflowStatus*, int)</b>
|
||||
<A href="#72821"> 545 (46.4%) nsBlockFrame::ReflowInlineFrame(nsBlockReflowState&, nsLineLayout&, nsLineList_iterator, nsIFrame*, LineReflowStatus*)</A>
|
||||
<A href="#72853"> 83 (7.1%) nsBlockFrame::PlaceLine(nsBlockReflowState&, nsLineLayout&, nsLineList_iterator, nsFloatManager::SavedState*, nsRect&, int&, int*)</A>
|
||||
<A href="#74150"> 9 (0.8%) nsLineLayout::BeginLineReflow(int, int, int, int, int, int)</A>
|
||||
<A href="#74897"> 1 (0.1%) nsTextFrame::GetType() const</A>
|
||||
<A href="#74131"> 1 (0.1%) nsLineLayout::RelativePositionFrames(nsOverflowAreas&)</A>
|
||||
<A href="#58320"> 1 (0.1%) __i686.get_pc_thunk.bx</A>
|
||||
<A href="#53077"> 1 (0.1%) PL_ArenaAllocate</A>
|
||||
</pre></blockquote>
|
||||
|
||||
The information this block tells us is:
|
||||
|
||||
<ul>
|
||||
<li>There were 0 profiler hits <em>in</em> <code>PL_HandleEvent</code>
|
||||
<li>There were 142227 profiler hits <em>under</em> <code>PL_HandleEvent</code>. Of these:
|
||||
<li>There were 4 profiler hits <em>in</em> <code>nsBlockFrame::DoReflowInlineFrames</code>
|
||||
<li>There were 645 profiler hits <em>in or under</em> <code>nsBlockFrame::DoReflowInlineFrames</code>. Of these:
|
||||
<ul>
|
||||
<li>92394 were in or under <code>nsInputStreamReadyEvent::EventHandler</code>
|
||||
<li>49181 were in or under <code>HandlePLEvent(ReflowEvent*)</code>
|
||||
<li>481 were in or under <code>handleTimerEvent</code>
|
||||
<li>158 were in or under <code>nsTransportStatusEvent::HandleEvent</code>
|
||||
<li>9 were in or under <code>PL_DestroyEvent</code>
|
||||
<li>4 were in or under <code>__restore_rt</code>
|
||||
<li>545 were in or under <code>nsBlockFrame::ReflowInlineFrame</code>
|
||||
<li>83 were in or under <code>nsBlockFrame::PlaceLine</code>
|
||||
<li>9 were in or under <code>nsLineLayout::BeginLineReflow</code>
|
||||
<li>1 was in or under <code>nsTextFrame::GetType</code>
|
||||
<li>1 was in or under <code>nsLineLayout::RelativePositionFrames</code>
|
||||
<li>1 was in or under <code>__i686.get_pc_thunk.bx</code>
|
||||
<li>1 was in or under <code>PL_ArenaAllocate</code>
|
||||
</ul>
|
||||
<li>Of these 142227 calls into <code>PL_HandleEvent</code>:
|
||||
<li>Of these 645 calls into <code>nsBlockFrame::DoReflowInlineFrames</code>:
|
||||
<ul>
|
||||
<li>141300 came from <code>PL_ProcessPendingEvents</code>
|
||||
<li>927 came from <code>PL_ProcessEventsBeforeID</code>
|
||||
<li>545 came from <code>nsBlockFrame::ReflowInlineFrames</code>
|
||||
<li>100 came from <code>nsBlockFrame::ReflowDirtyLines</code>
|
||||
</ul>
|
||||
</ul>
|
||||
|
||||
|
||||
The rest of this section explains how to read this information off from the jprof output.
|
||||
|
||||
<p>This block corresponds to the function <code>PL_HandleEvent</code>, which is
|
||||
<p>This block corresponds to the function <code>nsBlockFrame::DoReflowInlineFrames</code>, which is
|
||||
therefore bolded and not a link. The name of this function is preceded by
|
||||
three numbers which have the following meaning. The number on the left (29358)
|
||||
is the index number, and is not important. The center number (0) is the number
|
||||
of times this function was interrupted by the timer. The last number (142227)
|
||||
is the number of times this function was in the call stack when the timer went
|
||||
five numbers which have the following meaning. The number on the left (72870)
|
||||
is the index number, and is not important. The next number (4) and the
|
||||
percentage following (0.3%) are the number
|
||||
of times this function was interrupted by the timer and the percentage of
|
||||
the total hits that is. The last number pair ("645 (54.9%)")
|
||||
are the number of times this function was in the call stack when the timer went
|
||||
off. That is, the timer went off while we were in code that was ultimately
|
||||
called from <code>PL_HandleEvent</code>.
|
||||
called from <code>nsBlockFrame::DoReflowInlineFrames</code>.
|
||||
<p>For our example we can see that our function was in the call stack for
|
||||
142227 interrupt ticks, but we were never the function that was running when
|
||||
the interrupt arrived.
|
||||
645 interrupt ticks, but we were only the function that was running when
|
||||
the interrupt arrived 4 times.
|
||||
<P>
|
||||
The functions listed above the line for <code>PL_HandleEvent</code> are its
|
||||
The functions listed above the line for <code>nsBlockFrame::DoReflowInlineFrames</code> are its
|
||||
callers. The numbers to the left of these function names are the numbers of
|
||||
times these functions were in the call stack as callers of
|
||||
<code>PL_HandleEvent</code>. In our example, we were called 927 times by
|
||||
<code>PL_ProcessEventsBeforeID</code> and 141300 times by
|
||||
<code>PL_ProcessPendingEvents</code>.
|
||||
<code>nsBlockFrame::DoReflowInlineFrames</code>. In our example, we were called 545 times by
|
||||
<code>nsBlockFrame::ReflowInlineFrames</code> and 100 times by
|
||||
<code>nsBlockFrame::ReflowDirtyLines</code>.
|
||||
<P>
|
||||
The functions listed below the line for <code>PL_HandleEvent</code> are its
|
||||
The functions listed below the line for <code>nsBlockFrame::DoReflowInlineFrames</code> are its
|
||||
callees. The numbers to the left of the function names are the numbers of
|
||||
times these functions were in the callstack as callees of <code>PL_HandleEvent</code>. In our example, of the 142227 profiler hits under <code>PL_HandleEvent</code> 92394 were under <code>nsInputStreamReadyEvent::EventHandler</code>, 49181 were under <code>HandlePLEvent(ReflowEvent*)</code>, and so forth.
|
||||
times these functions were in the callstack as callees of
|
||||
<code>nsBlockFrame::DoReflowInlineFrames</code> and the corresponding percentages. In our example, of the 645 profiler hits under <code>nsBlockFrame::DoReflowInlineFrames</code> 545 were under <code>nsBlockFrame::ReflowInlineFrame</code>, 83 were under <code>nsBlockFrame::PlaceLine</code>, and so forth.<p>
|
||||
|
||||
<b>NOTE:</b> If there are loops of execution or recursion, the numbers will
|
||||
not add up and percentages can exceed 100%. If a function directly calls
|
||||
itself "(self)" will be appended to the line, but indirect recursion will
|
||||
not be marked.
|
||||
|
||||
<h3>Bugs</h3>
|
||||
Jprof has only been tested under Red Hat Linux 6.0, 6.1, and 6.2. It does
|
||||
not work under 6.0, though it is possible hack up the source code and make
|
||||
it work there. The way I determine the stack trace from inside the
|
||||
signal handler is tightly bound to the version of glibc that is running.
|
||||
If you know of a more portable way to get this information please let
|
||||
me know.
|
||||
|
||||
<h3>Update</h3>
|
||||
The current build of Jprof has only been tested under Ubuntu 8.04 LTS, but
|
||||
should work under any fairly modern linux distribution using GCC/GLIBC.
|
||||
Please update this document with any known compatibilities/incompatibilities.
|
||||
<p>
|
||||
If you get an error:<p><code>Inconsistency detected by ld.so: dl-open.c: 260: dl_open_worker: Assertion `_dl_debug_initialize (0, args->nsid)->r_state == RT_CONSISTENT' failed!
|
||||
</code><p>that means you've hit a timing hole in the version of glibc you're
|
||||
running. See <a
|
||||
href="http://sources.redhat.com/bugzilla/show_bug.cgi?id=4578">Redhat bug 4578</a>.
|
||||
<!-- <h3>Update</h3>
|
||||
<ul>
|
||||
<li>Ben Bucksch reports that installing the Red Hat 6.1 glibc rpms on a Red Hat
|
||||
6.0 system allows jprof to work, and does not seem to break anything except
|
||||
gdm (the Gnome login program), and that can be fixed by installing the RH 6.1
|
||||
gdb rpm.</li>
|
||||
<li>David Baron reports that jprof works under RedHat 6.0 if one uncomments
|
||||
the <code>#define JPROF_PTHREAD_HACK</code> near the beginning of
|
||||
<code>libmalloc.cpp</code>.</li>
|
||||
</ul>
|
||||
-->
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
@ -1,3 +1,4 @@
|
||||
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
@ -128,6 +129,7 @@ leaky::leaky()
|
||||
|
||||
quiet = TRUE;
|
||||
showAddress = FALSE;
|
||||
showThreads = FALSE;
|
||||
stackDepth = 100000;
|
||||
|
||||
mappedLogFile = -1;
|
||||
@ -149,7 +151,8 @@ leaky::~leaky()
|
||||
|
||||
void leaky::usageError()
|
||||
{
|
||||
fprintf(stderr, "Usage: %s prog log\n", (char*) applicationName);
|
||||
fprintf(stderr, "Usage: %s [-v][-t] [-e exclude] [-i include] [-s stackdepth] prog log\n", (char*) applicationName);
|
||||
fprintf(stderr, "\t-v: verbose\n\t-t: split threads\n");
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
@ -165,14 +168,15 @@ void leaky::initialize(int argc, char** argv)
|
||||
|
||||
int arg;
|
||||
int errflg = 0;
|
||||
while ((arg = getopt(argc, argv, "adEe:gh:i:r:Rs:tqx")) != -1) {
|
||||
while ((arg = getopt(argc, argv, "adEe:gh:i:r:Rs:tqvx")) != -1) {
|
||||
switch (arg) {
|
||||
case '?':
|
||||
default:
|
||||
errflg++;
|
||||
break;
|
||||
case 'a':
|
||||
break;
|
||||
case 'A':
|
||||
case 'A': // not implemented
|
||||
showAddress = TRUE;
|
||||
break;
|
||||
case 'd':
|
||||
@ -184,7 +188,7 @@ void leaky::initialize(int argc, char** argv)
|
||||
break;
|
||||
case 'g':
|
||||
break;
|
||||
case 'r':
|
||||
case 'r': // not implemented
|
||||
roots.add(optarg);
|
||||
if (!includes.IsEmpty()) {
|
||||
errflg++;
|
||||
@ -207,7 +211,12 @@ void leaky::initialize(int argc, char** argv)
|
||||
case 'x':
|
||||
break;
|
||||
case 'q':
|
||||
quiet = TRUE;
|
||||
break;
|
||||
case 'v':
|
||||
quiet = !quiet;
|
||||
break;
|
||||
case 't':
|
||||
showThreads = TRUE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -265,6 +274,10 @@ void leaky::LoadMap()
|
||||
|
||||
void leaky::open()
|
||||
{
|
||||
int threadArray[100]; // should auto-expand
|
||||
int last_thread = -1;
|
||||
int numThreads=0;
|
||||
|
||||
LoadMap();
|
||||
|
||||
setupSymbols(progFile);
|
||||
@ -279,7 +292,61 @@ void leaky::open()
|
||||
firstLogEntry = (malloc_log_entry*) mapFile(mappedLogFile, PROT_READ, &size);
|
||||
lastLogEntry = (malloc_log_entry*)((char*)firstLogEntry + size);
|
||||
|
||||
analyze();
|
||||
fprintf(stdout,"<html><head><title>Jprof Profile Report</title></head><body>\n");
|
||||
fprintf(stdout,"<h1><center>Jprof Profile Report</center></h1>\n");
|
||||
|
||||
if (showThreads)
|
||||
{
|
||||
// Find all the threads captured
|
||||
|
||||
// pthread/linux docs say the signal can be delivered to any thread in
|
||||
// the process. In practice, it appears in Linux that it's always
|
||||
// delivered to the thread that called setitimer(), and each thread can
|
||||
// have a separate itimer. There's a support library for gprof that
|
||||
// overlays pthread_create() to set timers in any threads you spawn.
|
||||
|
||||
// This loop walks through all the call stacks we recorded
|
||||
for (malloc_log_entry* lep=firstLogEntry;
|
||||
lep < lastLogEntry;
|
||||
lep = reinterpret_cast<malloc_log_entry*>(&lep->pcs[lep->numpcs])) {
|
||||
if (lep->thread != last_thread)
|
||||
{
|
||||
int i;
|
||||
for (i=0; i<numThreads; i++)
|
||||
{
|
||||
if (lep->thread == threadArray[i])
|
||||
break;
|
||||
}
|
||||
if (i == numThreads &&
|
||||
i < (int) (sizeof(threadArray)/sizeof(threadArray[0])))
|
||||
{
|
||||
threadArray[i] = lep->thread;
|
||||
numThreads++;
|
||||
fprintf(stderr,"new thread %d\n",lep->thread);
|
||||
}
|
||||
}
|
||||
}
|
||||
fprintf(stderr,"Num threads %d\n",numThreads);
|
||||
|
||||
fprintf(stdout,"<hr>Threads:<p><pre>\n");
|
||||
for (int i=0; i<numThreads; i++)
|
||||
{
|
||||
fprintf(stdout," <a href=\"thread_%d\">%d</a><p>\n",
|
||||
threadArray[i],threadArray[i]);
|
||||
}
|
||||
fprintf(stdout,"</pre><hr>");
|
||||
|
||||
for (int i=0; i<numThreads; i++)
|
||||
{
|
||||
analyze(threadArray[i]);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
analyze(0);
|
||||
}
|
||||
|
||||
fprintf(stdout,"</pre></body></html>\n");
|
||||
|
||||
exit(0);
|
||||
}
|
||||
@ -428,14 +495,19 @@ void leaky::dumpEntryToLog(malloc_log_entry* lep)
|
||||
displayStackTrace(stdout, lep);
|
||||
}
|
||||
|
||||
void leaky::generateReportHTML(FILE *fp, int *countArray, int count)
|
||||
void leaky::generateReportHTML(FILE *fp, int *countArray, int count, int thread)
|
||||
{
|
||||
fprintf(fp,"<html><head><title>Jprof Profile Report</title></head><body>\n");
|
||||
fprintf(fp,"<h1><center>Jprof Profile Report</center></h1>\n");
|
||||
fprintf(fp,"<center>");
|
||||
fprintf(fp,"<A href=#flat>flat</A><b> | </b><A href=#hier>hierarchical</A>");
|
||||
if (showThreads)
|
||||
{
|
||||
fprintf(fp,"<hr><A NAME=thread_%d><b>Thread: %d</b></A><p>",
|
||||
thread,thread);
|
||||
}
|
||||
fprintf(fp,"<A href=#flat_%d>flat</A><b> | </b><A href=#hier_%d>hierarchical</A>",
|
||||
thread,thread);
|
||||
fprintf(fp,"</center><P><P><P>\n");
|
||||
|
||||
int totalTimerHits = count;
|
||||
int *rankingTable = new int[usefulSymbols];
|
||||
|
||||
for(int cnt=usefulSymbols; --cnt>=0; rankingTable[cnt]=cnt);
|
||||
@ -464,23 +536,29 @@ void leaky::generateReportHTML(FILE *fp, int *countArray, int count)
|
||||
// this loop. Later we can get callers and callees into it like gprof
|
||||
// does
|
||||
fprintf(fp,
|
||||
"<h2><A NAME=hier></A><center><a href=\"http://lxr.mozilla.org/mozilla/source/tools/jprof/README.html#hier\">Hierarchical Profile</a></center></h2><hr>\n");
|
||||
"<h2><A NAME=hier_%d></A><center><a href=\"http://lxr.mozilla.org/mozilla/source/tools/jprof/README.html#hier\">Hierarchical Profile</a></center></h2><hr>\n",
|
||||
thread);
|
||||
fprintf(fp, "<pre>\n");
|
||||
fprintf(fp, "%5s %5s %4s %s\n",
|
||||
"index", "Count", "Hits", "Function Name");
|
||||
fprintf(fp, "%6s %6s %4s %s\n",
|
||||
"index", "Count", "Hits", "Function Name");
|
||||
|
||||
for(i=0; i<usefulSymbols && countArray[rankingTable[i]]>0; i++) {
|
||||
Symbol *sp=&externalSymbols[rankingTable[i]];
|
||||
|
||||
sp->cntP.printReport(fp, this);
|
||||
sp->cntP.printReport(fp, this, rankingTable[i], totalTimerHits);
|
||||
|
||||
char *symname = htmlify(sp->name);
|
||||
fprintf(fp, "%6d %3d <a name=%d>%8d</a> <b>%s</b>\n", rankingTable[i],
|
||||
sp->timerHit, rankingTable[i], countArray[rankingTable[i]],
|
||||
fprintf(fp, "%6d %6d (%3.1f%%)%s <a name=%d>%8d (%3.1f%%)</a>%s <b>%s</b>\n",
|
||||
rankingTable[i],
|
||||
sp->timerHit, (sp->timerHit*1000/totalTimerHits)/10.0,
|
||||
(sp->timerHit*1000/totalTimerHits)/10.0 >= 10.0 ? "" : " ",
|
||||
rankingTable[i], countArray[rankingTable[i]],
|
||||
(countArray[rankingTable[i]]*1000/totalTimerHits)/10.0,
|
||||
(countArray[rankingTable[i]]*1000/totalTimerHits)/10.0 >= 10.0 ? "" : " ",
|
||||
symname);
|
||||
delete [] symname;
|
||||
|
||||
sp->cntC.printReport(fp, this);
|
||||
sp->cntC.printReport(fp, this, rankingTable[i], totalTimerHits);
|
||||
|
||||
fprintf(fp, "<hr>\n");
|
||||
}
|
||||
@ -508,14 +586,21 @@ void leaky::generateReportHTML(FILE *fp, int *countArray, int count)
|
||||
// I wanted the total before walking the list, if this
|
||||
// double-pass over externalSymbols gets slow we can
|
||||
// do single-pass and print this out after the loop finishes.
|
||||
int totalTimerHits = 0;
|
||||
totalTimerHits = 0;
|
||||
for(i=0;
|
||||
i<usefulSymbols && externalSymbols[rankingTable[i]].timerHit>0; i++) {
|
||||
Symbol *sp=&externalSymbols[rankingTable[i]];
|
||||
totalTimerHits += sp->timerHit;
|
||||
}
|
||||
if (totalTimerHits == 0)
|
||||
totalTimerHits = 1;
|
||||
|
||||
fprintf(fp,"<h2><A NAME=flat></A><center><a href=\"http://lxr.mozilla.org/mozilla/source/tools/jprof/README.html#flat\">Flat Profile</a></center></h2><br>\n");
|
||||
if (totalTimerHits != count)
|
||||
fprintf(stderr,"Hit count mismatch: count=%d; totalTimerHits=%d",
|
||||
count,totalTimerHits);
|
||||
|
||||
fprintf(fp,"<h2><A NAME=flat_%d></A><center><a href=\"http://lxr.mozilla.org/mozilla/source/tools/jprof/README.html#flat\">Flat Profile</a></center></h2><br>\n",
|
||||
thread);
|
||||
fprintf(fp, "<pre>\n");
|
||||
|
||||
fprintf(fp, "Total hit count: %d\n", totalTimerHits);
|
||||
@ -532,11 +617,9 @@ void leaky::generateReportHTML(FILE *fp, int *countArray, int count)
|
||||
((float)sp->timerHit/(float)totalTimerHits)*100.0, symname);
|
||||
delete [] symname;
|
||||
}
|
||||
|
||||
fprintf(fp,"</pre></body></html>\n");
|
||||
}
|
||||
|
||||
void leaky::analyze()
|
||||
void leaky::analyze(int thread)
|
||||
{
|
||||
int *countArray = new int[usefulSymbols];
|
||||
int *flagArray = new int[usefulSymbols];
|
||||
@ -558,8 +641,11 @@ void leaky::analyze()
|
||||
lep < lastLogEntry;
|
||||
lep = reinterpret_cast<malloc_log_entry*>(&lep->pcs[lep->numpcs])) {
|
||||
|
||||
if (excluded(lep) || !included(lep))
|
||||
if ((thread != 0 && lep->thread != thread) ||
|
||||
excluded(lep) || !included(lep))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
++stacks; // How many stack frames did we collect
|
||||
|
||||
@ -568,7 +654,7 @@ void leaky::analyze()
|
||||
u_int n = (lep->numpcs < stackDepth) ? lep->numpcs : stackDepth;
|
||||
char** pcp = &lep->pcs[n-1];
|
||||
int idx=-1, parrentIdx=-1; // Init idx incase n==0
|
||||
for(int i=n-1; i>=0; --i, --pcp, parrentIdx=idx) {
|
||||
for (int i=n-1; i>=0; --i, --pcp) {
|
||||
idx = findSymbolIndex(reinterpret_cast<u_long>(*pcp));
|
||||
|
||||
if(idx>=0) {
|
||||
@ -593,6 +679,9 @@ void leaky::analyze()
|
||||
externalSymbols[parrentIdx].regChild(idx);
|
||||
externalSymbols[idx].regParrent(parrentIdx);
|
||||
}
|
||||
// inside if() so an unknown in the middle of a stack won't break
|
||||
// the link!
|
||||
parrentIdx=idx;
|
||||
}
|
||||
}
|
||||
|
||||
@ -602,12 +691,12 @@ void leaky::analyze()
|
||||
}
|
||||
}
|
||||
|
||||
generateReportHTML(stdout, countArray, stacks);
|
||||
generateReportHTML(stdout, countArray, stacks, thread);
|
||||
}
|
||||
|
||||
void FunctionCount::printReport(FILE *fp, leaky *lk)
|
||||
void FunctionCount::printReport(FILE *fp, leaky *lk, int parent, int total)
|
||||
{
|
||||
const char *fmt = " <A href=\"#%d\">%6d %s</A>\n";
|
||||
const char *fmt = " <A href=\"#%d\">%8d (%3.1f%%)%s %s</A>%s\n";
|
||||
|
||||
int nmax, tmax=((~0U)>>1);
|
||||
|
||||
@ -618,7 +707,11 @@ void FunctionCount::printReport(FILE *fp, leaky *lk)
|
||||
if(cnt==tmax) {
|
||||
int idx = getIndex(j);
|
||||
char *symname = htmlify(lk->indexToName(idx));
|
||||
fprintf(fp, fmt, idx, getCount(j), symname);
|
||||
fprintf(fp, fmt, idx, getCount(j),
|
||||
getCount(j)*100.0/total,
|
||||
getCount(j)*100.0/total >= 10.0 ? "" : " ",
|
||||
symname,
|
||||
parent == idx ? " (self)" : "");
|
||||
delete [] symname;
|
||||
} else if(cnt<tmax && cnt>nmax) {
|
||||
nmax=cnt;
|
||||
|
@ -52,7 +52,7 @@ struct leaky;
|
||||
class FunctionCount : public IntCount
|
||||
{
|
||||
public:
|
||||
void printReport(FILE *fp, leaky *lk);
|
||||
void printReport(FILE *fp, leaky *lk, int parent, int total);
|
||||
};
|
||||
|
||||
struct Symbol {
|
||||
@ -90,6 +90,7 @@ struct leaky {
|
||||
|
||||
int quiet;
|
||||
int showAddress;
|
||||
int showThreads;
|
||||
u_int stackDepth;
|
||||
|
||||
int mappedLogFile;
|
||||
@ -115,7 +116,7 @@ struct leaky {
|
||||
|
||||
void LoadMap();
|
||||
|
||||
void analyze();
|
||||
void analyze(int thread);
|
||||
|
||||
void dumpEntryToLog(malloc_log_entry* lep);
|
||||
|
||||
@ -133,7 +134,7 @@ struct leaky {
|
||||
const char* indexToName(int idx) {return externalSymbols[idx].name;}
|
||||
|
||||
private:
|
||||
void generateReportHTML(FILE *fp, int *countArray, int count);
|
||||
void generateReportHTML(FILE *fp, int *countArray, int count, int thread);
|
||||
int findSymbolIndex(u_long address);
|
||||
};
|
||||
|
||||
|
@ -48,6 +48,9 @@ EXPORTS =
|
||||
LIBRARY_NAME = jprof
|
||||
EXPORT_LIBRARY = 1
|
||||
|
||||
# override optimization
|
||||
MOZ_OPTIMIZE_FLAGS = -fno-omit-frame-pointer
|
||||
|
||||
CPPSRCS = \
|
||||
libmalloc.cpp \
|
||||
$(NULL)
|
||||
|
@ -23,6 +23,7 @@
|
||||
* Jim Nance
|
||||
* L. David Baron - JP_REALTIME, JPROF_PTHREAD_HACK, and SIGUSR1 handling
|
||||
* Mike Shaver - JP_RTC_HZ support
|
||||
* Randell Jesup - glibc backtrace() support
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
@ -61,7 +62,9 @@
|
||||
#include <sys/types.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <ucontext.h>
|
||||
#include <execinfo.h>
|
||||
|
||||
#include "libmalloc.h"
|
||||
#include "jprof.h"
|
||||
@ -77,6 +80,10 @@ extern r_debug _r_debug;
|
||||
#include <link.h>
|
||||
#endif
|
||||
|
||||
#define USE_GLIBC_BACKTRACE 1
|
||||
// To debug, use #define JPROF_STATIC
|
||||
#define JPROF_STATIC //static
|
||||
|
||||
static int gLogFD = -1;
|
||||
static pthread_t main_thread;
|
||||
|
||||
@ -87,9 +94,32 @@ static int enableRTCSignals(bool enable);
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
#if defined(i386) || defined(_i386) || defined(__x86_64__)
|
||||
static void CrawlStack(malloc_log_entry* me,
|
||||
void* stack_top, void* top_instr_ptr)
|
||||
JPROF_STATIC void CrawlStack(malloc_log_entry* me,
|
||||
void* stack_top, void* top_instr_ptr)
|
||||
{
|
||||
#if USE_GLIBC_BACKTRACE
|
||||
// This probably works on more than x86! But we need a way to get the
|
||||
// top instruction pointer, which is kindof arch-specific
|
||||
void *array[500];
|
||||
int cnt, i;
|
||||
u_long numpcs = 0;
|
||||
bool tracing = false;
|
||||
|
||||
// This is from glibc. A more generic version might use
|
||||
// libunwind and/or CaptureStackBackTrace() on Windows
|
||||
cnt = backtrace(&array[0],sizeof(array)/sizeof(array[0]));
|
||||
|
||||
// StackHook->JprofLog->CrawlStack
|
||||
// Then we have sigaction, which replaced top_instr_ptr
|
||||
array[3] = top_instr_ptr;
|
||||
for (i = 3; i < cnt; i++)
|
||||
{
|
||||
me->pcs[numpcs++] = (char *) array[i];
|
||||
}
|
||||
me->numpcs = numpcs;
|
||||
|
||||
#else
|
||||
// original code - this breaks on many platforms
|
||||
void **bp;
|
||||
#if defined(__i386)
|
||||
__asm__( "movl %%ebp, %0" : "=g"(bp));
|
||||
@ -102,6 +132,7 @@ static void CrawlStack(malloc_log_entry* me,
|
||||
bp = __builtin_frame_address(0);
|
||||
#endif
|
||||
u_long numpcs = 0;
|
||||
bool tracing = false;
|
||||
|
||||
me->pcs[numpcs++] = (char*) top_instr_ptr;
|
||||
|
||||
@ -111,13 +142,17 @@ static void CrawlStack(malloc_log_entry* me,
|
||||
if (nextbp < bp) {
|
||||
break;
|
||||
}
|
||||
if (bp > stack_top) {
|
||||
if (tracing) {
|
||||
// Skip the signal handling.
|
||||
me->pcs[numpcs++] = (char*) pc;
|
||||
}
|
||||
else if (pc == top_instr_ptr) {
|
||||
tracing = true;
|
||||
}
|
||||
bp = nextbp;
|
||||
}
|
||||
me->numpcs = numpcs;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -169,13 +204,14 @@ static void EndProfilingHook(int signum)
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
static void
|
||||
Log(u_long aTime, void* stack_top, void* top_instr_ptr)
|
||||
JPROF_STATIC void
|
||||
JprofLog(u_long aTime, void* stack_top, void* top_instr_ptr)
|
||||
{
|
||||
// Static is simply to make debugging tollerable
|
||||
static malloc_log_entry me;
|
||||
|
||||
me.delTime = aTime;
|
||||
me.thread = syscall(SYS_gettid); //gettid();
|
||||
|
||||
CrawlStack(&me, stack_top, top_instr_ptr);
|
||||
|
||||
@ -281,7 +317,7 @@ static int enableRTCSignals(bool enable)
|
||||
}
|
||||
#endif
|
||||
|
||||
static void StackHook(
|
||||
JPROF_STATIC void StackHook(
|
||||
int signum,
|
||||
siginfo_t *info,
|
||||
void *ucontext)
|
||||
@ -325,9 +361,9 @@ void *ucontext)
|
||||
|
||||
gregset_t &gregs = ((ucontext_t*)ucontext)->uc_mcontext.gregs;
|
||||
#ifdef __x86_64__
|
||||
Log(millisec, (void*)gregs[REG_RSP], (void*)gregs[REG_RIP]);
|
||||
JprofLog(millisec, (void*)gregs[REG_RSP], (void*)gregs[REG_RIP]);
|
||||
#else
|
||||
Log(millisec, (void*)gregs[REG_ESP], (void*)gregs[REG_EIP]);
|
||||
JprofLog(millisec, (void*)gregs[REG_ESP], (void*)gregs[REG_EIP]);
|
||||
#endif
|
||||
|
||||
if (!rtcHz)
|
||||
@ -371,21 +407,26 @@ NS_EXPORT_(void) setupProfilingStuff(void)
|
||||
|
||||
char *delay = strstr(tst,"JP_PERIOD=");
|
||||
if(delay) {
|
||||
double tmp = strtod(delay+10, NULL);
|
||||
if(tmp>1e-3) {
|
||||
double tmp = strtod(delay+strlen("JP_PERIOD="), NULL);
|
||||
if (tmp>=1e-3) {
|
||||
timerMiliSec = static_cast<unsigned long>(1000 * tmp);
|
||||
}
|
||||
} else {
|
||||
fprintf(stderr,
|
||||
"JP_PERIOD of %g less than 0.001 (1ms), using 1ms\n",
|
||||
tmp);
|
||||
timerMiliSec = 1;
|
||||
}
|
||||
}
|
||||
|
||||
char *first = strstr(tst, "JP_FIRST=");
|
||||
if(first) {
|
||||
firstDelay = atol(first+9);
|
||||
firstDelay = atol(first+strlen("JP_FIRST="));
|
||||
}
|
||||
|
||||
char *rtc = strstr(tst, "JP_RTC_HZ=");
|
||||
if (rtc) {
|
||||
#if defined(linux)
|
||||
rtcHz = atol(rtc+10);
|
||||
rtcHz = atol(rtc+strlen("JP_RTC_HZ="));
|
||||
timerMiliSec = 0; /* This makes JP_FIRST work right. */
|
||||
realTime = 1; /* It's the _R_TC and all. ;) */
|
||||
|
||||
@ -420,6 +461,8 @@ NS_EXPORT_(void) setupProfilingStuff(void)
|
||||
atexit(DumpAddressMap);
|
||||
|
||||
main_thread = pthread_self();
|
||||
//fprintf(stderr,"jprof: main_thread = %u\n",
|
||||
// (unsigned int)main_thread);
|
||||
|
||||
sigemptyset(&mset);
|
||||
action.sa_handler = NULL;
|
||||
|
@ -52,6 +52,7 @@ typedef unsigned long u_long;
|
||||
struct malloc_log_entry {
|
||||
u_long delTime;
|
||||
u_long numpcs;
|
||||
int thread;
|
||||
char* pcs[MAX_STACK_CRAWL];
|
||||
};
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user