Bug 653311 - Switch jprof from hand-rolled stackwalk code to glibc's backtrace() to work with modern x86 Linuxes, improve jprof output, update README - r=jim_nance (per bz)

This commit is contained in:
Randell Jesup 2011-05-15 05:47:48 -04:00
parent 04f1d9f037
commit 0c31687332
6 changed files with 262 additions and 100 deletions

View File

@ -6,7 +6,8 @@
<center>
<h1>The Jprof Profiler</h1>
<font size="-1">
<a href="mailto:jim_nance%yahoo.com">jim_nance@yahoo.com</a>
<a href="mailto:jim_nance%yahoo.com">jim_nance@yahoo.com</a><p>
Recent (4/2011) updates Randell Jesup (see bugzilla for contact info)
</font>
<hr>
@ -46,12 +47,26 @@ default pull. To do this do:
<code>ac_add_options --enable-jprof</code> to your <code>.mozconfig</code>) and
making sure that you do <strong>not</strong> have the
<code>--enable-strip</code> configure option set -- jprof needs symbols to
operate.</p>
operate. On many architectures with GCC, you'll need to add
<code>--enable-optimize="-O3 -fno-omit-frame-pointer"</code> or the
equivalent to ensure frame pointer generation in the compiler you're using.</p>
<p>Finally, build mozilla with your new configuration. Now you can run jprof.</p>
<h3><a name="usage">Usage</a></h3>
Options:
<ul>
<li><b>-s depth</b> : Limit depth looked at from captured stack
frames</li>
<li><b>-v</b> : Output some information about the symbols, memory map, etc.</li>
<li><b>-t</b> : Group output according to thread. Requires external
LD_PRELOAD library to help force sampling of spawned threads; jprof
normally captures the main thread only. See <a
href="http://sam.zoy.org/writings/programming/gprof.html">gprof-helper</a>;
it may need adaption for jprof.</li>
<li><b>-e exclusion</b> : Allows excluding specific stack frames</li>
<li><b>-i inclusion</b> : Allows including specific stack frames</li>
</ul>
The behavior of jprof is determined by the value of the JPROF_FLAGS environment
variable. This environment variable can be composed of several substrings
which have the following meanings:
@ -67,7 +82,8 @@ which have the following meanings:
<li> <b>JP_FIRST=x</b> : Wait x seconds before starting the timer
<li> <b>JP_PERIOD=y</b> : Set timer to interrupt every y seconds. Only
values of y strictly greater than 0.001 are supported.
values of y greater than or equal to 0.001 are supported. Default is
0.050 (50ms).
<li> <b>JP_REALTIME</b> : Do the profiling in intervals of real time rather
than intervals of time used by the mozilla process (and the kernel
@ -103,10 +119,10 @@ being profiled
<pre>
setenv JPROF_FLAGS "JP_START JP_FIRST=3 JP_PERIOD=0.025" </pre>
<li>To make the timer start on your signal and fire every 1.5 milliseconds of
<li>To make the timer start on your signal and fire every 1 millisecond of
program time use:
<pre>
setenv JPROF_FLAGS "JP_DEFER JP_PERIOD=0.0015" </pre>
setenv JPROF_FLAGS "JP_DEFER JP_PERIOD=0.001" </pre>
<li>To make the timer start on your signal and fire every 10 milliseconds of
wall-clock time use:
@ -189,82 +205,87 @@ corresponding to one function. A typical section looks something like
this:
<blockquote><pre>
<A href="#29355">141300 PL_ProcessPendingEvents</A>
<A href="#29372"> 927 PL_ProcessEventsBeforeID</A>
29358 0 <a name=29358> 142227</a> <b>PL_HandleEvent</b>
<A href="#28546"> 92394 nsInputStreamReadyEvent::EventHandler(PLEvent*)</A>
<A href="#41572"> 49181 HandlePLEvent(ReflowEvent*)</A>
<A href="#29537"> 481 handleTimerEvent(TimerEventType*)</A>
<A href="#34494"> 158 nsTransportStatusEvent::HandleEvent(PLEvent*)</A>
<A href="#29359"> 9 PL_DestroyEvent</A>
<A href="#20319"> 4 __restore_rt</A>
index Count Hits Function Name
<A href="#72871"> 545 (46.4%) nsBlockFrame::ReflowInlineFrames(nsBlockReflowState&, nsLineList_iterator, int*)</A>
<A href="#72873"> 100 (8.5%) nsBlockFrame::ReflowDirtyLines(nsBlockReflowState&)</A>
72870 4 (0.3%) <a name=72870> 645 (54.9%)</a> <b>nsBlockFrame::DoReflowInlineFrames(nsBlockReflowState&, nsLineLayout&, nsLineList_iterator, nsFlowAreaRect&, int&, nsFloatManager::SavedState*, int*, LineReflowStatus*, int)</b>
<A href="#72821"> 545 (46.4%) nsBlockFrame::ReflowInlineFrame(nsBlockReflowState&, nsLineLayout&, nsLineList_iterator, nsIFrame*, LineReflowStatus*)</A>
<A href="#72853"> 83 (7.1%) nsBlockFrame::PlaceLine(nsBlockReflowState&, nsLineLayout&, nsLineList_iterator, nsFloatManager::SavedState*, nsRect&, int&, int*)</A>
<A href="#74150"> 9 (0.8%) nsLineLayout::BeginLineReflow(int, int, int, int, int, int)</A>
<A href="#74897"> 1 (0.1%) nsTextFrame::GetType() const</A>
<A href="#74131"> 1 (0.1%) nsLineLayout::RelativePositionFrames(nsOverflowAreas&)</A>
<A href="#58320"> 1 (0.1%) __i686.get_pc_thunk.bx</A>
<A href="#53077"> 1 (0.1%) PL_ArenaAllocate</A>
</pre></blockquote>
The information this block tells us is:
<ul>
<li>There were 0 profiler hits <em>in</em> <code>PL_HandleEvent</code>
<li>There were 142227 profiler hits <em>under</em> <code>PL_HandleEvent</code>. Of these:
<li>There were 4 profiler hits <em>in</em> <code>nsBlockFrame::DoReflowInlineFrames</code>
<li>There were 645 profiler hits <em>in or under</em> <code>nsBlockFrame::DoReflowInlineFrames</code>. Of these:
<ul>
<li>92394 were in or under <code>nsInputStreamReadyEvent::EventHandler</code>
<li>49181 were in or under <code>HandlePLEvent(ReflowEvent*)</code>
<li>481 were in or under <code>handleTimerEvent</code>
<li>158 were in or under <code>nsTransportStatusEvent::HandleEvent</code>
<li>9 were in or under <code>PL_DestroyEvent</code>
<li>4 were in or under <code>__restore_rt</code>
<li>545 were in or under <code>nsBlockFrame::ReflowInlineFrame</code>
<li>83 were in or under <code>nsBlockFrame::PlaceLine</code>
<li>9 were in or under <code>nsLineLayout::BeginLineReflow</code>
<li>1 was in or under <code>nsTextFrame::GetType</code>
<li>1 was in or under <code>nsLineLayout::RelativePositionFrames</code>
<li>1 was in or under <code>__i686.get_pc_thunk.bx</code>
<li>1 was in or under <code>PL_ArenaAllocate</code>
</ul>
<li>Of these 142227 calls into <code>PL_HandleEvent</code>:
<li>Of these 645 calls into <code>nsBlockFrame::DoReflowInlineFrames</code>:
<ul>
<li>141300 came from <code>PL_ProcessPendingEvents</code>
<li>927 came from <code>PL_ProcessEventsBeforeID</code>
<li>545 came from <code>nsBlockFrame::ReflowInlineFrames</code>
<li>100 came from <code>nsBlockFrame::ReflowDirtyLines</code>
</ul>
</ul>
The rest of this section explains how to read this information off from the jprof output.
<p>This block corresponds to the function <code>PL_HandleEvent</code>, which is
<p>This block corresponds to the function <code>nsBlockFrame::DoReflowInlineFrames</code>, which is
therefore bolded and not a link. The name of this function is preceded by
three numbers which have the following meaning. The number on the left (29358)
is the index number, and is not important. The center number (0) is the number
of times this function was interrupted by the timer. The last number (142227)
is the number of times this function was in the call stack when the timer went
five numbers which have the following meaning. The number on the left (72870)
is the index number, and is not important. The next number (4) and the
percentage following (0.3%) are the number
of times this function was interrupted by the timer and the percentage of
the total hits that is. The last number pair ("645 (54.9%)")
are the number of times this function was in the call stack when the timer went
off. That is, the timer went off while we were in code that was ultimately
called from <code>PL_HandleEvent</code>.
called from <code>nsBlockFrame::DoReflowInlineFrames</code>.
<p>For our example we can see that our function was in the call stack for
142227 interrupt ticks, but we were never the function that was running when
the interrupt arrived.
645 interrupt ticks, but we were only the function that was running when
the interrupt arrived 4 times.
<P>
The functions listed above the line for <code>PL_HandleEvent</code> are its
The functions listed above the line for <code>nsBlockFrame::DoReflowInlineFrames</code> are its
callers. The numbers to the left of these function names are the numbers of
times these functions were in the call stack as callers of
<code>PL_HandleEvent</code>. In our example, we were called 927 times by
<code>PL_ProcessEventsBeforeID</code> and 141300 times by
<code>PL_ProcessPendingEvents</code>.
<code>nsBlockFrame::DoReflowInlineFrames</code>. In our example, we were called 545 times by
<code>nsBlockFrame::ReflowInlineFrames</code> and 100 times by
<code>nsBlockFrame::ReflowDirtyLines</code>.
<P>
The functions listed below the line for <code>PL_HandleEvent</code> are its
The functions listed below the line for <code>nsBlockFrame::DoReflowInlineFrames</code> are its
callees. The numbers to the left of the function names are the numbers of
times these functions were in the callstack as callees of <code>PL_HandleEvent</code>. In our example, of the 142227 profiler hits under <code>PL_HandleEvent</code> 92394 were under <code>nsInputStreamReadyEvent::EventHandler</code>, 49181 were under <code>HandlePLEvent(ReflowEvent*)</code>, and so forth.
times these functions were in the callstack as callees of
<code>nsBlockFrame::DoReflowInlineFrames</code> and the corresponding percentages. In our example, of the 645 profiler hits under <code>nsBlockFrame::DoReflowInlineFrames</code> 545 were under <code>nsBlockFrame::ReflowInlineFrame</code>, 83 were under <code>nsBlockFrame::PlaceLine</code>, and so forth.<p>
<b>NOTE:</b> If there are loops of execution or recursion, the numbers will
not add up and percentages can exceed 100%. If a function directly calls
itself "(self)" will be appended to the line, but indirect recursion will
not be marked.
<h3>Bugs</h3>
Jprof has only been tested under Red Hat Linux 6.0, 6.1, and 6.2. It does
not work under 6.0, though it is possible hack up the source code and make
it work there. The way I determine the stack trace from inside the
signal handler is tightly bound to the version of glibc that is running.
If you know of a more portable way to get this information please let
me know.
<h3>Update</h3>
The current build of Jprof has only been tested under Ubuntu 8.04 LTS, but
should work under any fairly modern linux distribution using GCC/GLIBC.
Please update this document with any known compatibilities/incompatibilities.
<p>
If you get an error:<p><code>Inconsistency detected by ld.so: dl-open.c: 260: dl_open_worker: Assertion `_dl_debug_initialize (0, args->nsid)->r_state == RT_CONSISTENT' failed!
</code><p>that means you've hit a timing hole in the version of glibc you're
running. See <a
href="http://sources.redhat.com/bugzilla/show_bug.cgi?id=4578">Redhat bug 4578</a>.
<!-- <h3>Update</h3>
<ul>
<li>Ben Bucksch reports that installing the Red Hat 6.1 glibc rpms on a Red Hat
6.0 system allows jprof to work, and does not seem to break anything except
gdm (the Gnome login program), and that can be fixed by installing the RH 6.1
gdb rpm.</li>
<li>David Baron reports that jprof works under RedHat 6.0 if one uncomments
the <code>#define JPROF_PTHREAD_HACK</code> near the beginning of
<code>libmalloc.cpp</code>.</li>
</ul>
-->
</body>
</html>

View File

@ -1,3 +1,4 @@
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
@ -128,6 +129,7 @@ leaky::leaky()
quiet = TRUE;
showAddress = FALSE;
showThreads = FALSE;
stackDepth = 100000;
mappedLogFile = -1;
@ -149,7 +151,8 @@ leaky::~leaky()
void leaky::usageError()
{
fprintf(stderr, "Usage: %s prog log\n", (char*) applicationName);
fprintf(stderr, "Usage: %s [-v][-t] [-e exclude] [-i include] [-s stackdepth] prog log\n", (char*) applicationName);
fprintf(stderr, "\t-v: verbose\n\t-t: split threads\n");
exit(-1);
}
@ -165,14 +168,15 @@ void leaky::initialize(int argc, char** argv)
int arg;
int errflg = 0;
while ((arg = getopt(argc, argv, "adEe:gh:i:r:Rs:tqx")) != -1) {
while ((arg = getopt(argc, argv, "adEe:gh:i:r:Rs:tqvx")) != -1) {
switch (arg) {
case '?':
default:
errflg++;
break;
case 'a':
break;
case 'A':
case 'A': // not implemented
showAddress = TRUE;
break;
case 'd':
@ -184,7 +188,7 @@ void leaky::initialize(int argc, char** argv)
break;
case 'g':
break;
case 'r':
case 'r': // not implemented
roots.add(optarg);
if (!includes.IsEmpty()) {
errflg++;
@ -207,7 +211,12 @@ void leaky::initialize(int argc, char** argv)
case 'x':
break;
case 'q':
quiet = TRUE;
break;
case 'v':
quiet = !quiet;
break;
case 't':
showThreads = TRUE;
break;
}
}
@ -265,6 +274,10 @@ void leaky::LoadMap()
void leaky::open()
{
int threadArray[100]; // should auto-expand
int last_thread = -1;
int numThreads=0;
LoadMap();
setupSymbols(progFile);
@ -279,7 +292,61 @@ void leaky::open()
firstLogEntry = (malloc_log_entry*) mapFile(mappedLogFile, PROT_READ, &size);
lastLogEntry = (malloc_log_entry*)((char*)firstLogEntry + size);
analyze();
fprintf(stdout,"<html><head><title>Jprof Profile Report</title></head><body>\n");
fprintf(stdout,"<h1><center>Jprof Profile Report</center></h1>\n");
if (showThreads)
{
// Find all the threads captured
// pthread/linux docs say the signal can be delivered to any thread in
// the process. In practice, it appears in Linux that it's always
// delivered to the thread that called setitimer(), and each thread can
// have a separate itimer. There's a support library for gprof that
// overlays pthread_create() to set timers in any threads you spawn.
// This loop walks through all the call stacks we recorded
for (malloc_log_entry* lep=firstLogEntry;
lep < lastLogEntry;
lep = reinterpret_cast<malloc_log_entry*>(&lep->pcs[lep->numpcs])) {
if (lep->thread != last_thread)
{
int i;
for (i=0; i<numThreads; i++)
{
if (lep->thread == threadArray[i])
break;
}
if (i == numThreads &&
i < (int) (sizeof(threadArray)/sizeof(threadArray[0])))
{
threadArray[i] = lep->thread;
numThreads++;
fprintf(stderr,"new thread %d\n",lep->thread);
}
}
}
fprintf(stderr,"Num threads %d\n",numThreads);
fprintf(stdout,"<hr>Threads:<p><pre>\n");
for (int i=0; i<numThreads; i++)
{
fprintf(stdout," <a href=\"thread_%d\">%d</a><p>\n",
threadArray[i],threadArray[i]);
}
fprintf(stdout,"</pre><hr>");
for (int i=0; i<numThreads; i++)
{
analyze(threadArray[i]);
}
}
else
{
analyze(0);
}
fprintf(stdout,"</pre></body></html>\n");
exit(0);
}
@ -428,14 +495,19 @@ void leaky::dumpEntryToLog(malloc_log_entry* lep)
displayStackTrace(stdout, lep);
}
void leaky::generateReportHTML(FILE *fp, int *countArray, int count)
void leaky::generateReportHTML(FILE *fp, int *countArray, int count, int thread)
{
fprintf(fp,"<html><head><title>Jprof Profile Report</title></head><body>\n");
fprintf(fp,"<h1><center>Jprof Profile Report</center></h1>\n");
fprintf(fp,"<center>");
fprintf(fp,"<A href=#flat>flat</A><b> | </b><A href=#hier>hierarchical</A>");
if (showThreads)
{
fprintf(fp,"<hr><A NAME=thread_%d><b>Thread: %d</b></A><p>",
thread,thread);
}
fprintf(fp,"<A href=#flat_%d>flat</A><b> | </b><A href=#hier_%d>hierarchical</A>",
thread,thread);
fprintf(fp,"</center><P><P><P>\n");
int totalTimerHits = count;
int *rankingTable = new int[usefulSymbols];
for(int cnt=usefulSymbols; --cnt>=0; rankingTable[cnt]=cnt);
@ -464,23 +536,29 @@ void leaky::generateReportHTML(FILE *fp, int *countArray, int count)
// this loop. Later we can get callers and callees into it like gprof
// does
fprintf(fp,
"<h2><A NAME=hier></A><center><a href=\"http://lxr.mozilla.org/mozilla/source/tools/jprof/README.html#hier\">Hierarchical Profile</a></center></h2><hr>\n");
"<h2><A NAME=hier_%d></A><center><a href=\"http://lxr.mozilla.org/mozilla/source/tools/jprof/README.html#hier\">Hierarchical Profile</a></center></h2><hr>\n",
thread);
fprintf(fp, "<pre>\n");
fprintf(fp, "%5s %5s %4s %s\n",
"index", "Count", "Hits", "Function Name");
fprintf(fp, "%6s %6s %4s %s\n",
"index", "Count", "Hits", "Function Name");
for(i=0; i<usefulSymbols && countArray[rankingTable[i]]>0; i++) {
Symbol *sp=&externalSymbols[rankingTable[i]];
sp->cntP.printReport(fp, this);
sp->cntP.printReport(fp, this, rankingTable[i], totalTimerHits);
char *symname = htmlify(sp->name);
fprintf(fp, "%6d %3d <a name=%d>%8d</a> <b>%s</b>\n", rankingTable[i],
sp->timerHit, rankingTable[i], countArray[rankingTable[i]],
fprintf(fp, "%6d %6d (%3.1f%%)%s <a name=%d>%8d (%3.1f%%)</a>%s <b>%s</b>\n",
rankingTable[i],
sp->timerHit, (sp->timerHit*1000/totalTimerHits)/10.0,
(sp->timerHit*1000/totalTimerHits)/10.0 >= 10.0 ? "" : " ",
rankingTable[i], countArray[rankingTable[i]],
(countArray[rankingTable[i]]*1000/totalTimerHits)/10.0,
(countArray[rankingTable[i]]*1000/totalTimerHits)/10.0 >= 10.0 ? "" : " ",
symname);
delete [] symname;
sp->cntC.printReport(fp, this);
sp->cntC.printReport(fp, this, rankingTable[i], totalTimerHits);
fprintf(fp, "<hr>\n");
}
@ -508,14 +586,21 @@ void leaky::generateReportHTML(FILE *fp, int *countArray, int count)
// I wanted the total before walking the list, if this
// double-pass over externalSymbols gets slow we can
// do single-pass and print this out after the loop finishes.
int totalTimerHits = 0;
totalTimerHits = 0;
for(i=0;
i<usefulSymbols && externalSymbols[rankingTable[i]].timerHit>0; i++) {
Symbol *sp=&externalSymbols[rankingTable[i]];
totalTimerHits += sp->timerHit;
}
if (totalTimerHits == 0)
totalTimerHits = 1;
fprintf(fp,"<h2><A NAME=flat></A><center><a href=\"http://lxr.mozilla.org/mozilla/source/tools/jprof/README.html#flat\">Flat Profile</a></center></h2><br>\n");
if (totalTimerHits != count)
fprintf(stderr,"Hit count mismatch: count=%d; totalTimerHits=%d",
count,totalTimerHits);
fprintf(fp,"<h2><A NAME=flat_%d></A><center><a href=\"http://lxr.mozilla.org/mozilla/source/tools/jprof/README.html#flat\">Flat Profile</a></center></h2><br>\n",
thread);
fprintf(fp, "<pre>\n");
fprintf(fp, "Total hit count: %d\n", totalTimerHits);
@ -532,11 +617,9 @@ void leaky::generateReportHTML(FILE *fp, int *countArray, int count)
((float)sp->timerHit/(float)totalTimerHits)*100.0, symname);
delete [] symname;
}
fprintf(fp,"</pre></body></html>\n");
}
void leaky::analyze()
void leaky::analyze(int thread)
{
int *countArray = new int[usefulSymbols];
int *flagArray = new int[usefulSymbols];
@ -558,8 +641,11 @@ void leaky::analyze()
lep < lastLogEntry;
lep = reinterpret_cast<malloc_log_entry*>(&lep->pcs[lep->numpcs])) {
if (excluded(lep) || !included(lep))
if ((thread != 0 && lep->thread != thread) ||
excluded(lep) || !included(lep))
{
continue;
}
++stacks; // How many stack frames did we collect
@ -568,7 +654,7 @@ void leaky::analyze()
u_int n = (lep->numpcs < stackDepth) ? lep->numpcs : stackDepth;
char** pcp = &lep->pcs[n-1];
int idx=-1, parrentIdx=-1; // Init idx incase n==0
for(int i=n-1; i>=0; --i, --pcp, parrentIdx=idx) {
for (int i=n-1; i>=0; --i, --pcp) {
idx = findSymbolIndex(reinterpret_cast<u_long>(*pcp));
if(idx>=0) {
@ -593,6 +679,9 @@ void leaky::analyze()
externalSymbols[parrentIdx].regChild(idx);
externalSymbols[idx].regParrent(parrentIdx);
}
// inside if() so an unknown in the middle of a stack won't break
// the link!
parrentIdx=idx;
}
}
@ -602,12 +691,12 @@ void leaky::analyze()
}
}
generateReportHTML(stdout, countArray, stacks);
generateReportHTML(stdout, countArray, stacks, thread);
}
void FunctionCount::printReport(FILE *fp, leaky *lk)
void FunctionCount::printReport(FILE *fp, leaky *lk, int parent, int total)
{
const char *fmt = " <A href=\"#%d\">%6d %s</A>\n";
const char *fmt = " <A href=\"#%d\">%8d (%3.1f%%)%s %s</A>%s\n";
int nmax, tmax=((~0U)>>1);
@ -618,7 +707,11 @@ void FunctionCount::printReport(FILE *fp, leaky *lk)
if(cnt==tmax) {
int idx = getIndex(j);
char *symname = htmlify(lk->indexToName(idx));
fprintf(fp, fmt, idx, getCount(j), symname);
fprintf(fp, fmt, idx, getCount(j),
getCount(j)*100.0/total,
getCount(j)*100.0/total >= 10.0 ? "" : " ",
symname,
parent == idx ? " (self)" : "");
delete [] symname;
} else if(cnt<tmax && cnt>nmax) {
nmax=cnt;

View File

@ -52,7 +52,7 @@ struct leaky;
class FunctionCount : public IntCount
{
public:
void printReport(FILE *fp, leaky *lk);
void printReport(FILE *fp, leaky *lk, int parent, int total);
};
struct Symbol {
@ -90,6 +90,7 @@ struct leaky {
int quiet;
int showAddress;
int showThreads;
u_int stackDepth;
int mappedLogFile;
@ -115,7 +116,7 @@ struct leaky {
void LoadMap();
void analyze();
void analyze(int thread);
void dumpEntryToLog(malloc_log_entry* lep);
@ -133,7 +134,7 @@ struct leaky {
const char* indexToName(int idx) {return externalSymbols[idx].name;}
private:
void generateReportHTML(FILE *fp, int *countArray, int count);
void generateReportHTML(FILE *fp, int *countArray, int count, int thread);
int findSymbolIndex(u_long address);
};

View File

@ -48,6 +48,9 @@ EXPORTS =
LIBRARY_NAME = jprof
EXPORT_LIBRARY = 1
# override optimization
MOZ_OPTIMIZE_FLAGS = -fno-omit-frame-pointer
CPPSRCS = \
libmalloc.cpp \
$(NULL)

View File

@ -23,6 +23,7 @@
* Jim Nance
* L. David Baron - JP_REALTIME, JPROF_PTHREAD_HACK, and SIGUSR1 handling
* Mike Shaver - JP_RTC_HZ support
* Randell Jesup - glibc backtrace() support
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
@ -61,7 +62,9 @@
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <ucontext.h>
#include <execinfo.h>
#include "libmalloc.h"
#include "jprof.h"
@ -77,6 +80,10 @@ extern r_debug _r_debug;
#include <link.h>
#endif
#define USE_GLIBC_BACKTRACE 1
// To debug, use #define JPROF_STATIC
#define JPROF_STATIC //static
static int gLogFD = -1;
static pthread_t main_thread;
@ -87,9 +94,32 @@ static int enableRTCSignals(bool enable);
//----------------------------------------------------------------------
#if defined(i386) || defined(_i386) || defined(__x86_64__)
static void CrawlStack(malloc_log_entry* me,
void* stack_top, void* top_instr_ptr)
JPROF_STATIC void CrawlStack(malloc_log_entry* me,
void* stack_top, void* top_instr_ptr)
{
#if USE_GLIBC_BACKTRACE
// This probably works on more than x86! But we need a way to get the
// top instruction pointer, which is kindof arch-specific
void *array[500];
int cnt, i;
u_long numpcs = 0;
bool tracing = false;
// This is from glibc. A more generic version might use
// libunwind and/or CaptureStackBackTrace() on Windows
cnt = backtrace(&array[0],sizeof(array)/sizeof(array[0]));
// StackHook->JprofLog->CrawlStack
// Then we have sigaction, which replaced top_instr_ptr
array[3] = top_instr_ptr;
for (i = 3; i < cnt; i++)
{
me->pcs[numpcs++] = (char *) array[i];
}
me->numpcs = numpcs;
#else
// original code - this breaks on many platforms
void **bp;
#if defined(__i386)
__asm__( "movl %%ebp, %0" : "=g"(bp));
@ -102,6 +132,7 @@ static void CrawlStack(malloc_log_entry* me,
bp = __builtin_frame_address(0);
#endif
u_long numpcs = 0;
bool tracing = false;
me->pcs[numpcs++] = (char*) top_instr_ptr;
@ -111,13 +142,17 @@ static void CrawlStack(malloc_log_entry* me,
if (nextbp < bp) {
break;
}
if (bp > stack_top) {
if (tracing) {
// Skip the signal handling.
me->pcs[numpcs++] = (char*) pc;
}
else if (pc == top_instr_ptr) {
tracing = true;
}
bp = nextbp;
}
me->numpcs = numpcs;
#endif
}
#endif
@ -169,13 +204,14 @@ static void EndProfilingHook(int signum)
//----------------------------------------------------------------------
static void
Log(u_long aTime, void* stack_top, void* top_instr_ptr)
JPROF_STATIC void
JprofLog(u_long aTime, void* stack_top, void* top_instr_ptr)
{
// Static is simply to make debugging tollerable
static malloc_log_entry me;
me.delTime = aTime;
me.thread = syscall(SYS_gettid); //gettid();
CrawlStack(&me, stack_top, top_instr_ptr);
@ -281,7 +317,7 @@ static int enableRTCSignals(bool enable)
}
#endif
static void StackHook(
JPROF_STATIC void StackHook(
int signum,
siginfo_t *info,
void *ucontext)
@ -325,9 +361,9 @@ void *ucontext)
gregset_t &gregs = ((ucontext_t*)ucontext)->uc_mcontext.gregs;
#ifdef __x86_64__
Log(millisec, (void*)gregs[REG_RSP], (void*)gregs[REG_RIP]);
JprofLog(millisec, (void*)gregs[REG_RSP], (void*)gregs[REG_RIP]);
#else
Log(millisec, (void*)gregs[REG_ESP], (void*)gregs[REG_EIP]);
JprofLog(millisec, (void*)gregs[REG_ESP], (void*)gregs[REG_EIP]);
#endif
if (!rtcHz)
@ -371,21 +407,26 @@ NS_EXPORT_(void) setupProfilingStuff(void)
char *delay = strstr(tst,"JP_PERIOD=");
if(delay) {
double tmp = strtod(delay+10, NULL);
if(tmp>1e-3) {
double tmp = strtod(delay+strlen("JP_PERIOD="), NULL);
if (tmp>=1e-3) {
timerMiliSec = static_cast<unsigned long>(1000 * tmp);
}
} else {
fprintf(stderr,
"JP_PERIOD of %g less than 0.001 (1ms), using 1ms\n",
tmp);
timerMiliSec = 1;
}
}
char *first = strstr(tst, "JP_FIRST=");
if(first) {
firstDelay = atol(first+9);
firstDelay = atol(first+strlen("JP_FIRST="));
}
char *rtc = strstr(tst, "JP_RTC_HZ=");
if (rtc) {
#if defined(linux)
rtcHz = atol(rtc+10);
rtcHz = atol(rtc+strlen("JP_RTC_HZ="));
timerMiliSec = 0; /* This makes JP_FIRST work right. */
realTime = 1; /* It's the _R_TC and all. ;) */
@ -420,6 +461,8 @@ NS_EXPORT_(void) setupProfilingStuff(void)
atexit(DumpAddressMap);
main_thread = pthread_self();
//fprintf(stderr,"jprof: main_thread = %u\n",
// (unsigned int)main_thread);
sigemptyset(&mset);
action.sa_handler = NULL;

View File

@ -52,6 +52,7 @@ typedef unsigned long u_long;
struct malloc_log_entry {
u_long delTime;
u_long numpcs;
int thread;
char* pcs[MAX_STACK_CRAWL];
};