From 2d11f79424637bd1676ea0976dbe468c254cc66d Mon Sep 17 00:00:00 2001 From: Robert O'Callahan Date: Tue, 17 Feb 2009 21:11:29 -0800 Subject: [PATCH] Bug 478195 - '[Mac] Same-thread deadlock with trace-malloc (causing random red on OS X 10.5.2 mozilla-central leak test build)'. r=dbaron. --- tools/trace-malloc/lib/nsTraceMalloc.c | 41 +++++++++++++++------ xpcom/base/nsStackWalk.cpp | 51 ++++++++++++++++++++++++++ xpcom/base/nsStackWalk.h | 3 ++ 3 files changed, 84 insertions(+), 11 deletions(-) diff --git a/tools/trace-malloc/lib/nsTraceMalloc.c b/tools/trace-malloc/lib/nsTraceMalloc.c index 4b7e587c69c8..02cbb8fb759c 100644 --- a/tools/trace-malloc/lib/nsTraceMalloc.c +++ b/tools/trace-malloc/lib/nsTraceMalloc.c @@ -913,14 +913,19 @@ stack_callback(void *pc, void *closure) /* * The caller MUST NOT be holding tmlock when calling backtrace. + * On return, if *immediate_abort is set, then the return value is NULL + * and the thread is in a very dangerous situation (e.g. holding + * sem_pool_lock in Mac OS X pthreads); the caller should bail out + * without doing anything (such as acquiring locks). */ callsite * -backtrace(tm_thread *t, int skip) +backtrace(tm_thread *t, int skip, int *immediate_abort) { callsite *site; stack_buffer_info *info = &t->backtrace_buf; void ** new_stack_buffer; size_t new_stack_buffer_size; + nsresult rv; t->suppress_tracing++; @@ -935,7 +940,12 @@ backtrace(tm_thread *t, int skip) /* skip == 0 means |backtrace| should show up, so don't use skip + 1 */ /* NB: this call is repeated below if the buffer is too small */ info->entries = 0; - NS_StackWalk(stack_callback, skip, info); + rv = NS_StackWalk(stack_callback, skip, info); + *immediate_abort = rv == NS_ERROR_UNEXPECTED; + if (rv == NS_ERROR_UNEXPECTED || info->entries == 0) { + t->suppress_tracing--; + return NULL; + } /* * To avoid allocating in stack_callback (which, on Windows, is @@ -959,11 +969,6 @@ backtrace(tm_thread *t, int skip) PR_ASSERT(info->entries * 2 == new_stack_buffer_size); /* same stack */ } - if (info->entries == 0) { - t->suppress_tracing--; - return NULL; - } - site = calltree(info->buffer, info->entries, t); TM_ENTER_LOCK(t); @@ -1707,8 +1712,9 @@ NS_TraceStack(int skip, FILE *ofp) { callsite *site; tm_thread *t = tm_get_thread(); + int immediate_abort; - site = backtrace(t, skip + 1); + site = backtrace(t, skip + 1, &immediate_abort); while (site) { if (site->name || site->parent) { fprintf(ofp, "%s[%s +0x%X]\n", @@ -1788,11 +1794,14 @@ MallocCallback(void *ptr, size_t size, PRUint32 start, PRUint32 end, tm_thread * callsite *site; PLHashEntry *he; allocation *alloc; + int immediate_abort; if (!tracing_enabled || t->suppress_tracing != 0) return; - site = backtrace(t, 2); + site = backtrace(t, 2, &immediate_abort); + if (immediate_abort) + return; TM_SUPPRESS_TRACING_AND_ENTER_LOCK(t); tmstats.malloc_calls++; @@ -1822,11 +1831,14 @@ CallocCallback(void *ptr, size_t count, size_t size, PRUint32 start, PRUint32 en callsite *site; PLHashEntry *he; allocation *alloc; + int immediate_abort; if (!tracing_enabled || t->suppress_tracing != 0) return; - site = backtrace(t, 2); + site = backtrace(t, 2, &immediate_abort); + if (immediate_abort) + return; TM_SUPPRESS_TRACING_AND_ENTER_LOCK(t); tmstats.calloc_calls++; @@ -1861,11 +1873,14 @@ ReallocCallback(void * oldptr, void *ptr, size_t size, PLHashEntry **hep, *he; allocation *alloc; FILE *trackfp = NULL; + int immediate_abort; if (!tracing_enabled || t->suppress_tracing != 0) return; - site = backtrace(t, 2); + site = backtrace(t, 2, 
&immediate_abort); + if (immediate_abort) + return; TM_SUPPRESS_TRACING_AND_ENTER_LOCK(t); tmstats.realloc_calls++; @@ -1944,6 +1959,10 @@ FreeCallback(void * ptr, PRUint32 start, PRUint32 end, tm_thread *t) if (!tracing_enabled || t->suppress_tracing != 0) return; + // XXX Perhaps we should call backtrace() so we can check for + // immediate_abort. However, the only current contexts where + // immediate_abort will be true do not call free(), so for now, + // let's avoid the cost of backtrace(). TM_SUPPRESS_TRACING_AND_ENTER_LOCK(t); tmstats.free_calls++; if (!ptr) { diff --git a/xpcom/base/nsStackWalk.cpp b/xpcom/base/nsStackWalk.cpp index 3e26d76681e2..a99b260de9f9 100644 --- a/xpcom/base/nsStackWalk.cpp +++ b/xpcom/base/nsStackWalk.cpp @@ -1406,11 +1406,58 @@ NS_FormatCodeAddressDetails(void *aPC, const nsCodeAddressDetails *aDetails, extern void *__libc_stack_end; // from ld-linux.so #endif +#ifdef XP_MACOSX +struct AddressRange { + void* mStart; + void* mEnd; +}; +// Addresses in this range must stop the stack walk +static AddressRange gCriticalRange; + +static void FindFunctionAddresses(const char* aName, AddressRange* aRange) +{ + aRange->mStart = dlsym(RTLD_DEFAULT, aName); + if (!aRange->mStart) + return; + aRange->mEnd = aRange->mStart; + while (PR_TRUE) { + Dl_info info; + if (!dladdr(aRange->mEnd, &info)) + break; + if (strcmp(info.dli_sname, aName)) + break; + aRange->mEnd = (char*)aRange->mEnd + 1; + } +} + +static void InitCriticalRanges() +{ + if (gCriticalRange.mStart) + return; + // We must not do work when 'new_sem_from_pool' calls realloc, since + // it holds a non-reentrant spin-lock and we will quickly deadlock. + // new_sem_from_pool is not directly accessible using dladdr but its + // code is bundled with pthread_cond_wait$UNIX2003 (on + // Leopard anyway). + FindFunctionAddresses("pthread_cond_wait$UNIX2003", &gCriticalRange); +} + +static PRBool InCriticalRange(void* aPC) +{ + return gCriticalRange.mStart && + gCriticalRange.mStart <= aPC && aPC < gCriticalRange.mEnd; +} +#else +static void InitCriticalRanges() {} +static PRBool InCriticalRange(void* aPC) { return PR_FALSE; } +#endif + EXPORT_XPCOM_API(nsresult) NS_StackWalk(NS_WalkStackCallback aCallback, PRUint32 aSkipFrames, void *aClosure) { // Stack walking code courtesy Kipp's "leaky". + InitCriticalRanges(); // Get the frame pointer void **bp; @@ -1443,6 +1490,10 @@ NS_StackWalk(NS_WalkStackCallback aCallback, PRUint32 aSkipFrames, #else // i386 or powerpc32 linux void *pc = *(bp+1); #endif + if (InCriticalRange(pc)) { + printf("Aborting stack trace, PC in critical range\n"); + return NS_ERROR_UNEXPECTED; + } if (--skip < 0) { (*aCallback)(pc, aClosure); } diff --git a/xpcom/base/nsStackWalk.h b/xpcom/base/nsStackWalk.h index 6c342d130657..a41bd941a1be 100644 --- a/xpcom/base/nsStackWalk.h +++ b/xpcom/base/nsStackWalk.h @@ -61,6 +61,9 @@ typedef void * * Returns NS_ERROR_NOT_IMPLEMENTED on platforms where it is * unimplemented. + * Returns NS_ERROR_UNEXPECTED when the stack indicates that the thread + * is in a very dangerous situation (e.g., holding sem_pool_lock in + * Mac OS X pthreads code). Callers should then bail out immediately. * * May skip some stack frames due to compiler optimizations or code * generation.
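
Reviewer note, not part of the patch: the interesting trick above is FindFunctionAddresses, which recovers the [start, end) code range of a named function by resolving its start with dlsym and then probing forward one byte at a time until dladdr stops attributing the address to that symbol. The standalone sketch below shows the same technique outside of Gecko so it can be tried in isolation. The probed symbol ("strlen") and the helper names are illustrative only; unlike the patch, the sketch also guards against dladdr returning a null dli_sname, and on glibc the reported range may come out empty because dlsym can resolve to a differently named ifunc variant.

/* Standalone sketch of the dlsym/dladdr range-probing technique used by
 * FindFunctionAddresses in nsStackWalk.cpp.  Illustrative only.
 * Build with: cc probe.c   (add -ldl on older Linux/glibc) */
#define _GNU_SOURCE 1   /* RTLD_DEFAULT and Dl_info on glibc; not needed on Mac OS X */
#include <dlfcn.h>
#include <stdio.h>
#include <string.h>

struct AddressRange {
    void *mStart;
    void *mEnd;   /* one past the last byte still attributed to the symbol */
};

static void
FindRange(const char *aName, struct AddressRange *aRange)
{
    aRange->mStart = dlsym(RTLD_DEFAULT, aName);
    aRange->mEnd = aRange->mStart;
    if (!aRange->mStart)
        return;
    /* Probe forward byte by byte; once dladdr names a different symbol
     * (or none), we have walked past the end of the function's code.
     * This costs O(size of function) dladdr calls, which is acceptable
     * for one-time setup, as in InitCriticalRanges above. */
    for (;;) {
        Dl_info info;
        if (!dladdr(aRange->mEnd, &info))
            break;
        if (!info.dli_sname || strcmp(info.dli_sname, aName) != 0)
            break;
        aRange->mEnd = (char *)aRange->mEnd + 1;
    }
}

static int
InRange(const struct AddressRange *aRange, void *aPC)
{
    return aRange->mStart && aRange->mStart <= aPC && aPC < aRange->mEnd;
}

int
main(void)
{
    struct AddressRange range;
    FindRange("strlen", &range);
    printf("strlen code range: %p..%p (%ld bytes)\n",
           range.mStart, range.mEnd,
           (long)((char *)range.mEnd - (char *)range.mStart));
    printf("strlen entry point inside range: %d\n",
           InRange(&range, (void *)strlen));
    return 0;
}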
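
The other half of the contract is on the caller's side: NS_StackWalk can now return NS_ERROR_UNEXPECTED mid-walk, and the caller must bail out before taking any of its own locks, exactly as backtrace()/MallocCallback do above via immediate_abort. The fragment below only illustrates that calling pattern for some other tool that samples stacks; MyFrameList, MyCollectFrame and MyRecordAllocation are hypothetical, while NS_StackWalk, NS_WalkStackCallback and the nsresult codes are the real ones from xpcom/base/nsStackWalk.h.

/* Hypothetical caller illustrating the new NS_StackWalk error contract. */
#include <stddef.h>
#include "nsStackWalk.h"

typedef struct MyFrameList {
    void *pcs[64];
    unsigned count;
} MyFrameList;

/* Matches the NS_WalkStackCallback signature: called once per frame. */
static void
MyCollectFrame(void *aPC, void *aClosure)
{
    MyFrameList *frames = (MyFrameList *)aClosure;
    if (frames->count < sizeof(frames->pcs) / sizeof(frames->pcs[0]))
        frames->pcs[frames->count++] = aPC;
}

static void
MyRecordAllocation(size_t size)
{
    MyFrameList frames = { { 0 }, 0 };
    nsresult rv = NS_StackWalk(MyCollectFrame, /* aSkipFrames = */ 1, &frames);

    if (rv == NS_ERROR_UNEXPECTED) {
        /* The PC chain hit the critical range (e.g. the thread holds
         * sem_pool_lock inside new_sem_from_pool on Mac OS X).  Do not
         * acquire any lock or allocate; just drop this sample. */
        return;
    }
    /* NS_ERROR_NOT_IMPLEMENTED: no stack walker on this platform; fall
     * through and record the allocation with an empty stack. */

    /* ... take the tool's own lock and store (size, frames) here ... */
    (void)size;
}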