Bug 693404 - Part 1: On MacOS, add public jemalloc function to purge MADV_FREE'd pages, making our RSS reflect reality. r=khuey

--HG--
extra : rebase_source : fb043fcb06963ed2d5f94e8f4659da94f43ed014
This commit is contained in:
Justin Lebar 2011-10-24 13:23:47 -04:00
parent de4e38ad6c
commit 8957edb015
3 changed files with 293 additions and 19 deletions

View File

@ -1,5 +1,5 @@
/* -*- Mode: C; tab-width: 8; c-basic-offset: 8; indent-tabs-mode: t -*- */
/* vim:set softtabstop=8 shiftwidth=8: */
/* vim:set softtabstop=8 shiftwidth=8 noet: */
/*-
* Copyright (C) 2006-2008 Jason Evans <jasone@FreeBSD.org>.
* All rights reserved.
@ -105,6 +105,29 @@
#define _pthread_self() pthread_self()
#endif
/*
* On Linux, we use madvise(MADV_DONTNEED) to release memory back to the
* operating system. If we release 1MB of live pages with MADV_DONTNEED, our
* RSS will decrease by 1MB (almost) immediately.
*
* On Mac, we use madvise(MADV_FREE). Unlike MADV_DONTNEED on Linux, MADV_FREE
* on Mac doesn't cause the OS to release the specified pages immediately; the
* OS keeps them in our process until the machine comes under memory pressure.
*
* It's therefore difficult to measure the process's RSS on Mac, since, in the
* absence of memory pressure, the contribution from the heap to RSS will not
* decrease due to our madvise calls.
*
* We therefore define MALLOC_DOUBLE_PURGE on Mac. This causes jemalloc to
* track which pages have been MADV_FREE'd. You can then call
* jemalloc_purge_freed_pages(), which will force the OS to release those
* MADV_FREE'd pages, making the process's RSS reflect its true memory usage.
*
*/
#ifdef MOZ_MEMORY_DARWIN
#define MALLOC_DOUBLE_PURGE
#endif
/*
* MALLOC_PRODUCTION disables assertions and statistics gathering. It also
* defaults the A and J runtime options to off. These settings are appropriate
@ -354,6 +377,7 @@ __FBSDID("$FreeBSD: head/lib/libc/stdlib/malloc.c 180599 2008-07-18 19:35:44Z ja
#endif
#include "jemalloc.h"
#include "linkedlist.h"
/* Some tools, such as /dev/dsp wrappers, LD_PRELOAD libraries that
* happen to override mmap() and call dlsym() from their overridden
@ -605,6 +629,11 @@ static const bool __isthreaded = true;
/******************************************************************************/
/* MALLOC_DECOMMIT and MALLOC_DOUBLE_PURGE are mutually exclusive. */
#if defined(MALLOC_DECOMMIT) && defined(MALLOC_DOUBLE_PURGE)
#error MALLOC_DECOMMIT and MALLOC_DOUBLE_PURGE are mutually exclusive.
#endif
/*
* Mutexes based on spinlocks. We can't use normal pthread spinlocks in all
* places, because they require malloc()ed memory, which causes bootstrapping
@ -807,13 +836,14 @@ struct arena_chunk_map_s {
* Run address (or size) and various flags are stored together. The bit
* layout looks like (assuming 32-bit system):
*
* ???????? ???????? ????---- --ckdzla
* ???????? ???????? ????---- -mckdzla
*
* ? : Unallocated: Run address for first/last pages, unset for internal
* pages.
* Small: Run address.
* Large: Run size for first page, unset for trailing pages.
* - : Unused.
* m : MADV_FREE/MADV_DONTNEED'ed?
* c : decommitted?
* k : key?
* d : dirty?
@ -845,8 +875,27 @@ struct arena_chunk_map_s {
* -------- -------- -------- ------la
*/
size_t bits;
#if defined(MALLOC_DECOMMIT) || defined(MALLOC_STATS)
/* Note that CHUNK_MAP_DECOMMITTED's meaning varies depending on whether
* MALLOC_DECOMMIT and MALLOC_DOUBLE_PURGE are defined.
*
* If MALLOC_DECOMMIT is defined, a page which is CHUNK_MAP_DECOMMITTED must be
* re-committed with pages_commit() before it may be touched. If
* MALLOC_DECOMMIT is defined, MALLOC_DOUBLE_PURGE may not be defined.
*
* If neither MALLOC_DECOMMIT nor MALLOC_DOUBLE_PURGE is defined, pages which
* are madvised (with either MADV_DONTNEED or MADV_FREE) are marked with
* CHUNK_MAP_MADVISED.
*
* Otherwise, if MALLOC_DECOMMIT is not defined and MALLOC_DOUBLE_PURGE is
* defined, then a page which is madvised is marked as CHUNK_MAP_MADVISED.
* When it's finally freed with jemalloc_purge_freed_pages, the page is marked
* as CHUNK_MAP_DECOMMITTED.
*/
#if defined(MALLOC_DECOMMIT) || defined(MALLOC_STATS) || defined(MALLOC_DOUBLE_PURGE)
#define CHUNK_MAP_MADVISED ((size_t)0x40U)
#define CHUNK_MAP_DECOMMITTED ((size_t)0x20U)
#define CHUNK_MAP_MADVISED_OR_DECOMMITTED (CHUNK_MAP_MADVISED | CHUNK_MAP_DECOMMITTED)
#endif
#define CHUNK_MAP_KEY ((size_t)0x10U)
#define CHUNK_MAP_DIRTY ((size_t)0x08U)
@ -866,6 +915,16 @@ struct arena_chunk_s {
/* Linkage for the arena's chunks_dirty tree. */
rb_node(arena_chunk_t) link_dirty;
#ifdef MALLOC_DOUBLE_PURGE
/* If we're double-purging, we maintain a linked list of chunks which
* have pages which have been madvise(MADV_FREE)'d but not explicitly
* purged.
*
* We're currently lazy and don't remove a chunk from this list when
* all its madvised pages are recommitted. */
LinkedList chunks_madvised_elem;
#endif
/* Number of dirty pages. */
size_t ndirty;
@ -951,6 +1010,12 @@ struct arena_s {
/* Tree of dirty-page-containing chunks this arena manages. */
arena_chunk_tree_t chunks_dirty;
#ifdef MALLOC_DOUBLE_PURGE
/* Head of a linked list of MADV_FREE'd-page-containing chunks this
* arena manages. */
LinkedList chunks_madvised;
#endif
/*
* In order to avoid rapid chunk allocation/deallocation when an arena
* oscillates right on the cusp of needing a new chunk, cache the most
@ -1808,7 +1873,6 @@ malloc_printf(const char *format, ...)
/******************************************************************************/
#ifdef MALLOC_DECOMMIT
static inline void
pages_decommit(void *addr, size_t size)
{
@ -1834,7 +1898,6 @@ pages_commit(void *addr, size_t size)
abort();
# endif
}
#endif
static bool
base_pages_alloc_mmap(size_t minsize)
@ -3069,25 +3132,29 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large,
}
for (i = 0; i < need_pages; i++) {
#if defined(MALLOC_DECOMMIT) || defined(MALLOC_STATS)
#if defined(MALLOC_DECOMMIT) || defined(MALLOC_STATS) || defined(MALLOC_DOUBLE_PURGE)
/*
* Commit decommitted pages if necessary. If a decommitted
* page is encountered, commit all needed adjacent decommitted
* pages in one operation, in order to reduce system call
* overhead.
*/
if (chunk->map[run_ind + i].bits & CHUNK_MAP_DECOMMITTED) {
if (chunk->map[run_ind + i].bits & CHUNK_MAP_MADVISED_OR_DECOMMITTED) {
size_t j;
/*
* Advance i+j to just past the index of the last page
* to commit. Clear CHUNK_MAP_DECOMMITTED along the
* way.
* to commit. Clear CHUNK_MAP_DECOMMITTED and
* CHUNK_MAP_MADVISED along the way.
*/
for (j = 0; i + j < need_pages && (chunk->map[run_ind +
i + j].bits & CHUNK_MAP_DECOMMITTED); j++) {
chunk->map[run_ind + i + j].bits ^=
CHUNK_MAP_DECOMMITTED;
i + j].bits & CHUNK_MAP_MADVISED_OR_DECOMMITTED); j++) {
/* DECOMMITTED and MADVISED are mutually exclusive. */
assert(!(chunk->map[run_ind + i + j].bits & CHUNK_MAP_DECOMMITTED &&
chunk->map[run_ind + i + j].bits & CHUNK_MAP_MADVISED));
chunk->map[run_ind + i + j].bits &=
~CHUNK_MAP_MADVISED_OR_DECOMMITTED;
}
# ifdef MALLOC_DECOMMIT
@ -3204,6 +3271,8 @@ arena_chunk_init(arena_t *arena, arena_chunk_t *chunk)
/* Insert the run into the runs_avail tree. */
arena_avail_tree_insert(&arena->runs_avail,
&chunk->map[arena_chunk_header_npages]);
LinkedList_Init(&chunk->chunks_madvised_elem);
}
static void
@ -3219,6 +3288,12 @@ arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk)
arena->stats.committed -= arena->spare->ndirty;
#endif
}
#ifdef MALLOC_DOUBLE_PURGE
/* This is safe to do even if arena->spare is not in the list. */
LinkedList_Remove(&arena->spare->chunks_madvised_elem);
#endif
VALGRIND_FREELIKE_BLOCK(arena->spare, 0);
chunk_dealloc((void *)arena->spare, chunksize);
#ifdef MALLOC_STATS
@ -3322,6 +3397,9 @@ arena_purge(arena_t *arena)
* purged.
*/
while (arena->ndirty > (opt_dirty_max >> 1)) {
#ifdef MALLOC_DOUBLE_PURGE
bool madvised = false;
#endif
chunk = arena_chunk_tree_dirty_last(&arena->chunks_dirty);
assert(chunk != NULL);
@ -3329,17 +3407,23 @@ arena_purge(arena_t *arena)
assert(i >= arena_chunk_header_npages);
if (chunk->map[i].bits & CHUNK_MAP_DIRTY) {
#ifdef MALLOC_DECOMMIT
const size_t free_operation = CHUNK_MAP_DECOMMITTED;
#else
const size_t free_operation = CHUNK_MAP_MADVISED;
#endif
assert((chunk->map[i].bits &
CHUNK_MAP_DECOMMITTED) == 0);
chunk->map[i].bits ^= CHUNK_MAP_DECOMMITTED | CHUNK_MAP_DIRTY;
CHUNK_MAP_MADVISED_OR_DECOMMITTED) == 0);
chunk->map[i].bits ^= free_operation | CHUNK_MAP_DIRTY;
/* Find adjacent dirty run(s). */
for (npages = 1; i > arena_chunk_header_npages
&& (chunk->map[i - 1].bits &
CHUNK_MAP_DIRTY); npages++) {
for (npages = 1;
i > arena_chunk_header_npages &&
(chunk->map[i - 1].bits & CHUNK_MAP_DIRTY);
npages++) {
i--;
assert((chunk->map[i].bits &
CHUNK_MAP_DECOMMITTED) == 0);
chunk->map[i].bits ^= CHUNK_MAP_DECOMMITTED | CHUNK_MAP_DIRTY;
CHUNK_MAP_MADVISED_OR_DECOMMITTED) == 0);
chunk->map[i].bits ^= free_operation | CHUNK_MAP_DIRTY;
}
chunk->ndirty -= npages;
arena->ndirty -= npages;
@ -3361,6 +3445,9 @@ arena_purge(arena_t *arena)
madvise((void *)((uintptr_t)chunk + (i <<
pagesize_2pow)), (npages << pagesize_2pow),
MADV_FREE);
# ifdef MALLOC_DOUBLE_PURGE
madvised = true;
# endif
#endif
#ifdef MALLOC_STATS
arena->stats.nmadvise++;
@ -3375,6 +3462,14 @@ arena_purge(arena_t *arena)
arena_chunk_tree_dirty_remove(&arena->chunks_dirty,
chunk);
}
#ifdef MALLOC_DOUBLE_PURGE
if (madvised) {
/* The chunk might already be in the list, but this
* makes sure it's at the front. */
LinkedList_Remove(&chunk->chunks_madvised_elem);
LinkedList_InsertHead(&arena->chunks_madvised, &chunk->chunks_madvised_elem);
}
#endif
}
}
@ -4562,6 +4657,9 @@ arena_new(arena_t *arena)
/* Initialize chunks. */
arena_chunk_tree_dirty_new(&arena->chunks_dirty);
#ifdef MALLOC_DOUBLE_PURGE
LinkedList_Init(&arena->chunks_madvised);
#endif
arena->spare = NULL;
arena->ndirty = 0;
@ -6381,6 +6479,78 @@ jemalloc_stats(jemalloc_stats_t *stats)
assert(stats->committed >= stats->allocated);
}
#ifdef MALLOC_DOUBLE_PURGE
/*
 * Explicitly remove all of this chunk's MADV_FREE'd pages from memory.
 *
 * Walks the chunk's page map starting just past the chunk header.  Every
 * maximal run of adjacent pages flagged CHUNK_MAP_MADVISED is re-flagged as
 * CHUNK_MAP_DECOMMITTED and then passed to pages_decommit() followed by
 * pages_commit() in a single pair of calls, so that the number of calls is
 * proportional to the number of runs rather than the number of pages.
 *
 * The caller is expected to hold the owning arena's lock (see
 * hard_purge_arena()).
 */
static void
hard_purge_chunk(arena_chunk_t *chunk)
{
	/* See similar logic in arena_purge(). */
	size_t i;
	for (i = arena_chunk_header_npages; i < chunk_npages; i++) {
		/* Find all adjacent pages with CHUNK_MAP_MADVISED set. */
		size_t npages;
		/*
		 * The bounds check must come first: evaluating the map entry
		 * before checking the index would read one element past the
		 * end of the page map when the chunk's last page is madvised.
		 */
		for (npages = 0;
		     i + npages < chunk_npages &&
		     (chunk->map[i + npages].bits & CHUNK_MAP_MADVISED);
		     npages++) {
			/* Turn off the page's CHUNK_MAP_MADVISED bit and turn
			 * on its CHUNK_MAP_DECOMMITTED bit.  The two flags are
			 * mutually exclusive, so with MADVISED set and
			 * DECOMMITTED clear, XOR-ing both swaps them. */
			assert(!(chunk->map[i + npages].bits & CHUNK_MAP_DECOMMITTED));
			chunk->map[i + npages].bits ^= CHUNK_MAP_MADVISED_OR_DECOMMITTED;
		}

		/* We could use mincore to find out which pages are actually
		 * present, but it's not clear that's better. */
		if (npages > 0) {
			pages_decommit(((char*)chunk) + (i << pagesize_2pow), npages << pagesize_2pow);
			pages_commit(((char*)chunk) + (i << pagesize_2pow), npages << pagesize_2pow);
		}

		/* Skip the run just processed.  The loop's own i++ then steps
		 * over page i + npages, which the scan above already found to
		 * be either not madvised or past the end of the chunk. */
		i += npages;
	}
}
/*
 * Explicitly remove all of this arena's MADV_FREE'd pages from memory.
 *
 * Holds arena->lock for the whole operation.  Repeatedly takes the chunk at
 * the front of arena->chunks_madvised, hard-purges it with
 * hard_purge_chunk(), and unlinks it, until the list is empty.
 */
static void
hard_purge_arena(arena_t *arena)
{
	malloc_spin_lock(&arena->lock);

	while (!LinkedList_IsEmpty(&arena->chunks_madvised)) {
		/* Map the first list element back to its enclosing chunk. */
		arena_chunk_t *chunk =
			LinkedList_Get(arena->chunks_madvised.next,
				       arena_chunk_t, chunks_madvised_elem);
		hard_purge_chunk(chunk);
		/* Unlinking the head is what makes the loop terminate. */
		LinkedList_Remove(&chunk->chunks_madvised_elem);
	}

	malloc_spin_unlock(&arena->lock);
}
/*
 * Public entry point (MALLOC_DOUBLE_PURGE builds): hard-purge the madvised
 * pages of every arena.
 *
 * Iterates over all narenas slots of the arenas array, skipping slots that
 * are NULL, and calls hard_purge_arena() on each remaining arena.  Each
 * arena's lock is taken inside hard_purge_arena().
 */
void
jemalloc_purge_freed_pages(void)
{
	size_t i;
	for (i = 0; i < narenas; i++) {
		arena_t *arena = arenas[i];
		if (arena != NULL)
			hard_purge_arena(arena);
	}
}
#else /* !defined MALLOC_DOUBLE_PURGE */
/*
 * Public entry point when MALLOC_DOUBLE_PURGE is not defined: there is no
 * list of madvised chunks to purge, so this is intentionally a no-op.
 */
void
jemalloc_purge_freed_pages(void)
{
	/* Do nothing. */
}
#endif /* defined MALLOC_DOUBLE_PURGE */
#ifdef MOZ_MEMORY_WINDOWS
void*
_recalloc(void *ptr, size_t count, size_t size)

View File

@ -80,6 +80,33 @@ size_t malloc_usable_size(const void *ptr);
void jemalloc_stats(jemalloc_stats_t *stats);
/*
* On some operating systems (Mac), we use madvise(MADV_FREE) to hand pages
* back to the operating system. On Mac, the operating system doesn't take
* this memory back immediately; instead, the OS takes it back only when the
* machine is running out of physical memory.
*
* This is great from the standpoint of efficiency, but it makes measuring our
* actual RSS difficult, because pages which we've MADV_FREE'd shouldn't count
* against our RSS.
*
* This function explicitly purges any MADV_FREE'd pages from physical memory,
* causing our reported RSS to match the amount of memory we're actually using.
*
* Note that this call is expensive in two ways. First, it may be slow to
* execute, because it may make a number of slow syscalls to free memory. This
* function holds the big jemalloc locks, so basically all threads are blocked
* while this function runs.
*
* This function is also expensive in that the next time we go to access a page
* which we've just explicitly decommitted, the operating system has to attach
* to it a physical page! If we hadn't run this function, the OS would have
* less work to do.
*
* If MALLOC_DOUBLE_PURGE is not defined, this function does nothing.
*/
void jemalloc_purge_freed_pages();
#ifdef __cplusplus
} /* extern "C" */
#endif

View File

@ -0,0 +1,77 @@
/* -*- Mode: C; tab-width: 8; c-basic-offset: 8; indent-tabs-mode: t -*- */
/* vim:set softtabstop=8 shiftwidth=8 noet: */
/*-
* Copyright (C) the Mozilla Foundation.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice(s), this list of conditions and the following disclaimer as
* the first lines of this file unmodified other than the possible
* addition of one or more copyright notices.
* 2. Redistributions in binary form must reproduce the above copyright
* notice(s), this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*******************************************************************************/
#ifndef linkedlist_h__
#define linkedlist_h__
#include <stddef.h>
typedef struct LinkedList_s LinkedList;

/*
 * Link node of an intrusive, circular, doubly-linked list.
 *
 * A node is embedded as a member of the structure that should live on a
 * list; the list itself is represented by a stand-alone sentinel node.  A
 * node that is on no list (and an empty sentinel) points to itself in both
 * directions, which is the state LinkedList_Init() establishes.
 */
struct LinkedList_s {
	LinkedList *next;
	LinkedList *prev;
};

/*
 * Convert from LinkedList* to foo*: given a pointer |e| to the link member
 * named |prop| inside an object of type |type|, yield a pointer to that
 * enclosing object.  The expansion is fully parenthesized so the result can
 * be used directly in a larger expression, e.g. LinkedList_Get(...)->field.
 */
#define LinkedList_Get(e, type, prop) \
	((type*)((char*)(e) - offsetof(type, prop)))

/*
 * Insert |e| at the beginning of |l|, i.e. directly after the sentinel |l|,
 * so that l->next == e afterwards.  |e| must not currently be linked into a
 * list (its previous next/prev values are overwritten).
 */
static inline void
LinkedList_InsertHead(LinkedList *l, LinkedList *e)
{
	e->next = l->next;
	e->prev = l;
	e->next->prev = e;
	e->prev->next = e;
}

/*
 * Unlink |e| from whatever list it is on and reset it to the self-linked
 * state.  Because an unlinked node points to itself, calling this on a node
 * that is not on any list is a harmless no-op.
 */
static inline void
LinkedList_Remove(LinkedList *e)
{
	e->prev->next = e->next;
	e->next->prev = e->prev;
	e->next = e;
	e->prev = e;
}

/* Return true if the list whose sentinel is |e| has no elements. */
static inline bool
LinkedList_IsEmpty(LinkedList *e)
{
	return e->next == e;
}

/* Initialize |e| as an empty list sentinel / an unlinked node. */
static inline void
LinkedList_Init(LinkedList *e)
{
	e->next = e;
	e->prev = e;
}
#endif