From 58c256a3a37ea7ff9456978659b927678be6df85 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 27 Jul 2014 16:17:15 -0700 Subject: [PATCH 1/7] PM / sleep: fix kernel-doc warnings in drivers/base/power/main.c Fix kernel-doc warnings in drivers/base/power/main.c: Warning(..//drivers/base/power/main.c:473): No description found for parameter 'async' Warning(..//drivers/base/power/main.c:601): No description found for parameter 'async' Warning(..//drivers/base/power/main.c:1012): No description found for parameter 'async' Warning(..//drivers/base/power/main.c:1151): No description found for parameter 'async' Warning(..//drivers/base/power/main.c:1305): No description found for parameter 'info' Signed-off-by: Randy Dunlap Acked-by: Pavel Machek Signed-off-by: Rafael J. Wysocki --- drivers/base/power/main.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index bf412961a934..b67d9aef9fe4 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -465,6 +465,7 @@ static void dpm_watchdog_clear(struct dpm_watchdog *wd) * device_resume_noirq - Execute an "early resume" callback for given device. * @dev: Device to handle. * @state: PM transition of the system being carried out. + * @async: If true, the device is being resumed asynchronously. * * The driver of @dev will not receive interrupts while this function is being * executed. @@ -594,6 +595,7 @@ static void dpm_resume_noirq(pm_message_t state) * device_resume_early - Execute an "early resume" callback for given device. * @dev: Device to handle. * @state: PM transition of the system being carried out. + * @async: If true, the device is being resumed asynchronously. * * Runtime PM is disabled for @dev while this function is being executed. */ @@ -1004,6 +1006,7 @@ static pm_message_t resume_event(pm_message_t sleep_state) * device_suspend_noirq - Execute a "late suspend" callback for given device. * @dev: Device to handle. * @state: PM transition of the system being carried out. + * @async: If true, the device is being suspended asynchronously. * * The driver of @dev will not receive interrupts while this function is being * executed. @@ -1144,6 +1147,7 @@ static int dpm_suspend_noirq(pm_message_t state) * device_suspend_late - Execute a "late suspend" callback for given device. * @dev: Device to handle. * @state: PM transition of the system being carried out. + * @async: If true, the device is being suspended asynchronously. * * Runtime PM is disabled for @dev while this function is being executed. */ @@ -1298,6 +1302,7 @@ EXPORT_SYMBOL_GPL(dpm_suspend_end); * @dev: Device to suspend. * @state: PM transition of the system being carried out. * @cb: Suspend callback to execute. + * @info: string description of caller. */ static int legacy_suspend(struct device *dev, pm_message_t state, int (*cb)(struct device *dev, pm_message_t state), From f469f02dc6fa67f6c6a7d91400d08b9339147aed Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 21 Jul 2014 12:26:57 +0200 Subject: [PATCH 2/7] PM / Hibernate: Create a Radix-Tree to store memory bitmap This patch adds the code to allocate and build the radix tree to store the memory bitmap. The old data structure is left in place until the radix tree implementation is finished. Signed-off-by: Joerg Roedel Signed-off-by: Rafael J. Wysocki --- kernel/power/snapshot.c | 223 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 222 insertions(+), 1 deletion(-) diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 1ea328aafdc9..5a0eafdbac79 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -248,11 +248,24 @@ static void *chain_alloc(struct chain_allocator *ca, unsigned int size) * information is stored (in the form of a block of bitmap) * It also contains the pfns that correspond to the start and end of * the represented memory area. + * + * The memory bitmap is organized as a radix tree to guarantee fast random + * access to the bits. There is one radix tree for each zone (as returned + * from create_mem_extents). + * + * One radix tree is represented by one struct mem_zone_bm_rtree. There are + * two linked lists for the nodes of the tree, one for the inner nodes and + * one for the leave nodes. The linked leave nodes are used for fast linear + * access of the memory bitmap. + * + * The struct rtree_node represents one node of the radix tree. */ #define BM_END_OF_MAP (~0UL) #define BM_BITS_PER_BLOCK (PAGE_SIZE * BITS_PER_BYTE) +#define BM_BLOCK_SHIFT (PAGE_SHIFT + 3) +#define BM_BLOCK_MASK ((1UL << BM_BLOCK_SHIFT) - 1) struct bm_block { struct list_head hook; /* hook into a list of bitmap blocks */ @@ -266,6 +279,31 @@ static inline unsigned long bm_block_bits(struct bm_block *bb) return bb->end_pfn - bb->start_pfn; } +/* + * struct rtree_node is a wrapper struct to link the nodes + * of the rtree together for easy linear iteration over + * bits and easy freeing + */ +struct rtree_node { + struct list_head list; + unsigned long *data; +}; + +/* + * struct mem_zone_bm_rtree represents a bitmap used for one + * populated memory zone. + */ +struct mem_zone_bm_rtree { + struct list_head list; /* Link Zones together */ + struct list_head nodes; /* Radix Tree inner nodes */ + struct list_head leaves; /* Radix Tree leaves */ + unsigned long start_pfn; /* Zone start page frame */ + unsigned long end_pfn; /* Zone end page frame + 1 */ + struct rtree_node *rtree; /* Radix Tree Root */ + int levels; /* Number of Radix Tree Levels */ + unsigned int blocks; /* Number of Bitmap Blocks */ +}; + /* strcut bm_position is used for browsing memory bitmaps */ struct bm_position { @@ -274,6 +312,7 @@ struct bm_position { }; struct memory_bitmap { + struct list_head zones; struct list_head blocks; /* list of bitmap blocks */ struct linked_page *p_list; /* list of pages used to store zone * bitmap objects and bitmap block @@ -284,6 +323,166 @@ struct memory_bitmap { /* Functions that operate on memory bitmaps */ +#define BM_ENTRIES_PER_LEVEL (PAGE_SIZE / sizeof(unsigned long)) +#if BITS_PER_LONG == 32 +#define BM_RTREE_LEVEL_SHIFT (PAGE_SHIFT - 2) +#else +#define BM_RTREE_LEVEL_SHIFT (PAGE_SHIFT - 3) +#endif +#define BM_RTREE_LEVEL_MASK ((1UL << BM_RTREE_LEVEL_SHIFT) - 1) + +/* + * alloc_rtree_node - Allocate a new node and add it to the radix tree. + * + * This function is used to allocate inner nodes as well as the + * leave nodes of the radix tree. It also adds the node to the + * corresponding linked list passed in by the *list parameter. + */ +static struct rtree_node *alloc_rtree_node(gfp_t gfp_mask, int safe_needed, + struct chain_allocator *ca, + struct list_head *list) +{ + struct rtree_node *node; + + node = chain_alloc(ca, sizeof(struct rtree_node)); + if (!node) + return NULL; + + node->data = get_image_page(gfp_mask, safe_needed); + if (!node->data) + return NULL; + + list_add_tail(&node->list, list); + + return node; +} + +/* + * add_rtree_block - Add a new leave node to the radix tree + * + * The leave nodes need to be allocated in order to keep the leaves + * linked list in order. This is guaranteed by the zone->blocks + * counter. + */ +static int add_rtree_block(struct mem_zone_bm_rtree *zone, gfp_t gfp_mask, + int safe_needed, struct chain_allocator *ca) +{ + struct rtree_node *node, *block, **dst; + unsigned int levels_needed, block_nr; + int i; + + block_nr = zone->blocks; + levels_needed = 0; + + /* How many levels do we need for this block nr? */ + while (block_nr) { + levels_needed += 1; + block_nr >>= BM_RTREE_LEVEL_SHIFT; + } + + /* Make sure the rtree has enough levels */ + for (i = zone->levels; i < levels_needed; i++) { + node = alloc_rtree_node(gfp_mask, safe_needed, ca, + &zone->nodes); + if (!node) + return -ENOMEM; + + node->data[0] = (unsigned long)zone->rtree; + zone->rtree = node; + zone->levels += 1; + } + + /* Allocate new block */ + block = alloc_rtree_node(gfp_mask, safe_needed, ca, &zone->leaves); + if (!block) + return -ENOMEM; + + /* Now walk the rtree to insert the block */ + node = zone->rtree; + dst = &zone->rtree; + block_nr = zone->blocks; + for (i = zone->levels; i > 0; i--) { + int index; + + if (!node) { + node = alloc_rtree_node(gfp_mask, safe_needed, ca, + &zone->nodes); + if (!node) + return -ENOMEM; + *dst = node; + } + + index = block_nr >> ((i - 1) * BM_RTREE_LEVEL_SHIFT); + index &= BM_RTREE_LEVEL_MASK; + dst = (struct rtree_node **)&((*dst)->data[index]); + node = *dst; + } + + zone->blocks += 1; + *dst = block; + + return 0; +} + +static void free_zone_bm_rtree(struct mem_zone_bm_rtree *zone, + int clear_nosave_free); + +/* + * create_zone_bm_rtree - create a radix tree for one zone + * + * Allocated the mem_zone_bm_rtree structure and initializes it. + * This function also allocated and builds the radix tree for the + * zone. + */ +static struct mem_zone_bm_rtree * +create_zone_bm_rtree(gfp_t gfp_mask, int safe_needed, + struct chain_allocator *ca, + unsigned long start, unsigned long end) +{ + struct mem_zone_bm_rtree *zone; + unsigned int i, nr_blocks; + unsigned long pages; + + pages = end - start; + zone = chain_alloc(ca, sizeof(struct mem_zone_bm_rtree)); + if (!zone) + return NULL; + + INIT_LIST_HEAD(&zone->nodes); + INIT_LIST_HEAD(&zone->leaves); + zone->start_pfn = start; + zone->end_pfn = end; + nr_blocks = DIV_ROUND_UP(pages, BM_BITS_PER_BLOCK); + + for (i = 0; i < nr_blocks; i++) { + if (add_rtree_block(zone, gfp_mask, safe_needed, ca)) { + free_zone_bm_rtree(zone, PG_UNSAFE_CLEAR); + return NULL; + } + } + + return zone; +} + +/* + * free_zone_bm_rtree - Free the memory of the radix tree + * + * Free all node pages of the radix tree. The mem_zone_bm_rtree + * structure itself is not freed here nor are the rtree_node + * structs. + */ +static void free_zone_bm_rtree(struct mem_zone_bm_rtree *zone, + int clear_nosave_free) +{ + struct rtree_node *node; + + list_for_each_entry(node, &zone->nodes, list) + free_image_page(node->data, clear_nosave_free); + + list_for_each_entry(node, &zone->leaves, list) + free_image_page(node->data, clear_nosave_free); +} + static void memory_bm_position_reset(struct memory_bitmap *bm) { bm->cur.block = list_entry(bm->blocks.next, struct bm_block, hook); @@ -408,12 +607,14 @@ memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) chain_init(&ca, gfp_mask, safe_needed); INIT_LIST_HEAD(&bm->blocks); + INIT_LIST_HEAD(&bm->zones); error = create_mem_extents(&mem_extents, gfp_mask); if (error) return error; list_for_each_entry(ext, &mem_extents, hook) { + struct mem_zone_bm_rtree *zone; struct bm_block *bb; unsigned long pfn = ext->start; unsigned long pages = ext->end - ext->start; @@ -441,6 +642,12 @@ memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) } bb->end_pfn = pfn; } + + zone = create_zone_bm_rtree(gfp_mask, safe_needed, &ca, + ext->start, ext->end); + if (!zone) + goto Error; + list_add_tail(&zone->list, &bm->zones); } bm->p_list = ca.chain; @@ -460,14 +667,19 @@ memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) */ static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free) { + struct mem_zone_bm_rtree *zone; struct bm_block *bb; list_for_each_entry(bb, &bm->blocks, hook) if (bb->data) free_image_page(bb->data, clear_nosave_free); + list_for_each_entry(zone, &bm->zones, list) + free_zone_bm_rtree(zone, clear_nosave_free); + free_list_of_pages(bm->p_list, clear_nosave_free); + INIT_LIST_HEAD(&bm->zones); INIT_LIST_HEAD(&bm->blocks); } @@ -816,12 +1028,21 @@ void free_basic_memory_bitmaps(void) unsigned int snapshot_additional_pages(struct zone *zone) { + unsigned int rtree, nodes; unsigned int res; res = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK); res += DIV_ROUND_UP(res * sizeof(struct bm_block), LINKED_PAGE_DATA_SIZE); - return 2 * res; + rtree = nodes = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK); + rtree += DIV_ROUND_UP(rtree * sizeof(struct rtree_node), + LINKED_PAGE_DATA_SIZE); + while (nodes > 1) { + nodes = DIV_ROUND_UP(nodes, BM_ENTRIES_PER_LEVEL); + rtree += nodes; + } + + return 2 * (res + rtree); } #ifdef CONFIG_HIGHMEM From 07a338236fdcd6caf41541dcdf879f5758020ab1 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 21 Jul 2014 12:26:58 +0200 Subject: [PATCH 3/7] PM / Hibernate: Add memory_rtree_find_bit function Add a function to find a bit in the radix tree for a given pfn. Also add code to the memory bitmap wrapper functions to use the radix tree together with the existing memory bitmap implementation. On read accesses compare the results of both bitmaps to make sure the radix tree behaves the same way. Signed-off-by: Joerg Roedel Signed-off-by: Rafael J. Wysocki --- kernel/power/snapshot.c | 84 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 81 insertions(+), 3 deletions(-) diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 5a0eafdbac79..0b7f93498077 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -720,6 +720,56 @@ static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn, return 0; } +/* + * memory_rtree_find_bit - Find the bit for pfn in the memory + * bitmap + * + * Walks the radix tree to find the page which contains the bit for + * pfn and returns the bit position in **addr and *bit_nr. + */ +static int memory_rtree_find_bit(struct memory_bitmap *bm, unsigned long pfn, + void **addr, unsigned int *bit_nr) +{ + struct mem_zone_bm_rtree *curr, *zone; + struct rtree_node *node; + int i, block_nr; + + zone = NULL; + + /* Find the right zone */ + list_for_each_entry(curr, &bm->zones, list) { + if (pfn >= curr->start_pfn && pfn < curr->end_pfn) { + zone = curr; + break; + } + } + + if (!zone) + return -EFAULT; + + /* + * We have a zone. Now walk the radix tree to find the leave + * node for our pfn. + */ + node = zone->rtree; + block_nr = (pfn - zone->start_pfn) >> BM_BLOCK_SHIFT; + + for (i = zone->levels; i > 0; i--) { + int index; + + index = block_nr >> ((i - 1) * BM_RTREE_LEVEL_SHIFT); + index &= BM_RTREE_LEVEL_MASK; + BUG_ON(node->data[index] == 0); + node = (struct rtree_node *)node->data[index]; + } + + /* Set return values */ + *addr = node->data; + *bit_nr = (pfn - zone->start_pfn) & BM_BLOCK_MASK; + + return 0; +} + static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn) { void *addr; @@ -729,6 +779,10 @@ static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn) error = memory_bm_find_bit(bm, pfn, &addr, &bit); BUG_ON(error); set_bit(bit, addr); + + error = memory_rtree_find_bit(bm, pfn, &addr, &bit); + BUG_ON(error); + set_bit(bit, addr); } static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn) @@ -740,6 +794,13 @@ static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn) error = memory_bm_find_bit(bm, pfn, &addr, &bit); if (!error) set_bit(bit, addr); + else + return error; + + error = memory_rtree_find_bit(bm, pfn, &addr, &bit); + if (!error) + set_bit(bit, addr); + return error; } @@ -752,25 +813,42 @@ static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn) error = memory_bm_find_bit(bm, pfn, &addr, &bit); BUG_ON(error); clear_bit(bit, addr); + + error = memory_rtree_find_bit(bm, pfn, &addr, &bit); + BUG_ON(error); + clear_bit(bit, addr); } static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn) { void *addr; unsigned int bit; - int error; + int error, error2; + int v; error = memory_bm_find_bit(bm, pfn, &addr, &bit); BUG_ON(error); - return test_bit(bit, addr); + v = test_bit(bit, addr); + + error2 = memory_rtree_find_bit(bm, pfn, &addr, &bit); + BUG_ON(error2); + + WARN_ON_ONCE(v != test_bit(bit, addr)); + + return v; } static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn) { void *addr; unsigned int bit; + int present; - return !memory_bm_find_bit(bm, pfn, &addr, &bit); + present = !memory_bm_find_bit(bm, pfn, &addr, &bit); + + WARN_ON_ONCE(present != !memory_rtree_find_bit(bm, pfn, &addr, &bit)); + + return present; } /** From 3a20cb1779616ebcaade393cc9beac0e03cbffef Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 21 Jul 2014 12:26:59 +0200 Subject: [PATCH 4/7] PM / Hibernate: Implement position keeping in radix tree Add code to remember the last position that was requested in the radix tree. Use it as a cache for faster linear walking of the bitmap in the memory_bm_rtree_next_pfn() function which is also added with this patch. Signed-off-by: Joerg Roedel Signed-off-by: Rafael J. Wysocki --- kernel/power/snapshot.c | 98 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 0b7f93498077..802f2415408e 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -309,6 +309,11 @@ struct mem_zone_bm_rtree { struct bm_position { struct bm_block *block; int bit; + + struct mem_zone_bm_rtree *zone; + struct rtree_node *node; + unsigned long node_pfn; + int node_bit; }; struct memory_bitmap { @@ -487,6 +492,13 @@ static void memory_bm_position_reset(struct memory_bitmap *bm) { bm->cur.block = list_entry(bm->blocks.next, struct bm_block, hook); bm->cur.bit = 0; + + bm->cur.zone = list_entry(bm->zones.next, struct mem_zone_bm_rtree, + list); + bm->cur.node = list_entry(bm->cur.zone->leaves.next, + struct rtree_node, list); + bm->cur.node_pfn = 0; + bm->cur.node_bit = 0; } static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free); @@ -734,6 +746,11 @@ static int memory_rtree_find_bit(struct memory_bitmap *bm, unsigned long pfn, struct rtree_node *node; int i, block_nr; + zone = bm->cur.zone; + + if (pfn >= zone->start_pfn && pfn < zone->end_pfn) + goto zone_found; + zone = NULL; /* Find the right zone */ @@ -747,10 +764,16 @@ static int memory_rtree_find_bit(struct memory_bitmap *bm, unsigned long pfn, if (!zone) return -EFAULT; +zone_found: /* * We have a zone. Now walk the radix tree to find the leave * node for our pfn. */ + + node = bm->cur.node; + if (((pfn - zone->start_pfn) & ~BM_BLOCK_MASK) == bm->cur.node_pfn) + goto node_found; + node = zone->rtree; block_nr = (pfn - zone->start_pfn) >> BM_BLOCK_SHIFT; @@ -763,6 +786,12 @@ static int memory_rtree_find_bit(struct memory_bitmap *bm, unsigned long pfn, node = (struct rtree_node *)node->data[index]; } +node_found: + /* Update last position */ + bm->cur.zone = zone; + bm->cur.node = node; + bm->cur.node_pfn = (pfn - zone->start_pfn) & ~BM_BLOCK_MASK; + /* Set return values */ *addr = node->data; *bit_nr = (pfn - zone->start_pfn) & BM_BLOCK_MASK; @@ -860,11 +889,16 @@ static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn) * this function. */ +static unsigned long memory_bm_rtree_next_pfn(struct memory_bitmap *bm); + static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm) { + unsigned long rtree_pfn; struct bm_block *bb; int bit; + rtree_pfn = memory_bm_rtree_next_pfn(bm); + bb = bm->cur.block; do { bit = bm->cur.bit; @@ -878,13 +912,77 @@ static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm) } while (&bb->hook != &bm->blocks); memory_bm_position_reset(bm); + WARN_ON_ONCE(rtree_pfn != BM_END_OF_MAP); return BM_END_OF_MAP; Return_pfn: + WARN_ON_ONCE(bb->start_pfn + bit != rtree_pfn); bm->cur.bit = bit + 1; return bb->start_pfn + bit; } +/* + * rtree_next_node - Jumps to the next leave node + * + * Sets the position to the beginning of the next node in the + * memory bitmap. This is either the next node in the current + * zone's radix tree or the first node in the radix tree of the + * next zone. + * + * Returns true if there is a next node, false otherwise. + */ +static bool rtree_next_node(struct memory_bitmap *bm) +{ + bm->cur.node = list_entry(bm->cur.node->list.next, + struct rtree_node, list); + if (&bm->cur.node->list != &bm->cur.zone->leaves) { + bm->cur.node_pfn += BM_BITS_PER_BLOCK; + bm->cur.node_bit = 0; + return true; + } + + /* No more nodes, goto next zone */ + bm->cur.zone = list_entry(bm->cur.zone->list.next, + struct mem_zone_bm_rtree, list); + if (&bm->cur.zone->list != &bm->zones) { + bm->cur.node = list_entry(bm->cur.zone->leaves.next, + struct rtree_node, list); + bm->cur.node_pfn = 0; + bm->cur.node_bit = 0; + return true; + } + + /* No more zones */ + return false; +} + +/* + * memory_bm_rtree_next_pfn - Find the next set bit + * + * Starting from the last returned position this function searches + * for the next set bit in the memory bitmap and returns its + * number. If no more bit is set BM_END_OF_MAP is returned. + */ +static unsigned long memory_bm_rtree_next_pfn(struct memory_bitmap *bm) +{ + unsigned long bits, pfn, pages; + int bit; + + do { + pages = bm->cur.zone->end_pfn - bm->cur.zone->start_pfn; + bits = min(pages - bm->cur.node_pfn, BM_BITS_PER_BLOCK); + bit = find_next_bit(bm->cur.node->data, bits, + bm->cur.node_bit); + if (bit < bits) { + pfn = bm->cur.zone->start_pfn + bm->cur.node_pfn + bit; + bm->cur.node_bit = bit + 1; + return pfn; + } + } while (rtree_next_node(bm)); + + return BM_END_OF_MAP; +} + /** * This structure represents a range of page frames the contents of which * should not be saved during the suspend. From 6efde38f07690652bf0d93f5e4f1a5f496574806 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 21 Jul 2014 12:27:00 +0200 Subject: [PATCH 5/7] PM / Hibernate: Iterate over set bits instead of PFNs in swsusp_free() The existing implementation of swsusp_free iterates over all pfns in the system and checks every bit in the two memory bitmaps. This doesn't scale very well with large numbers of pfns, especially when the bitmaps are not populated very densly. Change the algorithm to iterate over the set bits in the bitmaps instead to make it scale better in large memory configurations. Also add a memory_bm_clear_current() helper function that clears the bit for the last position returned from the memory bitmap. Signed-off-by: Joerg Roedel Signed-off-by: Rafael J. Wysocki --- kernel/power/snapshot.c | 51 ++++++++++++++++++++++++++++++----------- 1 file changed, 37 insertions(+), 14 deletions(-) diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 802f2415408e..5b71caf43d32 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -848,6 +848,17 @@ static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn) clear_bit(bit, addr); } +static void memory_bm_clear_current(struct memory_bitmap *bm) +{ + int bit; + + bit = max(bm->cur.node_bit - 1, 0); + clear_bit(bit, bm->cur.node->data); + + bit = max(bm->cur.bit - 1, 0); + clear_bit(bit, bm->cur.block->data); +} + static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn) { void *addr; @@ -1491,23 +1502,35 @@ static struct memory_bitmap copy_bm; void swsusp_free(void) { - struct zone *zone; - unsigned long pfn, max_zone_pfn; + unsigned long fb_pfn, fr_pfn; - for_each_populated_zone(zone) { - max_zone_pfn = zone_end_pfn(zone); - for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) - if (pfn_valid(pfn)) { - struct page *page = pfn_to_page(pfn); + memory_bm_position_reset(forbidden_pages_map); + memory_bm_position_reset(free_pages_map); - if (swsusp_page_is_forbidden(page) && - swsusp_page_is_free(page)) { - swsusp_unset_page_forbidden(page); - swsusp_unset_page_free(page); - __free_page(page); - } - } +loop: + fr_pfn = memory_bm_next_pfn(free_pages_map); + fb_pfn = memory_bm_next_pfn(forbidden_pages_map); + + /* + * Find the next bit set in both bitmaps. This is guaranteed to + * terminate when fb_pfn == fr_pfn == BM_END_OF_MAP. + */ + do { + if (fb_pfn < fr_pfn) + fb_pfn = memory_bm_next_pfn(forbidden_pages_map); + if (fr_pfn < fb_pfn) + fr_pfn = memory_bm_next_pfn(free_pages_map); + } while (fb_pfn != fr_pfn); + + if (fr_pfn != BM_END_OF_MAP && pfn_valid(fr_pfn)) { + struct page *page = pfn_to_page(fr_pfn); + + memory_bm_clear_current(forbidden_pages_map); + memory_bm_clear_current(free_pages_map); + __free_page(page); + goto loop; } + nr_copy_pages = 0; nr_meta_pages = 0; restore_pblist = NULL; From 9047eb629e5cd25ae3834d8c62ae02eb8c32bc17 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 21 Jul 2014 12:27:01 +0200 Subject: [PATCH 6/7] PM / Hibernate: Remove the old memory-bitmap implementation The radix tree implementatio is proved to work the same as the old implementation now. So the old implementation can be removed to finish the switch to the radix tree for the memory bitmaps. Signed-off-by: Joerg Roedel Signed-off-by: Rafael J. Wysocki --- kernel/power/snapshot.c | 223 ++++------------------------------------ 1 file changed, 21 insertions(+), 202 deletions(-) diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 5b71caf43d32..ab1998adb0a9 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -267,18 +267,6 @@ static void *chain_alloc(struct chain_allocator *ca, unsigned int size) #define BM_BLOCK_SHIFT (PAGE_SHIFT + 3) #define BM_BLOCK_MASK ((1UL << BM_BLOCK_SHIFT) - 1) -struct bm_block { - struct list_head hook; /* hook into a list of bitmap blocks */ - unsigned long start_pfn; /* pfn represented by the first bit */ - unsigned long end_pfn; /* pfn represented by the last bit plus 1 */ - unsigned long *data; /* bitmap representing pages */ -}; - -static inline unsigned long bm_block_bits(struct bm_block *bb) -{ - return bb->end_pfn - bb->start_pfn; -} - /* * struct rtree_node is a wrapper struct to link the nodes * of the rtree together for easy linear iteration over @@ -307,9 +295,6 @@ struct mem_zone_bm_rtree { /* strcut bm_position is used for browsing memory bitmaps */ struct bm_position { - struct bm_block *block; - int bit; - struct mem_zone_bm_rtree *zone; struct rtree_node *node; unsigned long node_pfn; @@ -318,7 +303,6 @@ struct bm_position { struct memory_bitmap { struct list_head zones; - struct list_head blocks; /* list of bitmap blocks */ struct linked_page *p_list; /* list of pages used to store zone * bitmap objects and bitmap block * objects @@ -490,9 +474,6 @@ static void free_zone_bm_rtree(struct mem_zone_bm_rtree *zone, static void memory_bm_position_reset(struct memory_bitmap *bm) { - bm->cur.block = list_entry(bm->blocks.next, struct bm_block, hook); - bm->cur.bit = 0; - bm->cur.zone = list_entry(bm->zones.next, struct mem_zone_bm_rtree, list); bm->cur.node = list_entry(bm->cur.zone->leaves.next, @@ -503,30 +484,6 @@ static void memory_bm_position_reset(struct memory_bitmap *bm) static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free); -/** - * create_bm_block_list - create a list of block bitmap objects - * @pages - number of pages to track - * @list - list to put the allocated blocks into - * @ca - chain allocator to be used for allocating memory - */ -static int create_bm_block_list(unsigned long pages, - struct list_head *list, - struct chain_allocator *ca) -{ - unsigned int nr_blocks = DIV_ROUND_UP(pages, BM_BITS_PER_BLOCK); - - while (nr_blocks-- > 0) { - struct bm_block *bb; - - bb = chain_alloc(ca, sizeof(struct bm_block)); - if (!bb) - return -ENOMEM; - list_add(&bb->hook, list); - } - - return 0; -} - struct mem_extent { struct list_head hook; unsigned long start; @@ -618,7 +575,6 @@ memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) int error; chain_init(&ca, gfp_mask, safe_needed); - INIT_LIST_HEAD(&bm->blocks); INIT_LIST_HEAD(&bm->zones); error = create_mem_extents(&mem_extents, gfp_mask); @@ -627,38 +583,13 @@ memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) list_for_each_entry(ext, &mem_extents, hook) { struct mem_zone_bm_rtree *zone; - struct bm_block *bb; - unsigned long pfn = ext->start; - unsigned long pages = ext->end - ext->start; - - bb = list_entry(bm->blocks.prev, struct bm_block, hook); - - error = create_bm_block_list(pages, bm->blocks.prev, &ca); - if (error) - goto Error; - - list_for_each_entry_continue(bb, &bm->blocks, hook) { - bb->data = get_image_page(gfp_mask, safe_needed); - if (!bb->data) { - error = -ENOMEM; - goto Error; - } - - bb->start_pfn = pfn; - if (pages >= BM_BITS_PER_BLOCK) { - pfn += BM_BITS_PER_BLOCK; - pages -= BM_BITS_PER_BLOCK; - } else { - /* This is executed only once in the loop */ - pfn += pages; - } - bb->end_pfn = pfn; - } zone = create_zone_bm_rtree(gfp_mask, safe_needed, &ca, ext->start, ext->end); - if (!zone) + if (!zone) { + error = -ENOMEM; goto Error; + } list_add_tail(&zone->list, &bm->zones); } @@ -680,11 +611,6 @@ memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free) { struct mem_zone_bm_rtree *zone; - struct bm_block *bb; - - list_for_each_entry(bb, &bm->blocks, hook) - if (bb->data) - free_image_page(bb->data, clear_nosave_free); list_for_each_entry(zone, &bm->zones, list) free_zone_bm_rtree(zone, clear_nosave_free); @@ -692,55 +618,20 @@ static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free) free_list_of_pages(bm->p_list, clear_nosave_free); INIT_LIST_HEAD(&bm->zones); - INIT_LIST_HEAD(&bm->blocks); } /** - * memory_bm_find_bit - find the bit in the bitmap @bm that corresponds - * to given pfn. The cur_zone_bm member of @bm and the cur_block member - * of @bm->cur_zone_bm are updated. - */ -static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn, - void **addr, unsigned int *bit_nr) -{ - struct bm_block *bb; - - /* - * Check if the pfn corresponds to the current bitmap block and find - * the block where it fits if this is not the case. - */ - bb = bm->cur.block; - if (pfn < bb->start_pfn) - list_for_each_entry_continue_reverse(bb, &bm->blocks, hook) - if (pfn >= bb->start_pfn) - break; - - if (pfn >= bb->end_pfn) - list_for_each_entry_continue(bb, &bm->blocks, hook) - if (pfn >= bb->start_pfn && pfn < bb->end_pfn) - break; - - if (&bb->hook == &bm->blocks) - return -EFAULT; - - /* The block has been found */ - bm->cur.block = bb; - pfn -= bb->start_pfn; - bm->cur.bit = pfn + 1; - *bit_nr = pfn; - *addr = bb->data; - return 0; -} - -/* - * memory_rtree_find_bit - Find the bit for pfn in the memory - * bitmap + * memory_bm_find_bit - Find the bit for pfn in the memory + * bitmap * - * Walks the radix tree to find the page which contains the bit for + * Find the bit in the bitmap @bm that corresponds to given pfn. + * The cur.zone, cur.block and cur.node_pfn member of @bm are + * updated. + * It walks the radix tree to find the page which contains the bit for * pfn and returns the bit position in **addr and *bit_nr. */ -static int memory_rtree_find_bit(struct memory_bitmap *bm, unsigned long pfn, - void **addr, unsigned int *bit_nr) +static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn, + void **addr, unsigned int *bit_nr) { struct mem_zone_bm_rtree *curr, *zone; struct rtree_node *node; @@ -808,10 +699,6 @@ static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn) error = memory_bm_find_bit(bm, pfn, &addr, &bit); BUG_ON(error); set_bit(bit, addr); - - error = memory_rtree_find_bit(bm, pfn, &addr, &bit); - BUG_ON(error); - set_bit(bit, addr); } static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn) @@ -821,12 +708,6 @@ static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn) int error; error = memory_bm_find_bit(bm, pfn, &addr, &bit); - if (!error) - set_bit(bit, addr); - else - return error; - - error = memory_rtree_find_bit(bm, pfn, &addr, &bit); if (!error) set_bit(bit, addr); @@ -842,10 +723,6 @@ static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn) error = memory_bm_find_bit(bm, pfn, &addr, &bit); BUG_ON(error); clear_bit(bit, addr); - - error = memory_rtree_find_bit(bm, pfn, &addr, &bit); - BUG_ON(error); - clear_bit(bit, addr); } static void memory_bm_clear_current(struct memory_bitmap *bm) @@ -854,82 +731,25 @@ static void memory_bm_clear_current(struct memory_bitmap *bm) bit = max(bm->cur.node_bit - 1, 0); clear_bit(bit, bm->cur.node->data); - - bit = max(bm->cur.bit - 1, 0); - clear_bit(bit, bm->cur.block->data); } static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn) { void *addr; unsigned int bit; - int error, error2; - int v; + int error; error = memory_bm_find_bit(bm, pfn, &addr, &bit); BUG_ON(error); - v = test_bit(bit, addr); - - error2 = memory_rtree_find_bit(bm, pfn, &addr, &bit); - BUG_ON(error2); - - WARN_ON_ONCE(v != test_bit(bit, addr)); - - return v; + return test_bit(bit, addr); } static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn) { void *addr; unsigned int bit; - int present; - present = !memory_bm_find_bit(bm, pfn, &addr, &bit); - - WARN_ON_ONCE(present != !memory_rtree_find_bit(bm, pfn, &addr, &bit)); - - return present; -} - -/** - * memory_bm_next_pfn - find the pfn that corresponds to the next set bit - * in the bitmap @bm. If the pfn cannot be found, BM_END_OF_MAP is - * returned. - * - * It is required to run memory_bm_position_reset() before the first call to - * this function. - */ - -static unsigned long memory_bm_rtree_next_pfn(struct memory_bitmap *bm); - -static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm) -{ - unsigned long rtree_pfn; - struct bm_block *bb; - int bit; - - rtree_pfn = memory_bm_rtree_next_pfn(bm); - - bb = bm->cur.block; - do { - bit = bm->cur.bit; - bit = find_next_bit(bb->data, bm_block_bits(bb), bit); - if (bit < bm_block_bits(bb)) - goto Return_pfn; - - bb = list_entry(bb->hook.next, struct bm_block, hook); - bm->cur.block = bb; - bm->cur.bit = 0; - } while (&bb->hook != &bm->blocks); - - memory_bm_position_reset(bm); - WARN_ON_ONCE(rtree_pfn != BM_END_OF_MAP); - return BM_END_OF_MAP; - - Return_pfn: - WARN_ON_ONCE(bb->start_pfn + bit != rtree_pfn); - bm->cur.bit = bit + 1; - return bb->start_pfn + bit; + return !memory_bm_find_bit(bm, pfn, &addr, &bit); } /* @@ -967,14 +787,17 @@ static bool rtree_next_node(struct memory_bitmap *bm) return false; } -/* - * memory_bm_rtree_next_pfn - Find the next set bit +/** + * memory_bm_rtree_next_pfn - Find the next set bit in the bitmap @bm * * Starting from the last returned position this function searches * for the next set bit in the memory bitmap and returns its * number. If no more bit is set BM_END_OF_MAP is returned. + * + * It is required to run memory_bm_position_reset() before the + * first call to this function. */ -static unsigned long memory_bm_rtree_next_pfn(struct memory_bitmap *bm) +static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm) { unsigned long bits, pfn, pages; int bit; @@ -1216,11 +1039,7 @@ void free_basic_memory_bitmaps(void) unsigned int snapshot_additional_pages(struct zone *zone) { unsigned int rtree, nodes; - unsigned int res; - res = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK); - res += DIV_ROUND_UP(res * sizeof(struct bm_block), - LINKED_PAGE_DATA_SIZE); rtree = nodes = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK); rtree += DIV_ROUND_UP(rtree * sizeof(struct rtree_node), LINKED_PAGE_DATA_SIZE); @@ -1229,7 +1048,7 @@ unsigned int snapshot_additional_pages(struct zone *zone) rtree += nodes; } - return 2 * (res + rtree); + return 2 * rtree; } #ifdef CONFIG_HIGHMEM From 0f7d83e85dbd5bb8032ebed7713edf59670fb074 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 21 Jul 2014 12:27:02 +0200 Subject: [PATCH 7/7] PM / Hibernate: Touch Soft Lockup Watchdog in rtree_next_node When a memory bitmap is fully populated on a large memory machine (several TB of RAM) it can take more than a minute to walk through all bits. This causes the soft lockup detector on these machine to report warnings. Avoid this by touching the soft lockup watchdog in the memory bitmap walking code. Signed-off-by: Joerg Roedel Signed-off-by: Rafael J. Wysocki --- kernel/power/snapshot.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index ab1998adb0a9..4fc5c32422b3 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -769,6 +769,7 @@ static bool rtree_next_node(struct memory_bitmap *bm) if (&bm->cur.node->list != &bm->cur.zone->leaves) { bm->cur.node_pfn += BM_BITS_PER_BLOCK; bm->cur.node_bit = 0; + touch_softlockup_watchdog(); return true; }