From 4051aab762f161826f15ea63c8baaf33cb5e6fe3 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 26 Jun 2015 13:25:12 +0100 Subject: [PATCH 01/15] dm cache policy smq: change the mutex to a spinlock We no longer sleep in any of the smq functions, so this can become a spinlock. Switching from mutex to spinlock improves performance when the fast cache device is a very low latency device (e.g. NVMe SSD). The switch to spinlock also allows for removal of the extra tick_lock; which is no longer needed since the main lock being a spinlock now fulfills the locking requirements needed by interrupt context. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer --- drivers/md/dm-cache-policy-smq.c | 110 +++++++++++-------------------- 1 file changed, 39 insertions(+), 71 deletions(-) diff --git a/drivers/md/dm-cache-policy-smq.c b/drivers/md/dm-cache-policy-smq.c index 200366c62231..1ffbeb1b3ea6 100644 --- a/drivers/md/dm-cache-policy-smq.c +++ b/drivers/md/dm-cache-policy-smq.c @@ -772,7 +772,7 @@ struct smq_policy { struct dm_cache_policy policy; /* protects everything */ - struct mutex lock; + spinlock_t lock; dm_cblock_t cache_size; sector_t cache_block_size; @@ -807,13 +807,7 @@ struct smq_policy { /* * Keeps track of time, incremented by the core. We use this to * avoid attributing multiple hits within the same tick. - * - * Access to tick_protected should be done with the spin lock held. - * It's copied to tick at the start of the map function (within the - * mutex). */ - spinlock_t tick_lock; - unsigned tick_protected; unsigned tick; /* @@ -1296,46 +1290,20 @@ static void smq_destroy(struct dm_cache_policy *p) kfree(mq); } -static void copy_tick(struct smq_policy *mq) -{ - unsigned long flags, tick; - - spin_lock_irqsave(&mq->tick_lock, flags); - tick = mq->tick_protected; - if (tick != mq->tick) { - update_sentinels(mq); - end_hotspot_period(mq); - end_cache_period(mq); - mq->tick = tick; - } - spin_unlock_irqrestore(&mq->tick_lock, flags); -} - -static bool maybe_lock(struct smq_policy *mq, bool can_block) -{ - if (can_block) { - mutex_lock(&mq->lock); - return true; - } else - return mutex_trylock(&mq->lock); -} - static int smq_map(struct dm_cache_policy *p, dm_oblock_t oblock, bool can_block, bool can_migrate, bool fast_promote, struct bio *bio, struct policy_locker *locker, struct policy_result *result) { int r; + unsigned long flags; struct smq_policy *mq = to_smq_policy(p); result->op = POLICY_MISS; - if (!maybe_lock(mq, can_block)) - return -EWOULDBLOCK; - - copy_tick(mq); + spin_lock_irqsave(&mq->lock, flags); r = map(mq, bio, oblock, can_migrate, fast_promote, locker, result); - mutex_unlock(&mq->lock); + spin_unlock_irqrestore(&mq->lock, flags); return r; } @@ -1343,20 +1311,18 @@ static int smq_map(struct dm_cache_policy *p, dm_oblock_t oblock, static int smq_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock) { int r; + unsigned long flags; struct smq_policy *mq = to_smq_policy(p); struct entry *e; - if (!mutex_trylock(&mq->lock)) - return -EWOULDBLOCK; - + spin_lock_irqsave(&mq->lock, flags); e = h_lookup(&mq->table, oblock); if (e) { *cblock = infer_cblock(mq, e); r = 0; } else r = -ENOENT; - - mutex_unlock(&mq->lock); + spin_unlock_irqrestore(&mq->lock, flags); return r; } @@ -1375,20 +1341,22 @@ static void __smq_set_clear_dirty(struct smq_policy *mq, dm_oblock_t oblock, boo static void smq_set_dirty(struct dm_cache_policy *p, dm_oblock_t oblock) { + unsigned long flags; struct smq_policy *mq = to_smq_policy(p); - mutex_lock(&mq->lock); + spin_lock_irqsave(&mq->lock, flags); __smq_set_clear_dirty(mq, oblock, true); - mutex_unlock(&mq->lock); + spin_unlock_irqrestore(&mq->lock, flags); } static void smq_clear_dirty(struct dm_cache_policy *p, dm_oblock_t oblock) { struct smq_policy *mq = to_smq_policy(p); + unsigned long flags; - mutex_lock(&mq->lock); + spin_lock_irqsave(&mq->lock, flags); __smq_set_clear_dirty(mq, oblock, false); - mutex_unlock(&mq->lock); + spin_unlock_irqrestore(&mq->lock, flags); } static int smq_load_mapping(struct dm_cache_policy *p, @@ -1433,14 +1401,14 @@ static int smq_walk_mappings(struct dm_cache_policy *p, policy_walk_fn fn, struct smq_policy *mq = to_smq_policy(p); int r = 0; - mutex_lock(&mq->lock); - + /* + * We don't need to lock here since this method is only called once + * the IO has stopped. + */ r = smq_save_hints(mq, &mq->clean, fn, context); if (!r) r = smq_save_hints(mq, &mq->dirty, fn, context); - mutex_unlock(&mq->lock); - return r; } @@ -1458,10 +1426,11 @@ static void __remove_mapping(struct smq_policy *mq, dm_oblock_t oblock) static void smq_remove_mapping(struct dm_cache_policy *p, dm_oblock_t oblock) { struct smq_policy *mq = to_smq_policy(p); + unsigned long flags; - mutex_lock(&mq->lock); + spin_lock_irqsave(&mq->lock, flags); __remove_mapping(mq, oblock); - mutex_unlock(&mq->lock); + spin_unlock_irqrestore(&mq->lock, flags); } static int __remove_cblock(struct smq_policy *mq, dm_cblock_t cblock) @@ -1480,11 +1449,12 @@ static int __remove_cblock(struct smq_policy *mq, dm_cblock_t cblock) static int smq_remove_cblock(struct dm_cache_policy *p, dm_cblock_t cblock) { int r; + unsigned long flags; struct smq_policy *mq = to_smq_policy(p); - mutex_lock(&mq->lock); + spin_lock_irqsave(&mq->lock, flags); r = __remove_cblock(mq, cblock); - mutex_unlock(&mq->lock); + spin_unlock_irqrestore(&mq->lock, flags); return r; } @@ -1537,11 +1507,12 @@ static int smq_writeback_work(struct dm_cache_policy *p, dm_oblock_t *oblock, dm_cblock_t *cblock, bool critical_only) { int r; + unsigned long flags; struct smq_policy *mq = to_smq_policy(p); - mutex_lock(&mq->lock); + spin_lock_irqsave(&mq->lock, flags); r = __smq_writeback_work(mq, oblock, cblock, critical_only); - mutex_unlock(&mq->lock); + spin_unlock_irqrestore(&mq->lock, flags); return r; } @@ -1562,21 +1533,23 @@ static void __force_mapping(struct smq_policy *mq, static void smq_force_mapping(struct dm_cache_policy *p, dm_oblock_t current_oblock, dm_oblock_t new_oblock) { + unsigned long flags; struct smq_policy *mq = to_smq_policy(p); - mutex_lock(&mq->lock); + spin_lock_irqsave(&mq->lock, flags); __force_mapping(mq, current_oblock, new_oblock); - mutex_unlock(&mq->lock); + spin_unlock_irqrestore(&mq->lock, flags); } static dm_cblock_t smq_residency(struct dm_cache_policy *p) { dm_cblock_t r; + unsigned long flags; struct smq_policy *mq = to_smq_policy(p); - mutex_lock(&mq->lock); + spin_lock_irqsave(&mq->lock, flags); r = to_cblock(mq->cache_alloc.nr_allocated); - mutex_unlock(&mq->lock); + spin_unlock_irqrestore(&mq->lock, flags); return r; } @@ -1586,15 +1559,12 @@ static void smq_tick(struct dm_cache_policy *p, bool can_block) struct smq_policy *mq = to_smq_policy(p); unsigned long flags; - spin_lock_irqsave(&mq->tick_lock, flags); - mq->tick_protected++; - spin_unlock_irqrestore(&mq->tick_lock, flags); - - if (can_block) { - mutex_lock(&mq->lock); - copy_tick(mq); - mutex_unlock(&mq->lock); - } + spin_lock_irqsave(&mq->lock, flags); + mq->tick++; + update_sentinels(mq); + end_hotspot_period(mq); + end_cache_period(mq); + spin_unlock_irqrestore(&mq->lock, flags); } /* Init the policy plugin interface function pointers. */ @@ -1694,10 +1664,8 @@ static struct dm_cache_policy *smq_create(dm_cblock_t cache_size, } else mq->cache_hit_bits = NULL; - mq->tick_protected = 0; mq->tick = 0; - mutex_init(&mq->lock); - spin_lock_init(&mq->tick_lock); + spin_lock_init(&mq->lock); q_init(&mq->hotspot, &mq->es, NR_HOTSPOT_LEVELS); mq->hotspot.nr_top_levels = 8; From 0a8d4c3ef8f14cbd26d97767e3676722d4eebee5 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Mon, 6 Jul 2015 11:55:40 -0400 Subject: [PATCH 02/15] dm btree: remove unused "dm_block_t root" parameter in btree_split_sibling() Signed-off-by: Vivek Goyal Signed-off-by: Mike Snitzer --- drivers/md/persistent-data/dm-btree.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c index c7726cebc495..b6cec258cc21 100644 --- a/drivers/md/persistent-data/dm-btree.c +++ b/drivers/md/persistent-data/dm-btree.c @@ -420,8 +420,8 @@ EXPORT_SYMBOL_GPL(dm_btree_lookup); * * Where A* is a shadow of A. */ -static int btree_split_sibling(struct shadow_spine *s, dm_block_t root, - unsigned parent_index, uint64_t key) +static int btree_split_sibling(struct shadow_spine *s, unsigned parent_index, + uint64_t key) { int r; size_t size; @@ -625,7 +625,7 @@ static int btree_insert_raw(struct shadow_spine *s, dm_block_t root, if (top) r = btree_split_beneath(s, key); else - r = btree_split_sibling(s, root, i, key); + r = btree_split_sibling(s, i, key); if (r < 0) return r; From 8c747fd0c3f514233afaca98139c03cca2cf2d2f Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Mon, 6 Jul 2015 11:55:41 -0400 Subject: [PATCH 03/15] dm btree remove: remove unused function get_nr_entries() rebalance_children() calls get_nr_entries() and assigns the result to an unused local 'child_entries' variable. Remove get_nr_entries() and cleanup rebalance_children() accordingly. Signed-off-by: Vivek Goyal Signed-off-by: Mike Snitzer --- drivers/md/persistent-data/dm-btree-remove.c | 22 -------------------- 1 file changed, 22 deletions(-) diff --git a/drivers/md/persistent-data/dm-btree-remove.c b/drivers/md/persistent-data/dm-btree-remove.c index 4222f774cf36..421a36c593e3 100644 --- a/drivers/md/persistent-data/dm-btree-remove.c +++ b/drivers/md/persistent-data/dm-btree-remove.c @@ -409,29 +409,11 @@ static int rebalance3(struct shadow_spine *s, struct dm_btree_info *info, return 0; } -static int get_nr_entries(struct dm_transaction_manager *tm, - dm_block_t b, uint32_t *result) -{ - int r; - struct dm_block *block; - struct btree_node *n; - - r = dm_tm_read_lock(tm, b, &btree_node_validator, &block); - if (r) - return r; - - n = dm_block_data(block); - *result = le32_to_cpu(n->header.nr_entries); - - return dm_tm_unlock(tm, block); -} - static int rebalance_children(struct shadow_spine *s, struct dm_btree_info *info, struct dm_btree_value_type *vt, uint64_t key) { int i, r, has_left_sibling, has_right_sibling; - uint32_t child_entries; struct btree_node *n; n = dm_block_data(shadow_current(s)); @@ -458,10 +440,6 @@ static int rebalance_children(struct shadow_spine *s, if (i < 0) return -ENODATA; - r = get_nr_entries(info->tm, value64(n, i), &child_entries); - if (r) - return r; - has_left_sibling = i > 0; has_right_sibling = i < (le32_to_cpu(n->header.nr_entries) - 1); From e44b6a5a3c711c1ada4cf7135bf9dbf860caffd2 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Thu, 30 Jul 2015 09:29:40 +0100 Subject: [PATCH 04/15] dm cache: move wake_waker() from free_migrations() to where it is needed This stops spurious wake ups from calls to prealloc_free_structs(). Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer --- drivers/md/dm-cache-target.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 1fe93cfea7d3..8cef66b33243 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -424,7 +424,6 @@ static void free_migration(struct dm_cache_migration *mg) wake_up(&cache->migration_wait); mempool_free(mg, cache->migration_pool); - wake_worker(cache); } static int prealloc_data_structs(struct cache *cache, struct prealloc *p) @@ -1125,6 +1124,7 @@ static void free_io_migration(struct dm_cache_migration *mg) { dec_io_migrations(mg->cache); free_migration(mg); + wake_worker(mg->cache); } static void migration_failure(struct dm_cache_migration *mg) @@ -1361,6 +1361,7 @@ static void issue_discard(struct dm_cache_migration *mg) bio_endio(bio, 0); cell_defer(mg->cache, mg->new_ocell, false); free_migration(mg); + wake_worker(mg->cache); } static void issue_copy_or_discard(struct dm_cache_migration *mg) From 4fb9aa5abdb791defedb3a5ce254ad193e82d77b Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Mon, 29 Jun 2015 14:14:00 +0100 Subject: [PATCH 05/15] dm verity: remove unused mempool Since commit 003b5c571 ("block: Convert drivers to immutable biovecs"), vec_mempool in struct dm_verity is no longer used. Remove it and related definitions. Signed-off-by: Sami Tolvanen Signed-off-by: Mike Snitzer --- drivers/md/dm-verity.c | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c index bb9c6a00e4b0..3adb3323b8c3 100644 --- a/drivers/md/dm-verity.c +++ b/drivers/md/dm-verity.c @@ -26,8 +26,6 @@ #define DM_VERITY_ENV_LENGTH 42 #define DM_VERITY_ENV_VAR_NAME "DM_VERITY_ERR_BLOCK_NR" -#define DM_VERITY_IO_VEC_INLINE 16 -#define DM_VERITY_MEMPOOL_SIZE 4 #define DM_VERITY_DEFAULT_PREFETCH_SIZE 262144 #define DM_VERITY_MAX_LEVELS 63 @@ -76,8 +74,6 @@ struct dm_verity { enum verity_mode mode; /* mode for handling verification errors */ unsigned corrupted_errs;/* Number of errors for corrupted blocks */ - mempool_t *vec_mempool; /* mempool of bio vector */ - struct workqueue_struct *verify_wq; /* starting blocks for each tree level. 0 is the lowest level. */ @@ -691,9 +687,6 @@ static void verity_dtr(struct dm_target *ti) if (v->verify_wq) destroy_workqueue(v->verify_wq); - if (v->vec_mempool) - mempool_destroy(v->vec_mempool); - if (v->bufio) dm_bufio_client_destroy(v->bufio); @@ -962,14 +955,6 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) ti->per_bio_data_size = roundup(sizeof(struct dm_verity_io) + v->shash_descsize + v->digest_size * 2, __alignof__(struct dm_verity_io)); - v->vec_mempool = mempool_create_kmalloc_pool(DM_VERITY_MEMPOOL_SIZE, - BIO_MAX_PAGES * sizeof(struct bio_vec)); - if (!v->vec_mempool) { - ti->error = "Cannot allocate vector mempool"; - r = -ENOMEM; - goto bad; - } - /* WQ_UNBOUND greatly improves performance when running on ramdisk */ v->verify_wq = alloc_workqueue("kverityd", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND, num_online_cpus()); if (!v->verify_wq) { From ab37844d6169c2dd6f96e665b07b692ba1a4c180 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 1 Jul 2015 17:30:36 -0400 Subject: [PATCH 06/15] dm: test return value for DM_MAPIO_SUBMITTED In properly written code we should not assume that DM_MAPIO_SUBMITTED is zero. We should test the return value for DM_MAPIO_SUBMITTED rather than testing it for zero. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 0d7ab20c58df..0907d9eb864e 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1466,7 +1466,7 @@ static void __map_bio(struct dm_target_io *tio) md = tio->io->md; dec_pending(tio->io, r); free_tio(md, tio); - } else if (r) { + } else if (r != DM_MAPIO_SUBMITTED) { DMWARN("unimplemented target map return value: %d", r); BUG(); } From e80d1c805a3b2f0ad2081369be5dc5deedd5ee59 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 31 Jul 2015 09:20:36 -0400 Subject: [PATCH 07/15] dm: do not override error code returned from dm_get_device() Some of the device mapper targets override the error code returned by dm_get_device() and return either -EINVAL or -ENXIO. There is nothing gained by this override. It is better to propagate the returned error code unchanged to caller. This work was motivated by hitting an issue where the underlying device was busy but -EINVAL was being returned. After this change we get -EBUSY instead and it is easier to figure out the problem. Signed-off-by: Vivek Goyal Signed-off-by: Mike Snitzer --- drivers/md/dm-crypt.c | 4 +++- drivers/md/dm-delay.c | 16 +++++++++++----- drivers/md/dm-flakey.c | 6 ++++-- drivers/md/dm-linear.c | 7 +++++-- drivers/md/dm-log-writes.c | 11 ++++++++--- drivers/md/dm-raid1.c | 8 +++++--- drivers/md/dm-stripe.c | 8 +++++--- 7 files changed, 41 insertions(+), 19 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 0f48fed44a17..9a75d462b3fb 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1811,11 +1811,13 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) } cc->iv_offset = tmpll; - if (dm_get_device(ti, argv[3], dm_table_get_mode(ti->table), &cc->dev)) { + ret = dm_get_device(ti, argv[3], dm_table_get_mode(ti->table), &cc->dev); + if (ret) { ti->error = "Device lookup failed"; goto bad; } + ret = -EINVAL; if (sscanf(argv[4], "%llu%c", &tmpll, &dummy) != 1) { ti->error = "Invalid device sector"; goto bad; diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index 57b6a1901c91..b34f6e27293d 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -129,6 +129,7 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv) struct delay_c *dc; unsigned long long tmpll; char dummy; + int ret; if (argc != 3 && argc != 6) { ti->error = "requires exactly 3 or 6 arguments"; @@ -143,6 +144,7 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv) dc->reads = dc->writes = 0; + ret = -EINVAL; if (sscanf(argv[1], "%llu%c", &tmpll, &dummy) != 1) { ti->error = "Invalid device sector"; goto bad; @@ -154,12 +156,14 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; } - if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), - &dc->dev_read)) { + ret = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), + &dc->dev_read); + if (ret) { ti->error = "Device lookup failed"; goto bad; } + ret = -EINVAL; dc->dev_write = NULL; if (argc == 3) goto out; @@ -175,13 +179,15 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad_dev_read; } - if (dm_get_device(ti, argv[3], dm_table_get_mode(ti->table), - &dc->dev_write)) { + ret = dm_get_device(ti, argv[3], dm_table_get_mode(ti->table), + &dc->dev_write); + if (ret) { ti->error = "Write device lookup failed"; goto bad_dev_read; } out: + ret = -EINVAL; dc->kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0); if (!dc->kdelayd_wq) { DMERR("Couldn't start kdelayd"); @@ -208,7 +214,7 @@ bad_dev_read: dm_put_device(ti, dc->dev_read); bad: kfree(dc); - return -EINVAL; + return ret; } static void delay_dtr(struct dm_target *ti) diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index b257e46876d3..ffb994527bcf 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -183,6 +183,7 @@ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv) devname = dm_shift_arg(&as); + r = -EINVAL; if (sscanf(dm_shift_arg(&as), "%llu%c", &tmpll, &dummy) != 1) { ti->error = "Invalid device sector"; goto bad; @@ -211,7 +212,8 @@ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (r) goto bad; - if (dm_get_device(ti, devname, dm_table_get_mode(ti->table), &fc->dev)) { + r = dm_get_device(ti, devname, dm_table_get_mode(ti->table), &fc->dev); + if (r) { ti->error = "Device lookup failed"; goto bad; } @@ -224,7 +226,7 @@ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv) bad: kfree(fc); - return -EINVAL; + return r; } static void flakey_dtr(struct dm_target *ti) diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 53e848c10939..62c26e4ad6ac 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -30,6 +30,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv) struct linear_c *lc; unsigned long long tmp; char dummy; + int ret; if (argc != 2) { ti->error = "Invalid argument count"; @@ -42,13 +43,15 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv) return -ENOMEM; } + ret = -EINVAL; if (sscanf(argv[1], "%llu%c", &tmp, &dummy) != 1) { ti->error = "dm-linear: Invalid device sector"; goto bad; } lc->start = tmp; - if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &lc->dev)) { + ret = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &lc->dev); + if (ret) { ti->error = "dm-linear: Device lookup failed"; goto bad; } @@ -61,7 +64,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv) bad: kfree(lc); - return -EINVAL; + return ret; } static void linear_dtr(struct dm_target *ti) diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index ad1b049ae2ab..51d29b67eb01 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -420,6 +420,7 @@ static int log_writes_ctr(struct dm_target *ti, unsigned int argc, char **argv) struct log_writes_c *lc; struct dm_arg_set as; const char *devname, *logdevname; + int ret; as.argc = argc; as.argv = argv; @@ -443,18 +444,22 @@ static int log_writes_ctr(struct dm_target *ti, unsigned int argc, char **argv) atomic_set(&lc->pending_blocks, 0); devname = dm_shift_arg(&as); - if (dm_get_device(ti, devname, dm_table_get_mode(ti->table), &lc->dev)) { + ret = dm_get_device(ti, devname, dm_table_get_mode(ti->table), &lc->dev); + if (ret) { ti->error = "Device lookup failed"; goto bad; } logdevname = dm_shift_arg(&as); - if (dm_get_device(ti, logdevname, dm_table_get_mode(ti->table), &lc->logdev)) { + ret = dm_get_device(ti, logdevname, dm_table_get_mode(ti->table), + &lc->logdev); + if (ret) { ti->error = "Log device lookup failed"; dm_put_device(ti, lc->dev); goto bad; } + ret = -EINVAL; lc->log_kthread = kthread_run(log_writes_kthread, lc, "log-write"); if (!lc->log_kthread) { ti->error = "Couldn't alloc kthread"; @@ -479,7 +484,7 @@ static int log_writes_ctr(struct dm_target *ti, unsigned int argc, char **argv) bad: kfree(lc); - return -EINVAL; + return ret; } static int log_mark(struct log_writes_c *lc, char *data) diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index d83696bf403b..933a1fd99b77 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -943,16 +943,18 @@ static int get_mirror(struct mirror_set *ms, struct dm_target *ti, { unsigned long long offset; char dummy; + int ret; if (sscanf(argv[1], "%llu%c", &offset, &dummy) != 1) { ti->error = "Invalid offset"; return -EINVAL; } - if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), - &ms->mirror[mirror].dev)) { + ret = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), + &ms->mirror[mirror].dev); + if (ret) { ti->error = "Device lookup failure"; - return -ENXIO; + return ret; } ms->mirror[mirror].ms = ms; diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index a672a1502c14..9a814a5eb89f 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -75,13 +75,15 @@ static int get_stripe(struct dm_target *ti, struct stripe_c *sc, { unsigned long long start; char dummy; + int ret; if (sscanf(argv[1], "%llu%c", &start, &dummy) != 1) return -EINVAL; - if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), - &sc->stripe[stripe].dev)) - return -ENXIO; + ret = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), + &sc->stripe[stripe].dev); + if (ret) + return ret; sc->stripe[stripe].physical_start = start; From fc0a446152877d3a4e40166ddb19b6e0cb5f6567 Mon Sep 17 00:00:00 2001 From: viresh kumar Date: Mon, 10 Aug 2015 11:42:26 +0530 Subject: [PATCH 08/15] dm: remove unlikely() before IS_ERR() IS_ERR() already contains an 'unlikely' compiler flag so there is no need to do that again from IS_ERR() callers. Signed-off-by: Viresh Kumar Signed-off-by: Mike Snitzer --- drivers/md/dm-snap-persistent.c | 2 +- drivers/md/dm-verity.c | 2 +- drivers/md/persistent-data/dm-block-manager.c | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index 808b8419bc48..bf71583296f7 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -533,7 +533,7 @@ static int read_exceptions(struct pstore *ps, chunk = area_location(ps, ps->current_area); area = dm_bufio_read(client, chunk, &bp); - if (unlikely(IS_ERR(area))) { + if (IS_ERR(area)) { r = PTR_ERR(area); goto ret_destroy_bufio; } diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c index 3adb3323b8c3..26eff8d41ed2 100644 --- a/drivers/md/dm-verity.c +++ b/drivers/md/dm-verity.c @@ -267,7 +267,7 @@ static int verity_verify_level(struct dm_verity_io *io, sector_t block, verity_hash_at_level(v, block, level, &hash_block, &offset); data = dm_bufio_read(v->bufio, hash_block, &buf); - if (unlikely(IS_ERR(data))) + if (IS_ERR(data)) return PTR_ERR(data); aux = dm_bufio_get_aux_data(buf); diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c index 4d6c9b689eaa..88dbe7b97c2c 100644 --- a/drivers/md/persistent-data/dm-block-manager.c +++ b/drivers/md/persistent-data/dm-block-manager.c @@ -454,7 +454,7 @@ int dm_bm_read_lock(struct dm_block_manager *bm, dm_block_t b, int r; p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result); - if (unlikely(IS_ERR(p))) + if (IS_ERR(p)) return PTR_ERR(p); aux = dm_bufio_get_aux_data(to_buffer(*result)); @@ -490,7 +490,7 @@ int dm_bm_write_lock(struct dm_block_manager *bm, return -EPERM; p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result); - if (unlikely(IS_ERR(p))) + if (IS_ERR(p)) return PTR_ERR(p); aux = dm_bufio_get_aux_data(to_buffer(*result)); @@ -523,7 +523,7 @@ int dm_bm_read_try_lock(struct dm_block_manager *bm, int r; p = dm_bufio_get(bm->bufio, b, (struct dm_buffer **) result); - if (unlikely(IS_ERR(p))) + if (IS_ERR(p)) return PTR_ERR(p); if (unlikely(!p)) return -EWOULDBLOCK; @@ -559,7 +559,7 @@ int dm_bm_write_lock_zero(struct dm_block_manager *bm, return -EPERM; p = dm_bufio_new(bm->bufio, b, (struct dm_buffer **) result); - if (unlikely(IS_ERR(p))) + if (IS_ERR(p)) return PTR_ERR(p); memset(p, 0, dm_bm_block_size(bm)); From 76c44f6d80e151d230844db7ffc058ac21b9e3f2 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Sun, 21 Jun 2015 16:31:33 -0400 Subject: [PATCH 09/15] dm snapshot: don't invalidate on-disk image on snapshot write overflow When the snapshot overflows because of a write to the origin, the on-disk image has to be invalidated. However, when the snapshot overflows because of a write to the snapshot, the on-disk image doesn't have to be invalidated. Change the behavior so that the on-disk image is not invalidated in this case. When the snapshot overflows, the variable snapshot_overflowed is set. All writes to the snapshot are disallowed to minimize filesystem corruption - this condition is cleared when the snapshot is deactivated and activated. The user can extend the overflowed snapshot, deactivate and activate it again, run fsck (if journaling filesystem is not used) mount it and recover the data. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-snap.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 7c82d3ccce87..3903d7abeeeb 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -63,6 +63,13 @@ struct dm_snapshot { */ int valid; + /* + * The snapshot overflowed because of a write to the snapshot device. + * We don't have to invalidate the snapshot in this case, but we need + * to prevent further writes. + */ + int snapshot_overflowed; + /* Origin writes don't trigger exceptions until this is set */ int active; @@ -1152,6 +1159,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) s->ti = ti; s->valid = 1; + s->snapshot_overflowed = 0; s->active = 0; atomic_set(&s->pending_exceptions_count, 0); s->exception_start_sequence = 0; @@ -1301,6 +1309,7 @@ static void __handover_exceptions(struct dm_snapshot *snap_src, snap_dest->ti->max_io_len = snap_dest->store->chunk_size; snap_dest->valid = snap_src->valid; + snap_dest->snapshot_overflowed = snap_src->snapshot_overflowed; /* * Set source invalid to ensure it receives no further I/O. @@ -1691,7 +1700,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio) * to copy an exception */ down_write(&s->lock); - if (!s->valid) { + if (!s->valid || (unlikely(s->snapshot_overflowed) && bio_rw(bio) == WRITE)) { r = -EIO; goto out_unlock; } @@ -1715,7 +1724,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio) pe = alloc_pending_exception(s); down_write(&s->lock); - if (!s->valid) { + if (!s->valid || s->snapshot_overflowed) { free_pending_exception(pe); r = -EIO; goto out_unlock; @@ -1730,7 +1739,8 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio) pe = __find_pending_exception(s, pe, chunk); if (!pe) { - __invalidate_snapshot(s, -ENOMEM); + s->snapshot_overflowed = 1; + DMERR("Snapshot overflowed: Unable to allocate exception."); r = -EIO; goto out_unlock; } @@ -1990,6 +2000,8 @@ static void snapshot_status(struct dm_target *ti, status_type_t type, DMEMIT("Invalid"); else if (snap->merge_failed) DMEMIT("Merge failed"); + else if (snap->snapshot_overflowed) + DMEMIT("Overflow"); else { if (snap->store->type->usage) { sector_t total_sectors, sectors_allocated, @@ -2368,7 +2380,7 @@ static struct target_type origin_target = { static struct target_type snapshot_target = { .name = "snapshot", - .version = {1, 13, 0}, + .version = {1, 14, 0}, .module = THIS_MODULE, .ctr = snapshot_ctr, .dtr = snapshot_dtr, From 84f8bd86cc8977c344df572169f7ec10b8188cfa Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 18 Aug 2015 10:31:09 -0400 Subject: [PATCH 10/15] dm thin: optimize async discard submission __blkdev_issue_discard_async() doesn't need to worry about further splitting because the upper layer blkdev_issue_discard() will have already handled splitting bios such that the bi_size isn't overflowed. Signed-off-by: Mike Snitzer Acked-by: Joe Thornber --- drivers/md/dm-thin.c | 89 ++++++++------------------------------------ 1 file changed, 15 insertions(+), 74 deletions(-) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index d2bbe8cc1e97..49e358a0c22f 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -332,9 +332,6 @@ struct thin_c { * * Description: * Asynchronously issue a discard request for the sectors in question. - * NOTE: this variant of blk-core's blkdev_issue_discard() is a stop-gap - * that is being kept local to DM thinp until the block changes to allow - * late bio splitting land upstream. */ static int __blkdev_issue_discard_async(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, unsigned long flags, @@ -342,91 +339,36 @@ static int __blkdev_issue_discard_async(struct block_device *bdev, sector_t sect { struct request_queue *q = bdev_get_queue(bdev); int type = REQ_WRITE | REQ_DISCARD; - unsigned int max_discard_sectors, granularity; - int alignment; struct bio *bio; - int ret = 0; - struct blk_plug plug; - if (!q) + if (!q || !nr_sects) return -ENXIO; if (!blk_queue_discard(q)) return -EOPNOTSUPP; - /* Zero-sector (unknown) and one-sector granularities are the same. */ - granularity = max(q->limits.discard_granularity >> 9, 1U); - alignment = (bdev_discard_alignment(bdev) >> 9) % granularity; - - /* - * Ensure that max_discard_sectors is of the proper - * granularity, so that requests stay aligned after a split. - */ - max_discard_sectors = min(q->limits.max_discard_sectors, UINT_MAX >> 9); - max_discard_sectors -= max_discard_sectors % granularity; - if (unlikely(!max_discard_sectors)) { - /* Avoid infinite loop below. Being cautious never hurts. */ - return -EOPNOTSUPP; - } - if (flags & BLKDEV_DISCARD_SECURE) { if (!blk_queue_secdiscard(q)) return -EOPNOTSUPP; type |= REQ_SECURE; } - blk_start_plug(&plug); - while (nr_sects) { - unsigned int req_sects; - sector_t end_sect, tmp; + /* + * Required bio_put occurs in bio_endio thanks to bio_chain below + */ + bio = bio_alloc(gfp_mask, 1); + if (!bio) + return -ENOMEM; - /* - * Required bio_put occurs in bio_endio thanks to bio_chain below - */ - bio = bio_alloc(gfp_mask, 1); - if (!bio) { - ret = -ENOMEM; - break; - } + bio_chain(bio, parent_bio); - req_sects = min_t(sector_t, nr_sects, max_discard_sectors); + bio->bi_iter.bi_sector = sector; + bio->bi_bdev = bdev; + bio->bi_iter.bi_size = nr_sects << 9; - /* - * If splitting a request, and the next starting sector would be - * misaligned, stop the discard at the previous aligned sector. - */ - end_sect = sector + req_sects; - tmp = end_sect; - if (req_sects < nr_sects && - sector_div(tmp, granularity) != alignment) { - end_sect = end_sect - alignment; - sector_div(end_sect, granularity); - end_sect = end_sect * granularity + alignment; - req_sects = end_sect - sector; - } + submit_bio(type, bio); - bio_chain(bio, parent_bio); - - bio->bi_iter.bi_sector = sector; - bio->bi_bdev = bdev; - - bio->bi_iter.bi_size = req_sects << 9; - nr_sects -= req_sects; - sector = end_sect; - - submit_bio(type, bio); - - /* - * We can loop for a long time in here, if someone does - * full device discards (like mkfs). Be nice and allow - * us to schedule out to avoid softlocking if preempt - * is disabled. - */ - cond_resched(); - } - blk_finish_plug(&plug); - - return ret; + return 0; } static bool block_size_is_power_of_two(struct pool *pool) @@ -1539,9 +1481,8 @@ static void process_discard_cell_no_passdown(struct thin_c *tc, } /* - * FIXME: DM local hack to defer parent bios's end_io until we - * _know_ all chained sub range discard bios have completed. - * Will go away once late bio splitting lands upstream! + * __bio_inc_remaining() is used to defer parent bios's end_io until + * we _know_ all chained sub range discard bios have completed. */ static inline void __bio_inc_remaining(struct bio *bio) { From bd49784fd1e8f42c7600fbfa206361324857f373 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 18 Aug 2015 16:26:16 -0400 Subject: [PATCH 11/15] dm stats: report precise_timestamps and histogram in @stats_list output If the user selected the precise_timestamps or histogram options, report it in the @stats_list message output. If the user didn't select these options, no extra tokens are reported, thus it is backward compatible with old software that doesn't know about precise timestamps and histogram. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org # 4.2 --- Documentation/device-mapper/statistics.txt | 4 ++++ drivers/md/dm-stats.c | 14 +++++++++++++- include/uapi/linux/dm-ioctl.h | 4 ++-- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/Documentation/device-mapper/statistics.txt b/Documentation/device-mapper/statistics.txt index 4919b2dfd1b3..6f5ef944ca4c 100644 --- a/Documentation/device-mapper/statistics.txt +++ b/Documentation/device-mapper/statistics.txt @@ -121,6 +121,10 @@ Messages Output format: : + + precise_timestamps histogram:n1,n2,n3,... + + The strings "precise_timestamps" and "histogram" are printed only + if they were specified when creating the region. @stats_print [ ] diff --git a/drivers/md/dm-stats.c b/drivers/md/dm-stats.c index 8a8b48fa901a..8289804ccd99 100644 --- a/drivers/md/dm-stats.c +++ b/drivers/md/dm-stats.c @@ -457,12 +457,24 @@ static int dm_stats_list(struct dm_stats *stats, const char *program, list_for_each_entry(s, &stats->list, list_entry) { if (!program || !strcmp(program, s->program_id)) { len = s->end - s->start; - DMEMIT("%d: %llu+%llu %llu %s %s\n", s->id, + DMEMIT("%d: %llu+%llu %llu %s %s", s->id, (unsigned long long)s->start, (unsigned long long)len, (unsigned long long)s->step, s->program_id, s->aux_data); + if (s->stat_flags & STAT_PRECISE_TIMESTAMPS) + DMEMIT(" precise_timestamps"); + if (s->n_histogram_entries) { + unsigned i; + DMEMIT(" histogram:"); + for (i = 0; i < s->n_histogram_entries; i++) { + if (i) + DMEMIT(","); + DMEMIT("%llu", s->histogram_boundaries[i]); + } + } + DMEMIT("\n"); } } mutex_unlock(&stats->mutex); diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h index 061aca3a962d..d34611e35a30 100644 --- a/include/uapi/linux/dm-ioctl.h +++ b/include/uapi/linux/dm-ioctl.h @@ -267,9 +267,9 @@ enum { #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) #define DM_VERSION_MAJOR 4 -#define DM_VERSION_MINOR 32 +#define DM_VERSION_MINOR 33 #define DM_VERSION_PATCHLEVEL 0 -#define DM_VERSION_EXTRA "-ioctl (2015-6-26)" +#define DM_VERSION_EXTRA "-ioctl (2015-8-18)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ From f15f4d720088c140cdf1fee6aeab3549dbdddc41 Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Tue, 25 Aug 2015 17:15:41 +0200 Subject: [PATCH 12/15] dm raid: document RAID 4/5/6 discard support For RAID 4/5/6 data integrity reasons 'discard_zeroes_data' must work properly. Signed-off-by: Heinz Mauelshagen Signed-off-by: Mike Snitzer --- Documentation/device-mapper/dm-raid.txt | 31 +++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/Documentation/device-mapper/dm-raid.txt b/Documentation/device-mapper/dm-raid.txt index cb12af3b51c2..df2d636b6088 100644 --- a/Documentation/device-mapper/dm-raid.txt +++ b/Documentation/device-mapper/dm-raid.txt @@ -209,6 +209,37 @@ include: "repair" - Initiate a repair of the array. "reshape"- Currently unsupported (-EINVAL). + +Discard Support +--------------- +The implementation of discard support among hardware vendors varies. +When a block is discarded, some storage devices will return zeroes when +the block is read. These devices set the 'discard_zeroes_data' +attribute. Other devices will return random data. Confusingly, some +devices that advertise 'discard_zeroes_data' will not reliably return +zeroes when discarded blocks are read! Since RAID 4/5/6 uses blocks +from a number of devices to calculate parity blocks and (for performance +reasons) relies on 'discard_zeroes_data' being reliable, it is important +that the devices be consistent. Blocks may be discarded in the middle +of a RAID 4/5/6 stripe and if subsequent read results are not +consistent, the parity blocks may be calculated differently at any time; +making the parity blocks useless for redundancy. It is important to +understand how your hardware behaves with discards if you are going to +enable discards with RAID 4/5/6. + +Since the behavior of storage devices is unreliable in this respect, +even when reporting 'discard_zeroes_data', by default RAID 4/5/6 +discard support is disabled -- this ensures data integrity at the +expense of losing some performance. + +Storage devices that properly support 'discard_zeroes_data' are +increasingly whitelisted in the kernel and can thus be trusted. + +For trusted devices, the following dm-raid module parameter can be set +to safely enable discard support for RAID 4/5/6: + 'devices_handle_discards_safely' + + Version History --------------- 1.0.0 Initial version. Support for RAID 4/5/6 From 9153df7405ae04c1b0466de720e0a685cfea1a3a Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Mon, 31 Aug 2015 18:20:08 +0100 Subject: [PATCH 13/15] dm cache: fix leaking of deferred bio prison cells There were two cases where dm_cell_visit_release() was being called, which removes the cell from the prison's rbtree, but the callers didn't also return the cell to the mempool. Fix this by having them call free_prison_cell(). This leak manifested as the 'kmalloc-96' slab growing until OOM. Fixes: 651f5fa2a3 ("dm cache: defer whole cells") Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org # 4.1+ --- drivers/md/dm-cache-target.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 8cef66b33243..2f60cbf404e5 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -1730,6 +1730,8 @@ static void remap_cell_to_origin_clear_discard(struct cache *cache, remap_to_origin(cache, bio); issue(cache, bio); } + + free_prison_cell(cache, cell); } static void remap_cell_to_cache_dirty(struct cache *cache, struct dm_bio_prison_cell *cell, @@ -1764,6 +1766,8 @@ static void remap_cell_to_cache_dirty(struct cache *cache, struct dm_bio_prison_ remap_to_cache(cache, bio, cblock); issue(cache, bio); } + + free_prison_cell(cache, cell); } /*----------------------------------------------------------------*/ From dc9cee5db50afaf38506bc12eb479fb8ea536dba Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 31 Aug 2015 15:41:34 -0400 Subject: [PATCH 14/15] dm cache: small cleanups related to deferred prison cell cleanup Eliminate __cell_release() since it only had one caller that always released the cell holder. Switch cell_error_with_code() to using free_prison_cell() for the sake of consistency. Signed-off-by: Mike Snitzer --- drivers/md/dm-cache-target.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 2f60cbf404e5..e13e5edf2298 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -1063,14 +1063,6 @@ static void dec_io_migrations(struct cache *cache) atomic_dec(&cache->nr_io_migrations); } -static void __cell_release(struct cache *cache, struct dm_bio_prison_cell *cell, - bool holder, struct bio_list *bios) -{ - (holder ? dm_cell_release : dm_cell_release_no_holder) - (cache->prison, cell, bios); - free_prison_cell(cache, cell); -} - static bool discard_or_flush(struct bio *bio) { return bio->bi_rw & (REQ_FLUSH | REQ_FUA | REQ_DISCARD); @@ -1078,14 +1070,13 @@ static bool discard_or_flush(struct bio *bio) static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell) { - if (discard_or_flush(cell->holder)) + if (discard_or_flush(cell->holder)) { /* - * We have to handle these bios - * individually. + * We have to handle these bios individually. */ - __cell_release(cache, cell, true, &cache->deferred_bios); - - else + dm_cell_release(cache->prison, cell, &cache->deferred_bios); + free_prison_cell(cache, cell); + } else list_add_tail(&cell->user_list, &cache->deferred_cells); } @@ -1112,7 +1103,7 @@ static void cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell, boo static void cell_error_with_code(struct cache *cache, struct dm_bio_prison_cell *cell, int err) { dm_cell_error(cache->prison, cell, err); - dm_bio_prison_free_cell(cache->prison, cell); + free_prison_cell(cache, cell); } static void cell_requeue(struct cache *cache, struct dm_bio_prison_cell *cell) From cc7da0ba9c96699592d0a69d7d146ac6adcc18e7 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Tue, 1 Sep 2015 11:38:19 +0100 Subject: [PATCH 15/15] dm cache: fix use after freeing migrations Both free_io_migration() and issue_discard() dereference a migration that was just freed. Fix those by saving off the migrations's cache object before freeing the migration. Also cleanup needless mg->cache dereferences now that the cache object is available directly. Fixes: e44b6a5a3c ("dm cache: move wake_waker() from free_migrations() to where it is needed") Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer --- drivers/md/dm-cache-target.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index e13e5edf2298..f9d9cc6a094b 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -1113,9 +1113,11 @@ static void cell_requeue(struct cache *cache, struct dm_bio_prison_cell *cell) static void free_io_migration(struct dm_cache_migration *mg) { - dec_io_migrations(mg->cache); + struct cache *cache = mg->cache; + + dec_io_migrations(cache); free_migration(mg); - wake_worker(mg->cache); + wake_worker(cache); } static void migration_failure(struct dm_cache_migration *mg) @@ -1342,17 +1344,18 @@ static void issue_discard(struct dm_cache_migration *mg) { dm_dblock_t b, e; struct bio *bio = mg->new_ocell->holder; + struct cache *cache = mg->cache; - calc_discard_block_range(mg->cache, bio, &b, &e); + calc_discard_block_range(cache, bio, &b, &e); while (b != e) { - set_discard(mg->cache, b); + set_discard(cache, b); b = to_dblock(from_dblock(b) + 1); } bio_endio(bio, 0); - cell_defer(mg->cache, mg->new_ocell, false); + cell_defer(cache, mg->new_ocell, false); free_migration(mg); - wake_worker(mg->cache); + wake_worker(cache); } static void issue_copy_or_discard(struct dm_cache_migration *mg)