Merge tag 'for-5.3/dm-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper fixes from Mike Snitzer:

 - Revert a DM bufio change from during the 5.3 merge window now that a
   proper fix has been made to the block loopback driver.

 - Fix DM kcopyd to wakeup so failed subjobs get completed.

 - Various fixes to DM zoned target to address error handling, and
   other small tweaks (SPDX license identifiers and fix typos).

 - Fix DM integrity range locking race by tracking whether journal has
   changed.

 - Fix DM dust target to detect reads of badblocks beyond the first
   512b sector (applicable if blocksize is larger than 512b).

 - Fix DM persistent-data issue in both the DM btree and DM
   space-map-metadata interfaces.

 - Fix out of bounds memory access with certain DM table configurations.

* tag 'for-5.3/dm-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm table: fix invalid memory accesses with too high sector number
  dm space map metadata: fix missing store of apply_bops() return value
  dm btree: fix order of block initialization in btree_split_beneath
  dm raid: add missing cleanup in raid_ctr()
  dm zoned: fix potential NULL dereference in dmz_do_reclaim()
  dm dust: use dust block size for badblocklist index
  dm integrity: fix a crash due to BUG_ON in __journal_read_write()
  dm zoned: fix a few typos
  dm zoned: add SPDX license identifiers
  dm zoned: properly handle backing device failure
  dm zoned: improve error handling in i/o map code
  dm zoned: improve error handling in reclaim
  dm kcopyd: always complete failed jobs
  Revert "dm bufio: fix deadlock with loop device"
commit dd469a4560
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -1599,7 +1599,9 @@ dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
     unsigned long freed;
 
     c = container_of(shrink, struct dm_bufio_client, shrinker);
-    if (!dm_bufio_trylock(c))
+    if (sc->gfp_mask & __GFP_FS)
+        dm_bufio_lock(c);
+    else if (!dm_bufio_trylock(c))
         return SHRINK_STOP;
 
     freed = __scan(c, sc->nr_to_scan, sc->gfp_mask);
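The revert restores the shrinker's original locking policy: when the allocation context allows filesystem recursion (__GFP_FS), the scan may sleep on the bufio lock, otherwise it only trylocks and gives up with SHRINK_STOP. Below is a minimal userspace sketch of that decision, with a pthread mutex standing in for the bufio lock; the names and constants are illustrative stand-ins, not the kernel API.

#include <pthread.h>
#include <stdio.h>

#define SHRINK_STOP (~0UL)      /* illustrative stand-in for the kernel constant */
#define GFP_FS      (1u << 0)   /* illustrative "may recurse into the FS" flag */

static pthread_mutex_t bufio_lock = PTHREAD_MUTEX_INITIALIZER;

/* Mimics the restored logic: sleep on the lock only if __GFP_FS is allowed. */
static unsigned long shrink_scan(unsigned int gfp_mask, unsigned long nr_to_scan)
{
    if (gfp_mask & GFP_FS)
        pthread_mutex_lock(&bufio_lock);              /* may block */
    else if (pthread_mutex_trylock(&bufio_lock) != 0)
        return SHRINK_STOP;                           /* avoid deadlock, give up */

    unsigned long freed = nr_to_scan;                 /* pretend we freed everything */
    pthread_mutex_unlock(&bufio_lock);
    return freed;
}

int main(void)
{
    printf("freed %lu\n", shrink_scan(GFP_FS, 128));
    printf("freed %lu (or SHRINK_STOP)\n", shrink_scan(0, 128));
    return 0;
}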
--- a/drivers/md/dm-dust.c
+++ b/drivers/md/dm-dust.c
@@ -25,6 +25,7 @@ struct dust_device {
     unsigned long long badblock_count;
     spinlock_t dust_lock;
     unsigned int blksz;
+    int sect_per_block_shift;
     unsigned int sect_per_block;
     sector_t start;
     bool fail_read_on_bb:1;
@@ -79,7 +80,7 @@ static int dust_remove_block(struct dust_device *dd, unsigned long long block)
     unsigned long flags;
 
     spin_lock_irqsave(&dd->dust_lock, flags);
-    bblock = dust_rb_search(&dd->badblocklist, block * dd->sect_per_block);
+    bblock = dust_rb_search(&dd->badblocklist, block);
 
     if (bblock == NULL) {
         if (!dd->quiet_mode) {
@@ -113,7 +114,7 @@ static int dust_add_block(struct dust_device *dd, unsigned long long block)
     }
 
     spin_lock_irqsave(&dd->dust_lock, flags);
-    bblock->bb = block * dd->sect_per_block;
+    bblock->bb = block;
     if (!dust_rb_insert(&dd->badblocklist, bblock)) {
         if (!dd->quiet_mode) {
             DMERR("%s: block %llu already in badblocklist",
@@ -138,7 +139,7 @@ static int dust_query_block(struct dust_device *dd, unsigned long long block)
     unsigned long flags;
 
     spin_lock_irqsave(&dd->dust_lock, flags);
-    bblock = dust_rb_search(&dd->badblocklist, block * dd->sect_per_block);
+    bblock = dust_rb_search(&dd->badblocklist, block);
     if (bblock != NULL)
         DMINFO("%s: block %llu found in badblocklist", __func__, block);
     else
@@ -165,6 +166,7 @@ static int dust_map_read(struct dust_device *dd, sector_t thisblock,
     int ret = DM_MAPIO_REMAPPED;
 
     if (fail_read_on_bb) {
+        thisblock >>= dd->sect_per_block_shift;
         spin_lock_irqsave(&dd->dust_lock, flags);
         ret = __dust_map_read(dd, thisblock);
         spin_unlock_irqrestore(&dd->dust_lock, flags);
@@ -195,6 +197,7 @@ static int dust_map_write(struct dust_device *dd, sector_t thisblock,
     unsigned long flags;
 
     if (fail_read_on_bb) {
+        thisblock >>= dd->sect_per_block_shift;
         spin_lock_irqsave(&dd->dust_lock, flags);
         __dust_map_write(dd, thisblock);
         spin_unlock_irqrestore(&dd->dust_lock, flags);
@@ -331,6 +334,8 @@ static int dust_ctr(struct dm_target *ti, unsigned int argc, char **argv)
     dd->blksz = blksz;
     dd->start = tmp;
 
+    dd->sect_per_block_shift = __ffs(sect_per_block);
+
     /*
      * Whether to fail a read on a "bad" block.
      * Defaults to false; enabled later by message.
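The dust change keys the badblock tree by dust block number instead of by 512-byte sector, and converts each incoming I/O sector with a shift derived from the block size, so bad blocks are caught even when the read does not start at the block's first sector. A standalone sketch of that conversion, assuming a power-of-two sectors-per-block as the target requires; the helper names are invented for illustration.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* With a 4096-byte dust block and 512-byte sectors, sect_per_block is 8. */
static unsigned int sect_per_block = 8;
static int sect_per_block_shift;     /* log2(sect_per_block), like __ffs() in the kernel */

/* Tiny stand-in for the rb-tree: a list of bad *block* numbers. */
static const uint64_t badblocks[] = { 3, 17, 42 };

static bool block_is_bad(uint64_t block)
{
    for (size_t i = 0; i < sizeof(badblocks) / sizeof(badblocks[0]); i++)
        if (badblocks[i] == block)
            return true;
    return false;
}

/* Map an I/O sector to a dust block index before the lookup. */
static bool read_hits_badblock(uint64_t sector)
{
    uint64_t block = sector >> sect_per_block_shift;
    return block_is_bad(block);
}

int main(void)
{
    sect_per_block_shift = __builtin_ctz(sect_per_block);   /* 8 -> 3 */

    /* Sector 139 falls in block 17 (139 >> 3), so it is caught even
     * though it is not the first 512-byte sector of that block. */
    printf("sector 139 bad? %d\n", read_hits_badblock(139));
    printf("sector 200 bad? %d\n", read_hits_badblock(200));
    return 0;
}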
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -1943,7 +1943,22 @@ offload_to_thread:
             queue_work(ic->wait_wq, &dio->work);
             return;
         }
+        if (journal_read_pos != NOT_FOUND)
+            dio->range.n_sectors = ic->sectors_per_block;
         wait_and_add_new_range(ic, &dio->range);
+        /*
+         * wait_and_add_new_range drops the spinlock, so the journal
+         * may have been changed arbitrarily. We need to recheck.
+         * To simplify the code, we restrict I/O size to just one block.
+         */
+        if (journal_read_pos != NOT_FOUND) {
+            sector_t next_sector;
+            unsigned new_pos = find_journal_node(ic, dio->range.logical_sector, &next_sector);
+            if (unlikely(new_pos != journal_read_pos)) {
+                remove_range_unlocked(ic, &dio->range);
+                goto retry;
+            }
+        }
     }
     spin_unlock_irq(&ic->endio_wait.lock);
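The integrity fix is a revalidation pattern: wait_and_add_new_range() can drop and retake the spinlock, so the journal position found earlier may be stale and must be recomputed and compared before use, retrying if it moved. A userspace sketch of the same idea, with a mutex standing in for the spinlock and an integer standing in for the real journal lookup; all names here are illustrative.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int journal_pos = 7;          /* state another thread may change */

/* Stand-in for wait_and_add_new_range(): may drop the lock and sleep. */
static void wait_and_add_new_range(void)
{
    pthread_mutex_unlock(&lock);
    /* ... another thread can advance the journal here ... */
    pthread_mutex_lock(&lock);
}

static void handle_io(void)
{
    pthread_mutex_lock(&lock);
retry:
    {
        int pos = journal_pos;            /* first lookup */

        wait_and_add_new_range();         /* the lock was dropped in between */

        /* Revalidate: if the journal moved, undo and retry the lookup. */
        if (pos != journal_pos)
            goto retry;

        printf("safe to use journal position %d\n", pos);
    }
    pthread_mutex_unlock(&lock);
}

int main(void)
{
    handle_io();
    return 0;
}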
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -566,8 +566,10 @@ static int run_io_job(struct kcopyd_job *job)
      * no point in continuing.
      */
     if (test_bit(DM_KCOPYD_WRITE_SEQ, &job->flags) &&
-        job->master_job->write_err)
+        job->master_job->write_err) {
+        job->write_err = job->master_job->write_err;
         return -EIO;
+    }
 
     io_job_start(job->kc->throttle);
 
@@ -619,6 +621,7 @@ static int process_jobs(struct list_head *jobs, struct dm_kcopyd_client *kc,
             else
                 job->read_err = 1;
             push(&kc->complete_jobs, job);
+            wake(kc);
             break;
         }
 
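Both kcopyd hunks serve one goal: once a master job has failed, the remaining subjobs must still be pushed onto the completion list (with the master's error copied into them) and the worker woken so they are actually retired rather than left hanging. A small sketch of the "inherit the master's error, then complete anyway" shape; the structures are simplified stand-ins, not the kcopyd ones.

#include <errno.h>
#include <stdio.h>

struct job {
    int write_err;
    struct job *master;
};

/* If the master already failed, inherit its error and stop issuing I/O,
 * but still report a result so the subjob can be completed and freed. */
static int run_io_job(struct job *job)
{
    if (job->master && job->master->write_err) {
        job->write_err = job->master->write_err;
        return -EIO;
    }
    /* ... would normally submit the I/O here ... */
    return 0;
}

int main(void)
{
    struct job master = { .write_err = EIO, .master = NULL };
    struct job sub = { .write_err = 0, .master = &master };

    if (run_io_job(&sub) < 0)
        printf("subjob completed with inherited error %d\n", sub.write_err);
    return 0;
}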
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -3194,7 +3194,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
          */
         r = rs_prepare_reshape(rs);
         if (r)
-            return r;
+            goto bad;
 
         /* Reshaping ain't recovery, so disable recovery */
         rs_setup_recovery(rs, MaxSector);
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1342,7 +1342,7 @@ void dm_table_event(struct dm_table *t)
 }
 EXPORT_SYMBOL(dm_table_event);
 
-sector_t dm_table_get_size(struct dm_table *t)
+inline sector_t dm_table_get_size(struct dm_table *t)
 {
     return t->num_targets ? (t->highs[t->num_targets - 1] + 1) : 0;
 }
@@ -1367,6 +1367,9 @@ struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector)
     unsigned int l, n = 0, k = 0;
     sector_t *node;
 
+    if (unlikely(sector >= dm_table_get_size(t)))
+        return &t->targets[t->num_targets];
+
     for (l = 0; l < t->depth; l++) {
         n = get_child(n, k);
         node = get_node(t, l, n);
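The dm-table fix adds an explicit bounds check so dm_table_find_target() returns the one-past-the-end sentinel entry for an out-of-range sector instead of walking the index with a bogus key; callers treat that sentinel as "no valid target". A small self-contained sketch of the sentinel convention, using simplified stand-in structures rather than the kernel ones.

#include <stdbool.h>
#include <stdio.h>

struct target {
    unsigned long long begin;   /* first sector owned by this target */
    unsigned long long len;     /* number of sectors */
};

static struct target targets[] = {
    { .begin = 0,   .len = 100 },
    { .begin = 100, .len = 50  },
};
static const unsigned int num_targets = 2;

static unsigned long long table_size(void)
{
    return targets[num_targets - 1].begin + targets[num_targets - 1].len;
}

/* Out-of-range sectors map to &targets[num_targets], one past the end. */
static struct target *find_target(unsigned long long sector)
{
    if (sector >= table_size())
        return &targets[num_targets];

    for (unsigned int i = 0; i < num_targets; i++)
        if (sector < targets[i].begin + targets[i].len)
            return &targets[i];
    return &targets[num_targets];
}

static bool target_is_valid(const struct target *t)
{
    return t != &targets[num_targets];
}

int main(void)
{
    printf("sector 120 valid? %d\n", target_is_valid(find_target(120)));  /* 1 */
    printf("sector 999 valid? %d\n", target_is_valid(find_target(999)));  /* 0 */
    return 0;
}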
--- a/drivers/md/dm-zoned-metadata.c
+++ b/drivers/md/dm-zoned-metadata.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2017 Western Digital Corporation or its affiliates.
  *
@@ -34,7 +35,7 @@
  * (1) Super block (1 block)
  * (2) Chunk mapping table (nr_map_blocks)
  * (3) Bitmap blocks (nr_bitmap_blocks)
- * All metadata blocks are stored in conventional zones, starting from the
+ * All metadata blocks are stored in conventional zones, starting from
  * the first conventional zone found on disk.
  */
 struct dmz_super {
@@ -233,7 +234,7 @@ void dmz_unlock_map(struct dmz_metadata *zmd)
  * Lock/unlock metadata access. This is a "read" lock on a semaphore
  * that prevents metadata flush from running while metadata are being
  * modified. The actual metadata write mutual exclusion is achieved with
- * the map lock and zone styate management (active and reclaim state are
+ * the map lock and zone state management (active and reclaim state are
  * mutually exclusive).
  */
 void dmz_lock_metadata(struct dmz_metadata *zmd)
@@ -402,15 +403,18 @@ static struct dmz_mblock *dmz_get_mblock_slow(struct dmz_metadata *zmd,
     sector_t block = zmd->sb[zmd->mblk_primary].block + mblk_no;
     struct bio *bio;
 
+    if (dmz_bdev_is_dying(zmd->dev))
+        return ERR_PTR(-EIO);
+
     /* Get a new block and a BIO to read it */
     mblk = dmz_alloc_mblock(zmd, mblk_no);
     if (!mblk)
-        return NULL;
+        return ERR_PTR(-ENOMEM);
 
     bio = bio_alloc(GFP_NOIO, 1);
     if (!bio) {
         dmz_free_mblock(zmd, mblk);
-        return NULL;
+        return ERR_PTR(-ENOMEM);
     }
 
     spin_lock(&zmd->mblk_lock);
@@ -541,8 +545,8 @@ static struct dmz_mblock *dmz_get_mblock(struct dmz_metadata *zmd,
     if (!mblk) {
         /* Cache miss: read the block from disk */
         mblk = dmz_get_mblock_slow(zmd, mblk_no);
-        if (!mblk)
-            return ERR_PTR(-ENOMEM);
+        if (IS_ERR(mblk))
+            return mblk;
     }
 
     /* Wait for on-going read I/O and check for error */
@@ -570,16 +574,19 @@ static void dmz_dirty_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk)
 /*
  * Issue a metadata block write BIO.
  */
-static void dmz_write_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk,
-                             unsigned int set)
+static int dmz_write_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk,
+                            unsigned int set)
 {
     sector_t block = zmd->sb[set].block + mblk->no;
     struct bio *bio;
 
+    if (dmz_bdev_is_dying(zmd->dev))
+        return -EIO;
+
     bio = bio_alloc(GFP_NOIO, 1);
     if (!bio) {
         set_bit(DMZ_META_ERROR, &mblk->state);
-        return;
+        return -ENOMEM;
     }
 
     set_bit(DMZ_META_WRITING, &mblk->state);
@@ -591,6 +598,8 @@ static void dmz_write_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk,
     bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_META | REQ_PRIO);
     bio_add_page(bio, mblk->page, DMZ_BLOCK_SIZE, 0);
     submit_bio(bio);
+
+    return 0;
 }
 
 /*
@@ -602,6 +611,9 @@ static int dmz_rdwr_block(struct dmz_metadata *zmd, int op, sector_t block,
     struct bio *bio;
     int ret;
 
+    if (dmz_bdev_is_dying(zmd->dev))
+        return -EIO;
+
     bio = bio_alloc(GFP_NOIO, 1);
     if (!bio)
         return -ENOMEM;
@@ -659,22 +671,29 @@ static int dmz_write_dirty_mblocks(struct dmz_metadata *zmd,
 {
     struct dmz_mblock *mblk;
     struct blk_plug plug;
-    int ret = 0;
+    int ret = 0, nr_mblks_submitted = 0;
 
     /* Issue writes */
     blk_start_plug(&plug);
-    list_for_each_entry(mblk, write_list, link)
-        dmz_write_mblock(zmd, mblk, set);
+    list_for_each_entry(mblk, write_list, link) {
+        ret = dmz_write_mblock(zmd, mblk, set);
+        if (ret)
+            break;
+        nr_mblks_submitted++;
+    }
     blk_finish_plug(&plug);
 
     /* Wait for completion */
     list_for_each_entry(mblk, write_list, link) {
+        if (!nr_mblks_submitted)
+            break;
         wait_on_bit_io(&mblk->state, DMZ_META_WRITING,
                        TASK_UNINTERRUPTIBLE);
         if (test_bit(DMZ_META_ERROR, &mblk->state)) {
            clear_bit(DMZ_META_ERROR, &mblk->state);
            ret = -EIO;
        }
+        nr_mblks_submitted--;
     }
 
     /* Flush drive cache (this will also sync data) */
@@ -736,6 +755,11 @@ int dmz_flush_metadata(struct dmz_metadata *zmd)
      */
     dmz_lock_flush(zmd);
 
+    if (dmz_bdev_is_dying(zmd->dev)) {
+        ret = -EIO;
+        goto out;
+    }
+
     /* Get dirty blocks */
     spin_lock(&zmd->mblk_lock);
     list_splice_init(&zmd->mblk_dirty_list, &write_list);
@@ -1542,7 +1566,7 @@ static struct dm_zone *dmz_get_rnd_zone_for_reclaim(struct dmz_metadata *zmd)
     struct dm_zone *zone;
 
     if (list_empty(&zmd->map_rnd_list))
-        return NULL;
+        return ERR_PTR(-EBUSY);
 
     list_for_each_entry(zone, &zmd->map_rnd_list, link) {
         if (dmz_is_buf(zone))
@@ -1553,7 +1577,7 @@ static struct dm_zone *dmz_get_rnd_zone_for_reclaim(struct dmz_metadata *zmd)
             return dzone;
     }
 
-    return NULL;
+    return ERR_PTR(-EBUSY);
 }
 
 /*
@@ -1564,7 +1588,7 @@ static struct dm_zone *dmz_get_seq_zone_for_reclaim(struct dmz_metadata *zmd)
     struct dm_zone *zone;
 
     if (list_empty(&zmd->map_seq_list))
-        return NULL;
+        return ERR_PTR(-EBUSY);
 
     list_for_each_entry(zone, &zmd->map_seq_list, link) {
         if (!zone->bzone)
@@ -1573,7 +1597,7 @@ static struct dm_zone *dmz_get_seq_zone_for_reclaim(struct dmz_metadata *zmd)
             return zone;
     }
 
-    return NULL;
+    return ERR_PTR(-EBUSY);
 }
 
 /*
@@ -1628,9 +1652,13 @@ again:
     if (op != REQ_OP_WRITE)
         goto out;
 
-    /* Alloate a random zone */
+    /* Allocate a random zone */
     dzone = dmz_alloc_zone(zmd, DMZ_ALLOC_RND);
     if (!dzone) {
+        if (dmz_bdev_is_dying(zmd->dev)) {
+            dzone = ERR_PTR(-EIO);
+            goto out;
+        }
         dmz_wait_for_free_zones(zmd);
         goto again;
     }
@@ -1725,9 +1753,13 @@ again:
     if (bzone)
         goto out;
 
-    /* Alloate a random zone */
+    /* Allocate a random zone */
     bzone = dmz_alloc_zone(zmd, DMZ_ALLOC_RND);
     if (!bzone) {
+        if (dmz_bdev_is_dying(zmd->dev)) {
+            bzone = ERR_PTR(-EIO);
+            goto out;
+        }
         dmz_wait_for_free_zones(zmd);
         goto again;
     }
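Several of the dm-zoned-metadata changes above switch internal helpers from returning NULL on failure to the kernel's ERR_PTR()/IS_ERR()/PTR_ERR() convention, so callers can distinguish -EIO (backing device dying) from -ENOMEM or -EBUSY. The following is a minimal userspace re-creation of that encoding, purely to show the idea; the real macros live in include/linux/err.h.

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

/* Userspace re-creation of the kernel's ERR_PTR/IS_ERR/PTR_ERR idea:
 * small negative errno values are stuffed into an (invalid) pointer. */
#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error)     { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
    return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

struct mblock { int no; };

/* Sketch of a dmz_get_mblock_slow()-style helper: it now reports *why*
 * it failed instead of collapsing every failure into NULL. */
static struct mblock *get_mblock(int no, int device_dying)
{
    if (device_dying)
        return ERR_PTR(-EIO);

    struct mblock *m = malloc(sizeof(*m));
    if (!m)
        return ERR_PTR(-ENOMEM);
    m->no = no;
    return m;
}

int main(void)
{
    struct mblock *m = get_mblock(5, 0);
    if (IS_ERR(m)) {
        printf("error %ld\n", PTR_ERR(m));
    } else {
        printf("got block %d\n", m->no);
        free(m);
    }

    m = get_mblock(6, 1);
    if (IS_ERR(m))
        printf("error %ld\n", PTR_ERR(m));   /* prints -5 (EIO) */
    return 0;
}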
--- a/drivers/md/dm-zoned-reclaim.c
+++ b/drivers/md/dm-zoned-reclaim.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2017 Western Digital Corporation or its affiliates.
  *
@@ -37,7 +38,7 @@ enum {
 /*
  * Number of seconds of target BIO inactivity to consider the target idle.
  */
-#define DMZ_IDLE_PERIOD            (10UL * HZ)
+#define DMZ_IDLE_PERIOD        (10UL * HZ)
 
 /*
  * Percentage of unmapped (free) random zones below which reclaim starts
@@ -134,6 +135,9 @@ static int dmz_reclaim_copy(struct dmz_reclaim *zrc,
     set_bit(DM_KCOPYD_WRITE_SEQ, &flags);
 
     while (block < end_block) {
+        if (dev->flags & DMZ_BDEV_DYING)
+            return -EIO;
+
         /* Get a valid region from the source zone */
         ret = dmz_first_valid_block(zmd, src_zone, &block);
         if (ret <= 0)
@@ -215,7 +219,7 @@ static int dmz_reclaim_buf(struct dmz_reclaim *zrc, struct dm_zone *dzone)
 
     dmz_unlock_flush(zmd);
 
-    return 0;
+    return ret;
 }
 
 /*
@@ -259,7 +263,7 @@ static int dmz_reclaim_seq_data(struct dmz_reclaim *zrc, struct dm_zone *dzone)
 
     dmz_unlock_flush(zmd);
 
-    return 0;
+    return ret;
 }
 
 /*
@@ -312,7 +316,7 @@ static int dmz_reclaim_rnd_data(struct dmz_reclaim *zrc, struct dm_zone *dzone)
 
     dmz_unlock_flush(zmd);
 
-    return 0;
+    return ret;
 }
 
 /*
@@ -334,7 +338,7 @@ static void dmz_reclaim_empty(struct dmz_reclaim *zrc, struct dm_zone *dzone)
 /*
  * Find a candidate zone for reclaim and process it.
  */
-static void dmz_reclaim(struct dmz_reclaim *zrc)
+static int dmz_do_reclaim(struct dmz_reclaim *zrc)
 {
     struct dmz_metadata *zmd = zrc->metadata;
     struct dm_zone *dzone;
@@ -344,8 +348,8 @@ static void dmz_reclaim(struct dmz_reclaim *zrc)
 
     /* Get a data zone */
     dzone = dmz_get_zone_for_reclaim(zmd);
-    if (!dzone)
-        return;
+    if (IS_ERR(dzone))
+        return PTR_ERR(dzone);
 
     start = jiffies;
 
@@ -391,13 +395,20 @@ static void dmz_reclaim(struct dmz_reclaim *zrc)
 out:
     if (ret) {
         dmz_unlock_zone_reclaim(dzone);
-        return;
+        return ret;
     }
 
-    (void) dmz_flush_metadata(zrc->metadata);
+    ret = dmz_flush_metadata(zrc->metadata);
+    if (ret) {
+        dmz_dev_debug(zrc->dev,
+                      "Metadata flush for zone %u failed, err %d\n",
+                      dmz_id(zmd, rzone), ret);
+        return ret;
+    }
 
     dmz_dev_debug(zrc->dev, "Reclaimed zone %u in %u ms",
                   dmz_id(zmd, rzone), jiffies_to_msecs(jiffies - start));
+    return 0;
 }
 
 /*
@@ -427,7 +438,7 @@ static bool dmz_should_reclaim(struct dmz_reclaim *zrc)
         return false;
 
     /*
-     * If the percentage of unmappped random zones is low,
+     * If the percentage of unmapped random zones is low,
      * reclaim even if the target is busy.
      */
     return p_unmap_rnd <= DMZ_RECLAIM_LOW_UNMAP_RND;
@@ -442,6 +453,10 @@ static void dmz_reclaim_work(struct work_struct *work)
     struct dmz_metadata *zmd = zrc->metadata;
     unsigned int nr_rnd, nr_unmap_rnd;
     unsigned int p_unmap_rnd;
+    int ret;
+
+    if (dmz_bdev_is_dying(zrc->dev))
+        return;
 
     if (!dmz_should_reclaim(zrc)) {
         mod_delayed_work(zrc->wq, &zrc->work, DMZ_IDLE_PERIOD);
@@ -471,7 +486,17 @@ static void dmz_reclaim_work(struct work_struct *work)
               (dmz_target_idle(zrc) ? "Idle" : "Busy"),
               p_unmap_rnd, nr_unmap_rnd, nr_rnd);
 
-    dmz_reclaim(zrc);
+    ret = dmz_do_reclaim(zrc);
+    if (ret) {
+        dmz_dev_debug(zrc->dev, "Reclaim error %d\n", ret);
+        if (ret == -EIO)
+            /*
+             * LLD might be performing some error handling sequence
+             * at the underlying device. To not interfere, do not
+             * attempt to schedule the next reclaim run immediately.
+             */
+            return;
+    }
 
     dmz_schedule_reclaim(zrc);
 }
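With dmz_reclaim() turned into dmz_do_reclaim() returning an int, the work function can act on the result: it bails out early when the backing device is dying, skips rescheduling entirely on -EIO (the lower-level driver is presumably handling the failure), and otherwise re-arms itself. A compact sketch of that scheduling decision, with plain functions standing in for the workqueue machinery; the names are invented for illustration.

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

static bool device_dying;

/* Stand-in for dmz_do_reclaim(): 0 on success, negative errno on failure. */
static int do_reclaim(void)
{
    if (device_dying)
        return -EIO;
    return 0;
}

static void schedule_reclaim(void)
{
    printf("next reclaim run scheduled\n");
}

/* Mirrors the shape of dmz_reclaim_work(): bail out early on a dying
 * device, and do not re-arm the work after an -EIO from reclaim. */
static void reclaim_work(void)
{
    if (device_dying)
        return;

    int ret = do_reclaim();
    if (ret) {
        fprintf(stderr, "reclaim error %d\n", ret);
        if (ret == -EIO)
            return;        /* let the lower layers finish their error handling */
    }

    schedule_reclaim();
}

int main(void)
{
    reclaim_work();          /* schedules the next run */
    device_dying = true;
    reclaim_work();          /* returns without rescheduling */
    return 0;
}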
--- a/drivers/md/dm-zoned-target.c
+++ b/drivers/md/dm-zoned-target.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2017 Western Digital Corporation or its affiliates.
  *
@@ -133,6 +134,8 @@ static int dmz_submit_bio(struct dmz_target *dmz, struct dm_zone *zone,
 
     refcount_inc(&bioctx->ref);
     generic_make_request(clone);
+    if (clone->bi_status == BLK_STS_IOERR)
+        return -EIO;
 
     if (bio_op(bio) == REQ_OP_WRITE && dmz_is_seq(zone))
         zone->wp_block += nr_blocks;
@@ -277,8 +280,8 @@ static int dmz_handle_buffered_write(struct dmz_target *dmz,
 
     /* Get the buffer zone. One will be allocated if needed */
     bzone = dmz_get_chunk_buffer(zmd, zone);
-    if (!bzone)
-        return -ENOSPC;
+    if (IS_ERR(bzone))
+        return PTR_ERR(bzone);
 
     if (dmz_is_readonly(bzone))
         return -EROFS;
@@ -389,6 +392,11 @@ static void dmz_handle_bio(struct dmz_target *dmz, struct dm_chunk_work *cw,
 
     dmz_lock_metadata(zmd);
 
+    if (dmz->dev->flags & DMZ_BDEV_DYING) {
+        ret = -EIO;
+        goto out;
+    }
+
     /*
      * Get the data zone mapping the chunk. There may be no
      * mapping for read and discard. If a mapping is obtained,
@@ -493,6 +501,8 @@ static void dmz_flush_work(struct work_struct *work)
 
     /* Flush dirty metadata blocks */
     ret = dmz_flush_metadata(dmz->metadata);
+    if (ret)
+        dmz_dev_debug(dmz->dev, "Metadata flush failed, rc=%d\n", ret);
 
     /* Process queued flush requests */
     while (1) {
@@ -513,22 +523,24 @@ static void dmz_flush_work(struct work_struct *work)
  * Get a chunk work and start it to process a new BIO.
  * If the BIO chunk has no work yet, create one.
  */
-static void dmz_queue_chunk_work(struct dmz_target *dmz, struct bio *bio)
+static int dmz_queue_chunk_work(struct dmz_target *dmz, struct bio *bio)
 {
     unsigned int chunk = dmz_bio_chunk(dmz->dev, bio);
     struct dm_chunk_work *cw;
+    int ret = 0;
 
     mutex_lock(&dmz->chunk_lock);
 
     /* Get the BIO chunk work. If one is not active yet, create one */
     cw = radix_tree_lookup(&dmz->chunk_rxtree, chunk);
     if (!cw) {
-        int ret;
 
         /* Create a new chunk work */
         cw = kmalloc(sizeof(struct dm_chunk_work), GFP_NOIO);
-        if (!cw)
+        if (unlikely(!cw)) {
+            ret = -ENOMEM;
             goto out;
+        }
 
         INIT_WORK(&cw->work, dmz_chunk_work);
         refcount_set(&cw->refcount, 0);
@@ -539,7 +551,6 @@ static void dmz_queue_chunk_work(struct dmz_target *dmz, struct bio *bio)
         ret = radix_tree_insert(&dmz->chunk_rxtree, chunk, cw);
         if (unlikely(ret)) {
             kfree(cw);
-            cw = NULL;
             goto out;
         }
     }
@@ -547,10 +558,38 @@ static void dmz_queue_chunk_work(struct dmz_target *dmz, struct bio *bio)
     bio_list_add(&cw->bio_list, bio);
     dmz_get_chunk_work(cw);
 
+    dmz_reclaim_bio_acc(dmz->reclaim);
     if (queue_work(dmz->chunk_wq, &cw->work))
         dmz_get_chunk_work(cw);
 out:
     mutex_unlock(&dmz->chunk_lock);
+    return ret;
 }
 
 /*
+ * Check the backing device availability. If it's on the way out,
+ * start failing I/O. Reclaim and metadata components also call this
+ * function to cleanly abort operation in the event of such failure.
+ */
+bool dmz_bdev_is_dying(struct dmz_dev *dmz_dev)
+{
+    struct gendisk *disk;
+
+    if (!(dmz_dev->flags & DMZ_BDEV_DYING)) {
+        disk = dmz_dev->bdev->bd_disk;
+        if (blk_queue_dying(bdev_get_queue(dmz_dev->bdev))) {
+            dmz_dev_warn(dmz_dev, "Backing device queue dying");
+            dmz_dev->flags |= DMZ_BDEV_DYING;
+        } else if (disk->fops->check_events) {
+            if (disk->fops->check_events(disk, 0) &
+                    DISK_EVENT_MEDIA_CHANGE) {
+                dmz_dev_warn(dmz_dev, "Backing device offline");
+                dmz_dev->flags |= DMZ_BDEV_DYING;
+            }
+        }
+    }
+
+    return dmz_dev->flags & DMZ_BDEV_DYING;
+}
+
+/*
@@ -564,6 +603,10 @@ static int dmz_map(struct dm_target *ti, struct bio *bio)
     sector_t sector = bio->bi_iter.bi_sector;
     unsigned int nr_sectors = bio_sectors(bio);
     sector_t chunk_sector;
+    int ret;
+
+    if (dmz_bdev_is_dying(dmz->dev))
+        return DM_MAPIO_KILL;
 
     dmz_dev_debug(dev, "BIO op %d sector %llu + %u => chunk %llu, block %llu, %u blocks",
                   bio_op(bio), (unsigned long long)sector, nr_sectors,
@@ -601,8 +644,14 @@ static int dmz_map(struct dm_target *ti, struct bio *bio)
         dm_accept_partial_bio(bio, dev->zone_nr_sectors - chunk_sector);
 
     /* Now ready to handle this BIO */
-    dmz_reclaim_bio_acc(dmz->reclaim);
-    dmz_queue_chunk_work(dmz, bio);
+    ret = dmz_queue_chunk_work(dmz, bio);
+    if (ret) {
+        dmz_dev_debug(dmz->dev,
+                      "BIO op %d, can't process chunk %llu, err %i\n",
+                      bio_op(bio), (u64)dmz_bio_chunk(dmz->dev, bio),
+                      ret);
+        return DM_MAPIO_REQUEUE;
+    }
 
     return DM_MAPIO_SUBMITTED;
 }
@@ -855,6 +904,9 @@ static int dmz_prepare_ioctl(struct dm_target *ti, struct block_device **bdev)
 {
     struct dmz_target *dmz = ti->private;
 
+    if (dmz_bdev_is_dying(dmz->dev))
+        return -ENODEV;
+
     *bdev = dmz->dev->bdev;
 
     return 0;
--- a/drivers/md/dm-zoned.h
+++ b/drivers/md/dm-zoned.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2017 Western Digital Corporation or its affiliates.
  *
@@ -56,6 +57,8 @@ struct dmz_dev {
 
     unsigned int nr_zones;
 
+    unsigned int flags;
+
     sector_t zone_nr_sectors;
     unsigned int zone_nr_sectors_shift;
 
@@ -67,6 +70,9 @@ struct dmz_dev {
                      (dev)->zone_nr_sectors_shift)
 #define dmz_chunk_block(dev, b) ((b) & ((dev)->zone_nr_blocks - 1))
 
+/* Device flags. */
+#define DMZ_BDEV_DYING      (1 << 0)
+
 /*
  * Zone descriptor.
  */
@@ -245,4 +251,9 @@ void dmz_resume_reclaim(struct dmz_reclaim *zrc);
 void dmz_reclaim_bio_acc(struct dmz_reclaim *zrc);
 void dmz_schedule_reclaim(struct dmz_reclaim *zrc);
 
+/*
+ * Functions defined in dm-zoned-target.c
+ */
+bool dmz_bdev_is_dying(struct dmz_dev *dmz_dev);
+
 #endif /* DM_ZONED_H */
--- a/drivers/md/persistent-data/dm-btree.c
+++ b/drivers/md/persistent-data/dm-btree.c
@@ -628,39 +628,40 @@ static int btree_split_beneath(struct shadow_spine *s, uint64_t key)
 
     new_parent = shadow_current(s);
 
+    pn = dm_block_data(new_parent);
+    size = le32_to_cpu(pn->header.flags) & INTERNAL_NODE ?
+        sizeof(__le64) : s->info->value_type.size;
+
     /* create & init the left block */
     r = new_block(s->info, &left);
     if (r < 0)
         return r;
 
+    ln = dm_block_data(left);
+    nr_left = le32_to_cpu(pn->header.nr_entries) / 2;
+
+    ln->header.flags = pn->header.flags;
+    ln->header.nr_entries = cpu_to_le32(nr_left);
+    ln->header.max_entries = pn->header.max_entries;
+    ln->header.value_size = pn->header.value_size;
+    memcpy(ln->keys, pn->keys, nr_left * sizeof(pn->keys[0]));
+    memcpy(value_ptr(ln, 0), value_ptr(pn, 0), nr_left * size);
+
     /* create & init the right block */
     r = new_block(s->info, &right);
     if (r < 0) {
         unlock_block(s->info, left);
         return r;
     }
 
-    pn = dm_block_data(new_parent);
-    ln = dm_block_data(left);
     rn = dm_block_data(right);
 
-    nr_left = le32_to_cpu(pn->header.nr_entries) / 2;
     nr_right = le32_to_cpu(pn->header.nr_entries) - nr_left;
 
-    ln->header.flags = pn->header.flags;
-    ln->header.nr_entries = cpu_to_le32(nr_left);
-    ln->header.max_entries = pn->header.max_entries;
-    ln->header.value_size = pn->header.value_size;
-
     rn->header.flags = pn->header.flags;
     rn->header.nr_entries = cpu_to_le32(nr_right);
     rn->header.max_entries = pn->header.max_entries;
     rn->header.value_size = pn->header.value_size;
-
-    memcpy(ln->keys, pn->keys, nr_left * sizeof(pn->keys[0]));
     memcpy(rn->keys, pn->keys + nr_left, nr_right * sizeof(pn->keys[0]));
-
-    size = le32_to_cpu(pn->header.flags) & INTERNAL_NODE ?
-        sizeof(__le64) : s->info->value_type.size;
-    memcpy(value_ptr(ln, 0), value_ptr(pn, 0), nr_left * size);
     memcpy(value_ptr(rn, 0), value_ptr(pn, nr_left),
            nr_right * size);
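The btree fix is about ordering: new_block() can recurse into the space maps and rewrite the very node that shadow_current() returned, so the parent's keys and values must be copied into the first child before the second allocation is made. The hazard is the familiar "an allocation may invalidate data you still need" problem; here is a tiny userspace analogue using realloc(), where the copy has to happen before the call that may move or clobber the buffer. This is purely illustrative, not the btree code.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
    /* "Parent" buffer whose contents we are about to split in two. */
    char *parent = malloc(8);
    if (!parent)
        return 1;
    memcpy(parent, "ABCDEFGH", 8);

    char left[4], right[4];

    /* Correct order: copy everything we still need out of 'parent'
     * BEFORE the operation that may invalidate it... */
    memcpy(left, parent, 4);
    memcpy(right, parent + 4, 4);

    /* ...because this call may move the block and leave the old pointer
     * (or its contents) unusable, just as new_block() may rewrite the
     * shadowed parent node in the btree case. */
    char *bigger = realloc(parent, 1 << 20);
    if (!bigger) {
        free(parent);
        return 1;
    }
    parent = bigger;

    printf("left  = %.4s\n", left);    /* ABCD */
    printf("right = %.4s\n", right);   /* EFGH */

    free(parent);
    return 0;
}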
--- a/drivers/md/persistent-data/dm-space-map-metadata.c
+++ b/drivers/md/persistent-data/dm-space-map-metadata.c
@@ -249,7 +249,7 @@ static int out(struct sm_metadata *smm)
     }
 
     if (smm->recursion_count == 1)
-        apply_bops(smm);
+        r = apply_bops(smm);
 
     smm->recursion_count--;
 