. various fixes and cleanups for request-based DM core

. add support for delaying the requeue of requests; used by DM multipath
  when all paths have failed and 'queue_if_no_path' is enabled

. DM cache improvements to speedup the loading metadata and the writing
  of the hint array

. fix potential for a dm-crypt crash on device teardown

. remove dm_bufio_cond_resched() and just using cond_resched()

. change DM multipath to return a reservation conflict error immediately;
  rather than failing the path and retrying (potentially indefinitely)

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1

iQEcBAABAgAGBQJX7n9KAAoJEMUj8QotnQNab74IANm+rW2uYdpLNCxWUmcaih0d
BK8dLS/Mz35S0TRSekvynuBcPx18VP2Zueulc+aHTWcT4sj79l6KnVYT9g6c98rL
zzcv10QTteqhiiWwFmPHsZgv5dW8Y5wiRdt+SqcQ5sAHMFci6C05gzp9caNu7VTs
fbcLUdyYm40y3j84Lx/+ABXgnBhq+40OTtdnYSkEmLtdscPLzwpHgPmMctkrEl7e
7mqGC1KbDDzartqOZOeGP2P2qOCNN21qA+8ctMw9Xyze33uwvj7Vx6cro6e28wMm
ZClY9XNGlfuW9dCNtFR9o6NXS6NIK30UJbKqyZPPsK+70JrOgzh6GzQnwSXdyNs=
=7SkG
-----END PGP SIGNATURE-----

Merge tag 'dm-4.9-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper updates from Mike Snitzer:

 - various fixes and cleanups for request-based DM core

 - add support for delaying the requeue of requests; used by DM multipath
   when all paths have failed and 'queue_if_no_path' is enabled

 - DM cache improvements to speedup the loading metadata and the writing
   of the hint array

 - fix potential for a dm-crypt crash on device teardown

 - remove dm_bufio_cond_resched() and just using cond_resched()

 - change DM multipath to return a reservation conflict error immediately;
   rather than failing the path and retrying (potentially indefinitely)

* tag 'dm-4.9-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (24 commits)
  dm mpath: always return reservation conflict without failing over
  dm bufio: remove dm_bufio_cond_resched()
  dm crypt: fix crash on exit
  dm cache metadata: switch to using the new cursor api for loading metadata
  dm array: introduce cursor api
  dm btree: introduce cursor api
  dm cache policy smq: distribute entries to random levels when switching to smq
  dm cache: speed up writing of the hint array
  dm array: add dm_array_new()
  dm mpath: delay the requeue of blk-mq requests while all paths down
  dm mpath: use dm_mq_kick_requeue_list()
  dm rq: introduce dm_mq_kick_requeue_list()
  dm rq: reduce arguments passed to map_request() and dm_requeue_original_request()
  dm rq: add DM_MAPIO_DELAY_REQUEUE to delay requeue of blk-mq requests
  dm: convert wait loops to use autoremove_wake_function()
  dm: use signal_pending_state() in dm_wait_for_completion()
  dm: rename task state function arguments
  dm: add two lockdep_assert_held() statements
  dm rq: simplify dm_old_stop_queue()
  dm mpath: check if path's request_queue is dying in activate_path()
  ...
commit 48915c2cbc
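For orientation, here is a minimal usage sketch of the dm_array cursor API introduced by this series. It is not taken from the patch itself: the wrapper function, its name and its nr_entries parameter are hypothetical, while the dm_array_cursor_* calls and their signatures come from the dm array changes shown in the diff below (the loop structure mirrors __load_mappings() in the cache metadata hunk).

/*
 * Illustrative only: walk every entry of an on-disk dm array with the
 * new cursor API.  'nr_entries' is assumed to be known by the caller;
 * an empty array is skipped, mirroring __load_mappings().
 */
static int walk_array_with_cursor(struct dm_array_info *info, dm_block_t root,
				  uint32_t nr_entries)
{
	struct dm_array_cursor c;
	__le32 *value_le;
	uint32_t i;
	int r;

	if (!nr_entries)
		return 0;

	r = dm_array_cursor_begin(info, root, &c);
	if (r)
		return r;

	for (i = 0; i < nr_entries; i++) {
		dm_array_cursor_get_value(&c, (void **) &value_le);
		/* consume le32_to_cpu(*value_le) here */

		if (i + 1 == nr_entries)
			break;	/* don't step the cursor past the last entry */

		r = dm_array_cursor_next(&c);
		if (r)
			break;
	}

	dm_array_cursor_end(&c);
	return r;
}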
@@ -191,19 +191,6 @@ static void dm_bufio_unlock(struct dm_bufio_client *c)
 mutex_unlock(&c->lock);
 }
 
-/*
-* FIXME Move to sched.h?
-*/
-#ifdef CONFIG_PREEMPT_VOLUNTARY
-# define dm_bufio_cond_resched() \
-do { \
-if (unlikely(need_resched())) \
-_cond_resched(); \
-} while (0)
-#else
-# define dm_bufio_cond_resched() do { } while (0)
-#endif
-
 /*----------------------------------------------------------------*/
 
 /*
@@ -741,7 +728,7 @@ static void __flush_write_list(struct list_head *write_list)
 list_entry(write_list->next, struct dm_buffer, write_list);
 list_del(&b->write_list);
 submit_io(b, WRITE, b->block, write_endio);
-dm_bufio_cond_resched();
+cond_resched();
 }
 blk_finish_plug(&plug);
 }
@@ -780,7 +767,7 @@ static struct dm_buffer *__get_unclaimed_buffer(struct dm_bufio_client *c)
 __unlink_buffer(b);
 return b;
 }
-dm_bufio_cond_resched();
+cond_resched();
 }
 
 list_for_each_entry_reverse(b, &c->lru[LIST_DIRTY], lru_list) {
@@ -791,7 +778,7 @@ static struct dm_buffer *__get_unclaimed_buffer(struct dm_bufio_client *c)
 __unlink_buffer(b);
 return b;
 }
-dm_bufio_cond_resched();
+cond_resched();
 }
 
 return NULL;
@@ -923,7 +910,7 @@ static void __write_dirty_buffers_async(struct dm_bufio_client *c, int no_wait,
 return;
 
 __write_dirty_buffer(b, write_list);
-dm_bufio_cond_resched();
+cond_resched();
 }
 }
 
@@ -973,7 +960,7 @@ static void __check_watermark(struct dm_bufio_client *c,
 return;
 
 __free_buffer_wake(b);
-dm_bufio_cond_resched();
+cond_resched();
 }
 
 if (c->n_buffers[LIST_DIRTY] > threshold_buffers)
@@ -1170,7 +1157,7 @@ void dm_bufio_prefetch(struct dm_bufio_client *c,
 submit_io(b, READ, b->block, read_endio);
 dm_bufio_release(b);
 
-dm_bufio_cond_resched();
+cond_resched();
 
 if (!n_blocks)
 goto flush_plug;
@@ -1291,7 +1278,7 @@ again:
 !test_bit(B_WRITING, &b->state))
 __relink_lru(b, LIST_CLEAN);
 
-dm_bufio_cond_resched();
+cond_resched();
 
 /*
 * If we dropped the lock, the list is no longer consistent,
@@ -1574,7 +1561,7 @@ static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan,
 freed++;
 if (!--nr_to_scan || ((count - freed) <= retain_target))
 return freed;
-dm_bufio_cond_resched();
+cond_resched();
 }
 }
 return freed;
@@ -1808,7 +1795,7 @@ static void __evict_old_buffers(struct dm_bufio_client *c, unsigned long age_hz)
 if (__try_evict_buffer(b, 0))
 count--;
 
-dm_bufio_cond_resched();
+cond_resched();
 }
 
 dm_bufio_unlock(c);
@@ -140,6 +140,13 @@ struct dm_cache_metadata {
 * the device.
 */
 bool fail_io:1;
+
+/*
+* These structures are used when loading metadata. They're too
+* big to put on the stack.
+*/
+struct dm_array_cursor mapping_cursor;
+struct dm_array_cursor hint_cursor;
 };
 
 /*-------------------------------------------------------------------
@@ -1171,31 +1178,37 @@ static bool hints_array_available(struct dm_cache_metadata *cmd,
 hints_array_initialized(cmd);
 }
 
-static int __load_mapping(void *context, uint64_t cblock, void *leaf)
+static int __load_mapping(struct dm_cache_metadata *cmd,
+uint64_t cb, bool hints_valid,
+struct dm_array_cursor *mapping_cursor,
+struct dm_array_cursor *hint_cursor,
+load_mapping_fn fn, void *context)
 {
 int r = 0;
-bool dirty;
-__le64 value;
-__le32 hint_value = 0;
+
+__le64 mapping;
+__le32 hint = 0;
+
+__le64 *mapping_value_le;
+__le32 *hint_value_le;
 
 dm_oblock_t oblock;
 unsigned flags;
-struct thunk *thunk = context;
-struct dm_cache_metadata *cmd = thunk->cmd;
 
-memcpy(&value, leaf, sizeof(value));
-unpack_value(value, &oblock, &flags);
+dm_array_cursor_get_value(mapping_cursor, (void **) &mapping_value_le);
+memcpy(&mapping, mapping_value_le, sizeof(mapping));
+unpack_value(mapping, &oblock, &flags);
 
 if (flags & M_VALID) {
-if (thunk->hints_valid) {
-r = dm_array_get_value(&cmd->hint_info, cmd->hint_root,
-cblock, &hint_value);
-if (r && r != -ENODATA)
-return r;
+if (hints_valid) {
+dm_array_cursor_get_value(hint_cursor, (void **) &hint_value_le);
+memcpy(&hint, hint_value_le, sizeof(hint));
 }
 
-dirty = thunk->respect_dirty_flags ? (flags & M_DIRTY) : true;
-r = thunk->fn(thunk->context, oblock, to_cblock(cblock),
-dirty, le32_to_cpu(hint_value), thunk->hints_valid);
+r = fn(context, oblock, to_cblock(cb), flags & M_DIRTY,
+le32_to_cpu(hint), hints_valid);
+if (r)
+DMERR("policy couldn't load cblock");
 }
 
 return r;
@@ -1205,16 +1218,60 @@ static int __load_mappings(struct dm_cache_metadata *cmd,
 struct dm_cache_policy *policy,
 load_mapping_fn fn, void *context)
 {
-struct thunk thunk;
+int r;
+uint64_t cb;
 
-thunk.fn = fn;
-thunk.context = context;
+bool hints_valid = hints_array_available(cmd, policy);
 
-thunk.cmd = cmd;
-thunk.respect_dirty_flags = cmd->clean_when_opened;
-thunk.hints_valid = hints_array_available(cmd, policy);
+if (from_cblock(cmd->cache_blocks) == 0)
+/* Nothing to do */
+return 0;
 
-return dm_array_walk(&cmd->info, cmd->root, __load_mapping, &thunk);
+r = dm_array_cursor_begin(&cmd->info, cmd->root, &cmd->mapping_cursor);
+if (r)
+return r;
+
+if (hints_valid) {
+r = dm_array_cursor_begin(&cmd->hint_info, cmd->hint_root, &cmd->hint_cursor);
+if (r) {
+dm_array_cursor_end(&cmd->mapping_cursor);
+return r;
+}
+}
+
+for (cb = 0; ; cb++) {
+r = __load_mapping(cmd, cb, hints_valid,
+&cmd->mapping_cursor, &cmd->hint_cursor,
+fn, context);
+if (r)
+goto out;
+
+/*
+* We need to break out before we move the cursors.
+*/
+if (cb >= (from_cblock(cmd->cache_blocks) - 1))
+break;
+
+r = dm_array_cursor_next(&cmd->mapping_cursor);
+if (r) {
+DMERR("dm_array_cursor_next for mapping failed");
+goto out;
+}
+
+if (hints_valid) {
+r = dm_array_cursor_next(&cmd->hint_cursor);
+if (r) {
+DMERR("dm_array_cursor_next for hint failed");
+goto out;
+}
+}
+}
+out:
+dm_array_cursor_end(&cmd->mapping_cursor);
+if (hints_valid)
+dm_array_cursor_end(&cmd->hint_cursor);
+
+return r;
 }
 
 int dm_cache_load_mappings(struct dm_cache_metadata *cmd,
@@ -1368,10 +1425,24 @@ int dm_cache_get_metadata_dev_size(struct dm_cache_metadata *cmd,
 
 /*----------------------------------------------------------------*/
 
-static int begin_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy)
+static int get_hint(uint32_t index, void *value_le, void *context)
+{
+uint32_t value;
+struct dm_cache_policy *policy = context;
+
+value = policy_get_hint(policy, to_cblock(index));
+*((__le32 *) value_le) = cpu_to_le32(value);
+
+return 0;
+}
+
+/*
+* It's quicker to always delete the hint array, and recreate with
+* dm_array_new().
+*/
+static int write_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy)
 {
 int r;
-__le32 value;
 size_t hint_size;
 const char *policy_name = dm_cache_policy_get_name(policy);
 const unsigned *policy_version = dm_cache_policy_get_version(policy);
@@ -1380,63 +1451,23 @@ static int begin_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *po
 (strlen(policy_name) > sizeof(cmd->policy_name) - 1))
 return -EINVAL;
 
-if (!policy_unchanged(cmd, policy)) {
-strncpy(cmd->policy_name, policy_name, sizeof(cmd->policy_name));
-memcpy(cmd->policy_version, policy_version, sizeof(cmd->policy_version));
+strncpy(cmd->policy_name, policy_name, sizeof(cmd->policy_name));
+memcpy(cmd->policy_version, policy_version, sizeof(cmd->policy_version));
 
 hint_size = dm_cache_policy_get_hint_size(policy);
 if (!hint_size)
 return 0; /* short-circuit hints initialization */
 cmd->policy_hint_size = hint_size;
 
 if (cmd->hint_root) {
 r = dm_array_del(&cmd->hint_info, cmd->hint_root);
-if (r)
-return r;
-}
-
-r = dm_array_empty(&cmd->hint_info, &cmd->hint_root);
-if (r)
-return r;
-
-value = cpu_to_le32(0);
-__dm_bless_for_disk(&value);
-r = dm_array_resize(&cmd->hint_info, cmd->hint_root, 0,
-from_cblock(cmd->cache_blocks),
-&value, &cmd->hint_root);
 if (r)
 return r;
 }
 
-return 0;
-}
-
-static int save_hint(void *context, dm_cblock_t cblock, dm_oblock_t oblock, uint32_t hint)
-{
-struct dm_cache_metadata *cmd = context;
-__le32 value = cpu_to_le32(hint);
-int r;
-
-__dm_bless_for_disk(&value);
-
-r = dm_array_set_value(&cmd->hint_info, cmd->hint_root,
-from_cblock(cblock), &value, &cmd->hint_root);
-cmd->changed = true;
-
-return r;
-}
-
-static int write_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy)
-{
-int r;
-
-r = begin_hints(cmd, policy);
-if (r) {
-DMERR("begin_hints failed");
-return r;
-}
-
-return policy_walk_mappings(policy, save_hint, cmd);
+return dm_array_new(&cmd->hint_info, &cmd->hint_root,
+from_cblock(cmd->cache_blocks),
+get_hint, policy);
 }
 
 int dm_cache_write_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy)
@@ -395,7 +395,7 @@ static void init_policy_functions(struct policy *p)
 p->policy.set_dirty = wb_set_dirty;
 p->policy.clear_dirty = wb_clear_dirty;
 p->policy.load_mapping = wb_load_mapping;
-p->policy.walk_mappings = NULL;
+p->policy.get_hint = NULL;
 p->policy.remove_mapping = wb_remove_mapping;
 p->policy.writeback_work = wb_writeback_work;
 p->policy.force_mapping = wb_force_mapping;
@@ -48,10 +48,10 @@ static inline int policy_load_mapping(struct dm_cache_policy *p,
 return p->load_mapping(p, oblock, cblock, hint, hint_valid);
 }
 
-static inline int policy_walk_mappings(struct dm_cache_policy *p,
-policy_walk_fn fn, void *context)
+static inline uint32_t policy_get_hint(struct dm_cache_policy *p,
+dm_cblock_t cblock)
 {
-return p->walk_mappings ? p->walk_mappings(p, fn, context) : 0;
+return p->get_hint ? p->get_hint(p, cblock) : 0;
 }
 
 static inline int policy_writeback_work(struct dm_cache_policy *p,
@@ -1359,6 +1359,11 @@ static void smq_clear_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
 spin_unlock_irqrestore(&mq->lock, flags);
 }
 
+static unsigned random_level(dm_cblock_t cblock)
+{
+return hash_32_generic(from_cblock(cblock), 9) & (NR_CACHE_LEVELS - 1);
+}
+
 static int smq_load_mapping(struct dm_cache_policy *p,
 dm_oblock_t oblock, dm_cblock_t cblock,
 uint32_t hint, bool hint_valid)
@@ -1369,47 +1374,21 @@ static int smq_load_mapping(struct dm_cache_policy *p,
 e = alloc_particular_entry(&mq->cache_alloc, from_cblock(cblock));
 e->oblock = oblock;
 e->dirty = false; /* this gets corrected in a minute */
-e->level = hint_valid ? min(hint, NR_CACHE_LEVELS - 1) : 1;
+e->level = hint_valid ? min(hint, NR_CACHE_LEVELS - 1) : random_level(cblock);
 push(mq, e);
 
 return 0;
 }
 
-static int smq_save_hints(struct smq_policy *mq, struct queue *q,
-policy_walk_fn fn, void *context)
-{
-int r;
-unsigned level;
-struct entry *e;
-
-for (level = 0; level < q->nr_levels; level++)
-for (e = l_head(q->es, q->qs + level); e; e = l_next(q->es, e)) {
-if (!e->sentinel) {
-r = fn(context, infer_cblock(mq, e),
-e->oblock, e->level);
-if (r)
-return r;
-}
-}
-
-return 0;
-}
-
-static int smq_walk_mappings(struct dm_cache_policy *p, policy_walk_fn fn,
-void *context)
+static uint32_t smq_get_hint(struct dm_cache_policy *p, dm_cblock_t cblock)
 {
 struct smq_policy *mq = to_smq_policy(p);
-int r = 0;
+struct entry *e = get_entry(&mq->cache_alloc, from_cblock(cblock));
 
-/*
-* We don't need to lock here since this method is only called once
-* the IO has stopped.
-*/
-r = smq_save_hints(mq, &mq->clean, fn, context);
-if (!r)
-r = smq_save_hints(mq, &mq->dirty, fn, context);
+if (!e->allocated)
+return 0;
 
-return r;
+return e->level;
 }
 
 static void __remove_mapping(struct smq_policy *mq, dm_oblock_t oblock)
@@ -1616,7 +1595,7 @@ static void init_policy_functions(struct smq_policy *mq, bool mimic_mq)
 mq->policy.set_dirty = smq_set_dirty;
 mq->policy.clear_dirty = smq_clear_dirty;
 mq->policy.load_mapping = smq_load_mapping;
-mq->policy.walk_mappings = smq_walk_mappings;
+mq->policy.get_hint = smq_get_hint;
 mq->policy.remove_mapping = smq_remove_mapping;
 mq->policy.remove_cblock = smq_remove_cblock;
 mq->policy.writeback_work = smq_writeback_work;
@@ -90,9 +90,6 @@ struct policy_result {
 dm_cblock_t cblock; /* POLICY_HIT, POLICY_NEW, POLICY_REPLACE */
 };
 
-typedef int (*policy_walk_fn)(void *context, dm_cblock_t cblock,
-dm_oblock_t oblock, uint32_t hint);
-
 /*
 * The cache policy object. Just a bunch of methods. It is envisaged that
 * this structure will be embedded in a bigger, policy specific structure
@@ -158,8 +155,11 @@ struct dm_cache_policy {
 int (*load_mapping)(struct dm_cache_policy *p, dm_oblock_t oblock,
 dm_cblock_t cblock, uint32_t hint, bool hint_valid);
 
-int (*walk_mappings)(struct dm_cache_policy *p, policy_walk_fn fn,
-void *context);
+/*
+* Gets the hint for a given cblock. Called in a single threaded
+* context. So no locking required.
+*/
+uint32_t (*get_hint)(struct dm_cache_policy *p, dm_cblock_t cblock);
 
 /*
 * Override functions used on the error paths of the core target.
@@ -113,8 +113,7 @@ struct iv_tcw_private {
 * and encrypts / decrypts at the same time.
 */
 enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID,
-DM_CRYPT_SAME_CPU, DM_CRYPT_NO_OFFLOAD,
-DM_CRYPT_EXIT_THREAD};
+DM_CRYPT_SAME_CPU, DM_CRYPT_NO_OFFLOAD };
 
 /*
 * The fields in here must be read only after initialization.
@@ -1207,18 +1206,20 @@ continue_locked:
 if (!RB_EMPTY_ROOT(&cc->write_tree))
 goto pop_from_list;
 
-if (unlikely(test_bit(DM_CRYPT_EXIT_THREAD, &cc->flags))) {
-spin_unlock_irq(&cc->write_thread_wait.lock);
-break;
-}
-
-__set_current_state(TASK_INTERRUPTIBLE);
+set_current_state(TASK_INTERRUPTIBLE);
 __add_wait_queue(&cc->write_thread_wait, &wait);
 
 spin_unlock_irq(&cc->write_thread_wait.lock);
 
+if (unlikely(kthread_should_stop())) {
+set_task_state(current, TASK_RUNNING);
+remove_wait_queue(&cc->write_thread_wait, &wait);
+break;
+}
+
 schedule();
 
+set_task_state(current, TASK_RUNNING);
 spin_lock_irq(&cc->write_thread_wait.lock);
 __remove_wait_queue(&cc->write_thread_wait, &wait);
 goto continue_locked;
@@ -1533,13 +1534,8 @@ static void crypt_dtr(struct dm_target *ti)
 if (!cc)
 return;
 
-if (cc->write_thread) {
-spin_lock_irq(&cc->write_thread_wait.lock);
-set_bit(DM_CRYPT_EXIT_THREAD, &cc->flags);
-wake_up_locked(&cc->write_thread_wait);
-spin_unlock_irq(&cc->write_thread_wait.lock);
+if (cc->write_thread)
 kthread_stop(cc->write_thread);
-}
 
 if (cc->io_queue)
 destroy_workqueue(cc->io_queue);
@@ -550,9 +550,9 @@ static int __multipath_map(struct dm_target *ti, struct request *clone,
 pgpath = choose_pgpath(m, nr_bytes);
 
 if (!pgpath) {
-if (!must_push_back_rq(m))
-r = -EIO; /* Failed */
-return r;
+if (must_push_back_rq(m))
+return DM_MAPIO_DELAY_REQUEUE;
+return -EIO; /* Failed */
 } else if (test_bit(MPATHF_QUEUE_IO, &m->flags) ||
 test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) {
 pg_init_all_paths(m);
@@ -680,9 +680,11 @@ static int multipath_map_bio(struct dm_target *ti, struct bio *bio)
 return __multipath_map_bio(m, bio, mpio);
 }
 
-static void process_queued_bios_list(struct multipath *m)
+static void process_queued_io_list(struct multipath *m)
 {
-if (m->queue_mode == DM_TYPE_BIO_BASED)
+if (m->queue_mode == DM_TYPE_MQ_REQUEST_BASED)
+dm_mq_kick_requeue_list(dm_table_get_md(m->ti->table));
+else if (m->queue_mode == DM_TYPE_BIO_BASED)
 queue_work(kmultipathd, &m->process_queued_bios);
 }
 
@@ -752,7 +754,7 @@ static int queue_if_no_path(struct multipath *m, bool queue_if_no_path,
 
 if (!queue_if_no_path) {
 dm_table_run_md_queue_async(m->ti->table);
-process_queued_bios_list(m);
+process_queued_io_list(m);
 }
 
 return 0;
@@ -1193,21 +1195,17 @@ static int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv)
 
 static void multipath_wait_for_pg_init_completion(struct multipath *m)
 {
-DECLARE_WAITQUEUE(wait, current);
+DEFINE_WAIT(wait);
 
-add_wait_queue(&m->pg_init_wait, &wait);
-
 while (1) {
-set_current_state(TASK_UNINTERRUPTIBLE);
+prepare_to_wait(&m->pg_init_wait, &wait, TASK_UNINTERRUPTIBLE);
 
 if (!atomic_read(&m->pg_init_in_progress))
 break;
 
 io_schedule();
 }
-set_current_state(TASK_RUNNING);
-
-remove_wait_queue(&m->pg_init_wait, &wait);
+finish_wait(&m->pg_init_wait, &wait);
 }
 
 static void flush_multipath_work(struct multipath *m)
@@ -1308,7 +1306,7 @@ out:
 spin_unlock_irqrestore(&m->lock, flags);
 if (run_queue) {
 dm_table_run_md_queue_async(m->ti->table);
-process_queued_bios_list(m);
+process_queued_io_list(m);
 }
 
 return r;
@@ -1506,7 +1504,7 @@ static void pg_init_done(void *data, int errors)
 }
 clear_bit(MPATHF_QUEUE_IO, &m->flags);
 
-process_queued_bios_list(m);
+process_queued_io_list(m);
 
 /*
 * Wake up any thread waiting to suspend.
@@ -1521,10 +1519,10 @@ static void activate_path(struct work_struct *work)
 {
 struct pgpath *pgpath =
 container_of(work, struct pgpath, activate_path.work);
+struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev);
 
-if (pgpath->is_active)
-scsi_dh_activate(bdev_get_queue(pgpath->path.dev->bdev),
-pg_init_done, pgpath);
+if (pgpath->is_active && !blk_queue_dying(q))
+scsi_dh_activate(q, pg_init_done, pgpath);
 else
 pg_init_done(pgpath, SCSI_DH_DEV_OFFLINED);
 }
@@ -1532,6 +1530,14 @@ static void activate_path(struct work_struct *work)
 static int noretry_error(int error)
 {
 switch (error) {
+case -EBADE:
+/*
+* EBADE signals an reservation conflict.
+* We shouldn't fail the path here as we can communicate with
+* the target. We should failover to the next path, but in
+* doing so we might be causing a ping-pong between paths.
+* So just return the reservation conflict error.
+*/
 case -EOPNOTSUPP:
 case -EREMOTEIO:
 case -EILSEQ:
@@ -1576,9 +1582,6 @@ static int do_end_io(struct multipath *m, struct request *clone,
 if (!test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
 if (!must_push_back_rq(m))
 r = -EIO;
-} else {
-if (error == -EBADE)
-r = error;
 }
 }
 
@@ -1627,9 +1630,6 @@ static int do_end_io_bio(struct multipath *m, struct bio *clone,
 if (!must_push_back_bio(m))
 return -EIO;
 return DM_ENDIO_REQUEUE;
-} else {
-if (error == -EBADE)
-return error;
 }
 }
 
@@ -1941,7 +1941,7 @@ static int multipath_prepare_ioctl(struct dm_target *ti,
 if (test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags))
 pg_init_all_paths(m);
 dm_table_run_md_queue_async(m->ti->table);
-process_queued_bios_list(m);
+process_queued_io_list(m);
 }
 
 /*
@@ -1994,11 +1994,14 @@ static int multipath_busy(struct dm_target *ti)
 struct priority_group *pg, *next_pg;
 struct pgpath *pgpath;
 
-/* pg_init in progress or no paths available */
-if (atomic_read(&m->pg_init_in_progress) ||
-(!atomic_read(&m->nr_valid_paths) && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)))
+/* pg_init in progress */
+if (atomic_read(&m->pg_init_in_progress))
 return true;
 
+/* no paths available, for blk-mq: rely on IO mapping to delay requeue */
+if (!atomic_read(&m->nr_valid_paths) && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
+return (m->queue_mode != DM_TYPE_MQ_REQUEST_BASED);
+
 /* Guess which priority_group will be used at next mapping time */
 pg = lockless_dereference(m->current_pg);
 next_pg = lockless_dereference(m->next_pg);
@@ -73,43 +73,60 @@ static void dm_old_start_queue(struct request_queue *q)
 spin_unlock_irqrestore(q->queue_lock, flags);
 }
 
+static void dm_mq_start_queue(struct request_queue *q)
+{
+unsigned long flags;
+
+spin_lock_irqsave(q->queue_lock, flags);
+queue_flag_clear(QUEUE_FLAG_STOPPED, q);
+spin_unlock_irqrestore(q->queue_lock, flags);
+
+blk_mq_start_stopped_hw_queues(q, true);
+blk_mq_kick_requeue_list(q);
+}
+
 void dm_start_queue(struct request_queue *q)
 {
 if (!q->mq_ops)
 dm_old_start_queue(q);
-else {
-queue_flag_clear_unlocked(QUEUE_FLAG_STOPPED, q);
-blk_mq_start_stopped_hw_queues(q, true);
-blk_mq_kick_requeue_list(q);
-}
+else
+dm_mq_start_queue(q);
 }
 
 static void dm_old_stop_queue(struct request_queue *q)
 {
 unsigned long flags;
 
+spin_lock_irqsave(q->queue_lock, flags);
+if (!blk_queue_stopped(q))
+blk_stop_queue(q);
+spin_unlock_irqrestore(q->queue_lock, flags);
+}
+
+static void dm_mq_stop_queue(struct request_queue *q)
+{
+unsigned long flags;
+
 spin_lock_irqsave(q->queue_lock, flags);
 if (blk_queue_stopped(q)) {
 spin_unlock_irqrestore(q->queue_lock, flags);
 return;
 }
 
-blk_stop_queue(q);
+queue_flag_set(QUEUE_FLAG_STOPPED, q);
 spin_unlock_irqrestore(q->queue_lock, flags);
 
+/* Avoid that requeuing could restart the queue. */
+blk_mq_cancel_requeue_work(q);
+blk_mq_stop_hw_queues(q);
 }
 
 void dm_stop_queue(struct request_queue *q)
 {
 if (!q->mq_ops)
 dm_old_stop_queue(q);
-else {
-spin_lock_irq(q->queue_lock);
-queue_flag_set(QUEUE_FLAG_STOPPED, q);
-spin_unlock_irq(q->queue_lock);
-
-blk_mq_cancel_requeue_work(q);
-blk_mq_stop_hw_queues(q);
-}
+else
+dm_mq_stop_queue(q);
 }
 
 static struct dm_rq_target_io *alloc_old_rq_tio(struct mapped_device *md,
@@ -319,21 +336,32 @@ static void dm_old_requeue_request(struct request *rq)
 spin_unlock_irqrestore(q->queue_lock, flags);
 }
 
-static void dm_mq_requeue_request(struct request *rq)
+static void __dm_mq_kick_requeue_list(struct request_queue *q, unsigned long msecs)
 {
-struct request_queue *q = rq->q;
 unsigned long flags;
 
-blk_mq_requeue_request(rq);
 spin_lock_irqsave(q->queue_lock, flags);
 if (!blk_queue_stopped(q))
-blk_mq_kick_requeue_list(q);
+blk_mq_delay_kick_requeue_list(q, msecs);
 spin_unlock_irqrestore(q->queue_lock, flags);
 }
 
-static void dm_requeue_original_request(struct mapped_device *md,
-struct request *rq)
+void dm_mq_kick_requeue_list(struct mapped_device *md)
 {
+__dm_mq_kick_requeue_list(dm_get_md_queue(md), 0);
+}
+EXPORT_SYMBOL(dm_mq_kick_requeue_list);
+
+static void dm_mq_delay_requeue_request(struct request *rq, unsigned long msecs)
+{
+blk_mq_requeue_request(rq);
+__dm_mq_kick_requeue_list(rq->q, msecs);
+}
+
+static void dm_requeue_original_request(struct dm_rq_target_io *tio, bool delay_requeue)
+{
+struct mapped_device *md = tio->md;
+struct request *rq = tio->orig;
 int rw = rq_data_dir(rq);
 
 rq_end_stats(md, rq);
@@ -342,7 +370,7 @@ static void dm_requeue_original_request(struct mapped_device *md,
 if (!rq->q->mq_ops)
 dm_old_requeue_request(rq);
 else
-dm_mq_requeue_request(rq);
+dm_mq_delay_requeue_request(rq, delay_requeue ? 5000 : 0);
 
 rq_completed(md, rw, false);
 }
@@ -372,7 +400,7 @@ static void dm_done(struct request *clone, int error, bool mapped)
 return;
 else if (r == DM_ENDIO_REQUEUE)
 /* The target wants to requeue the I/O */
-dm_requeue_original_request(tio->md, tio->orig);
+dm_requeue_original_request(tio, false);
 else {
 DMWARN("unimplemented target endio return value: %d", r);
 BUG();
@@ -612,20 +640,23 @@ static int dm_old_prep_fn(struct request_queue *q, struct request *rq)
 
 /*
 * Returns:
-* 0 : the request has been processed
-* DM_MAPIO_REQUEUE : the original request needs to be requeued
+* DM_MAPIO_* : the request has been processed as indicated
+* DM_MAPIO_REQUEUE : the original request needs to be immediately requeued
 * < 0 : the request was completed due to failure
 */
-static int map_request(struct dm_rq_target_io *tio, struct request *rq,
-struct mapped_device *md)
+static int map_request(struct dm_rq_target_io *tio)
 {
 int r;
 struct dm_target *ti = tio->ti;
+struct mapped_device *md = tio->md;
+struct request *rq = tio->orig;
 struct request *clone = NULL;
 
 if (tio->clone) {
 clone = tio->clone;
 r = ti->type->map_rq(ti, clone, &tio->info);
+if (r == DM_MAPIO_DELAY_REQUEUE)
+return DM_MAPIO_REQUEUE; /* .request_fn requeue is always immediate */
 } else {
 r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone);
 if (r < 0) {
@@ -633,9 +664,8 @@ static int map_request(struct dm_rq_target_io *tio, struct request *rq,
 dm_kill_unmapped_request(rq, r);
 return r;
 }
-if (r != DM_MAPIO_REMAPPED)
-return r;
-if (setup_clone(clone, rq, tio, GFP_ATOMIC)) {
+if (r == DM_MAPIO_REMAPPED &&
+setup_clone(clone, rq, tio, GFP_ATOMIC)) {
 /* -ENOMEM */
 ti->type->release_clone_rq(clone);
 return DM_MAPIO_REQUEUE;
@@ -654,7 +684,10 @@ static int map_request(struct dm_rq_target_io *tio, struct request *rq,
 break;
 case DM_MAPIO_REQUEUE:
 /* The target wants to requeue the I/O */
-dm_requeue_original_request(md, tio->orig);
+break;
+case DM_MAPIO_DELAY_REQUEUE:
+/* The target wants to requeue the I/O after a delay */
+dm_requeue_original_request(tio, true);
 break;
 default:
 if (r > 0) {
@@ -664,10 +697,9 @@ static int map_request(struct dm_rq_target_io *tio, struct request *rq,
 
 /* The target wants to complete the I/O */
 dm_kill_unmapped_request(rq, r);
-return r;
 }
 
-return 0;
+return r;
 }
 
 static void dm_start_request(struct mapped_device *md, struct request *orig)
@@ -706,11 +738,9 @@ static void dm_start_request(struct mapped_device *md, struct request *orig)
 static void map_tio_request(struct kthread_work *work)
 {
 struct dm_rq_target_io *tio = container_of(work, struct dm_rq_target_io, work);
-struct request *rq = tio->orig;
-struct mapped_device *md = tio->md;
 
-if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE)
-dm_requeue_original_request(md, rq);
+if (map_request(tio) == DM_MAPIO_REQUEUE)
+dm_requeue_original_request(tio, false);
 }
 
 ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf)
@@ -896,7 +926,7 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
 tio->ti = ti;
 
 /* Direct call is fine since .queue_rq allows allocations */
-if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE) {
+if (map_request(tio) == DM_MAPIO_REQUEUE) {
 /* Undo dm_start_request() before requeuing */
 rq_end_stats(md, rq);
 rq_completed(md, rq_data_dir(rq), false);
@@ -55,6 +55,8 @@ void dm_mq_cleanup_mapped_device(struct mapped_device *md);
 void dm_start_queue(struct request_queue *q);
 void dm_stop_queue(struct request_queue *q);
 
+void dm_mq_kick_requeue_list(struct mapped_device *md);
+
 unsigned dm_get_reserved_rq_based_ios(void);
 
 ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf);
@@ -1648,6 +1648,8 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
 struct request_queue *q = md->queue;
 sector_t size;
 
+lockdep_assert_held(&md->suspend_lock);
+
 size = dm_table_get_size(t);
 
 /*
@@ -1873,6 +1875,7 @@ EXPORT_SYMBOL_GPL(dm_device_name);
 
 static void __dm_destroy(struct mapped_device *md, bool wait)
 {
+struct request_queue *q = dm_get_md_queue(md);
 struct dm_table *map;
 int srcu_idx;
 
@@ -1883,6 +1886,10 @@ static void __dm_destroy(struct mapped_device *md, bool wait)
 set_bit(DMF_FREEING, &md->flags);
 spin_unlock(&_minor_lock);
 
+spin_lock_irq(q->queue_lock);
+queue_flag_set(QUEUE_FLAG_DYING, q);
+spin_unlock_irq(q->queue_lock);
+
 if (dm_request_based(md) && md->kworker_task)
 flush_kthread_worker(&md->kworker);
 
@@ -1934,30 +1941,25 @@ void dm_put(struct mapped_device *md)
 }
 EXPORT_SYMBOL_GPL(dm_put);
 
-static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
+static int dm_wait_for_completion(struct mapped_device *md, long task_state)
 {
 int r = 0;
-DECLARE_WAITQUEUE(wait, current);
+DEFINE_WAIT(wait);
 
-add_wait_queue(&md->wait, &wait);
-
 while (1) {
-set_current_state(interruptible);
+prepare_to_wait(&md->wait, &wait, task_state);
 
 if (!md_in_flight(md))
 break;
 
-if (interruptible == TASK_INTERRUPTIBLE &&
-signal_pending(current)) {
+if (signal_pending_state(task_state, current)) {
 r = -EINTR;
 break;
 }
 
 io_schedule();
 }
-set_current_state(TASK_RUNNING);
+finish_wait(&md->wait, &wait);
 
-remove_wait_queue(&md->wait, &wait);
-
 return r;
 }
@@ -2075,6 +2077,10 @@ static void unlock_fs(struct mapped_device *md)
 }
 
 /*
+* @suspend_flags: DM_SUSPEND_LOCKFS_FLAG and/or DM_SUSPEND_NOFLUSH_FLAG
+* @task_state: e.g. TASK_INTERRUPTIBLE or TASK_UNINTERRUPTIBLE
+* @dmf_suspended_flag: DMF_SUSPENDED or DMF_SUSPENDED_INTERNALLY
+*
 * If __dm_suspend returns 0, the device is completely quiescent
 * now. There is no request-processing activity. All new requests
 * are being added to md->deferred list.
@@ -2082,13 +2088,15 @@ static void unlock_fs(struct mapped_device *md)
 * Caller must hold md->suspend_lock
 */
 static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
-unsigned suspend_flags, int interruptible,
+unsigned suspend_flags, long task_state,
 int dmf_suspended_flag)
 {
 bool do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG;
 bool noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG;
 int r;
 
+lockdep_assert_held(&md->suspend_lock);
+
 /*
 * DMF_NOFLUSH_SUSPENDING must be set before presuspend.
 * This flag is cleared before dm_suspend returns.
@@ -2149,7 +2157,7 @@ static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
 * We call dm_wait_for_completion to wait for all existing requests
 * to finish.
 */
-r = dm_wait_for_completion(md, interruptible);
+r = dm_wait_for_completion(md, task_state);
 if (!r)
 set_bit(dmf_suspended_flag, &md->flags);
 
@@ -2249,10 +2257,11 @@ static int __dm_resume(struct mapped_device *md, struct dm_table *map)
 
 int dm_resume(struct mapped_device *md)
 {
-int r = -EINVAL;
+int r;
 struct dm_table *map = NULL;
 
 retry:
+r = -EINVAL;
 mutex_lock_nested(&md->suspend_lock, SINGLE_DEPTH_NESTING);
 
 if (!dm_suspended_md(md))
@@ -2276,8 +2285,6 @@ retry:
 goto out;
 
 clear_bit(DMF_SUSPENDED, &md->flags);
-
-r = 0;
 out:
 mutex_unlock(&md->suspend_lock);
 
@ -277,6 +277,48 @@ static int insert_ablock(struct dm_array_info *info, uint64_t index,
|
|||||||
return dm_btree_insert(&info->btree_info, *root, &index, &block_le, root);
|
return dm_btree_insert(&info->btree_info, *root, &index, &block_le, root);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*----------------------------------------------------------------*/
|
||||||
|
|
||||||
|
static int __shadow_ablock(struct dm_array_info *info, dm_block_t b,
|
||||||
|
struct dm_block **block, struct array_block **ab)
|
||||||
|
{
|
||||||
|
int inc;
|
||||||
|
int r = dm_tm_shadow_block(info->btree_info.tm, b,
|
||||||
|
&array_validator, block, &inc);
|
||||||
|
if (r)
|
||||||
|
return r;
|
||||||
|
|
||||||
|
*ab = dm_block_data(*block);
|
||||||
|
if (inc)
|
||||||
|
inc_ablock_entries(info, *ab);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The shadow op will often be a noop. Only insert if it really
|
||||||
|
* copied data.
|
||||||
|
*/
|
||||||
|
static int __reinsert_ablock(struct dm_array_info *info, unsigned index,
|
||||||
|
struct dm_block *block, dm_block_t b,
|
||||||
|
dm_block_t *root)
|
||||||
|
{
|
||||||
|
int r = 0;
|
||||||
|
|
||||||
|
if (dm_block_location(block) != b) {
|
||||||
|
/*
|
||||||
|
* dm_tm_shadow_block will have already decremented the old
|
||||||
|
* block, but it is still referenced by the btree. We
|
||||||
|
* increment to stop the insert decrementing it below zero
|
||||||
|
* when overwriting the old value.
|
||||||
|
*/
|
||||||
|
dm_tm_inc(info->btree_info.tm, b);
|
||||||
|
r = insert_ablock(info, index, block, root);
|
||||||
|
}
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Looks up an array block in the btree. Then shadows it, and updates the
|
* Looks up an array block in the btree. Then shadows it, and updates the
|
||||||
* btree to point to this new shadow. 'root' is an input/output parameter
|
* btree to point to this new shadow. 'root' is an input/output parameter
|
||||||
@ -286,49 +328,21 @@ static int shadow_ablock(struct dm_array_info *info, dm_block_t *root,
|
|||||||
unsigned index, struct dm_block **block,
|
unsigned index, struct dm_block **block,
|
||||||
struct array_block **ab)
|
struct array_block **ab)
|
||||||
{
|
{
|
||||||
int r, inc;
|
int r;
|
||||||
uint64_t key = index;
|
uint64_t key = index;
|
||||||
dm_block_t b;
|
dm_block_t b;
|
||||||
__le64 block_le;
|
__le64 block_le;
|
||||||
|
|
||||||
/*
|
|
||||||
* lookup
|
|
||||||
*/
|
|
||||||
r = dm_btree_lookup(&info->btree_info, *root, &key, &block_le);
|
r = dm_btree_lookup(&info->btree_info, *root, &key, &block_le);
|
||||||
if (r)
|
if (r)
|
||||||
return r;
|
return r;
|
||||||
b = le64_to_cpu(block_le);
|
b = le64_to_cpu(block_le);
|
||||||
|
|
||||||
/*
|
r = __shadow_ablock(info, b, block, ab);
|
||||||
* shadow
|
|
||||||
*/
|
|
||||||
r = dm_tm_shadow_block(info->btree_info.tm, b,
|
|
||||||
&array_validator, block, &inc);
|
|
||||||
if (r)
|
if (r)
|
||||||
return r;
|
return r;
|
||||||
|
|
||||||
*ab = dm_block_data(*block);
|
return __reinsert_ablock(info, index, *block, b, root);
|
||||||
if (inc)
|
|
||||||
inc_ablock_entries(info, *ab);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Reinsert.
|
|
||||||
*
|
|
||||||
* The shadow op will often be a noop. Only insert if it really
|
|
||||||
* copied data.
|
|
||||||
*/
|
|
||||||
if (dm_block_location(*block) != b) {
|
|
||||||
/*
|
|
||||||
* dm_tm_shadow_block will have already decremented the old
|
|
||||||
* block, but it is still referenced by the btree. We
|
|
||||||
* increment to stop the insert decrementing it below zero
|
|
||||||
* when overwriting the old value.
|
|
||||||
*/
|
|
||||||
dm_tm_inc(info->btree_info.tm, b);
|
|
||||||
r = insert_ablock(info, index, *block, root);
|
|
||||||
}
|
|
||||||
|
|
||||||
return r;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -681,6 +695,72 @@ int dm_array_resize(struct dm_array_info *info, dm_block_t root,
|
|||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(dm_array_resize);
|
EXPORT_SYMBOL_GPL(dm_array_resize);
|
||||||
|
|
||||||
|
static int populate_ablock_with_values(struct dm_array_info *info, struct array_block *ab,
|
||||||
|
value_fn fn, void *context, unsigned base, unsigned new_nr)
|
||||||
|
{
|
||||||
|
int r;
|
||||||
|
unsigned i;
|
||||||
|
uint32_t nr_entries;
|
||||||
|
struct dm_btree_value_type *vt = &info->value_type;
|
||||||
|
|
||||||
|
BUG_ON(le32_to_cpu(ab->nr_entries));
|
||||||
|
BUG_ON(new_nr > le32_to_cpu(ab->max_entries));
|
||||||
|
|
||||||
|
nr_entries = le32_to_cpu(ab->nr_entries);
|
||||||
|
for (i = 0; i < new_nr; i++) {
|
||||||
|
r = fn(base + i, element_at(info, ab, i), context);
|
||||||
|
if (r)
|
||||||
|
return r;
|
||||||
|
|
||||||
|
if (vt->inc)
|
||||||
|
vt->inc(vt->context, element_at(info, ab, i));
|
||||||
|
}
|
||||||
|
|
||||||
|
ab->nr_entries = cpu_to_le32(new_nr);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int dm_array_new(struct dm_array_info *info, dm_block_t *root,
|
||||||
|
uint32_t size, value_fn fn, void *context)
|
||||||
|
{
|
||||||
|
int r;
|
||||||
|
struct dm_block *block;
|
||||||
|
struct array_block *ab;
|
||||||
|
unsigned block_index, end_block, size_of_block, max_entries;
|
||||||
|
|
||||||
|
r = dm_array_empty(info, root);
|
||||||
|
if (r)
|
||||||
|
return r;
|
||||||
|
|
||||||
|
size_of_block = dm_bm_block_size(dm_tm_get_bm(info->btree_info.tm));
|
||||||
|
max_entries = calc_max_entries(info->value_type.size, size_of_block);
|
||||||
|
end_block = dm_div_up(size, max_entries);
|
||||||
|
|
||||||
|
for (block_index = 0; block_index != end_block; block_index++) {
|
||||||
|
r = alloc_ablock(info, size_of_block, max_entries, &block, &ab);
|
||||||
|
if (r)
|
||||||
|
break;
|
||||||
|
|
||||||
|
r = populate_ablock_with_values(info, ab, fn, context,
|
||||||
|
block_index * max_entries,
|
||||||
|
min(max_entries, size));
|
||||||
|
if (r) {
|
||||||
|
unlock_ablock(info, block);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
r = insert_ablock(info, block_index, block, root);
|
||||||
|
unlock_ablock(info, block);
|
||||||
|
if (r)
|
||||||
|
break;
|
||||||
|
|
||||||
|
size -= max_entries;
|
||||||
|
}
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(dm_array_new);
|
||||||
|
|
||||||
int dm_array_del(struct dm_array_info *info, dm_block_t root)
{
	return dm_btree_del(&info->btree_info, root);
@@ -819,3 +899,89 @@ int dm_array_walk(struct dm_array_info *info, dm_block_t root,
EXPORT_SYMBOL_GPL(dm_array_walk);

/*----------------------------------------------------------------*/

static int load_ablock(struct dm_array_cursor *c)
{
	int r;
	__le64 value_le;
	uint64_t key;

	if (c->block)
		unlock_ablock(c->info, c->block);

	c->block = NULL;
	c->ab = NULL;
	c->index = 0;

	r = dm_btree_cursor_get_value(&c->cursor, &key, &value_le);
	if (r) {
		DMERR("dm_btree_cursor_get_value failed");
		dm_btree_cursor_end(&c->cursor);

	} else {
		r = get_ablock(c->info, le64_to_cpu(value_le), &c->block, &c->ab);
		if (r) {
			DMERR("get_ablock failed");
			dm_btree_cursor_end(&c->cursor);
		}
	}

	return r;
}

int dm_array_cursor_begin(struct dm_array_info *info, dm_block_t root,
			  struct dm_array_cursor *c)
{
	int r;

	memset(c, 0, sizeof(*c));
	c->info = info;
	r = dm_btree_cursor_begin(&info->btree_info, root, true, &c->cursor);
	if (r) {
		DMERR("couldn't create btree cursor");
		return r;
	}

	return load_ablock(c);
}
EXPORT_SYMBOL_GPL(dm_array_cursor_begin);

void dm_array_cursor_end(struct dm_array_cursor *c)
{
	if (c->block) {
		unlock_ablock(c->info, c->block);
		dm_btree_cursor_end(&c->cursor);
	}
}
EXPORT_SYMBOL_GPL(dm_array_cursor_end);

int dm_array_cursor_next(struct dm_array_cursor *c)
{
	int r;

	if (!c->block)
		return -ENODATA;

	c->index++;

	if (c->index >= le32_to_cpu(c->ab->nr_entries)) {
		r = dm_btree_cursor_next(&c->cursor);
		if (r)
			return r;

		r = load_ablock(c);
		if (r)
			return r;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(dm_array_cursor_next);

void dm_array_cursor_get_value(struct dm_array_cursor *c, void **value_le)
{
	*value_le = element_at(c->info, c->ab, c->index);
}
EXPORT_SYMBOL_GPL(dm_array_cursor_get_value);

/*----------------------------------------------------------------*/
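
As a usage sketch (hypothetical caller, not taken from this patch set), walking every entry of a 32bit-valued array with the new cursor looks roughly like the following. The pointer handed back by dm_array_cursor_get_value() refers into the locked array block, so it is only used before the next dm_array_cursor_next() call; the cursor is declared on the stack here purely to keep the sketch short, since it embeds a dm_btree_cursor and longer-lived callers would embed it in their own metadata structure instead.

/* Illustrative walk of a 32bit-valued array; 'visit' is a made-up hook. */
static int walk_array(struct dm_array_info *info, dm_block_t root, uint32_t nr_entries,
		      void (*visit)(uint32_t index, uint32_t value))
{
	int r;
	uint32_t i;
	struct dm_array_cursor c;

	r = dm_array_cursor_begin(info, root, &c);
	if (r)
		return r;

	for (i = 0; i < nr_entries; i++) {
		__le32 *value_le;

		dm_array_cursor_get_value(&c, (void **) &value_le);
		visit(i, le32_to_cpu(*value_le));

		if (i + 1 < nr_entries) {
			r = dm_array_cursor_next(&c);
			if (r)
				break;
		}
	}

	dm_array_cursor_end(&c);
	return r;
}
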
@@ -111,6 +111,25 @@ int dm_array_resize(struct dm_array_info *info, dm_block_t root,
		    const void *value, dm_block_t *new_root)
	__dm_written_to_disk(value);

/*
 * Creates a new array populated with values provided by a callback
 * function.  This is more efficient than creating an empty array,
 * resizing, and then setting values since that process incurs a lot of
 * copying.
 *
 * Assumes 32bit values for now since it's only used by the cache hint
 * array.
 *
 * info - describes the array
 * root - the root block of the array on disk
 * size - the number of entries in the array
 * fn - the callback
 * context - passed to the callback
 */
typedef int (*value_fn)(uint32_t index, void *value_le, void *context);
int dm_array_new(struct dm_array_info *info, dm_block_t *root,
		 uint32_t size, value_fn fn, void *context);

/*
 * Frees a whole array.  The value_type's decrement operation will be called
 * for all values in the array
@@ -163,4 +182,37 @@ int dm_array_walk(struct dm_array_info *info, dm_block_t root,

/*----------------------------------------------------------------*/

/*
 * Cursor api.
 *
 * This lets you iterate through all the entries in an array efficiently
 * (it will preload metadata).
 *
 * I'm using a cursor, rather than a walk function with a callback because
 * the cache target needs to iterate both the mapping and hint arrays in
 * unison.
 */
struct dm_array_cursor {
	struct dm_array_info *info;
	struct dm_btree_cursor cursor;

	struct dm_block *block;
	struct array_block *ab;
	unsigned index;
};

int dm_array_cursor_begin(struct dm_array_info *info,
			  dm_block_t root, struct dm_array_cursor *c);
void dm_array_cursor_end(struct dm_array_cursor *c);

uint32_t dm_array_cursor_index(struct dm_array_cursor *c);
int dm_array_cursor_next(struct dm_array_cursor *c);

/*
 * value_le is only valid while the cursor points at the current value.
 */
void dm_array_cursor_get_value(struct dm_array_cursor *c, void **value_le);

/*----------------------------------------------------------------*/

#endif	/* _LINUX_DM_ARRAY_H */
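
The comment above names the real motivation: dm-cache wants to walk its mapping and hint arrays in lockstep. A rough, hypothetical sketch of that pattern (names invented, error handling trimmed to the essentials):

/* Hypothetical lockstep walk of two arrays holding the same number of entries. */
static int walk_in_unison(struct dm_array_info *map_info, dm_block_t map_root,
			  struct dm_array_info *hint_info, dm_block_t hint_root,
			  uint32_t nr_entries)
{
	int r;
	uint32_t i;
	struct dm_array_cursor map_c, hint_c;

	r = dm_array_cursor_begin(map_info, map_root, &map_c);
	if (r)
		return r;

	r = dm_array_cursor_begin(hint_info, hint_root, &hint_c);
	if (r) {
		dm_array_cursor_end(&map_c);
		return r;
	}

	for (i = 0; i < nr_entries; i++) {
		__le64 *map_le;
		__le32 *hint_le;

		dm_array_cursor_get_value(&map_c, (void **) &map_le);
		dm_array_cursor_get_value(&hint_c, (void **) &hint_le);
		/* ... hand both values to the consumer here ... */

		if (i + 1 < nr_entries) {
			r = dm_array_cursor_next(&map_c);
			if (!r)
				r = dm_array_cursor_next(&hint_c);
			if (r)
				break;
		}
	}

	dm_array_cursor_end(&hint_c);
	dm_array_cursor_end(&map_c);
	return r;
}
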
@@ -994,3 +994,165 @@ int dm_btree_walk(struct dm_btree_info *info, dm_block_t root,
	return walk_node(info, root, fn, context);
}
EXPORT_SYMBOL_GPL(dm_btree_walk);

/*----------------------------------------------------------------*/

static void prefetch_values(struct dm_btree_cursor *c)
{
	unsigned i, nr;
	__le64 value_le;
	struct cursor_node *n = c->nodes + c->depth - 1;
	struct btree_node *bn = dm_block_data(n->b);
	struct dm_block_manager *bm = dm_tm_get_bm(c->info->tm);

	BUG_ON(c->info->value_type.size != sizeof(value_le));

	nr = le32_to_cpu(bn->header.nr_entries);
	for (i = 0; i < nr; i++) {
		memcpy(&value_le, value_ptr(bn, i), sizeof(value_le));
		dm_bm_prefetch(bm, le64_to_cpu(value_le));
	}
}

static bool leaf_node(struct dm_btree_cursor *c)
{
	struct cursor_node *n = c->nodes + c->depth - 1;
	struct btree_node *bn = dm_block_data(n->b);

	return le32_to_cpu(bn->header.flags) & LEAF_NODE;
}

static int push_node(struct dm_btree_cursor *c, dm_block_t b)
{
	int r;
	struct cursor_node *n = c->nodes + c->depth;

	if (c->depth >= DM_BTREE_CURSOR_MAX_DEPTH - 1) {
		DMERR("couldn't push cursor node, stack depth too high");
		return -EINVAL;
	}

	r = bn_read_lock(c->info, b, &n->b);
	if (r)
		return r;

	n->index = 0;
	c->depth++;

	if (c->prefetch_leaves || !leaf_node(c))
		prefetch_values(c);

	return 0;
}

static void pop_node(struct dm_btree_cursor *c)
{
	c->depth--;
	unlock_block(c->info, c->nodes[c->depth].b);
}

static int inc_or_backtrack(struct dm_btree_cursor *c)
{
	struct cursor_node *n;
	struct btree_node *bn;

	for (;;) {
		if (!c->depth)
			return -ENODATA;

		n = c->nodes + c->depth - 1;
		bn = dm_block_data(n->b);

		n->index++;
		if (n->index < le32_to_cpu(bn->header.nr_entries))
			break;

		pop_node(c);
	}

	return 0;
}

static int find_leaf(struct dm_btree_cursor *c)
{
	int r = 0;
	struct cursor_node *n;
	struct btree_node *bn;
	__le64 value_le;

	for (;;) {
		n = c->nodes + c->depth - 1;
		bn = dm_block_data(n->b);

		if (le32_to_cpu(bn->header.flags) & LEAF_NODE)
			break;

		memcpy(&value_le, value_ptr(bn, n->index), sizeof(value_le));
		r = push_node(c, le64_to_cpu(value_le));
		if (r) {
			DMERR("push_node failed");
			break;
		}
	}

	if (!r && (le32_to_cpu(bn->header.nr_entries) == 0))
		return -ENODATA;

	return r;
}

int dm_btree_cursor_begin(struct dm_btree_info *info, dm_block_t root,
			  bool prefetch_leaves, struct dm_btree_cursor *c)
{
	int r;

	c->info = info;
	c->root = root;
	c->depth = 0;
	c->prefetch_leaves = prefetch_leaves;

	r = push_node(c, root);
	if (r)
		return r;

	return find_leaf(c);
}
EXPORT_SYMBOL_GPL(dm_btree_cursor_begin);

void dm_btree_cursor_end(struct dm_btree_cursor *c)
{
	while (c->depth)
		pop_node(c);
}
EXPORT_SYMBOL_GPL(dm_btree_cursor_end);

int dm_btree_cursor_next(struct dm_btree_cursor *c)
{
	int r = inc_or_backtrack(c);
	if (!r) {
		r = find_leaf(c);
		if (r)
			DMERR("find_leaf failed");
	}

	return r;
}
EXPORT_SYMBOL_GPL(dm_btree_cursor_next);

int dm_btree_cursor_get_value(struct dm_btree_cursor *c, uint64_t *key, void *value_le)
{
	if (c->depth) {
		struct cursor_node *n = c->nodes + c->depth - 1;
		struct btree_node *bn = dm_block_data(n->b);

		if (le32_to_cpu(bn->header.flags) & INTERNAL_NODE)
			return -EINVAL;

		*key = le64_to_cpu(*key_ptr(bn, n->index));
		memcpy(value_le, value_ptr(bn, n->index), c->info->value_type.size);
		return 0;

	} else
		return -ENODATA;
}
EXPORT_SYMBOL_GPL(dm_btree_cursor_get_value);
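
For completeness, a bare-bones sketch of driving the btree cursor directly (hypothetical caller; in this series the only in-tree user is the array cursor above). The btree is assumed to hold 8-byte values, and the cursor is kzalloc'd because, as the header below notes, it is a bit large for the stack; <linux/slab.h> is assumed.

/* Hypothetical: sum the 64bit values of a single-level btree. */
static int sum_values(struct dm_btree_info *info, dm_block_t root, uint64_t *total)
{
	int r;
	struct dm_btree_cursor *c;

	c = kzalloc(sizeof(*c), GFP_KERNEL);
	if (!c)
		return -ENOMEM;

	*total = 0;
	r = dm_btree_cursor_begin(info, root, false, c);
	while (!r) {
		uint64_t key;
		__le64 value_le;

		r = dm_btree_cursor_get_value(c, &key, &value_le);
		if (r)
			break;

		*total += le64_to_cpu(value_le);
		r = dm_btree_cursor_next(c);	/* -ENODATA once we fall off the end */
	}

	dm_btree_cursor_end(c);
	kfree(c);
	return r == -ENODATA ? 0 : r;
}
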
@@ -176,4 +176,39 @@ int dm_btree_walk(struct dm_btree_info *info, dm_block_t root,
		  int (*fn)(void *context, uint64_t *keys, void *leaf),
		  void *context);


/*----------------------------------------------------------------*/

/*
 * Cursor API.  This does not follow the rolling lock convention.  Since we
 * know the order that values are required we can issue prefetches to speed
 * up iteration.  Use on a single level btree only.
 */
#define DM_BTREE_CURSOR_MAX_DEPTH 16

struct cursor_node {
	struct dm_block *b;
	unsigned index;
};

struct dm_btree_cursor {
	struct dm_btree_info *info;
	dm_block_t root;

	bool prefetch_leaves;
	unsigned depth;
	struct cursor_node nodes[DM_BTREE_CURSOR_MAX_DEPTH];
};

/*
 * Creates a fresh cursor.  If prefetch_leaves is set then it is assumed
 * the btree contains block indexes that will be prefetched.  The cursor is
 * quite large, so you probably don't want to put it on the stack.
 */
int dm_btree_cursor_begin(struct dm_btree_info *info, dm_block_t root,
			  bool prefetch_leaves, struct dm_btree_cursor *c);
void dm_btree_cursor_end(struct dm_btree_cursor *c);
int dm_btree_cursor_next(struct dm_btree_cursor *c);
int dm_btree_cursor_get_value(struct dm_btree_cursor *c, uint64_t *key, void *value_le);

#endif	/* _LINUX_DM_BTREE_H */
@@ -590,6 +590,7 @@ extern struct ratelimit_state dm_ratelimit_state;
#define DM_MAPIO_SUBMITTED	0
#define DM_MAPIO_REMAPPED	1
#define DM_MAPIO_REQUEUE	DM_ENDIO_REQUEUE
#define DM_MAPIO_DELAY_REQUEUE	3

#define dm_sector_div64(x, y)( \
{ \
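
Finally, the new DM_MAPIO_DELAY_REQUEUE return code: a request-based target's clone-and-map hook can hand it back to ask the core to requeue the request after a delay instead of immediately. The fragment below is only a schematic of that decision; the example_* helpers and structure are invented and it deliberately does not reproduce the real dm-mpath logic.

/* Schematic request-based map hook; all example_* names are made up. */
static int example_clone_and_map(struct dm_target *ti, struct request *rq,
				 union map_info *map_context,
				 struct request **clone)
{
	struct example_ctx *ctx = ti->private;

	if (!example_path_available(ctx))
		/* Nothing usable right now: back off and retry later. */
		return DM_MAPIO_DELAY_REQUEUE;

	*clone = example_setup_clone(ctx, rq);
	if (!*clone)
		return DM_MAPIO_REQUEUE;	/* requeue immediately */

	return DM_MAPIO_REMAPPED;
}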