bcache: Fix a writeback performance regression
commit c2a4f3183a1248f615a695fbd8905da55ad11bba upstream.

Background writeback works by scanning the btree for dirty data and
adding those keys into a fixed size buffer, then for each dirty key in
the keybuf writing it to the backing device.

When read_dirty() finishes and it's time to scan for more dirty data,
we need to wait for the outstanding writeback IO to finish - they still
take up slots in the keybuf (so that foreground writes can check for
them to avoid races) - without that wait, we'll continually rescan when
we'll be able to add at most a key or two to the keybuf, and that takes
locks that starves foreground IO. Doh.

Signed-off-by: Kent Overstreet <kmo@daterainc.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
parent 808eea9d29
commit 48c4100e10
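Note on the in_flight rework in the diff below: the patch drops the atomic counter plus closure waitlist that previously throttled writeback and replaces them with a counting semaphore (sema_init(&dc->in_flight, 64)), so read_dirty() blocks in down() once 64 writeback bios are outstanding and write_dirty_finish() releases a slot with up(). The user-space C sketch below only models that throttling pattern with POSIX semaphores; it is not bcache code, and fake_writeback_io, MAX_IN_FLIGHT and TOTAL_IOS are invented names for the illustration.

/* Build: cc -pthread inflight_model.c -o inflight_model */
#include <pthread.h>
#include <semaphore.h>
#include <stdio.h>
#include <unistd.h>

#define MAX_IN_FLIGHT 4		/* the patch uses 64; smaller here so the output stays short */
#define TOTAL_IOS     16

static sem_t in_flight;		/* plays the role of dc->in_flight after the patch */

/* Stands in for write_dirty_finish(): the IO completes and releases its slot. */
static void *fake_writeback_io(void *arg)
{
	long id = (long)arg;

	usleep(100 * 1000);	/* pretend the backing-device write takes 100ms */
	printf("io %ld done\n", id);
	sem_post(&in_flight);	/* up(&dc->in_flight) */
	return NULL;
}

int main(void)
{
	pthread_t tids[TOTAL_IOS];

	sem_init(&in_flight, 0, MAX_IN_FLIGHT);

	/*
	 * Stands in for read_dirty(): dispatch one writeback IO per dirty key,
	 * parking in sem_wait() once MAX_IN_FLIGHT are outstanding instead of
	 * returning and rescanning.
	 */
	for (long i = 0; i < TOTAL_IOS; i++) {
		sem_wait(&in_flight);	/* down(&dc->in_flight) */
		printf("dispatch io %ld\n", i);
		pthread_create(&tids[i], NULL, fake_writeback_io, (void *)i);
	}

	for (int i = 0; i < TOTAL_IOS; i++)
		pthread_join(tids[i], NULL);

	sem_destroy(&in_flight);
	return 0;
}

Because the scan loop parks instead of looping back for another btree scan, it stops re-taking the locks that were starving foreground IO, which is the regression described in the message above.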
@@ -499,7 +499,7 @@ struct cached_dev {
 	 */
 	atomic_t has_dirty;
 
-	struct ratelimit writeback_rate;
+	struct bch_ratelimit writeback_rate;
 	struct delayed_work writeback_rate_update;
 
 	/*
@@ -508,10 +508,9 @@ struct cached_dev {
 	 */
 	sector_t last_read;
 
-	/* Number of writeback bios in flight */
-	atomic_t in_flight;
+	/* Limit number of writeback bios in flight */
+	struct semaphore in_flight;
 	struct closure_with_timer writeback;
-	struct closure_waitlist writeback_wait;
 
 	struct keybuf writeback_keys;
 
@@ -190,7 +190,16 @@ void bch_time_stats_update(struct time_stats *stats, uint64_t start_time)
 	stats->last = now ?: 1;
 }
 
-unsigned bch_next_delay(struct ratelimit *d, uint64_t done)
+/**
+ * bch_next_delay() - increment @d by the amount of work done, and return how
+ * long to delay until the next time to do some work.
+ *
+ * @d - the struct bch_ratelimit to update
+ * @done - the amount of work done, in arbitrary units
+ *
+ * Returns the amount of time to delay by, in jiffies
+ */
+uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done)
 {
 	uint64_t now = local_clock();
 
@@ -452,17 +452,23 @@ read_attribute(name ## _last_ ## frequency_units)
 	(ewma) >> factor; \
 })
 
-struct ratelimit {
+struct bch_ratelimit {
+	/* Next time we want to do some work, in nanoseconds */
 	uint64_t next;
+
+	/*
+	 * Rate at which we want to do work, in units per nanosecond
+	 * The units here correspond to the units passed to bch_next_delay()
+	 */
 	unsigned rate;
 };
 
-static inline void ratelimit_reset(struct ratelimit *d)
+static inline void bch_ratelimit_reset(struct bch_ratelimit *d)
 {
 	d->next = local_clock();
 }
 
-unsigned bch_next_delay(struct ratelimit *d, uint64_t done);
+uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done);
 
 #define __DIV_SAFE(n, d, zero) \
 ({ \
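The kernel-doc and field comments added above pin down bch_next_delay()'s contract: charge @done units of work to the limiter and return how long to wait, in jiffies, before doing more, with bch_ratelimit tracking the next allowed time in nanoseconds and the rate in units per nanosecond. The user-space sketch below is only a rough model of that contract, not the kernel's actual bch_next_delay() body; model_ratelimit, model_next_delay and MODEL_HZ are invented names for the illustration.

/* Build: cc ratelimit_model.c -o ratelimit_model */
#include <stdint.h>
#include <stdio.h>
#include <time.h>

#define MODEL_HZ	100ULL				/* pretend jiffy length: 10ms */
#define NSEC_PER_JIFFY	(1000000000ULL / MODEL_HZ)

struct model_ratelimit {
	uint64_t next;		/* next time we want to do work, in ns */
	unsigned rate;		/* work units per ns */
};

static uint64_t now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}

/*
 * Charge "done" units of work against the limiter and return how many
 * (model) jiffies the caller should sleep before doing more work.
 */
static uint64_t model_next_delay(struct model_ratelimit *d, uint64_t done)
{
	uint64_t now = now_ns();

	d->next += done / d->rate;	/* units / (units per ns) = ns of budget consumed */

	return d->next > now ? (d->next - now) / NSEC_PER_JIFFY : 0;
}

int main(void)
{
	struct model_ratelimit rl = { .next = now_ns(), .rate = 8 };

	/*
	 * Each step charges ~512ms worth of work (4096000000 units at 8
	 * units/ns), so the returned delay grows by roughly 51 model jiffies
	 * per call because we never actually sleep it off.
	 */
	for (int i = 0; i < 5; i++)
		printf("step %d: delay %llu jiffies\n", i,
		       (unsigned long long)model_next_delay(&rl, 4096000000ULL));

	return 0;
}

In the writeback.c hunk further down, writeback_delay() feeds sectors * 10000000ULL into the real function and clamps the result with min_t(uint64_t, ret, HZ) so a single pass never sleeps for more than a second.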
@@ -91,11 +91,15 @@ static void update_writeback_rate(struct work_struct *work)
 
 static unsigned writeback_delay(struct cached_dev *dc, unsigned sectors)
 {
+	uint64_t ret;
+
 	if (atomic_read(&dc->disk.detaching) ||
 	    !dc->writeback_percent)
 		return 0;
 
-	return bch_next_delay(&dc->writeback_rate, sectors * 10000000ULL);
+	ret = bch_next_delay(&dc->writeback_rate, sectors * 10000000ULL);
+
+	return min_t(uint64_t, ret, HZ);
 }
 
 /* Background writeback */
@@ -165,7 +169,7 @@ static void refill_dirty(struct closure *cl)
 
 	up_write(&dc->writeback_lock);
 
-	ratelimit_reset(&dc->writeback_rate);
+	bch_ratelimit_reset(&dc->writeback_rate);
 
 	/* Punt to workqueue only so we don't recurse and blow the stack */
 	continue_at(cl, read_dirty, dirty_wq);
@@ -246,9 +250,7 @@ static void write_dirty_finish(struct closure *cl)
 	}
 
 	bch_keybuf_del(&dc->writeback_keys, w);
-	atomic_dec_bug(&dc->in_flight);
-
-	closure_wake_up(&dc->writeback_wait);
+	up(&dc->in_flight);
 
 	closure_return_with_destructor(cl, dirty_io_destructor);
 }
@@ -278,7 +280,7 @@ static void write_dirty(struct closure *cl)
 	trace_bcache_write_dirty(&io->bio);
 	closure_bio_submit(&io->bio, cl, &io->dc->disk);
 
-	continue_at(cl, write_dirty_finish, dirty_wq);
+	continue_at(cl, write_dirty_finish, system_wq);
 }
 
 static void read_dirty_endio(struct bio *bio, int error)
@@ -299,7 +301,7 @@ static void read_dirty_submit(struct closure *cl)
 	trace_bcache_read_dirty(&io->bio);
 	closure_bio_submit(&io->bio, cl, &io->dc->disk);
 
-	continue_at(cl, write_dirty, dirty_wq);
+	continue_at(cl, write_dirty, system_wq);
 }
 
 static void read_dirty(struct closure *cl)
@@ -324,12 +326,9 @@ static void read_dirty(struct closure *cl)
 
 		if (delay > 0 &&
 		    (KEY_START(&w->key) != dc->last_read ||
-		     jiffies_to_msecs(delay) > 50)) {
-			w->private = NULL;
-
-			closure_delay(&dc->writeback, delay);
-			continue_at(cl, read_dirty, dirty_wq);
-		}
+		     jiffies_to_msecs(delay) > 50))
+			while (delay)
+				delay = schedule_timeout(delay);
 
 		dc->last_read = KEY_OFFSET(&w->key);
 
@@ -354,15 +353,10 @@ static void read_dirty(struct closure *cl)
 
 		pr_debug("%s", pkey(&w->key));
 
-		closure_call(&io->cl, read_dirty_submit, NULL, &dc->disk.cl);
+		down(&dc->in_flight);
+		closure_call(&io->cl, read_dirty_submit, NULL, cl);
 
 		delay = writeback_delay(dc, KEY_SIZE(&w->key));
-
-		atomic_inc(&dc->in_flight);
-
-		if (!closure_wait_event(&dc->writeback_wait, cl,
-					atomic_read(&dc->in_flight) < 64))
-			continue_at(cl, read_dirty, dirty_wq);
 	}
 
 	if (0) {
@@ -372,11 +366,16 @@ err:
 		bch_keybuf_del(&dc->writeback_keys, w);
 	}
 
-	refill_dirty(cl);
+	/*
+	 * Wait for outstanding writeback IOs to finish (and keybuf slots to be
+	 * freed) before refilling again
+	 */
+	continue_at(cl, refill_dirty, dirty_wq);
 }
 
 void bch_cached_dev_writeback_init(struct cached_dev *dc)
 {
+	sema_init(&dc->in_flight, 64);
 	closure_init_unlocked(&dc->writeback);
 	init_rwsem(&dc->writeback_lock);
 
@@ -406,7 +405,7 @@ void bch_writeback_exit(void)
 
 int __init bch_writeback_init(void)
 {
-	dirty_wq = create_singlethread_workqueue("bcache_writeback");
+	dirty_wq = create_workqueue("bcache_writeback");
 	if (!dirty_wq)
 		return -ENOMEM;
 