Mirror of https://github.com/joel16/android_kernel_sony_msm8994_rework.git (synced 2024-11-30 23:30:48 +00:00)
Merge branch 'for-3.10/core' of git://git.kernel.dk/linux-block
Pull block core updates from Jens Axboe:

 - Major bit is Kents prep work for immutable bio vecs.
 - Stable candidate fix for a scheduling-while-atomic in the queue bypass operation.
 - Fix for the hang on exceeded rq->datalen 32-bit unsigned when merging discard bios.
 - Tejuns changes to convert the writeback thread pool to the generic workqueue mechanism.
 - Runtime PM framework, SCSI patches exists on top of these in James' tree.
 - A few random fixes.

* 'for-3.10/core' of git://git.kernel.dk/linux-block: (40 commits)
  relay: move remove_buf_file inside relay_close_buf
  partitions/efi.c: replace useless kzalloc's by kmalloc's
  fs/block_dev.c: fix iov_shorten() criteria in blkdev_aio_read()
  block: fix max discard sectors limit
  blkcg: fix "scheduling while atomic" in blk_queue_bypass_start
  Documentation: cfq-iosched: update documentation help for cfq tunables
  writeback: expose the bdi_wq workqueue
  writeback: replace custom worker pool implementation with unbound workqueue
  writeback: remove unused bdi_pending_list
  aoe: Fix unitialized var usage
  bio-integrity: Add explicit field for owner of bip_buf
  block: Add an explicit bio flag for bios that own their bvec
  block: Add bio_alloc_pages()
  block: Convert some code to bio_for_each_segment_all()
  block: Add bio_for_each_segment_all()
  bounce: Refactor __blk_queue_bounce to not use bi_io_vec
  raid1: use bio_copy_data()
  pktcdvd: Use bio_reset() in disabled code to kill bi_idx usage
  pktcdvd: use bio_copy_data()
  block: Add bio_copy_data()
  ...
commit 4de13d7aa8
@@ -5,7 +5,7 @@ The main aim of CFQ scheduler is to provide a fair allocation of the disk
I/O bandwidth for all the processes which requests an I/O operation.

CFQ maintains the per process queue for the processes which request I/O
operation(syncronous requests). In case of asynchronous requests, all the
operation(synchronous requests). In case of asynchronous requests, all the
requests from all the processes are batched together according to their
process's I/O priority.

@@ -66,6 +66,47 @@ This parameter is used to set the timeout of synchronous requests. Default
value of this is 124ms. In case to favor synchronous requests over asynchronous
one, this value should be decreased relative to fifo_expire_async.

group_idle
-----------
This parameter forces idling at the CFQ group level instead of CFQ
queue level. This was introduced after after a bottleneck was observed
in higher end storage due to idle on sequential queue and allow dispatch
from a single queue. The idea with this parameter is that it can be run with
slice_idle=0 and group_idle=8, so that idling does not happen on individual
queues in the group but happens overall on the group and thus still keeps the
IO controller working.
Not idling on individual queues in the group will dispatch requests from
multiple queues in the group at the same time and achieve higher throughput
on higher end storage.

Default value for this parameter is 8ms.

latency
-------
This parameter is used to enable/disable the latency mode of the CFQ
scheduler. If latency mode (called low_latency) is enabled, CFQ tries
to recompute the slice time for each process based on the target_latency set
for the system. This favors fairness over throughput. Disabling low
latency (setting it to 0) ignores target latency, allowing each process in the
system to get a full time slice.

By default low latency mode is enabled.

target_latency
--------------
This parameter is used to calculate the time slice for a process if cfq's
latency mode is enabled. It will ensure that sync requests have an estimated
latency. But if sequential workload is higher(e.g. sequential read),
then to meet the latency constraints, throughput may decrease because of less
time for each process to issue I/O request before the cfq queue is switched.

Though this can be overcome by disabling the latency_mode, it may increase
the read latency for some applications. This parameter allows for changing
target_latency through the sysfs interface which can provide the balanced
throughput and read latency.

Default value for target_latency is 300ms.

slice_async
-----------
This parameter is same as of slice_sync but for asynchronous queue. The

@@ -98,8 +139,8 @@ in the device exceeds this parameter. This parameter is used for synchronous
request.

In case of storage with several disk, this setting can limit the parallel
processing of request. Therefore, increasing the value can imporve the
performace although this can cause the latency of some I/O to increase due
processing of request. Therefore, increasing the value can improve the
performance although this can cause the latency of some I/O to increase due
to more number of requests.

CFQ Group scheduling
@@ -972,10 +972,10 @@ int blkcg_activate_policy(struct request_queue *q,
    if (!new_blkg)
        return -ENOMEM;

    preloaded = !radix_tree_preload(GFP_KERNEL);

    blk_queue_bypass_start(q);

    preloaded = !radix_tree_preload(GFP_KERNEL);

    /*
     * Make sure the root blkg exists and count the existing blkgs. As
     * @q is bypassing at this point, blkg_lookup_create() can't be
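This is the scheduling-while-atomic fix named in the pull message: radix_tree_preload() disables preemption until the matching radix_tree_preload_end(), while blk_queue_bypass_start() can sleep while the queue is drained into bypass mode, so the preload has to happen after bypass has started. A sketch of the resulting ordering with the reasoning as comments (the comments are mine, not part of the diff):

    /* Sketch of the corrected ordering in blkcg_activate_policy();
     * annotations are the editor's, not part of the patch. */
    blk_queue_bypass_start(q);    /* may sleep while the queue enters bypass */

    /* safe only after the sleeping call: preloading the radix tree
     * disables preemption until radix_tree_preload_end() */
    preloaded = !radix_tree_preload(GFP_KERNEL);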
block/blk-core.c
@@ -30,6 +30,7 @@
#include <linux/list_sort.h>
#include <linux/delay.h>
#include <linux/ratelimit.h>
#include <linux/pm_runtime.h>

#define CREATE_TRACE_POINTS
#include <trace/events/block.h>

@@ -159,20 +160,10 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
    else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
        error = -EIO;

    if (unlikely(nbytes > bio->bi_size)) {
        printk(KERN_ERR "%s: want %u bytes done, %u left\n",
               __func__, nbytes, bio->bi_size);
        nbytes = bio->bi_size;
    }

    if (unlikely(rq->cmd_flags & REQ_QUIET))
        set_bit(BIO_QUIET, &bio->bi_flags);

    bio->bi_size -= nbytes;
    bio->bi_sector += (nbytes >> 9);

    if (bio_integrity(bio))
        bio_integrity_advance(bio, nbytes);
    bio_advance(bio, nbytes);

    /* don't actually finish bio if it's part of flush sequence */
    if (bio->bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ))
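In the req_bio_endio() hunk above, the open-coded bi_sector/bi_size bookkeeping and the separate bio_integrity_advance() call give way to the new bio_advance() helper from this series. A minimal sketch of relying on it from a completion path, assuming the 3.10-era bio fields; my_complete_bytes() is a hypothetical driver helper, not part of the patch:

    /* Hedged sketch, not from the patch: partial completion via bio_advance(). */
    #include <linux/bio.h>

    static void my_complete_bytes(struct bio *bio, unsigned int nbytes, int error)
    {
        if (nbytes > bio->bi_size)      /* clamp, as req_bio_endio() used to */
            nbytes = bio->bi_size;

        /* advances bi_sector and shrinks bi_size; also advances the
         * integrity payload when one is attached */
        bio_advance(bio, nbytes);

        if (bio->bi_size == 0)
            bio_endio(bio, error);
    }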
@@ -1264,6 +1255,16 @@ void part_round_stats(int cpu, struct hd_struct *part)
}
EXPORT_SYMBOL_GPL(part_round_stats);

#ifdef CONFIG_PM_RUNTIME
static void blk_pm_put_request(struct request *rq)
{
    if (rq->q->dev && !(rq->cmd_flags & REQ_PM) && !--rq->q->nr_pending)
        pm_runtime_mark_last_busy(rq->q->dev);
}
#else
static inline void blk_pm_put_request(struct request *rq) {}
#endif

/*
 * queue lock must be held
 */
@@ -1274,6 +1275,8 @@ void __blk_put_request(struct request_queue *q, struct request *req)
    if (unlikely(--req->ref_count))
        return;

    blk_pm_put_request(req);

    elv_completed_request(q, req);

    /* this is a bio leak */
@@ -1597,7 +1600,7 @@ static void handle_bad_sector(struct bio *bio)
    printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n",
            bdevname(bio->bi_bdev, b),
            bio->bi_rw,
            (unsigned long long)bio->bi_sector + bio_sectors(bio),
            (unsigned long long)bio_end_sector(bio),
            (long long)(i_size_read(bio->bi_bdev->bd_inode) >> 9));

    set_bit(BIO_EOF, &bio->bi_flags);
@@ -2053,6 +2056,28 @@ static void blk_account_io_done(struct request *req)
    }
}

#ifdef CONFIG_PM_RUNTIME
/*
 * Don't process normal requests when queue is suspended
 * or in the process of suspending/resuming
 */
static struct request *blk_pm_peek_request(struct request_queue *q,
                                           struct request *rq)
{
    if (q->dev && (q->rpm_status == RPM_SUSPENDED ||
        (q->rpm_status != RPM_ACTIVE && !(rq->cmd_flags & REQ_PM))))
        return NULL;
    else
        return rq;
}
#else
static inline struct request *blk_pm_peek_request(struct request_queue *q,
                                                  struct request *rq)
{
    return rq;
}
#endif

/**
 * blk_peek_request - peek at the top of a request queue
 * @q: request queue to peek at

@@ -2075,6 +2100,11 @@ struct request *blk_peek_request(struct request_queue *q)
    int ret;

    while ((rq = __elv_next_request(q)) != NULL) {

        rq = blk_pm_peek_request(q, rq);
        if (!rq)
            break;

        if (!(rq->cmd_flags & REQ_STARTED)) {
            /*
             * This is the first time the device driver
@ -2253,8 +2283,7 @@ EXPORT_SYMBOL(blk_fetch_request);
|
||||
**/
|
||||
bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
|
||||
{
|
||||
int total_bytes, bio_nbytes, next_idx = 0;
|
||||
struct bio *bio;
|
||||
int total_bytes;
|
||||
|
||||
if (!req->bio)
|
||||
return false;
|
||||
@ -2300,56 +2329,21 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
|
||||
|
||||
blk_account_io_completion(req, nr_bytes);
|
||||
|
||||
total_bytes = bio_nbytes = 0;
|
||||
while ((bio = req->bio) != NULL) {
|
||||
int nbytes;
|
||||
total_bytes = 0;
|
||||
while (req->bio) {
|
||||
struct bio *bio = req->bio;
|
||||
unsigned bio_bytes = min(bio->bi_size, nr_bytes);
|
||||
|
||||
if (nr_bytes >= bio->bi_size) {
|
||||
if (bio_bytes == bio->bi_size)
|
||||
req->bio = bio->bi_next;
|
||||
nbytes = bio->bi_size;
|
||||
req_bio_endio(req, bio, nbytes, error);
|
||||
next_idx = 0;
|
||||
bio_nbytes = 0;
|
||||
} else {
|
||||
int idx = bio->bi_idx + next_idx;
|
||||
|
||||
if (unlikely(idx >= bio->bi_vcnt)) {
|
||||
blk_dump_rq_flags(req, "__end_that");
|
||||
printk(KERN_ERR "%s: bio idx %d >= vcnt %d\n",
|
||||
__func__, idx, bio->bi_vcnt);
|
||||
break;
|
||||
}
|
||||
req_bio_endio(req, bio, bio_bytes, error);
|
||||
|
||||
nbytes = bio_iovec_idx(bio, idx)->bv_len;
|
||||
BIO_BUG_ON(nbytes > bio->bi_size);
|
||||
total_bytes += bio_bytes;
|
||||
nr_bytes -= bio_bytes;
|
||||
|
||||
/*
|
||||
* not a complete bvec done
|
||||
*/
|
||||
if (unlikely(nbytes > nr_bytes)) {
|
||||
bio_nbytes += nr_bytes;
|
||||
total_bytes += nr_bytes;
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* advance to the next vector
|
||||
*/
|
||||
next_idx++;
|
||||
bio_nbytes += nbytes;
|
||||
}
|
||||
|
||||
total_bytes += nbytes;
|
||||
nr_bytes -= nbytes;
|
||||
|
||||
bio = req->bio;
|
||||
if (bio) {
|
||||
/*
|
||||
* end more in this run, or just return 'not-done'
|
||||
*/
|
||||
if (unlikely(nr_bytes <= 0))
|
||||
break;
|
||||
}
|
||||
if (!nr_bytes)
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2365,16 +2359,6 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* if the request wasn't completed, update state
|
||||
*/
|
||||
if (bio_nbytes) {
|
||||
req_bio_endio(req, bio, bio_nbytes, error);
|
||||
bio->bi_idx += next_idx;
|
||||
bio_iovec(bio)->bv_offset += nr_bytes;
|
||||
bio_iovec(bio)->bv_len -= nr_bytes;
|
||||
}
|
||||
|
||||
req->__data_len -= total_bytes;
|
||||
req->buffer = bio_data(req->bio);
|
||||
|
||||
@ -3046,6 +3030,149 @@ void blk_finish_plug(struct blk_plug *plug)
|
||||
}
|
||||
EXPORT_SYMBOL(blk_finish_plug);
|
||||
|
||||
#ifdef CONFIG_PM_RUNTIME
|
||||
/**
|
||||
* blk_pm_runtime_init - Block layer runtime PM initialization routine
|
||||
* @q: the queue of the device
|
||||
* @dev: the device the queue belongs to
|
||||
*
|
||||
* Description:
|
||||
* Initialize runtime-PM-related fields for @q and start auto suspend for
|
||||
* @dev. Drivers that want to take advantage of request-based runtime PM
|
||||
* should call this function after @dev has been initialized, and its
|
||||
* request queue @q has been allocated, and runtime PM for it can not happen
|
||||
* yet(either due to disabled/forbidden or its usage_count > 0). In most
|
||||
* cases, driver should call this function before any I/O has taken place.
|
||||
*
|
||||
* This function takes care of setting up using auto suspend for the device,
|
||||
* the autosuspend delay is set to -1 to make runtime suspend impossible
|
||||
* until an updated value is either set by user or by driver. Drivers do
|
||||
* not need to touch other autosuspend settings.
|
||||
*
|
||||
* The block layer runtime PM is request based, so only works for drivers
|
||||
* that use request as their IO unit instead of those directly use bio's.
|
||||
*/
|
||||
void blk_pm_runtime_init(struct request_queue *q, struct device *dev)
|
||||
{
|
||||
q->dev = dev;
|
||||
q->rpm_status = RPM_ACTIVE;
|
||||
pm_runtime_set_autosuspend_delay(q->dev, -1);
|
||||
pm_runtime_use_autosuspend(q->dev);
|
||||
}
|
||||
EXPORT_SYMBOL(blk_pm_runtime_init);
|
||||
|
||||
/**
|
||||
* blk_pre_runtime_suspend - Pre runtime suspend check
|
||||
* @q: the queue of the device
|
||||
*
|
||||
* Description:
|
||||
* This function will check if runtime suspend is allowed for the device
|
||||
* by examining if there are any requests pending in the queue. If there
|
||||
* are requests pending, the device can not be runtime suspended; otherwise,
|
||||
* the queue's status will be updated to SUSPENDING and the driver can
|
||||
* proceed to suspend the device.
|
||||
*
|
||||
* For the not allowed case, we mark last busy for the device so that
|
||||
* runtime PM core will try to autosuspend it some time later.
|
||||
*
|
||||
* This function should be called near the start of the device's
|
||||
* runtime_suspend callback.
|
||||
*
|
||||
* Return:
|
||||
* 0 - OK to runtime suspend the device
|
||||
* -EBUSY - Device should not be runtime suspended
|
||||
*/
|
||||
int blk_pre_runtime_suspend(struct request_queue *q)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
spin_lock_irq(q->queue_lock);
|
||||
if (q->nr_pending) {
|
||||
ret = -EBUSY;
|
||||
pm_runtime_mark_last_busy(q->dev);
|
||||
} else {
|
||||
q->rpm_status = RPM_SUSPENDING;
|
||||
}
|
||||
spin_unlock_irq(q->queue_lock);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(blk_pre_runtime_suspend);
|
||||
|
||||
/**
|
||||
* blk_post_runtime_suspend - Post runtime suspend processing
|
||||
* @q: the queue of the device
|
||||
* @err: return value of the device's runtime_suspend function
|
||||
*
|
||||
* Description:
|
||||
* Update the queue's runtime status according to the return value of the
|
||||
* device's runtime suspend function and mark last busy for the device so
|
||||
* that PM core will try to auto suspend the device at a later time.
|
||||
*
|
||||
* This function should be called near the end of the device's
|
||||
* runtime_suspend callback.
|
||||
*/
|
||||
void blk_post_runtime_suspend(struct request_queue *q, int err)
|
||||
{
|
||||
spin_lock_irq(q->queue_lock);
|
||||
if (!err) {
|
||||
q->rpm_status = RPM_SUSPENDED;
|
||||
} else {
|
||||
q->rpm_status = RPM_ACTIVE;
|
||||
pm_runtime_mark_last_busy(q->dev);
|
||||
}
|
||||
spin_unlock_irq(q->queue_lock);
|
||||
}
|
||||
EXPORT_SYMBOL(blk_post_runtime_suspend);
|
||||
|
||||
/**
|
||||
* blk_pre_runtime_resume - Pre runtime resume processing
|
||||
* @q: the queue of the device
|
||||
*
|
||||
* Description:
|
||||
* Update the queue's runtime status to RESUMING in preparation for the
|
||||
* runtime resume of the device.
|
||||
*
|
||||
* This function should be called near the start of the device's
|
||||
* runtime_resume callback.
|
||||
*/
|
||||
void blk_pre_runtime_resume(struct request_queue *q)
|
||||
{
|
||||
spin_lock_irq(q->queue_lock);
|
||||
q->rpm_status = RPM_RESUMING;
|
||||
spin_unlock_irq(q->queue_lock);
|
||||
}
|
||||
EXPORT_SYMBOL(blk_pre_runtime_resume);
|
||||
|
||||
/**
|
||||
* blk_post_runtime_resume - Post runtime resume processing
|
||||
* @q: the queue of the device
|
||||
* @err: return value of the device's runtime_resume function
|
||||
*
|
||||
* Description:
|
||||
* Update the queue's runtime status according to the return value of the
|
||||
* device's runtime_resume function. If it is successfully resumed, process
|
||||
* the requests that are queued into the device's queue when it is resuming
|
||||
* and then mark last busy and initiate autosuspend for it.
|
||||
*
|
||||
* This function should be called near the end of the device's
|
||||
* runtime_resume callback.
|
||||
*/
|
||||
void blk_post_runtime_resume(struct request_queue *q, int err)
|
||||
{
|
||||
spin_lock_irq(q->queue_lock);
|
||||
if (!err) {
|
||||
q->rpm_status = RPM_ACTIVE;
|
||||
__blk_run_queue(q);
|
||||
pm_runtime_mark_last_busy(q->dev);
|
||||
pm_runtime_autosuspend(q->dev);
|
||||
} else {
|
||||
q->rpm_status = RPM_SUSPENDED;
|
||||
}
|
||||
spin_unlock_irq(q->queue_lock);
|
||||
}
|
||||
EXPORT_SYMBOL(blk_post_runtime_resume);
|
||||
#endif
|
||||
|
||||
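The kernel-doc above introduces the driver-facing half of the new block-layer runtime PM support: blk_pm_runtime_init() plus the pre/post suspend and resume hooks. A hedged sketch of how a request-based driver might wire them into its dev_pm_ops; the my_* names, the struct layout, and the 5-second autosuspend delay are illustrative assumptions, not taken from the patch:

    /* Hedged sketch only: a hypothetical request-based driver using the new
     * block-layer runtime PM helpers. */
    #include <linux/blkdev.h>
    #include <linux/device.h>
    #include <linux/pm_runtime.h>

    struct my_dev {                         /* hypothetical driver state */
        struct request_queue *queue;
    };

    static int my_hw_suspend(struct my_dev *md);   /* hypothetical hardware hooks */
    static int my_hw_resume(struct my_dev *md);

    static int my_runtime_suspend(struct device *dev)
    {
        struct my_dev *md = dev_get_drvdata(dev);
        int err = blk_pre_runtime_suspend(md->queue);

        if (err)
            return err;                     /* -EBUSY: requests still pending */
        err = my_hw_suspend(md);
        blk_post_runtime_suspend(md->queue, err);
        return err;
    }

    static int my_runtime_resume(struct device *dev)
    {
        struct my_dev *md = dev_get_drvdata(dev);
        int err;

        blk_pre_runtime_resume(md->queue);
        err = my_hw_resume(md);
        blk_post_runtime_resume(md->queue, err);
        return err;
    }

    /* At probe time, once the queue exists and before any I/O:
     *     blk_pm_runtime_init(md->queue, dev);
     *     pm_runtime_set_autosuspend_delay(dev, 5000);   // override the -1 default
     *     pm_runtime_allow(dev);
     */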
int __init blk_dev_init(void)
|
||||
{
|
||||
BUILD_BUG_ON(__REQ_NR_BITS > 8 *
|
||||
|
@@ -2270,11 +2270,8 @@ cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio)
        return NULL;

    cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio));
    if (cfqq) {
        sector_t sector = bio->bi_sector + bio_sectors(bio);

        return elv_rb_find(&cfqq->sort_list, sector);
    }
    if (cfqq)
        return elv_rb_find(&cfqq->sort_list, bio_end_sector(bio));

    return NULL;
}

@@ -132,7 +132,7 @@ deadline_merge(struct request_queue *q, struct request **req, struct bio *bio)
     * check for front merge
     */
    if (dd->front_merges) {
        sector_t sector = bio->bi_sector + bio_sectors(bio);
        sector_t sector = bio_end_sector(bio);

        __rq = elv_rb_find(&dd->sort_list[bio_data_dir(bio)], sector);
        if (__rq) {
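Several hunks in this merge (cfq, deadline, pktcdvd, dm, md) replace the open-coded bio->bi_sector + bio_sectors(bio) pattern with the new bio_end_sector() helper, as in the two elevators above. For orientation, a sketch of what these helpers amount to on a 3.10-era struct bio (paraphrased, not copied verbatim from the tree):

    /* Sketch of the 3.10-era helpers used throughout these hunks. */
    #define bio_sectors(bio)     ((bio)->bi_size >> 9)
    #define bio_end_sector(bio)  ((bio)->bi_sector + bio_sectors(bio))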
@ -34,6 +34,7 @@
|
||||
#include <linux/blktrace_api.h>
|
||||
#include <linux/hash.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/pm_runtime.h>
|
||||
|
||||
#include <trace/events/block.h>
|
||||
|
||||
@ -536,6 +537,27 @@ void elv_bio_merged(struct request_queue *q, struct request *rq,
|
||||
e->type->ops.elevator_bio_merged_fn(q, rq, bio);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PM_RUNTIME
|
||||
static void blk_pm_requeue_request(struct request *rq)
|
||||
{
|
||||
if (rq->q->dev && !(rq->cmd_flags & REQ_PM))
|
||||
rq->q->nr_pending--;
|
||||
}
|
||||
|
||||
static void blk_pm_add_request(struct request_queue *q, struct request *rq)
|
||||
{
|
||||
if (q->dev && !(rq->cmd_flags & REQ_PM) && q->nr_pending++ == 0 &&
|
||||
(q->rpm_status == RPM_SUSPENDED || q->rpm_status == RPM_SUSPENDING))
|
||||
pm_request_resume(q->dev);
|
||||
}
|
||||
#else
|
||||
static inline void blk_pm_requeue_request(struct request *rq) {}
|
||||
static inline void blk_pm_add_request(struct request_queue *q,
|
||||
struct request *rq)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
void elv_requeue_request(struct request_queue *q, struct request *rq)
|
||||
{
|
||||
/*
|
||||
@ -550,6 +572,8 @@ void elv_requeue_request(struct request_queue *q, struct request *rq)
|
||||
|
||||
rq->cmd_flags &= ~REQ_STARTED;
|
||||
|
||||
blk_pm_requeue_request(rq);
|
||||
|
||||
__elv_add_request(q, rq, ELEVATOR_INSERT_REQUEUE);
|
||||
}
|
||||
|
||||
@ -572,6 +596,8 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where)
|
||||
{
|
||||
trace_block_rq_insert(q, rq);
|
||||
|
||||
blk_pm_add_request(q, rq);
|
||||
|
||||
rq->q = q;
|
||||
|
||||
if (rq->cmd_flags & REQ_SOFTBARRIER) {
|
||||
|
@ -238,7 +238,7 @@ static gpt_entry *alloc_read_gpt_entries(struct parsed_partitions *state,
|
||||
le32_to_cpu(gpt->sizeof_partition_entry);
|
||||
if (!count)
|
||||
return NULL;
|
||||
pte = kzalloc(count, GFP_KERNEL);
|
||||
pte = kmalloc(count, GFP_KERNEL);
|
||||
if (!pte)
|
||||
return NULL;
|
||||
|
||||
@ -267,7 +267,7 @@ static gpt_header *alloc_read_gpt_header(struct parsed_partitions *state,
|
||||
gpt_header *gpt;
|
||||
unsigned ssz = bdev_logical_block_size(state->bdev);
|
||||
|
||||
gpt = kzalloc(ssz, GFP_KERNEL);
|
||||
gpt = kmalloc(ssz, GFP_KERNEL);
|
||||
if (!gpt)
|
||||
return NULL;
|
||||
|
||||
|
@ -928,7 +928,7 @@ bufinit(struct buf *buf, struct request *rq, struct bio *bio)
|
||||
buf->resid = bio->bi_size;
|
||||
buf->sector = bio->bi_sector;
|
||||
bio_pageinc(bio);
|
||||
buf->bv = bv = &bio->bi_io_vec[bio->bi_idx];
|
||||
buf->bv = bv = bio_iovec(bio);
|
||||
buf->bv_resid = bv->bv_len;
|
||||
WARN_ON(buf->bv_resid == 0);
|
||||
}
|
||||
|
@ -334,8 +334,7 @@ static void brd_make_request(struct request_queue *q, struct bio *bio)
|
||||
int err = -EIO;
|
||||
|
||||
sector = bio->bi_sector;
|
||||
if (sector + (bio->bi_size >> SECTOR_SHIFT) >
|
||||
get_capacity(bdev->bd_disk))
|
||||
if (bio_end_sector(bio) > get_capacity(bdev->bd_disk))
|
||||
goto out;
|
||||
|
||||
if (unlikely(bio->bi_rw & REQ_DISCARD)) {
|
||||
|
@ -3775,7 +3775,6 @@ static int __floppy_read_block_0(struct block_device *bdev)
|
||||
bio_vec.bv_len = size;
|
||||
bio_vec.bv_offset = 0;
|
||||
bio.bi_vcnt = 1;
|
||||
bio.bi_idx = 0;
|
||||
bio.bi_size = size;
|
||||
bio.bi_bdev = bdev;
|
||||
bio.bi_sector = 0;
|
||||
|
@ -901,7 +901,7 @@ static void pkt_iosched_process_queue(struct pktcdvd_device *pd)
|
||||
pd->iosched.successive_reads += bio->bi_size >> 10;
|
||||
else {
|
||||
pd->iosched.successive_reads = 0;
|
||||
pd->iosched.last_write = bio->bi_sector + bio_sectors(bio);
|
||||
pd->iosched.last_write = bio_end_sector(bio);
|
||||
}
|
||||
if (pd->iosched.successive_reads >= HI_SPEED_SWITCH) {
|
||||
if (pd->read_speed == pd->write_speed) {
|
||||
@ -947,31 +947,6 @@ static int pkt_set_segment_merging(struct pktcdvd_device *pd, struct request_que
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Copy CD_FRAMESIZE bytes from src_bio into a destination page
|
||||
*/
|
||||
static void pkt_copy_bio_data(struct bio *src_bio, int seg, int offs, struct page *dst_page, int dst_offs)
|
||||
{
|
||||
unsigned int copy_size = CD_FRAMESIZE;
|
||||
|
||||
while (copy_size > 0) {
|
||||
struct bio_vec *src_bvl = bio_iovec_idx(src_bio, seg);
|
||||
void *vfrom = kmap_atomic(src_bvl->bv_page) +
|
||||
src_bvl->bv_offset + offs;
|
||||
void *vto = page_address(dst_page) + dst_offs;
|
||||
int len = min_t(int, copy_size, src_bvl->bv_len - offs);
|
||||
|
||||
BUG_ON(len < 0);
|
||||
memcpy(vto, vfrom, len);
|
||||
kunmap_atomic(vfrom);
|
||||
|
||||
seg++;
|
||||
offs = 0;
|
||||
dst_offs += len;
|
||||
copy_size -= len;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Copy all data for this packet to pkt->pages[], so that
|
||||
* a) The number of required segments for the write bio is minimized, which
|
||||
@ -1181,16 +1156,15 @@ static int pkt_start_recovery(struct packet_data *pkt)
|
||||
new_sector = new_block * (CD_FRAMESIZE >> 9);
|
||||
pkt->sector = new_sector;
|
||||
|
||||
bio_reset(pkt->bio);
|
||||
pkt->bio->bi_bdev = pd->bdev;
|
||||
pkt->bio->bi_rw = REQ_WRITE;
|
||||
pkt->bio->bi_sector = new_sector;
|
||||
pkt->bio->bi_next = NULL;
|
||||
pkt->bio->bi_flags = 1 << BIO_UPTODATE;
|
||||
pkt->bio->bi_idx = 0;
|
||||
pkt->bio->bi_size = pkt->frames * CD_FRAMESIZE;
|
||||
pkt->bio->bi_vcnt = pkt->frames;
|
||||
|
||||
BUG_ON(pkt->bio->bi_rw != REQ_WRITE);
|
||||
BUG_ON(pkt->bio->bi_vcnt != pkt->frames);
|
||||
BUG_ON(pkt->bio->bi_size != pkt->frames * CD_FRAMESIZE);
|
||||
BUG_ON(pkt->bio->bi_end_io != pkt_end_io_packet_write);
|
||||
BUG_ON(pkt->bio->bi_private != pkt);
|
||||
pkt->bio->bi_end_io = pkt_end_io_packet_write;
|
||||
pkt->bio->bi_private = pkt;
|
||||
|
||||
drop_super(sb);
|
||||
return 1;
|
||||
@ -1325,55 +1299,35 @@ try_next_bio:
|
||||
*/
|
||||
static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
|
||||
{
|
||||
struct bio *bio;
|
||||
int f;
|
||||
int frames_write;
|
||||
struct bio_vec *bvec = pkt->w_bio->bi_io_vec;
|
||||
|
||||
bio_reset(pkt->w_bio);
|
||||
pkt->w_bio->bi_sector = pkt->sector;
|
||||
pkt->w_bio->bi_bdev = pd->bdev;
|
||||
pkt->w_bio->bi_end_io = pkt_end_io_packet_write;
|
||||
pkt->w_bio->bi_private = pkt;
|
||||
|
||||
/* XXX: locking? */
|
||||
for (f = 0; f < pkt->frames; f++) {
|
||||
bvec[f].bv_page = pkt->pages[(f * CD_FRAMESIZE) / PAGE_SIZE];
|
||||
bvec[f].bv_offset = (f * CD_FRAMESIZE) % PAGE_SIZE;
|
||||
if (!bio_add_page(pkt->w_bio, bvec[f].bv_page, CD_FRAMESIZE, bvec[f].bv_offset))
|
||||
BUG();
|
||||
}
|
||||
VPRINTK(DRIVER_NAME": vcnt=%d\n", pkt->w_bio->bi_vcnt);
|
||||
|
||||
/*
|
||||
* Fill-in bvec with data from orig_bios.
|
||||
*/
|
||||
frames_write = 0;
|
||||
spin_lock(&pkt->lock);
|
||||
bio_list_for_each(bio, &pkt->orig_bios) {
|
||||
int segment = bio->bi_idx;
|
||||
int src_offs = 0;
|
||||
int first_frame = (bio->bi_sector - pkt->sector) / (CD_FRAMESIZE >> 9);
|
||||
int num_frames = bio->bi_size / CD_FRAMESIZE;
|
||||
BUG_ON(first_frame < 0);
|
||||
BUG_ON(first_frame + num_frames > pkt->frames);
|
||||
for (f = first_frame; f < first_frame + num_frames; f++) {
|
||||
struct bio_vec *src_bvl = bio_iovec_idx(bio, segment);
|
||||
bio_copy_data(pkt->w_bio, pkt->orig_bios.head);
|
||||
|
||||
while (src_offs >= src_bvl->bv_len) {
|
||||
src_offs -= src_bvl->bv_len;
|
||||
segment++;
|
||||
BUG_ON(segment >= bio->bi_vcnt);
|
||||
src_bvl = bio_iovec_idx(bio, segment);
|
||||
}
|
||||
|
||||
if (src_bvl->bv_len - src_offs >= CD_FRAMESIZE) {
|
||||
bvec[f].bv_page = src_bvl->bv_page;
|
||||
bvec[f].bv_offset = src_bvl->bv_offset + src_offs;
|
||||
} else {
|
||||
pkt_copy_bio_data(bio, segment, src_offs,
|
||||
bvec[f].bv_page, bvec[f].bv_offset);
|
||||
}
|
||||
src_offs += CD_FRAMESIZE;
|
||||
frames_write++;
|
||||
}
|
||||
}
|
||||
pkt_set_state(pkt, PACKET_WRITE_WAIT_STATE);
|
||||
spin_unlock(&pkt->lock);
|
||||
|
||||
VPRINTK("pkt_start_write: Writing %d frames for zone %llx\n",
|
||||
frames_write, (unsigned long long)pkt->sector);
|
||||
BUG_ON(frames_write != pkt->write_size);
|
||||
pkt->write_size, (unsigned long long)pkt->sector);
|
||||
|
||||
if (test_bit(PACKET_MERGE_SEGS, &pd->flags) || (pkt->write_size < pkt->frames)) {
|
||||
pkt_make_local_copy(pkt, bvec);
|
||||
@ -1383,16 +1337,6 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
|
||||
}
|
||||
|
||||
/* Start the write request */
|
||||
bio_reset(pkt->w_bio);
|
||||
pkt->w_bio->bi_sector = pkt->sector;
|
||||
pkt->w_bio->bi_bdev = pd->bdev;
|
||||
pkt->w_bio->bi_end_io = pkt_end_io_packet_write;
|
||||
pkt->w_bio->bi_private = pkt;
|
||||
for (f = 0; f < pkt->frames; f++)
|
||||
if (!bio_add_page(pkt->w_bio, bvec[f].bv_page, CD_FRAMESIZE, bvec[f].bv_offset))
|
||||
BUG();
|
||||
VPRINTK(DRIVER_NAME": vcnt=%d\n", pkt->w_bio->bi_vcnt);
|
||||
|
||||
atomic_set(&pkt->io_wait, 1);
|
||||
pkt->w_bio->bi_rw = WRITE;
|
||||
pkt_queue_bio(pd, pkt->w_bio);
|
||||
@ -2431,7 +2375,7 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio)
|
||||
cloned_bio->bi_bdev = pd->bdev;
|
||||
cloned_bio->bi_private = psd;
|
||||
cloned_bio->bi_end_io = pkt_end_io_read_cloned;
|
||||
pd->stats.secs_r += bio->bi_size >> 9;
|
||||
pd->stats.secs_r += bio_sectors(bio);
|
||||
pkt_queue_bio(pd, cloned_bio);
|
||||
return;
|
||||
}
|
||||
@ -2452,7 +2396,7 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio)
|
||||
zone = ZONE(bio->bi_sector, pd);
|
||||
VPRINTK("pkt_make_request: start = %6llx stop = %6llx\n",
|
||||
(unsigned long long)bio->bi_sector,
|
||||
(unsigned long long)(bio->bi_sector + bio_sectors(bio)));
|
||||
(unsigned long long)bio_end_sector(bio));
|
||||
|
||||
/* Check if we have to split the bio */
|
||||
{
|
||||
@ -2460,7 +2404,7 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio)
|
||||
sector_t last_zone;
|
||||
int first_sectors;
|
||||
|
||||
last_zone = ZONE(bio->bi_sector + bio_sectors(bio) - 1, pd);
|
||||
last_zone = ZONE(bio_end_sector(bio) - 1, pd);
|
||||
if (last_zone != zone) {
|
||||
BUG_ON(last_zone != zone + pd->settings.size);
|
||||
first_sectors = last_zone - bio->bi_sector;
|
||||
|
@ -1143,7 +1143,7 @@ static struct bio *bio_clone_range(struct bio *bio_src,
|
||||
/* Find first affected segment... */
|
||||
|
||||
resid = offset;
|
||||
__bio_for_each_segment(bv, bio_src, idx, 0) {
|
||||
bio_for_each_segment(bv, bio_src, idx) {
|
||||
if (resid < bv->bv_len)
|
||||
break;
|
||||
resid -= bv->bv_len;
|
||||
|
@ -858,8 +858,7 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone)
|
||||
unsigned int i;
|
||||
struct bio_vec *bv;
|
||||
|
||||
for (i = 0; i < clone->bi_vcnt; i++) {
|
||||
bv = bio_iovec_idx(clone, i);
|
||||
bio_for_each_segment_all(bv, clone, i) {
|
||||
BUG_ON(!bv->bv_page);
|
||||
mempool_free(bv->bv_page, cc->page_pool);
|
||||
bv->bv_page = NULL;
|
||||
|
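The dm-crypt hunk above is one of several conversions to the new bio_for_each_segment_all() iterator, which visits every bvec a bio owns rather than starting at bi_idx. A minimal hedged sketch of the same pattern for a driver that attached the pages itself; my_free_bio_pages() is a hypothetical name, not from the patch:

    /* Hedged sketch: release pages we attached to a bio we own. */
    #include <linux/bio.h>
    #include <linux/mm.h>

    static void my_free_bio_pages(struct bio *bio)
    {
        struct bio_vec *bv;
        int i;

        /* iterates bi_io_vec[0..bi_vcnt), ignoring bi_idx */
        bio_for_each_segment_all(bv, bio, i) {
            if (bv->bv_page)
                __free_page(bv->bv_page);
            bv->bv_page = NULL;
        }
    }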
@ -458,7 +458,7 @@ static void map_region(struct dm_io_region *io, struct mirror *m,
|
||||
{
|
||||
io->bdev = m->dev->bdev;
|
||||
io->sector = map_sector(m, bio);
|
||||
io->count = bio->bi_size >> 9;
|
||||
io->count = bio_sectors(bio);
|
||||
}
|
||||
|
||||
static void hold_bio(struct mirror_set *ms, struct bio *bio)
|
||||
|
@ -258,7 +258,7 @@ static int stripe_map_range(struct stripe_c *sc, struct bio *bio,
|
||||
sector_t begin, end;
|
||||
|
||||
stripe_map_range_sector(sc, bio->bi_sector, target_stripe, &begin);
|
||||
stripe_map_range_sector(sc, bio->bi_sector + bio_sectors(bio),
|
||||
stripe_map_range_sector(sc, bio_end_sector(bio),
|
||||
target_stripe, &end);
|
||||
if (begin < end) {
|
||||
bio->bi_bdev = sc->stripe[target_stripe].dev->bdev;
|
||||
|
@ -501,7 +501,7 @@ static int verity_map(struct dm_target *ti, struct bio *bio)
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
if ((bio->bi_sector + bio_sectors(bio)) >>
|
||||
if (bio_end_sector(bio) >>
|
||||
(v->data_dev_block_bits - SECTOR_SHIFT) > v->data_blocks) {
|
||||
DMERR_LIMIT("io out of range");
|
||||
return -EIO;
|
||||
@ -519,7 +519,7 @@ static int verity_map(struct dm_target *ti, struct bio *bio)
|
||||
|
||||
bio->bi_end_io = verity_end_io;
|
||||
bio->bi_private = io;
|
||||
io->io_vec_size = bio->bi_vcnt - bio->bi_idx;
|
||||
io->io_vec_size = bio_segments(bio);
|
||||
if (io->io_vec_size < DM_VERITY_IO_VEC_INLINE)
|
||||
io->io_vec = io->io_vec_inline;
|
||||
else
|
||||
|
@ -185,8 +185,7 @@ static void make_request(struct mddev *mddev, struct bio *bio)
|
||||
return;
|
||||
}
|
||||
|
||||
if (check_sector(conf, bio->bi_sector, bio->bi_sector+(bio->bi_size>>9),
|
||||
WRITE))
|
||||
if (check_sector(conf, bio->bi_sector, bio_end_sector(bio), WRITE))
|
||||
failit = 1;
|
||||
if (check_mode(conf, WritePersistent)) {
|
||||
add_sector(conf, bio->bi_sector, WritePersistent);
|
||||
@ -196,8 +195,7 @@ static void make_request(struct mddev *mddev, struct bio *bio)
|
||||
failit = 1;
|
||||
} else {
|
||||
/* read request */
|
||||
if (check_sector(conf, bio->bi_sector, bio->bi_sector + (bio->bi_size>>9),
|
||||
READ))
|
||||
if (check_sector(conf, bio->bi_sector, bio_end_sector(bio), READ))
|
||||
failit = 1;
|
||||
if (check_mode(conf, ReadTransient))
|
||||
failit = 1;
|
||||
|
@ -317,8 +317,7 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio)
|
||||
bio_io_error(bio);
|
||||
return;
|
||||
}
|
||||
if (unlikely(bio->bi_sector + (bio->bi_size >> 9) >
|
||||
tmp_dev->end_sector)) {
|
||||
if (unlikely(bio_end_sector(bio) > tmp_dev->end_sector)) {
|
||||
/* This bio crosses a device boundary, so we have to
|
||||
* split it.
|
||||
*/
|
||||
|
@ -197,21 +197,12 @@ void md_trim_bio(struct bio *bio, int offset, int size)
|
||||
if (offset == 0 && size == bio->bi_size)
|
||||
return;
|
||||
|
||||
bio->bi_sector += offset;
|
||||
bio->bi_size = size;
|
||||
offset <<= 9;
|
||||
clear_bit(BIO_SEG_VALID, &bio->bi_flags);
|
||||
|
||||
while (bio->bi_idx < bio->bi_vcnt &&
|
||||
bio->bi_io_vec[bio->bi_idx].bv_len <= offset) {
|
||||
/* remove this whole bio_vec */
|
||||
offset -= bio->bi_io_vec[bio->bi_idx].bv_len;
|
||||
bio->bi_idx++;
|
||||
}
|
||||
if (bio->bi_idx < bio->bi_vcnt) {
|
||||
bio->bi_io_vec[bio->bi_idx].bv_offset += offset;
|
||||
bio->bi_io_vec[bio->bi_idx].bv_len -= offset;
|
||||
}
|
||||
bio_advance(bio, offset << 9);
|
||||
|
||||
bio->bi_size = size;
|
||||
|
||||
/* avoid any complications with bi_idx being non-zero*/
|
||||
if (bio->bi_idx) {
|
||||
memmove(bio->bi_io_vec, bio->bi_io_vec+bio->bi_idx,
|
||||
|
@ -502,11 +502,11 @@ static inline int is_io_in_chunk_boundary(struct mddev *mddev,
|
||||
{
|
||||
if (likely(is_power_of_2(chunk_sects))) {
|
||||
return chunk_sects >= ((bio->bi_sector & (chunk_sects-1))
|
||||
+ (bio->bi_size >> 9));
|
||||
+ bio_sectors(bio));
|
||||
} else{
|
||||
sector_t sector = bio->bi_sector;
|
||||
return chunk_sects >= (sector_div(sector, chunk_sects)
|
||||
+ (bio->bi_size >> 9));
|
||||
+ bio_sectors(bio));
|
||||
}
|
||||
}
|
||||
|
||||
@ -527,8 +527,7 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
|
||||
sector_t sector = bio->bi_sector;
|
||||
struct bio_pair *bp;
|
||||
/* Sanity check -- queue functions should prevent this happening */
|
||||
if ((bio->bi_vcnt != 1 && bio->bi_vcnt != 0) ||
|
||||
bio->bi_idx != 0)
|
||||
if (bio_segments(bio) > 1)
|
||||
goto bad_map;
|
||||
/* This is a one page bio that upper layers
|
||||
* refuse to split for us, so we need to split it.
|
||||
@ -567,7 +566,7 @@ bad_map:
|
||||
printk("md/raid0:%s: make_request bug: can't convert block across chunks"
|
||||
" or bigger than %dk %llu %d\n",
|
||||
mdname(mddev), chunk_sects / 2,
|
||||
(unsigned long long)bio->bi_sector, bio->bi_size >> 10);
|
||||
(unsigned long long)bio->bi_sector, bio_sectors(bio) / 2);
|
||||
|
||||
bio_io_error(bio);
|
||||
return;
|
||||
|
@ -92,7 +92,6 @@ static void r1bio_pool_free(void *r1_bio, void *data)
|
||||
static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
|
||||
{
|
||||
struct pool_info *pi = data;
|
||||
struct page *page;
|
||||
struct r1bio *r1_bio;
|
||||
struct bio *bio;
|
||||
int i, j;
|
||||
@ -122,14 +121,10 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
|
||||
j = 1;
|
||||
while(j--) {
|
||||
bio = r1_bio->bios[j];
|
||||
for (i = 0; i < RESYNC_PAGES; i++) {
|
||||
page = alloc_page(gfp_flags);
|
||||
if (unlikely(!page))
|
||||
goto out_free_pages;
|
||||
bio->bi_vcnt = RESYNC_PAGES;
|
||||
|
||||
bio->bi_io_vec[i].bv_page = page;
|
||||
bio->bi_vcnt = i+1;
|
||||
}
|
||||
if (bio_alloc_pages(bio, gfp_flags))
|
||||
goto out_free_bio;
|
||||
}
|
||||
/* If not user-requests, copy the page pointers to all bios */
|
||||
if (!test_bit(MD_RECOVERY_REQUESTED, &pi->mddev->recovery)) {
|
||||
@ -143,11 +138,6 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
|
||||
|
||||
return r1_bio;
|
||||
|
||||
out_free_pages:
|
||||
for (j=0 ; j < pi->raid_disks; j++)
|
||||
for (i=0; i < r1_bio->bios[j]->bi_vcnt ; i++)
|
||||
put_page(r1_bio->bios[j]->bi_io_vec[i].bv_page);
|
||||
j = -1;
|
||||
out_free_bio:
|
||||
while (++j < pi->raid_disks)
|
||||
bio_put(r1_bio->bios[j]);
|
||||
@ -267,7 +257,7 @@ static void raid_end_bio_io(struct r1bio *r1_bio)
|
||||
(bio_data_dir(bio) == WRITE) ? "write" : "read",
|
||||
(unsigned long long) bio->bi_sector,
|
||||
(unsigned long long) bio->bi_sector +
|
||||
(bio->bi_size >> 9) - 1);
|
||||
bio_sectors(bio) - 1);
|
||||
|
||||
call_bio_endio(r1_bio);
|
||||
}
|
||||
@ -458,7 +448,7 @@ static void raid1_end_write_request(struct bio *bio, int error)
|
||||
" %llu-%llu\n",
|
||||
(unsigned long long) mbio->bi_sector,
|
||||
(unsigned long long) mbio->bi_sector +
|
||||
(mbio->bi_size >> 9) - 1);
|
||||
bio_sectors(mbio) - 1);
|
||||
call_bio_endio(r1_bio);
|
||||
}
|
||||
}
|
||||
@ -925,7 +915,7 @@ static void alloc_behind_pages(struct bio *bio, struct r1bio *r1_bio)
|
||||
if (unlikely(!bvecs))
|
||||
return;
|
||||
|
||||
bio_for_each_segment(bvec, bio, i) {
|
||||
bio_for_each_segment_all(bvec, bio, i) {
|
||||
bvecs[i] = *bvec;
|
||||
bvecs[i].bv_page = alloc_page(GFP_NOIO);
|
||||
if (unlikely(!bvecs[i].bv_page))
|
||||
@ -1023,7 +1013,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
|
||||
md_write_start(mddev, bio); /* wait on superblock update early */
|
||||
|
||||
if (bio_data_dir(bio) == WRITE &&
|
||||
bio->bi_sector + bio->bi_size/512 > mddev->suspend_lo &&
|
||||
bio_end_sector(bio) > mddev->suspend_lo &&
|
||||
bio->bi_sector < mddev->suspend_hi) {
|
||||
/* As the suspend_* range is controlled by
|
||||
* userspace, we want an interruptible
|
||||
@ -1034,7 +1024,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
|
||||
flush_signals(current);
|
||||
prepare_to_wait(&conf->wait_barrier,
|
||||
&w, TASK_INTERRUPTIBLE);
|
||||
if (bio->bi_sector + bio->bi_size/512 <= mddev->suspend_lo ||
|
||||
if (bio_end_sector(bio) <= mddev->suspend_lo ||
|
||||
bio->bi_sector >= mddev->suspend_hi)
|
||||
break;
|
||||
schedule();
|
||||
@ -1054,7 +1044,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
|
||||
r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
|
||||
|
||||
r1_bio->master_bio = bio;
|
||||
r1_bio->sectors = bio->bi_size >> 9;
|
||||
r1_bio->sectors = bio_sectors(bio);
|
||||
r1_bio->state = 0;
|
||||
r1_bio->mddev = mddev;
|
||||
r1_bio->sector = bio->bi_sector;
|
||||
@ -1132,7 +1122,7 @@ read_again:
|
||||
r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
|
||||
|
||||
r1_bio->master_bio = bio;
|
||||
r1_bio->sectors = (bio->bi_size >> 9) - sectors_handled;
|
||||
r1_bio->sectors = bio_sectors(bio) - sectors_handled;
|
||||
r1_bio->state = 0;
|
||||
r1_bio->mddev = mddev;
|
||||
r1_bio->sector = bio->bi_sector + sectors_handled;
|
||||
@ -1289,14 +1279,10 @@ read_again:
|
||||
struct bio_vec *bvec;
|
||||
int j;
|
||||
|
||||
/* Yes, I really want the '__' version so that
|
||||
* we clear any unused pointer in the io_vec, rather
|
||||
* than leave them unchanged. This is important
|
||||
* because when we come to free the pages, we won't
|
||||
* know the original bi_idx, so we just free
|
||||
* them all
|
||||
/*
|
||||
* We trimmed the bio, so _all is legit
|
||||
*/
|
||||
__bio_for_each_segment(bvec, mbio, j, 0)
|
||||
bio_for_each_segment_all(bvec, mbio, j)
|
||||
bvec->bv_page = r1_bio->behind_bvecs[j].bv_page;
|
||||
if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags))
|
||||
atomic_inc(&r1_bio->behind_remaining);
|
||||
@ -1334,14 +1320,14 @@ read_again:
|
||||
/* Mustn't call r1_bio_write_done before this next test,
|
||||
* as it could result in the bio being freed.
|
||||
*/
|
||||
if (sectors_handled < (bio->bi_size >> 9)) {
|
||||
if (sectors_handled < bio_sectors(bio)) {
|
||||
r1_bio_write_done(r1_bio);
|
||||
/* We need another r1_bio. It has already been counted
|
||||
* in bio->bi_phys_segments
|
||||
*/
|
||||
r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
|
||||
r1_bio->master_bio = bio;
|
||||
r1_bio->sectors = (bio->bi_size >> 9) - sectors_handled;
|
||||
r1_bio->sectors = bio_sectors(bio) - sectors_handled;
|
||||
r1_bio->state = 0;
|
||||
r1_bio->mddev = mddev;
|
||||
r1_bio->sector = bio->bi_sector + sectors_handled;
|
||||
@ -1867,7 +1853,7 @@ static int process_checks(struct r1bio *r1_bio)
|
||||
struct bio *sbio = r1_bio->bios[i];
|
||||
int size;
|
||||
|
||||
if (r1_bio->bios[i]->bi_end_io != end_sync_read)
|
||||
if (sbio->bi_end_io != end_sync_read)
|
||||
continue;
|
||||
|
||||
if (test_bit(BIO_UPTODATE, &sbio->bi_flags)) {
|
||||
@ -1892,16 +1878,15 @@ static int process_checks(struct r1bio *r1_bio)
|
||||
continue;
|
||||
}
|
||||
/* fixup the bio for reuse */
|
||||
bio_reset(sbio);
|
||||
sbio->bi_vcnt = vcnt;
|
||||
sbio->bi_size = r1_bio->sectors << 9;
|
||||
sbio->bi_idx = 0;
|
||||
sbio->bi_phys_segments = 0;
|
||||
sbio->bi_flags &= ~(BIO_POOL_MASK - 1);
|
||||
sbio->bi_flags |= 1 << BIO_UPTODATE;
|
||||
sbio->bi_next = NULL;
|
||||
sbio->bi_sector = r1_bio->sector +
|
||||
conf->mirrors[i].rdev->data_offset;
|
||||
sbio->bi_bdev = conf->mirrors[i].rdev->bdev;
|
||||
sbio->bi_end_io = end_sync_read;
|
||||
sbio->bi_private = r1_bio;
|
||||
|
||||
size = sbio->bi_size;
|
||||
for (j = 0; j < vcnt ; j++) {
|
||||
struct bio_vec *bi;
|
||||
@ -1912,10 +1897,9 @@ static int process_checks(struct r1bio *r1_bio)
|
||||
else
|
||||
bi->bv_len = size;
|
||||
size -= PAGE_SIZE;
|
||||
memcpy(page_address(bi->bv_page),
|
||||
page_address(pbio->bi_io_vec[j].bv_page),
|
||||
PAGE_SIZE);
|
||||
}
|
||||
|
||||
bio_copy_data(sbio, pbio);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@ -1952,7 +1936,7 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio)
|
||||
wbio->bi_rw = WRITE;
|
||||
wbio->bi_end_io = end_sync_write;
|
||||
atomic_inc(&r1_bio->remaining);
|
||||
md_sync_acct(conf->mirrors[i].rdev->bdev, wbio->bi_size >> 9);
|
||||
md_sync_acct(conf->mirrors[i].rdev->bdev, bio_sectors(wbio));
|
||||
|
||||
generic_make_request(wbio);
|
||||
}
|
||||
@ -2064,32 +2048,11 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
|
||||
}
|
||||
}
|
||||
|
||||
static void bi_complete(struct bio *bio, int error)
|
||||
{
|
||||
complete((struct completion *)bio->bi_private);
|
||||
}
|
||||
|
||||
static int submit_bio_wait(int rw, struct bio *bio)
|
||||
{
|
||||
struct completion event;
|
||||
rw |= REQ_SYNC;
|
||||
|
||||
init_completion(&event);
|
||||
bio->bi_private = &event;
|
||||
bio->bi_end_io = bi_complete;
|
||||
submit_bio(rw, bio);
|
||||
wait_for_completion(&event);
|
||||
|
||||
return test_bit(BIO_UPTODATE, &bio->bi_flags);
|
||||
}
|
||||
|
||||
static int narrow_write_error(struct r1bio *r1_bio, int i)
|
||||
{
|
||||
struct mddev *mddev = r1_bio->mddev;
|
||||
struct r1conf *conf = mddev->private;
|
||||
struct md_rdev *rdev = conf->mirrors[i].rdev;
|
||||
int vcnt, idx;
|
||||
struct bio_vec *vec;
|
||||
|
||||
/* bio has the data to be written to device 'i' where
|
||||
* we just recently had a write error.
|
||||
@ -2117,30 +2080,32 @@ static int narrow_write_error(struct r1bio *r1_bio, int i)
|
||||
& ~(sector_t)(block_sectors - 1))
|
||||
- sector;
|
||||
|
||||
if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
|
||||
vcnt = r1_bio->behind_page_count;
|
||||
vec = r1_bio->behind_bvecs;
|
||||
idx = 0;
|
||||
while (vec[idx].bv_page == NULL)
|
||||
idx++;
|
||||
} else {
|
||||
vcnt = r1_bio->master_bio->bi_vcnt;
|
||||
vec = r1_bio->master_bio->bi_io_vec;
|
||||
idx = r1_bio->master_bio->bi_idx;
|
||||
}
|
||||
while (sect_to_write) {
|
||||
struct bio *wbio;
|
||||
if (sectors > sect_to_write)
|
||||
sectors = sect_to_write;
|
||||
/* Write at 'sector' for 'sectors'*/
|
||||
|
||||
wbio = bio_alloc_mddev(GFP_NOIO, vcnt, mddev);
|
||||
memcpy(wbio->bi_io_vec, vec, vcnt * sizeof(struct bio_vec));
|
||||
wbio->bi_sector = r1_bio->sector;
|
||||
if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
|
||||
unsigned vcnt = r1_bio->behind_page_count;
|
||||
struct bio_vec *vec = r1_bio->behind_bvecs;
|
||||
|
||||
while (!vec->bv_page) {
|
||||
vec++;
|
||||
vcnt--;
|
||||
}
|
||||
|
||||
wbio = bio_alloc_mddev(GFP_NOIO, vcnt, mddev);
|
||||
memcpy(wbio->bi_io_vec, vec, vcnt * sizeof(struct bio_vec));
|
||||
|
||||
wbio->bi_vcnt = vcnt;
|
||||
} else {
|
||||
wbio = bio_clone_mddev(r1_bio->master_bio, GFP_NOIO, mddev);
|
||||
}
|
||||
|
||||
wbio->bi_rw = WRITE;
|
||||
wbio->bi_vcnt = vcnt;
|
||||
wbio->bi_sector = r1_bio->sector;
|
||||
wbio->bi_size = r1_bio->sectors << 9;
|
||||
wbio->bi_idx = idx;
|
||||
|
||||
md_trim_bio(wbio, sector - r1_bio->sector, sectors);
|
||||
wbio->bi_sector += rdev->data_offset;
|
||||
@ -2289,8 +2254,7 @@ read_more:
|
||||
r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
|
||||
|
||||
r1_bio->master_bio = mbio;
|
||||
r1_bio->sectors = (mbio->bi_size >> 9)
|
||||
- sectors_handled;
|
||||
r1_bio->sectors = bio_sectors(mbio) - sectors_handled;
|
||||
r1_bio->state = 0;
|
||||
set_bit(R1BIO_ReadError, &r1_bio->state);
|
||||
r1_bio->mddev = mddev;
|
||||
@ -2464,18 +2428,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
|
||||
for (i = 0; i < conf->raid_disks * 2; i++) {
|
||||
struct md_rdev *rdev;
|
||||
bio = r1_bio->bios[i];
|
||||
|
||||
/* take from bio_init */
|
||||
bio->bi_next = NULL;
|
||||
bio->bi_flags &= ~(BIO_POOL_MASK-1);
|
||||
bio->bi_flags |= 1 << BIO_UPTODATE;
|
||||
bio->bi_rw = READ;
|
||||
bio->bi_vcnt = 0;
|
||||
bio->bi_idx = 0;
|
||||
bio->bi_phys_segments = 0;
|
||||
bio->bi_size = 0;
|
||||
bio->bi_end_io = NULL;
|
||||
bio->bi_private = NULL;
|
||||
bio_reset(bio);
|
||||
|
||||
rdev = rcu_dereference(conf->mirrors[i].rdev);
|
||||
if (rdev == NULL ||
|
||||
|
@ -1174,14 +1174,13 @@ static void make_request(struct mddev *mddev, struct bio * bio)
|
||||
/* If this request crosses a chunk boundary, we need to
|
||||
* split it. This will only happen for 1 PAGE (or less) requests.
|
||||
*/
|
||||
if (unlikely((bio->bi_sector & chunk_mask) + (bio->bi_size >> 9)
|
||||
if (unlikely((bio->bi_sector & chunk_mask) + bio_sectors(bio)
|
||||
> chunk_sects
|
||||
&& (conf->geo.near_copies < conf->geo.raid_disks
|
||||
|| conf->prev.near_copies < conf->prev.raid_disks))) {
|
||||
struct bio_pair *bp;
|
||||
/* Sanity check -- queue functions should prevent this happening */
|
||||
if ((bio->bi_vcnt != 1 && bio->bi_vcnt != 0) ||
|
||||
bio->bi_idx != 0)
|
||||
if (bio_segments(bio) > 1)
|
||||
goto bad_map;
|
||||
/* This is a one page bio that upper layers
|
||||
* refuse to split for us, so we need to split it.
|
||||
@ -1214,7 +1213,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
|
||||
bad_map:
|
||||
printk("md/raid10:%s: make_request bug: can't convert block across chunks"
|
||||
" or bigger than %dk %llu %d\n", mdname(mddev), chunk_sects/2,
|
||||
(unsigned long long)bio->bi_sector, bio->bi_size >> 10);
|
||||
(unsigned long long)bio->bi_sector, bio_sectors(bio) / 2);
|
||||
|
||||
bio_io_error(bio);
|
||||
return;
|
||||
@ -1229,7 +1228,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
|
||||
*/
|
||||
wait_barrier(conf);
|
||||
|
||||
sectors = bio->bi_size >> 9;
|
||||
sectors = bio_sectors(bio);
|
||||
while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
|
||||
bio->bi_sector < conf->reshape_progress &&
|
||||
bio->bi_sector + sectors > conf->reshape_progress) {
|
||||
@ -1331,8 +1330,7 @@ read_again:
|
||||
r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
|
||||
|
||||
r10_bio->master_bio = bio;
|
||||
r10_bio->sectors = ((bio->bi_size >> 9)
|
||||
- sectors_handled);
|
||||
r10_bio->sectors = bio_sectors(bio) - sectors_handled;
|
||||
r10_bio->state = 0;
|
||||
r10_bio->mddev = mddev;
|
||||
r10_bio->sector = bio->bi_sector + sectors_handled;
|
||||
@ -1574,7 +1572,7 @@ retry_write:
|
||||
* after checking if we need to go around again.
|
||||
*/
|
||||
|
||||
if (sectors_handled < (bio->bi_size >> 9)) {
|
||||
if (sectors_handled < bio_sectors(bio)) {
|
||||
one_write_done(r10_bio);
|
||||
/* We need another r10_bio. It has already been counted
|
||||
* in bio->bi_phys_segments.
|
||||
@ -1582,7 +1580,7 @@ retry_write:
|
||||
r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
|
||||
|
||||
r10_bio->master_bio = bio;
|
||||
r10_bio->sectors = (bio->bi_size >> 9) - sectors_handled;
|
||||
r10_bio->sectors = bio_sectors(bio) - sectors_handled;
|
||||
|
||||
r10_bio->mddev = mddev;
|
||||
r10_bio->sector = bio->bi_sector + sectors_handled;
|
||||
@ -2084,13 +2082,10 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
|
||||
* First we need to fixup bv_offset, bv_len and
|
||||
* bi_vecs, as the read request might have corrupted these
|
||||
*/
|
||||
bio_reset(tbio);
|
||||
|
||||
tbio->bi_vcnt = vcnt;
|
||||
tbio->bi_size = r10_bio->sectors << 9;
|
||||
tbio->bi_idx = 0;
|
||||
tbio->bi_phys_segments = 0;
|
||||
tbio->bi_flags &= ~(BIO_POOL_MASK - 1);
|
||||
tbio->bi_flags |= 1 << BIO_UPTODATE;
|
||||
tbio->bi_next = NULL;
|
||||
tbio->bi_rw = WRITE;
|
||||
tbio->bi_private = r10_bio;
|
||||
tbio->bi_sector = r10_bio->devs[i].addr;
|
||||
@ -2108,7 +2103,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
|
||||
d = r10_bio->devs[i].devnum;
|
||||
atomic_inc(&conf->mirrors[d].rdev->nr_pending);
|
||||
atomic_inc(&r10_bio->remaining);
|
||||
md_sync_acct(conf->mirrors[d].rdev->bdev, tbio->bi_size >> 9);
|
||||
md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(tbio));
|
||||
|
||||
tbio->bi_sector += conf->mirrors[d].rdev->data_offset;
|
||||
tbio->bi_bdev = conf->mirrors[d].rdev->bdev;
|
||||
@ -2133,7 +2128,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
|
||||
d = r10_bio->devs[i].devnum;
|
||||
atomic_inc(&r10_bio->remaining);
|
||||
md_sync_acct(conf->mirrors[d].replacement->bdev,
|
||||
tbio->bi_size >> 9);
|
||||
bio_sectors(tbio));
|
||||
generic_make_request(tbio);
|
||||
}
|
||||
|
||||
@ -2259,13 +2254,13 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio)
|
||||
wbio2 = r10_bio->devs[1].repl_bio;
|
||||
if (wbio->bi_end_io) {
|
||||
atomic_inc(&conf->mirrors[d].rdev->nr_pending);
|
||||
md_sync_acct(conf->mirrors[d].rdev->bdev, wbio->bi_size >> 9);
|
||||
md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(wbio));
|
||||
generic_make_request(wbio);
|
||||
}
|
||||
if (wbio2 && wbio2->bi_end_io) {
|
||||
atomic_inc(&conf->mirrors[d].replacement->nr_pending);
|
||||
md_sync_acct(conf->mirrors[d].replacement->bdev,
|
||||
wbio2->bi_size >> 9);
|
||||
bio_sectors(wbio2));
|
||||
generic_make_request(wbio2);
|
||||
}
|
||||
}
|
||||
@ -2536,25 +2531,6 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
|
||||
}
|
||||
}
|
||||
|
||||
static void bi_complete(struct bio *bio, int error)
|
||||
{
|
||||
complete((struct completion *)bio->bi_private);
|
||||
}
|
||||
|
||||
static int submit_bio_wait(int rw, struct bio *bio)
|
||||
{
|
||||
struct completion event;
|
||||
rw |= REQ_SYNC;
|
||||
|
||||
init_completion(&event);
|
||||
bio->bi_private = &event;
|
||||
bio->bi_end_io = bi_complete;
|
||||
submit_bio(rw, bio);
|
||||
wait_for_completion(&event);
|
||||
|
||||
return test_bit(BIO_UPTODATE, &bio->bi_flags);
|
||||
}
|
||||
|
||||
static int narrow_write_error(struct r10bio *r10_bio, int i)
|
||||
{
|
||||
struct bio *bio = r10_bio->master_bio;
|
||||
@ -2695,8 +2671,7 @@ read_more:
|
||||
r10_bio = mempool_alloc(conf->r10bio_pool,
|
||||
GFP_NOIO);
|
||||
r10_bio->master_bio = mbio;
|
||||
r10_bio->sectors = (mbio->bi_size >> 9)
|
||||
- sectors_handled;
|
||||
r10_bio->sectors = bio_sectors(mbio) - sectors_handled;
|
||||
r10_bio->state = 0;
|
||||
set_bit(R10BIO_ReadError,
|
||||
&r10_bio->state);
|
||||
@ -3133,6 +3108,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
|
||||
}
|
||||
}
|
||||
bio = r10_bio->devs[0].bio;
|
||||
bio_reset(bio);
|
||||
bio->bi_next = biolist;
|
||||
biolist = bio;
|
||||
bio->bi_private = r10_bio;
|
||||
@ -3157,6 +3133,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
|
||||
rdev = mirror->rdev;
|
||||
if (!test_bit(In_sync, &rdev->flags)) {
|
||||
bio = r10_bio->devs[1].bio;
|
||||
bio_reset(bio);
|
||||
bio->bi_next = biolist;
|
||||
biolist = bio;
|
||||
bio->bi_private = r10_bio;
|
||||
@ -3185,6 +3162,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
|
||||
if (rdev == NULL || bio == NULL ||
|
||||
test_bit(Faulty, &rdev->flags))
|
||||
break;
|
||||
bio_reset(bio);
|
||||
bio->bi_next = biolist;
|
||||
biolist = bio;
|
||||
bio->bi_private = r10_bio;
|
||||
@ -3283,7 +3261,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
|
||||
r10_bio->devs[i].repl_bio->bi_end_io = NULL;
|
||||
|
||||
bio = r10_bio->devs[i].bio;
|
||||
bio->bi_end_io = NULL;
|
||||
bio_reset(bio);
|
||||
clear_bit(BIO_UPTODATE, &bio->bi_flags);
|
||||
if (conf->mirrors[d].rdev == NULL ||
|
||||
test_bit(Faulty, &conf->mirrors[d].rdev->flags))
|
||||
@ -3320,6 +3298,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
|
||||
|
||||
/* Need to set up for writing to the replacement */
|
||||
bio = r10_bio->devs[i].repl_bio;
|
||||
bio_reset(bio);
|
||||
clear_bit(BIO_UPTODATE, &bio->bi_flags);
|
||||
|
||||
sector = r10_bio->devs[i].addr;
|
||||
@ -3353,17 +3332,6 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
|
||||
}
|
||||
}
|
||||
|
||||
for (bio = biolist; bio ; bio=bio->bi_next) {
|
||||
|
||||
bio->bi_flags &= ~(BIO_POOL_MASK - 1);
|
||||
if (bio->bi_end_io)
|
||||
bio->bi_flags |= 1 << BIO_UPTODATE;
|
||||
bio->bi_vcnt = 0;
|
||||
bio->bi_idx = 0;
|
||||
bio->bi_phys_segments = 0;
|
||||
bio->bi_size = 0;
|
||||
}
|
||||
|
||||
nr_sectors = 0;
|
||||
if (sector_nr + max_sync < max_sector)
|
||||
max_sector = sector_nr + max_sync;
|
||||
@ -4411,7 +4379,6 @@ read_more:
|
||||
read_bio->bi_flags &= ~(BIO_POOL_MASK - 1);
|
||||
read_bio->bi_flags |= 1 << BIO_UPTODATE;
|
||||
read_bio->bi_vcnt = 0;
|
||||
read_bio->bi_idx = 0;
|
||||
read_bio->bi_size = 0;
|
||||
r10_bio->master_bio = read_bio;
|
||||
r10_bio->read_slot = r10_bio->devs[r10_bio->read_slot].devnum;
|
||||
@ -4435,17 +4402,14 @@ read_more:
|
||||
}
|
||||
if (!rdev2 || test_bit(Faulty, &rdev2->flags))
|
||||
continue;
|
||||
|
||||
bio_reset(b);
|
||||
b->bi_bdev = rdev2->bdev;
|
||||
b->bi_sector = r10_bio->devs[s/2].addr + rdev2->new_data_offset;
|
||||
b->bi_private = r10_bio;
|
||||
b->bi_end_io = end_reshape_write;
|
||||
b->bi_rw = WRITE;
|
||||
b->bi_flags &= ~(BIO_POOL_MASK - 1);
|
||||
b->bi_flags |= 1 << BIO_UPTODATE;
|
||||
b->bi_next = blist;
|
||||
b->bi_vcnt = 0;
|
||||
b->bi_idx = 0;
|
||||
b->bi_size = 0;
|
||||
blist = b;
|
||||
}
|
||||
|
||||
|
@ -90,7 +90,7 @@ static inline struct hlist_head *stripe_hash(struct r5conf *conf, sector_t sect)
|
||||
*/
|
||||
static inline struct bio *r5_next_bio(struct bio *bio, sector_t sector)
|
||||
{
|
||||
int sectors = bio->bi_size >> 9;
|
||||
int sectors = bio_sectors(bio);
|
||||
if (bio->bi_sector + sectors < sector + STRIPE_SECTORS)
|
||||
return bio->bi_next;
|
||||
else
|
||||
@ -569,14 +569,6 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
|
||||
bi = &sh->dev[i].req;
|
||||
rbi = &sh->dev[i].rreq; /* For writing to replacement */
|
||||
|
||||
bi->bi_rw = rw;
|
||||
rbi->bi_rw = rw;
|
||||
if (rw & WRITE) {
|
||||
bi->bi_end_io = raid5_end_write_request;
|
||||
rbi->bi_end_io = raid5_end_write_request;
|
||||
} else
|
||||
bi->bi_end_io = raid5_end_read_request;
|
||||
|
||||
rcu_read_lock();
|
||||
rrdev = rcu_dereference(conf->disks[i].replacement);
|
||||
smp_mb(); /* Ensure that if rrdev is NULL, rdev won't be */
|
||||
@ -651,7 +643,14 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
|
||||
|
||||
set_bit(STRIPE_IO_STARTED, &sh->state);
|
||||
|
||||
bio_reset(bi);
|
||||
bi->bi_bdev = rdev->bdev;
|
||||
bi->bi_rw = rw;
|
||||
bi->bi_end_io = (rw & WRITE)
|
||||
? raid5_end_write_request
|
||||
: raid5_end_read_request;
|
||||
bi->bi_private = sh;
|
||||
|
||||
pr_debug("%s: for %llu schedule op %ld on disc %d\n",
|
||||
__func__, (unsigned long long)sh->sector,
|
||||
bi->bi_rw, i);
|
||||
@ -665,12 +664,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
|
||||
if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
|
||||
bi->bi_rw |= REQ_FLUSH;
|
||||
|
||||
bi->bi_flags = 1 << BIO_UPTODATE;
|
||||
bi->bi_idx = 0;
|
||||
bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
|
||||
bi->bi_io_vec[0].bv_offset = 0;
|
||||
bi->bi_size = STRIPE_SIZE;
|
||||
bi->bi_next = NULL;
|
||||
if (rrdev)
|
||||
set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags);
|
||||
|
||||
@ -687,7 +683,13 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
|
||||
|
||||
set_bit(STRIPE_IO_STARTED, &sh->state);
|
||||
|
||||
bio_reset(rbi);
|
||||
rbi->bi_bdev = rrdev->bdev;
|
||||
rbi->bi_rw = rw;
|
||||
BUG_ON(!(rw & WRITE));
|
||||
rbi->bi_end_io = raid5_end_write_request;
|
||||
rbi->bi_private = sh;
|
||||
|
||||
pr_debug("%s: for %llu schedule op %ld on "
|
||||
"replacement disc %d\n",
|
||||
__func__, (unsigned long long)sh->sector,
|
||||
@ -699,12 +701,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
|
||||
else
|
||||
rbi->bi_sector = (sh->sector
|
||||
+ rrdev->data_offset);
|
||||
rbi->bi_flags = 1 << BIO_UPTODATE;
|
||||
rbi->bi_idx = 0;
|
||||
rbi->bi_io_vec[0].bv_len = STRIPE_SIZE;
|
||||
rbi->bi_io_vec[0].bv_offset = 0;
|
||||
rbi->bi_size = STRIPE_SIZE;
|
||||
rbi->bi_next = NULL;
|
||||
if (conf->mddev->gendisk)
|
||||
trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev),
|
||||
rbi, disk_devt(conf->mddev->gendisk),
|
||||
@ -2402,11 +2401,11 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
|
||||
} else
|
||||
bip = &sh->dev[dd_idx].toread;
|
||||
while (*bip && (*bip)->bi_sector < bi->bi_sector) {
|
||||
if ((*bip)->bi_sector + ((*bip)->bi_size >> 9) > bi->bi_sector)
|
||||
if (bio_end_sector(*bip) > bi->bi_sector)
|
||||
goto overlap;
|
||||
bip = & (*bip)->bi_next;
|
||||
}
|
||||
if (*bip && (*bip)->bi_sector < bi->bi_sector + ((bi->bi_size)>>9))
|
||||
if (*bip && (*bip)->bi_sector < bio_end_sector(bi))
|
||||
goto overlap;
|
||||
|
||||
BUG_ON(*bip && bi->bi_next && (*bip) != bi->bi_next);
|
||||
@ -2422,8 +2421,8 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
|
||||
sector < sh->dev[dd_idx].sector + STRIPE_SECTORS &&
|
||||
bi && bi->bi_sector <= sector;
|
||||
bi = r5_next_bio(bi, sh->dev[dd_idx].sector)) {
|
||||
if (bi->bi_sector + (bi->bi_size>>9) >= sector)
|
||||
sector = bi->bi_sector + (bi->bi_size>>9);
|
||||
if (bio_end_sector(bi) >= sector)
|
||||
sector = bio_end_sector(bi);
|
||||
}
|
||||
if (sector >= sh->dev[dd_idx].sector + STRIPE_SECTORS)
|
||||
set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags);
|
||||
@ -3849,7 +3848,7 @@ static int in_chunk_boundary(struct mddev *mddev, struct bio *bio)
|
||||
{
|
||||
sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev);
|
||||
unsigned int chunk_sectors = mddev->chunk_sectors;
|
||||
unsigned int bio_sectors = bio->bi_size >> 9;
|
||||
unsigned int bio_sectors = bio_sectors(bio);
|
||||
|
||||
if (mddev->new_chunk_sectors < mddev->chunk_sectors)
|
||||
chunk_sectors = mddev->new_chunk_sectors;
|
||||
@ -3941,7 +3940,7 @@ static int bio_fits_rdev(struct bio *bi)
|
||||
{
|
||||
struct request_queue *q = bdev_get_queue(bi->bi_bdev);
|
||||
|
||||
if ((bi->bi_size>>9) > queue_max_sectors(q))
|
||||
if (bio_sectors(bi) > queue_max_sectors(q))
|
||||
return 0;
|
||||
blk_recount_segments(q, bi);
|
||||
if (bi->bi_phys_segments > queue_max_segments(q))
|
||||
@ -3988,7 +3987,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
|
||||
0,
|
||||
&dd_idx, NULL);
|
||||
|
||||
end_sector = align_bi->bi_sector + (align_bi->bi_size >> 9);
|
||||
end_sector = bio_end_sector(align_bi);
|
||||
rcu_read_lock();
|
||||
rdev = rcu_dereference(conf->disks[dd_idx].replacement);
|
||||
if (!rdev || test_bit(Faulty, &rdev->flags) ||
|
||||
@ -4011,7 +4010,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
|
||||
align_bi->bi_flags &= ~(1 << BIO_SEG_VALID);
|
||||
|
||||
if (!bio_fits_rdev(align_bi) ||
|
||||
is_badblock(rdev, align_bi->bi_sector, align_bi->bi_size>>9,
|
||||
is_badblock(rdev, align_bi->bi_sector, bio_sectors(align_bi),
|
||||
&first_bad, &bad_sectors)) {
|
||||
/* too big in some way, or has a known bad block */
|
||||
bio_put(align_bi);
|
||||
@ -4273,7 +4272,7 @@ static void make_request(struct mddev *mddev, struct bio * bi)
|
||||
}
|
||||
|
||||
logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
|
||||
last_sector = bi->bi_sector + (bi->bi_size>>9);
|
||||
last_sector = bio_end_sector(bi);
|
||||
bi->bi_next = NULL;
|
||||
bi->bi_phys_segments = 1; /* over-loaded to count active stripes */
|
||||
|
||||
@ -4739,7 +4738,7 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
|
||||
logical_sector = raid_bio->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
|
||||
sector = raid5_compute_sector(conf, logical_sector,
|
||||
0, &dd_idx, NULL);
|
||||
last_sector = raid_bio->bi_sector + (raid_bio->bi_size>>9);
|
||||
last_sector = bio_end_sector(raid_bio);
|
||||
|
||||
for (; logical_sector < last_sector;
|
||||
logical_sector += STRIPE_SECTORS,
|
||||
|
@ -2235,10 +2235,10 @@ static int mptsas_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
|
||||
}
|
||||
|
||||
/* do we need to support multiple segments? */
|
||||
if (req->bio->bi_vcnt > 1 || rsp->bio->bi_vcnt > 1) {
|
||||
if (bio_segments(req->bio) > 1 || bio_segments(rsp->bio) > 1) {
|
||||
printk(MYIOC_s_ERR_FMT "%s: multiple segments req %u %u, rsp %u %u\n",
|
||||
ioc->name, __func__, req->bio->bi_vcnt, blk_rq_bytes(req),
|
||||
rsp->bio->bi_vcnt, blk_rq_bytes(rsp));
|
||||
ioc->name, __func__, bio_segments(req->bio), blk_rq_bytes(req),
|
||||
bio_segments(rsp->bio), blk_rq_bytes(rsp));
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
|
@ -822,8 +822,7 @@ dcssblk_make_request(struct request_queue *q, struct bio *bio)
|
||||
if ((bio->bi_sector & 7) != 0 || (bio->bi_size & 4095) != 0)
|
||||
/* Request is not page-aligned. */
|
||||
goto fail;
|
||||
if (((bio->bi_size >> 9) + bio->bi_sector)
|
||||
> get_capacity(bio->bi_bdev->bd_disk)) {
|
||||
if (bio_end_sector(bio) > get_capacity(bio->bi_bdev->bd_disk)) {
|
||||
/* Request beyond end of DCSS segment. */
|
||||
goto fail;
|
||||
}
|
||||
|
@ -2163,10 +2163,10 @@ int sas_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
|
||||
}
|
||||
|
||||
/* do we need to support multiple segments? */
|
||||
if (req->bio->bi_vcnt > 1 || rsp->bio->bi_vcnt > 1) {
|
||||
if (bio_segments(req->bio) > 1 || bio_segments(rsp->bio) > 1) {
|
||||
printk("%s: multiple segments req %u %u, rsp %u %u\n",
|
||||
__func__, req->bio->bi_vcnt, blk_rq_bytes(req),
|
||||
rsp->bio->bi_vcnt, blk_rq_bytes(rsp));
|
||||
__func__, bio_segments(req->bio), blk_rq_bytes(req),
|
||||
bio_segments(rsp->bio), blk_rq_bytes(rsp));
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
|
@ -1939,7 +1939,7 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
|
||||
ioc->transport_cmds.status = MPT2_CMD_PENDING;
|
||||
|
||||
/* Check if the request is split across multiple segments */
|
||||
if (req->bio->bi_vcnt > 1) {
|
||||
if (bio_segments(req->bio) > 1) {
|
||||
u32 offset = 0;
|
||||
|
||||
/* Allocate memory and copy the request */
|
||||
@ -1971,7 +1971,7 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
|
||||
|
||||
/* Check if the response needs to be populated across
|
||||
* multiple segments */
|
||||
if (rsp->bio->bi_vcnt > 1) {
|
||||
if (bio_segments(rsp->bio) > 1) {
|
||||
pci_addr_in = pci_alloc_consistent(ioc->pdev, blk_rq_bytes(rsp),
|
||||
&pci_dma_in);
|
||||
if (!pci_addr_in) {
|
||||
@ -2038,7 +2038,7 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
|
||||
sgl_flags = (MPI2_SGE_FLAGS_SIMPLE_ELEMENT |
|
||||
MPI2_SGE_FLAGS_END_OF_BUFFER | MPI2_SGE_FLAGS_HOST_TO_IOC);
|
||||
sgl_flags = sgl_flags << MPI2_SGE_FLAGS_SHIFT;
|
||||
if (req->bio->bi_vcnt > 1) {
|
||||
if (bio_segments(req->bio) > 1) {
|
||||
ioc->base_add_sg_single(psge, sgl_flags |
|
||||
(blk_rq_bytes(req) - 4), pci_dma_out);
|
||||
} else {
|
||||
@ -2054,7 +2054,7 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
|
||||
MPI2_SGE_FLAGS_LAST_ELEMENT | MPI2_SGE_FLAGS_END_OF_BUFFER |
|
||||
MPI2_SGE_FLAGS_END_OF_LIST);
|
||||
sgl_flags = sgl_flags << MPI2_SGE_FLAGS_SHIFT;
|
||||
if (rsp->bio->bi_vcnt > 1) {
|
||||
if (bio_segments(rsp->bio) > 1) {
|
||||
ioc->base_add_sg_single(psge, sgl_flags |
|
||||
(blk_rq_bytes(rsp) + 4), pci_dma_in);
|
||||
} else {
|
||||
@ -2099,7 +2099,7 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
|
||||
le16_to_cpu(mpi_reply->ResponseDataLength);
|
||||
/* check if the resp needs to be copied from the allocated
|
||||
* pci mem */
|
||||
if (rsp->bio->bi_vcnt > 1) {
|
||||
if (bio_segments(rsp->bio) > 1) {
|
||||
u32 offset = 0;
|
||||
u32 bytes_to_copy =
|
||||
le16_to_cpu(mpi_reply->ResponseDataLength);
|
||||
|
@ -27,48 +27,11 @@
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/slab.h>
|
||||
|
||||
struct integrity_slab {
|
||||
struct kmem_cache *slab;
|
||||
unsigned short nr_vecs;
|
||||
char name[8];
|
||||
};
|
||||
|
||||
#define IS(x) { .nr_vecs = x, .name = "bip-"__stringify(x) }
|
||||
struct integrity_slab bip_slab[BIOVEC_NR_POOLS] __read_mostly = {
|
||||
IS(1), IS(4), IS(16), IS(64), IS(128), IS(BIO_MAX_PAGES),
|
||||
};
|
||||
#undef IS
|
||||
#define BIP_INLINE_VECS 4
|
||||
|
||||
static struct kmem_cache *bip_slab;
|
||||
static struct workqueue_struct *kintegrityd_wq;
|
||||
|
||||
static inline unsigned int vecs_to_idx(unsigned int nr)
|
||||
{
|
||||
switch (nr) {
|
||||
case 1:
|
||||
return 0;
|
||||
case 2 ... 4:
|
||||
return 1;
|
||||
case 5 ... 16:
|
||||
return 2;
|
||||
case 17 ... 64:
|
||||
return 3;
|
||||
case 65 ... 128:
|
||||
return 4;
|
||||
case 129 ... BIO_MAX_PAGES:
|
||||
return 5;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
}
|
||||
|
||||
static inline int use_bip_pool(unsigned int idx)
|
||||
{
|
||||
if (idx == BIOVEC_MAX_IDX)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* bio_integrity_alloc - Allocate integrity payload and attach it to bio
|
||||
* @bio: bio to attach integrity metadata to
|
||||
@ -84,37 +47,41 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
|
||||
unsigned int nr_vecs)
|
||||
{
|
||||
struct bio_integrity_payload *bip;
|
||||
unsigned int idx = vecs_to_idx(nr_vecs);
|
||||
struct bio_set *bs = bio->bi_pool;
|
||||
unsigned long idx = BIO_POOL_NONE;
|
||||
unsigned inline_vecs;
|
||||
|
||||
if (!bs)
|
||||
bs = fs_bio_set;
|
||||
|
||||
BUG_ON(bio == NULL);
|
||||
bip = NULL;
|
||||
|
||||
/* Lower order allocations come straight from slab */
|
||||
if (!use_bip_pool(idx))
|
||||
bip = kmem_cache_alloc(bip_slab[idx].slab, gfp_mask);
|
||||
|
||||
/* Use mempool if lower order alloc failed or max vecs were requested */
|
||||
if (bip == NULL) {
|
||||
idx = BIOVEC_MAX_IDX; /* so we free the payload properly later */
|
||||
if (!bs) {
|
||||
bip = kmalloc(sizeof(struct bio_integrity_payload) +
|
||||
sizeof(struct bio_vec) * nr_vecs, gfp_mask);
|
||||
inline_vecs = nr_vecs;
|
||||
} else {
|
||||
bip = mempool_alloc(bs->bio_integrity_pool, gfp_mask);
|
||||
|
||||
if (unlikely(bip == NULL)) {
|
||||
printk(KERN_ERR "%s: could not alloc bip\n", __func__);
|
||||
return NULL;
|
||||
}
|
||||
inline_vecs = BIP_INLINE_VECS;
|
||||
}
|
||||
|
||||
if (unlikely(!bip))
|
||||
return NULL;
|
||||
|
||||
memset(bip, 0, sizeof(*bip));
|
||||
|
||||
if (nr_vecs > inline_vecs) {
|
||||
bip->bip_vec = bvec_alloc(gfp_mask, nr_vecs, &idx,
|
||||
bs->bvec_integrity_pool);
|
||||
if (!bip->bip_vec)
|
||||
goto err;
|
||||
} else {
|
||||
bip->bip_vec = bip->bip_inline_vecs;
|
||||
}
|
||||
|
||||
bip->bip_slab = idx;
|
||||
bip->bip_bio = bio;
|
||||
bio->bi_integrity = bip;
|
||||
|
||||
return bip;
|
||||
err:
|
||||
mempool_free(bip, bs->bio_integrity_pool);
|
||||
return NULL;
|
||||
}
|
||||
EXPORT_SYMBOL(bio_integrity_alloc);
|
||||
|
||||
@ -130,20 +97,18 @@ void bio_integrity_free(struct bio *bio)
|
||||
struct bio_integrity_payload *bip = bio->bi_integrity;
|
||||
struct bio_set *bs = bio->bi_pool;
|
||||
|
||||
if (!bs)
|
||||
bs = fs_bio_set;
|
||||
|
||||
BUG_ON(bip == NULL);
|
||||
|
||||
/* A cloned bio doesn't own the integrity metadata */
|
||||
if (!bio_flagged(bio, BIO_CLONED) && !bio_flagged(bio, BIO_FS_INTEGRITY)
|
||||
&& bip->bip_buf != NULL)
|
||||
if (bip->bip_owns_buf)
|
||||
kfree(bip->bip_buf);
|
||||
|
||||
if (use_bip_pool(bip->bip_slab))
|
||||
if (bs) {
|
||||
if (bip->bip_slab != BIO_POOL_NONE)
|
||||
bvec_free(bs->bvec_integrity_pool, bip->bip_vec,
|
||||
bip->bip_slab);
|
||||
|
||||
mempool_free(bip, bs->bio_integrity_pool);
|
||||
else
|
||||
kmem_cache_free(bip_slab[bip->bip_slab].slab, bip);
|
||||
} else {
|
||||
kfree(bip);
|
||||
}
|
||||
|
||||
bio->bi_integrity = NULL;
|
||||
}
|
||||
@ -419,6 +384,7 @@ int bio_integrity_prep(struct bio *bio)
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
bip->bip_owns_buf = 1;
|
||||
bip->bip_buf = buf;
|
||||
bip->bip_size = len;
|
||||
bip->bip_sector = bio->bi_sector;
|
||||
@ -694,11 +660,11 @@ void bio_integrity_split(struct bio *bio, struct bio_pair *bp, int sectors)
|
||||
bp->bio1.bi_integrity = &bp->bip1;
|
||||
bp->bio2.bi_integrity = &bp->bip2;
|
||||
|
||||
bp->iv1 = bip->bip_vec[0];
|
||||
bp->iv2 = bip->bip_vec[0];
|
||||
bp->iv1 = bip->bip_vec[bip->bip_idx];
|
||||
bp->iv2 = bip->bip_vec[bip->bip_idx];
|
||||
|
||||
bp->bip1.bip_vec[0] = bp->iv1;
|
||||
bp->bip2.bip_vec[0] = bp->iv2;
|
||||
bp->bip1.bip_vec = &bp->iv1;
|
||||
bp->bip2.bip_vec = &bp->iv2;
|
||||
|
||||
bp->iv1.bv_len = sectors * bi->tuple_size;
|
||||
bp->iv2.bv_offset += sectors * bi->tuple_size;
|
||||
@ -746,13 +712,14 @@ EXPORT_SYMBOL(bio_integrity_clone);
|
||||
|
||||
int bioset_integrity_create(struct bio_set *bs, int pool_size)
|
||||
{
|
||||
unsigned int max_slab = vecs_to_idx(BIO_MAX_PAGES);
|
||||
|
||||
if (bs->bio_integrity_pool)
|
||||
return 0;
|
||||
|
||||
bs->bio_integrity_pool =
|
||||
mempool_create_slab_pool(pool_size, bip_slab[max_slab].slab);
|
||||
bs->bio_integrity_pool = mempool_create_slab_pool(pool_size, bip_slab);
|
||||
|
||||
bs->bvec_integrity_pool = biovec_create_pool(bs, pool_size);
|
||||
if (!bs->bvec_integrity_pool)
|
||||
return -1;
|
||||
|
||||
if (!bs->bio_integrity_pool)
|
||||
return -1;
|
||||
@ -765,13 +732,14 @@ void bioset_integrity_free(struct bio_set *bs)
|
||||
{
|
||||
if (bs->bio_integrity_pool)
|
||||
mempool_destroy(bs->bio_integrity_pool);
|
||||
|
||||
if (bs->bvec_integrity_pool)
|
||||
mempool_destroy(bs->bio_integrity_pool);
|
||||
}
|
||||
EXPORT_SYMBOL(bioset_integrity_free);
|
||||
|
||||
void __init bio_integrity_init(void)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
/*
|
||||
* kintegrityd won't block much but may burn a lot of CPU cycles.
|
||||
* Make it highpri CPU intensive wq with max concurrency of 1.
|
||||
@ -781,14 +749,10 @@ void __init bio_integrity_init(void)
|
||||
if (!kintegrityd_wq)
|
||||
panic("Failed to create kintegrityd\n");
|
||||
|
||||
for (i = 0 ; i < BIOVEC_NR_POOLS ; i++) {
|
||||
unsigned int size;
|
||||
|
||||
size = sizeof(struct bio_integrity_payload)
|
||||
+ bip_slab[i].nr_vecs * sizeof(struct bio_vec);
|
||||
|
||||
bip_slab[i].slab =
|
||||
kmem_cache_create(bip_slab[i].name, size, 0,
|
||||
SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
|
||||
}
|
||||
bip_slab = kmem_cache_create("bio_integrity_payload",
|
||||
sizeof(struct bio_integrity_payload) +
|
||||
sizeof(struct bio_vec) * BIP_INLINE_VECS,
|
||||
0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
|
||||
if (!bip_slab)
|
||||
panic("Failed to create slab\n");
|
||||
}
|
||||
|
fs/bio.c | 366
@ -161,12 +161,12 @@ unsigned int bvec_nr_vecs(unsigned short idx)
|
||||
return bvec_slabs[idx].nr_vecs;
|
||||
}
|
||||
|
||||
void bvec_free_bs(struct bio_set *bs, struct bio_vec *bv, unsigned int idx)
|
||||
void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned int idx)
|
||||
{
|
||||
BIO_BUG_ON(idx >= BIOVEC_NR_POOLS);
|
||||
|
||||
if (idx == BIOVEC_MAX_IDX)
|
||||
mempool_free(bv, bs->bvec_pool);
|
||||
mempool_free(bv, pool);
|
||||
else {
|
||||
struct biovec_slab *bvs = bvec_slabs + idx;
|
||||
|
||||
@ -174,8 +174,8 @@ void bvec_free_bs(struct bio_set *bs, struct bio_vec *bv, unsigned int idx)
|
||||
}
|
||||
}
|
||||
|
||||
struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx,
|
||||
struct bio_set *bs)
|
||||
struct bio_vec *bvec_alloc(gfp_t gfp_mask, int nr, unsigned long *idx,
|
||||
mempool_t *pool)
|
||||
{
|
||||
struct bio_vec *bvl;
|
||||
|
||||
@ -211,7 +211,7 @@ struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx,
|
||||
*/
|
||||
if (*idx == BIOVEC_MAX_IDX) {
|
||||
fallback:
|
||||
bvl = mempool_alloc(bs->bvec_pool, gfp_mask);
|
||||
bvl = mempool_alloc(pool, gfp_mask);
|
||||
} else {
|
||||
struct biovec_slab *bvs = bvec_slabs + *idx;
|
||||
gfp_t __gfp_mask = gfp_mask & ~(__GFP_WAIT | __GFP_IO);
|
||||
@ -253,8 +253,8 @@ static void bio_free(struct bio *bio)
|
||||
__bio_free(bio);
|
||||
|
||||
if (bs) {
|
||||
if (bio_has_allocated_vec(bio))
|
||||
bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio));
|
||||
if (bio_flagged(bio, BIO_OWNS_VEC))
|
||||
bvec_free(bs->bvec_pool, bio->bi_io_vec, BIO_POOL_IDX(bio));
|
||||
|
||||
/*
|
||||
* If we have front padding, adjust the bio pointer before freeing
|
||||
@ -298,6 +298,54 @@ void bio_reset(struct bio *bio)
|
||||
}
|
||||
EXPORT_SYMBOL(bio_reset);
|
||||
|
||||
static void bio_alloc_rescue(struct work_struct *work)
|
||||
{
|
||||
struct bio_set *bs = container_of(work, struct bio_set, rescue_work);
|
||||
struct bio *bio;
|
||||
|
||||
while (1) {
|
||||
spin_lock(&bs->rescue_lock);
|
||||
bio = bio_list_pop(&bs->rescue_list);
|
||||
spin_unlock(&bs->rescue_lock);
|
||||
|
||||
if (!bio)
|
||||
break;
|
||||
|
||||
generic_make_request(bio);
|
||||
}
|
||||
}
|
||||
|
||||
static void punt_bios_to_rescuer(struct bio_set *bs)
|
||||
{
|
||||
struct bio_list punt, nopunt;
|
||||
struct bio *bio;
|
||||
|
||||
/*
|
||||
* In order to guarantee forward progress we must punt only bios that
|
||||
* were allocated from this bio_set; otherwise, if there was a bio on
|
||||
* there for a stacking driver higher up in the stack, processing it
|
||||
* could require allocating bios from this bio_set, and doing that from
|
||||
* our own rescuer would be bad.
|
||||
*
|
||||
* Since bio lists are singly linked, pop them all instead of trying to
|
||||
* remove from the middle of the list:
|
||||
*/
|
||||
|
||||
bio_list_init(&punt);
|
||||
bio_list_init(&nopunt);
|
||||
|
||||
while ((bio = bio_list_pop(current->bio_list)))
|
||||
bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
|
||||
|
||||
*current->bio_list = nopunt;
|
||||
|
||||
spin_lock(&bs->rescue_lock);
|
||||
bio_list_merge(&bs->rescue_list, &punt);
|
||||
spin_unlock(&bs->rescue_lock);
|
||||
|
||||
queue_work(bs->rescue_workqueue, &bs->rescue_work);
|
||||
}
|
||||
|
||||
/**
|
||||
* bio_alloc_bioset - allocate a bio for I/O
|
||||
* @gfp_mask: the GFP_ mask given to the slab allocator
|
||||
@ -315,11 +363,27 @@ EXPORT_SYMBOL(bio_reset);
|
||||
* previously allocated bio for IO before attempting to allocate a new one.
|
||||
* Failure to do so can cause deadlocks under memory pressure.
|
||||
*
|
||||
* Note that when running under generic_make_request() (i.e. any block
|
||||
* driver), bios are not submitted until after you return - see the code in
|
||||
* generic_make_request() that converts recursion into iteration, to prevent
|
||||
* stack overflows.
|
||||
*
|
||||
* This would normally mean allocating multiple bios under
|
||||
* generic_make_request() would be susceptible to deadlocks, but we have
|
||||
* deadlock avoidance code that resubmits any blocked bios from a rescuer
|
||||
* thread.
|
||||
*
|
||||
* However, we do not guarantee forward progress for allocations from other
|
||||
* mempools. Doing multiple allocations from the same mempool under
|
||||
* generic_make_request() should be avoided - instead, use bio_set's front_pad
|
||||
* for per bio allocations.
|
||||
*
|
||||
* RETURNS:
|
||||
* Pointer to new bio on success, NULL on failure.
|
||||
*/
|
||||
struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
|
||||
{
|
||||
gfp_t saved_gfp = gfp_mask;
|
||||
unsigned front_pad;
|
||||
unsigned inline_vecs;
|
||||
unsigned long idx = BIO_POOL_NONE;
|
||||
@ -337,7 +401,37 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
|
||||
front_pad = 0;
|
||||
inline_vecs = nr_iovecs;
|
||||
} else {
|
||||
/*
|
||||
* generic_make_request() converts recursion to iteration; this
|
||||
* means if we're running beneath it, any bios we allocate and
|
||||
* submit will not be submitted (and thus freed) until after we
|
||||
* return.
|
||||
*
|
||||
* This exposes us to a potential deadlock if we allocate
|
||||
* multiple bios from the same bio_set() while running
|
||||
* underneath generic_make_request(). If we were to allocate
|
||||
* multiple bios (say a stacking block driver that was splitting
|
||||
* bios), we would deadlock if we exhausted the mempool's
|
||||
* reserve.
|
||||
*
|
||||
* We solve this, and guarantee forward progress, with a rescuer
|
||||
* workqueue per bio_set. If we go to allocate and there are
|
||||
* bios on current->bio_list, we first try the allocation
|
||||
* without __GFP_WAIT; if that fails, we punt those bios we
|
||||
* would be blocking to the rescuer workqueue before we retry
|
||||
* with the original gfp_flags.
|
||||
*/
|
||||
|
||||
if (current->bio_list && !bio_list_empty(current->bio_list))
|
||||
gfp_mask &= ~__GFP_WAIT;
|
||||
|
||||
p = mempool_alloc(bs->bio_pool, gfp_mask);
|
||||
if (!p && gfp_mask != saved_gfp) {
|
||||
punt_bios_to_rescuer(bs);
|
||||
gfp_mask = saved_gfp;
|
||||
p = mempool_alloc(bs->bio_pool, gfp_mask);
|
||||
}
|
||||
|
||||
front_pad = bs->front_pad;
|
||||
inline_vecs = BIO_INLINE_VECS;
|
||||
}
|
||||
@ -349,9 +443,17 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
|
||||
bio_init(bio);
|
||||
|
||||
if (nr_iovecs > inline_vecs) {
|
||||
bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs);
|
||||
bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool);
|
||||
if (!bvl && gfp_mask != saved_gfp) {
|
||||
punt_bios_to_rescuer(bs);
|
||||
gfp_mask = saved_gfp;
|
||||
bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool);
|
||||
}
|
||||
|
||||
if (unlikely(!bvl))
|
||||
goto err_free;
|
||||
|
||||
bio->bi_flags |= 1 << BIO_OWNS_VEC;
|
||||
} else if (nr_iovecs) {
|
||||
bvl = bio->bi_inline_vecs;
|
||||
}
|
||||
@@ -653,6 +755,181 @@ int bio_add_page(struct bio *bio, struct page *page, unsigned int len,
}
EXPORT_SYMBOL(bio_add_page);

struct submit_bio_ret {
struct completion event;
int error;
};

static void submit_bio_wait_endio(struct bio *bio, int error)
{
struct submit_bio_ret *ret = bio->bi_private;

ret->error = error;
complete(&ret->event);
}

/**
* submit_bio_wait - submit a bio, and wait until it completes
* @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
* @bio: The &struct bio which describes the I/O
*
* Simple wrapper around submit_bio(). Returns 0 on success, or the error from
* bio_endio() on failure.
*/
int submit_bio_wait(int rw, struct bio *bio)
{
struct submit_bio_ret ret;

rw |= REQ_SYNC;
init_completion(&ret.event);
bio->bi_private = &ret;
bio->bi_end_io = submit_bio_wait_endio;
submit_bio(rw, bio);
wait_for_completion(&ret.event);

return ret.error;
}
EXPORT_SYMBOL(submit_bio_wait);
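For illustration only: a caller that needs synchronous I/O can now use the helper above instead of open-coding a completion. A minimal sketch against the 3.10-era bio fields; everything except the exported kernel functions is hypothetical:

#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/fs.h>
#include <linux/gfp.h>

/* Synchronously read one page from @bdev at @sector (illustrative). */
static int example_read_page_sync(struct block_device *bdev, sector_t sector,
				  struct page *page)
{
	struct bio *bio;
	int ret;

	bio = bio_alloc(GFP_KERNEL, 1);
	if (!bio)
		return -ENOMEM;

	bio->bi_bdev = bdev;
	bio->bi_sector = sector;
	bio_add_page(bio, page, PAGE_SIZE, 0);	/* always fits: fresh bio, one vec */

	/* Sleeps until bio_endio() runs, then returns its error code. */
	ret = submit_bio_wait(READ, bio);

	bio_put(bio);
	return ret;
}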
/**
* bio_advance - increment/complete a bio by some number of bytes
* @bio: bio to advance
* @bytes: number of bytes to complete
*
* This updates bi_sector, bi_size and bi_idx; if the number of bytes to
* complete doesn't align with a bvec boundary, then bv_len and bv_offset will
* be updated on the last bvec as well.
*
* @bio will then represent the remaining, uncompleted portion of the io.
*/
void bio_advance(struct bio *bio, unsigned bytes)
{
if (bio_integrity(bio))
bio_integrity_advance(bio, bytes);

bio->bi_sector += bytes >> 9;
bio->bi_size -= bytes;

if (bio->bi_rw & BIO_NO_ADVANCE_ITER_MASK)
return;

while (bytes) {
if (unlikely(bio->bi_idx >= bio->bi_vcnt)) {
WARN_ONCE(1, "bio idx %d >= vcnt %d\n",
bio->bi_idx, bio->bi_vcnt);
break;
}

if (bytes >= bio_iovec(bio)->bv_len) {
bytes -= bio_iovec(bio)->bv_len;
bio->bi_idx++;
} else {
bio_iovec(bio)->bv_len -= bytes;
bio_iovec(bio)->bv_offset += bytes;
bytes = 0;
}
}
}
EXPORT_SYMBOL(bio_advance);
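To make the arithmetic above concrete, here is a small worked example with invented values (two 4 KiB bvecs, and not a REQ_DISCARD/REQ_WRITE_SAME bio, so the iterator loop runs):

/*
 * Start:              bi_sector = 1000, bi_size = 8192, bi_idx = 0
 *
 * bio_advance(bio, 4096):
 *   bi_sector = 1008 (4096 >> 9 == 8 sectors), bi_size = 4096, bi_idx = 1
 *   (the first bvec is consumed exactly, so only bi_idx moves)
 *
 * bio_advance(bio, 512):
 *   bi_sector = 1009, bi_size = 3584, bi_idx stays 1; the current bvec's
 *   bv_offset grows by 512 and bv_len shrinks by 512, because this advance
 *   does not land on a bvec boundary.
 */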
/**
* bio_alloc_pages - allocates a single page for each bvec in a bio
* @bio: bio to allocate pages for
* @gfp_mask: flags for allocation
*
* Allocates pages up to @bio->bi_vcnt.
*
* Returns 0 on success, -ENOMEM on failure. On failure, any allocated pages are
* freed.
*/
int bio_alloc_pages(struct bio *bio, gfp_t gfp_mask)
{
int i;
struct bio_vec *bv;

bio_for_each_segment_all(bv, bio, i) {
bv->bv_page = alloc_page(gfp_mask);
if (!bv->bv_page) {
while (--bv >= bio->bi_io_vec)
__free_page(bv->bv_page);
return -ENOMEM;
}
}

return 0;
}
EXPORT_SYMBOL(bio_alloc_pages);
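The helper above is aimed at callers that build a bio out of scratch pages (raid1 and the bounce code after this series). A hedged sketch of that pattern; the function name and sizing are invented:

#include <linux/bio.h>
#include <linux/gfp.h>
#include <linux/mm.h>

/* Build a bio backed by @nr_pages freshly allocated pages (illustrative). */
static struct bio *example_alloc_scratch_bio(unsigned int nr_pages)
{
	struct bio *bio;
	unsigned int i;

	bio = bio_alloc(GFP_NOIO, nr_pages);
	if (!bio)
		return NULL;

	/* Describe nr_pages page-sized segments; the pages are filled in below. */
	bio->bi_vcnt = nr_pages;
	bio->bi_size = nr_pages * PAGE_SIZE;
	for (i = 0; i < nr_pages; i++) {
		bio->bi_io_vec[i].bv_len = PAGE_SIZE;
		bio->bi_io_vec[i].bv_offset = 0;
	}

	if (bio_alloc_pages(bio, GFP_NOIO)) {
		bio_put(bio);
		return NULL;
	}
	return bio;
}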
/**
|
||||
* bio_copy_data - copy contents of data buffers from one chain of bios to
|
||||
* another
|
||||
* @src: source bio list
|
||||
* @dst: destination bio list
|
||||
*
|
||||
* If @src and @dst are single bios, bi_next must be NULL - otherwise, treats
|
||||
* @src and @dst as linked lists of bios.
|
||||
*
|
||||
* Stops when it reaches the end of either @src or @dst - that is, copies
|
||||
* min(src->bi_size, dst->bi_size) bytes (or the equivalent for lists of bios).
|
||||
*/
|
||||
void bio_copy_data(struct bio *dst, struct bio *src)
|
||||
{
|
||||
struct bio_vec *src_bv, *dst_bv;
|
||||
unsigned src_offset, dst_offset, bytes;
|
||||
void *src_p, *dst_p;
|
||||
|
||||
src_bv = bio_iovec(src);
|
||||
dst_bv = bio_iovec(dst);
|
||||
|
||||
src_offset = src_bv->bv_offset;
|
||||
dst_offset = dst_bv->bv_offset;
|
||||
|
||||
while (1) {
|
||||
if (src_offset == src_bv->bv_offset + src_bv->bv_len) {
|
||||
src_bv++;
|
||||
if (src_bv == bio_iovec_idx(src, src->bi_vcnt)) {
|
||||
src = src->bi_next;
|
||||
if (!src)
|
||||
break;
|
||||
|
||||
src_bv = bio_iovec(src);
|
||||
}
|
||||
|
||||
src_offset = src_bv->bv_offset;
|
||||
}
|
||||
|
||||
if (dst_offset == dst_bv->bv_offset + dst_bv->bv_len) {
|
||||
dst_bv++;
|
||||
if (dst_bv == bio_iovec_idx(dst, dst->bi_vcnt)) {
|
||||
dst = dst->bi_next;
|
||||
if (!dst)
|
||||
break;
|
||||
|
||||
dst_bv = bio_iovec(dst);
|
||||
}
|
||||
|
||||
dst_offset = dst_bv->bv_offset;
|
||||
}
|
||||
|
||||
bytes = min(dst_bv->bv_offset + dst_bv->bv_len - dst_offset,
|
||||
src_bv->bv_offset + src_bv->bv_len - src_offset);
|
||||
|
||||
src_p = kmap_atomic(src_bv->bv_page);
|
||||
dst_p = kmap_atomic(dst_bv->bv_page);
|
||||
|
||||
memcpy(dst_p + dst_bv->bv_offset,
|
||||
src_p + src_bv->bv_offset,
|
||||
bytes);
|
||||
|
||||
kunmap_atomic(dst_p);
|
||||
kunmap_atomic(src_p);
|
||||
|
||||
src_offset += bytes;
|
||||
dst_offset += bytes;
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(bio_copy_data);
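bio_copy_data() and bio_alloc_pages() together replace the hand-rolled copy loops that raid1, pktcdvd and the bounce code used to carry. A rough sketch of the intended pattern, assuming bio_clone_bioset() and fs_bio_set are available as in 3.10; all other names are invented:

#include <linux/bio.h>
#include <linux/gfp.h>

/*
 * Give @src a private copy of its data, e.g. for write-behind or bouncing
 * (illustrative; error handling kept minimal).
 */
static struct bio *example_copy_bio(struct bio *src)
{
	struct bio *clone;

	clone = bio_clone_bioset(src, GFP_NOIO, fs_bio_set);
	if (!clone)
		return NULL;

	/* Swap the shared pages for private ones of the same layout... */
	if (bio_alloc_pages(clone, GFP_NOIO)) {
		bio_put(clone);
		return NULL;
	}

	/* ...then copy min(src->bi_size, clone->bi_size) bytes across. */
	bio_copy_data(clone, src);
	return clone;
}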
|
||||
|
||||
struct bio_map_data {
|
||||
struct bio_vec *iovecs;
|
||||
struct sg_iovec *sgvecs;
|
||||
@ -715,7 +992,7 @@ static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs,
|
||||
int iov_idx = 0;
|
||||
unsigned int iov_off = 0;
|
||||
|
||||
__bio_for_each_segment(bvec, bio, i, 0) {
|
||||
bio_for_each_segment_all(bvec, bio, i) {
|
||||
char *bv_addr = page_address(bvec->bv_page);
|
||||
unsigned int bv_len = iovecs[i].bv_len;
|
||||
|
||||
@ -897,7 +1174,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
|
||||
return bio;
|
||||
cleanup:
|
||||
if (!map_data)
|
||||
bio_for_each_segment(bvec, bio, i)
|
||||
bio_for_each_segment_all(bvec, bio, i)
|
||||
__free_page(bvec->bv_page);
|
||||
|
||||
bio_put(bio);
|
||||
@ -1111,7 +1388,7 @@ static void __bio_unmap_user(struct bio *bio)
|
||||
/*
|
||||
* make sure we dirty pages we wrote to
|
||||
*/
|
||||
__bio_for_each_segment(bvec, bio, i, 0) {
|
||||
bio_for_each_segment_all(bvec, bio, i) {
|
||||
if (bio_data_dir(bio) == READ)
|
||||
set_page_dirty_lock(bvec->bv_page);
|
||||
|
||||
@ -1217,7 +1494,7 @@ static void bio_copy_kern_endio(struct bio *bio, int err)
|
||||
int i;
|
||||
char *p = bmd->sgvecs[0].iov_base;
|
||||
|
||||
__bio_for_each_segment(bvec, bio, i, 0) {
|
||||
bio_for_each_segment_all(bvec, bio, i) {
|
||||
char *addr = page_address(bvec->bv_page);
|
||||
int len = bmd->iovecs[i].bv_len;
|
||||
|
||||
@ -1257,7 +1534,7 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
|
||||
if (!reading) {
|
||||
void *p = data;
|
||||
|
||||
bio_for_each_segment(bvec, bio, i) {
|
||||
bio_for_each_segment_all(bvec, bio, i) {
|
||||
char *addr = page_address(bvec->bv_page);
|
||||
|
||||
memcpy(addr, p, bvec->bv_len);
|
||||
@ -1302,11 +1579,11 @@ EXPORT_SYMBOL(bio_copy_kern);
|
||||
*/
|
||||
void bio_set_pages_dirty(struct bio *bio)
|
||||
{
|
||||
struct bio_vec *bvec = bio->bi_io_vec;
|
||||
struct bio_vec *bvec;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < bio->bi_vcnt; i++) {
|
||||
struct page *page = bvec[i].bv_page;
|
||||
bio_for_each_segment_all(bvec, bio, i) {
|
||||
struct page *page = bvec->bv_page;
|
||||
|
||||
if (page && !PageCompound(page))
|
||||
set_page_dirty_lock(page);
|
||||
@ -1315,11 +1592,11 @@ void bio_set_pages_dirty(struct bio *bio)
|
||||
|
||||
static void bio_release_pages(struct bio *bio)
|
||||
{
|
||||
struct bio_vec *bvec = bio->bi_io_vec;
|
||||
struct bio_vec *bvec;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < bio->bi_vcnt; i++) {
|
||||
struct page *page = bvec[i].bv_page;
|
||||
bio_for_each_segment_all(bvec, bio, i) {
|
||||
struct page *page = bvec->bv_page;
|
||||
|
||||
if (page)
|
||||
put_page(page);
|
||||
@ -1368,16 +1645,16 @@ static void bio_dirty_fn(struct work_struct *work)
|
||||
|
||||
void bio_check_pages_dirty(struct bio *bio)
|
||||
{
|
||||
struct bio_vec *bvec = bio->bi_io_vec;
|
||||
struct bio_vec *bvec;
|
||||
int nr_clean_pages = 0;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < bio->bi_vcnt; i++) {
|
||||
struct page *page = bvec[i].bv_page;
|
||||
bio_for_each_segment_all(bvec, bio, i) {
|
||||
struct page *page = bvec->bv_page;
|
||||
|
||||
if (PageDirty(page) || PageCompound(page)) {
|
||||
page_cache_release(page);
|
||||
bvec[i].bv_page = NULL;
|
||||
bvec->bv_page = NULL;
|
||||
} else {
|
||||
nr_clean_pages++;
|
||||
}
|
||||
@ -1478,8 +1755,7 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors)
|
||||
trace_block_split(bdev_get_queue(bi->bi_bdev), bi,
|
||||
bi->bi_sector + first_sectors);
|
||||
|
||||
BUG_ON(bi->bi_vcnt != 1 && bi->bi_vcnt != 0);
|
||||
BUG_ON(bi->bi_idx != 0);
|
||||
BUG_ON(bio_segments(bi) > 1);
|
||||
atomic_set(&bp->cnt, 3);
|
||||
bp->error = 0;
|
||||
bp->bio1 = *bi;
|
||||
@ -1489,8 +1765,8 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors)
|
||||
bp->bio1.bi_size = first_sectors << 9;
|
||||
|
||||
if (bi->bi_vcnt != 0) {
|
||||
bp->bv1 = bi->bi_io_vec[0];
|
||||
bp->bv2 = bi->bi_io_vec[0];
|
||||
bp->bv1 = *bio_iovec(bi);
|
||||
bp->bv2 = *bio_iovec(bi);
|
||||
|
||||
if (bio_is_rw(bi)) {
|
||||
bp->bv2.bv_offset += first_sectors << 9;
|
||||
@ -1542,7 +1818,7 @@ sector_t bio_sector_offset(struct bio *bio, unsigned short index,
|
||||
if (index >= bio->bi_idx)
|
||||
index = bio->bi_vcnt - 1;
|
||||
|
||||
__bio_for_each_segment(bv, bio, i, 0) {
|
||||
bio_for_each_segment_all(bv, bio, i) {
|
||||
if (i == index) {
|
||||
if (offset > bv->bv_offset)
|
||||
sectors += (offset - bv->bv_offset) / sector_sz;
|
||||
@ -1560,29 +1836,25 @@ EXPORT_SYMBOL(bio_sector_offset);
|
||||
* create memory pools for biovec's in a bio_set.
|
||||
* use the global biovec slabs created for general use.
|
||||
*/
|
||||
static int biovec_create_pools(struct bio_set *bs, int pool_entries)
|
||||
mempool_t *biovec_create_pool(struct bio_set *bs, int pool_entries)
|
||||
{
|
||||
struct biovec_slab *bp = bvec_slabs + BIOVEC_MAX_IDX;
|
||||
|
||||
bs->bvec_pool = mempool_create_slab_pool(pool_entries, bp->slab);
|
||||
if (!bs->bvec_pool)
|
||||
return -ENOMEM;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void biovec_free_pools(struct bio_set *bs)
|
||||
{
|
||||
mempool_destroy(bs->bvec_pool);
|
||||
return mempool_create_slab_pool(pool_entries, bp->slab);
|
||||
}
|
||||
|
||||
void bioset_free(struct bio_set *bs)
|
||||
{
|
||||
if (bs->rescue_workqueue)
|
||||
destroy_workqueue(bs->rescue_workqueue);
|
||||
|
||||
if (bs->bio_pool)
|
||||
mempool_destroy(bs->bio_pool);
|
||||
|
||||
if (bs->bvec_pool)
|
||||
mempool_destroy(bs->bvec_pool);
|
||||
|
||||
bioset_integrity_free(bs);
|
||||
biovec_free_pools(bs);
|
||||
bio_put_slab(bs);
|
||||
|
||||
kfree(bs);
|
||||
@ -1613,6 +1885,10 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
|
||||
|
||||
bs->front_pad = front_pad;
|
||||
|
||||
spin_lock_init(&bs->rescue_lock);
|
||||
bio_list_init(&bs->rescue_list);
|
||||
INIT_WORK(&bs->rescue_work, bio_alloc_rescue);
|
||||
|
||||
bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad);
|
||||
if (!bs->bio_slab) {
|
||||
kfree(bs);
|
||||
@ -1623,9 +1899,15 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
|
||||
if (!bs->bio_pool)
|
||||
goto bad;
|
||||
|
||||
if (!biovec_create_pools(bs, pool_size))
|
||||
return bs;
|
||||
bs->bvec_pool = biovec_create_pool(bs, pool_size);
|
||||
if (!bs->bvec_pool)
|
||||
goto bad;
|
||||
|
||||
bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0);
|
||||
if (!bs->rescue_workqueue)
|
||||
goto bad;
|
||||
|
||||
return bs;
|
||||
bad:
|
||||
bioset_free(bs);
|
||||
return NULL;
|
||||
|
@ -1556,7 +1556,7 @@ static ssize_t blkdev_aio_read(struct kiocb *iocb, const struct iovec *iov,
|
||||
return 0;
|
||||
|
||||
size -= pos;
|
||||
if (size < INT_MAX)
|
||||
if (size < iocb->ki_left)
|
||||
nr_segs = iov_shorten((struct iovec *)iov, nr_segs, size);
|
||||
return generic_file_aio_read(iocb, iov, nr_segs, pos);
|
||||
}
|
||||
|
@ -2560,8 +2560,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
|
||||
if (old_compressed)
|
||||
contig = bio->bi_sector == sector;
|
||||
else
|
||||
contig = bio->bi_sector + (bio->bi_size >> 9) ==
|
||||
sector;
|
||||
contig = bio_end_sector(bio) == sector;
|
||||
|
||||
if (prev_bio_flags != bio_flags || !contig ||
|
||||
merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) ||
|
||||
|
@ -5177,7 +5177,7 @@ static int bio_size_ok(struct block_device *bdev, struct bio *bio,
|
||||
}
|
||||
|
||||
prev = &bio->bi_io_vec[bio->bi_vcnt - 1];
|
||||
if ((bio->bi_size >> 9) > max_sectors)
|
||||
if (bio_sectors(bio) > max_sectors)
|
||||
return 0;
|
||||
|
||||
if (!q->merge_bvec_fn)
|
||||
|
@ -2977,7 +2977,6 @@ int _submit_bh(int rw, struct buffer_head *bh, unsigned long bio_flags)
|
||||
bio->bi_io_vec[0].bv_offset = bh_offset(bh);
|
||||
|
||||
bio->bi_vcnt = 1;
|
||||
bio->bi_idx = 0;
|
||||
bio->bi_size = bh->b_size;
|
||||
|
||||
bio->bi_end_io = end_bio_bh_io_sync;
|
||||
|
@ -442,8 +442,8 @@ static struct bio *dio_await_one(struct dio *dio)
|
||||
static int dio_bio_complete(struct dio *dio, struct bio *bio)
|
||||
{
|
||||
const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
|
||||
struct bio_vec *bvec = bio->bi_io_vec;
|
||||
int page_no;
|
||||
struct bio_vec *bvec;
|
||||
unsigned i;
|
||||
|
||||
if (!uptodate)
|
||||
dio->io_error = -EIO;
|
||||
@ -451,8 +451,8 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio)
|
||||
if (dio->is_async && dio->rw == READ) {
|
||||
bio_check_pages_dirty(bio); /* transfers ownership */
|
||||
} else {
|
||||
for (page_no = 0; page_no < bio->bi_vcnt; page_no++) {
|
||||
struct page *page = bvec[page_no].bv_page;
|
||||
bio_for_each_segment_all(bvec, bio, i) {
|
||||
struct page *page = bvec->bv_page;
|
||||
|
||||
if (dio->rw == READ && !PageCompound(page))
|
||||
set_page_dirty_lock(page);
|
||||
|
@ -401,7 +401,7 @@ static void _clear_bio(struct bio *bio)
|
||||
struct bio_vec *bv;
|
||||
unsigned i;
|
||||
|
||||
__bio_for_each_segment(bv, bio, i, 0) {
|
||||
bio_for_each_segment_all(bv, bio, i) {
|
||||
unsigned this_count = bv->bv_len;
|
||||
|
||||
if (likely(PAGE_SIZE == this_count))
|
||||
|
@ -432,7 +432,7 @@ static void _mark_read4write_pages_uptodate(struct ore_io_state *ios, int ret)
|
||||
if (!bio)
|
||||
continue;
|
||||
|
||||
__bio_for_each_segment(bv, bio, i, 0) {
|
||||
bio_for_each_segment_all(bv, bio, i) {
|
||||
struct page *page = bv->bv_page;
|
||||
|
||||
SetPageUptodate(page);
|
||||
|
@ -22,7 +22,6 @@
|
||||
#include <linux/mm.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/freezer.h>
|
||||
#include <linux/writeback.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/backing-dev.h>
|
||||
@ -88,20 +87,6 @@ static inline struct inode *wb_inode(struct list_head *head)
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include <trace/events/writeback.h>
|
||||
|
||||
/* Wakeup flusher thread or forker thread to fork it. Requires bdi->wb_lock. */
|
||||
static void bdi_wakeup_flusher(struct backing_dev_info *bdi)
|
||||
{
|
||||
if (bdi->wb.task) {
|
||||
wake_up_process(bdi->wb.task);
|
||||
} else {
|
||||
/*
|
||||
* The bdi thread isn't there, wake up the forker thread which
|
||||
* will create and run it.
|
||||
*/
|
||||
wake_up_process(default_backing_dev_info.wb.task);
|
||||
}
|
||||
}
|
||||
|
||||
static void bdi_queue_work(struct backing_dev_info *bdi,
|
||||
struct wb_writeback_work *work)
|
||||
{
|
||||
@ -109,10 +94,9 @@ static void bdi_queue_work(struct backing_dev_info *bdi,
|
||||
|
||||
spin_lock_bh(&bdi->wb_lock);
|
||||
list_add_tail(&work->list, &bdi->work_list);
|
||||
if (!bdi->wb.task)
|
||||
trace_writeback_nothread(bdi, work);
|
||||
bdi_wakeup_flusher(bdi);
|
||||
spin_unlock_bh(&bdi->wb_lock);
|
||||
|
||||
mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -127,10 +111,8 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
|
||||
*/
|
||||
work = kzalloc(sizeof(*work), GFP_ATOMIC);
|
||||
if (!work) {
|
||||
if (bdi->wb.task) {
|
||||
trace_writeback_nowork(bdi);
|
||||
wake_up_process(bdi->wb.task);
|
||||
}
|
||||
trace_writeback_nowork(bdi);
|
||||
mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -177,9 +159,7 @@ void bdi_start_background_writeback(struct backing_dev_info *bdi)
|
||||
* writeback as soon as there is no other work to do.
|
||||
*/
|
||||
trace_writeback_wake_background(bdi);
|
||||
spin_lock_bh(&bdi->wb_lock);
|
||||
bdi_wakeup_flusher(bdi);
|
||||
spin_unlock_bh(&bdi->wb_lock);
|
||||
mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1020,67 +1000,49 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
|
||||
|
||||
/*
|
||||
* Handle writeback of dirty data for the device backed by this bdi. Also
|
||||
* wakes up periodically and does kupdated style flushing.
|
||||
* reschedules periodically and does kupdated style flushing.
|
||||
*/
|
||||
int bdi_writeback_thread(void *data)
|
||||
void bdi_writeback_workfn(struct work_struct *work)
|
||||
{
|
||||
struct bdi_writeback *wb = data;
|
||||
struct bdi_writeback *wb = container_of(to_delayed_work(work),
|
||||
struct bdi_writeback, dwork);
|
||||
struct backing_dev_info *bdi = wb->bdi;
|
||||
long pages_written;
|
||||
|
||||
set_worker_desc("flush-%s", dev_name(bdi->dev));
|
||||
current->flags |= PF_SWAPWRITE;
|
||||
set_freezable();
|
||||
wb->last_active = jiffies;
|
||||
|
||||
/*
|
||||
* Our parent may run at a different priority, just set us to normal
|
||||
*/
|
||||
set_user_nice(current, 0);
|
||||
|
||||
trace_writeback_thread_start(bdi);
|
||||
|
||||
while (!kthread_freezable_should_stop(NULL)) {
|
||||
if (likely(!current_is_workqueue_rescuer() ||
|
||||
list_empty(&bdi->bdi_list))) {
|
||||
/*
|
||||
* Remove own delayed wake-up timer, since we are already awake
|
||||
* and we'll take care of the periodic write-back.
|
||||
* The normal path. Keep writing back @bdi until its
|
||||
* work_list is empty. Note that this path is also taken
|
||||
* if @bdi is shutting down even when we're running off the
|
||||
* rescuer as work_list needs to be drained.
|
||||
*/
|
||||
del_timer(&wb->wakeup_timer);
|
||||
|
||||
pages_written = wb_do_writeback(wb, 0);
|
||||
|
||||
do {
|
||||
pages_written = wb_do_writeback(wb, 0);
|
||||
trace_writeback_pages_written(pages_written);
|
||||
} while (!list_empty(&bdi->work_list));
|
||||
} else {
|
||||
/*
|
||||
* bdi_wq can't get enough workers and we're running off
|
||||
* the emergency worker. Don't hog it. Hopefully, 1024 is
|
||||
* enough for efficient IO.
|
||||
*/
|
||||
pages_written = writeback_inodes_wb(&bdi->wb, 1024,
|
||||
WB_REASON_FORKER_THREAD);
|
||||
trace_writeback_pages_written(pages_written);
|
||||
|
||||
if (pages_written)
|
||||
wb->last_active = jiffies;
|
||||
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
if (!list_empty(&bdi->work_list) || kthread_should_stop()) {
|
||||
__set_current_state(TASK_RUNNING);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (wb_has_dirty_io(wb) && dirty_writeback_interval)
|
||||
schedule_timeout(msecs_to_jiffies(dirty_writeback_interval * 10));
|
||||
else {
|
||||
/*
|
||||
* We have nothing to do, so can go sleep without any
|
||||
* timeout and save power. When a work is queued or
|
||||
* something is made dirty - we will be woken up.
|
||||
*/
|
||||
schedule();
|
||||
}
|
||||
}
|
||||
|
||||
/* Flush any work that raced with us exiting */
|
||||
if (!list_empty(&bdi->work_list))
|
||||
wb_do_writeback(wb, 1);
|
||||
if (!list_empty(&bdi->work_list) ||
|
||||
(wb_has_dirty_io(wb) && dirty_writeback_interval))
|
||||
queue_delayed_work(bdi_wq, &wb->dwork,
|
||||
msecs_to_jiffies(dirty_writeback_interval * 10));
|
||||
|
||||
trace_writeback_thread_stop(bdi);
|
||||
return 0;
|
||||
current->flags &= ~PF_SWAPWRITE;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back
|
||||
* the whole world.
|
||||
|
@ -300,7 +300,7 @@ static struct bio *gfs2_log_get_bio(struct gfs2_sbd *sdp, u64 blkno)
|
||||
u64 nblk;
|
||||
|
||||
if (bio) {
|
||||
nblk = bio->bi_sector + bio_sectors(bio);
|
||||
nblk = bio_end_sector(bio);
|
||||
nblk >>= sdp->sd_fsb2bb_shift;
|
||||
if (blkno == nblk)
|
||||
return bio;
|
||||
|
@ -2005,7 +2005,6 @@ static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp)
|
||||
bio->bi_io_vec[0].bv_offset = bp->l_offset;
|
||||
|
||||
bio->bi_vcnt = 1;
|
||||
bio->bi_idx = 0;
|
||||
bio->bi_size = LOGPSIZE;
|
||||
|
||||
bio->bi_end_io = lbmIODone;
|
||||
@ -2146,7 +2145,6 @@ static void lbmStartIO(struct lbuf * bp)
|
||||
bio->bi_io_vec[0].bv_offset = bp->l_offset;
|
||||
|
||||
bio->bi_vcnt = 1;
|
||||
bio->bi_idx = 0;
|
||||
bio->bi_size = LOGPSIZE;
|
||||
|
||||
bio->bi_end_io = lbmIODone;
|
||||
|
@ -32,7 +32,6 @@ static int sync_request(struct page *page, struct block_device *bdev, int rw)
|
||||
bio_vec.bv_len = PAGE_SIZE;
|
||||
bio_vec.bv_offset = 0;
|
||||
bio.bi_vcnt = 1;
|
||||
bio.bi_idx = 0;
|
||||
bio.bi_size = PAGE_SIZE;
|
||||
bio.bi_bdev = bdev;
|
||||
bio.bi_sector = page->index * (PAGE_SIZE >> 9);
|
||||
@ -108,7 +107,6 @@ static int __bdev_writeseg(struct super_block *sb, u64 ofs, pgoff_t index,
|
||||
if (i >= max_pages) {
|
||||
/* Block layer cannot split bios :( */
|
||||
bio->bi_vcnt = i;
|
||||
bio->bi_idx = 0;
|
||||
bio->bi_size = i * PAGE_SIZE;
|
||||
bio->bi_bdev = super->s_bdev;
|
||||
bio->bi_sector = ofs >> 9;
|
||||
@ -136,7 +134,6 @@ static int __bdev_writeseg(struct super_block *sb, u64 ofs, pgoff_t index,
|
||||
unlock_page(page);
|
||||
}
|
||||
bio->bi_vcnt = nr_pages;
|
||||
bio->bi_idx = 0;
|
||||
bio->bi_size = nr_pages * PAGE_SIZE;
|
||||
bio->bi_bdev = super->s_bdev;
|
||||
bio->bi_sector = ofs >> 9;
|
||||
@ -202,7 +199,6 @@ static int do_erase(struct super_block *sb, u64 ofs, pgoff_t index,
|
||||
if (i >= max_pages) {
|
||||
/* Block layer cannot split bios :( */
|
||||
bio->bi_vcnt = i;
|
||||
bio->bi_idx = 0;
|
||||
bio->bi_size = i * PAGE_SIZE;
|
||||
bio->bi_bdev = super->s_bdev;
|
||||
bio->bi_sector = ofs >> 9;
|
||||
@ -224,7 +220,6 @@ static int do_erase(struct super_block *sb, u64 ofs, pgoff_t index,
|
||||
bio->bi_io_vec[i].bv_offset = 0;
|
||||
}
|
||||
bio->bi_vcnt = nr_pages;
|
||||
bio->bi_idx = 0;
|
||||
bio->bi_size = nr_pages * PAGE_SIZE;
|
||||
bio->bi_bdev = super->s_bdev;
|
||||
bio->bi_sector = ofs >> 9;
|
||||
|
@ -18,6 +18,7 @@
|
||||
#include <linux/writeback.h>
|
||||
#include <linux/atomic.h>
|
||||
#include <linux/sysctl.h>
|
||||
#include <linux/workqueue.h>
|
||||
|
||||
struct page;
|
||||
struct device;
|
||||
@ -27,7 +28,6 @@ struct dentry;
|
||||
* Bits in backing_dev_info.state
|
||||
*/
|
||||
enum bdi_state {
|
||||
BDI_pending, /* On its way to being activated */
|
||||
BDI_wb_alloc, /* Default embedded wb allocated */
|
||||
BDI_async_congested, /* The async (write) queue is getting full */
|
||||
BDI_sync_congested, /* The sync queue is getting full */
|
||||
@ -53,10 +53,8 @@ struct bdi_writeback {
|
||||
unsigned int nr;
|
||||
|
||||
unsigned long last_old_flush; /* last old data flush */
|
||||
unsigned long last_active; /* last time bdi thread was active */
|
||||
|
||||
struct task_struct *task; /* writeback thread */
|
||||
struct timer_list wakeup_timer; /* used for delayed bdi thread wakeup */
|
||||
struct delayed_work dwork; /* work item used for writeback */
|
||||
struct list_head b_dirty; /* dirty inodes */
|
||||
struct list_head b_io; /* parked for writeback */
|
||||
struct list_head b_more_io; /* parked for more writeback */
|
||||
@ -123,14 +121,15 @@ int bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int);
|
||||
void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
|
||||
enum wb_reason reason);
|
||||
void bdi_start_background_writeback(struct backing_dev_info *bdi);
|
||||
int bdi_writeback_thread(void *data);
|
||||
void bdi_writeback_workfn(struct work_struct *work);
|
||||
int bdi_has_dirty_io(struct backing_dev_info *bdi);
|
||||
void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi);
|
||||
void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2);
|
||||
|
||||
extern spinlock_t bdi_lock;
|
||||
extern struct list_head bdi_list;
|
||||
extern struct list_head bdi_pending_list;
|
||||
|
||||
extern struct workqueue_struct *bdi_wq;
|
||||
|
||||
static inline int wb_has_dirty_io(struct bdi_writeback *wb)
|
||||
{
|
||||
@ -336,11 +335,6 @@ static inline bool bdi_cap_swap_backed(struct backing_dev_info *bdi)
|
||||
return bdi->capabilities & BDI_CAP_SWAP_BACKED;
|
||||
}
|
||||
|
||||
static inline bool bdi_cap_flush_forker(struct backing_dev_info *bdi)
|
||||
{
|
||||
return bdi == &default_backing_dev_info;
|
||||
}
|
||||
|
||||
static inline bool mapping_cap_writeback_dirty(struct address_space *mapping)
|
||||
{
|
||||
return bdi_cap_writeback_dirty(mapping->backing_dev_info);
|
||||
|
@@ -67,6 +67,7 @@
#define bio_offset(bio) bio_iovec((bio))->bv_offset
#define bio_segments(bio) ((bio)->bi_vcnt - (bio)->bi_idx)
#define bio_sectors(bio) ((bio)->bi_size >> 9)
#define bio_end_sector(bio) ((bio)->bi_sector + bio_sectors((bio)))

static inline unsigned int bio_cur_bytes(struct bio *bio)
{
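The new bio_end_sector() macro is what the open-coded bi_sector + (bi_size >> 9) expressions elsewhere in this diff collapse into. A trivial hedged sketch; the wrapper function is invented:

#include <linux/bio.h>
#include <linux/genhd.h>
#include <linux/types.h>

/* True if @bio stays within @disk's capacity (illustrative only). */
static inline bool example_bio_in_bounds(struct bio *bio, struct gendisk *disk)
{
	/* Same as: bio->bi_sector + (bio->bi_size >> 9) <= get_capacity(disk) */
	return bio_end_sector(bio) <= get_capacity(disk);
}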
@ -84,11 +85,6 @@ static inline void *bio_data(struct bio *bio)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline int bio_has_allocated_vec(struct bio *bio)
|
||||
{
|
||||
return bio->bi_io_vec && bio->bi_io_vec != bio->bi_inline_vecs;
|
||||
}
|
||||
|
||||
/*
|
||||
* will die
|
||||
*/
|
||||
@@ -136,16 +132,27 @@ static inline int bio_has_allocated_vec(struct bio *bio)
#define bio_io_error(bio) bio_endio((bio), -EIO)

/*
* drivers should not use the __ version unless they _really_ want to
* run through the entire bio and not just pending pieces
* drivers should not use the __ version unless they _really_ know what
* they're doing
*/
#define __bio_for_each_segment(bvl, bio, i, start_idx) \
for (bvl = bio_iovec_idx((bio), (start_idx)), i = (start_idx); \
i < (bio)->bi_vcnt; \
bvl++, i++)

/*
* drivers should _never_ use the all version - the bio may have been split
* before it got to the driver and the driver won't own all of it
*/
#define bio_for_each_segment_all(bvl, bio, i) \
for (i = 0; \
bvl = bio_iovec_idx((bio), (i)), i < (bio)->bi_vcnt; \
i++)

#define bio_for_each_segment(bvl, bio, i) \
__bio_for_each_segment(bvl, bio, i, (bio)->bi_idx)
for (i = (bio)->bi_idx; \
bvl = bio_iovec_idx((bio), (i)), i < (bio)->bi_vcnt; \
i++)

/*
* get a reference to a bio, so it won't disappear. the intended use is
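To illustrate the ownership rule spelled out in the comments above: only code that allocated the bio (and so owns every bvec) may use the _all iterator, while drivers keep using bio_for_each_segment(), which starts at bi_idx. A hedged sketch of the owner-side form; the function is invented:

#include <linux/bio.h>
#include <linux/highmem.h>
#include <linux/string.h>

/* Zero the data of a bio we allocated ourselves (illustrative only). */
static void example_zero_owned_bio(struct bio *bio)
{
	struct bio_vec *bv;
	int i;

	bio_for_each_segment_all(bv, bio, i) {
		void *p = kmap_atomic(bv->bv_page);

		memset(p + bv->bv_offset, 0, bv->bv_len);
		kunmap_atomic(p);
	}
}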
@ -180,9 +187,12 @@ struct bio_integrity_payload {
|
||||
unsigned short bip_slab; /* slab the bip came from */
|
||||
unsigned short bip_vcnt; /* # of integrity bio_vecs */
|
||||
unsigned short bip_idx; /* current bip_vec index */
|
||||
unsigned bip_owns_buf:1; /* should free bip_buf */
|
||||
|
||||
struct work_struct bip_work; /* I/O completion */
|
||||
struct bio_vec bip_vec[0]; /* embedded bvec array */
|
||||
|
||||
struct bio_vec *bip_vec;
|
||||
struct bio_vec bip_inline_vecs[0];/* embedded bvec array */
|
||||
};
|
||||
#endif /* CONFIG_BLK_DEV_INTEGRITY */
|
||||
|
||||
@ -211,6 +221,7 @@ extern void bio_pair_release(struct bio_pair *dbio);
|
||||
|
||||
extern struct bio_set *bioset_create(unsigned int, unsigned int);
|
||||
extern void bioset_free(struct bio_set *);
|
||||
extern mempool_t *biovec_create_pool(struct bio_set *bs, int pool_entries);
|
||||
|
||||
extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *);
|
||||
extern void bio_put(struct bio *);
|
||||
@ -245,6 +256,9 @@ extern void bio_endio(struct bio *, int);
|
||||
struct request_queue;
|
||||
extern int bio_phys_segments(struct request_queue *, struct bio *);
|
||||
|
||||
extern int submit_bio_wait(int rw, struct bio *bio);
|
||||
extern void bio_advance(struct bio *, unsigned);
|
||||
|
||||
extern void bio_init(struct bio *);
|
||||
extern void bio_reset(struct bio *);
|
||||
|
||||
@ -279,6 +293,9 @@ static inline void bio_flush_dcache_pages(struct bio *bi)
|
||||
}
|
||||
#endif
|
||||
|
||||
extern void bio_copy_data(struct bio *dst, struct bio *src);
|
||||
extern int bio_alloc_pages(struct bio *bio, gfp_t gfp);
|
||||
|
||||
extern struct bio *bio_copy_user(struct request_queue *, struct rq_map_data *,
|
||||
unsigned long, unsigned int, int, gfp_t);
|
||||
extern struct bio *bio_copy_user_iov(struct request_queue *,
|
||||
@ -286,8 +303,8 @@ extern struct bio *bio_copy_user_iov(struct request_queue *,
|
||||
int, int, gfp_t);
|
||||
extern int bio_uncopy_user(struct bio *);
|
||||
void zero_fill_bio(struct bio *bio);
|
||||
extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set *);
|
||||
extern void bvec_free_bs(struct bio_set *, struct bio_vec *, unsigned int);
|
||||
extern struct bio_vec *bvec_alloc(gfp_t, int, unsigned long *, mempool_t *);
|
||||
extern void bvec_free(mempool_t *, struct bio_vec *, unsigned int);
|
||||
extern unsigned int bvec_nr_vecs(unsigned short idx);
|
||||
|
||||
#ifdef CONFIG_BLK_CGROUP
|
||||
@ -298,39 +315,6 @@ static inline int bio_associate_current(struct bio *bio) { return -ENOENT; }
|
||||
static inline void bio_disassociate_task(struct bio *bio) { }
|
||||
#endif /* CONFIG_BLK_CGROUP */
|
||||
|
||||
/*
|
||||
* bio_set is used to allow other portions of the IO system to
|
||||
* allocate their own private memory pools for bio and iovec structures.
|
||||
* These memory pools in turn all allocate from the bio_slab
|
||||
* and the bvec_slabs[].
|
||||
*/
|
||||
#define BIO_POOL_SIZE 2
|
||||
#define BIOVEC_NR_POOLS 6
|
||||
#define BIOVEC_MAX_IDX (BIOVEC_NR_POOLS - 1)
|
||||
|
||||
struct bio_set {
|
||||
struct kmem_cache *bio_slab;
|
||||
unsigned int front_pad;
|
||||
|
||||
mempool_t *bio_pool;
|
||||
#if defined(CONFIG_BLK_DEV_INTEGRITY)
|
||||
mempool_t *bio_integrity_pool;
|
||||
#endif
|
||||
mempool_t *bvec_pool;
|
||||
};
|
||||
|
||||
struct biovec_slab {
|
||||
int nr_vecs;
|
||||
char *name;
|
||||
struct kmem_cache *slab;
|
||||
};
|
||||
|
||||
/*
|
||||
* a small number of entries is fine, not going to be performance critical.
|
||||
* basically we just need to survive
|
||||
*/
|
||||
#define BIO_SPLIT_ENTRIES 2
|
||||
|
||||
#ifdef CONFIG_HIGHMEM
|
||||
/*
|
||||
* remember never ever reenable interrupts between a bvec_kmap_irq and
|
||||
@ -527,6 +511,49 @@ static inline struct bio *bio_list_get(struct bio_list *bl)
|
||||
return bio;
|
||||
}
|
||||
|
||||
/*
|
||||
* bio_set is used to allow other portions of the IO system to
|
||||
* allocate their own private memory pools for bio and iovec structures.
|
||||
* These memory pools in turn all allocate from the bio_slab
|
||||
* and the bvec_slabs[].
|
||||
*/
|
||||
#define BIO_POOL_SIZE 2
|
||||
#define BIOVEC_NR_POOLS 6
|
||||
#define BIOVEC_MAX_IDX (BIOVEC_NR_POOLS - 1)
|
||||
|
||||
struct bio_set {
|
||||
struct kmem_cache *bio_slab;
|
||||
unsigned int front_pad;
|
||||
|
||||
mempool_t *bio_pool;
|
||||
mempool_t *bvec_pool;
|
||||
#if defined(CONFIG_BLK_DEV_INTEGRITY)
|
||||
mempool_t *bio_integrity_pool;
|
||||
mempool_t *bvec_integrity_pool;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Deadlock avoidance for stacking block drivers: see comments in
|
||||
* bio_alloc_bioset() for details
|
||||
*/
|
||||
spinlock_t rescue_lock;
|
||||
struct bio_list rescue_list;
|
||||
struct work_struct rescue_work;
|
||||
struct workqueue_struct *rescue_workqueue;
|
||||
};
|
||||
|
||||
struct biovec_slab {
|
||||
int nr_vecs;
|
||||
char *name;
|
||||
struct kmem_cache *slab;
|
||||
};
|
||||
|
||||
/*
|
||||
* a small number of entries is fine, not going to be performance critical.
|
||||
* basically we just need to survive
|
||||
*/
|
||||
#define BIO_SPLIT_ENTRIES 2
|
||||
|
||||
#if defined(CONFIG_BLK_DEV_INTEGRITY)
|
||||
|
||||
#define bip_vec_idx(bip, idx) (&(bip->bip_vec[(idx)]))
|
||||
|
@ -118,6 +118,7 @@ struct bio {
|
||||
* BIO_POOL_IDX()
|
||||
*/
|
||||
#define BIO_RESET_BITS 13
|
||||
#define BIO_OWNS_VEC 13 /* bio_free() should free bvec */
|
||||
|
||||
#define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag)))
|
||||
|
||||
@ -176,6 +177,7 @@ enum rq_flag_bits {
|
||||
__REQ_IO_STAT, /* account I/O stat */
|
||||
__REQ_MIXED_MERGE, /* merge of different types, fail separately */
|
||||
__REQ_KERNEL, /* direct IO to kernel pages */
|
||||
__REQ_PM, /* runtime pm request */
|
||||
__REQ_NR_BITS, /* stops here */
|
||||
};
|
||||
|
||||
@ -198,6 +200,8 @@ enum rq_flag_bits {
|
||||
REQ_SECURE)
|
||||
#define REQ_CLONE_MASK REQ_COMMON_MASK
|
||||
|
||||
#define BIO_NO_ADVANCE_ITER_MASK (REQ_DISCARD|REQ_WRITE_SAME)
|
||||
|
||||
/* This mask is used for both bio and request merge checking */
|
||||
#define REQ_NOMERGE_FLAGS \
|
||||
(REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA)
|
||||
@ -224,5 +228,6 @@ enum rq_flag_bits {
|
||||
#define REQ_MIXED_MERGE (1 << __REQ_MIXED_MERGE)
|
||||
#define REQ_SECURE (1 << __REQ_SECURE)
|
||||
#define REQ_KERNEL (1 << __REQ_KERNEL)
|
||||
#define REQ_PM (1 << __REQ_PM)
|
||||
|
||||
#endif /* __LINUX_BLK_TYPES_H */
|
||||
|
@ -361,6 +361,12 @@ struct request_queue {
|
||||
*/
|
||||
struct kobject kobj;
|
||||
|
||||
#ifdef CONFIG_PM_RUNTIME
|
||||
struct device *dev;
|
||||
int rpm_status;
|
||||
unsigned int nr_pending;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* queue settings
|
||||
*/
|
||||
@ -838,7 +844,7 @@ static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
|
||||
unsigned int cmd_flags)
|
||||
{
|
||||
if (unlikely(cmd_flags & REQ_DISCARD))
|
||||
return q->limits.max_discard_sectors;
|
||||
return min(q->limits.max_discard_sectors, UINT_MAX >> 9);
|
||||
|
||||
if (unlikely(cmd_flags & REQ_WRITE_SAME))
|
||||
return q->limits.max_write_same_sectors;
|
||||

@@ -960,6 +966,27 @@ struct request_queue *blk_alloc_queue(gfp_t);
struct request_queue *blk_alloc_queue_node(gfp_t, int);
extern void blk_put_queue(struct request_queue *);

/*
* block layer runtime pm functions
*/
#ifdef CONFIG_PM_RUNTIME
extern void blk_pm_runtime_init(struct request_queue *q, struct device *dev);
extern int blk_pre_runtime_suspend(struct request_queue *q);
extern void blk_post_runtime_suspend(struct request_queue *q, int err);
extern void blk_pre_runtime_resume(struct request_queue *q);
extern void blk_post_runtime_resume(struct request_queue *q, int err);
#else
static inline void blk_pm_runtime_init(struct request_queue *q,
struct device *dev) {}
static inline int blk_pre_runtime_suspend(struct request_queue *q)
{
return -ENOSYS;
}
static inline void blk_post_runtime_suspend(struct request_queue *q, int err) {}
static inline void blk_pre_runtime_resume(struct request_queue *q) {}
static inline void blk_post_runtime_resume(struct request_queue *q, int err) {}
#endif
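
A hedged sketch of how a driver might consume the runtime PM hooks declared above. The mydrv_* names and the use of dev_get_drvdata() to recover the queue are assumptions made only for illustration; the calls themselves follow the prototypes in this hunk:

#include <linux/blkdev.h>
#include <linux/device.h>

/* Tie the queue to the device once both exist, e.g. at probe time. */
static void mydrv_setup_pm(struct request_queue *q, struct device *dev)
{
	blk_pm_runtime_init(q, dev);
}

static int mydrv_runtime_suspend(struct device *dev)
{
	struct request_queue *q = dev_get_drvdata(dev);	/* assumed drvdata layout */
	int err;

	err = blk_pre_runtime_suspend(q);
	if (err)
		return err;		/* queue still busy, stay powered */

	err = 0;			/* ...power the hardware down here... */
	blk_post_runtime_suspend(q, err);
	return err;
}

static int mydrv_runtime_resume(struct device *dev)
{
	struct request_queue *q = dev_get_drvdata(dev);
	int err;

	blk_pre_runtime_resume(q);
	err = 0;			/* ...power the hardware back up here... */
	blk_post_runtime_resume(q, err);
	return err;
}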

/*
* blk_plug permits building a queue of related requests by holding the I/O
* fragments for a short period. This allows merging of sequential requests
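
The plugging interface described by the comment above is used roughly as follows; blk_start_plug()/blk_finish_plug() are existing block-layer calls, while submit_batch_plugged() and its bio_list walk are placeholders invented for this sketch:

#include <linux/bio.h>
#include <linux/blkdev.h>

static void submit_batch_plugged(struct bio_list *bios)
{
	struct blk_plug plug;
	struct bio *bio;

	blk_start_plug(&plug);
	/* I/O issued inside the plug is held briefly so adjacent requests can merge. */
	while ((bio = bio_list_pop(bios)))
		submit_bio(bio->bi_rw, bio);
	blk_finish_plug(&plug);
}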

@@ -244,7 +244,7 @@ TRACE_EVENT(block_bio_bounce,
__entry->dev = bio->bi_bdev ?
bio->bi_bdev->bd_dev : 0;
__entry->sector = bio->bi_sector;
-__entry->nr_sector = bio->bi_size >> 9;
+__entry->nr_sector = bio_sectors(bio);
blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
),
@@ -281,7 +281,7 @@ TRACE_EVENT(block_bio_complete,
TP_fast_assign(
__entry->dev = bio->bi_bdev->bd_dev;
__entry->sector = bio->bi_sector;
-__entry->nr_sector = bio->bi_size >> 9;
+__entry->nr_sector = bio_sectors(bio);
__entry->error = error;
blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
),
@@ -309,7 +309,7 @@ DECLARE_EVENT_CLASS(block_bio_merge,
TP_fast_assign(
__entry->dev = bio->bi_bdev->bd_dev;
__entry->sector = bio->bi_sector;
-__entry->nr_sector = bio->bi_size >> 9;
+__entry->nr_sector = bio_sectors(bio);
blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
),
@@ -376,7 +376,7 @@ TRACE_EVENT(block_bio_queue,
TP_fast_assign(
__entry->dev = bio->bi_bdev->bd_dev;
__entry->sector = bio->bi_sector;
-__entry->nr_sector = bio->bi_size >> 9;
+__entry->nr_sector = bio_sectors(bio);
blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
),
@@ -404,7 +404,7 @@ DECLARE_EVENT_CLASS(block_get_rq,
TP_fast_assign(
__entry->dev = bio ? bio->bi_bdev->bd_dev : 0;
__entry->sector = bio ? bio->bi_sector : 0;
-__entry->nr_sector = bio ? bio->bi_size >> 9 : 0;
+__entry->nr_sector = bio ? bio_sectors(bio) : 0;
blk_fill_rwbs(__entry->rwbs,
bio ? bio->bi_rw : 0, __entry->nr_sector);
memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
@@ -580,7 +580,7 @@ TRACE_EVENT(block_bio_remap,
TP_fast_assign(
__entry->dev = bio->bi_bdev->bd_dev;
__entry->sector = bio->bi_sector;
-__entry->nr_sector = bio->bi_size >> 9;
+__entry->nr_sector = bio_sectors(bio);
__entry->old_dev = dev;
__entry->old_sector = from;
blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
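
The repeated nr_sector change above is a helper substitution rather than a behavioural change; assuming the 3.10-era definition in bio.h, the helper expands to the same shift:

/* Assumed 3.10-era helper, shown for reference only. */
#define bio_sectors(bio)	((bio)->bi_size >> 9)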

@@ -183,7 +183,6 @@ DECLARE_EVENT_CLASS(writeback_work_class,
DEFINE_EVENT(writeback_work_class, name, \
TP_PROTO(struct backing_dev_info *bdi, struct wb_writeback_work *work), \
TP_ARGS(bdi, work))
DEFINE_WRITEBACK_WORK_EVENT(writeback_nothread);
DEFINE_WRITEBACK_WORK_EVENT(writeback_queue);
DEFINE_WRITEBACK_WORK_EVENT(writeback_exec);
DEFINE_WRITEBACK_WORK_EVENT(writeback_start);
@@ -222,12 +221,8 @@ DEFINE_EVENT(writeback_class, name, \

DEFINE_WRITEBACK_EVENT(writeback_nowork);
DEFINE_WRITEBACK_EVENT(writeback_wake_background);
DEFINE_WRITEBACK_EVENT(writeback_wake_thread);
DEFINE_WRITEBACK_EVENT(writeback_wake_forker_thread);
DEFINE_WRITEBACK_EVENT(writeback_bdi_register);
DEFINE_WRITEBACK_EVENT(writeback_bdi_unregister);
DEFINE_WRITEBACK_EVENT(writeback_thread_start);
DEFINE_WRITEBACK_EVENT(writeback_thread_stop);

DECLARE_EVENT_CLASS(wbc_class,
TP_PROTO(struct writeback_control *wbc, struct backing_dev_info *bdi),

@@ -234,7 +234,6 @@ static void relay_destroy_buf(struct rchan_buf *buf)
static void relay_remove_buf(struct kref *kref)
{
struct rchan_buf *buf = container_of(kref, struct rchan_buf, kref);
buf->chan->cb->remove_buf_file(buf->dentry);
relay_destroy_buf(buf);
}

@@ -484,6 +483,7 @@ static void relay_close_buf(struct rchan_buf *buf)
{
buf->finalized = 1;
del_timer_sync(&buf->timer);
buf->chan->cb->remove_buf_file(buf->dentry);
kref_put(&buf->kref, relay_remove_buf);
}

mm/backing-dev.c
@@ -31,13 +31,14 @@ EXPORT_SYMBOL_GPL(noop_backing_dev_info);
static struct class *bdi_class;

/*
-* bdi_lock protects updates to bdi_list and bdi_pending_list, as well as
-* reader side protection for bdi_pending_list. bdi_list has RCU reader side
+* bdi_lock protects updates to bdi_list. bdi_list has RCU reader side
* locking.
*/
DEFINE_SPINLOCK(bdi_lock);
LIST_HEAD(bdi_list);
LIST_HEAD(bdi_pending_list);

/* bdi_wq serves all asynchronous writeback tasks */
struct workqueue_struct *bdi_wq;

void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2)
{
@@ -257,6 +258,11 @@ static int __init default_bdi_init(void)
{
int err;

bdi_wq = alloc_workqueue("writeback", WQ_MEM_RECLAIM | WQ_FREEZABLE |
WQ_UNBOUND | WQ_SYSFS, 0);
if (!bdi_wq)
return -ENOMEM;

err = bdi_init(&default_backing_dev_info);
if (!err)
bdi_register(&default_backing_dev_info, NULL, "default");
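
With bdi_wq allocated above, per-bdi writeback becomes plain delayed work. A hedged sketch of how a wakeup is expressed under the new scheme; example_kick_writeback() is an invented name, and the real caller is the bdi_wakeup_thread_delayed() shown later in this diff:

#include <linux/backing-dev.h>
#include <linux/workqueue.h>

static void example_kick_writeback(struct backing_dev_info *bdi,
				   unsigned long delay)
{
	/* Re-arm (or shorten) the per-bdi writeback work on the shared workqueue. */
	mod_delayed_work(bdi_wq, &bdi->wb.dwork, delay);
}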
@@ -271,26 +277,6 @@ int bdi_has_dirty_io(struct backing_dev_info *bdi)
return wb_has_dirty_io(&bdi->wb);
}

static void wakeup_timer_fn(unsigned long data)
{
struct backing_dev_info *bdi = (struct backing_dev_info *)data;

spin_lock_bh(&bdi->wb_lock);
if (bdi->wb.task) {
trace_writeback_wake_thread(bdi);
wake_up_process(bdi->wb.task);
} else if (bdi->dev) {
/*
* When bdi tasks are inactive for long time, they are killed.
* In this case we have to wake-up the forker thread which
* should create and run the bdi thread.
*/
trace_writeback_wake_forker_thread(bdi);
wake_up_process(default_backing_dev_info.wb.task);
}
spin_unlock_bh(&bdi->wb_lock);
}

/*
* This function is used when the first inode for this bdi is marked dirty. It
* wakes-up the corresponding bdi thread which should then take care of the
@@ -307,176 +293,7 @@ void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi)
unsigned long timeout;

timeout = msecs_to_jiffies(dirty_writeback_interval * 10);
mod_timer(&bdi->wb.wakeup_timer, jiffies + timeout);
}

/*
* Calculate the longest interval (jiffies) bdi threads are allowed to be
* inactive.
*/
static unsigned long bdi_longest_inactive(void)
{
unsigned long interval;

interval = msecs_to_jiffies(dirty_writeback_interval * 10);
return max(5UL * 60 * HZ, interval);
}

/*
* Clear pending bit and wakeup anybody waiting for flusher thread creation or
* shutdown
*/
static void bdi_clear_pending(struct backing_dev_info *bdi)
{
clear_bit(BDI_pending, &bdi->state);
smp_mb__after_clear_bit();
wake_up_bit(&bdi->state, BDI_pending);
}

static int bdi_forker_thread(void *ptr)
{
struct bdi_writeback *me = ptr;

current->flags |= PF_SWAPWRITE;
set_freezable();

/*
* Our parent may run at a different priority, just set us to normal
*/
set_user_nice(current, 0);

for (;;) {
struct task_struct *task = NULL;
struct backing_dev_info *bdi;
enum {
NO_ACTION, /* Nothing to do */
FORK_THREAD, /* Fork bdi thread */
KILL_THREAD, /* Kill inactive bdi thread */
} action = NO_ACTION;

/*
* Temporary measure, we want to make sure we don't see
* dirty data on the default backing_dev_info
*/
if (wb_has_dirty_io(me) || !list_empty(&me->bdi->work_list)) {
del_timer(&me->wakeup_timer);
wb_do_writeback(me, 0);
}

spin_lock_bh(&bdi_lock);
/*
* In the following loop we are going to check whether we have
* some work to do without any synchronization with tasks
* waking us up to do work for them. Set the task state here
* so that we don't miss wakeups after verifying conditions.
*/
set_current_state(TASK_INTERRUPTIBLE);

list_for_each_entry(bdi, &bdi_list, bdi_list) {
bool have_dirty_io;

if (!bdi_cap_writeback_dirty(bdi) ||
bdi_cap_flush_forker(bdi))
continue;

WARN(!test_bit(BDI_registered, &bdi->state),
"bdi %p/%s is not registered!\n", bdi, bdi->name);

have_dirty_io = !list_empty(&bdi->work_list) ||
wb_has_dirty_io(&bdi->wb);

/*
* If the bdi has work to do, but the thread does not
* exist - create it.
*/
if (!bdi->wb.task && have_dirty_io) {
/*
* Set the pending bit - if someone will try to
* unregister this bdi - it'll wait on this bit.
*/
set_bit(BDI_pending, &bdi->state);
action = FORK_THREAD;
break;
}

spin_lock(&bdi->wb_lock);

/*
* If there is no work to do and the bdi thread was
* inactive long enough - kill it. The wb_lock is taken
* to make sure no-one adds more work to this bdi and
* wakes the bdi thread up.
*/
if (bdi->wb.task && !have_dirty_io &&
time_after(jiffies, bdi->wb.last_active +
bdi_longest_inactive())) {
task = bdi->wb.task;
bdi->wb.task = NULL;
spin_unlock(&bdi->wb_lock);
set_bit(BDI_pending, &bdi->state);
action = KILL_THREAD;
break;
}
spin_unlock(&bdi->wb_lock);
}
spin_unlock_bh(&bdi_lock);

/* Keep working if default bdi still has things to do */
if (!list_empty(&me->bdi->work_list))
__set_current_state(TASK_RUNNING);

switch (action) {
case FORK_THREAD:
__set_current_state(TASK_RUNNING);
task = kthread_create(bdi_writeback_thread, &bdi->wb,
"flush-%s", dev_name(bdi->dev));
if (IS_ERR(task)) {
/*
* If thread creation fails, force writeout of
* the bdi from the thread. Hopefully 1024 is
* large enough for efficient IO.
*/
writeback_inodes_wb(&bdi->wb, 1024,
WB_REASON_FORKER_THREAD);
} else {
/*
* The spinlock makes sure we do not lose
* wake-ups when racing with 'bdi_queue_work()'.
* And as soon as the bdi thread is visible, we
* can start it.
*/
spin_lock_bh(&bdi->wb_lock);
bdi->wb.task = task;
spin_unlock_bh(&bdi->wb_lock);
wake_up_process(task);
}
bdi_clear_pending(bdi);
break;

case KILL_THREAD:
__set_current_state(TASK_RUNNING);
kthread_stop(task);
bdi_clear_pending(bdi);
break;

case NO_ACTION:
if (!wb_has_dirty_io(me) || !dirty_writeback_interval)
/*
* There are no dirty data. The only thing we
* should now care about is checking for
* inactive bdi threads and killing them. Thus,
* let's sleep for longer time, save energy and
* be friendly for battery-driven devices.
*/
schedule_timeout(bdi_longest_inactive());
else
schedule_timeout(msecs_to_jiffies(dirty_writeback_interval * 10));
try_to_freeze();
break;
}
}

return 0;
mod_delayed_work(bdi_wq, &bdi->wb.dwork, timeout);
}

/*
@@ -489,6 +306,9 @@ static void bdi_remove_from_list(struct backing_dev_info *bdi)
spin_unlock_bh(&bdi_lock);

synchronize_rcu_expedited();

/* bdi_list is now unused, clear it to mark @bdi dying */
INIT_LIST_HEAD(&bdi->bdi_list);
}

int bdi_register(struct backing_dev_info *bdi, struct device *parent,
@@ -508,20 +328,6 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,

bdi->dev = dev;

/*
* Just start the forker thread for our default backing_dev_info,
* and add other bdi's to the list. They will get a thread created
* on-demand when they need it.
*/
if (bdi_cap_flush_forker(bdi)) {
struct bdi_writeback *wb = &bdi->wb;

wb->task = kthread_run(bdi_forker_thread, wb, "bdi-%s",
dev_name(dev));
if (IS_ERR(wb->task))
return PTR_ERR(wb->task);
}

bdi_debug_register(bdi, dev_name(dev));
set_bit(BDI_registered, &bdi->state);

@@ -545,8 +351,6 @@ EXPORT_SYMBOL(bdi_register_dev);
*/
static void bdi_wb_shutdown(struct backing_dev_info *bdi)
{
struct task_struct *task;

if (!bdi_cap_writeback_dirty(bdi))
return;

@@ -556,22 +360,20 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi)
bdi_remove_from_list(bdi);

/*
-* If setup is pending, wait for that to complete first
+* Drain work list and shutdown the delayed_work. At this point,
+* @bdi->bdi_list is empty telling bdi_writeback_workfn() that @bdi
+* is dying and its work_list needs to be drained no matter what.
*/
-wait_on_bit(&bdi->state, BDI_pending, bdi_sched_wait,
-TASK_UNINTERRUPTIBLE);
+mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
+flush_delayed_work(&bdi->wb.dwork);
+WARN_ON(!list_empty(&bdi->work_list));

/*
-* Finally, kill the kernel thread. We don't need to be RCU
-* safe anymore, since the bdi is gone from visibility.
+* This shouldn't be necessary unless @bdi for some reason has
+* unflushed dirty IO after work_list is drained. Do it anyway
+* just in case.
*/
-spin_lock_bh(&bdi->wb_lock);
-task = bdi->wb.task;
-bdi->wb.task = NULL;
-spin_unlock_bh(&bdi->wb_lock);

-if (task)
-kthread_stop(task);
+cancel_delayed_work_sync(&bdi->wb.dwork);
}

/*
@@ -597,10 +399,8 @@ void bdi_unregister(struct backing_dev_info *bdi)
bdi_set_min_ratio(bdi, 0);
trace_writeback_bdi_unregister(bdi);
bdi_prune_sb(bdi);
del_timer_sync(&bdi->wb.wakeup_timer);

-if (!bdi_cap_flush_forker(bdi))
-bdi_wb_shutdown(bdi);
+bdi_wb_shutdown(bdi);
bdi_debug_unregister(bdi);

spin_lock_bh(&bdi->wb_lock);
@@ -622,7 +422,7 @@ static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi)
INIT_LIST_HEAD(&wb->b_io);
INIT_LIST_HEAD(&wb->b_more_io);
spin_lock_init(&wb->list_lock);
-setup_timer(&wb->wakeup_timer, wakeup_timer_fn, (unsigned long)bdi);
+INIT_DELAYED_WORK(&wb->dwork, bdi_writeback_workfn);
}

/*
@@ -695,12 +495,11 @@ void bdi_destroy(struct backing_dev_info *bdi)
bdi_unregister(bdi);

/*
-* If bdi_unregister() had already been called earlier, the
-* wakeup_timer could still be armed because bdi_prune_sb()
-* can race with the bdi_wakeup_thread_delayed() calls from
-* __mark_inode_dirty().
+* If bdi_unregister() had already been called earlier, the dwork
+* could still be pending because bdi_prune_sb() can race with the
+* bdi_wakeup_thread_delayed() calls from __mark_inode_dirty().
*/
-del_timer_sync(&bdi->wb.wakeup_timer);
+cancel_delayed_work_sync(&bdi->wb.dwork);

for (i = 0; i < NR_BDI_STAT_ITEMS; i++)
percpu_counter_destroy(&bdi->bdi_stat[i]);

mm/bounce.c
@@ -101,7 +101,7 @@ static void copy_to_high_bio_irq(struct bio *to, struct bio *from)
struct bio_vec *tovec, *fromvec;
int i;

-__bio_for_each_segment(tovec, to, i, 0) {
+bio_for_each_segment(tovec, to, i) {
fromvec = from->bi_io_vec + i;

/*
@@ -134,7 +134,7 @@ static void bounce_end_io(struct bio *bio, mempool_t *pool, int err)
/*
* free up bounce indirect pages used
*/
-__bio_for_each_segment(bvec, bio, i, 0) {
+bio_for_each_segment_all(bvec, bio, i) {
org_vec = bio_orig->bi_io_vec + i;
if (bvec->bv_page == org_vec->bv_page)
continue;
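
The iterator switch in the two hunks above is the behavioural point: in this kernel generation bio_for_each_segment() starts at the bio's current index (bi_idx), while bio_for_each_segment_all() visits every bvec the bio owns, which is what the bounce completion path needs. A hedged sketch; count_owned_pages() is a made-up helper:

#include <linux/bio.h>

static unsigned int count_owned_pages(struct bio *bio)
{
	struct bio_vec *bvec;
	unsigned int n = 0;
	int i;

	/* Walks bi_io_vec[0..bi_vcnt), ignoring any partial-completion index. */
	bio_for_each_segment_all(bvec, bio, i)
		n++;

	return n;
}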
@@ -199,78 +199,43 @@ static int must_snapshot_stable_pages(struct request_queue *q, struct bio *bio)
static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
mempool_t *pool, int force)
{
struct page *page;
struct bio *bio = NULL;
int i, rw = bio_data_dir(*bio_orig);
struct bio *bio;
int rw = bio_data_dir(*bio_orig);
struct bio_vec *to, *from;
unsigned i;

bio_for_each_segment(from, *bio_orig, i) {
page = from->bv_page;
bio_for_each_segment(from, *bio_orig, i)
if (page_to_pfn(from->bv_page) > queue_bounce_pfn(q))
goto bounce;

return;
bounce:
bio = bio_clone_bioset(*bio_orig, GFP_NOIO, fs_bio_set);

bio_for_each_segment_all(to, bio, i) {
struct page *page = to->bv_page;

/*
* is destination page below bounce pfn?
*/
if (page_to_pfn(page) <= queue_bounce_pfn(q) && !force)
continue;

/*
* irk, bounce it
*/
if (!bio) {
unsigned int cnt = (*bio_orig)->bi_vcnt;

bio = bio_alloc(GFP_NOIO, cnt);
memset(bio->bi_io_vec, 0, cnt * sizeof(struct bio_vec));
}

to = bio->bi_io_vec + i;

to->bv_page = mempool_alloc(pool, q->bounce_gfp);
to->bv_len = from->bv_len;
to->bv_offset = from->bv_offset;
inc_zone_page_state(to->bv_page, NR_BOUNCE);
to->bv_page = mempool_alloc(pool, q->bounce_gfp);

if (rw == WRITE) {
char *vto, *vfrom;

flush_dcache_page(from->bv_page);
flush_dcache_page(page);

vto = page_address(to->bv_page) + to->bv_offset;
vfrom = kmap(from->bv_page) + from->bv_offset;
vfrom = kmap_atomic(page) + to->bv_offset;
memcpy(vto, vfrom, to->bv_len);
kunmap(from->bv_page);
kunmap_atomic(vfrom);
}
}

/*
* no pages bounced
*/
if (!bio)
return;

trace_block_bio_bounce(q, *bio_orig);

/*
* at least one page was bounced, fill in possible non-highmem
* pages
*/
__bio_for_each_segment(from, *bio_orig, i, 0) {
to = bio_iovec_idx(bio, i);
if (!to->bv_page) {
to->bv_page = from->bv_page;
to->bv_len = from->bv_len;
to->bv_offset = from->bv_offset;
}
}

bio->bi_bdev = (*bio_orig)->bi_bdev;
bio->bi_flags |= (1 << BIO_BOUNCED);
bio->bi_sector = (*bio_orig)->bi_sector;
bio->bi_rw = (*bio_orig)->bi_rw;

bio->bi_vcnt = (*bio_orig)->bi_vcnt;
bio->bi_idx = (*bio_orig)->bi_idx;
bio->bi_size = (*bio_orig)->bi_size;

if (pool == page_pool) {
bio->bi_end_io = bounce_end_io_write;

@@ -36,7 +36,6 @@ static struct bio *get_swap_bio(gfp_t gfp_flags,
bio->bi_io_vec[0].bv_len = PAGE_SIZE;
bio->bi_io_vec[0].bv_offset = 0;
bio->bi_vcnt = 1;
bio->bi_idx = 0;
bio->bi_size = PAGE_SIZE;
bio->bi_end_io = end_io;
}