mirror of
https://github.com/FEX-Emu/linux.git
synced 2025-01-13 12:53:27 +00:00
btrfs: add framework to handle device flush error as a volume
This adds comments to the flush error handling code and introduces a framework that can be used to handle flush errors at the volume level, while keeping the existing logic unchanged. Signed-off-by: Anand Jain <anand.jain@oracle.com> Reviewed-by: David Sterba <dsterba@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
parent
6b349dfe80
commit
401b41e5a8
@ -3509,6 +3509,10 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
|
|||||||
if (wait) {
|
if (wait) {
|
||||||
bio = device->flush_bio;
|
bio = device->flush_bio;
|
||||||
if (!bio)
|
if (!bio)
|
||||||
|
/*
|
||||||
|
* This means the alloc has failed with ENOMEM, however
|
||||||
|
* here we return 0, as it's not a device error.
|
||||||
|
*/
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
wait_for_completion(&device->flush_wait);
|
wait_for_completion(&device->flush_wait);
|
||||||
@ -3548,6 +3552,32 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int check_barrier_error(struct btrfs_fs_devices *fsdevs)
|
||||||
|
{
|
||||||
|
int submit_flush_error = 0;
|
||||||
|
int dev_flush_error = 0;
|
||||||
|
struct btrfs_device *dev;
|
||||||
|
int tolerance;
|
||||||
|
|
||||||
|
list_for_each_entry_rcu(dev, &fsdevs->devices, dev_list) {
|
||||||
|
if (!dev->bdev) {
|
||||||
|
submit_flush_error++;
|
||||||
|
dev_flush_error++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (dev->last_flush_error == -ENOMEM)
|
||||||
|
submit_flush_error++;
|
||||||
|
if (dev->last_flush_error && dev->last_flush_error != -ENOMEM)
|
||||||
|
dev_flush_error++;
|
||||||
|
}
|
||||||
|
|
||||||
|
tolerance = fsdevs->fs_info->num_tolerated_disk_barrier_failures;
|
||||||
|
if (submit_flush_error > tolerance || dev_flush_error > tolerance)
|
||||||
|
return -EIO;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* send an empty flush down to each device in parallel,
|
* send an empty flush down to each device in parallel,
|
||||||
* then wait for them
|
* then wait for them
|
||||||
@ -3575,6 +3605,7 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
|
|||||||
ret = write_dev_flush(dev, 0);
|
ret = write_dev_flush(dev, 0);
|
||||||
if (ret)
|
if (ret)
|
||||||
errors_send++;
|
errors_send++;
|
||||||
|
dev->last_flush_error = ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* wait for all the barriers */
|
/* wait for all the barriers */
|
||||||
@ -3589,12 +3620,30 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
|
|||||||
continue;
|
continue;
|
||||||
|
|
||||||
ret = write_dev_flush(dev, 1);
|
ret = write_dev_flush(dev, 1);
|
||||||
if (ret)
|
if (ret) {
|
||||||
|
dev->last_flush_error = ret;
|
||||||
errors_wait++;
|
errors_wait++;
|
||||||
}
|
}
|
||||||
if (errors_send > info->num_tolerated_disk_barrier_failures ||
|
}
|
||||||
errors_wait > info->num_tolerated_disk_barrier_failures)
|
|
||||||
return -EIO;
|
/*
|
||||||
|
* Try hard in case of flush. Let's say, in RAID1 we have
|
||||||
|
* the following situation
|
||||||
|
* dev1: EIO dev2: ENOMEM
|
||||||
|
* this is not a fatal error as we hope to recover from
|
||||||
|
* ENOMEM in the next attempt to flush.
|
||||||
|
* But the following is considered as fatal
|
||||||
|
* dev1: ENOMEM dev2: ENOMEM
|
||||||
|
* dev1: bdev == NULL dev2: ENOMEM
|
||||||
|
*/
|
||||||
|
if (errors_send || errors_wait) {
|
||||||
|
/*
|
||||||
|
* At some point we need the status of all disks
|
||||||
|
* to arrive at the volume status. So error checking
|
||||||
|
* is being pushed to a separate loop.
|
||||||
|
*/
|
||||||
|
return check_barrier_error(info->fs_devices);
|
||||||
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -74,6 +74,7 @@ struct btrfs_device {
|
|||||||
int missing;
|
int missing;
|
||||||
int can_discard;
|
int can_discard;
|
||||||
int is_tgtdev_for_dev_replace;
|
int is_tgtdev_for_dev_replace;
|
||||||
|
int last_flush_error;
|
||||||
|
|
||||||
#ifdef __BTRFS_NEED_DEVICE_DATA_ORDERED
|
#ifdef __BTRFS_NEED_DEVICE_DATA_ORDERED
|
||||||
seqcount_t data_seqcount;
|
seqcount_t data_seqcount;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user