mirror of
https://github.com/FEX-Emu/linux.git
synced 2024-12-15 13:22:55 +00:00
Merge branch 'for-linus-4.4' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs updates from Chris Mason: "We have a lot of subvolume quota improvements in here, along with big piles of cleanups from Dave Sterba and Anand Jain and others. Josef pitched in a batch of allocator fixes based on production use here at FB. We found that mount -o ssd_spread greatly improved our performance on hardware raid5/6, but it exposed some CPU bottlenecks in the allocator. These patches make a huge difference" * 'for-linus-4.4' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (100 commits) Btrfs: fix hole punching when using the no-holes feature Btrfs: find_free_extent: Do not erroneously skip LOOP_CACHING_WAIT state btrfs: Fix a data space underflow warning btrfs: qgroup: Fix a rebase bug which will cause qgroup double free btrfs: qgroup: Fix a race in delayed_ref which leads to abort trans btrfs: clear PF_NOFREEZE in cleaner_kthread() btrfs: qgroup: Don't copy extent buffer to do qgroup rescan btrfs: add balance filters limits, stripes and usage to supported mask btrfs: extend balance filter usage to take minimum and maximum btrfs: add balance filter for stripes btrfs: extend balance filter limit to take minimum and maximum btrfs: fix use after free iterating extrefs btrfs: check unsupported filters in balance arguments Btrfs: fix regression running delayed references when using qgroups Btrfs: fix regression when running delayed references Btrfs: don't do extra bitmap search in one bit case Btrfs: keep track of largest extent in bitmaps Btrfs: don't keep trying to build clusters if we are fragmented Btrfs: cut down on loops through the allocator Btrfs: don't continue setting up space cache when enospc ...
This commit is contained in:
commit
27eb427bdc
@ -362,6 +362,12 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (btrfs_test_is_dummy_root(root)) {
|
||||
srcu_read_unlock(&fs_info->subvol_srcu, index);
|
||||
ret = -ENOENT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (path->search_commit_root)
|
||||
root_level = btrfs_header_level(root->commit_root);
|
||||
else if (time_seq == (u64)-1)
|
||||
|
@ -667,7 +667,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
|
||||
selected_super = kzalloc(sizeof(*selected_super), GFP_NOFS);
|
||||
if (NULL == selected_super) {
|
||||
printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
|
||||
return -1;
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
list_for_each_entry(device, dev_head, dev_list) {
|
||||
@ -845,8 +845,8 @@ static int btrfsic_process_superblock_dev_mirror(
|
||||
superblock_tmp->never_written = 0;
|
||||
superblock_tmp->mirror_num = 1 + superblock_mirror_num;
|
||||
if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
|
||||
printk_in_rcu(KERN_INFO "New initial S-block (bdev %p, %s)"
|
||||
" @%llu (%s/%llu/%d)\n",
|
||||
btrfs_info_in_rcu(device->dev_root->fs_info,
|
||||
"new initial S-block (bdev %p, %s) @%llu (%s/%llu/%d)",
|
||||
superblock_bdev,
|
||||
rcu_str_deref(device->name), dev_bytenr,
|
||||
dev_state->name, dev_bytenr,
|
||||
@ -1660,7 +1660,7 @@ static int btrfsic_read_block(struct btrfsic_state *state,
|
||||
sizeof(*block_ctx->pagev)) *
|
||||
num_pages, GFP_NOFS);
|
||||
if (!block_ctx->mem_to_free)
|
||||
return -1;
|
||||
return -ENOMEM;
|
||||
block_ctx->datav = block_ctx->mem_to_free;
|
||||
block_ctx->pagev = (struct page **)(block_ctx->datav + num_pages);
|
||||
for (i = 0; i < num_pages; i++) {
|
||||
|
@ -745,11 +745,13 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct list_head comp_idle_workspace[BTRFS_COMPRESS_TYPES];
|
||||
static spinlock_t comp_workspace_lock[BTRFS_COMPRESS_TYPES];
|
||||
static int comp_num_workspace[BTRFS_COMPRESS_TYPES];
|
||||
static atomic_t comp_alloc_workspace[BTRFS_COMPRESS_TYPES];
|
||||
static wait_queue_head_t comp_workspace_wait[BTRFS_COMPRESS_TYPES];
|
||||
static struct {
|
||||
struct list_head idle_ws;
|
||||
spinlock_t ws_lock;
|
||||
int num_ws;
|
||||
atomic_t alloc_ws;
|
||||
wait_queue_head_t ws_wait;
|
||||
} btrfs_comp_ws[BTRFS_COMPRESS_TYPES];
|
||||
|
||||
static const struct btrfs_compress_op * const btrfs_compress_op[] = {
|
||||
&btrfs_zlib_compress,
|
||||
@ -761,10 +763,10 @@ void __init btrfs_init_compress(void)
|
||||
int i;
|
||||
|
||||
for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
|
||||
INIT_LIST_HEAD(&comp_idle_workspace[i]);
|
||||
spin_lock_init(&comp_workspace_lock[i]);
|
||||
atomic_set(&comp_alloc_workspace[i], 0);
|
||||
init_waitqueue_head(&comp_workspace_wait[i]);
|
||||
INIT_LIST_HEAD(&btrfs_comp_ws[i].idle_ws);
|
||||
spin_lock_init(&btrfs_comp_ws[i].ws_lock);
|
||||
atomic_set(&btrfs_comp_ws[i].alloc_ws, 0);
|
||||
init_waitqueue_head(&btrfs_comp_ws[i].ws_wait);
|
||||
}
|
||||
}
|
||||
|
||||
@ -778,38 +780,38 @@ static struct list_head *find_workspace(int type)
|
||||
int cpus = num_online_cpus();
|
||||
int idx = type - 1;
|
||||
|
||||
struct list_head *idle_workspace = &comp_idle_workspace[idx];
|
||||
spinlock_t *workspace_lock = &comp_workspace_lock[idx];
|
||||
atomic_t *alloc_workspace = &comp_alloc_workspace[idx];
|
||||
wait_queue_head_t *workspace_wait = &comp_workspace_wait[idx];
|
||||
int *num_workspace = &comp_num_workspace[idx];
|
||||
struct list_head *idle_ws = &btrfs_comp_ws[idx].idle_ws;
|
||||
spinlock_t *ws_lock = &btrfs_comp_ws[idx].ws_lock;
|
||||
atomic_t *alloc_ws = &btrfs_comp_ws[idx].alloc_ws;
|
||||
wait_queue_head_t *ws_wait = &btrfs_comp_ws[idx].ws_wait;
|
||||
int *num_ws = &btrfs_comp_ws[idx].num_ws;
|
||||
again:
|
||||
spin_lock(workspace_lock);
|
||||
if (!list_empty(idle_workspace)) {
|
||||
workspace = idle_workspace->next;
|
||||
spin_lock(ws_lock);
|
||||
if (!list_empty(idle_ws)) {
|
||||
workspace = idle_ws->next;
|
||||
list_del(workspace);
|
||||
(*num_workspace)--;
|
||||
spin_unlock(workspace_lock);
|
||||
(*num_ws)--;
|
||||
spin_unlock(ws_lock);
|
||||
return workspace;
|
||||
|
||||
}
|
||||
if (atomic_read(alloc_workspace) > cpus) {
|
||||
if (atomic_read(alloc_ws) > cpus) {
|
||||
DEFINE_WAIT(wait);
|
||||
|
||||
spin_unlock(workspace_lock);
|
||||
prepare_to_wait(workspace_wait, &wait, TASK_UNINTERRUPTIBLE);
|
||||
if (atomic_read(alloc_workspace) > cpus && !*num_workspace)
|
||||
spin_unlock(ws_lock);
|
||||
prepare_to_wait(ws_wait, &wait, TASK_UNINTERRUPTIBLE);
|
||||
if (atomic_read(alloc_ws) > cpus && !*num_ws)
|
||||
schedule();
|
||||
finish_wait(workspace_wait, &wait);
|
||||
finish_wait(ws_wait, &wait);
|
||||
goto again;
|
||||
}
|
||||
atomic_inc(alloc_workspace);
|
||||
spin_unlock(workspace_lock);
|
||||
atomic_inc(alloc_ws);
|
||||
spin_unlock(ws_lock);
|
||||
|
||||
workspace = btrfs_compress_op[idx]->alloc_workspace();
|
||||
if (IS_ERR(workspace)) {
|
||||
atomic_dec(alloc_workspace);
|
||||
wake_up(workspace_wait);
|
||||
atomic_dec(alloc_ws);
|
||||
wake_up(ws_wait);
|
||||
}
|
||||
return workspace;
|
||||
}
|
||||
@ -821,27 +823,30 @@ again:
|
||||
static void free_workspace(int type, struct list_head *workspace)
|
||||
{
|
||||
int idx = type - 1;
|
||||
struct list_head *idle_workspace = &comp_idle_workspace[idx];
|
||||
spinlock_t *workspace_lock = &comp_workspace_lock[idx];
|
||||
atomic_t *alloc_workspace = &comp_alloc_workspace[idx];
|
||||
wait_queue_head_t *workspace_wait = &comp_workspace_wait[idx];
|
||||
int *num_workspace = &comp_num_workspace[idx];
|
||||
struct list_head *idle_ws = &btrfs_comp_ws[idx].idle_ws;
|
||||
spinlock_t *ws_lock = &btrfs_comp_ws[idx].ws_lock;
|
||||
atomic_t *alloc_ws = &btrfs_comp_ws[idx].alloc_ws;
|
||||
wait_queue_head_t *ws_wait = &btrfs_comp_ws[idx].ws_wait;
|
||||
int *num_ws = &btrfs_comp_ws[idx].num_ws;
|
||||
|
||||
spin_lock(workspace_lock);
|
||||
if (*num_workspace < num_online_cpus()) {
|
||||
list_add(workspace, idle_workspace);
|
||||
(*num_workspace)++;
|
||||
spin_unlock(workspace_lock);
|
||||
spin_lock(ws_lock);
|
||||
if (*num_ws < num_online_cpus()) {
|
||||
list_add(workspace, idle_ws);
|
||||
(*num_ws)++;
|
||||
spin_unlock(ws_lock);
|
||||
goto wake;
|
||||
}
|
||||
spin_unlock(workspace_lock);
|
||||
spin_unlock(ws_lock);
|
||||
|
||||
btrfs_compress_op[idx]->free_workspace(workspace);
|
||||
atomic_dec(alloc_workspace);
|
||||
atomic_dec(alloc_ws);
|
||||
wake:
|
||||
/*
|
||||
* Make sure counter is updated before we wake up waiters.
|
||||
*/
|
||||
smp_mb();
|
||||
if (waitqueue_active(workspace_wait))
|
||||
wake_up(workspace_wait);
|
||||
if (waitqueue_active(ws_wait))
|
||||
wake_up(ws_wait);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -853,11 +858,11 @@ static void free_workspaces(void)
|
||||
int i;
|
||||
|
||||
for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
|
||||
while (!list_empty(&comp_idle_workspace[i])) {
|
||||
workspace = comp_idle_workspace[i].next;
|
||||
while (!list_empty(&btrfs_comp_ws[i].idle_ws)) {
|
||||
workspace = btrfs_comp_ws[i].idle_ws.next;
|
||||
list_del(workspace);
|
||||
btrfs_compress_op[i]->free_workspace(workspace);
|
||||
atomic_dec(&comp_alloc_workspace[i]);
|
||||
atomic_dec(&btrfs_comp_ws[i].alloc_ws);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1011,7 +1011,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
|
||||
return ret;
|
||||
if (refs == 0) {
|
||||
ret = -EROFS;
|
||||
btrfs_std_error(root->fs_info, ret);
|
||||
btrfs_std_error(root->fs_info, ret, NULL);
|
||||
return ret;
|
||||
}
|
||||
} else {
|
||||
@ -1927,7 +1927,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
|
||||
child = read_node_slot(root, mid, 0);
|
||||
if (!child) {
|
||||
ret = -EROFS;
|
||||
btrfs_std_error(root->fs_info, ret);
|
||||
btrfs_std_error(root->fs_info, ret, NULL);
|
||||
goto enospc;
|
||||
}
|
||||
|
||||
@ -2030,7 +2030,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
|
||||
*/
|
||||
if (!left) {
|
||||
ret = -EROFS;
|
||||
btrfs_std_error(root->fs_info, ret);
|
||||
btrfs_std_error(root->fs_info, ret, NULL);
|
||||
goto enospc;
|
||||
}
|
||||
wret = balance_node_right(trans, root, mid, left);
|
||||
@ -4940,8 +4940,8 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
|
||||
{
|
||||
struct extent_buffer *leaf;
|
||||
struct btrfs_item *item;
|
||||
int last_off;
|
||||
int dsize = 0;
|
||||
u32 last_off;
|
||||
u32 dsize = 0;
|
||||
int ret = 0;
|
||||
int wret;
|
||||
int i;
|
||||
|
180
fs/btrfs/ctree.h
180
fs/btrfs/ctree.h
@ -823,8 +823,18 @@ struct btrfs_disk_balance_args {
|
||||
*/
|
||||
__le64 profiles;
|
||||
|
||||
/* usage filter */
|
||||
__le64 usage;
|
||||
/*
|
||||
* usage filter
|
||||
* BTRFS_BALANCE_ARGS_USAGE with a single value means '0..N'
|
||||
* BTRFS_BALANCE_ARGS_USAGE_RANGE - range syntax, min..max
|
||||
*/
|
||||
union {
|
||||
__le64 usage;
|
||||
struct {
|
||||
__le32 usage_min;
|
||||
__le32 usage_max;
|
||||
};
|
||||
};
|
||||
|
||||
/* devid filter */
|
||||
__le64 devid;
|
||||
@ -846,10 +856,27 @@ struct btrfs_disk_balance_args {
|
||||
/* BTRFS_BALANCE_ARGS_* */
|
||||
__le64 flags;
|
||||
|
||||
/* BTRFS_BALANCE_ARGS_LIMIT value */
|
||||
__le64 limit;
|
||||
/*
|
||||
* BTRFS_BALANCE_ARGS_LIMIT with value 'limit'
|
||||
* BTRFS_BALANCE_ARGS_LIMIT_RANGE - the extended version can use minimum
|
||||
* and maximum
|
||||
*/
|
||||
union {
|
||||
__le64 limit;
|
||||
struct {
|
||||
__le32 limit_min;
|
||||
__le32 limit_max;
|
||||
};
|
||||
};
|
||||
|
||||
__le64 unused[7];
|
||||
/*
|
||||
* Process chunks that cross stripes_min..stripes_max devices,
|
||||
* BTRFS_BALANCE_ARGS_STRIPES_RANGE
|
||||
*/
|
||||
__le32 stripes_min;
|
||||
__le32 stripes_max;
|
||||
|
||||
__le64 unused[6];
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
/*
|
||||
@ -1154,6 +1181,10 @@ struct btrfs_space_info {
|
||||
delalloc/allocations */
|
||||
u64 bytes_readonly; /* total bytes that are read only */
|
||||
|
||||
u64 max_extent_size; /* This will hold the maximum extent size of
|
||||
the space info if we had an ENOSPC in the
|
||||
allocator. */
|
||||
|
||||
unsigned int full:1; /* indicates that we cannot allocate any more
|
||||
chunks for this space */
|
||||
unsigned int chunk_alloc:1; /* set if we are allocating a chunk */
|
||||
@ -1228,6 +1259,9 @@ struct btrfs_free_cluster {
|
||||
/* first extent starting offset */
|
||||
u64 window_start;
|
||||
|
||||
/* We did a full search and couldn't create a cluster */
|
||||
bool fragmented;
|
||||
|
||||
struct btrfs_block_group_cache *block_group;
|
||||
/*
|
||||
* when a cluster is allocated from a block group, we put the
|
||||
@ -1943,6 +1977,9 @@ struct btrfs_root {
|
||||
int send_in_progress;
|
||||
struct btrfs_subvolume_writers *subv_writers;
|
||||
atomic_t will_be_snapshoted;
|
||||
|
||||
/* For qgroup metadata space reserve */
|
||||
atomic_t qgroup_meta_rsv;
|
||||
};
|
||||
|
||||
struct btrfs_ioctl_defrag_range_args {
|
||||
@ -2145,6 +2182,8 @@ struct btrfs_ioctl_defrag_range_args {
|
||||
#define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21)
|
||||
#define BTRFS_MOUNT_PANIC_ON_FATAL_ERROR (1 << 22)
|
||||
#define BTRFS_MOUNT_RESCAN_UUID_TREE (1 << 23)
|
||||
#define BTRFS_MOUNT_FRAGMENT_DATA (1 << 24)
|
||||
#define BTRFS_MOUNT_FRAGMENT_METADATA (1 << 25)
|
||||
|
||||
#define BTRFS_DEFAULT_COMMIT_INTERVAL (30)
|
||||
#define BTRFS_DEFAULT_MAX_INLINE (8192)
|
||||
@ -2169,6 +2208,18 @@ struct btrfs_ioctl_defrag_range_args {
|
||||
btrfs_clear_opt(root->fs_info->mount_opt, opt); \
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
static inline int
|
||||
btrfs_should_fragment_free_space(struct btrfs_root *root,
|
||||
struct btrfs_block_group_cache *block_group)
|
||||
{
|
||||
return (btrfs_test_opt(root, FRAGMENT_METADATA) &&
|
||||
block_group->flags & BTRFS_BLOCK_GROUP_METADATA) ||
|
||||
(btrfs_test_opt(root, FRAGMENT_DATA) &&
|
||||
block_group->flags & BTRFS_BLOCK_GROUP_DATA);
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Requests for changes that need to be done during transaction commit.
|
||||
*
|
||||
@ -3379,7 +3430,8 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
|
||||
int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
u64 root_objectid, u64 owner,
|
||||
u64 offset, struct btrfs_key *ins);
|
||||
u64 offset, u64 ram_bytes,
|
||||
struct btrfs_key *ins);
|
||||
int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
u64 root_objectid, u64 owner, u64 offset,
|
||||
@ -3398,7 +3450,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
|
||||
int btrfs_free_extent(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
|
||||
u64 owner, u64 offset, int no_quota);
|
||||
u64 owner, u64 offset);
|
||||
|
||||
int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len,
|
||||
int delalloc);
|
||||
@ -3411,7 +3463,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
|
||||
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
u64 bytenr, u64 num_bytes, u64 parent,
|
||||
u64 root_objectid, u64 owner, u64 offset, int no_quota);
|
||||
u64 root_objectid, u64 owner, u64 offset);
|
||||
|
||||
int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root);
|
||||
@ -3449,8 +3501,11 @@ enum btrfs_reserve_flush_enum {
|
||||
BTRFS_RESERVE_FLUSH_ALL,
|
||||
};
|
||||
|
||||
int btrfs_check_data_free_space(struct inode *inode, u64 bytes, u64 write_bytes);
|
||||
void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes);
|
||||
int btrfs_check_data_free_space(struct inode *inode, u64 start, u64 len);
|
||||
int btrfs_alloc_data_chunk_ondemand(struct inode *inode, u64 bytes);
|
||||
void btrfs_free_reserved_data_space(struct inode *inode, u64 start, u64 len);
|
||||
void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start,
|
||||
u64 len);
|
||||
void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root);
|
||||
void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans);
|
||||
@ -3466,8 +3521,8 @@ void btrfs_subvolume_release_metadata(struct btrfs_root *root,
|
||||
u64 qgroup_reserved);
|
||||
int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes);
|
||||
void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes);
|
||||
int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes);
|
||||
void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes);
|
||||
int btrfs_delalloc_reserve_space(struct inode *inode, u64 start, u64 len);
|
||||
void btrfs_delalloc_release_space(struct inode *inode, u64 start, u64 len);
|
||||
void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type);
|
||||
struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root,
|
||||
unsigned short type);
|
||||
@ -4004,8 +4059,8 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
|
||||
/* sysfs.c */
|
||||
int btrfs_init_sysfs(void);
|
||||
void btrfs_exit_sysfs(void);
|
||||
int btrfs_sysfs_add_one(struct btrfs_fs_info *fs_info);
|
||||
void btrfs_sysfs_remove_one(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_sysfs_add_mounted(struct btrfs_fs_info *fs_info);
|
||||
void btrfs_sysfs_remove_mounted(struct btrfs_fs_info *fs_info);
|
||||
|
||||
/* xattr.c */
|
||||
ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
|
||||
@ -4039,14 +4094,102 @@ void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
|
||||
#define btrfs_info(fs_info, fmt, args...) \
|
||||
btrfs_printk(fs_info, KERN_INFO fmt, ##args)
|
||||
|
||||
/*
|
||||
* Wrappers that use printk_in_rcu
|
||||
*/
|
||||
#define btrfs_emerg_in_rcu(fs_info, fmt, args...) \
|
||||
btrfs_printk_in_rcu(fs_info, KERN_EMERG fmt, ##args)
|
||||
#define btrfs_alert_in_rcu(fs_info, fmt, args...) \
|
||||
btrfs_printk_in_rcu(fs_info, KERN_ALERT fmt, ##args)
|
||||
#define btrfs_crit_in_rcu(fs_info, fmt, args...) \
|
||||
btrfs_printk_in_rcu(fs_info, KERN_CRIT fmt, ##args)
|
||||
#define btrfs_err_in_rcu(fs_info, fmt, args...) \
|
||||
btrfs_printk_in_rcu(fs_info, KERN_ERR fmt, ##args)
|
||||
#define btrfs_warn_in_rcu(fs_info, fmt, args...) \
|
||||
btrfs_printk_in_rcu(fs_info, KERN_WARNING fmt, ##args)
|
||||
#define btrfs_notice_in_rcu(fs_info, fmt, args...) \
|
||||
btrfs_printk_in_rcu(fs_info, KERN_NOTICE fmt, ##args)
|
||||
#define btrfs_info_in_rcu(fs_info, fmt, args...) \
|
||||
btrfs_printk_in_rcu(fs_info, KERN_INFO fmt, ##args)
|
||||
|
||||
/*
|
||||
* Wrappers that use a ratelimited printk_in_rcu
|
||||
*/
|
||||
#define btrfs_emerg_rl_in_rcu(fs_info, fmt, args...) \
|
||||
btrfs_printk_rl_in_rcu(fs_info, KERN_EMERG fmt, ##args)
|
||||
#define btrfs_alert_rl_in_rcu(fs_info, fmt, args...) \
|
||||
btrfs_printk_rl_in_rcu(fs_info, KERN_ALERT fmt, ##args)
|
||||
#define btrfs_crit_rl_in_rcu(fs_info, fmt, args...) \
|
||||
btrfs_printk_rl_in_rcu(fs_info, KERN_CRIT fmt, ##args)
|
||||
#define btrfs_err_rl_in_rcu(fs_info, fmt, args...) \
|
||||
btrfs_printk_rl_in_rcu(fs_info, KERN_ERR fmt, ##args)
|
||||
#define btrfs_warn_rl_in_rcu(fs_info, fmt, args...) \
|
||||
btrfs_printk_rl_in_rcu(fs_info, KERN_WARNING fmt, ##args)
|
||||
#define btrfs_notice_rl_in_rcu(fs_info, fmt, args...) \
|
||||
btrfs_printk_rl_in_rcu(fs_info, KERN_NOTICE fmt, ##args)
|
||||
#define btrfs_info_rl_in_rcu(fs_info, fmt, args...) \
|
||||
btrfs_printk_rl_in_rcu(fs_info, KERN_INFO fmt, ##args)
|
||||
|
||||
/*
|
||||
* Wrappers that use a ratelimited printk
|
||||
*/
|
||||
#define btrfs_emerg_rl(fs_info, fmt, args...) \
|
||||
btrfs_printk_ratelimited(fs_info, KERN_EMERG fmt, ##args)
|
||||
#define btrfs_alert_rl(fs_info, fmt, args...) \
|
||||
btrfs_printk_ratelimited(fs_info, KERN_ALERT fmt, ##args)
|
||||
#define btrfs_crit_rl(fs_info, fmt, args...) \
|
||||
btrfs_printk_ratelimited(fs_info, KERN_CRIT fmt, ##args)
|
||||
#define btrfs_err_rl(fs_info, fmt, args...) \
|
||||
btrfs_printk_ratelimited(fs_info, KERN_ERR fmt, ##args)
|
||||
#define btrfs_warn_rl(fs_info, fmt, args...) \
|
||||
btrfs_printk_ratelimited(fs_info, KERN_WARNING fmt, ##args)
|
||||
#define btrfs_notice_rl(fs_info, fmt, args...) \
|
||||
btrfs_printk_ratelimited(fs_info, KERN_NOTICE fmt, ##args)
|
||||
#define btrfs_info_rl(fs_info, fmt, args...) \
|
||||
btrfs_printk_ratelimited(fs_info, KERN_INFO fmt, ##args)
|
||||
#ifdef DEBUG
|
||||
#define btrfs_debug(fs_info, fmt, args...) \
|
||||
btrfs_printk(fs_info, KERN_DEBUG fmt, ##args)
|
||||
#define btrfs_debug_in_rcu(fs_info, fmt, args...) \
|
||||
btrfs_printk_in_rcu(fs_info, KERN_DEBUG fmt, ##args)
|
||||
#define btrfs_debug_rl_in_rcu(fs_info, fmt, args...) \
|
||||
btrfs_printk_rl_in_rcu(fs_info, KERN_DEBUG fmt, ##args)
|
||||
#define btrfs_debug_rl(fs_info, fmt, args...) \
|
||||
btrfs_printk_ratelimited(fs_info, KERN_DEBUG fmt, ##args)
|
||||
#else
|
||||
#define btrfs_debug(fs_info, fmt, args...) \
|
||||
no_printk(KERN_DEBUG fmt, ##args)
|
||||
#define btrfs_debug_in_rcu(fs_info, fmt, args...) \
|
||||
no_printk(KERN_DEBUG fmt, ##args)
|
||||
#define btrfs_debug_rl_in_rcu(fs_info, fmt, args...) \
|
||||
no_printk(KERN_DEBUG fmt, ##args)
|
||||
#define btrfs_debug_rl(fs_info, fmt, args...) \
|
||||
no_printk(KERN_DEBUG fmt, ##args)
|
||||
#endif
|
||||
|
||||
#define btrfs_printk_in_rcu(fs_info, fmt, args...) \
|
||||
do { \
|
||||
rcu_read_lock(); \
|
||||
btrfs_printk(fs_info, fmt, ##args); \
|
||||
rcu_read_unlock(); \
|
||||
} while (0)
|
||||
|
||||
#define btrfs_printk_ratelimited(fs_info, fmt, args...) \
|
||||
do { \
|
||||
static DEFINE_RATELIMIT_STATE(_rs, \
|
||||
DEFAULT_RATELIMIT_INTERVAL, \
|
||||
DEFAULT_RATELIMIT_BURST); \
|
||||
if (__ratelimit(&_rs)) \
|
||||
btrfs_printk(fs_info, fmt, ##args); \
|
||||
} while (0)
|
||||
|
||||
#define btrfs_printk_rl_in_rcu(fs_info, fmt, args...) \
|
||||
do { \
|
||||
rcu_read_lock(); \
|
||||
btrfs_printk_ratelimited(fs_info, fmt, ##args); \
|
||||
rcu_read_unlock(); \
|
||||
} while (0)
|
||||
|
||||
#ifdef CONFIG_BTRFS_ASSERT
|
||||
|
||||
__cold
|
||||
@ -4127,14 +4270,7 @@ do { \
|
||||
__LINE__, (errno)); \
|
||||
} while (0)
|
||||
|
||||
#define btrfs_std_error(fs_info, errno) \
|
||||
do { \
|
||||
if ((errno)) \
|
||||
__btrfs_std_error((fs_info), __func__, \
|
||||
__LINE__, (errno), NULL); \
|
||||
} while (0)
|
||||
|
||||
#define btrfs_error(fs_info, errno, fmt, args...) \
|
||||
#define btrfs_std_error(fs_info, errno, fmt, args...) \
|
||||
do { \
|
||||
__btrfs_std_error((fs_info), __func__, __LINE__, \
|
||||
(errno), fmt, ##args); \
|
||||
|
@ -463,6 +463,10 @@ static int __btrfs_add_delayed_deletion_item(struct btrfs_delayed_node *node,
|
||||
static void finish_one_item(struct btrfs_delayed_root *delayed_root)
|
||||
{
|
||||
int seq = atomic_inc_return(&delayed_root->items_seq);
|
||||
|
||||
/*
|
||||
* atomic_dec_return implies a barrier for waitqueue_active
|
||||
*/
|
||||
if ((atomic_dec_return(&delayed_root->items) <
|
||||
BTRFS_DELAYED_BACKGROUND || seq % BTRFS_DELAYED_BATCH == 0) &&
|
||||
waitqueue_active(&delayed_root->wait))
|
||||
|
@ -197,6 +197,119 @@ static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
|
||||
trans->delayed_ref_updates--;
|
||||
}
|
||||
|
||||
static bool merge_ref(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_delayed_ref_root *delayed_refs,
|
||||
struct btrfs_delayed_ref_head *head,
|
||||
struct btrfs_delayed_ref_node *ref,
|
||||
u64 seq)
|
||||
{
|
||||
struct btrfs_delayed_ref_node *next;
|
||||
bool done = false;
|
||||
|
||||
next = list_first_entry(&head->ref_list, struct btrfs_delayed_ref_node,
|
||||
list);
|
||||
while (!done && &next->list != &head->ref_list) {
|
||||
int mod;
|
||||
struct btrfs_delayed_ref_node *next2;
|
||||
|
||||
next2 = list_next_entry(next, list);
|
||||
|
||||
if (next == ref)
|
||||
goto next;
|
||||
|
||||
if (seq && next->seq >= seq)
|
||||
goto next;
|
||||
|
||||
if (next->type != ref->type)
|
||||
goto next;
|
||||
|
||||
if ((ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
|
||||
ref->type == BTRFS_SHARED_BLOCK_REF_KEY) &&
|
||||
comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref),
|
||||
btrfs_delayed_node_to_tree_ref(next),
|
||||
ref->type))
|
||||
goto next;
|
||||
if ((ref->type == BTRFS_EXTENT_DATA_REF_KEY ||
|
||||
ref->type == BTRFS_SHARED_DATA_REF_KEY) &&
|
||||
comp_data_refs(btrfs_delayed_node_to_data_ref(ref),
|
||||
btrfs_delayed_node_to_data_ref(next)))
|
||||
goto next;
|
||||
|
||||
if (ref->action == next->action) {
|
||||
mod = next->ref_mod;
|
||||
} else {
|
||||
if (ref->ref_mod < next->ref_mod) {
|
||||
swap(ref, next);
|
||||
done = true;
|
||||
}
|
||||
mod = -next->ref_mod;
|
||||
}
|
||||
|
||||
drop_delayed_ref(trans, delayed_refs, head, next);
|
||||
ref->ref_mod += mod;
|
||||
if (ref->ref_mod == 0) {
|
||||
drop_delayed_ref(trans, delayed_refs, head, ref);
|
||||
done = true;
|
||||
} else {
|
||||
/*
|
||||
* Can't have multiples of the same ref on a tree block.
|
||||
*/
|
||||
WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
|
||||
ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
|
||||
}
|
||||
next:
|
||||
next = next2;
|
||||
}
|
||||
|
||||
return done;
|
||||
}
|
||||
|
||||
void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_delayed_ref_root *delayed_refs,
|
||||
struct btrfs_delayed_ref_head *head)
|
||||
{
|
||||
struct btrfs_delayed_ref_node *ref;
|
||||
u64 seq = 0;
|
||||
|
||||
assert_spin_locked(&head->lock);
|
||||
|
||||
if (list_empty(&head->ref_list))
|
||||
return;
|
||||
|
||||
/* We don't have too many refs to merge for data. */
|
||||
if (head->is_data)
|
||||
return;
|
||||
|
||||
spin_lock(&fs_info->tree_mod_seq_lock);
|
||||
if (!list_empty(&fs_info->tree_mod_seq_list)) {
|
||||
struct seq_list *elem;
|
||||
|
||||
elem = list_first_entry(&fs_info->tree_mod_seq_list,
|
||||
struct seq_list, list);
|
||||
seq = elem->seq;
|
||||
}
|
||||
spin_unlock(&fs_info->tree_mod_seq_lock);
|
||||
|
||||
ref = list_first_entry(&head->ref_list, struct btrfs_delayed_ref_node,
|
||||
list);
|
||||
while (&ref->list != &head->ref_list) {
|
||||
if (seq && ref->seq >= seq)
|
||||
goto next;
|
||||
|
||||
if (merge_ref(trans, delayed_refs, head, ref, seq)) {
|
||||
if (list_empty(&head->ref_list))
|
||||
break;
|
||||
ref = list_first_entry(&head->ref_list,
|
||||
struct btrfs_delayed_ref_node,
|
||||
list);
|
||||
continue;
|
||||
}
|
||||
next:
|
||||
ref = list_next_entry(ref, list);
|
||||
}
|
||||
}
|
||||
|
||||
int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_delayed_ref_root *delayed_refs,
|
||||
u64 seq)
|
||||
@ -292,8 +405,7 @@ add_delayed_ref_tail_merge(struct btrfs_trans_handle *trans,
|
||||
exist = list_entry(href->ref_list.prev, struct btrfs_delayed_ref_node,
|
||||
list);
|
||||
/* No need to compare bytenr nor is_head */
|
||||
if (exist->type != ref->type || exist->no_quota != ref->no_quota ||
|
||||
exist->seq != ref->seq)
|
||||
if (exist->type != ref->type || exist->seq != ref->seq)
|
||||
goto add_tail;
|
||||
|
||||
if ((exist->type == BTRFS_TREE_BLOCK_REF_KEY ||
|
||||
@ -423,7 +535,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_trans_handle *trans,
|
||||
struct btrfs_delayed_ref_node *ref,
|
||||
struct btrfs_qgroup_extent_record *qrecord,
|
||||
u64 bytenr, u64 num_bytes, int action, int is_data)
|
||||
u64 bytenr, u64 num_bytes, u64 ref_root, u64 reserved,
|
||||
int action, int is_data)
|
||||
{
|
||||
struct btrfs_delayed_ref_head *existing;
|
||||
struct btrfs_delayed_ref_head *head_ref = NULL;
|
||||
@ -432,6 +545,9 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
|
||||
int count_mod = 1;
|
||||
int must_insert_reserved = 0;
|
||||
|
||||
/* If reserved is provided, it must be a data extent. */
|
||||
BUG_ON(!is_data && reserved);
|
||||
|
||||
/*
|
||||
* the head node stores the sum of all the mods, so dropping a ref
|
||||
* should drop the sum in the head node by one.
|
||||
@ -476,9 +592,16 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
|
||||
INIT_LIST_HEAD(&head_ref->ref_list);
|
||||
head_ref->processing = 0;
|
||||
head_ref->total_ref_mod = count_mod;
|
||||
head_ref->qgroup_reserved = 0;
|
||||
head_ref->qgroup_ref_root = 0;
|
||||
|
||||
/* Record qgroup extent info if provided */
|
||||
if (qrecord) {
|
||||
if (ref_root && reserved) {
|
||||
head_ref->qgroup_ref_root = ref_root;
|
||||
head_ref->qgroup_reserved = reserved;
|
||||
}
|
||||
|
||||
qrecord->bytenr = bytenr;
|
||||
qrecord->num_bytes = num_bytes;
|
||||
qrecord->old_roots = NULL;
|
||||
@ -497,6 +620,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
|
||||
existing = htree_insert(&delayed_refs->href_root,
|
||||
&head_ref->href_node);
|
||||
if (existing) {
|
||||
WARN_ON(ref_root && reserved && existing->qgroup_ref_root
|
||||
&& existing->qgroup_reserved);
|
||||
update_existing_head_ref(delayed_refs, &existing->node, ref);
|
||||
/*
|
||||
* we've updated the existing ref, free the newly
|
||||
@ -524,7 +649,7 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_delayed_ref_head *head_ref,
|
||||
struct btrfs_delayed_ref_node *ref, u64 bytenr,
|
||||
u64 num_bytes, u64 parent, u64 ref_root, int level,
|
||||
int action, int no_quota)
|
||||
int action)
|
||||
{
|
||||
struct btrfs_delayed_tree_ref *full_ref;
|
||||
struct btrfs_delayed_ref_root *delayed_refs;
|
||||
@ -546,7 +671,6 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
|
||||
ref->action = action;
|
||||
ref->is_head = 0;
|
||||
ref->in_tree = 1;
|
||||
ref->no_quota = no_quota;
|
||||
ref->seq = seq;
|
||||
|
||||
full_ref = btrfs_delayed_node_to_tree_ref(ref);
|
||||
@ -579,7 +703,7 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_delayed_ref_head *head_ref,
|
||||
struct btrfs_delayed_ref_node *ref, u64 bytenr,
|
||||
u64 num_bytes, u64 parent, u64 ref_root, u64 owner,
|
||||
u64 offset, int action, int no_quota)
|
||||
u64 offset, int action)
|
||||
{
|
||||
struct btrfs_delayed_data_ref *full_ref;
|
||||
struct btrfs_delayed_ref_root *delayed_refs;
|
||||
@ -602,7 +726,6 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
|
||||
ref->action = action;
|
||||
ref->is_head = 0;
|
||||
ref->in_tree = 1;
|
||||
ref->no_quota = no_quota;
|
||||
ref->seq = seq;
|
||||
|
||||
full_ref = btrfs_delayed_node_to_data_ref(ref);
|
||||
@ -633,17 +756,13 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_trans_handle *trans,
|
||||
u64 bytenr, u64 num_bytes, u64 parent,
|
||||
u64 ref_root, int level, int action,
|
||||
struct btrfs_delayed_extent_op *extent_op,
|
||||
int no_quota)
|
||||
struct btrfs_delayed_extent_op *extent_op)
|
||||
{
|
||||
struct btrfs_delayed_tree_ref *ref;
|
||||
struct btrfs_delayed_ref_head *head_ref;
|
||||
struct btrfs_delayed_ref_root *delayed_refs;
|
||||
struct btrfs_qgroup_extent_record *record = NULL;
|
||||
|
||||
if (!is_fstree(ref_root) || !fs_info->quota_enabled)
|
||||
no_quota = 0;
|
||||
|
||||
BUG_ON(extent_op && extent_op->is_data);
|
||||
ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
|
||||
if (!ref)
|
||||
@ -669,11 +788,10 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
|
||||
* the spin lock
|
||||
*/
|
||||
head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record,
|
||||
bytenr, num_bytes, action, 0);
|
||||
bytenr, num_bytes, 0, 0, action, 0);
|
||||
|
||||
add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr,
|
||||
num_bytes, parent, ref_root, level, action,
|
||||
no_quota);
|
||||
num_bytes, parent, ref_root, level, action);
|
||||
spin_unlock(&delayed_refs->lock);
|
||||
|
||||
return 0;
|
||||
@ -693,18 +811,14 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_trans_handle *trans,
|
||||
u64 bytenr, u64 num_bytes,
|
||||
u64 parent, u64 ref_root,
|
||||
u64 owner, u64 offset, int action,
|
||||
struct btrfs_delayed_extent_op *extent_op,
|
||||
int no_quota)
|
||||
u64 owner, u64 offset, u64 reserved, int action,
|
||||
struct btrfs_delayed_extent_op *extent_op)
|
||||
{
|
||||
struct btrfs_delayed_data_ref *ref;
|
||||
struct btrfs_delayed_ref_head *head_ref;
|
||||
struct btrfs_delayed_ref_root *delayed_refs;
|
||||
struct btrfs_qgroup_extent_record *record = NULL;
|
||||
|
||||
if (!is_fstree(ref_root) || !fs_info->quota_enabled)
|
||||
no_quota = 0;
|
||||
|
||||
BUG_ON(extent_op && !extent_op->is_data);
|
||||
ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
|
||||
if (!ref)
|
||||
@ -736,16 +850,44 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
|
||||
* the spin lock
|
||||
*/
|
||||
head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record,
|
||||
bytenr, num_bytes, action, 1);
|
||||
bytenr, num_bytes, ref_root, reserved,
|
||||
action, 1);
|
||||
|
||||
add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
|
||||
num_bytes, parent, ref_root, owner, offset,
|
||||
action, no_quota);
|
||||
action);
|
||||
spin_unlock(&delayed_refs->lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Record a qgroup reservation on the delayed ref head of @bytenr.
 *
 * Returns 0 without doing anything when quotas are disabled or when
 * is_fstree(@ref_root) is false.  Otherwise the head for @bytenr is looked
 * up under delayed_refs->lock and @ref_root / @num_bytes are stored in its
 * qgroup_ref_root / qgroup_reserved fields.
 *
 * Returns 0 on success, -ENOENT if no delayed ref head exists for @bytenr.
 */
int btrfs_add_delayed_qgroup_reserve(struct btrfs_fs_info *fs_info,
				     struct btrfs_trans_handle *trans,
				     u64 ref_root, u64 bytenr, u64 num_bytes)
{
	struct btrfs_delayed_ref_root *refs;
	struct btrfs_delayed_ref_head *head;
	int err = -ENOENT;

	if (!fs_info->quota_enabled || !is_fstree(ref_root))
		return 0;

	refs = &trans->transaction->delayed_refs;

	spin_lock(&refs->lock);
	head = find_ref_head(&refs->href_root, bytenr, 0);
	if (head) {
		/* complain if this head already carries a reservation */
		WARN_ON(head->qgroup_reserved || head->qgroup_ref_root);
		head->qgroup_ref_root = ref_root;
		head->qgroup_reserved = num_bytes;
		err = 0;
	}
	spin_unlock(&refs->lock);

	return err;
}
|
||||
|
||||
int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_trans_handle *trans,
|
||||
u64 bytenr, u64 num_bytes,
|
||||
@ -764,7 +906,7 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
|
||||
spin_lock(&delayed_refs->lock);
|
||||
|
||||
add_delayed_ref_head(fs_info, trans, &head_ref->node, NULL, bytenr,
|
||||
num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
|
||||
num_bytes, 0, 0, BTRFS_UPDATE_DELAYED_HEAD,
|
||||
extent_op->is_data);
|
||||
|
||||
spin_unlock(&delayed_refs->lock);
|
||||
|
@ -68,7 +68,6 @@ struct btrfs_delayed_ref_node {
|
||||
|
||||
unsigned int action:8;
|
||||
unsigned int type:8;
|
||||
unsigned int no_quota:1;
|
||||
/* is this node still in the rbtree? */
|
||||
unsigned int is_head:1;
|
||||
unsigned int in_tree:1;
|
||||
@ -112,6 +111,17 @@ struct btrfs_delayed_ref_head {
|
||||
*/
|
||||
int total_ref_mod;
|
||||
|
||||
/*
|
||||
* For qgroup reserved space freeing.
|
||||
*
|
||||
* ref_root and reserved will be recorded after
|
||||
* BTRFS_ADD_DELAYED_EXTENT is called.
|
||||
* And will be used to free reserved qgroup space at
|
||||
* run_delayed_refs() time.
|
||||
*/
|
||||
u64 qgroup_ref_root;
|
||||
u64 qgroup_reserved;
|
||||
|
||||
/*
|
||||
* when a new extent is allocated, it is just reserved in memory
|
||||
* The actual extent isn't inserted into the extent allocation tree
|
||||
@ -233,15 +243,16 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_trans_handle *trans,
|
||||
u64 bytenr, u64 num_bytes, u64 parent,
|
||||
u64 ref_root, int level, int action,
|
||||
struct btrfs_delayed_extent_op *extent_op,
|
||||
int no_quota);
|
||||
struct btrfs_delayed_extent_op *extent_op);
|
||||
int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_trans_handle *trans,
|
||||
u64 bytenr, u64 num_bytes,
|
||||
u64 parent, u64 ref_root,
|
||||
u64 owner, u64 offset, int action,
|
||||
struct btrfs_delayed_extent_op *extent_op,
|
||||
int no_quota);
|
||||
u64 owner, u64 offset, u64 reserved, int action,
|
||||
struct btrfs_delayed_extent_op *extent_op);
|
||||
int btrfs_add_delayed_qgroup_reserve(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_trans_handle *trans,
|
||||
u64 ref_root, u64 bytenr, u64 num_bytes);
|
||||
int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_trans_handle *trans,
|
||||
u64 bytenr, u64 num_bytes,
|
||||
|
@ -327,19 +327,6 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
|
||||
args->start.tgtdev_name[0] == '\0')
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* Here we commit the transaction to make sure commit_total_bytes
|
||||
* of all the devices are updated.
|
||||
*/
|
||||
trans = btrfs_attach_transaction(root);
|
||||
if (!IS_ERR(trans)) {
|
||||
ret = btrfs_commit_transaction(trans, root);
|
||||
if (ret)
|
||||
return ret;
|
||||
} else if (PTR_ERR(trans) != -ENOENT) {
|
||||
return PTR_ERR(trans);
|
||||
}
|
||||
|
||||
/* the disk copy procedure reuses the scrub code */
|
||||
mutex_lock(&fs_info->volume_mutex);
|
||||
ret = btrfs_dev_replace_find_srcdev(root, args->start.srcdevid,
|
||||
@ -356,6 +343,19 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* Here we commit the transaction to make sure commit_total_bytes
|
||||
* of all the devices are updated.
|
||||
*/
|
||||
trans = btrfs_attach_transaction(root);
|
||||
if (!IS_ERR(trans)) {
|
||||
ret = btrfs_commit_transaction(trans, root);
|
||||
if (ret)
|
||||
return ret;
|
||||
} else if (PTR_ERR(trans) != -ENOENT) {
|
||||
return PTR_ERR(trans);
|
||||
}
|
||||
|
||||
btrfs_dev_replace_lock(dev_replace);
|
||||
switch (dev_replace->replace_state) {
|
||||
case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
|
||||
@ -375,12 +375,8 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
|
||||
WARN_ON(!tgt_device);
|
||||
dev_replace->tgtdev = tgt_device;
|
||||
|
||||
ret = btrfs_kobj_add_device(tgt_device->fs_devices, tgt_device);
|
||||
if (ret)
|
||||
btrfs_err(root->fs_info, "kobj add dev failed %d\n", ret);
|
||||
|
||||
printk_in_rcu(KERN_INFO
|
||||
"BTRFS: dev_replace from %s (devid %llu) to %s started\n",
|
||||
btrfs_info_in_rcu(root->fs_info,
|
||||
"dev_replace from %s (devid %llu) to %s started",
|
||||
src_device->missing ? "<missing disk>" :
|
||||
rcu_str_deref(src_device->name),
|
||||
src_device->devid,
|
||||
@ -401,6 +397,10 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
|
||||
args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR;
|
||||
btrfs_dev_replace_unlock(dev_replace);
|
||||
|
||||
ret = btrfs_sysfs_add_device_link(tgt_device->fs_devices, tgt_device);
|
||||
if (ret)
|
||||
btrfs_err(root->fs_info, "kobj add dev failed %d\n", ret);
|
||||
|
||||
btrfs_wait_ordered_roots(root->fs_info, -1);
|
||||
|
||||
/* force writing the updated state information to disk */
|
||||
@ -454,8 +454,7 @@ static void btrfs_rm_dev_replace_blocked(struct btrfs_fs_info *fs_info)
|
||||
static void btrfs_rm_dev_replace_unblocked(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
clear_bit(BTRFS_FS_STATE_DEV_REPLACING, &fs_info->fs_state);
|
||||
if (waitqueue_active(&fs_info->replace_wait))
|
||||
wake_up(&fs_info->replace_wait);
|
||||
wake_up(&fs_info->replace_wait);
|
||||
}
|
||||
|
||||
static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
|
||||
@ -523,8 +522,8 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
|
||||
src_device,
|
||||
tgt_device);
|
||||
} else {
|
||||
printk_in_rcu(KERN_ERR
|
||||
"BTRFS: btrfs_scrub_dev(%s, %llu, %s) failed %d\n",
|
||||
btrfs_err_in_rcu(root->fs_info,
|
||||
"btrfs_scrub_dev(%s, %llu, %s) failed %d",
|
||||
src_device->missing ? "<missing disk>" :
|
||||
rcu_str_deref(src_device->name),
|
||||
src_device->devid,
|
||||
@ -540,8 +539,8 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
|
||||
return scrub_ret;
|
||||
}
|
||||
|
||||
printk_in_rcu(KERN_INFO
|
||||
"BTRFS: dev_replace from %s (devid %llu) to %s finished\n",
|
||||
btrfs_info_in_rcu(root->fs_info,
|
||||
"dev_replace from %s (devid %llu) to %s finished",
|
||||
src_device->missing ? "<missing disk>" :
|
||||
rcu_str_deref(src_device->name),
|
||||
src_device->devid,
|
||||
@ -586,7 +585,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
|
||||
mutex_unlock(&uuid_mutex);
|
||||
|
||||
/* replace the sysfs entry */
|
||||
btrfs_kobj_rm_device(fs_info->fs_devices, src_device);
|
||||
btrfs_sysfs_rm_device_link(fs_info->fs_devices, src_device);
|
||||
btrfs_rm_dev_replace_free_srcdev(fs_info, src_device);
|
||||
|
||||
/* write back the superblocks */
|
||||
@ -809,8 +808,8 @@ static int btrfs_dev_replace_kthread(void *data)
|
||||
progress = status_args->status.progress_1000;
|
||||
kfree(status_args);
|
||||
progress = div_u64(progress, 10);
|
||||
printk_in_rcu(KERN_INFO
|
||||
"BTRFS: continuing dev_replace from %s (devid %llu) to %s @%u%%\n",
|
||||
btrfs_info_in_rcu(fs_info,
|
||||
"continuing dev_replace from %s (devid %llu) to %s @%u%%",
|
||||
dev_replace->srcdev->missing ? "<missing disk>" :
|
||||
rcu_str_deref(dev_replace->srcdev->name),
|
||||
dev_replace->srcdev->devid,
|
||||
|
@ -319,9 +319,9 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info,
|
||||
memcpy(&found, result, csum_size);
|
||||
|
||||
read_extent_buffer(buf, &val, 0, csum_size);
|
||||
printk_ratelimited(KERN_WARNING
|
||||
"BTRFS: %s checksum verify failed on %llu wanted %X found %X "
|
||||
"level %d\n",
|
||||
btrfs_warn_rl(fs_info,
|
||||
"%s checksum verify failed on %llu wanted %X found %X "
|
||||
"level %d",
|
||||
fs_info->sb->s_id, buf->start,
|
||||
val, found, btrfs_header_level(buf));
|
||||
if (result != (char *)&inline_result)
|
||||
@ -368,9 +368,9 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
printk_ratelimited(KERN_ERR
|
||||
"BTRFS (device %s): parent transid verify failed on %llu wanted %llu found %llu\n",
|
||||
eb->fs_info->sb->s_id, eb->start,
|
||||
btrfs_err_rl(eb->fs_info,
|
||||
"parent transid verify failed on %llu wanted %llu found %llu",
|
||||
eb->start,
|
||||
parent_transid, btrfs_header_generation(eb));
|
||||
ret = 1;
|
||||
|
||||
@ -629,15 +629,14 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
|
||||
|
||||
found_start = btrfs_header_bytenr(eb);
|
||||
if (found_start != eb->start) {
|
||||
printk_ratelimited(KERN_ERR "BTRFS (device %s): bad tree block start "
|
||||
"%llu %llu\n",
|
||||
eb->fs_info->sb->s_id, found_start, eb->start);
|
||||
btrfs_err_rl(eb->fs_info, "bad tree block start %llu %llu",
|
||||
found_start, eb->start);
|
||||
ret = -EIO;
|
||||
goto err;
|
||||
}
|
||||
if (check_tree_block_fsid(root->fs_info, eb)) {
|
||||
printk_ratelimited(KERN_ERR "BTRFS (device %s): bad fsid on block %llu\n",
|
||||
eb->fs_info->sb->s_id, eb->start);
|
||||
btrfs_err_rl(eb->fs_info, "bad fsid on block %llu",
|
||||
eb->start);
|
||||
ret = -EIO;
|
||||
goto err;
|
||||
}
|
||||
@ -802,6 +801,9 @@ static void run_one_async_done(struct btrfs_work *work)
|
||||
limit = btrfs_async_submit_limit(fs_info);
|
||||
limit = limit * 2 / 3;
|
||||
|
||||
/*
|
||||
* atomic_dec_return implies a barrier for waitqueue_active
|
||||
*/
|
||||
if (atomic_dec_return(&fs_info->nr_async_submits) < limit &&
|
||||
waitqueue_active(&fs_info->async_submit_wait))
|
||||
wake_up(&fs_info->async_submit_wait);
|
||||
@ -1265,6 +1267,7 @@ static void __setup_root(u32 nodesize, u32 sectorsize, u32 stripesize,
|
||||
atomic_set(&root->orphan_inodes, 0);
|
||||
atomic_set(&root->refs, 1);
|
||||
atomic_set(&root->will_be_snapshoted, 0);
|
||||
atomic_set(&root->qgroup_meta_rsv, 0);
|
||||
root->log_transid = 0;
|
||||
root->log_transid_committed = -1;
|
||||
root->last_log_commit = 0;
|
||||
@ -1759,6 +1762,7 @@ static int cleaner_kthread(void *arg)
|
||||
int again;
|
||||
struct btrfs_trans_handle *trans;
|
||||
|
||||
set_freezable();
|
||||
do {
|
||||
again = 0;
|
||||
|
||||
@ -2348,8 +2352,7 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
|
||||
u64 bytenr = btrfs_super_log_root(disk_super);
|
||||
|
||||
if (fs_devices->rw_devices == 0) {
|
||||
printk(KERN_WARNING "BTRFS: log replay required "
|
||||
"on RO media\n");
|
||||
btrfs_warn(fs_info, "log replay required on RO media");
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
@ -2364,12 +2367,12 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
|
||||
log_tree_root->node = read_tree_block(tree_root, bytenr,
|
||||
fs_info->generation + 1);
|
||||
if (IS_ERR(log_tree_root->node)) {
|
||||
printk(KERN_ERR "BTRFS: failed to read log tree\n");
|
||||
btrfs_warn(fs_info, "failed to read log tree");
|
||||
ret = PTR_ERR(log_tree_root->node);
|
||||
kfree(log_tree_root);
|
||||
return ret;
|
||||
} else if (!extent_buffer_uptodate(log_tree_root->node)) {
|
||||
printk(KERN_ERR "BTRFS: failed to read log tree\n");
|
||||
btrfs_err(fs_info, "failed to read log tree");
|
||||
free_extent_buffer(log_tree_root->node);
|
||||
kfree(log_tree_root);
|
||||
return -EIO;
|
||||
@ -2377,7 +2380,7 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
|
||||
/* returns with log_tree_root freed on success */
|
||||
ret = btrfs_recover_log_trees(log_tree_root);
|
||||
if (ret) {
|
||||
btrfs_error(tree_root->fs_info, ret,
|
||||
btrfs_std_error(tree_root->fs_info, ret,
|
||||
"Failed to recover log tree");
|
||||
free_extent_buffer(log_tree_root->node);
|
||||
kfree(log_tree_root);
|
||||
@ -2653,8 +2656,8 @@ int open_ctree(struct super_block *sb,
|
||||
* Read super block and check the signature bytes only
|
||||
*/
|
||||
bh = btrfs_read_dev_super(fs_devices->latest_bdev);
|
||||
if (!bh) {
|
||||
err = -EINVAL;
|
||||
if (IS_ERR(bh)) {
|
||||
err = PTR_ERR(bh);
|
||||
goto fail_alloc;
|
||||
}
|
||||
|
||||
@ -2937,7 +2940,7 @@ retry_root_backup:
|
||||
goto fail_fsdev_sysfs;
|
||||
}
|
||||
|
||||
ret = btrfs_sysfs_add_one(fs_info);
|
||||
ret = btrfs_sysfs_add_mounted(fs_info);
|
||||
if (ret) {
|
||||
pr_err("BTRFS: failed to init sysfs interface: %d\n", ret);
|
||||
goto fail_fsdev_sysfs;
|
||||
@ -3117,7 +3120,7 @@ fail_cleaner:
|
||||
filemap_write_and_wait(fs_info->btree_inode->i_mapping);
|
||||
|
||||
fail_sysfs:
|
||||
btrfs_sysfs_remove_one(fs_info);
|
||||
btrfs_sysfs_remove_mounted(fs_info);
|
||||
|
||||
fail_fsdev_sysfs:
|
||||
btrfs_sysfs_remove_fsid(fs_info->fs_devices);
|
||||
@ -3179,8 +3182,8 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
|
||||
struct btrfs_device *device = (struct btrfs_device *)
|
||||
bh->b_private;
|
||||
|
||||
printk_ratelimited_in_rcu(KERN_WARNING "BTRFS: lost page write due to "
|
||||
"I/O error on %s\n",
|
||||
btrfs_warn_rl_in_rcu(device->dev_root->fs_info,
|
||||
"lost page write due to IO error on %s",
|
||||
rcu_str_deref(device->name));
|
||||
/* note, we don't set_buffer_write_io_error because we have
|
||||
* our own ways of dealing with the IO errors
|
||||
@ -3192,6 +3195,37 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
|
||||
put_bh(bh);
|
||||
}
|
||||
|
||||
int btrfs_read_dev_one_super(struct block_device *bdev, int copy_num,
|
||||
struct buffer_head **bh_ret)
|
||||
{
|
||||
struct buffer_head *bh;
|
||||
struct btrfs_super_block *super;
|
||||
u64 bytenr;
|
||||
|
||||
bytenr = btrfs_sb_offset(copy_num);
|
||||
if (bytenr + BTRFS_SUPER_INFO_SIZE >= i_size_read(bdev->bd_inode))
|
||||
return -EINVAL;
|
||||
|
||||
bh = __bread(bdev, bytenr / 4096, BTRFS_SUPER_INFO_SIZE);
|
||||
/*
|
||||
* If we fail to read from the underlying devices, as of now
|
||||
* the best option we have is to mark it EIO.
|
||||
*/
|
||||
if (!bh)
|
||||
return -EIO;
|
||||
|
||||
super = (struct btrfs_super_block *)bh->b_data;
|
||||
if (btrfs_super_bytenr(super) != bytenr ||
|
||||
btrfs_super_magic(super) != BTRFS_MAGIC) {
|
||||
brelse(bh);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
*bh_ret = bh;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
struct buffer_head *btrfs_read_dev_super(struct block_device *bdev)
|
||||
{
|
||||
struct buffer_head *bh;
|
||||
@ -3199,7 +3233,7 @@ struct buffer_head *btrfs_read_dev_super(struct block_device *bdev)
|
||||
struct btrfs_super_block *super;
|
||||
int i;
|
||||
u64 transid = 0;
|
||||
u64 bytenr;
|
||||
int ret = -EINVAL;
|
||||
|
||||
/* we would like to check all the supers, but that would make
|
||||
* a btrfs mount succeed after a mkfs from a different FS.
|
||||
@ -3207,21 +3241,11 @@ struct buffer_head *btrfs_read_dev_super(struct block_device *bdev)
|
||||
* later supers, using BTRFS_SUPER_MIRROR_MAX instead
|
||||
*/
|
||||
for (i = 0; i < 1; i++) {
|
||||
bytenr = btrfs_sb_offset(i);
|
||||
if (bytenr + BTRFS_SUPER_INFO_SIZE >=
|
||||
i_size_read(bdev->bd_inode))
|
||||
break;
|
||||
bh = __bread(bdev, bytenr / 4096,
|
||||
BTRFS_SUPER_INFO_SIZE);
|
||||
if (!bh)
|
||||
ret = btrfs_read_dev_one_super(bdev, i, &bh);
|
||||
if (ret)
|
||||
continue;
|
||||
|
||||
super = (struct btrfs_super_block *)bh->b_data;
|
||||
if (btrfs_super_bytenr(super) != bytenr ||
|
||||
btrfs_super_magic(super) != BTRFS_MAGIC) {
|
||||
brelse(bh);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!latest || btrfs_super_generation(super) > transid) {
|
||||
brelse(latest);
|
||||
@ -3231,6 +3255,10 @@ struct buffer_head *btrfs_read_dev_super(struct block_device *bdev)
|
||||
brelse(bh);
|
||||
}
|
||||
}
|
||||
|
||||
if (!latest)
|
||||
return ERR_PTR(ret);
|
||||
|
||||
return latest;
|
||||
}
|
||||
|
||||
@ -3299,8 +3327,9 @@ static int write_dev_supers(struct btrfs_device *device,
|
||||
bh = __getblk(device->bdev, bytenr / 4096,
|
||||
BTRFS_SUPER_INFO_SIZE);
|
||||
if (!bh) {
|
||||
printk(KERN_ERR "BTRFS: couldn't get super "
|
||||
"buffer head for bytenr %Lu\n", bytenr);
|
||||
btrfs_err(device->dev_root->fs_info,
|
||||
"couldn't get super buffer head for bytenr %llu",
|
||||
bytenr);
|
||||
errors++;
|
||||
continue;
|
||||
}
|
||||
@ -3449,22 +3478,31 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
|
||||
|
||||
/*
 * Return how many device barrier failures the profiles in @flags tolerate.
 *
 * The result is the minimum tolerated_failures value from btrfs_raid_array
 * over every profile present in @flags.  The single profile is considered
 * when no profile bit is set or when BTRFS_AVAIL_ALLOC_BIT_SINGLE is set;
 * all other profiles are matched through btrfs_raid_group[].
 *
 * If no known profile matches, a warning is printed and 0 is returned.
 */
int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags)
{
	int raid_type;
	int min_tolerated = INT_MAX;

	if ((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 ||
	    (flags & BTRFS_AVAIL_ALLOC_BIT_SINGLE))
		min_tolerated = min(min_tolerated,
				    btrfs_raid_array[BTRFS_RAID_SINGLE].
				    tolerated_failures);

	for (raid_type = 0; raid_type < BTRFS_NR_RAID_TYPES; raid_type++) {
		/* the single profile was handled above */
		if (raid_type == BTRFS_RAID_SINGLE)
			continue;
		if (!(flags & btrfs_raid_group[raid_type]))
			continue;
		min_tolerated = min(min_tolerated,
				    btrfs_raid_array[raid_type].
				    tolerated_failures);
	}

	/* INT_MAX still in place means no profile matched at all */
	if (min_tolerated == INT_MAX) {
		pr_warn("BTRFS: unknown raid flag: %llu\n", flags);
		min_tolerated = 0;
	}

	return min_tolerated;
}
|
||||
|
||||
int btrfs_calc_num_tolerated_disk_barrier_failures(
|
||||
@ -3548,7 +3586,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
|
||||
if (ret) {
|
||||
mutex_unlock(
|
||||
&root->fs_info->fs_devices->device_list_mutex);
|
||||
btrfs_error(root->fs_info, ret,
|
||||
btrfs_std_error(root->fs_info, ret,
|
||||
"errors while submitting device barriers.");
|
||||
return ret;
|
||||
}
|
||||
@ -3588,7 +3626,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
|
||||
mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
|
||||
|
||||
/* FUA is masked off if unsupported and can't be the reason */
|
||||
btrfs_error(root->fs_info, -EIO,
|
||||
btrfs_std_error(root->fs_info, -EIO,
|
||||
"%d errors while writing supers", total_errors);
|
||||
return -EIO;
|
||||
}
|
||||
@ -3606,7 +3644,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
|
||||
}
|
||||
mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
|
||||
if (total_errors > max_errors) {
|
||||
btrfs_error(root->fs_info, -EIO,
|
||||
btrfs_std_error(root->fs_info, -EIO,
|
||||
"%d errors while writing supers", total_errors);
|
||||
return -EIO;
|
||||
}
|
||||
@ -3792,7 +3830,7 @@ void close_ctree(struct btrfs_root *root)
|
||||
percpu_counter_sum(&fs_info->delalloc_bytes));
|
||||
}
|
||||
|
||||
btrfs_sysfs_remove_one(fs_info);
|
||||
btrfs_sysfs_remove_mounted(fs_info);
|
||||
btrfs_sysfs_remove_fsid(fs_info->fs_devices);
|
||||
|
||||
btrfs_free_fs_roots(fs_info);
|
||||
@ -4290,25 +4328,6 @@ again:
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void btrfs_free_pending_ordered(struct btrfs_transaction *cur_trans,
|
||||
struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
struct btrfs_ordered_extent *ordered;
|
||||
|
||||
spin_lock(&fs_info->trans_lock);
|
||||
while (!list_empty(&cur_trans->pending_ordered)) {
|
||||
ordered = list_first_entry(&cur_trans->pending_ordered,
|
||||
struct btrfs_ordered_extent,
|
||||
trans_list);
|
||||
list_del_init(&ordered->trans_list);
|
||||
spin_unlock(&fs_info->trans_lock);
|
||||
|
||||
btrfs_put_ordered_extent(ordered);
|
||||
spin_lock(&fs_info->trans_lock);
|
||||
}
|
||||
spin_unlock(&fs_info->trans_lock);
|
||||
}
|
||||
|
||||
void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
|
||||
struct btrfs_root *root)
|
||||
{
|
||||
@ -4320,7 +4339,6 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
|
||||
cur_trans->state = TRANS_STATE_UNBLOCKED;
|
||||
wake_up(&root->fs_info->transaction_wait);
|
||||
|
||||
btrfs_free_pending_ordered(cur_trans, root->fs_info);
|
||||
btrfs_destroy_delayed_inodes(root);
|
||||
btrfs_assert_delayed_root_empty(root);
|
||||
|
||||
|
@ -60,6 +60,8 @@ void close_ctree(struct btrfs_root *root);
|
||||
int write_ctree_super(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root, int max_mirrors);
|
||||
struct buffer_head *btrfs_read_dev_super(struct block_device *bdev);
|
||||
int btrfs_read_dev_one_super(struct block_device *bdev, int copy_num,
|
||||
struct buffer_head **bh_ret);
|
||||
int btrfs_commit_super(struct btrfs_root *root);
|
||||
struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info,
|
||||
u64 bytenr);
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -96,8 +96,8 @@ static inline void __btrfs_debug_check_extent_io_range(const char *caller,
|
||||
inode = tree->mapping->host;
|
||||
isize = i_size_read(inode);
|
||||
if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
|
||||
printk_ratelimited(KERN_DEBUG
|
||||
"BTRFS: %s: ino %llu isize %llu odd range [%llu,%llu]\n",
|
||||
btrfs_debug_rl(BTRFS_I(inode)->root->fs_info,
|
||||
"%s: ino %llu isize %llu odd range [%llu,%llu]",
|
||||
caller, btrfs_ino(inode), isize, start, end);
|
||||
}
|
||||
}
|
||||
@ -131,6 +131,25 @@ struct extent_page_data {
|
||||
unsigned int sync_io:1;
|
||||
};
|
||||
|
||||
static void add_extent_changeset(struct extent_state *state, unsigned bits,
|
||||
struct extent_changeset *changeset,
|
||||
int set)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (!changeset)
|
||||
return;
|
||||
if (set && (state->state & bits) == bits)
|
||||
return;
|
||||
if (!set && (state->state & bits) == 0)
|
||||
return;
|
||||
changeset->bytes_changed += state->end - state->start + 1;
|
||||
ret = ulist_add(changeset->range_changed, state->start, state->end,
|
||||
GFP_ATOMIC);
|
||||
/* ENOMEM */
|
||||
BUG_ON(ret < 0);
|
||||
}
|
||||
|
||||
static noinline void flush_write_bio(void *data);
|
||||
static inline struct btrfs_fs_info *
|
||||
tree_fs_info(struct extent_io_tree *tree)
|
||||
@ -410,7 +429,8 @@ static void clear_state_cb(struct extent_io_tree *tree,
|
||||
}
|
||||
|
||||
static void set_state_bits(struct extent_io_tree *tree,
|
||||
struct extent_state *state, unsigned *bits);
|
||||
struct extent_state *state, unsigned *bits,
|
||||
struct extent_changeset *changeset);
|
||||
|
||||
/*
|
||||
* insert an extent_state struct into the tree. 'bits' are set on the
|
||||
@ -426,7 +446,7 @@ static int insert_state(struct extent_io_tree *tree,
|
||||
struct extent_state *state, u64 start, u64 end,
|
||||
struct rb_node ***p,
|
||||
struct rb_node **parent,
|
||||
unsigned *bits)
|
||||
unsigned *bits, struct extent_changeset *changeset)
|
||||
{
|
||||
struct rb_node *node;
|
||||
|
||||
@ -436,7 +456,7 @@ static int insert_state(struct extent_io_tree *tree,
|
||||
state->start = start;
|
||||
state->end = end;
|
||||
|
||||
set_state_bits(tree, state, bits);
|
||||
set_state_bits(tree, state, bits, changeset);
|
||||
|
||||
node = tree_insert(&tree->state, NULL, end, &state->rb_node, p, parent);
|
||||
if (node) {
|
||||
@ -511,7 +531,8 @@ static struct extent_state *next_state(struct extent_state *state)
|
||||
*/
|
||||
static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
|
||||
struct extent_state *state,
|
||||
unsigned *bits, int wake)
|
||||
unsigned *bits, int wake,
|
||||
struct extent_changeset *changeset)
|
||||
{
|
||||
struct extent_state *next;
|
||||
unsigned bits_to_clear = *bits & ~EXTENT_CTLBITS;
|
||||
@ -522,6 +543,7 @@ static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
|
||||
tree->dirty_bytes -= range;
|
||||
}
|
||||
clear_state_cb(tree, state, bits);
|
||||
add_extent_changeset(state, bits_to_clear, changeset, 0);
|
||||
state->state &= ~bits_to_clear;
|
||||
if (wake)
|
||||
wake_up(&state->wq);
|
||||
@ -569,10 +591,10 @@ static void extent_io_tree_panic(struct extent_io_tree *tree, int err)
|
||||
*
|
||||
* This takes the tree lock, and returns 0 on success and < 0 on error.
|
||||
*/
|
||||
int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
|
||||
unsigned bits, int wake, int delete,
|
||||
struct extent_state **cached_state,
|
||||
gfp_t mask)
|
||||
static int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
|
||||
unsigned bits, int wake, int delete,
|
||||
struct extent_state **cached_state,
|
||||
gfp_t mask, struct extent_changeset *changeset)
|
||||
{
|
||||
struct extent_state *state;
|
||||
struct extent_state *cached;
|
||||
@ -671,7 +693,8 @@ hit_next:
|
||||
if (err)
|
||||
goto out;
|
||||
if (state->end <= end) {
|
||||
state = clear_state_bit(tree, state, &bits, wake);
|
||||
state = clear_state_bit(tree, state, &bits, wake,
|
||||
changeset);
|
||||
goto next;
|
||||
}
|
||||
goto search_again;
|
||||
@ -692,13 +715,13 @@ hit_next:
|
||||
if (wake)
|
||||
wake_up(&state->wq);
|
||||
|
||||
clear_state_bit(tree, prealloc, &bits, wake);
|
||||
clear_state_bit(tree, prealloc, &bits, wake, changeset);
|
||||
|
||||
prealloc = NULL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
state = clear_state_bit(tree, state, &bits, wake);
|
||||
state = clear_state_bit(tree, state, &bits, wake, changeset);
|
||||
next:
|
||||
if (last_end == (u64)-1)
|
||||
goto out;
|
||||
@ -789,7 +812,7 @@ out:
|
||||
|
||||
static void set_state_bits(struct extent_io_tree *tree,
|
||||
struct extent_state *state,
|
||||
unsigned *bits)
|
||||
unsigned *bits, struct extent_changeset *changeset)
|
||||
{
|
||||
unsigned bits_to_set = *bits & ~EXTENT_CTLBITS;
|
||||
|
||||
@ -798,6 +821,7 @@ static void set_state_bits(struct extent_io_tree *tree,
|
||||
u64 range = state->end - state->start + 1;
|
||||
tree->dirty_bytes += range;
|
||||
}
|
||||
add_extent_changeset(state, bits_to_set, changeset, 1);
|
||||
state->state |= bits_to_set;
|
||||
}
|
||||
|
||||
@ -835,7 +859,7 @@ static int __must_check
|
||||
__set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
|
||||
unsigned bits, unsigned exclusive_bits,
|
||||
u64 *failed_start, struct extent_state **cached_state,
|
||||
gfp_t mask)
|
||||
gfp_t mask, struct extent_changeset *changeset)
|
||||
{
|
||||
struct extent_state *state;
|
||||
struct extent_state *prealloc = NULL;
|
||||
@ -873,7 +897,7 @@ again:
|
||||
prealloc = alloc_extent_state_atomic(prealloc);
|
||||
BUG_ON(!prealloc);
|
||||
err = insert_state(tree, prealloc, start, end,
|
||||
&p, &parent, &bits);
|
||||
&p, &parent, &bits, changeset);
|
||||
if (err)
|
||||
extent_io_tree_panic(tree, err);
|
||||
|
||||
@ -899,7 +923,7 @@ hit_next:
|
||||
goto out;
|
||||
}
|
||||
|
||||
set_state_bits(tree, state, &bits);
|
||||
set_state_bits(tree, state, &bits, changeset);
|
||||
cache_state(state, cached_state);
|
||||
merge_state(tree, state);
|
||||
if (last_end == (u64)-1)
|
||||
@ -945,7 +969,7 @@ hit_next:
|
||||
if (err)
|
||||
goto out;
|
||||
if (state->end <= end) {
|
||||
set_state_bits(tree, state, &bits);
|
||||
set_state_bits(tree, state, &bits, changeset);
|
||||
cache_state(state, cached_state);
|
||||
merge_state(tree, state);
|
||||
if (last_end == (u64)-1)
|
||||
@ -980,7 +1004,7 @@ hit_next:
|
||||
* the later extent.
|
||||
*/
|
||||
err = insert_state(tree, prealloc, start, this_end,
|
||||
NULL, NULL, &bits);
|
||||
NULL, NULL, &bits, changeset);
|
||||
if (err)
|
||||
extent_io_tree_panic(tree, err);
|
||||
|
||||
@ -1008,7 +1032,7 @@ hit_next:
|
||||
if (err)
|
||||
extent_io_tree_panic(tree, err);
|
||||
|
||||
set_state_bits(tree, prealloc, &bits);
|
||||
set_state_bits(tree, prealloc, &bits, changeset);
|
||||
cache_state(prealloc, cached_state);
|
||||
merge_state(tree, prealloc);
|
||||
prealloc = NULL;
|
||||
@ -1038,7 +1062,7 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
|
||||
struct extent_state **cached_state, gfp_t mask)
|
||||
{
|
||||
return __set_extent_bit(tree, start, end, bits, 0, failed_start,
|
||||
cached_state, mask);
|
||||
cached_state, mask, NULL);
|
||||
}
|
||||
|
||||
|
||||
@ -1111,7 +1135,7 @@ again:
|
||||
goto out;
|
||||
}
|
||||
err = insert_state(tree, prealloc, start, end,
|
||||
&p, &parent, &bits);
|
||||
&p, &parent, &bits, NULL);
|
||||
if (err)
|
||||
extent_io_tree_panic(tree, err);
|
||||
cache_state(prealloc, cached_state);
|
||||
@ -1130,9 +1154,9 @@ hit_next:
|
||||
* Just lock what we found and keep going
|
||||
*/
|
||||
if (state->start == start && state->end <= end) {
|
||||
set_state_bits(tree, state, &bits);
|
||||
set_state_bits(tree, state, &bits, NULL);
|
||||
cache_state(state, cached_state);
|
||||
state = clear_state_bit(tree, state, &clear_bits, 0);
|
||||
state = clear_state_bit(tree, state, &clear_bits, 0, NULL);
|
||||
if (last_end == (u64)-1)
|
||||
goto out;
|
||||
start = last_end + 1;
|
||||
@ -1171,9 +1195,10 @@ hit_next:
|
||||
if (err)
|
||||
goto out;
|
||||
if (state->end <= end) {
|
||||
set_state_bits(tree, state, &bits);
|
||||
set_state_bits(tree, state, &bits, NULL);
|
||||
cache_state(state, cached_state);
|
||||
state = clear_state_bit(tree, state, &clear_bits, 0);
|
||||
state = clear_state_bit(tree, state, &clear_bits, 0,
|
||||
NULL);
|
||||
if (last_end == (u64)-1)
|
||||
goto out;
|
||||
start = last_end + 1;
|
||||
@ -1208,7 +1233,7 @@ hit_next:
|
||||
* the later extent.
|
||||
*/
|
||||
err = insert_state(tree, prealloc, start, this_end,
|
||||
NULL, NULL, &bits);
|
||||
NULL, NULL, &bits, NULL);
|
||||
if (err)
|
||||
extent_io_tree_panic(tree, err);
|
||||
cache_state(prealloc, cached_state);
|
||||
@ -1233,9 +1258,9 @@ hit_next:
|
||||
if (err)
|
||||
extent_io_tree_panic(tree, err);
|
||||
|
||||
set_state_bits(tree, prealloc, &bits);
|
||||
set_state_bits(tree, prealloc, &bits, NULL);
|
||||
cache_state(prealloc, cached_state);
|
||||
clear_state_bit(tree, prealloc, &clear_bits, 0);
|
||||
clear_state_bit(tree, prealloc, &clear_bits, 0, NULL);
|
||||
prealloc = NULL;
|
||||
goto out;
|
||||
}
|
||||
@ -1274,6 +1299,30 @@ int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
|
||||
NULL, mask);
|
||||
}
|
||||
|
||||
/*
 * Set @bits on the range given by @start and @end in @tree, handing
 * @changeset down to __set_extent_bit() so the change can be recorded.
 *
 * Apart from @changeset, this differs from set_extent_bit() only in that
 * no failed_start and no cached_state pointer are supplied (both NULL).
 * The argument following @bits is 0, as in set_extent_bit().
 *
 * Returns whatever __set_extent_bit() returns. @bits must not contain
 * EXTENT_LOCKED; that case hits the BUG_ON below.
 */
int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
|
||||
unsigned bits, gfp_t mask,
|
||||
struct extent_changeset *changeset)
|
||||
{
|
||||
/*
|
||||
* We don't support EXTENT_LOCKED yet, as current changeset will
|
||||
* record any bits changed, so for EXTENT_LOCKED case, it will
|
||||
* either fail with -EEXIST or changeset will record the whole
|
||||
* range.
|
||||
*/
|
||||
BUG_ON(bits & EXTENT_LOCKED);
|
||||
|
||||
return __set_extent_bit(tree, start, end, bits, 0, NULL, NULL, mask,
|
||||
changeset);
|
||||
}
|
||||
|
||||
/*
 * Clear @bits on the range given by @start and @end in @tree.
 *
 * Thin wrapper: every argument is forwarded unchanged to
 * __clear_extent_bit(), and NULL is passed as its final (changeset)
 * argument, so no changeset is recorded. Returns the result of
 * __clear_extent_bit().
 */
int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
|
||||
unsigned bits, int wake, int delete,
|
||||
struct extent_state **cached, gfp_t mask)
|
||||
{
|
||||
return __clear_extent_bit(tree, start, end, bits, wake, delete,
|
||||
cached, mask, NULL);
|
||||
}
|
||||
|
||||
int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
|
||||
unsigned bits, gfp_t mask)
|
||||
{
|
||||
@ -1285,6 +1334,20 @@ int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
|
||||
return clear_extent_bit(tree, start, end, bits, wake, 0, NULL, mask);
|
||||
}
|
||||
|
||||
/*
 * Clear @bits on the range given by @start and @end in @tree, handing
 * @changeset down to __clear_extent_bit() so the change can be recorded.
 *
 * Compared with clear_extent_bit(), the wake and delete arguments are
 * fixed to 0 and no cached state pointer is supplied (NULL).
 *
 * Returns whatever __clear_extent_bit() returns. @bits must not contain
 * EXTENT_LOCKED; that case hits the BUG_ON below.
 */
int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
|
||||
unsigned bits, gfp_t mask,
|
||||
struct extent_changeset *changeset)
|
||||
{
|
||||
/*
|
||||
* Don't support EXTENT_LOCKED case, same reason as
|
||||
* set_record_extent_bits().
|
||||
*/
|
||||
BUG_ON(bits & EXTENT_LOCKED);
|
||||
|
||||
return __clear_extent_bit(tree, start, end, bits, 0, 0, NULL, mask,
|
||||
changeset);
|
||||
}
|
||||
|
||||
int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
|
||||
struct extent_state **cached_state, gfp_t mask)
|
||||
{
|
||||
@ -1343,7 +1406,7 @@ int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
|
||||
while (1) {
|
||||
err = __set_extent_bit(tree, start, end, EXTENT_LOCKED | bits,
|
||||
EXTENT_LOCKED, &failed_start,
|
||||
cached_state, GFP_NOFS);
|
||||
cached_state, GFP_NOFS, NULL);
|
||||
if (err == -EEXIST) {
|
||||
wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
|
||||
start = failed_start;
|
||||
@ -1365,7 +1428,7 @@ int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
|
||||
u64 failed_start;
|
||||
|
||||
err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, EXTENT_LOCKED,
|
||||
&failed_start, NULL, GFP_NOFS);
|
||||
&failed_start, NULL, GFP_NOFS, NULL);
|
||||
if (err == -EEXIST) {
|
||||
if (failed_start > start)
|
||||
clear_extent_bit(tree, start, failed_start - 1,
|
||||
@ -2078,8 +2141,8 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
printk_ratelimited_in_rcu(KERN_INFO
|
||||
"BTRFS: read error corrected: ino %llu off %llu (dev %s sector %llu)\n",
|
||||
btrfs_info_rl_in_rcu(fs_info,
|
||||
"read error corrected: ino %llu off %llu (dev %s sector %llu)",
|
||||
btrfs_ino(inode), start,
|
||||
rcu_str_deref(dev->name), sector);
|
||||
bio_put(bio);
|
||||
@ -3070,8 +3133,12 @@ static int __do_readpage(struct extent_io_tree *tree,
|
||||
|
||||
set_extent_uptodate(tree, cur, cur + iosize - 1,
|
||||
&cached, GFP_NOFS);
|
||||
unlock_extent_cached(tree, cur, cur + iosize - 1,
|
||||
&cached, GFP_NOFS);
|
||||
if (parent_locked)
|
||||
free_extent_state(cached);
|
||||
else
|
||||
unlock_extent_cached(tree, cur,
|
||||
cur + iosize - 1,
|
||||
&cached, GFP_NOFS);
|
||||
cur = cur + iosize;
|
||||
pg_offset += iosize;
|
||||
continue;
|
||||
@ -5566,13 +5633,15 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
|
||||
unsigned long src_i;
|
||||
|
||||
if (src_offset + len > dst->len) {
|
||||
printk(KERN_ERR "BTRFS: memmove bogus src_offset %lu move "
|
||||
"len %lu dst len %lu\n", src_offset, len, dst->len);
|
||||
btrfs_err(dst->fs_info,
|
||||
"memmove bogus src_offset %lu move "
|
||||
"len %lu dst len %lu", src_offset, len, dst->len);
|
||||
BUG_ON(1);
|
||||
}
|
||||
if (dst_offset + len > dst->len) {
|
||||
printk(KERN_ERR "BTRFS: memmove bogus dst_offset %lu move "
|
||||
"len %lu dst len %lu\n", dst_offset, len, dst->len);
|
||||
btrfs_err(dst->fs_info,
|
||||
"memmove bogus dst_offset %lu move "
|
||||
"len %lu dst len %lu", dst_offset, len, dst->len);
|
||||
BUG_ON(1);
|
||||
}
|
||||
|
||||
@ -5612,13 +5681,13 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
|
||||
unsigned long src_i;
|
||||
|
||||
if (src_offset + len > dst->len) {
|
||||
printk(KERN_ERR "BTRFS: memmove bogus src_offset %lu move "
|
||||
"len %lu len %lu\n", src_offset, len, dst->len);
|
||||
btrfs_err(dst->fs_info, "memmove bogus src_offset %lu move "
|
||||
"len %lu len %lu", src_offset, len, dst->len);
|
||||
BUG_ON(1);
|
||||
}
|
||||
if (dst_offset + len > dst->len) {
|
||||
printk(KERN_ERR "BTRFS: memmove bogus dst_offset %lu move "
|
||||
"len %lu len %lu\n", dst_offset, len, dst->len);
|
||||
btrfs_err(dst->fs_info, "memmove bogus dst_offset %lu move "
|
||||
"len %lu len %lu", dst_offset, len, dst->len);
|
||||
BUG_ON(1);
|
||||
}
|
||||
if (dst_offset < src_offset) {
|
||||
|
@ -2,6 +2,7 @@
|
||||
#define __EXTENTIO__
|
||||
|
||||
#include <linux/rbtree.h>
|
||||
#include "ulist.h"
|
||||
|
||||
/* bits for the extent state */
|
||||
#define EXTENT_DIRTY (1U << 0)
|
||||
@ -18,6 +19,7 @@
|
||||
#define EXTENT_NEED_WAIT (1U << 13)
|
||||
#define EXTENT_DAMAGED (1U << 14)
|
||||
#define EXTENT_NORESERVE (1U << 15)
|
||||
#define EXTENT_QGROUP_RESERVED (1U << 16)
|
||||
#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
|
||||
#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC)
|
||||
|
||||
@ -161,6 +163,17 @@ struct extent_buffer {
|
||||
#endif
|
||||
};
|
||||
|
||||
/*
|
||||
* Structure to record how many bytes and which ranges are set/cleared.
|
||||
*
|
||||
* This is the changeset argument accepted by set_record_extent_bits()
|
||||
* and clear_record_extent_bits().
|
||||
*/
|
||||
struct extent_changeset {
|
||||
/* How many bytes are set/cleared in this operation */
|
||||
u64 bytes_changed;
|
||||
|
||||
/* Changed ranges, kept in a ulist (declared in ulist.h) */
|
||||
struct ulist *range_changed;
|
||||
};
|
||||
|
||||
static inline void extent_set_compress_type(unsigned long *bio_flags,
|
||||
int compress_type)
|
||||
{
|
||||
@ -210,11 +223,17 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
|
||||
struct extent_state *cached_state);
|
||||
int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
|
||||
unsigned bits, gfp_t mask);
|
||||
int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
|
||||
unsigned bits, gfp_t mask,
|
||||
struct extent_changeset *changeset);
|
||||
int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
|
||||
unsigned bits, int wake, int delete,
|
||||
struct extent_state **cached, gfp_t mask);
|
||||
int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
|
||||
unsigned bits, gfp_t mask);
|
||||
int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
|
||||
unsigned bits, gfp_t mask,
|
||||
struct extent_changeset *changeset);
|
||||
int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
|
||||
unsigned bits, u64 *failed_start,
|
||||
struct extent_state **cached_state, gfp_t mask);
|
||||
|
228
fs/btrfs/file.c
228
fs/btrfs/file.c
@ -847,7 +847,7 @@ next_slot:
|
||||
disk_bytenr, num_bytes, 0,
|
||||
root->root_key.objectid,
|
||||
new_key.objectid,
|
||||
start - extent_offset, 1);
|
||||
start - extent_offset);
|
||||
BUG_ON(ret); /* -ENOMEM */
|
||||
}
|
||||
key.offset = start;
|
||||
@ -925,7 +925,7 @@ delete_extent_item:
|
||||
disk_bytenr, num_bytes, 0,
|
||||
root->root_key.objectid,
|
||||
key.objectid, key.offset -
|
||||
extent_offset, 0);
|
||||
extent_offset);
|
||||
BUG_ON(ret); /* -ENOMEM */
|
||||
inode_sub_bytes(inode,
|
||||
extent_end - key.offset);
|
||||
@ -1204,7 +1204,7 @@ again:
|
||||
|
||||
ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
|
||||
root->root_key.objectid,
|
||||
ino, orig_offset, 1);
|
||||
ino, orig_offset);
|
||||
BUG_ON(ret); /* -ENOMEM */
|
||||
|
||||
if (split == start) {
|
||||
@ -1231,7 +1231,7 @@ again:
|
||||
del_nr++;
|
||||
ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
|
||||
0, root->root_key.objectid,
|
||||
ino, orig_offset, 0);
|
||||
ino, orig_offset);
|
||||
BUG_ON(ret); /* -ENOMEM */
|
||||
}
|
||||
other_start = 0;
|
||||
@ -1248,7 +1248,7 @@ again:
|
||||
del_nr++;
|
||||
ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
|
||||
0, root->root_key.objectid,
|
||||
ino, orig_offset, 0);
|
||||
ino, orig_offset);
|
||||
BUG_ON(ret); /* -ENOMEM */
|
||||
}
|
||||
if (del_nr == 0) {
|
||||
@ -1469,7 +1469,6 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
|
||||
u64 release_bytes = 0;
|
||||
u64 lockstart;
|
||||
u64 lockend;
|
||||
unsigned long first_index;
|
||||
size_t num_written = 0;
|
||||
int nrptrs;
|
||||
int ret = 0;
|
||||
@ -1485,8 +1484,6 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
|
||||
if (!pages)
|
||||
return -ENOMEM;
|
||||
|
||||
first_index = pos >> PAGE_CACHE_SHIFT;
|
||||
|
||||
while (iov_iter_count(i) > 0) {
|
||||
size_t offset = pos & (PAGE_CACHE_SIZE - 1);
|
||||
size_t write_bytes = min(iov_iter_count(i),
|
||||
@ -1510,12 +1507,17 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
|
||||
}
|
||||
|
||||
reserve_bytes = num_pages << PAGE_CACHE_SHIFT;
|
||||
ret = btrfs_check_data_free_space(inode, reserve_bytes, write_bytes);
|
||||
if (ret == -ENOSPC &&
|
||||
(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
|
||||
BTRFS_INODE_PREALLOC))) {
|
||||
|
||||
if (BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
|
||||
BTRFS_INODE_PREALLOC)) {
|
||||
ret = check_can_nocow(inode, pos, &write_bytes);
|
||||
if (ret < 0)
|
||||
break;
|
||||
if (ret > 0) {
|
||||
/*
|
||||
* For nodata cow case, no need to reserve
|
||||
* data space.
|
||||
*/
|
||||
only_release_metadata = true;
|
||||
/*
|
||||
* our prealloc extent may be smaller than
|
||||
@ -1524,20 +1526,19 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
|
||||
num_pages = DIV_ROUND_UP(write_bytes + offset,
|
||||
PAGE_CACHE_SIZE);
|
||||
reserve_bytes = num_pages << PAGE_CACHE_SHIFT;
|
||||
ret = 0;
|
||||
} else {
|
||||
ret = -ENOSPC;
|
||||
goto reserve_metadata;
|
||||
}
|
||||
}
|
||||
|
||||
if (ret)
|
||||
ret = btrfs_check_data_free_space(inode, pos, write_bytes);
|
||||
if (ret < 0)
|
||||
break;
|
||||
|
||||
reserve_metadata:
|
||||
ret = btrfs_delalloc_reserve_metadata(inode, reserve_bytes);
|
||||
if (ret) {
|
||||
if (!only_release_metadata)
|
||||
btrfs_free_reserved_data_space(inode,
|
||||
reserve_bytes);
|
||||
btrfs_free_reserved_data_space(inode, pos,
|
||||
write_bytes);
|
||||
else
|
||||
btrfs_end_write_no_snapshoting(root);
|
||||
break;
|
||||
@ -1603,12 +1604,17 @@ again:
|
||||
BTRFS_I(inode)->outstanding_extents++;
|
||||
spin_unlock(&BTRFS_I(inode)->lock);
|
||||
}
|
||||
if (only_release_metadata)
|
||||
if (only_release_metadata) {
|
||||
btrfs_delalloc_release_metadata(inode,
|
||||
release_bytes);
|
||||
else
|
||||
btrfs_delalloc_release_space(inode,
|
||||
} else {
|
||||
u64 __pos;
|
||||
|
||||
__pos = round_down(pos, root->sectorsize) +
|
||||
(dirty_pages << PAGE_CACHE_SHIFT);
|
||||
btrfs_delalloc_release_space(inode, __pos,
|
||||
release_bytes);
|
||||
}
|
||||
}
|
||||
|
||||
release_bytes = dirty_pages << PAGE_CACHE_SHIFT;
|
||||
@ -1660,7 +1666,7 @@ again:
|
||||
btrfs_end_write_no_snapshoting(root);
|
||||
btrfs_delalloc_release_metadata(inode, release_bytes);
|
||||
} else {
|
||||
btrfs_delalloc_release_space(inode, release_bytes);
|
||||
btrfs_delalloc_release_space(inode, pos, release_bytes);
|
||||
}
|
||||
}
|
||||
|
||||
@ -2266,7 +2272,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
|
||||
u64 drop_end;
|
||||
int ret = 0;
|
||||
int err = 0;
|
||||
int rsv_count;
|
||||
unsigned int rsv_count;
|
||||
bool same_page;
|
||||
bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
|
||||
u64 ino_size;
|
||||
@ -2487,6 +2493,19 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
|
||||
}
|
||||
|
||||
trans->block_rsv = &root->fs_info->trans_block_rsv;
|
||||
/*
|
||||
* If we are using the NO_HOLES feature we might already have had a
|
||||
* hole that overlaps a part of the region [lockstart, lockend] and
|
||||
* ends at (or beyond) lockend. Since we have no file extent items to
|
||||
* represent holes, drop_end can be less than lockend and so we must
|
||||
* make sure we have an extent map representing the existing hole (the
|
||||
* call to __btrfs_drop_extents() might have dropped the existing extent
|
||||
* map representing the existing hole), otherwise the fast fsync path
|
||||
* will not record the existence of the hole region
|
||||
* [existing_hole_start, lockend].
|
||||
*/
|
||||
if (drop_end <= lockend)
|
||||
drop_end = lockend + 1;
|
||||
/*
|
||||
* Don't insert file hole extent item if it's for a range beyond eof
|
||||
* (because it's useless) or if it represents a 0 bytes range (when
|
||||
@ -2541,17 +2560,61 @@ out_only_mutex:
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* Helper structure to record which range is already reserved.
|
||||
*
|
||||
* Each entry covers the byte range [start, start + len). Entries are
|
||||
* created and merged by add_falloc_range().
|
||||
*/
|
||||
struct falloc_range {
|
||||
struct list_head list; /* link in the list the range was added to */
|
||||
u64 start; /* first byte of the range */
|
||||
u64 len; /* length of the range in bytes */
|
||||
};
|
||||
|
||||
/*
|
||||
* Helper function to add falloc range
|
||||
*
|
||||
* Records the range [start, start + len) on the list at @head. If the
|
||||
* last entry on the list ends exactly at @start, that entry is grown by
|
||||
* @len; otherwise a new entry is allocated and appended at the tail.
|
||||
*
|
||||
* Returns 0 on success, or -ENOMEM if a new entry cannot be allocated.
|
||||
*
|
||||
* Caller should have locked the larger range of extent containing
|
||||
* [start, start + len)
|
||||
*/
|
||||
static int add_falloc_range(struct list_head *head, u64 start, u64 len)
|
||||
{
|
||||
struct falloc_range *prev = NULL;
|
||||
struct falloc_range *range = NULL;
|
||||
|
||||
if (list_empty(head)) /* nothing to merge with yet */
|
||||
goto insert;
|
||||
|
||||
/*
|
||||
* As fallocate iterates in bytenr order, we only need to check
|
||||
* the last range.
|
||||
*/
|
||||
prev = list_entry(head->prev, struct falloc_range, list);
|
||||
if (prev->start + prev->len == start) { /* contiguous with the tail entry */
|
||||
prev->len += len;
|
||||
return 0;
|
||||
}
|
||||
insert: /* empty list or a gap before @start: append a new entry */
|
||||
range = kmalloc(sizeof(*range), GFP_NOFS);
|
||||
if (!range)
|
||||
return -ENOMEM;
|
||||
range->start = start;
|
||||
range->len = len;
|
||||
list_add_tail(&range->list, head);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static long btrfs_fallocate(struct file *file, int mode,
|
||||
loff_t offset, loff_t len)
|
||||
{
|
||||
struct inode *inode = file_inode(file);
|
||||
struct extent_state *cached_state = NULL;
|
||||
struct falloc_range *range;
|
||||
struct falloc_range *tmp;
|
||||
struct list_head reserve_list;
|
||||
u64 cur_offset;
|
||||
u64 last_byte;
|
||||
u64 alloc_start;
|
||||
u64 alloc_end;
|
||||
u64 alloc_hint = 0;
|
||||
u64 locked_end;
|
||||
u64 actual_end = 0;
|
||||
struct extent_map *em;
|
||||
int blocksize = BTRFS_I(inode)->root->sectorsize;
|
||||
int ret;
|
||||
@ -2567,11 +2630,12 @@ static long btrfs_fallocate(struct file *file, int mode,
|
||||
return btrfs_punch_hole(inode, offset, len);
|
||||
|
||||
/*
|
||||
* Make sure we have enough space before we do the
|
||||
* allocation.
|
||||
* Only trigger disk allocation, don't trigger qgroup reserve
|
||||
*
|
||||
* For qgroup space, it will be checked later.
|
||||
*/
|
||||
ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start, alloc_end - alloc_start);
|
||||
if (ret)
|
||||
ret = btrfs_alloc_data_chunk_ondemand(inode, alloc_end - alloc_start);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
mutex_lock(&inode->i_mutex);
|
||||
@ -2579,6 +2643,13 @@ static long btrfs_fallocate(struct file *file, int mode,
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* TODO: Move these two operations after we have checked
|
||||
* accurate reserved space, or fallocate can still fail but
|
||||
* with page truncated or size expanded.
|
||||
*
|
||||
* But that's a minor problem and won't do much harm BTW.
|
||||
*/
|
||||
if (alloc_start > inode->i_size) {
|
||||
ret = btrfs_cont_expand(inode, i_size_read(inode),
|
||||
alloc_start);
|
||||
@ -2637,10 +2708,10 @@ static long btrfs_fallocate(struct file *file, int mode,
|
||||
}
|
||||
}
|
||||
|
||||
/* First, check if we exceed the qgroup limit */
|
||||
INIT_LIST_HEAD(&reserve_list);
|
||||
cur_offset = alloc_start;
|
||||
while (1) {
|
||||
u64 actual_end;
|
||||
|
||||
em = btrfs_get_extent(inode, NULL, 0, cur_offset,
|
||||
alloc_end - cur_offset, 0);
|
||||
if (IS_ERR_OR_NULL(em)) {
|
||||
@ -2653,57 +2724,82 @@ static long btrfs_fallocate(struct file *file, int mode,
|
||||
last_byte = min(extent_map_end(em), alloc_end);
|
||||
actual_end = min_t(u64, extent_map_end(em), offset + len);
|
||||
last_byte = ALIGN(last_byte, blocksize);
|
||||
|
||||
if (em->block_start == EXTENT_MAP_HOLE ||
|
||||
(cur_offset >= inode->i_size &&
|
||||
!test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
|
||||
ret = btrfs_prealloc_file_range(inode, mode, cur_offset,
|
||||
last_byte - cur_offset,
|
||||
1 << inode->i_blkbits,
|
||||
offset + len,
|
||||
&alloc_hint);
|
||||
} else if (actual_end > inode->i_size &&
|
||||
!(mode & FALLOC_FL_KEEP_SIZE)) {
|
||||
struct btrfs_trans_handle *trans;
|
||||
struct btrfs_root *root = BTRFS_I(inode)->root;
|
||||
|
||||
/*
|
||||
* We didn't need to allocate any more space, but we
|
||||
* still extended the size of the file so we need to
|
||||
* update i_size and the inode item.
|
||||
*/
|
||||
trans = btrfs_start_transaction(root, 1);
|
||||
if (IS_ERR(trans)) {
|
||||
ret = PTR_ERR(trans);
|
||||
} else {
|
||||
inode->i_ctime = CURRENT_TIME;
|
||||
i_size_write(inode, actual_end);
|
||||
btrfs_ordered_update_i_size(inode, actual_end,
|
||||
NULL);
|
||||
ret = btrfs_update_inode(trans, root, inode);
|
||||
if (ret)
|
||||
btrfs_end_transaction(trans, root);
|
||||
else
|
||||
ret = btrfs_end_transaction(trans,
|
||||
root);
|
||||
ret = add_falloc_range(&reserve_list, cur_offset,
|
||||
last_byte - cur_offset);
|
||||
if (ret < 0) {
|
||||
free_extent_map(em);
|
||||
break;
|
||||
}
|
||||
ret = btrfs_qgroup_reserve_data(inode, cur_offset,
|
||||
last_byte - cur_offset);
|
||||
if (ret < 0)
|
||||
break;
|
||||
}
|
||||
free_extent_map(em);
|
||||
if (ret < 0)
|
||||
break;
|
||||
|
||||
cur_offset = last_byte;
|
||||
if (cur_offset >= alloc_end) {
|
||||
ret = 0;
|
||||
if (cur_offset >= alloc_end)
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* If ret is still 0, means we're OK to fallocate.
|
||||
* Or just cleanup the list and exit.
|
||||
*/
|
||||
list_for_each_entry_safe(range, tmp, &reserve_list, list) {
|
||||
if (!ret)
|
||||
ret = btrfs_prealloc_file_range(inode, mode,
|
||||
range->start,
|
||||
range->len, 1 << inode->i_blkbits,
|
||||
offset + len, &alloc_hint);
|
||||
list_del(&range->list);
|
||||
kfree(range);
|
||||
}
|
||||
if (ret < 0)
|
||||
goto out_unlock;
|
||||
|
||||
if (actual_end > inode->i_size &&
|
||||
!(mode & FALLOC_FL_KEEP_SIZE)) {
|
||||
struct btrfs_trans_handle *trans;
|
||||
struct btrfs_root *root = BTRFS_I(inode)->root;
|
||||
|
||||
/*
|
||||
* We didn't need to allocate any more space, but we
|
||||
* still extended the size of the file so we need to
|
||||
* update i_size and the inode item.
|
||||
*/
|
||||
trans = btrfs_start_transaction(root, 1);
|
||||
if (IS_ERR(trans)) {
|
||||
ret = PTR_ERR(trans);
|
||||
} else {
|
||||
inode->i_ctime = CURRENT_TIME;
|
||||
i_size_write(inode, actual_end);
|
||||
btrfs_ordered_update_i_size(inode, actual_end, NULL);
|
||||
ret = btrfs_update_inode(trans, root, inode);
|
||||
if (ret)
|
||||
btrfs_end_transaction(trans, root);
|
||||
else
|
||||
ret = btrfs_end_transaction(trans, root);
|
||||
}
|
||||
}
|
||||
out_unlock:
|
||||
unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
|
||||
&cached_state, GFP_NOFS);
|
||||
out:
|
||||
/*
|
||||
* As we waited the extent range, the data_rsv_map must be empty
|
||||
* in the range, as written data range will be released from it.
|
||||
* And for preallocated extent, it will also be released when
|
||||
* its metadata is written.
|
||||
* So this is completely used as cleanup.
|
||||
*/
|
||||
btrfs_qgroup_free_data(inode, alloc_start, alloc_end - alloc_start);
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
/* Let go of our reservation. */
|
||||
btrfs_free_reserved_data_space(inode, alloc_end - alloc_start);
|
||||
btrfs_free_reserved_data_space(inode, alloc_start,
|
||||
alloc_end - alloc_start);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -450,9 +450,9 @@ static int io_ctl_check_generation(struct btrfs_io_ctl *io_ctl, u64 generation)
|
||||
|
||||
gen = io_ctl->cur;
|
||||
if (le64_to_cpu(*gen) != generation) {
|
||||
printk_ratelimited(KERN_ERR "BTRFS: space cache generation "
|
||||
"(%Lu) does not match inode (%Lu)\n", *gen,
|
||||
generation);
|
||||
btrfs_err_rl(io_ctl->root->fs_info,
|
||||
"space cache generation (%llu) does not match inode (%llu)",
|
||||
*gen, generation);
|
||||
io_ctl_unmap_page(io_ctl);
|
||||
return -EIO;
|
||||
}
|
||||
@ -506,8 +506,8 @@ static int io_ctl_check_crc(struct btrfs_io_ctl *io_ctl, int index)
|
||||
PAGE_CACHE_SIZE - offset);
|
||||
btrfs_csum_final(crc, (char *)&crc);
|
||||
if (val != crc) {
|
||||
printk_ratelimited(KERN_ERR "BTRFS: csum mismatch on free "
|
||||
"space cache\n");
|
||||
btrfs_err_rl(io_ctl->root->fs_info,
|
||||
"csum mismatch on free space cache");
|
||||
io_ctl_unmap_page(io_ctl);
|
||||
return -EIO;
|
||||
}
|
||||
@ -1215,7 +1215,7 @@ out:
|
||||
* @offset - the offset for the key we'll insert
|
||||
*
|
||||
* This function writes out a free space cache struct to disk for quick recovery
|
||||
* on mount. This will return 0 if it was successfull in writing the cache out,
|
||||
* on mount. This will return 0 if it was successful in writing the cache out,
|
||||
* or an errno if it was not.
|
||||
*/
|
||||
static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
|
||||
@ -1730,7 +1730,7 @@ static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
|
||||
*/
|
||||
static int search_bitmap(struct btrfs_free_space_ctl *ctl,
|
||||
struct btrfs_free_space *bitmap_info, u64 *offset,
|
||||
u64 *bytes)
|
||||
u64 *bytes, bool for_alloc)
|
||||
{
|
||||
unsigned long found_bits = 0;
|
||||
unsigned long max_bits = 0;
|
||||
@ -1738,11 +1738,26 @@ static int search_bitmap(struct btrfs_free_space_ctl *ctl,
|
||||
unsigned long next_zero;
|
||||
unsigned long extent_bits;
|
||||
|
||||
/*
|
||||
* Skip searching the bitmap if we don't have a contiguous section that
|
||||
* is large enough for this allocation.
|
||||
*/
|
||||
if (for_alloc &&
|
||||
bitmap_info->max_extent_size &&
|
||||
bitmap_info->max_extent_size < *bytes) {
|
||||
*bytes = bitmap_info->max_extent_size;
|
||||
return -1;
|
||||
}
|
||||
|
||||
i = offset_to_bit(bitmap_info->offset, ctl->unit,
|
||||
max_t(u64, *offset, bitmap_info->offset));
|
||||
bits = bytes_to_bits(*bytes, ctl->unit);
|
||||
|
||||
for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) {
|
||||
if (for_alloc && bits == 1) {
|
||||
found_bits = 1;
|
||||
break;
|
||||
}
|
||||
next_zero = find_next_zero_bit(bitmap_info->bitmap,
|
||||
BITS_PER_BITMAP, i);
|
||||
extent_bits = next_zero - i;
|
||||
@ -1762,6 +1777,7 @@ static int search_bitmap(struct btrfs_free_space_ctl *ctl,
|
||||
}
|
||||
|
||||
*bytes = (u64)(max_bits) * ctl->unit;
|
||||
bitmap_info->max_extent_size = *bytes;
|
||||
return -1;
|
||||
}
|
||||
|
||||
@ -1813,7 +1829,7 @@ find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
|
||||
if (entry->bitmap) {
|
||||
u64 size = *bytes;
|
||||
|
||||
ret = search_bitmap(ctl, entry, &tmp, &size);
|
||||
ret = search_bitmap(ctl, entry, &tmp, &size, true);
|
||||
if (!ret) {
|
||||
*offset = tmp;
|
||||
*bytes = size;
|
||||
@ -1874,7 +1890,8 @@ again:
|
||||
search_start = *offset;
|
||||
search_bytes = ctl->unit;
|
||||
search_bytes = min(search_bytes, end - search_start + 1);
|
||||
ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes);
|
||||
ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes,
|
||||
false);
|
||||
if (ret < 0 || search_start != *offset)
|
||||
return -EINVAL;
|
||||
|
||||
@ -1919,7 +1936,7 @@ again:
|
||||
search_start = *offset;
|
||||
search_bytes = ctl->unit;
|
||||
ret = search_bitmap(ctl, bitmap_info, &search_start,
|
||||
&search_bytes);
|
||||
&search_bytes, false);
|
||||
if (ret < 0 || search_start != *offset)
|
||||
return -EAGAIN;
|
||||
|
||||
@ -1943,6 +1960,12 @@ static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
|
||||
|
||||
bitmap_set_bits(ctl, info, offset, bytes_to_set);
|
||||
|
||||
/*
|
||||
* We set some bytes, we have no idea what the max extent size is
|
||||
* anymore.
|
||||
*/
|
||||
info->max_extent_size = 0;
|
||||
|
||||
return bytes_to_set;
|
||||
|
||||
}
|
||||
@ -1951,12 +1974,19 @@ static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
|
||||
struct btrfs_free_space *info)
|
||||
{
|
||||
struct btrfs_block_group_cache *block_group = ctl->private;
|
||||
bool forced = false;
|
||||
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
if (btrfs_should_fragment_free_space(block_group->fs_info->extent_root,
|
||||
block_group))
|
||||
forced = true;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* If we are below the extents threshold then we can add this as an
|
||||
* extent, and don't have to deal with the bitmap
|
||||
*/
|
||||
if (ctl->free_extents < ctl->extents_thresh) {
|
||||
if (!forced && ctl->free_extents < ctl->extents_thresh) {
|
||||
/*
|
||||
* If this block group has some small extents we don't want to
|
||||
* use up all of our free slots in the cache with them, we want
|
||||
@ -2661,7 +2691,7 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
|
||||
search_start = min_start;
|
||||
search_bytes = bytes;
|
||||
|
||||
err = search_bitmap(ctl, entry, &search_start, &search_bytes);
|
||||
err = search_bitmap(ctl, entry, &search_start, &search_bytes, true);
|
||||
if (err) {
|
||||
if (search_bytes > *max_extent_size)
|
||||
*max_extent_size = search_bytes;
|
||||
@ -2775,6 +2805,7 @@ static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
|
||||
unsigned long want_bits;
|
||||
unsigned long min_bits;
|
||||
unsigned long found_bits;
|
||||
unsigned long max_bits = 0;
|
||||
unsigned long start = 0;
|
||||
unsigned long total_found = 0;
|
||||
int ret;
|
||||
@ -2784,6 +2815,13 @@ static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
|
||||
want_bits = bytes_to_bits(bytes, ctl->unit);
|
||||
min_bits = bytes_to_bits(min_bytes, ctl->unit);
|
||||
|
||||
/*
|
||||
* Don't bother looking for a cluster in this bitmap if it's heavily
|
||||
* fragmented.
|
||||
*/
|
||||
if (entry->max_extent_size &&
|
||||
entry->max_extent_size < cont1_bytes)
|
||||
return -ENOSPC;
|
||||
again:
|
||||
found_bits = 0;
|
||||
for_each_set_bit_from(i, entry->bitmap, BITS_PER_BITMAP) {
|
||||
@ -2791,13 +2829,19 @@ again:
|
||||
BITS_PER_BITMAP, i);
|
||||
if (next_zero - i >= min_bits) {
|
||||
found_bits = next_zero - i;
|
||||
if (found_bits > max_bits)
|
||||
max_bits = found_bits;
|
||||
break;
|
||||
}
|
||||
if (next_zero - i > max_bits)
|
||||
max_bits = next_zero - i;
|
||||
i = next_zero;
|
||||
}
|
||||
|
||||
if (!found_bits)
|
||||
if (!found_bits) {
|
||||
entry->max_extent_size = (u64)max_bits * ctl->unit;
|
||||
return -ENOSPC;
|
||||
}
|
||||
|
||||
if (!total_found) {
|
||||
start = i;
|
||||
@ -3056,6 +3100,7 @@ void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
|
||||
spin_lock_init(&cluster->refill_lock);
|
||||
cluster->root = RB_ROOT;
|
||||
cluster->max_size = 0;
|
||||
cluster->fragmented = false;
|
||||
INIT_LIST_HEAD(&cluster->block_group_list);
|
||||
cluster->block_group = NULL;
|
||||
}
|
||||
@ -3223,7 +3268,7 @@ static int trim_bitmaps(struct btrfs_block_group_cache *block_group,
|
||||
}
|
||||
|
||||
bytes = minlen;
|
||||
ret2 = search_bitmap(ctl, entry, &start, &bytes);
|
||||
ret2 = search_bitmap(ctl, entry, &start, &bytes, false);
|
||||
if (ret2 || start >= end) {
|
||||
spin_unlock(&ctl->tree_lock);
|
||||
mutex_unlock(&ctl->cache_writeout_mutex);
|
||||
@ -3376,7 +3421,7 @@ u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root)
|
||||
u64 count = 1;
|
||||
int ret;
|
||||
|
||||
ret = search_bitmap(ctl, entry, &offset, &count);
|
||||
ret = search_bitmap(ctl, entry, &offset, &count, true);
|
||||
/* Logic error; Should be empty if it can't find anything */
|
||||
ASSERT(!ret);
|
||||
|
||||
@ -3532,6 +3577,7 @@ again:
|
||||
spin_lock(&ctl->tree_lock);
|
||||
info->offset = offset;
|
||||
info->bytes = bytes;
|
||||
info->max_extent_size = 0;
|
||||
ret = link_free_space(ctl, info);
|
||||
spin_unlock(&ctl->tree_lock);
|
||||
if (ret)
|
||||
@ -3559,6 +3605,7 @@ again:
|
||||
}
|
||||
|
||||
bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
|
||||
|
||||
bytes -= bytes_added;
|
||||
offset += bytes_added;
|
||||
spin_unlock(&ctl->tree_lock);
|
||||
@ -3602,7 +3649,7 @@ have_info:
|
||||
|
||||
bit_off = offset;
|
||||
bit_bytes = ctl->unit;
|
||||
ret = search_bitmap(ctl, info, &bit_off, &bit_bytes);
|
||||
ret = search_bitmap(ctl, info, &bit_off, &bit_bytes, false);
|
||||
if (!ret) {
|
||||
if (bit_off == offset) {
|
||||
ret = 1;
|
||||
|
@ -23,6 +23,7 @@ struct btrfs_free_space {
|
||||
struct rb_node offset_index;
|
||||
u64 offset;
|
||||
u64 bytes;
|
||||
u64 max_extent_size;
|
||||
unsigned long *bitmap;
|
||||
struct list_head list;
|
||||
};
|
||||
|
@ -157,7 +157,7 @@ static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
|
||||
*/
|
||||
if (!btrfs_find_name_in_ext_backref(path, ref_objectid,
|
||||
name, name_len, &extref)) {
|
||||
btrfs_std_error(root->fs_info, -ENOENT);
|
||||
btrfs_std_error(root->fs_info, -ENOENT, NULL);
|
||||
ret = -EROFS;
|
||||
goto out;
|
||||
}
|
||||
|
@ -488,17 +488,17 @@ again:
|
||||
/* Just to make sure we have enough space */
|
||||
prealloc += 8 * PAGE_CACHE_SIZE;
|
||||
|
||||
ret = btrfs_delalloc_reserve_space(inode, prealloc);
|
||||
ret = btrfs_delalloc_reserve_space(inode, 0, prealloc);
|
||||
if (ret)
|
||||
goto out_put;
|
||||
|
||||
ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc,
|
||||
prealloc, prealloc, &alloc_hint);
|
||||
if (ret) {
|
||||
btrfs_delalloc_release_space(inode, prealloc);
|
||||
btrfs_delalloc_release_space(inode, 0, prealloc);
|
||||
goto out_put;
|
||||
}
|
||||
btrfs_free_reserved_data_space(inode, prealloc);
|
||||
btrfs_free_reserved_data_space(inode, 0, prealloc);
|
||||
|
||||
ret = btrfs_write_out_ino_cache(root, trans, path, inode);
|
||||
out_put:
|
||||
|
194
fs/btrfs/inode.c
194
fs/btrfs/inode.c
@ -310,6 +310,13 @@ static noinline int cow_file_range_inline(struct btrfs_root *root,
|
||||
btrfs_delalloc_release_metadata(inode, end + 1 - start);
|
||||
btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
|
||||
out:
|
||||
/*
|
||||
* Don't forget to free the reserved space, as for inlined extent
|
||||
* it won't count as data extent, free them directly here.
|
||||
* And at reserve time, it's always aligned to page size, so
|
||||
* just free one page here.
|
||||
*/
|
||||
btrfs_qgroup_free_data(inode, 0, PAGE_CACHE_SIZE);
|
||||
btrfs_free_path(path);
|
||||
btrfs_end_transaction(trans, root);
|
||||
return ret;
|
||||
@ -1096,6 +1103,9 @@ static noinline void async_cow_submit(struct btrfs_work *work)
|
||||
nr_pages = (async_cow->end - async_cow->start + PAGE_CACHE_SIZE) >>
|
||||
PAGE_CACHE_SHIFT;
|
||||
|
||||
/*
|
||||
* atomic_sub_return implies a barrier for waitqueue_active
|
||||
*/
|
||||
if (atomic_sub_return(nr_pages, &root->fs_info->async_delalloc_pages) <
|
||||
5 * 1024 * 1024 &&
|
||||
waitqueue_active(&root->fs_info->async_submit_wait))
|
||||
@ -1766,7 +1776,8 @@ static void btrfs_clear_bit_hook(struct inode *inode,
|
||||
|
||||
if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
|
||||
&& do_list && !(state->state & EXTENT_NORESERVE))
|
||||
btrfs_free_reserved_data_space(inode, len);
|
||||
btrfs_free_reserved_data_space_noquota(inode,
|
||||
state->start, len);
|
||||
|
||||
__percpu_counter_add(&root->fs_info->delalloc_bytes, -len,
|
||||
root->fs_info->delalloc_batch);
|
||||
@ -1861,15 +1872,15 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
|
||||
u64 bio_offset)
|
||||
{
|
||||
struct btrfs_root *root = BTRFS_I(inode)->root;
|
||||
enum btrfs_wq_endio_type metadata = BTRFS_WQ_ENDIO_DATA;
|
||||
int ret = 0;
|
||||
int skip_sum;
|
||||
int metadata = 0;
|
||||
int async = !atomic_read(&BTRFS_I(inode)->sync_writers);
|
||||
|
||||
skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
|
||||
|
||||
if (btrfs_is_free_space_inode(inode))
|
||||
metadata = 2;
|
||||
metadata = BTRFS_WQ_ENDIO_FREE_SPACE;
|
||||
|
||||
if (!(rw & REQ_WRITE)) {
|
||||
ret = btrfs_bio_wq_end_io(root->fs_info, bio, metadata);
|
||||
@ -1989,7 +2000,8 @@ again:
|
||||
goto again;
|
||||
}
|
||||
|
||||
ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
|
||||
ret = btrfs_delalloc_reserve_space(inode, page_start,
|
||||
PAGE_CACHE_SIZE);
|
||||
if (ret) {
|
||||
mapping_set_error(page->mapping, ret);
|
||||
end_extent_writepage(page, ret, page_start, page_end);
|
||||
@ -2115,7 +2127,13 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
|
||||
ins.type = BTRFS_EXTENT_ITEM_KEY;
|
||||
ret = btrfs_alloc_reserved_file_extent(trans, root,
|
||||
root->root_key.objectid,
|
||||
btrfs_ino(inode), file_pos, &ins);
|
||||
btrfs_ino(inode), file_pos,
|
||||
ram_bytes, &ins);
|
||||
/*
|
||||
* Release the reserved range from inode dirty range map, as it is
|
||||
* already moved into delayed_ref_head
|
||||
*/
|
||||
btrfs_qgroup_release_data(inode, file_pos, ram_bytes);
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
|
||||
@ -2573,7 +2591,7 @@ again:
|
||||
ret = btrfs_inc_extent_ref(trans, root, new->bytenr,
|
||||
new->disk_len, 0,
|
||||
backref->root_id, backref->inum,
|
||||
new->file_pos, 0); /* start - extent_offset */
|
||||
new->file_pos); /* start - extent_offset */
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, root, ret);
|
||||
goto out_free_path;
|
||||
@ -2599,7 +2617,6 @@ static void free_sa_defrag_extent(struct new_sa_defrag_extent *new)
|
||||
return;
|
||||
|
||||
list_for_each_entry_safe(old, tmp, &new->head, list) {
|
||||
list_del(&old->list);
|
||||
kfree(old);
|
||||
}
|
||||
kfree(new);
|
||||
@ -2824,6 +2841,14 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
|
||||
|
||||
if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
|
||||
BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */
|
||||
|
||||
/*
|
||||
* For mwrite(mmap + memset to write) case, we still reserve
|
||||
* space for NOCOW range.
|
||||
* As NOCOW won't cause a new delayed ref, just free the space
|
||||
*/
|
||||
btrfs_qgroup_free_data(inode, ordered_extent->file_offset,
|
||||
ordered_extent->len);
|
||||
btrfs_ordered_update_i_size(inode, 0, ordered_extent);
|
||||
if (nolock)
|
||||
trans = btrfs_join_transaction_nolock(root);
|
||||
@ -3018,8 +3043,6 @@ static int __readpage_endio_check(struct inode *inode,
|
||||
char *kaddr;
|
||||
u32 csum_expected;
|
||||
u32 csum = ~(u32)0;
|
||||
static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
|
||||
DEFAULT_RATELIMIT_BURST);
|
||||
|
||||
csum_expected = *(((u32 *)io_bio->csum) + icsum);
|
||||
|
||||
@ -3032,9 +3055,8 @@ static int __readpage_endio_check(struct inode *inode,
|
||||
kunmap_atomic(kaddr);
|
||||
return 0;
|
||||
zeroit:
|
||||
if (__ratelimit(&_rs))
|
||||
btrfs_warn(BTRFS_I(inode)->root->fs_info,
|
||||
"csum failed ino %llu off %llu csum %u expected csum %u",
|
||||
btrfs_warn_rl(BTRFS_I(inode)->root->fs_info,
|
||||
"csum failed ino %llu off %llu csum %u expected csum %u",
|
||||
btrfs_ino(inode), start, csum, csum_expected);
|
||||
memset(kaddr + pgoff, 1, len);
|
||||
flush_dcache_page(page);
|
||||
@ -4217,6 +4239,47 @@ static int truncate_space_check(struct btrfs_trans_handle *trans,
|
||||
|
||||
}
|
||||
|
||||
static int truncate_inline_extent(struct inode *inode,
|
||||
struct btrfs_path *path,
|
||||
struct btrfs_key *found_key,
|
||||
const u64 item_end,
|
||||
const u64 new_size)
|
||||
{
|
||||
struct extent_buffer *leaf = path->nodes[0];
|
||||
int slot = path->slots[0];
|
||||
struct btrfs_file_extent_item *fi;
|
||||
u32 size = (u32)(new_size - found_key->offset);
|
||||
struct btrfs_root *root = BTRFS_I(inode)->root;
|
||||
|
||||
fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
|
||||
|
||||
if (btrfs_file_extent_compression(leaf, fi) != BTRFS_COMPRESS_NONE) {
|
||||
loff_t offset = new_size;
|
||||
loff_t page_end = ALIGN(offset, PAGE_CACHE_SIZE);
|
||||
|
||||
/*
|
||||
* Zero out the remaining of the last page of our inline extent,
|
||||
* instead of directly truncating our inline extent here - that
|
||||
* would be much more complex (decompressing all the data, then
|
||||
* compressing the truncated data, which might be bigger than
|
||||
* the size of the inline extent, resize the extent, etc).
|
||||
* We release the path because to get the page we might need to
|
||||
* read the extent item from disk (data not in the page cache).
|
||||
*/
|
||||
btrfs_release_path(path);
|
||||
return btrfs_truncate_page(inode, offset, page_end - offset, 0);
|
||||
}
|
||||
|
||||
btrfs_set_file_extent_ram_bytes(leaf, fi, size);
|
||||
size = btrfs_file_extent_calc_inline_size(size);
|
||||
btrfs_truncate_item(root, path, size, 1);
|
||||
|
||||
if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))
|
||||
inode_sub_bytes(inode, item_end + 1 - new_size);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* this can truncate away extent items, csum items and directory items.
|
||||
* It starts at a high offset and removes keys until it can't find
|
||||
@ -4411,27 +4474,40 @@ search_again:
|
||||
* special encodings
|
||||
*/
|
||||
if (!del_item &&
|
||||
btrfs_file_extent_compression(leaf, fi) == 0 &&
|
||||
btrfs_file_extent_encryption(leaf, fi) == 0 &&
|
||||
btrfs_file_extent_other_encoding(leaf, fi) == 0) {
|
||||
u32 size = new_size - found_key.offset;
|
||||
|
||||
if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))
|
||||
inode_sub_bytes(inode, item_end + 1 -
|
||||
new_size);
|
||||
|
||||
/*
|
||||
* update the ram bytes to properly reflect
|
||||
* the new size of our item
|
||||
* Need to release path in order to truncate a
|
||||
* compressed extent. So delete any accumulated
|
||||
* extent items so far.
|
||||
*/
|
||||
btrfs_set_file_extent_ram_bytes(leaf, fi, size);
|
||||
size =
|
||||
btrfs_file_extent_calc_inline_size(size);
|
||||
btrfs_truncate_item(root, path, size, 1);
|
||||
if (btrfs_file_extent_compression(leaf, fi) !=
|
||||
BTRFS_COMPRESS_NONE && pending_del_nr) {
|
||||
err = btrfs_del_items(trans, root, path,
|
||||
pending_del_slot,
|
||||
pending_del_nr);
|
||||
if (err) {
|
||||
btrfs_abort_transaction(trans,
|
||||
root,
|
||||
err);
|
||||
goto error;
|
||||
}
|
||||
pending_del_nr = 0;
|
||||
}
|
||||
|
||||
err = truncate_inline_extent(inode, path,
|
||||
&found_key,
|
||||
item_end,
|
||||
new_size);
|
||||
if (err) {
|
||||
btrfs_abort_transaction(trans,
|
||||
root, err);
|
||||
goto error;
|
||||
}
|
||||
} else if (test_bit(BTRFS_ROOT_REF_COWS,
|
||||
&root->state)) {
|
||||
inode_sub_bytes(inode, item_end + 1 -
|
||||
found_key.offset);
|
||||
inode_sub_bytes(inode, item_end + 1 - new_size);
|
||||
}
|
||||
}
|
||||
delete:
|
||||
@ -4461,7 +4537,7 @@ delete:
|
||||
ret = btrfs_free_extent(trans, root, extent_start,
|
||||
extent_num_bytes, 0,
|
||||
btrfs_header_owner(leaf),
|
||||
ino, extent_offset, 0);
|
||||
ino, extent_offset);
|
||||
BUG_ON(ret);
|
||||
if (btrfs_should_throttle_delayed_refs(trans, root))
|
||||
btrfs_async_run_delayed_refs(root,
|
||||
@ -4575,14 +4651,17 @@ int btrfs_truncate_page(struct inode *inode, loff_t from, loff_t len,
|
||||
if ((offset & (blocksize - 1)) == 0 &&
|
||||
(!len || ((len & (blocksize - 1)) == 0)))
|
||||
goto out;
|
||||
ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
|
||||
ret = btrfs_delalloc_reserve_space(inode,
|
||||
round_down(from, PAGE_CACHE_SIZE), PAGE_CACHE_SIZE);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
again:
|
||||
page = find_or_create_page(mapping, index, mask);
|
||||
if (!page) {
|
||||
btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
|
||||
btrfs_delalloc_release_space(inode,
|
||||
round_down(from, PAGE_CACHE_SIZE),
|
||||
PAGE_CACHE_SIZE);
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
@ -4650,7 +4729,8 @@ again:
|
||||
|
||||
out_unlock:
|
||||
if (ret)
|
||||
btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
|
||||
btrfs_delalloc_release_space(inode, page_start,
|
||||
PAGE_CACHE_SIZE);
|
||||
unlock_page(page);
|
||||
page_cache_release(page);
|
||||
out:
|
||||
@ -5048,6 +5128,18 @@ static void evict_inode_truncate_pages(struct inode *inode)
|
||||
spin_unlock(&io_tree->lock);
|
||||
|
||||
lock_extent_bits(io_tree, start, end, 0, &cached_state);
|
||||
|
||||
/*
|
||||
* If still has DELALLOC flag, the extent didn't reach disk,
|
||||
* and its reserved space won't be freed by delayed_ref.
|
||||
* So we need to free its reserved space here.
|
||||
* (Refer to comment in btrfs_invalidatepage, case 2)
|
||||
*
|
||||
* Note, end is the bytenr of last byte, so we need + 1 here.
|
||||
*/
|
||||
if (state->state & EXTENT_DELALLOC)
|
||||
btrfs_qgroup_free_data(inode, start, end - start + 1);
|
||||
|
||||
clear_extent_bit(io_tree, start, end,
|
||||
EXTENT_LOCKED | EXTENT_DIRTY |
|
||||
EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
|
||||
@ -7581,7 +7673,7 @@ unlock:
|
||||
spin_unlock(&BTRFS_I(inode)->lock);
|
||||
}
|
||||
|
||||
btrfs_free_reserved_data_space(inode, len);
|
||||
btrfs_free_reserved_data_space(inode, start, len);
|
||||
WARN_ON(dio_data->reserve < len);
|
||||
dio_data->reserve -= len;
|
||||
current->journal_info = dio_data;
|
||||
@ -8371,7 +8463,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
relock = true;
|
||||
}
|
||||
ret = btrfs_delalloc_reserve_space(inode, count);
|
||||
ret = btrfs_delalloc_reserve_space(inode, offset, count);
|
||||
if (ret)
|
||||
goto out;
|
||||
dio_data.outstanding_extents = div64_u64(count +
|
||||
@ -8400,10 +8492,10 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
|
||||
current->journal_info = NULL;
|
||||
if (ret < 0 && ret != -EIOCBQUEUED) {
|
||||
if (dio_data.reserve)
|
||||
btrfs_delalloc_release_space(inode,
|
||||
dio_data.reserve);
|
||||
btrfs_delalloc_release_space(inode, offset,
|
||||
dio_data.reserve);
|
||||
} else if (ret >= 0 && (size_t)ret < count)
|
||||
btrfs_delalloc_release_space(inode,
|
||||
btrfs_delalloc_release_space(inode, offset,
|
||||
count - (size_t)ret);
|
||||
}
|
||||
out:
|
||||
@ -8562,6 +8654,18 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Qgroup reserved space handler
|
||||
* Page here will be either
|
||||
* 1) Already written to disk
|
||||
* In this case, its reserved space is released from data rsv map
|
||||
* and will be freed by delayed_ref handler finally.
|
||||
* So even if we call qgroup_free_data(), it won't decrease reserved
|
||||
* space.
|
||||
* 2) Not written to disk
|
||||
* This means the reserved space should be freed here.
|
||||
*/
|
||||
btrfs_qgroup_free_data(inode, page_start, PAGE_CACHE_SIZE);
|
||||
if (!inode_evicting) {
|
||||
clear_extent_bit(tree, page_start, page_end,
|
||||
EXTENT_LOCKED | EXTENT_DIRTY |
|
||||
@ -8612,7 +8716,11 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
|
||||
u64 page_end;
|
||||
|
||||
sb_start_pagefault(inode->i_sb);
|
||||
ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
|
||||
page_start = page_offset(page);
|
||||
page_end = page_start + PAGE_CACHE_SIZE - 1;
|
||||
|
||||
ret = btrfs_delalloc_reserve_space(inode, page_start,
|
||||
PAGE_CACHE_SIZE);
|
||||
if (!ret) {
|
||||
ret = file_update_time(vma->vm_file);
|
||||
reserved = 1;
|
||||
@ -8631,8 +8739,6 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
|
||||
again:
|
||||
lock_page(page);
|
||||
size = i_size_read(inode);
|
||||
page_start = page_offset(page);
|
||||
page_end = page_start + PAGE_CACHE_SIZE - 1;
|
||||
|
||||
if ((page->mapping != inode->i_mapping) ||
|
||||
(page_start >= size)) {
|
||||
@ -8709,7 +8815,7 @@ out_unlock:
|
||||
}
|
||||
unlock_page(page);
|
||||
out:
|
||||
btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
|
||||
btrfs_delalloc_release_space(inode, page_start, PAGE_CACHE_SIZE);
|
||||
out_noreserve:
|
||||
sb_end_pagefault(inode->i_sb);
|
||||
return ret;
|
||||
@ -8998,6 +9104,7 @@ void btrfs_destroy_inode(struct inode *inode)
|
||||
btrfs_put_ordered_extent(ordered);
|
||||
}
|
||||
}
|
||||
btrfs_qgroup_check_reserved_leak(inode);
|
||||
inode_tree_del(inode);
|
||||
btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
|
||||
free:
|
||||
@ -9634,6 +9741,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
|
||||
u64 cur_offset = start;
|
||||
u64 i_size;
|
||||
u64 cur_bytes;
|
||||
u64 last_alloc = (u64)-1;
|
||||
int ret = 0;
|
||||
bool own_trans = true;
|
||||
|
||||
@ -9650,6 +9758,13 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
|
||||
|
||||
cur_bytes = min(num_bytes, 256ULL * 1024 * 1024);
|
||||
cur_bytes = max(cur_bytes, min_size);
|
||||
/*
|
||||
* If we are severely fragmented we could end up with really
|
||||
* small allocations, so if the allocator is returning small
|
||||
* chunks let's make its job easier by only searching for those
|
||||
* sized chunks.
|
||||
*/
|
||||
cur_bytes = min(cur_bytes, last_alloc);
|
||||
ret = btrfs_reserve_extent(root, cur_bytes, min_size, 0,
|
||||
*alloc_hint, &ins, 1, 0);
|
||||
if (ret) {
|
||||
@ -9658,6 +9773,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
|
||||
break;
|
||||
}
|
||||
|
||||
last_alloc = ins.offset;
|
||||
ret = insert_reserved_file_extent(trans, inode,
|
||||
cur_offset, ins.objectid,
|
||||
ins.offset, ins.offset,
|
||||
|
280
fs/btrfs/ioctl.c
280
fs/btrfs/ioctl.c
@ -1120,7 +1120,8 @@ static int cluster_pages_for_defrag(struct inode *inode,
|
||||
page_cnt = min_t(u64, (u64)num_pages, (u64)file_end - start_index + 1);
|
||||
|
||||
ret = btrfs_delalloc_reserve_space(inode,
|
||||
page_cnt << PAGE_CACHE_SHIFT);
|
||||
start_index << PAGE_CACHE_SHIFT,
|
||||
page_cnt << PAGE_CACHE_SHIFT);
|
||||
if (ret)
|
||||
return ret;
|
||||
i_done = 0;
|
||||
@ -1210,7 +1211,8 @@ again:
|
||||
BTRFS_I(inode)->outstanding_extents++;
|
||||
spin_unlock(&BTRFS_I(inode)->lock);
|
||||
btrfs_delalloc_release_space(inode,
|
||||
(page_cnt - i_done) << PAGE_CACHE_SHIFT);
|
||||
start_index << PAGE_CACHE_SHIFT,
|
||||
(page_cnt - i_done) << PAGE_CACHE_SHIFT);
|
||||
}
|
||||
|
||||
|
||||
@ -1235,7 +1237,9 @@ out:
|
||||
unlock_page(pages[i]);
|
||||
page_cache_release(pages[i]);
|
||||
}
|
||||
btrfs_delalloc_release_space(inode, page_cnt << PAGE_CACHE_SHIFT);
|
||||
btrfs_delalloc_release_space(inode,
|
||||
start_index << PAGE_CACHE_SHIFT,
|
||||
page_cnt << PAGE_CACHE_SHIFT);
|
||||
return ret;
|
||||
|
||||
}
|
||||
@ -1342,7 +1346,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
|
||||
break;
|
||||
|
||||
if (btrfs_defrag_cancelled(root->fs_info)) {
|
||||
printk(KERN_DEBUG "BTRFS: defrag_file cancelled\n");
|
||||
btrfs_debug(root->fs_info, "defrag_file cancelled");
|
||||
ret = -EAGAIN;
|
||||
break;
|
||||
}
|
||||
@ -1579,7 +1583,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
|
||||
new_size = div_u64(new_size, root->sectorsize);
|
||||
new_size *= root->sectorsize;
|
||||
|
||||
printk_in_rcu(KERN_INFO "BTRFS: new size for %s is %llu\n",
|
||||
btrfs_info_in_rcu(root->fs_info, "new size for %s is %llu",
|
||||
rcu_str_deref(device->name), new_size);
|
||||
|
||||
if (new_size > old_size) {
|
||||
@ -2081,7 +2085,7 @@ static noinline int search_ioctl(struct inode *inode,
|
||||
key.offset = (u64)-1;
|
||||
root = btrfs_read_fs_root_no_name(info, &key);
|
||||
if (IS_ERR(root)) {
|
||||
printk(KERN_ERR "BTRFS: could not find root %llu\n",
|
||||
btrfs_err(info, "could not find root %llu",
|
||||
sk->tree_id);
|
||||
btrfs_free_path(path);
|
||||
return -ENOENT;
|
||||
@ -2221,7 +2225,7 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
|
||||
key.offset = (u64)-1;
|
||||
root = btrfs_read_fs_root_no_name(info, &key);
|
||||
if (IS_ERR(root)) {
|
||||
printk(KERN_ERR "BTRFS: could not find root %llu\n", tree_id);
|
||||
btrfs_err(info, "could not find root %llu", tree_id);
|
||||
ret = -ENOENT;
|
||||
goto out;
|
||||
}
|
||||
@ -2699,7 +2703,6 @@ static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg)
|
||||
{
|
||||
struct btrfs_ioctl_fs_info_args *fi_args;
|
||||
struct btrfs_device *device;
|
||||
struct btrfs_device *next;
|
||||
struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
|
||||
int ret = 0;
|
||||
|
||||
@ -2711,7 +2714,7 @@ static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg)
|
||||
fi_args->num_devices = fs_devices->num_devices;
|
||||
memcpy(&fi_args->fsid, root->fs_info->fsid, sizeof(fi_args->fsid));
|
||||
|
||||
list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
|
||||
list_for_each_entry(device, &fs_devices->devices, dev_list) {
|
||||
if (device->devid > fi_args->max_id)
|
||||
fi_args->max_id = device->devid;
|
||||
}
|
||||
@ -3203,41 +3206,6 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Helper to check and see if this root currently has a ref on the given disk
|
||||
* bytenr. If it does then we need to update the quota for this root. This
|
||||
* doesn't do anything if quotas aren't enabled.
|
||||
*/
|
||||
static int check_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
|
||||
u64 disko)
|
||||
{
|
||||
struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem);
|
||||
struct ulist *roots;
|
||||
struct ulist_iterator uiter;
|
||||
struct ulist_node *root_node = NULL;
|
||||
int ret;
|
||||
|
||||
if (!root->fs_info->quota_enabled)
|
||||
return 1;
|
||||
|
||||
btrfs_get_tree_mod_seq(root->fs_info, &tree_mod_seq_elem);
|
||||
ret = btrfs_find_all_roots(trans, root->fs_info, disko,
|
||||
tree_mod_seq_elem.seq, &roots);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
ret = 0;
|
||||
ULIST_ITER_INIT(&uiter);
|
||||
while ((root_node = ulist_next(roots, &uiter))) {
|
||||
if (root_node->val == root->objectid) {
|
||||
ret = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
ulist_free(roots);
|
||||
out:
|
||||
btrfs_put_tree_mod_seq(root->fs_info, &tree_mod_seq_elem);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
|
||||
struct inode *inode,
|
||||
u64 endoff,
|
||||
@ -3328,6 +3296,150 @@ static void clone_update_extent_map(struct inode *inode,
|
||||
&BTRFS_I(inode)->runtime_flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* Make sure we do not end up inserting an inline extent into a file that has
|
||||
* already other (non-inline) extents. If a file has an inline extent it can
|
||||
* not have any other extents and the (single) inline extent must start at the
|
||||
* file offset 0. Failing to respect these rules will lead to file corruption,
|
||||
* resulting in EIO errors on read/write operations, hitting BUG_ON's in mm, etc
|
||||
*
|
||||
* We can have extents that have been already written to disk or we can have
|
||||
* dirty ranges still in delalloc, in which case the extent maps and items are
|
||||
* created only when we run delalloc, and the delalloc ranges might fall outside
|
||||
* the range we are currently locking in the inode's io tree. So we check the
|
||||
* inode's i_size because of that (i_size updates are done while holding the
|
||||
* i_mutex, which we are holding here).
|
||||
* We also check to see if the inode has a size not greater than "datal" but has
|
||||
* extents beyond it, due to an fallocate with FALLOC_FL_KEEP_SIZE (and we are
|
||||
* protected against such concurrent fallocate calls by the i_mutex).
|
||||
*
|
||||
* If the file has no extents but a size greater than datal, do not allow the
|
||||
* copy because we would need turn the inline extent into a non-inline one (even
|
||||
* with NO_HOLES enabled). If we find our destination inode only has one inline
|
||||
* extent, just overwrite it with the source inline extent if its size is less
|
||||
* than the source extent's size, or we could copy the source inline extent's
|
||||
* data into the destination inode's inline extent if the later is greater then
|
||||
* the former.
|
||||
*/
|
||||
static int clone_copy_inline_extent(struct inode *src,
|
||||
struct inode *dst,
|
||||
struct btrfs_trans_handle *trans,
|
||||
struct btrfs_path *path,
|
||||
struct btrfs_key *new_key,
|
||||
const u64 drop_start,
|
||||
const u64 datal,
|
||||
const u64 skip,
|
||||
const u64 size,
|
||||
char *inline_data)
|
||||
{
|
||||
struct btrfs_root *root = BTRFS_I(dst)->root;
|
||||
const u64 aligned_end = ALIGN(new_key->offset + datal,
|
||||
root->sectorsize);
|
||||
int ret;
|
||||
struct btrfs_key key;
|
||||
|
||||
if (new_key->offset > 0)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
key.objectid = btrfs_ino(dst);
|
||||
key.type = BTRFS_EXTENT_DATA_KEY;
|
||||
key.offset = 0;
|
||||
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
|
||||
if (ret < 0) {
|
||||
return ret;
|
||||
} else if (ret > 0) {
|
||||
if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
|
||||
ret = btrfs_next_leaf(root, path);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
else if (ret > 0)
|
||||
goto copy_inline_extent;
|
||||
}
|
||||
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
|
||||
if (key.objectid == btrfs_ino(dst) &&
|
||||
key.type == BTRFS_EXTENT_DATA_KEY) {
|
||||
ASSERT(key.offset > 0);
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
} else if (i_size_read(dst) <= datal) {
|
||||
struct btrfs_file_extent_item *ei;
|
||||
u64 ext_len;
|
||||
|
||||
/*
|
||||
* If the file size is <= datal, make sure there are no other
|
||||
* extents following (can happen do to an fallocate call with
|
||||
* the flag FALLOC_FL_KEEP_SIZE).
|
||||
*/
|
||||
ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
|
||||
struct btrfs_file_extent_item);
|
||||
/*
|
||||
* If it's an inline extent, it can not have other extents
|
||||
* following it.
|
||||
*/
|
||||
if (btrfs_file_extent_type(path->nodes[0], ei) ==
|
||||
BTRFS_FILE_EXTENT_INLINE)
|
||||
goto copy_inline_extent;
|
||||
|
||||
ext_len = btrfs_file_extent_num_bytes(path->nodes[0], ei);
|
||||
if (ext_len > aligned_end)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
ret = btrfs_next_item(root, path);
|
||||
if (ret < 0) {
|
||||
return ret;
|
||||
} else if (ret == 0) {
|
||||
btrfs_item_key_to_cpu(path->nodes[0], &key,
|
||||
path->slots[0]);
|
||||
if (key.objectid == btrfs_ino(dst) &&
|
||||
key.type == BTRFS_EXTENT_DATA_KEY)
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
}
|
||||
|
||||
copy_inline_extent:
|
||||
/*
|
||||
* We have no extent items, or we have an extent at offset 0 which may
|
||||
* or may not be inlined. All these cases are dealt the same way.
|
||||
*/
|
||||
if (i_size_read(dst) > datal) {
|
||||
/*
|
||||
* If the destination inode has an inline extent...
|
||||
* This would require copying the data from the source inline
|
||||
* extent into the beginning of the destination's inline extent.
|
||||
* But this is really complex, both extents can be compressed
|
||||
* or just one of them, which would require decompressing and
|
||||
* re-compressing data (which could increase the new compressed
|
||||
* size, not allowing the compressed data to fit anymore in an
|
||||
* inline extent).
|
||||
* So just don't support this case for now (it should be rare,
|
||||
* we are not really saving space when cloning inline extents).
|
||||
*/
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
btrfs_release_path(path);
|
||||
ret = btrfs_drop_extents(trans, root, dst, drop_start, aligned_end, 1);
|
||||
if (ret)
|
||||
return ret;
|
||||
ret = btrfs_insert_empty_item(trans, root, path, new_key, size);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (skip) {
|
||||
const u32 start = btrfs_file_extent_calc_inline_size(0);
|
||||
|
||||
memmove(inline_data + start, inline_data + start + skip, datal);
|
||||
}
|
||||
|
||||
write_extent_buffer(path->nodes[0], inline_data,
|
||||
btrfs_item_ptr_offset(path->nodes[0],
|
||||
path->slots[0]),
|
||||
size);
|
||||
inode_add_bytes(dst, datal);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* btrfs_clone() - clone a range from inode file to another
|
||||
*
|
||||
@ -3352,9 +3464,7 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
|
||||
u32 nritems;
|
||||
int slot;
|
||||
int ret;
|
||||
int no_quota;
|
||||
const u64 len = olen_aligned;
|
||||
u64 last_disko = 0;
|
||||
u64 last_dest_end = destoff;
|
||||
|
||||
ret = -ENOMEM;
|
||||
@ -3400,7 +3510,6 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
|
||||
|
||||
nritems = btrfs_header_nritems(path->nodes[0]);
|
||||
process_slot:
|
||||
no_quota = 1;
|
||||
if (path->slots[0] >= nritems) {
|
||||
ret = btrfs_next_leaf(BTRFS_I(src)->root, path);
|
||||
if (ret < 0)
|
||||
@ -3552,35 +3661,13 @@ process_slot:
|
||||
btrfs_set_file_extent_num_bytes(leaf, extent,
|
||||
datal);
|
||||
|
||||
/*
|
||||
* We need to look up the roots that point at
|
||||
* this bytenr and see if the new root does. If
|
||||
* it does not we need to make sure we update
|
||||
* quotas appropriately.
|
||||
*/
|
||||
if (disko && root != BTRFS_I(src)->root &&
|
||||
disko != last_disko) {
|
||||
no_quota = check_ref(trans, root,
|
||||
disko);
|
||||
if (no_quota < 0) {
|
||||
btrfs_abort_transaction(trans,
|
||||
root,
|
||||
ret);
|
||||
btrfs_end_transaction(trans,
|
||||
root);
|
||||
ret = no_quota;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
if (disko) {
|
||||
inode_add_bytes(inode, datal);
|
||||
ret = btrfs_inc_extent_ref(trans, root,
|
||||
disko, diskl, 0,
|
||||
root->root_key.objectid,
|
||||
btrfs_ino(inode),
|
||||
new_key.offset - datao,
|
||||
no_quota);
|
||||
new_key.offset - datao);
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans,
|
||||
root,
|
||||
@ -3594,21 +3681,6 @@ process_slot:
|
||||
} else if (type == BTRFS_FILE_EXTENT_INLINE) {
|
||||
u64 skip = 0;
|
||||
u64 trim = 0;
|
||||
u64 aligned_end = 0;
|
||||
|
||||
/*
|
||||
* Don't copy an inline extent into an offset
|
||||
* greater than zero. Having an inline extent
|
||||
* at such an offset results in chaos as btrfs
|
||||
* isn't prepared for such cases. Just skip
|
||||
* this case for the same reasons as commented
|
||||
* at btrfs_ioctl_clone().
|
||||
*/
|
||||
if (last_dest_end > 0) {
|
||||
ret = -EOPNOTSUPP;
|
||||
btrfs_end_transaction(trans, root);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (off > key.offset) {
|
||||
skip = off - key.offset;
|
||||
@ -3626,42 +3698,22 @@ process_slot:
|
||||
size -= skip + trim;
|
||||
datal -= skip + trim;
|
||||
|
||||
aligned_end = ALIGN(new_key.offset + datal,
|
||||
root->sectorsize);
|
||||
ret = btrfs_drop_extents(trans, root, inode,
|
||||
drop_start,
|
||||
aligned_end,
|
||||
1);
|
||||
ret = clone_copy_inline_extent(src, inode,
|
||||
trans, path,
|
||||
&new_key,
|
||||
drop_start,
|
||||
datal,
|
||||
skip, size, buf);
|
||||
if (ret) {
|
||||
if (ret != -EOPNOTSUPP)
|
||||
btrfs_abort_transaction(trans,
|
||||
root, ret);
|
||||
root,
|
||||
ret);
|
||||
btrfs_end_transaction(trans, root);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = btrfs_insert_empty_item(trans, root, path,
|
||||
&new_key, size);
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, root,
|
||||
ret);
|
||||
btrfs_end_transaction(trans, root);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (skip) {
|
||||
u32 start =
|
||||
btrfs_file_extent_calc_inline_size(0);
|
||||
memmove(buf+start, buf+start+skip,
|
||||
datal);
|
||||
}
|
||||
|
||||
leaf = path->nodes[0];
|
||||
slot = path->slots[0];
|
||||
write_extent_buffer(leaf, buf,
|
||||
btrfs_item_ptr_offset(leaf, slot),
|
||||
size);
|
||||
inode_add_bytes(inode, datal);
|
||||
}
|
||||
|
||||
/* If we have an implicit hole (NO_HOLES feature). */
|
||||
@ -4814,7 +4866,7 @@ static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg)
|
||||
/* update qgroup status and info */
|
||||
err = btrfs_run_qgroups(trans, root->fs_info);
|
||||
if (err < 0)
|
||||
btrfs_error(root->fs_info, ret,
|
||||
btrfs_std_error(root->fs_info, ret,
|
||||
"failed to update qgroup status and info\n");
|
||||
err = btrfs_end_transaction(trans, root);
|
||||
if (err && !ret)
|
||||
|
@ -79,6 +79,9 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw)
|
||||
write_lock(&eb->lock);
|
||||
WARN_ON(atomic_read(&eb->spinning_writers));
|
||||
atomic_inc(&eb->spinning_writers);
|
||||
/*
|
||||
* atomic_dec_and_test implies a barrier for waitqueue_active
|
||||
*/
|
||||
if (atomic_dec_and_test(&eb->blocking_writers) &&
|
||||
waitqueue_active(&eb->write_lock_wq))
|
||||
wake_up(&eb->write_lock_wq);
|
||||
@ -86,6 +89,9 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw)
|
||||
BUG_ON(atomic_read(&eb->blocking_readers) == 0);
|
||||
read_lock(&eb->lock);
|
||||
atomic_inc(&eb->spinning_readers);
|
||||
/*
|
||||
* atomic_dec_and_test implies a barrier for waitqueue_active
|
||||
*/
|
||||
if (atomic_dec_and_test(&eb->blocking_readers) &&
|
||||
waitqueue_active(&eb->read_lock_wq))
|
||||
wake_up(&eb->read_lock_wq);
|
||||
@ -229,6 +235,9 @@ void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
|
||||
}
|
||||
btrfs_assert_tree_read_locked(eb);
|
||||
WARN_ON(atomic_read(&eb->blocking_readers) == 0);
|
||||
/*
|
||||
* atomic_dec_and_test implies a barrier for waitqueue_active
|
||||
*/
|
||||
if (atomic_dec_and_test(&eb->blocking_readers) &&
|
||||
waitqueue_active(&eb->read_lock_wq))
|
||||
wake_up(&eb->read_lock_wq);
|
||||
@ -280,6 +289,9 @@ void btrfs_tree_unlock(struct extent_buffer *eb)
|
||||
if (blockers) {
|
||||
WARN_ON(atomic_read(&eb->spinning_writers));
|
||||
atomic_dec(&eb->blocking_writers);
|
||||
/*
|
||||
* Make sure counter is updated before we wake up waiters.
|
||||
*/
|
||||
smp_mb();
|
||||
if (waitqueue_active(&eb->write_lock_wq))
|
||||
wake_up(&eb->write_lock_wq);
|
||||
|
@ -345,6 +345,9 @@ int btrfs_dec_test_first_ordered_pending(struct inode *inode,
|
||||
|
||||
if (entry->bytes_left == 0) {
|
||||
ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
|
||||
/*
|
||||
* Implicit memory barrier after test_and_set_bit
|
||||
*/
|
||||
if (waitqueue_active(&entry->wait))
|
||||
wake_up(&entry->wait);
|
||||
} else {
|
||||
@ -409,6 +412,9 @@ have_entry:
|
||||
|
||||
if (entry->bytes_left == 0) {
|
||||
ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
|
||||
/*
|
||||
* Implicit memory barrier after test_and_set_bit
|
||||
*/
|
||||
if (waitqueue_active(&entry->wait))
|
||||
wake_up(&entry->wait);
|
||||
} else {
|
||||
@ -484,15 +490,16 @@ void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans,
|
||||
|
||||
spin_lock_irq(&log->log_extents_lock[index]);
|
||||
while (!list_empty(&log->logged_list[index])) {
|
||||
struct inode *inode;
|
||||
ordered = list_first_entry(&log->logged_list[index],
|
||||
struct btrfs_ordered_extent,
|
||||
log_list);
|
||||
list_del_init(&ordered->log_list);
|
||||
inode = ordered->inode;
|
||||
spin_unlock_irq(&log->log_extents_lock[index]);
|
||||
|
||||
if (!test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags) &&
|
||||
!test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags)) {
|
||||
struct inode *inode = ordered->inode;
|
||||
u64 start = ordered->file_offset;
|
||||
u64 end = ordered->file_offset + ordered->len - 1;
|
||||
|
||||
@ -503,20 +510,25 @@ void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans,
|
||||
&ordered->flags));
|
||||
|
||||
/*
|
||||
* If our ordered extent completed it means it updated the
|
||||
* fs/subvol and csum trees already, so no need to make the
|
||||
* current transaction's commit wait for it, as we end up
|
||||
* holding memory unnecessarily and delaying the inode's iput
|
||||
* until the transaction commit (we schedule an iput for the
|
||||
* inode when the ordered extent's refcount drops to 0), which
|
||||
* prevents it from being evictable until the transaction
|
||||
* commits.
|
||||
* In order to keep us from losing our ordered extent
|
||||
* information when committing the transaction we have to make
|
||||
* sure that any logged extents are completed when we go to
|
||||
* commit the transaction. To do this we simply increase the
|
||||
* current transaction's pending_ordered counter and decrement it
|
||||
* when the ordered extent completes.
|
||||
*/
|
||||
if (test_bit(BTRFS_ORDERED_COMPLETE, &ordered->flags))
|
||||
btrfs_put_ordered_extent(ordered);
|
||||
else
|
||||
list_add_tail(&ordered->trans_list, &trans->ordered);
|
||||
if (!test_bit(BTRFS_ORDERED_COMPLETE, &ordered->flags)) {
|
||||
struct btrfs_ordered_inode_tree *tree;
|
||||
|
||||
tree = &BTRFS_I(inode)->ordered_tree;
|
||||
spin_lock_irq(&tree->lock);
|
||||
if (!test_bit(BTRFS_ORDERED_COMPLETE, &ordered->flags)) {
|
||||
set_bit(BTRFS_ORDERED_PENDING, &ordered->flags);
|
||||
atomic_inc(&trans->transaction->pending_ordered);
|
||||
}
|
||||
spin_unlock_irq(&tree->lock);
|
||||
}
|
||||
btrfs_put_ordered_extent(ordered);
|
||||
spin_lock_irq(&log->log_extents_lock[index]);
|
||||
}
|
||||
spin_unlock_irq(&log->log_extents_lock[index]);
|
||||
@ -578,6 +590,7 @@ void btrfs_remove_ordered_extent(struct inode *inode,
|
||||
struct btrfs_ordered_inode_tree *tree;
|
||||
struct btrfs_root *root = BTRFS_I(inode)->root;
|
||||
struct rb_node *node;
|
||||
bool dec_pending_ordered = false;
|
||||
|
||||
tree = &BTRFS_I(inode)->ordered_tree;
|
||||
spin_lock_irq(&tree->lock);
|
||||
@ -587,8 +600,37 @@ void btrfs_remove_ordered_extent(struct inode *inode,
|
||||
if (tree->last == node)
|
||||
tree->last = NULL;
|
||||
set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
|
||||
if (test_and_clear_bit(BTRFS_ORDERED_PENDING, &entry->flags))
|
||||
dec_pending_ordered = true;
|
||||
spin_unlock_irq(&tree->lock);
|
||||
|
||||
/*
|
||||
* The current running transaction is waiting on us, we need to let it
|
||||
* know that we're complete and wake it up.
|
||||
*/
|
||||
if (dec_pending_ordered) {
|
||||
struct btrfs_transaction *trans;
|
||||
|
||||
/*
|
||||
* The checks for trans are just a formality, it should be set,
|
||||
* but if it isn't we don't want to deref/assert under the spin
|
||||
* lock, so be nice and check if trans is set, but ASSERT() so
|
||||
* if it isn't set a developer will notice.
|
||||
*/
|
||||
spin_lock(&root->fs_info->trans_lock);
|
||||
trans = root->fs_info->running_transaction;
|
||||
if (trans)
|
||||
atomic_inc(&trans->use_count);
|
||||
spin_unlock(&root->fs_info->trans_lock);
|
||||
|
||||
ASSERT(trans);
|
||||
if (trans) {
|
||||
if (atomic_dec_and_test(&trans->pending_ordered))
|
||||
wake_up(&trans->pending_wait);
|
||||
btrfs_put_transaction(trans);
|
||||
}
|
||||
}
|
||||
|
||||
spin_lock(&root->ordered_extent_lock);
|
||||
list_del_init(&entry->root_extent_list);
|
||||
root->nr_ordered_extents--;
|
||||
|
@ -73,6 +73,8 @@ struct btrfs_ordered_sum {
|
||||
|
||||
#define BTRFS_ORDERED_LOGGED 10 /* Set when we've waited on this ordered extent
|
||||
* in the logging code. */
|
||||
#define BTRFS_ORDERED_PENDING 11 /* We are waiting for this ordered extent to
|
||||
* complete in the current transaction. */
|
||||
struct btrfs_ordered_extent {
|
||||
/* logical offset in the file */
|
||||
u64 file_offset;
|
||||
|
@ -49,18 +49,16 @@ static struct prop_handler prop_handlers[] = {
|
||||
.extract = prop_compression_extract,
|
||||
.inheritable = 1
|
||||
},
|
||||
{
|
||||
.xattr_name = NULL
|
||||
}
|
||||
};
|
||||
|
||||
void __init btrfs_props_init(void)
|
||||
{
|
||||
struct prop_handler *p;
|
||||
int i;
|
||||
|
||||
hash_init(prop_handlers_ht);
|
||||
|
||||
for (p = &prop_handlers[0]; p->xattr_name; p++) {
|
||||
for (i = 0; i < ARRAY_SIZE(prop_handlers); i++) {
|
||||
struct prop_handler *p = &prop_handlers[i];
|
||||
u64 h = btrfs_name_hash(p->xattr_name, strlen(p->xattr_name));
|
||||
|
||||
hash_add(prop_handlers_ht, &p->node, h);
|
||||
@ -301,15 +299,16 @@ static int inherit_props(struct btrfs_trans_handle *trans,
|
||||
struct inode *inode,
|
||||
struct inode *parent)
|
||||
{
|
||||
const struct prop_handler *h;
|
||||
struct btrfs_root *root = BTRFS_I(inode)->root;
|
||||
int ret;
|
||||
int i;
|
||||
|
||||
if (!test_bit(BTRFS_INODE_HAS_PROPS,
|
||||
&BTRFS_I(parent)->runtime_flags))
|
||||
return 0;
|
||||
|
||||
for (h = &prop_handlers[0]; h->xattr_name; h++) {
|
||||
for (i = 0; i < ARRAY_SIZE(prop_handlers); i++) {
|
||||
const struct prop_handler *h = &prop_handlers[i];
|
||||
const char *value;
|
||||
u64 num_bytes;
|
||||
|
||||
|
@ -1652,10 +1652,6 @@ static int qgroup_update_counters(struct btrfs_fs_info *fs_info,
|
||||
}
|
||||
}
|
||||
|
||||
/* For exclusive extent, free its reserved bytes too */
|
||||
if (nr_old_roots == 0 && nr_new_roots == 1 &&
|
||||
cur_new_count == nr_new_roots)
|
||||
qg->reserved -= num_bytes;
|
||||
if (dirty)
|
||||
qgroup_dirty(fs_info, qg);
|
||||
}
|
||||
@ -2035,7 +2031,7 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
|
||||
static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
|
||||
{
|
||||
struct btrfs_root *quota_root;
|
||||
struct btrfs_qgroup *qgroup;
|
||||
@ -2116,14 +2112,13 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
|
||||
void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
|
||||
u64 ref_root, u64 num_bytes)
|
||||
{
|
||||
struct btrfs_root *quota_root;
|
||||
struct btrfs_qgroup *qgroup;
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
struct ulist_node *unode;
|
||||
struct ulist_iterator uiter;
|
||||
u64 ref_root = root->root_key.objectid;
|
||||
int ret = 0;
|
||||
|
||||
if (!is_fstree(ref_root))
|
||||
@ -2169,6 +2164,11 @@ out:
|
||||
spin_unlock(&fs_info->qgroup_lock);
|
||||
}
|
||||
|
||||
/*
 * Convenience wrapper around btrfs_qgroup_free_refroot(): free @num_bytes
 * for the qgroup tree identified by @root->objectid, using @root->fs_info.
 */
static inline void qgroup_free(struct btrfs_root *root, u64 num_bytes)
{
	/*
	 * Plain call, no "return": a return statement with an expression is
	 * a constraint violation in a function returning void (ISO C) and is
	 * only accepted as a compiler extension.
	 */
	btrfs_qgroup_free_refroot(root->fs_info, root->objectid, num_bytes);
}
|
||||
void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
|
||||
{
|
||||
if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq)
|
||||
@ -2188,10 +2188,10 @@ void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
|
||||
*/
|
||||
static int
|
||||
qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
|
||||
struct btrfs_trans_handle *trans,
|
||||
struct extent_buffer *scratch_leaf)
|
||||
struct btrfs_trans_handle *trans)
|
||||
{
|
||||
struct btrfs_key found;
|
||||
struct extent_buffer *scratch_leaf = NULL;
|
||||
struct ulist *roots = NULL;
|
||||
struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem);
|
||||
u64 num_bytes;
|
||||
@ -2229,7 +2229,15 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
|
||||
fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;
|
||||
|
||||
btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
|
||||
memcpy(scratch_leaf, path->nodes[0], sizeof(*scratch_leaf));
|
||||
scratch_leaf = btrfs_clone_extent_buffer(path->nodes[0]);
|
||||
if (!scratch_leaf) {
|
||||
ret = -ENOMEM;
|
||||
mutex_unlock(&fs_info->qgroup_rescan_lock);
|
||||
goto out;
|
||||
}
|
||||
extent_buffer_get(scratch_leaf);
|
||||
btrfs_tree_read_lock(scratch_leaf);
|
||||
btrfs_set_lock_blocking_rw(scratch_leaf, BTRFS_READ_LOCK);
|
||||
slot = path->slots[0];
|
||||
btrfs_release_path(path);
|
||||
mutex_unlock(&fs_info->qgroup_rescan_lock);
|
||||
@ -2255,6 +2263,10 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
|
||||
goto out;
|
||||
}
|
||||
out:
|
||||
if (scratch_leaf) {
|
||||
btrfs_tree_read_unlock_blocking(scratch_leaf);
|
||||
free_extent_buffer(scratch_leaf);
|
||||
}
|
||||
btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
|
||||
|
||||
return ret;
|
||||
@ -2266,16 +2278,12 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
|
||||
qgroup_rescan_work);
|
||||
struct btrfs_path *path;
|
||||
struct btrfs_trans_handle *trans = NULL;
|
||||
struct extent_buffer *scratch_leaf = NULL;
|
||||
int err = -ENOMEM;
|
||||
int ret = 0;
|
||||
|
||||
path = btrfs_alloc_path();
|
||||
if (!path)
|
||||
goto out;
|
||||
scratch_leaf = kmalloc(sizeof(*scratch_leaf), GFP_NOFS);
|
||||
if (!scratch_leaf)
|
||||
goto out;
|
||||
|
||||
err = 0;
|
||||
while (!err) {
|
||||
@ -2287,8 +2295,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
|
||||
if (!fs_info->quota_enabled) {
|
||||
err = -EINTR;
|
||||
} else {
|
||||
err = qgroup_rescan_leaf(fs_info, path, trans,
|
||||
scratch_leaf);
|
||||
err = qgroup_rescan_leaf(fs_info, path, trans);
|
||||
}
|
||||
if (err > 0)
|
||||
btrfs_commit_transaction(trans, fs_info->fs_root);
|
||||
@ -2297,7 +2304,6 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
|
||||
}
|
||||
|
||||
out:
|
||||
kfree(scratch_leaf);
|
||||
btrfs_free_path(path);
|
||||
|
||||
mutex_lock(&fs_info->qgroup_rescan_lock);
|
||||
@ -2486,3 +2492,190 @@ btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info)
|
||||
btrfs_queue_work(fs_info->qgroup_rescan_workers,
|
||||
&fs_info->qgroup_rescan_work);
|
||||
}
|
||||
|
||||
/*
|
||||
* Reserve qgroup space for range [start, start + len).
|
||||
*
|
||||
* This function will either reserve space from related qgroups or doing
|
||||
* nothing if the range is already reserved.
|
||||
*
|
||||
* Return 0 for successful reserve
|
||||
* Return <0 for error (including -EQUOT)
|
||||
*
|
||||
* NOTE: this function may sleep for memory allocation.
|
||||
*/
|
||||
int btrfs_qgroup_reserve_data(struct inode *inode, u64 start, u64 len)
|
||||
{
|
||||
struct btrfs_root *root = BTRFS_I(inode)->root;
|
||||
struct extent_changeset changeset;
|
||||
struct ulist_node *unode;
|
||||
struct ulist_iterator uiter;
|
||||
int ret;
|
||||
|
||||
if (!root->fs_info->quota_enabled || !is_fstree(root->objectid) ||
|
||||
len == 0)
|
||||
return 0;
|
||||
|
||||
changeset.bytes_changed = 0;
|
||||
changeset.range_changed = ulist_alloc(GFP_NOFS);
|
||||
ret = set_record_extent_bits(&BTRFS_I(inode)->io_tree, start,
|
||||
start + len -1, EXTENT_QGROUP_RESERVED, GFP_NOFS,
|
||||
&changeset);
|
||||
trace_btrfs_qgroup_reserve_data(inode, start, len,
|
||||
changeset.bytes_changed,
|
||||
QGROUP_RESERVE);
|
||||
if (ret < 0)
|
||||
goto cleanup;
|
||||
ret = qgroup_reserve(root, changeset.bytes_changed);
|
||||
if (ret < 0)
|
||||
goto cleanup;
|
||||
|
||||
ulist_free(changeset.range_changed);
|
||||
return ret;
|
||||
|
||||
cleanup:
|
||||
/* cleanup already reserved ranges */
|
||||
ULIST_ITER_INIT(&uiter);
|
||||
while ((unode = ulist_next(changeset.range_changed, &uiter)))
|
||||
clear_extent_bit(&BTRFS_I(inode)->io_tree, unode->val,
|
||||
unode->aux, EXTENT_QGROUP_RESERVED, 0, 0, NULL,
|
||||
GFP_NOFS);
|
||||
ulist_free(changeset.range_changed);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Clear EXTENT_QGROUP_RESERVED on [start, start + len) in the inode's
 * io_tree, recording what was cleared in a changeset.
 *
 * When @free is non-zero the recorded byte count is also passed to
 * qgroup_free() and the event is traced as QGROUP_FREE; otherwise it is
 * traced as QGROUP_RELEASE.
 *
 * Return -ENOMEM if the changeset list cannot be allocated, otherwise the
 * return value of clear_record_extent_bits().
 */
static int __btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len,
				       int free)
{
	struct extent_changeset changeset;
	int trace_op;
	int ret;

	changeset.bytes_changed = 0;
	changeset.range_changed = ulist_alloc(GFP_NOFS);
	if (!changeset.range_changed)
		return -ENOMEM;

	ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, start,
			start + len -1, EXTENT_QGROUP_RESERVED, GFP_NOFS,
			&changeset);
	if (ret >= 0) {
		if (free) {
			qgroup_free(BTRFS_I(inode)->root,
				    changeset.bytes_changed);
			trace_op = QGROUP_FREE;
		} else {
			trace_op = QGROUP_RELEASE;
		}
		trace_btrfs_qgroup_release_data(inode, start, len,
						changeset.bytes_changed,
						trace_op);
	}

	ulist_free(changeset.range_changed);
	return ret;
}
|
||||
|
||||
/*
|
||||
* Free a reserved space range from io_tree and related qgroups
|
||||
*
|
||||
* Should be called when a range of pages get invalidated before reaching disk.
|
||||
* Or for error cleanup case.
|
||||
*
|
||||
* For data written to disk, use btrfs_qgroup_release_data().
|
||||
*
|
||||
* NOTE: This function may sleep for memory allocation.
|
||||
*/
|
||||
int btrfs_qgroup_free_data(struct inode *inode, u64 start, u64 len)
|
||||
{
|
||||
return __btrfs_qgroup_release_data(inode, start, len, 1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Release a reserved space range from io_tree only.
|
||||
*
|
||||
* Should be called when a range of pages get written to disk and corresponding
|
||||
* FILE_EXTENT is inserted into corresponding root.
|
||||
*
|
||||
* Since new qgroup accounting framework will only update qgroup numbers at
|
||||
* commit_transaction() time, its reserved space shouldn't be freed from
|
||||
* related qgroups.
|
||||
*
|
||||
* But we should release the range from io_tree, to allow further write to be
|
||||
* COWed.
|
||||
*
|
||||
* NOTE: This function may sleep for memory allocation.
|
||||
*/
|
||||
int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len)
|
||||
{
|
||||
return __btrfs_qgroup_release_data(inode, start, len, 0);
|
||||
}
|
||||
|
||||
int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (!root->fs_info->quota_enabled || !is_fstree(root->objectid) ||
|
||||
num_bytes == 0)
|
||||
return 0;
|
||||
|
||||
BUG_ON(num_bytes != round_down(num_bytes, root->nodesize));
|
||||
ret = qgroup_reserve(root, num_bytes);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
atomic_add(num_bytes, &root->qgroup_meta_rsv);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void btrfs_qgroup_free_meta_all(struct btrfs_root *root)
|
||||
{
|
||||
int reserved;
|
||||
|
||||
if (!root->fs_info->quota_enabled || !is_fstree(root->objectid))
|
||||
return;
|
||||
|
||||
reserved = atomic_xchg(&root->qgroup_meta_rsv, 0);
|
||||
if (reserved == 0)
|
||||
return;
|
||||
qgroup_free(root, reserved);
|
||||
}
|
||||
|
||||
void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes)
|
||||
{
|
||||
if (!root->fs_info->quota_enabled || !is_fstree(root->objectid))
|
||||
return;
|
||||
|
||||
BUG_ON(num_bytes != round_down(num_bytes, root->nodesize));
|
||||
WARN_ON(atomic_read(&root->qgroup_meta_rsv) < num_bytes);
|
||||
atomic_sub(num_bytes, &root->qgroup_meta_rsv);
|
||||
qgroup_free(root, num_bytes);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check qgroup reserved space leaking, normally at destory inode
|
||||
* time
|
||||
*/
|
||||
void btrfs_qgroup_check_reserved_leak(struct inode *inode)
|
||||
{
|
||||
struct extent_changeset changeset;
|
||||
struct ulist_node *unode;
|
||||
struct ulist_iterator iter;
|
||||
int ret;
|
||||
|
||||
changeset.bytes_changed = 0;
|
||||
changeset.range_changed = ulist_alloc(GFP_NOFS);
|
||||
if (WARN_ON(!changeset.range_changed))
|
||||
return;
|
||||
|
||||
ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
|
||||
EXTENT_QGROUP_RESERVED, GFP_NOFS, &changeset);
|
||||
|
||||
WARN_ON(ret < 0);
|
||||
if (WARN_ON(changeset.bytes_changed)) {
|
||||
ULIST_ITER_INIT(&iter);
|
||||
while ((unode = ulist_next(changeset.range_changed, &iter))) {
|
||||
btrfs_warn(BTRFS_I(inode)->root->fs_info,
|
||||
"leaking qgroup reserved space, ino: %lu, start: %llu, end: %llu",
|
||||
inode->i_ino, unode->val, unode->aux);
|
||||
}
|
||||
qgroup_free(BTRFS_I(inode)->root, changeset.bytes_changed);
|
||||
}
|
||||
ulist_free(changeset.range_changed);
|
||||
}
|
||||
|
@ -33,6 +33,13 @@ struct btrfs_qgroup_extent_record {
|
||||
struct ulist *old_roots;
|
||||
};
|
||||
|
||||
/*
|
||||
* For qgroup event trace points only
|
||||
*/
|
||||
#define QGROUP_RESERVE (1<<0)
|
||||
#define QGROUP_RELEASE (1<<1)
|
||||
#define QGROUP_FREE (1<<2)
|
||||
|
||||
int btrfs_quota_enable(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_fs_info *fs_info);
|
||||
int btrfs_quota_disable(struct btrfs_trans_handle *trans,
|
||||
@ -71,9 +78,18 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
|
||||
int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
|
||||
struct btrfs_qgroup_inherit *inherit);
|
||||
int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes);
|
||||
void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes);
|
||||
|
||||
void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
|
||||
u64 ref_root, u64 num_bytes);
|
||||
/*
|
||||
* TODO: Add proper trace point for it, as btrfs_qgroup_free() is
|
||||
* called by everywhere, can't provide good trace for delayed ref case.
|
||||
*/
|
||||
static inline void btrfs_qgroup_free_delayed_ref(struct btrfs_fs_info *fs_info,
|
||||
u64 ref_root, u64 num_bytes)
|
||||
{
|
||||
btrfs_qgroup_free_refroot(fs_info, ref_root, num_bytes);
|
||||
trace_btrfs_qgroup_free_delayed_ref(ref_root, num_bytes);
|
||||
}
|
||||
void assert_qgroups_uptodate(struct btrfs_trans_handle *trans);
|
||||
|
||||
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
|
||||
@ -81,4 +97,13 @@ int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
|
||||
u64 rfer, u64 excl);
|
||||
#endif
|
||||
|
||||
/* New io_tree based accurate qgroup reserve API */
|
||||
int btrfs_qgroup_reserve_data(struct inode *inode, u64 start, u64 len);
|
||||
int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len);
|
||||
int btrfs_qgroup_free_data(struct inode *inode, u64 start, u64 len);
|
||||
|
||||
int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes);
|
||||
void btrfs_qgroup_free_meta_all(struct btrfs_root *root);
|
||||
void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes);
|
||||
void btrfs_qgroup_check_reserved_leak(struct inode *inode);
|
||||
#endif /* __BTRFS_QGROUP__ */
|
||||
|
@ -810,7 +810,11 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
|
||||
}
|
||||
|
||||
goto done_nolock;
|
||||
} else if (waitqueue_active(&h->wait)) {
|
||||
/*
|
||||
* The barrier for this waitqueue_active is not needed,
|
||||
* we're protected by h->lock and can't miss a wakeup.
|
||||
*/
|
||||
} else if (waitqueue_active(&h->wait)) {
|
||||
spin_unlock(&rbio->bio_list_lock);
|
||||
spin_unlock_irqrestore(&h->lock, flags);
|
||||
wake_up(&h->wait);
|
||||
|
@ -569,7 +569,7 @@ static int reada_add_block(struct reada_control *rc, u64 logical,
|
||||
rec = kzalloc(sizeof(*rec), GFP_NOFS);
|
||||
if (!rec) {
|
||||
reada_extent_put(root->fs_info, re);
|
||||
return -1;
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
rec->rc = rc;
|
||||
@ -918,6 +918,7 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
|
||||
u64 start;
|
||||
u64 generation;
|
||||
int level;
|
||||
int ret;
|
||||
struct extent_buffer *node;
|
||||
static struct btrfs_key max_key = {
|
||||
.objectid = (u64)-1,
|
||||
@ -943,9 +944,10 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
|
||||
generation = btrfs_header_generation(node);
|
||||
free_extent_buffer(node);
|
||||
|
||||
if (reada_add_block(rc, start, &max_key, level, generation)) {
|
||||
ret = reada_add_block(rc, start, &max_key, level, generation);
|
||||
if (ret) {
|
||||
kfree(rc);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
reada_start_machine(root->fs_info);
|
||||
|
@ -1716,7 +1716,7 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
|
||||
ret = btrfs_inc_extent_ref(trans, root, new_bytenr,
|
||||
num_bytes, parent,
|
||||
btrfs_header_owner(leaf),
|
||||
key.objectid, key.offset, 1);
|
||||
key.objectid, key.offset);
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, root, ret);
|
||||
break;
|
||||
@ -1724,7 +1724,7 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
|
||||
|
||||
ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
|
||||
parent, btrfs_header_owner(leaf),
|
||||
key.objectid, key.offset, 1);
|
||||
key.objectid, key.offset);
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, root, ret);
|
||||
break;
|
||||
@ -1900,23 +1900,21 @@ again:
|
||||
|
||||
ret = btrfs_inc_extent_ref(trans, src, old_bytenr, blocksize,
|
||||
path->nodes[level]->start,
|
||||
src->root_key.objectid, level - 1, 0,
|
||||
1);
|
||||
src->root_key.objectid, level - 1, 0);
|
||||
BUG_ON(ret);
|
||||
ret = btrfs_inc_extent_ref(trans, dest, new_bytenr, blocksize,
|
||||
0, dest->root_key.objectid, level - 1,
|
||||
0, 1);
|
||||
0);
|
||||
BUG_ON(ret);
|
||||
|
||||
ret = btrfs_free_extent(trans, src, new_bytenr, blocksize,
|
||||
path->nodes[level]->start,
|
||||
src->root_key.objectid, level - 1, 0,
|
||||
1);
|
||||
src->root_key.objectid, level - 1, 0);
|
||||
BUG_ON(ret);
|
||||
|
||||
ret = btrfs_free_extent(trans, dest, old_bytenr, blocksize,
|
||||
0, dest->root_key.objectid, level - 1,
|
||||
0, 1);
|
||||
0);
|
||||
BUG_ON(ret);
|
||||
|
||||
btrfs_unlock_up_safe(path, 0);
|
||||
@ -2418,7 +2416,7 @@ again:
|
||||
}
|
||||
out:
|
||||
if (ret) {
|
||||
btrfs_std_error(root->fs_info, ret);
|
||||
btrfs_std_error(root->fs_info, ret, NULL);
|
||||
if (!list_empty(&reloc_roots))
|
||||
free_reloc_roots(&reloc_roots);
|
||||
|
||||
@ -2745,7 +2743,7 @@ static int do_relocation(struct btrfs_trans_handle *trans,
|
||||
node->eb->start, blocksize,
|
||||
upper->eb->start,
|
||||
btrfs_header_owner(upper->eb),
|
||||
node->level, 0, 1);
|
||||
node->level, 0);
|
||||
BUG_ON(ret);
|
||||
|
||||
ret = btrfs_drop_subtree(trans, root, eb, upper->eb);
|
||||
@ -3034,8 +3032,8 @@ int prealloc_file_extent_cluster(struct inode *inode,
|
||||
BUG_ON(cluster->start != cluster->boundary[0]);
|
||||
mutex_lock(&inode->i_mutex);
|
||||
|
||||
ret = btrfs_check_data_free_space(inode, cluster->end +
|
||||
1 - cluster->start, 0);
|
||||
ret = btrfs_check_data_free_space(inode, cluster->start,
|
||||
cluster->end + 1 - cluster->start);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
@ -3056,8 +3054,8 @@ int prealloc_file_extent_cluster(struct inode *inode,
|
||||
break;
|
||||
nr++;
|
||||
}
|
||||
btrfs_free_reserved_data_space(inode, cluster->end +
|
||||
1 - cluster->start);
|
||||
btrfs_free_reserved_data_space(inode, cluster->start,
|
||||
cluster->end + 1 - cluster->start);
|
||||
out:
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
return ret;
|
||||
|
@ -45,12 +45,13 @@ static void btrfs_read_root_item(struct extent_buffer *eb, int slot,
|
||||
if (!need_reset && btrfs_root_generation(item)
|
||||
!= btrfs_root_generation_v2(item)) {
|
||||
if (btrfs_root_generation_v2(item) != 0) {
|
||||
printk(KERN_WARNING "BTRFS: mismatching "
|
||||
btrfs_warn(eb->fs_info,
|
||||
"mismatching "
|
||||
"generation and generation_v2 "
|
||||
"found in root item. This root "
|
||||
"was probably mounted with an "
|
||||
"older kernel. Resetting all "
|
||||
"new fields.\n");
|
||||
"new fields.");
|
||||
}
|
||||
need_reset = 1;
|
||||
}
|
||||
@ -141,7 +142,7 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
|
||||
int ret;
|
||||
int slot;
|
||||
unsigned long ptr;
|
||||
int old_len;
|
||||
u32 old_len;
|
||||
|
||||
path = btrfs_alloc_path();
|
||||
if (!path)
|
||||
@ -283,7 +284,7 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
|
||||
trans = btrfs_join_transaction(tree_root);
|
||||
if (IS_ERR(trans)) {
|
||||
err = PTR_ERR(trans);
|
||||
btrfs_error(tree_root->fs_info, err,
|
||||
btrfs_std_error(tree_root->fs_info, err,
|
||||
"Failed to start trans to delete "
|
||||
"orphan item");
|
||||
break;
|
||||
@ -292,7 +293,7 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
|
||||
root_key.objectid);
|
||||
btrfs_end_transaction(trans, tree_root);
|
||||
if (err) {
|
||||
btrfs_error(tree_root->fs_info, err,
|
||||
btrfs_std_error(tree_root->fs_info, err,
|
||||
"Failed to delete root orphan "
|
||||
"item");
|
||||
break;
|
||||
|
@ -580,9 +580,9 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
|
||||
* hold all of the paths here
|
||||
*/
|
||||
for (i = 0; i < ipath->fspath->elem_cnt; ++i)
|
||||
printk_in_rcu(KERN_WARNING "BTRFS: %s at logical %llu on dev "
|
||||
btrfs_warn_in_rcu(fs_info, "%s at logical %llu on dev "
|
||||
"%s, sector %llu, root %llu, inode %llu, offset %llu, "
|
||||
"length %llu, links %u (path: %s)\n", swarn->errstr,
|
||||
"length %llu, links %u (path: %s)", swarn->errstr,
|
||||
swarn->logical, rcu_str_deref(swarn->dev->name),
|
||||
(unsigned long long)swarn->sector, root, inum, offset,
|
||||
min(isize - offset, (u64)PAGE_SIZE), nlink,
|
||||
@ -592,9 +592,9 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
|
||||
return 0;
|
||||
|
||||
err:
|
||||
printk_in_rcu(KERN_WARNING "BTRFS: %s at logical %llu on dev "
|
||||
btrfs_warn_in_rcu(fs_info, "%s at logical %llu on dev "
|
||||
"%s, sector %llu, root %llu, inode %llu, offset %llu: path "
|
||||
"resolving failed with ret=%d\n", swarn->errstr,
|
||||
"resolving failed with ret=%d", swarn->errstr,
|
||||
swarn->logical, rcu_str_deref(swarn->dev->name),
|
||||
(unsigned long long)swarn->sector, root, inum, offset, ret);
|
||||
|
||||
@ -649,10 +649,10 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
|
||||
ret = tree_backref_for_extent(&ptr, eb, &found_key, ei,
|
||||
item_size, &ref_root,
|
||||
&ref_level);
|
||||
printk_in_rcu(KERN_WARNING
|
||||
"BTRFS: %s at logical %llu on dev %s, "
|
||||
btrfs_warn_in_rcu(fs_info,
|
||||
"%s at logical %llu on dev %s, "
|
||||
"sector %llu: metadata %s (level %d) in tree "
|
||||
"%llu\n", errstr, swarn.logical,
|
||||
"%llu", errstr, swarn.logical,
|
||||
rcu_str_deref(dev->name),
|
||||
(unsigned long long)swarn.sector,
|
||||
ref_level ? "node" : "leaf",
|
||||
@ -850,8 +850,8 @@ out:
|
||||
btrfs_dev_replace_stats_inc(
|
||||
&sctx->dev_root->fs_info->dev_replace.
|
||||
num_uncorrectable_read_errors);
|
||||
printk_ratelimited_in_rcu(KERN_ERR "BTRFS: "
|
||||
"unable to fixup (nodatasum) error at logical %llu on dev %s\n",
|
||||
btrfs_err_rl_in_rcu(sctx->dev_root->fs_info,
|
||||
"unable to fixup (nodatasum) error at logical %llu on dev %s",
|
||||
fixup->logical, rcu_str_deref(fixup->dev->name));
|
||||
}
|
||||
|
||||
@ -1230,8 +1230,8 @@ corrected_error:
|
||||
sctx->stat.corrected_errors++;
|
||||
sblock_to_check->data_corrected = 1;
|
||||
spin_unlock(&sctx->stat_lock);
|
||||
printk_ratelimited_in_rcu(KERN_ERR
|
||||
"BTRFS: fixed up error at logical %llu on dev %s\n",
|
||||
btrfs_err_rl_in_rcu(fs_info,
|
||||
"fixed up error at logical %llu on dev %s",
|
||||
logical, rcu_str_deref(dev->name));
|
||||
}
|
||||
} else {
|
||||
@ -1239,8 +1239,8 @@ did_not_correct_error:
|
||||
spin_lock(&sctx->stat_lock);
|
||||
sctx->stat.uncorrectable_errors++;
|
||||
spin_unlock(&sctx->stat_lock);
|
||||
printk_ratelimited_in_rcu(KERN_ERR
|
||||
"BTRFS: unable to fixup (regular) error at logical %llu on dev %s\n",
|
||||
btrfs_err_rl_in_rcu(fs_info,
|
||||
"unable to fixup (regular) error at logical %llu on dev %s",
|
||||
logical, rcu_str_deref(dev->name));
|
||||
}
|
||||
|
||||
@ -1626,9 +1626,9 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
|
||||
int ret;
|
||||
|
||||
if (!page_bad->dev->bdev) {
|
||||
printk_ratelimited(KERN_WARNING "BTRFS: "
|
||||
btrfs_warn_rl(sblock_bad->sctx->dev_root->fs_info,
|
||||
"scrub_repair_page_from_good_copy(bdev == NULL) "
|
||||
"is unexpected!\n");
|
||||
"is unexpected");
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
@ -2201,15 +2201,15 @@ static void scrub_missing_raid56_worker(struct btrfs_work *work)
|
||||
spin_lock(&sctx->stat_lock);
|
||||
sctx->stat.read_errors++;
|
||||
spin_unlock(&sctx->stat_lock);
|
||||
printk_ratelimited_in_rcu(KERN_ERR
|
||||
"BTRFS: I/O error rebulding logical %llu for dev %s\n",
|
||||
btrfs_err_rl_in_rcu(fs_info,
|
||||
"IO error rebuilding logical %llu for dev %s",
|
||||
logical, rcu_str_deref(dev->name));
|
||||
} else if (sblock->header_error || sblock->checksum_error) {
|
||||
spin_lock(&sctx->stat_lock);
|
||||
sctx->stat.uncorrectable_errors++;
|
||||
spin_unlock(&sctx->stat_lock);
|
||||
printk_ratelimited_in_rcu(KERN_ERR
|
||||
"BTRFS: failed to rebuild valid logical %llu for dev %s\n",
|
||||
btrfs_err_rl_in_rcu(fs_info,
|
||||
"failed to rebuild valid logical %llu for dev %s",
|
||||
logical, rcu_str_deref(dev->name));
|
||||
} else {
|
||||
scrub_write_block_to_dev_replace(sblock);
|
||||
@ -4375,8 +4375,8 @@ static int write_page_nocow(struct scrub_ctx *sctx,
|
||||
if (!dev)
|
||||
return -EIO;
|
||||
if (!dev->bdev) {
|
||||
printk_ratelimited(KERN_WARNING
|
||||
"BTRFS: scrub write_page_nocow(bdev == NULL) is unexpected!\n");
|
||||
btrfs_warn_rl(dev->dev_root->fs_info,
|
||||
"scrub write_page_nocow(bdev == NULL) is unexpected");
|
||||
return -EIO;
|
||||
}
|
||||
bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
|
||||
|
212
fs/btrfs/send.c
212
fs/btrfs/send.c
@ -1434,16 +1434,6 @@ verbose_printk(KERN_DEBUG "btrfs: find_extent_clone: data_offset=%llu, "
|
||||
}
|
||||
|
||||
if (cur_clone_root) {
|
||||
if (compressed != BTRFS_COMPRESS_NONE) {
|
||||
/*
|
||||
* Offsets given by iterate_extent_inodes() are relative
|
||||
* to the start of the extent, we need to add logical
|
||||
* offset from the file extent item.
|
||||
* (See why at backref.c:check_extent_in_eb())
|
||||
*/
|
||||
cur_clone_root->offset += btrfs_file_extent_offset(eb,
|
||||
fi);
|
||||
}
|
||||
*found = cur_clone_root;
|
||||
ret = 0;
|
||||
} else {
|
||||
@ -2353,8 +2343,14 @@ static int send_subvol_begin(struct send_ctx *sctx)
|
||||
}
|
||||
|
||||
TLV_PUT_STRING(sctx, BTRFS_SEND_A_PATH, name, namelen);
|
||||
TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID,
|
||||
sctx->send_root->root_item.uuid);
|
||||
|
||||
if (!btrfs_is_empty_uuid(sctx->send_root->root_item.received_uuid))
|
||||
TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID,
|
||||
sctx->send_root->root_item.received_uuid);
|
||||
else
|
||||
TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID,
|
||||
sctx->send_root->root_item.uuid);
|
||||
|
||||
TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID,
|
||||
le64_to_cpu(sctx->send_root->root_item.ctransid));
|
||||
if (parent_root) {
|
||||
@ -2564,7 +2560,7 @@ verbose_printk("btrfs: send_create_inode %llu\n", ino);
|
||||
} else if (S_ISSOCK(mode)) {
|
||||
cmd = BTRFS_SEND_C_MKSOCK;
|
||||
} else {
|
||||
printk(KERN_WARNING "btrfs: unexpected inode type %o",
|
||||
btrfs_warn(sctx->send_root->fs_info, "unexpected inode type %o",
|
||||
(int)(mode & S_IFMT));
|
||||
ret = -ENOTSUPP;
|
||||
goto out;
|
||||
@ -4687,6 +4683,171 @@ tlv_put_failure:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int send_extent_data(struct send_ctx *sctx,
|
||||
const u64 offset,
|
||||
const u64 len)
|
||||
{
|
||||
u64 sent = 0;
|
||||
|
||||
if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA)
|
||||
return send_update_extent(sctx, offset, len);
|
||||
|
||||
while (sent < len) {
|
||||
u64 size = len - sent;
|
||||
int ret;
|
||||
|
||||
if (size > BTRFS_SEND_READ_SIZE)
|
||||
size = BTRFS_SEND_READ_SIZE;
|
||||
ret = send_write(sctx, offset + sent, size);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
if (!ret)
|
||||
break;
|
||||
sent += ret;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Emit send operations covering [offset, offset + len) by walking the file
 * extent items of inode clone_root->ino in clone_root->root, starting at
 * file offset clone_root->offset.
 *
 * For each source item, a clone operation is sent only when the item's disk
 * bytenr equals disk_byte and its extent offset equals data_offset;
 * otherwise the same piece is sent through send_extent_data(). Gaps between
 * items, and whatever is left after the walk ends, are also sent through
 * send_extent_data().
 *
 * Note that clone_root->offset is modified while the range is consumed.
 *
 * Returns -ENOMEM if the path cannot be allocated, a negative value from
 * btrfs_search_slot(), btrfs_next_leaf(), send_clone() or
 * send_extent_data() on failure, and otherwise 0 or the result of the final
 * send_extent_data() call.
 */
static int clone_range(struct send_ctx *sctx,
|
||||
struct clone_root *clone_root,
|
||||
const u64 disk_byte,
|
||||
u64 data_offset,
|
||||
u64 offset,
|
||||
u64 len)
|
||||
{
|
||||
struct btrfs_path *path;
|
||||
struct btrfs_key key;
|
||||
int ret;
|
||||
|
||||
path = alloc_path_for_send();
|
||||
if (!path)
|
||||
return -ENOMEM;
|
||||
|
||||
/*
|
||||
* We can't send a clone operation for the entire range if we find
|
||||
* extent items in the respective range in the source file that
|
||||
* refer to different extents or if we find holes.
|
||||
* So check for that and do a mix of clone and regular write/copy
|
||||
* operations if needed.
|
||||
*
|
||||
* Example:
|
||||
*
|
||||
* mkfs.btrfs -f /dev/sda
|
||||
* mount /dev/sda /mnt
|
||||
* xfs_io -f -c "pwrite -S 0xaa 0K 100K" /mnt/foo
|
||||
* cp --reflink=always /mnt/foo /mnt/bar
|
||||
* xfs_io -c "pwrite -S 0xbb 50K 50K" /mnt/foo
|
||||
* btrfs subvolume snapshot -r /mnt /mnt/snap
|
||||
*
|
||||
* If, while sending the snapshot and processing file bar (which
|
||||
* has a higher inode number than foo) we blindly send a clone operation
|
||||
* for the [0, 100K[ range from foo to bar, the receiver ends up getting
|
||||
* a file bar that matches the content of file foo - iow, doesn't match
|
||||
* the content from bar in the original filesystem.
|
||||
*/
|
||||
key.objectid = clone_root->ino;
|
||||
key.type = BTRFS_EXTENT_DATA_KEY;
|
||||
key.offset = clone_root->offset;
|
||||
ret = btrfs_search_slot(NULL, clone_root->root, &key, path, 0, 0);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
/*
 * No exact key match: if the preceding item is a file extent item of the
 * same inode, start the walk there (presumably because it may span
 * clone_root->offset; the skip check in the loop discards it otherwise).
 */
if (ret > 0 && path->slots[0] > 0) {
|
||||
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0] - 1);
|
||||
if (key.objectid == clone_root->ino &&
|
||||
key.type == BTRFS_EXTENT_DATA_KEY)
|
||||
path->slots[0]--;
|
||||
}
|
||||
|
||||
while (true) {
|
||||
struct extent_buffer *leaf = path->nodes[0];
|
||||
int slot = path->slots[0];
|
||||
struct btrfs_file_extent_item *ei;
|
||||
u8 type;
|
||||
u64 ext_len;
|
||||
u64 clone_len;
|
||||
|
||||
/*
 * Slot is past the last item of this leaf: advance to the next leaf.
 * A positive return from btrfs_next_leaf() ends the walk; a negative
 * one is an error.
 */
if (slot >= btrfs_header_nritems(leaf)) {
|
||||
ret = btrfs_next_leaf(clone_root->root, path);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
else if (ret > 0)
|
||||
break;
|
||||
continue;
|
||||
}
|
||||
|
||||
btrfs_item_key_to_cpu(leaf, &key, slot);
|
||||
|
||||
/*
|
||||
* We might have an implicit trailing hole (NO_HOLES feature
|
||||
* enabled). We deal with it after leaving this loop.
|
||||
*/
|
||||
if (key.objectid != clone_root->ino ||
|
||||
key.type != BTRFS_EXTENT_DATA_KEY)
|
||||
break;
|
||||
|
||||
ei = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
|
||||
type = btrfs_file_extent_type(leaf, ei);
|
||||
/* Inline items use their inline length rounded up by PAGE_CACHE_ALIGN(). */
if (type == BTRFS_FILE_EXTENT_INLINE) {
|
||||
ext_len = btrfs_file_extent_inline_len(leaf, slot, ei);
|
||||
ext_len = PAGE_CACHE_ALIGN(ext_len);
|
||||
} else {
|
||||
ext_len = btrfs_file_extent_num_bytes(leaf, ei);
|
||||
}
|
||||
|
||||
/* Item ends at or before the current source position: skip it. */
if (key.offset + ext_len <= clone_root->offset)
|
||||
goto next;
|
||||
|
||||
if (key.offset > clone_root->offset) {
|
||||
/* Implicit hole, NO_HOLES feature enabled. */
|
||||
u64 hole_len = key.offset - clone_root->offset;
|
||||
|
||||
if (hole_len > len)
|
||||
hole_len = len;
|
||||
ret = send_extent_data(sctx, offset, hole_len);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
len -= hole_len;
|
||||
if (len == 0)
|
||||
break;
|
||||
offset += hole_len;
|
||||
clone_root->offset += hole_len;
|
||||
data_offset += hole_len;
|
||||
}
|
||||
|
||||
/* Item starts at or beyond the end of the remaining range: stop. */
if (key.offset >= clone_root->offset + len)
|
||||
break;
|
||||
|
||||
/*
 * NOTE(review): ext_len is the item's full length even when the item
 * starts before clone_root->offset (that case passes the skip check
 * above), so clone_len may reach past the end of this item - confirm
 * whether it should be limited to
 * key.offset + ext_len - clone_root->offset.
 */
clone_len = min_t(u64, ext_len, len);
|
||||
|
||||
if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte &&
|
||||
btrfs_file_extent_offset(leaf, ei) == data_offset)
|
||||
ret = send_clone(sctx, offset, clone_len, clone_root);
|
||||
else
|
||||
ret = send_extent_data(sctx, offset, clone_len);
|
||||
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
len -= clone_len;
|
||||
if (len == 0)
|
||||
break;
|
||||
/* Advance the destination, source and extent-relative offsets together. */
offset += clone_len;
|
||||
clone_root->offset += clone_len;
|
||||
data_offset += clone_len;
|
||||
next:
|
||||
path->slots[0]++;
|
||||
}
|
||||
|
||||
/* Whatever the walk did not cover is sent through send_extent_data(). */
if (len > 0)
|
||||
ret = send_extent_data(sctx, offset, len);
|
||||
else
|
||||
ret = 0;
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int send_write_or_clone(struct send_ctx *sctx,
|
||||
struct btrfs_path *path,
|
||||
struct btrfs_key *key,
|
||||
@ -4695,9 +4856,7 @@ static int send_write_or_clone(struct send_ctx *sctx,
|
||||
int ret = 0;
|
||||
struct btrfs_file_extent_item *ei;
|
||||
u64 offset = key->offset;
|
||||
u64 pos = 0;
|
||||
u64 len;
|
||||
u32 l;
|
||||
u8 type;
|
||||
u64 bs = sctx->send_root->fs_info->sb->s_blocksize;
|
||||
|
||||
@ -4725,22 +4884,15 @@ static int send_write_or_clone(struct send_ctx *sctx,
|
||||
}
|
||||
|
||||
if (clone_root && IS_ALIGNED(offset + len, bs)) {
|
||||
ret = send_clone(sctx, offset, len, clone_root);
|
||||
} else if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA) {
|
||||
ret = send_update_extent(sctx, offset, len);
|
||||
u64 disk_byte;
|
||||
u64 data_offset;
|
||||
|
||||
disk_byte = btrfs_file_extent_disk_bytenr(path->nodes[0], ei);
|
||||
data_offset = btrfs_file_extent_offset(path->nodes[0], ei);
|
||||
ret = clone_range(sctx, clone_root, disk_byte, data_offset,
|
||||
offset, len);
|
||||
} else {
|
||||
while (pos < len) {
|
||||
l = len - pos;
|
||||
if (l > BTRFS_SEND_READ_SIZE)
|
||||
l = BTRFS_SEND_READ_SIZE;
|
||||
ret = send_write(sctx, pos + offset, l);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
if (!ret)
|
||||
break;
|
||||
pos += ret;
|
||||
}
|
||||
ret = 0;
|
||||
ret = send_extent_data(sctx, offset, len);
|
||||
}
|
||||
out:
|
||||
return ret;
|
||||
|
@ -130,7 +130,6 @@ static void btrfs_handle_error(struct btrfs_fs_info *fs_info)
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PRINTK
|
||||
/*
|
||||
* __btrfs_std_error decodes expected errors from the caller and
|
||||
* invokes the appropriate error response.
|
||||
@ -140,7 +139,9 @@ void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
|
||||
unsigned int line, int errno, const char *fmt, ...)
|
||||
{
|
||||
struct super_block *sb = fs_info->sb;
|
||||
#ifdef CONFIG_PRINTK
|
||||
const char *errstr;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Special case: if the error is EROFS, and we're already
|
||||
@ -149,6 +150,7 @@ void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
|
||||
if (errno == -EROFS && (sb->s_flags & MS_RDONLY))
|
||||
return;
|
||||
|
||||
#ifdef CONFIG_PRINTK
|
||||
errstr = btrfs_decode_error(errno);
|
||||
if (fmt) {
|
||||
struct va_format vaf;
|
||||
@ -166,6 +168,7 @@ void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
|
||||
printk(KERN_CRIT "BTRFS: error (device %s) in %s:%d: errno=%d %s\n",
|
||||
sb->s_id, function, line, errno, errstr);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Don't go through full error handling during mount */
|
||||
save_error_info(fs_info);
|
||||
@ -173,6 +176,7 @@ void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
|
||||
btrfs_handle_error(fs_info);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PRINTK
|
||||
static const char * const logtypes[] = {
|
||||
"emergency",
|
||||
"alert",
|
||||
@ -212,27 +216,6 @@ void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
|
||||
|
||||
va_end(args);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
|
||||
unsigned int line, int errno, const char *fmt, ...)
|
||||
{
|
||||
struct super_block *sb = fs_info->sb;
|
||||
|
||||
/*
|
||||
* Special case: if the error is EROFS, and we're already
|
||||
* under MS_RDONLY, then it is safe here.
|
||||
*/
|
||||
if (errno == -EROFS && (sb->s_flags & MS_RDONLY))
|
||||
return;
|
||||
|
||||
/* Don't go through full error handling during mount */
|
||||
if (sb->s_flags & MS_BORN) {
|
||||
save_error_info(fs_info);
|
||||
btrfs_handle_error(fs_info);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
@ -320,6 +303,9 @@ enum {
|
||||
Opt_commit_interval, Opt_barrier, Opt_nodefrag, Opt_nodiscard,
|
||||
Opt_noenospc_debug, Opt_noflushoncommit, Opt_acl, Opt_datacow,
|
||||
Opt_datasum, Opt_treelog, Opt_noinode_cache,
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
Opt_fragment_data, Opt_fragment_metadata, Opt_fragment_all,
|
||||
#endif
|
||||
Opt_err,
|
||||
};
|
||||
|
||||
@ -372,6 +358,11 @@ static match_table_t tokens = {
|
||||
{Opt_rescan_uuid_tree, "rescan_uuid_tree"},
|
||||
{Opt_fatal_errors, "fatal_errors=%s"},
|
||||
{Opt_commit_interval, "commit=%d"},
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
{Opt_fragment_data, "fragment=data"},
|
||||
{Opt_fragment_metadata, "fragment=metadata"},
|
||||
{Opt_fragment_all, "fragment=all"},
|
||||
#endif
|
||||
{Opt_err, NULL},
|
||||
};
|
||||
|
||||
@ -738,6 +729,22 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
|
||||
info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
|
||||
}
|
||||
break;
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
case Opt_fragment_all:
|
||||
btrfs_info(root->fs_info, "fragmenting all space");
|
||||
btrfs_set_opt(info->mount_opt, FRAGMENT_DATA);
|
||||
btrfs_set_opt(info->mount_opt, FRAGMENT_METADATA);
|
||||
break;
|
||||
case Opt_fragment_metadata:
|
||||
btrfs_info(root->fs_info, "fragmenting metadata");
|
||||
btrfs_set_opt(info->mount_opt,
|
||||
FRAGMENT_METADATA);
|
||||
break;
|
||||
case Opt_fragment_data:
|
||||
btrfs_info(root->fs_info, "fragmenting data");
|
||||
btrfs_set_opt(info->mount_opt, FRAGMENT_DATA);
|
||||
break;
|
||||
#endif
|
||||
case Opt_err:
|
||||
btrfs_info(root->fs_info, "unrecognized mount option '%s'", p);
|
||||
ret = -EINVAL;
|
||||
@ -1189,6 +1196,12 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
|
||||
seq_puts(seq, ",fatal_errors=panic");
|
||||
if (info->commit_interval != BTRFS_DEFAULT_COMMIT_INTERVAL)
|
||||
seq_printf(seq, ",commit=%d", info->commit_interval);
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
if (btrfs_test_opt(root, FRAGMENT_DATA))
|
||||
seq_puts(seq, ",fragment=data");
|
||||
if (btrfs_test_opt(root, FRAGMENT_METADATA))
|
||||
seq_puts(seq, ",fragment=metadata");
|
||||
#endif
|
||||
seq_printf(seq, ",subvolid=%llu",
|
||||
BTRFS_I(d_inode(dentry))->root->root_key.objectid);
|
||||
seq_puts(seq, ",subvol=");
|
||||
|
@ -437,24 +437,24 @@ static const struct attribute *btrfs_attrs[] = {
|
||||
NULL,
|
||||
};
|
||||
|
||||
static void btrfs_release_super_kobj(struct kobject *kobj)
|
||||
static void btrfs_release_fsid_kobj(struct kobject *kobj)
|
||||
{
|
||||
struct btrfs_fs_devices *fs_devs = to_fs_devs(kobj);
|
||||
|
||||
memset(&fs_devs->super_kobj, 0, sizeof(struct kobject));
|
||||
memset(&fs_devs->fsid_kobj, 0, sizeof(struct kobject));
|
||||
complete(&fs_devs->kobj_unregister);
|
||||
}
|
||||
|
||||
static struct kobj_type btrfs_ktype = {
|
||||
.sysfs_ops = &kobj_sysfs_ops,
|
||||
.release = btrfs_release_super_kobj,
|
||||
.release = btrfs_release_fsid_kobj,
|
||||
};
|
||||
|
||||
static inline struct btrfs_fs_devices *to_fs_devs(struct kobject *kobj)
|
||||
{
|
||||
if (kobj->ktype != &btrfs_ktype)
|
||||
return NULL;
|
||||
return container_of(kobj, struct btrfs_fs_devices, super_kobj);
|
||||
return container_of(kobj, struct btrfs_fs_devices, fsid_kobj);
|
||||
}
|
||||
|
||||
static inline struct btrfs_fs_info *to_fs_info(struct kobject *kobj)
|
||||
@ -502,12 +502,12 @@ static int addrm_unknown_feature_attrs(struct btrfs_fs_info *fs_info, bool add)
|
||||
attrs[0] = &fa->kobj_attr.attr;
|
||||
if (add) {
|
||||
int ret;
|
||||
ret = sysfs_merge_group(&fs_info->fs_devices->super_kobj,
|
||||
ret = sysfs_merge_group(&fs_info->fs_devices->fsid_kobj,
|
||||
&agroup);
|
||||
if (ret)
|
||||
return ret;
|
||||
} else
|
||||
sysfs_unmerge_group(&fs_info->fs_devices->super_kobj,
|
||||
sysfs_unmerge_group(&fs_info->fs_devices->fsid_kobj,
|
||||
&agroup);
|
||||
}
|
||||
|
||||
@ -523,9 +523,9 @@ static void __btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs)
|
||||
fs_devs->device_dir_kobj = NULL;
|
||||
}
|
||||
|
||||
if (fs_devs->super_kobj.state_initialized) {
|
||||
kobject_del(&fs_devs->super_kobj);
|
||||
kobject_put(&fs_devs->super_kobj);
|
||||
if (fs_devs->fsid_kobj.state_initialized) {
|
||||
kobject_del(&fs_devs->fsid_kobj);
|
||||
kobject_put(&fs_devs->fsid_kobj);
|
||||
wait_for_completion(&fs_devs->kobj_unregister);
|
||||
}
|
||||
}
|
||||
@ -545,7 +545,7 @@ void btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs)
|
||||
}
|
||||
}
|
||||
|
||||
void btrfs_sysfs_remove_one(struct btrfs_fs_info *fs_info)
|
||||
void btrfs_sysfs_remove_mounted(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
btrfs_reset_fs_info_ptr(fs_info);
|
||||
|
||||
@ -555,9 +555,9 @@ void btrfs_sysfs_remove_one(struct btrfs_fs_info *fs_info)
|
||||
kobject_put(fs_info->space_info_kobj);
|
||||
}
|
||||
addrm_unknown_feature_attrs(fs_info, false);
|
||||
sysfs_remove_group(&fs_info->fs_devices->super_kobj, &btrfs_feature_attr_group);
|
||||
sysfs_remove_files(&fs_info->fs_devices->super_kobj, btrfs_attrs);
|
||||
btrfs_kobj_rm_device(fs_info->fs_devices, NULL);
|
||||
sysfs_remove_group(&fs_info->fs_devices->fsid_kobj, &btrfs_feature_attr_group);
|
||||
sysfs_remove_files(&fs_info->fs_devices->fsid_kobj, btrfs_attrs);
|
||||
btrfs_sysfs_rm_device_link(fs_info->fs_devices, NULL);
|
||||
}
|
||||
|
||||
const char * const btrfs_feature_set_names[3] = {
|
||||
@ -637,7 +637,7 @@ static void init_feature_attrs(void)
|
||||
|
||||
/* when one_device is NULL, it removes all device links */
|
||||
|
||||
int btrfs_kobj_rm_device(struct btrfs_fs_devices *fs_devices,
|
||||
int btrfs_sysfs_rm_device_link(struct btrfs_fs_devices *fs_devices,
|
||||
struct btrfs_device *one_device)
|
||||
{
|
||||
struct hd_struct *disk;
|
||||
@ -675,7 +675,7 @@ int btrfs_sysfs_add_device(struct btrfs_fs_devices *fs_devs)
|
||||
{
|
||||
if (!fs_devs->device_dir_kobj)
|
||||
fs_devs->device_dir_kobj = kobject_create_and_add("devices",
|
||||
&fs_devs->super_kobj);
|
||||
&fs_devs->fsid_kobj);
|
||||
|
||||
if (!fs_devs->device_dir_kobj)
|
||||
return -ENOMEM;
|
||||
@ -683,7 +683,7 @@ int btrfs_sysfs_add_device(struct btrfs_fs_devices *fs_devs)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int btrfs_kobj_add_device(struct btrfs_fs_devices *fs_devices,
|
||||
int btrfs_sysfs_add_device_link(struct btrfs_fs_devices *fs_devices,
|
||||
struct btrfs_device *one_device)
|
||||
{
|
||||
int error = 0;
|
||||
@ -730,31 +730,31 @@ int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs,
|
||||
int error;
|
||||
|
||||
init_completion(&fs_devs->kobj_unregister);
|
||||
fs_devs->super_kobj.kset = btrfs_kset;
|
||||
error = kobject_init_and_add(&fs_devs->super_kobj,
|
||||
fs_devs->fsid_kobj.kset = btrfs_kset;
|
||||
error = kobject_init_and_add(&fs_devs->fsid_kobj,
|
||||
&btrfs_ktype, parent, "%pU", fs_devs->fsid);
|
||||
return error;
|
||||
}
|
||||
|
||||
int btrfs_sysfs_add_one(struct btrfs_fs_info *fs_info)
|
||||
int btrfs_sysfs_add_mounted(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
int error;
|
||||
struct btrfs_fs_devices *fs_devs = fs_info->fs_devices;
|
||||
struct kobject *super_kobj = &fs_devs->super_kobj;
|
||||
struct kobject *fsid_kobj = &fs_devs->fsid_kobj;
|
||||
|
||||
btrfs_set_fs_info_ptr(fs_info);
|
||||
|
||||
error = btrfs_kobj_add_device(fs_devs, NULL);
|
||||
error = btrfs_sysfs_add_device_link(fs_devs, NULL);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
error = sysfs_create_files(super_kobj, btrfs_attrs);
|
||||
error = sysfs_create_files(fsid_kobj, btrfs_attrs);
|
||||
if (error) {
|
||||
btrfs_kobj_rm_device(fs_devs, NULL);
|
||||
btrfs_sysfs_rm_device_link(fs_devs, NULL);
|
||||
return error;
|
||||
}
|
||||
|
||||
error = sysfs_create_group(super_kobj,
|
||||
error = sysfs_create_group(fsid_kobj,
|
||||
&btrfs_feature_attr_group);
|
||||
if (error)
|
||||
goto failure;
|
||||
@ -764,7 +764,7 @@ int btrfs_sysfs_add_one(struct btrfs_fs_info *fs_info)
|
||||
goto failure;
|
||||
|
||||
fs_info->space_info_kobj = kobject_create_and_add("allocation",
|
||||
super_kobj);
|
||||
fsid_kobj);
|
||||
if (!fs_info->space_info_kobj) {
|
||||
error = -ENOMEM;
|
||||
goto failure;
|
||||
@ -776,7 +776,7 @@ int btrfs_sysfs_add_one(struct btrfs_fs_info *fs_info)
|
||||
|
||||
return 0;
|
||||
failure:
|
||||
btrfs_sysfs_remove_one(fs_info);
|
||||
btrfs_sysfs_remove_mounted(fs_info);
|
||||
return error;
|
||||
}
|
||||
|
||||
|
@ -82,9 +82,9 @@ char *btrfs_printable_features(enum btrfs_feature_set set, u64 flags);
|
||||
extern const char * const btrfs_feature_set_names[3];
|
||||
extern struct kobj_type space_info_ktype;
|
||||
extern struct kobj_type btrfs_raid_ktype;
|
||||
int btrfs_kobj_add_device(struct btrfs_fs_devices *fs_devices,
|
||||
int btrfs_sysfs_add_device_link(struct btrfs_fs_devices *fs_devices,
|
||||
struct btrfs_device *one_device);
|
||||
int btrfs_kobj_rm_device(struct btrfs_fs_devices *fs_devices,
|
||||
int btrfs_sysfs_rm_device_link(struct btrfs_fs_devices *fs_devices,
|
||||
struct btrfs_device *one_device);
|
||||
int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs,
|
||||
struct kobject *parent);
|
||||
|
@ -19,6 +19,7 @@
|
||||
#include <linux/slab.h>
|
||||
#include "btrfs-tests.h"
|
||||
#include "../ctree.h"
|
||||
#include "../disk-io.h"
|
||||
#include "../free-space-cache.h"
|
||||
|
||||
#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)
|
||||
@ -35,6 +36,12 @@ static struct btrfs_block_group_cache *init_test_block_group(void)
|
||||
kfree(cache);
|
||||
return NULL;
|
||||
}
|
||||
cache->fs_info = btrfs_alloc_dummy_fs_info();
|
||||
if (!cache->fs_info) {
|
||||
kfree(cache->free_space_ctl);
|
||||
kfree(cache);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
cache->key.objectid = 0;
|
||||
cache->key.offset = 1024 * 1024 * 1024;
|
||||
@ -879,7 +886,8 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
|
||||
int btrfs_test_free_space_cache(void)
|
||||
{
|
||||
struct btrfs_block_group_cache *cache;
|
||||
int ret;
|
||||
struct btrfs_root *root = NULL;
|
||||
int ret = -ENOMEM;
|
||||
|
||||
test_msg("Running btrfs free space cache tests\n");
|
||||
|
||||
@ -889,6 +897,17 @@ int btrfs_test_free_space_cache(void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
root = btrfs_alloc_dummy_root();
|
||||
if (!root)
|
||||
goto out;
|
||||
|
||||
root->fs_info = btrfs_alloc_dummy_fs_info();
|
||||
if (!root->fs_info)
|
||||
goto out;
|
||||
|
||||
root->fs_info->extent_root = root;
|
||||
cache->fs_info = root->fs_info;
|
||||
|
||||
ret = test_extents(cache);
|
||||
if (ret)
|
||||
goto out;
|
||||
@ -904,6 +923,7 @@ out:
|
||||
__btrfs_remove_free_space_cache(cache->free_space_ctl);
|
||||
kfree(cache->free_space_ctl);
|
||||
kfree(cache);
|
||||
btrfs_free_dummy_root(root);
|
||||
test_msg("Free space cache tests finished\n");
|
||||
return ret;
|
||||
}
|
||||
|
@ -82,6 +82,12 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction)
|
||||
static void clear_btree_io_tree(struct extent_io_tree *tree)
|
||||
{
|
||||
spin_lock(&tree->lock);
|
||||
/*
|
||||
* Do a single barrier for the waitqueue_active check here, the state
|
||||
* of the waitqueue should not change once clear_btree_io_tree is
|
||||
* called.
|
||||
*/
|
||||
smp_mb();
|
||||
while (!RB_EMPTY_ROOT(&tree->state)) {
|
||||
struct rb_node *node;
|
||||
struct extent_state *state;
|
||||
@ -226,25 +232,22 @@ loop:
|
||||
extwriter_counter_init(cur_trans, type);
|
||||
init_waitqueue_head(&cur_trans->writer_wait);
|
||||
init_waitqueue_head(&cur_trans->commit_wait);
|
||||
init_waitqueue_head(&cur_trans->pending_wait);
|
||||
cur_trans->state = TRANS_STATE_RUNNING;
|
||||
/*
|
||||
* One for this trans handle, one so it will live on until we
|
||||
* commit the transaction.
|
||||
*/
|
||||
atomic_set(&cur_trans->use_count, 2);
|
||||
cur_trans->have_free_bgs = 0;
|
||||
atomic_set(&cur_trans->pending_ordered, 0);
|
||||
cur_trans->flags = 0;
|
||||
cur_trans->start_time = get_seconds();
|
||||
cur_trans->dirty_bg_run = 0;
|
||||
|
||||
memset(&cur_trans->delayed_refs, 0, sizeof(cur_trans->delayed_refs));
|
||||
|
||||
cur_trans->delayed_refs.href_root = RB_ROOT;
|
||||
cur_trans->delayed_refs.dirty_extent_root = RB_ROOT;
|
||||
atomic_set(&cur_trans->delayed_refs.num_entries, 0);
|
||||
cur_trans->delayed_refs.num_heads_ready = 0;
|
||||
cur_trans->delayed_refs.pending_csums = 0;
|
||||
cur_trans->delayed_refs.num_heads = 0;
|
||||
cur_trans->delayed_refs.flushing = 0;
|
||||
cur_trans->delayed_refs.run_delayed_start = 0;
|
||||
cur_trans->delayed_refs.qgroup_to_skip = 0;
|
||||
|
||||
/*
|
||||
* although the tree mod log is per file system and not per transaction,
|
||||
@ -264,7 +267,6 @@ loop:
|
||||
INIT_LIST_HEAD(&cur_trans->pending_snapshots);
|
||||
INIT_LIST_HEAD(&cur_trans->pending_chunks);
|
||||
INIT_LIST_HEAD(&cur_trans->switch_commits);
|
||||
INIT_LIST_HEAD(&cur_trans->pending_ordered);
|
||||
INIT_LIST_HEAD(&cur_trans->dirty_bgs);
|
||||
INIT_LIST_HEAD(&cur_trans->io_bgs);
|
||||
INIT_LIST_HEAD(&cur_trans->dropped_roots);
|
||||
@ -447,8 +449,8 @@ static inline bool need_reserve_reloc_root(struct btrfs_root *root)
|
||||
}
|
||||
|
||||
static struct btrfs_trans_handle *
|
||||
start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type,
|
||||
enum btrfs_reserve_flush_enum flush)
|
||||
start_transaction(struct btrfs_root *root, unsigned int num_items,
|
||||
unsigned int type, enum btrfs_reserve_flush_enum flush)
|
||||
{
|
||||
struct btrfs_trans_handle *h;
|
||||
struct btrfs_transaction *cur_trans;
|
||||
@ -478,13 +480,10 @@ start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type,
|
||||
* the appropriate flushing if need be.
|
||||
*/
|
||||
if (num_items > 0 && root != root->fs_info->chunk_root) {
|
||||
if (root->fs_info->quota_enabled &&
|
||||
is_fstree(root->root_key.objectid)) {
|
||||
qgroup_reserved = num_items * root->nodesize;
|
||||
ret = btrfs_qgroup_reserve(root, qgroup_reserved);
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
qgroup_reserved = num_items * root->nodesize;
|
||||
ret = btrfs_qgroup_reserve_meta(root, qgroup_reserved);
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
|
||||
num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
|
||||
/*
|
||||
@ -502,7 +501,7 @@ start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type,
|
||||
goto reserve_fail;
|
||||
}
|
||||
again:
|
||||
h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
|
||||
h = kmem_cache_zalloc(btrfs_trans_handle_cachep, GFP_NOFS);
|
||||
if (!h) {
|
||||
ret = -ENOMEM;
|
||||
goto alloc_fail;
|
||||
@ -543,26 +542,13 @@ again:
|
||||
|
||||
h->transid = cur_trans->transid;
|
||||
h->transaction = cur_trans;
|
||||
h->blocks_used = 0;
|
||||
h->bytes_reserved = 0;
|
||||
h->chunk_bytes_reserved = 0;
|
||||
h->root = root;
|
||||
h->delayed_ref_updates = 0;
|
||||
h->use_count = 1;
|
||||
h->adding_csums = 0;
|
||||
h->block_rsv = NULL;
|
||||
h->orig_rsv = NULL;
|
||||
h->aborted = 0;
|
||||
h->qgroup_reserved = 0;
|
||||
h->delayed_ref_elem.seq = 0;
|
||||
|
||||
h->type = type;
|
||||
h->allocating_chunk = false;
|
||||
h->can_flush_pending_bgs = true;
|
||||
h->reloc_reserved = false;
|
||||
h->sync = false;
|
||||
INIT_LIST_HEAD(&h->qgroup_ref_list);
|
||||
INIT_LIST_HEAD(&h->new_bgs);
|
||||
INIT_LIST_HEAD(&h->ordered);
|
||||
|
||||
smp_mb();
|
||||
if (cur_trans->state >= TRANS_STATE_BLOCKED &&
|
||||
@ -579,7 +565,6 @@ again:
|
||||
h->bytes_reserved = num_bytes;
|
||||
h->reloc_reserved = reloc_reserved;
|
||||
}
|
||||
h->qgroup_reserved = qgroup_reserved;
|
||||
|
||||
got_it:
|
||||
btrfs_record_root_in_trans(h, root);
|
||||
@ -597,20 +582,20 @@ alloc_fail:
|
||||
btrfs_block_rsv_release(root, &root->fs_info->trans_block_rsv,
|
||||
num_bytes);
|
||||
reserve_fail:
|
||||
if (qgroup_reserved)
|
||||
btrfs_qgroup_free(root, qgroup_reserved);
|
||||
btrfs_qgroup_free_meta(root, qgroup_reserved);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
|
||||
int num_items)
|
||||
unsigned int num_items)
|
||||
{
|
||||
return start_transaction(root, num_items, TRANS_START,
|
||||
BTRFS_RESERVE_FLUSH_ALL);
|
||||
}
|
||||
|
||||
struct btrfs_trans_handle *btrfs_start_transaction_lflush(
|
||||
struct btrfs_root *root, int num_items)
|
||||
struct btrfs_root *root,
|
||||
unsigned int num_items)
|
||||
{
|
||||
return start_transaction(root, num_items, TRANS_START,
|
||||
BTRFS_RESERVE_FLUSH_LIMIT);
|
||||
@ -794,12 +779,6 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
|
||||
if (!list_empty(&trans->new_bgs))
|
||||
btrfs_create_pending_block_groups(trans, root);
|
||||
|
||||
if (!list_empty(&trans->ordered)) {
|
||||
spin_lock(&info->trans_lock);
|
||||
list_splice_init(&trans->ordered, &cur_trans->pending_ordered);
|
||||
spin_unlock(&info->trans_lock);
|
||||
}
|
||||
|
||||
trans->delayed_ref_updates = 0;
|
||||
if (!trans->sync) {
|
||||
must_run_delayed_refs =
|
||||
@ -815,15 +794,6 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
|
||||
must_run_delayed_refs = 2;
|
||||
}
|
||||
|
||||
if (trans->qgroup_reserved) {
|
||||
/*
|
||||
* the same root has to be passed here between start_transaction
|
||||
* and end_transaction. Subvolume quota depends on this.
|
||||
*/
|
||||
btrfs_qgroup_free(trans->root, trans->qgroup_reserved);
|
||||
trans->qgroup_reserved = 0;
|
||||
}
|
||||
|
||||
btrfs_trans_release_metadata(trans, root);
|
||||
trans->block_rsv = NULL;
|
||||
|
||||
@ -856,6 +826,9 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
|
||||
atomic_dec(&cur_trans->num_writers);
|
||||
extwriter_counter_dec(cur_trans, trans->type);
|
||||
|
||||
/*
|
||||
* Make sure counter is updated before we wake up waiters.
|
||||
*/
|
||||
smp_mb();
|
||||
if (waitqueue_active(&cur_trans->writer_wait))
|
||||
wake_up(&cur_trans->writer_wait);
|
||||
@ -1238,6 +1211,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
|
||||
spin_lock(&fs_info->fs_roots_radix_lock);
|
||||
if (err)
|
||||
break;
|
||||
btrfs_qgroup_free_meta_all(root);
|
||||
}
|
||||
}
|
||||
spin_unlock(&fs_info->fs_roots_radix_lock);
|
||||
@ -1795,25 +1769,10 @@ static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info)
|
||||
}
|
||||
|
||||
static inline void
|
||||
btrfs_wait_pending_ordered(struct btrfs_transaction *cur_trans,
|
||||
struct btrfs_fs_info *fs_info)
|
||||
btrfs_wait_pending_ordered(struct btrfs_transaction *cur_trans)
|
||||
{
|
||||
struct btrfs_ordered_extent *ordered;
|
||||
|
||||
spin_lock(&fs_info->trans_lock);
|
||||
while (!list_empty(&cur_trans->pending_ordered)) {
|
||||
ordered = list_first_entry(&cur_trans->pending_ordered,
|
||||
struct btrfs_ordered_extent,
|
||||
trans_list);
|
||||
list_del_init(&ordered->trans_list);
|
||||
spin_unlock(&fs_info->trans_lock);
|
||||
|
||||
wait_event(ordered->wait, test_bit(BTRFS_ORDERED_COMPLETE,
|
||||
&ordered->flags));
|
||||
btrfs_put_ordered_extent(ordered);
|
||||
spin_lock(&fs_info->trans_lock);
|
||||
}
|
||||
spin_unlock(&fs_info->trans_lock);
|
||||
wait_event(cur_trans->pending_wait,
|
||||
atomic_read(&cur_trans->pending_ordered) == 0);
|
||||
}
|
||||
|
||||
int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
|
||||
@ -1842,10 +1801,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
|
||||
|
||||
btrfs_trans_release_metadata(trans, root);
|
||||
trans->block_rsv = NULL;
|
||||
if (trans->qgroup_reserved) {
|
||||
btrfs_qgroup_free(root, trans->qgroup_reserved);
|
||||
trans->qgroup_reserved = 0;
|
||||
}
|
||||
|
||||
cur_trans = trans->transaction;
|
||||
|
||||
@ -1865,7 +1820,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (!cur_trans->dirty_bg_run) {
|
||||
if (!test_bit(BTRFS_TRANS_DIRTY_BG_RUN, &cur_trans->flags)) {
|
||||
int run_it = 0;
|
||||
|
||||
/* this mutex is also taken before trying to set
|
||||
@ -1874,18 +1829,17 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
|
||||
* after extents from that block group have been
|
||||
* allocated for cache files. btrfs_set_block_group_ro
|
||||
* will wait for the transaction to commit if it
|
||||
* finds dirty_bg_run = 1
|
||||
* finds BTRFS_TRANS_DIRTY_BG_RUN set.
|
||||
*
|
||||
* The dirty_bg_run flag is also used to make sure only
|
||||
* one process starts all the block group IO. It wouldn't
|
||||
* The BTRFS_TRANS_DIRTY_BG_RUN flag is also used to make sure
|
||||
* only one process starts all the block group IO. It wouldn't
|
||||
* hurt to have more than one go through, but there's no
|
||||
* real advantage to it either.
|
||||
*/
|
||||
mutex_lock(&root->fs_info->ro_block_group_mutex);
|
||||
if (!cur_trans->dirty_bg_run) {
|
||||
if (!test_and_set_bit(BTRFS_TRANS_DIRTY_BG_RUN,
|
||||
&cur_trans->flags))
|
||||
run_it = 1;
|
||||
cur_trans->dirty_bg_run = 1;
|
||||
}
|
||||
mutex_unlock(&root->fs_info->ro_block_group_mutex);
|
||||
|
||||
if (run_it)
|
||||
@ -1897,7 +1851,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
|
||||
}
|
||||
|
||||
spin_lock(&root->fs_info->trans_lock);
|
||||
list_splice_init(&trans->ordered, &cur_trans->pending_ordered);
|
||||
if (cur_trans->state >= TRANS_STATE_COMMIT_START) {
|
||||
spin_unlock(&root->fs_info->trans_lock);
|
||||
atomic_inc(&cur_trans->use_count);
|
||||
@ -1956,7 +1909,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
|
||||
|
||||
btrfs_wait_delalloc_flush(root->fs_info);
|
||||
|
||||
btrfs_wait_pending_ordered(cur_trans, root->fs_info);
|
||||
btrfs_wait_pending_ordered(cur_trans);
|
||||
|
||||
btrfs_scrub_pause(root);
|
||||
/*
|
||||
@ -2136,7 +2089,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
|
||||
|
||||
ret = btrfs_write_and_wait_transaction(trans, root);
|
||||
if (ret) {
|
||||
btrfs_error(root->fs_info, ret,
|
||||
btrfs_std_error(root->fs_info, ret,
|
||||
"Error while writing out transaction");
|
||||
mutex_unlock(&root->fs_info->tree_log_mutex);
|
||||
goto scrub_continue;
|
||||
@ -2156,7 +2109,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
|
||||
|
||||
btrfs_finish_extent_commit(trans, root);
|
||||
|
||||
if (cur_trans->have_free_bgs)
|
||||
if (test_bit(BTRFS_TRANS_HAVE_FREE_BGS, &cur_trans->flags))
|
||||
btrfs_clear_space_info_full(root->fs_info);
|
||||
|
||||
root->fs_info->last_trans_committed = cur_trans->transid;
|
||||
@ -2198,10 +2151,6 @@ cleanup_transaction:
|
||||
btrfs_trans_release_metadata(trans, root);
|
||||
btrfs_trans_release_chunk_metadata(trans);
|
||||
trans->block_rsv = NULL;
|
||||
if (trans->qgroup_reserved) {
|
||||
btrfs_qgroup_free(root, trans->qgroup_reserved);
|
||||
trans->qgroup_reserved = 0;
|
||||
}
|
||||
btrfs_warn(root->fs_info, "Skipping commit of aborted transaction.");
|
||||
if (current->journal_info == trans)
|
||||
current->journal_info = NULL;
|
||||
|
@ -32,6 +32,10 @@ enum btrfs_trans_state {
|
||||
TRANS_STATE_MAX = 6,
|
||||
};
|
||||
|
||||
#define BTRFS_TRANS_HAVE_FREE_BGS 0
|
||||
#define BTRFS_TRANS_DIRTY_BG_RUN 1
|
||||
#define BTRFS_TRANS_CACHE_ENOSPC 2
|
||||
|
||||
struct btrfs_transaction {
|
||||
u64 transid;
|
||||
/*
|
||||
@ -46,11 +50,9 @@ struct btrfs_transaction {
|
||||
*/
|
||||
atomic_t num_writers;
|
||||
atomic_t use_count;
|
||||
atomic_t pending_ordered;
|
||||
|
||||
/*
|
||||
* true if there is free bgs operations in this transaction
|
||||
*/
|
||||
int have_free_bgs;
|
||||
unsigned long flags;
|
||||
|
||||
/* Be protected by fs_info->trans_lock when we want to change it. */
|
||||
enum btrfs_trans_state state;
|
||||
@ -59,9 +61,9 @@ struct btrfs_transaction {
|
||||
unsigned long start_time;
|
||||
wait_queue_head_t writer_wait;
|
||||
wait_queue_head_t commit_wait;
|
||||
wait_queue_head_t pending_wait;
|
||||
struct list_head pending_snapshots;
|
||||
struct list_head pending_chunks;
|
||||
struct list_head pending_ordered;
|
||||
struct list_head switch_commits;
|
||||
struct list_head dirty_bgs;
|
||||
struct list_head io_bgs;
|
||||
@ -80,7 +82,6 @@ struct btrfs_transaction {
|
||||
spinlock_t dropped_roots_lock;
|
||||
struct btrfs_delayed_ref_root delayed_refs;
|
||||
int aborted;
|
||||
int dirty_bg_run;
|
||||
};
|
||||
|
||||
#define __TRANS_FREEZABLE (1U << 0)
|
||||
@ -107,7 +108,6 @@ struct btrfs_trans_handle {
|
||||
u64 transid;
|
||||
u64 bytes_reserved;
|
||||
u64 chunk_bytes_reserved;
|
||||
u64 qgroup_reserved;
|
||||
unsigned long use_count;
|
||||
unsigned long blocks_reserved;
|
||||
unsigned long blocks_used;
|
||||
@ -129,7 +129,6 @@ struct btrfs_trans_handle {
|
||||
*/
|
||||
struct btrfs_root *root;
|
||||
struct seq_list delayed_ref_elem;
|
||||
struct list_head ordered;
|
||||
struct list_head qgroup_ref_list;
|
||||
struct list_head new_bgs;
|
||||
};
|
||||
@ -185,9 +184,10 @@ static inline void btrfs_clear_skip_qgroup(struct btrfs_trans_handle *trans)
|
||||
int btrfs_end_transaction(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root);
|
||||
struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
|
||||
int num_items);
|
||||
unsigned int num_items);
|
||||
struct btrfs_trans_handle *btrfs_start_transaction_lflush(
|
||||
struct btrfs_root *root, int num_items);
|
||||
struct btrfs_root *root,
|
||||
unsigned int num_items);
|
||||
struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root);
|
||||
struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root);
|
||||
struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root);
|
||||
|
@ -229,7 +229,9 @@ int btrfs_pin_log_trans(struct btrfs_root *root)
|
||||
void btrfs_end_log_trans(struct btrfs_root *root)
|
||||
{
|
||||
if (atomic_dec_and_test(&root->log_writers)) {
|
||||
smp_mb();
|
||||
/*
|
||||
* Implicit memory barrier after atomic_dec_and_test
|
||||
*/
|
||||
if (waitqueue_active(&root->log_writer_wait))
|
||||
wake_up(&root->log_writer_wait);
|
||||
}
|
||||
@ -691,7 +693,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
|
||||
ret = btrfs_inc_extent_ref(trans, root,
|
||||
ins.objectid, ins.offset,
|
||||
0, root->root_key.objectid,
|
||||
key->objectid, offset, 0);
|
||||
key->objectid, offset);
|
||||
if (ret)
|
||||
goto out;
|
||||
} else {
|
||||
@ -2820,7 +2822,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
|
||||
|
||||
mutex_lock(&log_root_tree->log_mutex);
|
||||
if (atomic_dec_and_test(&log_root_tree->log_writers)) {
|
||||
smp_mb();
|
||||
/*
|
||||
* Implicit memory barrier after atomic_dec_and_test
|
||||
*/
|
||||
if (waitqueue_active(&log_root_tree->log_writer_wait))
|
||||
wake_up(&log_root_tree->log_writer_wait);
|
||||
}
|
||||
@ -2950,6 +2954,9 @@ out_wake_log_root:
|
||||
atomic_set(&log_root_tree->log_commit[index2], 0);
|
||||
mutex_unlock(&log_root_tree->log_mutex);
|
||||
|
||||
/*
|
||||
* The barrier before waitqueue_active is implied by mutex_unlock
|
||||
*/
|
||||
if (waitqueue_active(&log_root_tree->log_commit_wait[index2]))
|
||||
wake_up(&log_root_tree->log_commit_wait[index2]);
|
||||
out:
|
||||
@ -2961,6 +2968,9 @@ out:
|
||||
atomic_set(&root->log_commit[index1], 0);
|
||||
mutex_unlock(&root->log_mutex);
|
||||
|
||||
/*
|
||||
* The barrier before waitqueue_active is implied by mutex_unlock
|
||||
*/
|
||||
if (waitqueue_active(&root->log_commit_wait[index1]))
|
||||
wake_up(&root->log_commit_wait[index1]);
|
||||
return ret;
|
||||
@ -5314,7 +5324,7 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree)
|
||||
|
||||
ret = walk_log_tree(trans, log_root_tree, &wc);
|
||||
if (ret) {
|
||||
btrfs_error(fs_info, ret, "Failed to pin buffers while "
|
||||
btrfs_std_error(fs_info, ret, "Failed to pin buffers while "
|
||||
"recovering log root tree.");
|
||||
goto error;
|
||||
}
|
||||
@ -5328,7 +5338,7 @@ again:
|
||||
ret = btrfs_search_slot(NULL, log_root_tree, &key, path, 0, 0);
|
||||
|
||||
if (ret < 0) {
|
||||
btrfs_error(fs_info, ret,
|
||||
btrfs_std_error(fs_info, ret,
|
||||
"Couldn't find tree log root.");
|
||||
goto error;
|
||||
}
|
||||
@ -5346,7 +5356,7 @@ again:
|
||||
log = btrfs_read_fs_root(log_root_tree, &found_key);
|
||||
if (IS_ERR(log)) {
|
||||
ret = PTR_ERR(log);
|
||||
btrfs_error(fs_info, ret,
|
||||
btrfs_std_error(fs_info, ret,
|
||||
"Couldn't read tree log root.");
|
||||
goto error;
|
||||
}
|
||||
@ -5361,7 +5371,7 @@ again:
|
||||
free_extent_buffer(log->node);
|
||||
free_extent_buffer(log->commit_root);
|
||||
kfree(log);
|
||||
btrfs_error(fs_info, ret, "Couldn't read target root "
|
||||
btrfs_std_error(fs_info, ret, "Couldn't read target root "
|
||||
"for tree log recovery.");
|
||||
goto error;
|
||||
}
|
||||
|
@ -42,6 +42,82 @@
|
||||
#include "dev-replace.h"
|
||||
#include "sysfs.h"
|
||||
|
||||
const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
|
||||
[BTRFS_RAID_RAID10] = {
|
||||
.sub_stripes = 2,
|
||||
.dev_stripes = 1,
|
||||
.devs_max = 0, /* 0 == as many as possible */
|
||||
.devs_min = 4,
|
||||
.tolerated_failures = 1,
|
||||
.devs_increment = 2,
|
||||
.ncopies = 2,
|
||||
},
|
||||
[BTRFS_RAID_RAID1] = {
|
||||
.sub_stripes = 1,
|
||||
.dev_stripes = 1,
|
||||
.devs_max = 2,
|
||||
.devs_min = 2,
|
||||
.tolerated_failures = 1,
|
||||
.devs_increment = 2,
|
||||
.ncopies = 2,
|
||||
},
|
||||
[BTRFS_RAID_DUP] = {
|
||||
.sub_stripes = 1,
|
||||
.dev_stripes = 2,
|
||||
.devs_max = 1,
|
||||
.devs_min = 1,
|
||||
.tolerated_failures = 0,
|
||||
.devs_increment = 1,
|
||||
.ncopies = 2,
|
||||
},
|
||||
[BTRFS_RAID_RAID0] = {
|
||||
.sub_stripes = 1,
|
||||
.dev_stripes = 1,
|
||||
.devs_max = 0,
|
||||
.devs_min = 2,
|
||||
.tolerated_failures = 0,
|
||||
.devs_increment = 1,
|
||||
.ncopies = 1,
|
||||
},
|
||||
[BTRFS_RAID_SINGLE] = {
|
||||
.sub_stripes = 1,
|
||||
.dev_stripes = 1,
|
||||
.devs_max = 1,
|
||||
.devs_min = 1,
|
||||
.tolerated_failures = 0,
|
||||
.devs_increment = 1,
|
||||
.ncopies = 1,
|
||||
},
|
||||
[BTRFS_RAID_RAID5] = {
|
||||
.sub_stripes = 1,
|
||||
.dev_stripes = 1,
|
||||
.devs_max = 0,
|
||||
.devs_min = 2,
|
||||
.tolerated_failures = 1,
|
||||
.devs_increment = 1,
|
||||
.ncopies = 2,
|
||||
},
|
||||
[BTRFS_RAID_RAID6] = {
|
||||
.sub_stripes = 1,
|
||||
.dev_stripes = 1,
|
||||
.devs_max = 0,
|
||||
.devs_min = 3,
|
||||
.tolerated_failures = 2,
|
||||
.devs_increment = 1,
|
||||
.ncopies = 3,
|
||||
},
|
||||
};
|
||||
|
||||
const u64 const btrfs_raid_group[BTRFS_NR_RAID_TYPES] = {
|
||||
[BTRFS_RAID_RAID10] = BTRFS_BLOCK_GROUP_RAID10,
|
||||
[BTRFS_RAID_RAID1] = BTRFS_BLOCK_GROUP_RAID1,
|
||||
[BTRFS_RAID_DUP] = BTRFS_BLOCK_GROUP_DUP,
|
||||
[BTRFS_RAID_RAID0] = BTRFS_BLOCK_GROUP_RAID0,
|
||||
[BTRFS_RAID_SINGLE] = 0,
|
||||
[BTRFS_RAID_RAID5] = BTRFS_BLOCK_GROUP_RAID5,
|
||||
[BTRFS_RAID_RAID6] = BTRFS_BLOCK_GROUP_RAID6,
|
||||
};
|
||||
|
||||
static int init_first_rw_device(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct btrfs_device *device);
|
||||
@ -198,7 +274,6 @@ btrfs_get_bdev_and_sb(const char *device_path, fmode_t flags, void *holder,
|
||||
|
||||
if (IS_ERR(*bdev)) {
|
||||
ret = PTR_ERR(*bdev);
|
||||
printk(KERN_INFO "BTRFS: open %s failed\n", device_path);
|
||||
goto error;
|
||||
}
|
||||
|
||||
@ -211,8 +286,8 @@ btrfs_get_bdev_and_sb(const char *device_path, fmode_t flags, void *holder,
|
||||
}
|
||||
invalidate_bdev(*bdev);
|
||||
*bh = btrfs_read_dev_super(*bdev);
|
||||
if (!*bh) {
|
||||
ret = -EINVAL;
|
||||
if (IS_ERR(*bh)) {
|
||||
ret = PTR_ERR(*bh);
|
||||
blkdev_put(*bdev, flags);
|
||||
goto error;
|
||||
}
|
||||
@ -345,6 +420,9 @@ loop_lock:
|
||||
pending = pending->bi_next;
|
||||
cur->bi_next = NULL;
|
||||
|
||||
/*
|
||||
* atomic_dec_return implies a barrier for waitqueue_active
|
||||
*/
|
||||
if (atomic_dec_return(&fs_info->nr_async_bios) < limit &&
|
||||
waitqueue_active(&fs_info->async_submit_wait))
|
||||
wake_up(&fs_info->async_submit_wait);
|
||||
@ -765,36 +843,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
|
||||
|
||||
mutex_lock(&fs_devices->device_list_mutex);
|
||||
list_for_each_entry_safe(device, tmp, &fs_devices->devices, dev_list) {
|
||||
struct btrfs_device *new_device;
|
||||
struct rcu_string *name;
|
||||
|
||||
if (device->bdev)
|
||||
fs_devices->open_devices--;
|
||||
|
||||
if (device->writeable &&
|
||||
device->devid != BTRFS_DEV_REPLACE_DEVID) {
|
||||
list_del_init(&device->dev_alloc_list);
|
||||
fs_devices->rw_devices--;
|
||||
}
|
||||
|
||||
if (device->missing)
|
||||
fs_devices->missing_devices--;
|
||||
|
||||
new_device = btrfs_alloc_device(NULL, &device->devid,
|
||||
device->uuid);
|
||||
BUG_ON(IS_ERR(new_device)); /* -ENOMEM */
|
||||
|
||||
/* Safe because we are under uuid_mutex */
|
||||
if (device->name) {
|
||||
name = rcu_string_strdup(device->name->str, GFP_NOFS);
|
||||
BUG_ON(!name); /* -ENOMEM */
|
||||
rcu_assign_pointer(new_device->name, name);
|
||||
}
|
||||
|
||||
list_replace_rcu(&device->dev_list, &new_device->dev_list);
|
||||
new_device->fs_devices = device->fs_devices;
|
||||
|
||||
call_rcu(&device->rcu, free_device);
|
||||
btrfs_close_one_device(device);
|
||||
}
|
||||
mutex_unlock(&fs_devices->device_list_mutex);
|
||||
|
||||
@ -1402,7 +1451,7 @@ again:
|
||||
extent = btrfs_item_ptr(leaf, path->slots[0],
|
||||
struct btrfs_dev_extent);
|
||||
} else {
|
||||
btrfs_error(root->fs_info, ret, "Slot search failed");
|
||||
btrfs_std_error(root->fs_info, ret, "Slot search failed");
|
||||
goto out;
|
||||
}
|
||||
|
||||
@ -1410,10 +1459,10 @@ again:
|
||||
|
||||
ret = btrfs_del_item(trans, root, path);
|
||||
if (ret) {
|
||||
btrfs_error(root->fs_info, ret,
|
||||
btrfs_std_error(root->fs_info, ret,
|
||||
"Failed to remove dev extent item");
|
||||
} else {
|
||||
trans->transaction->have_free_bgs = 1;
|
||||
set_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags);
|
||||
}
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
@ -1801,7 +1850,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
|
||||
if (device->bdev) {
|
||||
device->fs_devices->open_devices--;
|
||||
/* remove sysfs entry */
|
||||
btrfs_kobj_rm_device(root->fs_info->fs_devices, device);
|
||||
btrfs_sysfs_rm_device_link(root->fs_info->fs_devices, device);
|
||||
}
|
||||
|
||||
call_rcu(&device->rcu, free_device);
|
||||
@ -1924,7 +1973,8 @@ void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info,
|
||||
if (srcdev->writeable) {
|
||||
fs_devices->rw_devices--;
|
||||
/* zero out the old super if it is writable */
|
||||
btrfs_scratch_superblock(srcdev);
|
||||
btrfs_scratch_superblocks(srcdev->bdev,
|
||||
rcu_str_deref(srcdev->name));
|
||||
}
|
||||
|
||||
if (srcdev->bdev)
|
||||
@ -1971,10 +2021,11 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
|
||||
WARN_ON(!tgtdev);
|
||||
mutex_lock(&fs_info->fs_devices->device_list_mutex);
|
||||
|
||||
btrfs_kobj_rm_device(fs_info->fs_devices, tgtdev);
|
||||
btrfs_sysfs_rm_device_link(fs_info->fs_devices, tgtdev);
|
||||
|
||||
if (tgtdev->bdev) {
|
||||
btrfs_scratch_superblock(tgtdev);
|
||||
btrfs_scratch_superblocks(tgtdev->bdev,
|
||||
rcu_str_deref(tgtdev->name));
|
||||
fs_info->fs_devices->open_devices--;
|
||||
}
|
||||
fs_info->fs_devices->num_devices--;
|
||||
@ -2041,10 +2092,8 @@ int btrfs_find_device_missing_or_by_path(struct btrfs_root *root,
|
||||
}
|
||||
}
|
||||
|
||||
if (!*device) {
|
||||
btrfs_err(root->fs_info, "no missing device found");
|
||||
return -ENOENT;
|
||||
}
|
||||
if (!*device)
|
||||
return BTRFS_ERROR_DEV_MISSING_NOT_FOUND;
|
||||
|
||||
return 0;
|
||||
} else {
|
||||
@ -2309,7 +2358,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
|
||||
tmp + 1);
|
||||
|
||||
/* add sysfs device entry */
|
||||
btrfs_kobj_add_device(root->fs_info->fs_devices, device);
|
||||
btrfs_sysfs_add_device_link(root->fs_info->fs_devices, device);
|
||||
|
||||
/*
|
||||
* we've got more storage, clear any full flags on the space
|
||||
@ -2350,9 +2399,10 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
|
||||
*/
|
||||
snprintf(fsid_buf, BTRFS_UUID_UNPARSED_SIZE, "%pU",
|
||||
root->fs_info->fsid);
|
||||
if (kobject_rename(&root->fs_info->fs_devices->super_kobj,
|
||||
if (kobject_rename(&root->fs_info->fs_devices->fsid_kobj,
|
||||
fsid_buf))
|
||||
pr_warn("BTRFS: sysfs: failed to create fsid for sprout\n");
|
||||
btrfs_warn(root->fs_info,
|
||||
"sysfs: failed to create fsid for sprout");
|
||||
}
|
||||
|
||||
root->fs_info->num_tolerated_disk_barrier_failures =
|
||||
@ -2368,7 +2418,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
|
||||
|
||||
ret = btrfs_relocate_sys_chunks(root);
|
||||
if (ret < 0)
|
||||
btrfs_error(root->fs_info, ret,
|
||||
btrfs_std_error(root->fs_info, ret,
|
||||
"Failed to relocate sys chunks after "
|
||||
"device initialization. This can be fixed "
|
||||
"using the \"btrfs balance\" command.");
|
||||
@ -2388,7 +2438,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
|
||||
error_trans:
|
||||
btrfs_end_transaction(trans, root);
|
||||
rcu_string_free(device->name);
|
||||
btrfs_kobj_rm_device(root->fs_info->fs_devices, device);
|
||||
btrfs_sysfs_rm_device_link(root->fs_info->fs_devices, device);
|
||||
kfree(device);
|
||||
error:
|
||||
blkdev_put(bdev, FMODE_EXCL);
|
||||
@ -2613,7 +2663,7 @@ static int btrfs_free_chunk(struct btrfs_trans_handle *trans,
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
else if (ret > 0) { /* Logic error or corruption */
|
||||
btrfs_error(root->fs_info, -ENOENT,
|
||||
btrfs_std_error(root->fs_info, -ENOENT,
|
||||
"Failed lookup while freeing chunk.");
|
||||
ret = -ENOENT;
|
||||
goto out;
|
||||
@ -2621,7 +2671,7 @@ static int btrfs_free_chunk(struct btrfs_trans_handle *trans,
|
||||
|
||||
ret = btrfs_del_item(trans, root, path);
|
||||
if (ret < 0)
|
||||
btrfs_error(root->fs_info, ret,
|
||||
btrfs_std_error(root->fs_info, ret,
|
||||
"Failed to delete chunk item.");
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
@ -2806,7 +2856,7 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, u64 chunk_offset)
|
||||
trans = btrfs_start_transaction(root, 0);
|
||||
if (IS_ERR(trans)) {
|
||||
ret = PTR_ERR(trans);
|
||||
btrfs_std_error(root->fs_info, ret);
|
||||
btrfs_std_error(root->fs_info, ret, NULL);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -3009,16 +3059,19 @@ static void update_balance_args(struct btrfs_balance_control *bctl)
|
||||
* (albeit full) chunks.
|
||||
*/
|
||||
if (!(bctl->data.flags & BTRFS_BALANCE_ARGS_USAGE) &&
|
||||
!(bctl->data.flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) &&
|
||||
!(bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
|
||||
bctl->data.flags |= BTRFS_BALANCE_ARGS_USAGE;
|
||||
bctl->data.usage = 90;
|
||||
}
|
||||
if (!(bctl->sys.flags & BTRFS_BALANCE_ARGS_USAGE) &&
|
||||
!(bctl->sys.flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) &&
|
||||
!(bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
|
||||
bctl->sys.flags |= BTRFS_BALANCE_ARGS_USAGE;
|
||||
bctl->sys.usage = 90;
|
||||
}
|
||||
if (!(bctl->meta.flags & BTRFS_BALANCE_ARGS_USAGE) &&
|
||||
!(bctl->meta.flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) &&
|
||||
!(bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
|
||||
bctl->meta.flags |= BTRFS_BALANCE_ARGS_USAGE;
|
||||
bctl->meta.usage = 90;
|
||||
@ -3072,6 +3125,39 @@ static int chunk_profiles_filter(u64 chunk_type,
|
||||
|
||||
/*
 * Usage filter for balance: compare the used bytes of the block group at
 * @chunk_offset against a window derived from bargs->usage_min and
 * bargs->usage_max.
 *
 * Returns 0 when lower <= used < upper, 1 otherwise.  The caller treats a
 * non-zero result as "do not balance this chunk".
 *
 * Bounds:
 *   lower - 0 when usage_min is 0, else div_factor_fine(size, usage_min)
 *   upper - 1 when usage_max is 0, the whole block group size when
 *           usage_max > 100, else div_factor_fine(size, usage_max)
 * (div_factor_fine() presumably scales by a percentage - confirm.)
 *
 * NOTE(review): should_balance_chunk() calls this function for
 * BTRFS_BALANCE_ARGS_USAGE, yet it reads the usage_min/usage_max pair,
 * while chunk_usage_range_filter() compares against a single threshold.
 * The two bodies look swapped relative to their names - confirm.
 */
static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
			      struct btrfs_balance_args *bargs)
{
	struct btrfs_block_group_cache *bg;
	u64 used;
	u64 lower = 0;
	u64 upper;
	int filtered;

	bg = btrfs_lookup_block_group(fs_info, chunk_offset);
	used = btrfs_block_group_used(&bg->item);

	if (bargs->usage_min != 0)
		lower = div_factor_fine(bg->key.offset, bargs->usage_min);

	if (bargs->usage_max > 100)
		upper = bg->key.offset;
	else if (bargs->usage_max != 0)
		upper = div_factor_fine(bg->key.offset, bargs->usage_max);
	else
		upper = 1;

	/* Outside the [lower, upper) window means the chunk is filtered out. */
	filtered = (used < lower || used >= upper);

	btrfs_put_block_group(bg);
	return filtered;
}
|
||||
|
||||
static int chunk_usage_range_filter(struct btrfs_fs_info *fs_info,
|
||||
u64 chunk_offset, struct btrfs_balance_args *bargs)
|
||||
{
|
||||
struct btrfs_block_group_cache *cache;
|
||||
u64 chunk_used, user_thresh;
|
||||
@ -3080,7 +3166,7 @@ static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
|
||||
cache = btrfs_lookup_block_group(fs_info, chunk_offset);
|
||||
chunk_used = btrfs_block_group_used(&cache->item);
|
||||
|
||||
if (bargs->usage == 0)
|
||||
if (bargs->usage_min == 0)
|
||||
user_thresh = 1;
|
||||
else if (bargs->usage > 100)
|
||||
user_thresh = cache->key.offset;
|
||||
@ -3170,6 +3256,19 @@ static int chunk_vrange_filter(struct extent_buffer *leaf,
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int chunk_stripes_range_filter(struct extent_buffer *leaf,
|
||||
struct btrfs_chunk *chunk,
|
||||
struct btrfs_balance_args *bargs)
|
||||
{
|
||||
int num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
|
||||
|
||||
if (bargs->stripes_min <= num_stripes
|
||||
&& num_stripes <= bargs->stripes_max)
|
||||
return 0;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int chunk_soft_convert_filter(u64 chunk_type,
|
||||
struct btrfs_balance_args *bargs)
|
||||
{
|
||||
@ -3216,6 +3315,9 @@ static int should_balance_chunk(struct btrfs_root *root,
|
||||
if ((bargs->flags & BTRFS_BALANCE_ARGS_USAGE) &&
|
||||
chunk_usage_filter(bctl->fs_info, chunk_offset, bargs)) {
|
||||
return 0;
|
||||
} else if ((bargs->flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) &&
|
||||
chunk_usage_range_filter(bctl->fs_info, chunk_offset, bargs)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* devid filter */
|
||||
@ -3236,6 +3338,12 @@ static int should_balance_chunk(struct btrfs_root *root,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* stripes filter */
|
||||
if ((bargs->flags & BTRFS_BALANCE_ARGS_STRIPES_RANGE) &&
|
||||
chunk_stripes_range_filter(leaf, chunk, bargs)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* soft profile changing mode */
|
||||
if ((bargs->flags & BTRFS_BALANCE_ARGS_SOFT) &&
|
||||
chunk_soft_convert_filter(chunk_type, bargs)) {
|
||||
@ -3250,6 +3358,16 @@ static int should_balance_chunk(struct btrfs_root *root,
|
||||
return 0;
|
||||
else
|
||||
bargs->limit--;
|
||||
} else if ((bargs->flags & BTRFS_BALANCE_ARGS_LIMIT_RANGE)) {
|
||||
/*
|
||||
* Same logic as the 'limit' filter; the minimum cannot be
|
||||
* determined here because we do not have the global information
|
||||
* about the count of all chunks that satisfy the filters.
|
||||
*/
|
||||
if (bargs->limit_max == 0)
|
||||
return 0;
|
||||
else
|
||||
bargs->limit_max--;
|
||||
}
|
||||
|
||||
return 1;
|
||||
@ -3264,6 +3382,7 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
|
||||
struct btrfs_device *device;
|
||||
u64 old_size;
|
||||
u64 size_to_free;
|
||||
u64 chunk_type;
|
||||
struct btrfs_chunk *chunk;
|
||||
struct btrfs_path *path;
|
||||
struct btrfs_key key;
|
||||
@ -3274,9 +3393,13 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
|
||||
int ret;
|
||||
int enospc_errors = 0;
|
||||
bool counting = true;
|
||||
/* The single value limit and min/max limits use the same bytes in the balance args */
|
||||
u64 limit_data = bctl->data.limit;
|
||||
u64 limit_meta = bctl->meta.limit;
|
||||
u64 limit_sys = bctl->sys.limit;
|
||||
u32 count_data = 0;
|
||||
u32 count_meta = 0;
|
||||
u32 count_sys = 0;
|
||||
|
||||
/* step one make some room on all the devices */
|
||||
devices = &fs_info->fs_devices->devices;
|
||||
@ -3317,6 +3440,10 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
|
||||
spin_unlock(&fs_info->balance_lock);
|
||||
again:
|
||||
if (!counting) {
|
||||
/*
|
||||
* The single value limit and min/max limits use the same bytes
|
||||
* in the balance args
|
||||
*/
|
||||
bctl->data.limit = limit_data;
|
||||
bctl->meta.limit = limit_meta;
|
||||
bctl->sys.limit = limit_sys;
|
||||
@ -3364,6 +3491,7 @@ again:
|
||||
}
|
||||
|
||||
chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
|
||||
chunk_type = btrfs_chunk_type(leaf, chunk);
|
||||
|
||||
if (!counting) {
|
||||
spin_lock(&fs_info->balance_lock);
|
||||
@ -3384,6 +3512,28 @@ again:
|
||||
spin_lock(&fs_info->balance_lock);
|
||||
bctl->stat.expected++;
|
||||
spin_unlock(&fs_info->balance_lock);
|
||||
|
||||
if (chunk_type & BTRFS_BLOCK_GROUP_DATA)
|
||||
count_data++;
|
||||
else if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM)
|
||||
count_sys++;
|
||||
else if (chunk_type & BTRFS_BLOCK_GROUP_METADATA)
|
||||
count_meta++;
|
||||
|
||||
goto loop;
|
||||
}
|
||||
|
||||
/*
|
||||
* Apply limit_min filter, no need to check if the LIMITS
|
||||
* filter is used, limit_min is 0 by default
|
||||
*/
|
||||
if (((chunk_type & BTRFS_BLOCK_GROUP_DATA) &&
|
||||
count_data < bctl->data.limit_min)
|
||||
|| ((chunk_type & BTRFS_BLOCK_GROUP_METADATA) &&
|
||||
count_meta < bctl->meta.limit_min)
|
||||
|| ((chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) &&
|
||||
count_sys < bctl->sys.limit_min)) {
|
||||
mutex_unlock(&fs_info->delete_unused_bgs_mutex);
|
||||
goto loop;
|
||||
}
|
||||
|
||||
@ -3461,11 +3611,20 @@ static void __cancel_balance(struct btrfs_fs_info *fs_info)
|
||||
unset_balance_control(fs_info);
|
||||
ret = del_balance_item(fs_info->tree_root);
|
||||
if (ret)
|
||||
btrfs_std_error(fs_info, ret);
|
||||
btrfs_std_error(fs_info, ret, NULL);
|
||||
|
||||
atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
|
||||
}
|
||||
|
||||
/* Non-zero return value signifies invalidity */
|
||||
static inline int validate_convert_profile(struct btrfs_balance_args *bctl_arg,
|
||||
u64 allowed)
|
||||
{
|
||||
return ((bctl_arg->flags & BTRFS_BALANCE_ARGS_CONVERT) &&
|
||||
(!alloc_profile_is_valid(bctl_arg->target, 1) ||
|
||||
(bctl_arg->target & ~allowed)));
|
||||
}
|
||||
|
||||
/*
|
||||
* Should be called with both balance and volume mutexes held
|
||||
*/
|
||||
@ -3523,27 +3682,21 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
|
||||
if (num_devices > 3)
|
||||
allowed |= (BTRFS_BLOCK_GROUP_RAID10 |
|
||||
BTRFS_BLOCK_GROUP_RAID6);
|
||||
if ((bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
|
||||
(!alloc_profile_is_valid(bctl->data.target, 1) ||
|
||||
(bctl->data.target & ~allowed))) {
|
||||
if (validate_convert_profile(&bctl->data, allowed)) {
|
||||
btrfs_err(fs_info, "unable to start balance with target "
|
||||
"data profile %llu",
|
||||
bctl->data.target);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
if ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
|
||||
(!alloc_profile_is_valid(bctl->meta.target, 1) ||
|
||||
(bctl->meta.target & ~allowed))) {
|
||||
if (validate_convert_profile(&bctl->meta, allowed)) {
|
||||
btrfs_err(fs_info,
|
||||
"unable to start balance with target metadata profile %llu",
|
||||
bctl->meta.target);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
if ((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
|
||||
(!alloc_profile_is_valid(bctl->sys.target, 1) ||
|
||||
(bctl->sys.target & ~allowed))) {
|
||||
if (validate_convert_profile(&bctl->sys, allowed)) {
|
||||
btrfs_err(fs_info,
|
||||
"unable to start balance with target system profile %llu",
|
||||
bctl->sys.target);
|
||||
@ -4285,65 +4438,6 @@ static int btrfs_cmp_device_info(const void *a, const void *b)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
|
||||
[BTRFS_RAID_RAID10] = {
|
||||
.sub_stripes = 2,
|
||||
.dev_stripes = 1,
|
||||
.devs_max = 0, /* 0 == as many as possible */
|
||||
.devs_min = 4,
|
||||
.devs_increment = 2,
|
||||
.ncopies = 2,
|
||||
},
|
||||
[BTRFS_RAID_RAID1] = {
|
||||
.sub_stripes = 1,
|
||||
.dev_stripes = 1,
|
||||
.devs_max = 2,
|
||||
.devs_min = 2,
|
||||
.devs_increment = 2,
|
||||
.ncopies = 2,
|
||||
},
|
||||
[BTRFS_RAID_DUP] = {
|
||||
.sub_stripes = 1,
|
||||
.dev_stripes = 2,
|
||||
.devs_max = 1,
|
||||
.devs_min = 1,
|
||||
.devs_increment = 1,
|
||||
.ncopies = 2,
|
||||
},
|
||||
[BTRFS_RAID_RAID0] = {
|
||||
.sub_stripes = 1,
|
||||
.dev_stripes = 1,
|
||||
.devs_max = 0,
|
||||
.devs_min = 2,
|
||||
.devs_increment = 1,
|
||||
.ncopies = 1,
|
||||
},
|
||||
[BTRFS_RAID_SINGLE] = {
|
||||
.sub_stripes = 1,
|
||||
.dev_stripes = 1,
|
||||
.devs_max = 1,
|
||||
.devs_min = 1,
|
||||
.devs_increment = 1,
|
||||
.ncopies = 1,
|
||||
},
|
||||
[BTRFS_RAID_RAID5] = {
|
||||
.sub_stripes = 1,
|
||||
.dev_stripes = 1,
|
||||
.devs_max = 0,
|
||||
.devs_min = 2,
|
||||
.devs_increment = 1,
|
||||
.ncopies = 2,
|
||||
},
|
||||
[BTRFS_RAID_RAID6] = {
|
||||
.sub_stripes = 1,
|
||||
.dev_stripes = 1,
|
||||
.devs_max = 0,
|
||||
.devs_min = 3,
|
||||
.devs_increment = 1,
|
||||
.ncopies = 3,
|
||||
},
|
||||
};
|
||||
|
||||
static u32 find_raid56_stripe_len(u32 data_devices, u32 dev_stripe_target)
|
||||
{
|
||||
/* TODO allow them to set a preferred stripe size */
|
||||
@ -6594,8 +6688,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,
|
||||
BUG_ON(!path);
|
||||
ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1);
|
||||
if (ret < 0) {
|
||||
printk_in_rcu(KERN_WARNING "BTRFS: "
|
||||
"error %d while searching for dev_stats item for device %s!\n",
|
||||
btrfs_warn_in_rcu(dev_root->fs_info,
|
||||
"error %d while searching for dev_stats item for device %s",
|
||||
ret, rcu_str_deref(device->name));
|
||||
goto out;
|
||||
}
|
||||
@ -6605,8 +6699,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,
|
||||
/* need to delete old one and insert a new one */
|
||||
ret = btrfs_del_item(trans, dev_root, path);
|
||||
if (ret != 0) {
|
||||
printk_in_rcu(KERN_WARNING "BTRFS: "
|
||||
"delete too small dev_stats item for device %s failed %d!\n",
|
||||
btrfs_warn_in_rcu(dev_root->fs_info,
|
||||
"delete too small dev_stats item for device %s failed %d",
|
||||
rcu_str_deref(device->name), ret);
|
||||
goto out;
|
||||
}
|
||||
@ -6619,9 +6713,9 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,
|
||||
ret = btrfs_insert_empty_item(trans, dev_root, path,
|
||||
&key, sizeof(*ptr));
|
||||
if (ret < 0) {
|
||||
printk_in_rcu(KERN_WARNING "BTRFS: "
|
||||
"insert dev_stats item for device %s failed %d!\n",
|
||||
rcu_str_deref(device->name), ret);
|
||||
btrfs_warn_in_rcu(dev_root->fs_info,
|
||||
"insert dev_stats item for device %s failed %d",
|
||||
rcu_str_deref(device->name), ret);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
@ -6675,8 +6769,8 @@ static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev)
|
||||
{
|
||||
if (!dev->dev_stats_valid)
|
||||
return;
|
||||
printk_ratelimited_in_rcu(KERN_ERR "BTRFS: "
|
||||
"bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n",
|
||||
btrfs_err_rl_in_rcu(dev->dev_root->fs_info,
|
||||
"bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u",
|
||||
rcu_str_deref(dev->name),
|
||||
btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
|
||||
btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
|
||||
@ -6695,8 +6789,8 @@ static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev)
|
||||
if (i == BTRFS_DEV_STAT_VALUES_MAX)
|
||||
return; /* all values == 0, suppress message */
|
||||
|
||||
printk_in_rcu(KERN_INFO "BTRFS: "
|
||||
"bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n",
|
||||
btrfs_info_in_rcu(dev->dev_root->fs_info,
|
||||
"bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u",
|
||||
rcu_str_deref(dev->name),
|
||||
btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
|
||||
btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
|
||||
@ -6740,22 +6834,34 @@ int btrfs_get_dev_stats(struct btrfs_root *root,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int btrfs_scratch_superblock(struct btrfs_device *device)
|
||||
void btrfs_scratch_superblocks(struct block_device *bdev, char *device_path)
|
||||
{
|
||||
struct buffer_head *bh;
|
||||
struct btrfs_super_block *disk_super;
|
||||
int copy_num;
|
||||
|
||||
bh = btrfs_read_dev_super(device->bdev);
|
||||
if (!bh)
|
||||
return -EINVAL;
|
||||
disk_super = (struct btrfs_super_block *)bh->b_data;
|
||||
if (!bdev)
|
||||
return;
|
||||
|
||||
memset(&disk_super->magic, 0, sizeof(disk_super->magic));
|
||||
set_buffer_dirty(bh);
|
||||
sync_dirty_buffer(bh);
|
||||
brelse(bh);
|
||||
for (copy_num = 0; copy_num < BTRFS_SUPER_MIRROR_MAX;
|
||||
copy_num++) {
|
||||
|
||||
return 0;
|
||||
if (btrfs_read_dev_one_super(bdev, copy_num, &bh))
|
||||
continue;
|
||||
|
||||
disk_super = (struct btrfs_super_block *)bh->b_data;
|
||||
|
||||
memset(&disk_super->magic, 0, sizeof(disk_super->magic));
|
||||
set_buffer_dirty(bh);
|
||||
sync_dirty_buffer(bh);
|
||||
brelse(bh);
|
||||
}
|
||||
|
||||
/* Notify udev that device has changed */
|
||||
btrfs_kobject_uevent(bdev, KOBJ_CHANGE);
|
||||
|
||||
/* Update ctime/mtime for device path for libblkid */
|
||||
update_dev_time(device_path);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -6823,3 +6929,38 @@ void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info)
|
||||
fs_devices = fs_devices->seed;
|
||||
}
|
||||
}
|
||||
|
||||
void btrfs_close_one_device(struct btrfs_device *device)
|
||||
{
|
||||
struct btrfs_fs_devices *fs_devices = device->fs_devices;
|
||||
struct btrfs_device *new_device;
|
||||
struct rcu_string *name;
|
||||
|
||||
if (device->bdev)
|
||||
fs_devices->open_devices--;
|
||||
|
||||
if (device->writeable &&
|
||||
device->devid != BTRFS_DEV_REPLACE_DEVID) {
|
||||
list_del_init(&device->dev_alloc_list);
|
||||
fs_devices->rw_devices--;
|
||||
}
|
||||
|
||||
if (device->missing)
|
||||
fs_devices->missing_devices--;
|
||||
|
||||
new_device = btrfs_alloc_device(NULL, &device->devid,
|
||||
device->uuid);
|
||||
BUG_ON(IS_ERR(new_device)); /* -ENOMEM */
|
||||
|
||||
/* Safe because we are under uuid_mutex */
|
||||
if (device->name) {
|
||||
name = rcu_string_strdup(device->name->str, GFP_NOFS);
|
||||
BUG_ON(!name); /* -ENOMEM */
|
||||
rcu_assign_pointer(new_device->name, name);
|
||||
}
|
||||
|
||||
list_replace_rcu(&device->dev_list, &new_device->dev_list);
|
||||
new_device->fs_devices = device->fs_devices;
|
||||
|
||||
call_rcu(&device->rcu, free_device);
|
||||
}
|
||||
|
@ -256,7 +256,7 @@ struct btrfs_fs_devices {
|
||||
|
||||
struct btrfs_fs_info *fs_info;
|
||||
/* sysfs kobjects */
|
||||
struct kobject super_kobj;
|
||||
struct kobject fsid_kobj;
|
||||
struct kobject *device_dir_kobj;
|
||||
struct completion kobj_unregister;
|
||||
};
|
||||
@ -334,10 +334,15 @@ struct btrfs_raid_attr {
|
||||
int dev_stripes; /* stripes per dev */
|
||||
int devs_max; /* max devs to use */
|
||||
int devs_min; /* min devs needed */
|
||||
int tolerated_failures; /* max tolerated fail devs */
|
||||
int devs_increment; /* ndevs has to be a multiple of this */
|
||||
int ncopies; /* how many copies to data has */
|
||||
};
|
||||
|
||||
extern const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES];
|
||||
|
||||
extern const u64 btrfs_raid_group[BTRFS_NR_RAID_TYPES];
|
||||
|
||||
struct map_lookup {
|
||||
u64 type;
|
||||
int io_align;
|
||||
@ -375,6 +380,9 @@ struct map_lookup {
|
||||
#define BTRFS_BALANCE_ARGS_DRANGE (1ULL << 3)
|
||||
#define BTRFS_BALANCE_ARGS_VRANGE (1ULL << 4)
|
||||
#define BTRFS_BALANCE_ARGS_LIMIT (1ULL << 5)
|
||||
#define BTRFS_BALANCE_ARGS_LIMIT_RANGE (1ULL << 6)
|
||||
#define BTRFS_BALANCE_ARGS_STRIPES_RANGE (1ULL << 7)
|
||||
#define BTRFS_BALANCE_ARGS_USAGE_RANGE (1ULL << 8)
|
||||
|
||||
#define BTRFS_BALANCE_ARGS_MASK \
|
||||
(BTRFS_BALANCE_ARGS_PROFILES | \
|
||||
@ -382,7 +390,10 @@ struct map_lookup {
|
||||
BTRFS_BALANCE_ARGS_DEVID | \
|
||||
BTRFS_BALANCE_ARGS_DRANGE | \
|
||||
BTRFS_BALANCE_ARGS_VRANGE | \
|
||||
BTRFS_BALANCE_ARGS_LIMIT)
|
||||
BTRFS_BALANCE_ARGS_LIMIT | \
|
||||
BTRFS_BALANCE_ARGS_LIMIT_RANGE | \
|
||||
BTRFS_BALANCE_ARGS_STRIPES_RANGE | \
|
||||
BTRFS_BALANCE_ARGS_USAGE_RANGE)
|
||||
|
||||
/*
|
||||
* Profile changing flags. When SOFT is set we won't relocate chunk if
|
||||
@ -482,7 +493,7 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_device *tgtdev);
|
||||
void btrfs_init_dev_replace_tgtdev_for_resume(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_device *tgtdev);
|
||||
int btrfs_scratch_superblock(struct btrfs_device *device);
|
||||
void btrfs_scratch_superblocks(struct block_device *bdev, char *device_path);
|
||||
int btrfs_is_parity_mirror(struct btrfs_mapping_tree *map_tree,
|
||||
u64 logical, u64 len, int mirror_num);
|
||||
unsigned long btrfs_full_stripe_len(struct btrfs_root *root,
|
||||
@ -555,5 +566,6 @@ static inline void unlock_chunks(struct btrfs_root *root)
|
||||
struct list_head *btrfs_get_fs_uuids(void);
|
||||
void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info);
|
||||
void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info);
|
||||
void btrfs_close_one_device(struct btrfs_device *device);
|
||||
|
||||
#endif
|
||||
|
@ -1117,6 +1117,119 @@ DEFINE_EVENT(btrfs__workqueue_done, btrfs_workqueue_destroy,
|
||||
TP_ARGS(wq)
|
||||
);
|
||||
|
||||
DECLARE_EVENT_CLASS(btrfs__qgroup_data_map,
|
||||
|
||||
TP_PROTO(struct inode *inode, u64 free_reserved),
|
||||
|
||||
TP_ARGS(inode, free_reserved),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( u64, rootid )
|
||||
__field( unsigned long, ino )
|
||||
__field( u64, free_reserved )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->rootid = BTRFS_I(inode)->root->objectid;
|
||||
__entry->ino = inode->i_ino;
|
||||
__entry->free_reserved = free_reserved;
|
||||
),
|
||||
|
||||
TP_printk("rootid=%llu, ino=%lu, free_reserved=%llu",
|
||||
__entry->rootid, __entry->ino, __entry->free_reserved)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(btrfs__qgroup_data_map, btrfs_qgroup_init_data_rsv_map,
|
||||
|
||||
TP_PROTO(struct inode *inode, u64 free_reserved),
|
||||
|
||||
TP_ARGS(inode, free_reserved)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(btrfs__qgroup_data_map, btrfs_qgroup_free_data_rsv_map,
|
||||
|
||||
TP_PROTO(struct inode *inode, u64 free_reserved),
|
||||
|
||||
TP_ARGS(inode, free_reserved)
|
||||
);
|
||||
|
||||
/*
 * Value/name pairs handed to __print_flags() to render the 'op' field
 * of the qgroup data reservation events as text.
 */
#define BTRFS_QGROUP_OPERATIONS					\
	{ QGROUP_RESERVE,	"reserve"	},		\
	{ QGROUP_RELEASE,	"release"	},		\
	{ QGROUP_FREE,		"free"		}
|
||||
|
||||
DECLARE_EVENT_CLASS(btrfs__qgroup_rsv_data,
|
||||
|
||||
TP_PROTO(struct inode *inode, u64 start, u64 len, u64 reserved, int op),
|
||||
|
||||
TP_ARGS(inode, start, len, reserved, op),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( u64, rootid )
|
||||
__field( unsigned long, ino )
|
||||
__field( u64, start )
|
||||
__field( u64, len )
|
||||
__field( u64, reserved )
|
||||
__field( int, op )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->rootid = BTRFS_I(inode)->root->objectid;
|
||||
__entry->ino = inode->i_ino;
|
||||
__entry->start = start;
|
||||
__entry->len = len;
|
||||
__entry->reserved = reserved;
|
||||
__entry->op = op;
|
||||
),
|
||||
|
||||
TP_printk("root=%llu, ino=%lu, start=%llu, len=%llu, reserved=%llu, op=%s",
|
||||
__entry->rootid, __entry->ino, __entry->start, __entry->len,
|
||||
__entry->reserved,
|
||||
__print_flags((unsigned long)__entry->op, "",
|
||||
BTRFS_QGROUP_OPERATIONS)
|
||||
)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(btrfs__qgroup_rsv_data, btrfs_qgroup_reserve_data,
|
||||
|
||||
TP_PROTO(struct inode *inode, u64 start, u64 len, u64 reserved, int op),
|
||||
|
||||
TP_ARGS(inode, start, len, reserved, op)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(btrfs__qgroup_rsv_data, btrfs_qgroup_release_data,
|
||||
|
||||
TP_PROTO(struct inode *inode, u64 start, u64 len, u64 reserved, int op),
|
||||
|
||||
TP_ARGS(inode, start, len, reserved, op)
|
||||
);
|
||||
|
||||
DECLARE_EVENT_CLASS(btrfs__qgroup_delayed_ref,
|
||||
|
||||
TP_PROTO(u64 ref_root, u64 reserved),
|
||||
|
||||
TP_ARGS(ref_root, reserved),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( u64, ref_root )
|
||||
__field( u64, reserved )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->ref_root = ref_root;
|
||||
__entry->reserved = reserved;
|
||||
),
|
||||
|
||||
TP_printk("root=%llu, reserved=%llu, op=free",
|
||||
__entry->ref_root, __entry->reserved)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(btrfs__qgroup_delayed_ref, btrfs_qgroup_free_delayed_ref,
|
||||
|
||||
TP_PROTO(u64 ref_root, u64 reserved),
|
||||
|
||||
TP_ARGS(ref_root, reserved)
|
||||
);
|
||||
#endif /* _TRACE_BTRFS_H */
|
||||
|
||||
/* This part must be outside protection */
|
||||
|
@ -206,7 +206,13 @@ struct btrfs_ioctl_feature_flags {
|
||||
*/
|
||||
struct btrfs_balance_args {
|
||||
__u64 profiles;
|
||||
__u64 usage;
|
||||
union {
|
||||
__le64 usage;
|
||||
struct {
|
||||
__le32 usage_min;
|
||||
__le32 usage_max;
|
||||
};
|
||||
};
|
||||
__u64 devid;
|
||||
__u64 pstart;
|
||||
__u64 pend;
|
||||
@ -217,8 +223,27 @@ struct btrfs_balance_args {
|
||||
|
||||
__u64 flags;
|
||||
|
||||
__u64 limit; /* limit number of processed chunks */
|
||||
__u64 unused[7];
|
||||
/*
|
||||
* BTRFS_BALANCE_ARGS_LIMIT with value 'limit'
|
||||
* BTRFS_BALANCE_ARGS_LIMIT_RANGE - the extended version can use minimum
|
||||
* and maximum
|
||||
*/
|
||||
union {
|
||||
__u64 limit; /* limit number of processed chunks */
|
||||
struct {
|
||||
__u32 limit_min;
|
||||
__u32 limit_max;
|
||||
};
|
||||
};
|
||||
|
||||
/*
|
||||
* Process chunks that cross stripes_min..stripes_max devices,
|
||||
* BTRFS_BALANCE_ARGS_STRIPES_RANGE
|
||||
*/
|
||||
__le32 stripes_min;
|
||||
__le32 stripes_max;
|
||||
|
||||
__u64 unused[6];
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
/* report balance progress to userspace */
|
||||
|
Loading…
Reference in New Issue
Block a user