diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index b4fb4155811..290e347b6db 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -918,7 +918,8 @@ again: ref->parent, bsz, 0); if (!eb || !extent_buffer_uptodate(eb)) { free_extent_buffer(eb); - return -EIO; + ret = -EIO; + goto out; } ret = find_extent_in_eb(eb, bytenr, *extent_item_pos, &eie); diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 18af6f48781..1431a696501 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -1700,7 +1700,7 @@ static int btrfsic_read_block(struct btrfsic_state *state, unsigned int j; DECLARE_COMPLETION_ONSTACK(complete); - bio = bio_alloc(GFP_NOFS, num_pages - i); + bio = btrfs_io_bio_alloc(GFP_NOFS, num_pages - i); if (!bio) { printk(KERN_INFO "btrfsic: bio_alloc() for %u pages failed!\n", diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index de6de8e60b4..02fae7f7e42 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -951,10 +951,12 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, BUG_ON(ret); /* -ENOMEM */ } if (new_flags != 0) { + int level = btrfs_header_level(buf); + ret = btrfs_set_disk_extent_flags(trans, root, buf->start, buf->len, - new_flags, 0); + new_flags, level, 0); if (ret) return ret; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 63c328a9ce9..d6dd49b51ba 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -88,12 +88,12 @@ struct btrfs_ordered_sum; /* holds checksums of all the data extents */ #define BTRFS_CSUM_TREE_OBJECTID 7ULL -/* for storing balance parameters in the root tree */ -#define BTRFS_BALANCE_OBJECTID -4ULL - /* holds quota configuration and tracking */ #define BTRFS_QUOTA_TREE_OBJECTID 8ULL +/* for storing balance parameters in the root tree */ +#define BTRFS_BALANCE_OBJECTID -4ULL + /* orhpan objectid for tracking unlinked/truncated files */ #define BTRFS_ORPHAN_OBJECTID -5ULL @@ -3075,7 +3075,7 @@ int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 flags, - int is_data); + int level, int is_data); int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index f75fcaf79ae..70b962cc177 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -60,6 +60,7 @@ struct btrfs_delayed_ref_node { struct btrfs_delayed_extent_op { struct btrfs_disk_key key; u64 flags_to_set; + int level; unsigned int update_key:1; unsigned int update_flags:1; unsigned int is_data:1; diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 7ba7b3900cb..65241f32d3f 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -313,6 +313,11 @@ int btrfs_dev_replace_start(struct btrfs_root *root, struct btrfs_device *tgt_device = NULL; struct btrfs_device *src_device = NULL; + if (btrfs_fs_incompat(fs_info, RAID56)) { + pr_warn("btrfs: dev_replace cannot yet handle RAID5/RAID6\n"); + return -EINVAL; + } + switch (args->start.cont_reading_from_srcdev_mode) { case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS: case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID: diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4e9ebe1f182..e7b3cb5286a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -152,7 +152,7 @@ static struct btrfs_lockdep_keyset { { .id = BTRFS_DEV_TREE_OBJECTID, .name_stem = "dev" }, { .id = BTRFS_FS_TREE_OBJECTID, .name_stem = "fs" }, { .id = BTRFS_CSUM_TREE_OBJECTID, .name_stem = "csum" }, - { .id = BTRFS_ORPHAN_OBJECTID, .name_stem = "orphan" }, + { .id = BTRFS_QUOTA_TREE_OBJECTID, .name_stem = "quota" }, { .id = BTRFS_TREE_LOG_OBJECTID, .name_stem = "log" }, { .id = BTRFS_TREE_RELOC_OBJECTID, .name_stem = "treloc" }, { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc" }, @@ -1513,7 +1513,6 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, } root->commit_root = btrfs_root_node(root); - BUG_ON(!root->node); /* -ENOMEM */ out: if (location->objectid != BTRFS_TREE_LOG_OBJECTID) { root->ref_cows = 1; @@ -1988,30 +1987,33 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root) { free_extent_buffer(info->tree_root->node); free_extent_buffer(info->tree_root->commit_root); - free_extent_buffer(info->dev_root->node); - free_extent_buffer(info->dev_root->commit_root); - free_extent_buffer(info->extent_root->node); - free_extent_buffer(info->extent_root->commit_root); - free_extent_buffer(info->csum_root->node); - free_extent_buffer(info->csum_root->commit_root); + info->tree_root->node = NULL; + info->tree_root->commit_root = NULL; + + if (info->dev_root) { + free_extent_buffer(info->dev_root->node); + free_extent_buffer(info->dev_root->commit_root); + info->dev_root->node = NULL; + info->dev_root->commit_root = NULL; + } + if (info->extent_root) { + free_extent_buffer(info->extent_root->node); + free_extent_buffer(info->extent_root->commit_root); + info->extent_root->node = NULL; + info->extent_root->commit_root = NULL; + } + if (info->csum_root) { + free_extent_buffer(info->csum_root->node); + free_extent_buffer(info->csum_root->commit_root); + info->csum_root->node = NULL; + info->csum_root->commit_root = NULL; + } if (info->quota_root) { free_extent_buffer(info->quota_root->node); free_extent_buffer(info->quota_root->commit_root); - } - - info->tree_root->node = NULL; - info->tree_root->commit_root = NULL; - info->dev_root->node = NULL; - info->dev_root->commit_root = NULL; - info->extent_root->node = NULL; - info->extent_root->commit_root = NULL; - info->csum_root->node = NULL; - info->csum_root->commit_root = NULL; - if (info->quota_root) { info->quota_root->node = NULL; info->quota_root->commit_root = NULL; } - if (chunk_root) { free_extent_buffer(info->chunk_root->node); free_extent_buffer(info->chunk_root->commit_root); @@ -3128,7 +3130,7 @@ static int write_dev_flush(struct btrfs_device *device, int wait) * caller */ device->flush_bio = NULL; - bio = bio_alloc(GFP_NOFS, 0); + bio = btrfs_io_bio_alloc(GFP_NOFS, 0); if (!bio) return -ENOMEM; @@ -3659,8 +3661,11 @@ static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t, ordered_operations); list_del_init(&btrfs_inode->ordered_operations); + spin_unlock(&root->fs_info->ordered_extent_lock); btrfs_invalidate_inodes(btrfs_inode->root); + + spin_lock(&root->fs_info->ordered_extent_lock); } spin_unlock(&root->fs_info->ordered_extent_lock); @@ -3782,8 +3787,11 @@ static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root) list_del_init(&btrfs_inode->delalloc_inodes); clear_bit(BTRFS_INODE_IN_DELALLOC_LIST, &btrfs_inode->runtime_flags); + spin_unlock(&root->fs_info->delalloc_lock); btrfs_invalidate_inodes(btrfs_inode->root); + + spin_lock(&root->fs_info->delalloc_lock); } spin_unlock(&root->fs_info->delalloc_lock); @@ -3808,7 +3816,7 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root, while (start <= end) { eb = btrfs_find_tree_block(root, start, root->leafsize); - start += eb->len; + start += root->leafsize; if (!eb) continue; wait_on_extent_buffer_writeback(eb); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 2305b5c5cf0..df472ab1b5a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2070,8 +2070,7 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans, u32 item_size; int ret; int err = 0; - int metadata = (node->type == BTRFS_TREE_BLOCK_REF_KEY || - node->type == BTRFS_SHARED_BLOCK_REF_KEY); + int metadata = !extent_op->is_data; if (trans->aborted) return 0; @@ -2086,11 +2085,8 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans, key.objectid = node->bytenr; if (metadata) { - struct btrfs_delayed_tree_ref *tree_ref; - - tree_ref = btrfs_delayed_node_to_tree_ref(node); key.type = BTRFS_METADATA_ITEM_KEY; - key.offset = tree_ref->level; + key.offset = extent_op->level; } else { key.type = BTRFS_EXTENT_ITEM_KEY; key.offset = node->num_bytes; @@ -2719,7 +2715,7 @@ out: int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 flags, - int is_data) + int level, int is_data) { struct btrfs_delayed_extent_op *extent_op; int ret; @@ -2732,6 +2728,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, extent_op->update_flags = 1; extent_op->update_key = 0; extent_op->is_data = is_data ? 1 : 0; + extent_op->level = level; ret = btrfs_add_delayed_extent_op(root->fs_info, trans, bytenr, num_bytes, extent_op); @@ -3109,6 +3106,11 @@ again: WARN_ON(ret); if (i_size_read(inode) > 0) { + ret = btrfs_check_trunc_cache_free_space(root, + &root->fs_info->global_block_rsv); + if (ret) + goto out_put; + ret = btrfs_truncate_free_space_cache(root, trans, path, inode); if (ret) @@ -4562,6 +4564,8 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info) fs_info->csum_root->block_rsv = &fs_info->global_block_rsv; fs_info->dev_root->block_rsv = &fs_info->global_block_rsv; fs_info->tree_root->block_rsv = &fs_info->global_block_rsv; + if (fs_info->quota_root) + fs_info->quota_root->block_rsv = &fs_info->global_block_rsv; fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv; update_global_block_rsv(fs_info); @@ -6651,51 +6655,51 @@ use_block_rsv(struct btrfs_trans_handle *trans, struct btrfs_block_rsv *block_rsv; struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv; int ret; + bool global_updated = false; block_rsv = get_block_rsv(trans, root); - if (block_rsv->size == 0) { - ret = reserve_metadata_bytes(root, block_rsv, blocksize, - BTRFS_RESERVE_NO_FLUSH); - /* - * If we couldn't reserve metadata bytes try and use some from - * the global reserve. - */ - if (ret && block_rsv != global_rsv) { - ret = block_rsv_use_bytes(global_rsv, blocksize); - if (!ret) - return global_rsv; - return ERR_PTR(ret); - } else if (ret) { - return ERR_PTR(ret); - } - return block_rsv; - } - + if (unlikely(block_rsv->size == 0)) + goto try_reserve; +again: ret = block_rsv_use_bytes(block_rsv, blocksize); if (!ret) return block_rsv; - if (ret && !block_rsv->failfast) { - if (btrfs_test_opt(root, ENOSPC_DEBUG)) { - static DEFINE_RATELIMIT_STATE(_rs, - DEFAULT_RATELIMIT_INTERVAL * 10, - /*DEFAULT_RATELIMIT_BURST*/ 1); - if (__ratelimit(&_rs)) - WARN(1, KERN_DEBUG - "btrfs: block rsv returned %d\n", ret); - } - ret = reserve_metadata_bytes(root, block_rsv, blocksize, - BTRFS_RESERVE_NO_FLUSH); - if (!ret) { - return block_rsv; - } else if (ret && block_rsv != global_rsv) { - ret = block_rsv_use_bytes(global_rsv, blocksize); - if (!ret) - return global_rsv; - } + + if (block_rsv->failfast) + return ERR_PTR(ret); + + if (block_rsv->type == BTRFS_BLOCK_RSV_GLOBAL && !global_updated) { + global_updated = true; + update_global_block_rsv(root->fs_info); + goto again; } - return ERR_PTR(-ENOSPC); + if (btrfs_test_opt(root, ENOSPC_DEBUG)) { + static DEFINE_RATELIMIT_STATE(_rs, + DEFAULT_RATELIMIT_INTERVAL * 10, + /*DEFAULT_RATELIMIT_BURST*/ 1); + if (__ratelimit(&_rs)) + WARN(1, KERN_DEBUG + "btrfs: block rsv returned %d\n", ret); + } +try_reserve: + ret = reserve_metadata_bytes(root, block_rsv, blocksize, + BTRFS_RESERVE_NO_FLUSH); + if (!ret) + return block_rsv; + /* + * If we couldn't reserve metadata bytes try and use some from + * the global reserve if its space type is the same as the global + * reservation. + */ + if (block_rsv->type != BTRFS_BLOCK_RSV_GLOBAL && + block_rsv->space_info == global_rsv->space_info) { + ret = block_rsv_use_bytes(global_rsv, blocksize); + if (!ret) + return global_rsv; + } + return ERR_PTR(ret); } static void unuse_block_rsv(struct btrfs_fs_info *fs_info, @@ -6763,6 +6767,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, extent_op->update_key = 1; extent_op->update_flags = 1; extent_op->is_data = 0; + extent_op->level = level; ret = btrfs_add_delayed_tree_ref(root->fs_info, trans, ins.objectid, @@ -6934,7 +6939,8 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, ret = btrfs_dec_ref(trans, root, eb, 0, wc->for_reloc); BUG_ON(ret); /* -ENOMEM */ ret = btrfs_set_disk_extent_flags(trans, root, eb->start, - eb->len, flag, 0); + eb->len, flag, + btrfs_header_level(eb), 0); BUG_ON(ret); /* -ENOMEM */ wc->flags[level] |= flag; } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 32d67a822e9..e7e7afb4a87 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -23,6 +23,7 @@ static struct kmem_cache *extent_state_cache; static struct kmem_cache *extent_buffer_cache; +static struct bio_set *btrfs_bioset; #ifdef CONFIG_BTRFS_DEBUG static LIST_HEAD(buffers); @@ -125,10 +126,20 @@ int __init extent_io_init(void) SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); if (!extent_buffer_cache) goto free_state_cache; + + btrfs_bioset = bioset_create(BIO_POOL_SIZE, + offsetof(struct btrfs_io_bio, bio)); + if (!btrfs_bioset) + goto free_buffer_cache; return 0; +free_buffer_cache: + kmem_cache_destroy(extent_buffer_cache); + extent_buffer_cache = NULL; + free_state_cache: kmem_cache_destroy(extent_state_cache); + extent_state_cache = NULL; return -ENOMEM; } @@ -145,6 +156,8 @@ void extent_io_exit(void) kmem_cache_destroy(extent_state_cache); if (extent_buffer_cache) kmem_cache_destroy(extent_buffer_cache); + if (btrfs_bioset) + bioset_free(btrfs_bioset); } void extent_io_tree_init(struct extent_io_tree *tree, @@ -1947,28 +1960,6 @@ static void check_page_uptodate(struct extent_io_tree *tree, struct page *page) SetPageUptodate(page); } -/* - * helper function to unlock a page if all the extents in the tree - * for that page are unlocked - */ -static void check_page_locked(struct extent_io_tree *tree, struct page *page) -{ - u64 start = page_offset(page); - u64 end = start + PAGE_CACHE_SIZE - 1; - if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL)) - unlock_page(page); -} - -/* - * helper function to end page writeback if all the extents - * in the tree for that page are done with writeback - */ -static void check_page_writeback(struct extent_io_tree *tree, - struct page *page) -{ - end_page_writeback(page); -} - /* * When IO fails, either with EIO or csum verification fails, we * try other mirrors that might have a good copy of the data. This @@ -2046,7 +2037,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start, if (btrfs_is_parity_mirror(map_tree, logical, length, mirror_num)) return 0; - bio = bio_alloc(GFP_NOFS, 1); + bio = btrfs_io_bio_alloc(GFP_NOFS, 1); if (!bio) return -EIO; bio->bi_private = &compl; @@ -2336,7 +2327,7 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page, return -EIO; } - bio = bio_alloc(GFP_NOFS, 1); + bio = btrfs_io_bio_alloc(GFP_NOFS, 1); if (!bio) { free_io_failure(inode, failrec, 0); return -EIO; @@ -2398,19 +2389,24 @@ static void end_bio_extent_writepage(struct bio *bio, int err) struct extent_io_tree *tree; u64 start; u64 end; - int whole_page; do { struct page *page = bvec->bv_page; tree = &BTRFS_I(page->mapping->host)->io_tree; - start = page_offset(page) + bvec->bv_offset; - end = start + bvec->bv_len - 1; + /* We always issue full-page reads, but if some block + * in a page fails to read, blk_update_request() will + * advance bv_offset and adjust bv_len to compensate. + * Print a warning for nonzero offsets, and an error + * if they don't add up to a full page. */ + if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE) + printk("%s page write in btrfs with offset %u and length %u\n", + bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE + ? KERN_ERR "partial" : KERN_INFO "incomplete", + bvec->bv_offset, bvec->bv_len); - if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) - whole_page = 1; - else - whole_page = 0; + start = page_offset(page); + end = start + bvec->bv_offset + bvec->bv_len - 1; if (--bvec >= bio->bi_io_vec) prefetchw(&bvec->bv_page->flags); @@ -2418,10 +2414,7 @@ static void end_bio_extent_writepage(struct bio *bio, int err) if (end_extent_writepage(page, err, start, end)) continue; - if (whole_page) - end_page_writeback(page); - else - check_page_writeback(tree, page); + end_page_writeback(page); } while (bvec >= bio->bi_io_vec); bio_put(bio); @@ -2446,7 +2439,6 @@ static void end_bio_extent_readpage(struct bio *bio, int err) struct extent_io_tree *tree; u64 start; u64 end; - int whole_page; int mirror; int ret; @@ -2457,19 +2449,26 @@ static void end_bio_extent_readpage(struct bio *bio, int err) struct page *page = bvec->bv_page; struct extent_state *cached = NULL; struct extent_state *state; + struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, " - "mirror=%ld\n", (u64)bio->bi_sector, err, - (long int)bio->bi_bdev); + "mirror=%lu\n", (u64)bio->bi_sector, err, + io_bio->mirror_num); tree = &BTRFS_I(page->mapping->host)->io_tree; - start = page_offset(page) + bvec->bv_offset; - end = start + bvec->bv_len - 1; + /* We always issue full-page reads, but if some block + * in a page fails to read, blk_update_request() will + * advance bv_offset and adjust bv_len to compensate. + * Print a warning for nonzero offsets, and an error + * if they don't add up to a full page. */ + if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE) + printk("%s page read in btrfs with offset %u and length %u\n", + bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE + ? KERN_ERR "partial" : KERN_INFO "incomplete", + bvec->bv_offset, bvec->bv_len); - if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) - whole_page = 1; - else - whole_page = 0; + start = page_offset(page); + end = start + bvec->bv_offset + bvec->bv_len - 1; if (++bvec <= bvec_end) prefetchw(&bvec->bv_page->flags); @@ -2485,7 +2484,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err) } spin_unlock(&tree->lock); - mirror = (int)(unsigned long)bio->bi_bdev; + mirror = io_bio->mirror_num; if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { ret = tree->ops->readpage_end_io_hook(page, start, end, state, mirror); @@ -2528,39 +2527,35 @@ static void end_bio_extent_readpage(struct bio *bio, int err) } unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC); - if (whole_page) { - if (uptodate) { - SetPageUptodate(page); - } else { - ClearPageUptodate(page); - SetPageError(page); - } - unlock_page(page); + if (uptodate) { + SetPageUptodate(page); } else { - if (uptodate) { - check_page_uptodate(tree, page); - } else { - ClearPageUptodate(page); - SetPageError(page); - } - check_page_locked(tree, page); + ClearPageUptodate(page); + SetPageError(page); } + unlock_page(page); } while (bvec <= bvec_end); bio_put(bio); } +/* + * this allocates from the btrfs_bioset. We're returning a bio right now + * but you can call btrfs_io_bio for the appropriate container_of magic + */ struct bio * btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, gfp_t gfp_flags) { struct bio *bio; - bio = bio_alloc(gfp_flags, nr_vecs); + bio = bio_alloc_bioset(gfp_flags, nr_vecs, btrfs_bioset); if (bio == NULL && (current->flags & PF_MEMALLOC)) { - while (!bio && (nr_vecs /= 2)) - bio = bio_alloc(gfp_flags, nr_vecs); + while (!bio && (nr_vecs /= 2)) { + bio = bio_alloc_bioset(gfp_flags, + nr_vecs, btrfs_bioset); + } } if (bio) { @@ -2571,6 +2566,19 @@ btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, return bio; } +struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask) +{ + return bio_clone_bioset(bio, gfp_mask, btrfs_bioset); +} + + +/* this also allocates from the btrfs_bioset */ +struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs) +{ + return bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset); +} + + static int __must_check submit_one_bio(int rw, struct bio *bio, int mirror_num, unsigned long bio_flags) { @@ -3988,7 +3996,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, last_for_get_extent = isize; } - lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0, + lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len - 1, 0, &cached_state); em = get_extent_skip_holes(inode, start, last_for_get_extent, @@ -4075,7 +4083,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, out_free: free_extent_map(em); out: - unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len, + unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len - 1, &cached_state, GFP_NOFS); return ret; } diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index a2c03a17500..41fb81e7ec5 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -336,6 +336,8 @@ int extent_clear_unlock_delalloc(struct inode *inode, struct bio * btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, gfp_t gfp_flags); +struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs); +struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask); struct btrfs_fs_info; diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index ecca6c7375a..e53009657f0 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -197,30 +197,32 @@ int create_free_space_inode(struct btrfs_root *root, block_group->key.objectid); } -int btrfs_truncate_free_space_cache(struct btrfs_root *root, - struct btrfs_trans_handle *trans, - struct btrfs_path *path, - struct inode *inode) +int btrfs_check_trunc_cache_free_space(struct btrfs_root *root, + struct btrfs_block_rsv *rsv) { - struct btrfs_block_rsv *rsv; u64 needed_bytes; - loff_t oldsize; - int ret = 0; - - rsv = trans->block_rsv; - trans->block_rsv = &root->fs_info->global_block_rsv; + int ret; /* 1 for slack space, 1 for updating the inode */ needed_bytes = btrfs_calc_trunc_metadata_size(root, 1) + btrfs_calc_trans_metadata_size(root, 1); - spin_lock(&trans->block_rsv->lock); - if (trans->block_rsv->reserved < needed_bytes) { - spin_unlock(&trans->block_rsv->lock); - trans->block_rsv = rsv; - return -ENOSPC; - } - spin_unlock(&trans->block_rsv->lock); + spin_lock(&rsv->lock); + if (rsv->reserved < needed_bytes) + ret = -ENOSPC; + else + ret = 0; + spin_unlock(&rsv->lock); + return 0; +} + +int btrfs_truncate_free_space_cache(struct btrfs_root *root, + struct btrfs_trans_handle *trans, + struct btrfs_path *path, + struct inode *inode) +{ + loff_t oldsize; + int ret = 0; oldsize = i_size_read(inode); btrfs_i_size_write(inode, 0); @@ -232,9 +234,7 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root, */ ret = btrfs_truncate_inode_items(trans, root, inode, 0, BTRFS_EXTENT_DATA_KEY); - if (ret) { - trans->block_rsv = rsv; btrfs_abort_transaction(trans, root, ret); return ret; } @@ -242,7 +242,6 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root, ret = btrfs_update_inode(trans, root, inode); if (ret) btrfs_abort_transaction(trans, root, ret); - trans->block_rsv = rsv; return ret; } @@ -920,10 +919,8 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, /* Make sure we can fit our crcs into the first page */ if (io_ctl.check_crcs && - (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) { - WARN_ON(1); + (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) goto out_nospc; - } io_ctl_set_generation(&io_ctl, trans->transid); diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h index 4dc17d8809c..8b7f19f4496 100644 --- a/fs/btrfs/free-space-cache.h +++ b/fs/btrfs/free-space-cache.h @@ -54,6 +54,8 @@ int create_free_space_inode(struct btrfs_root *root, struct btrfs_block_group_cache *block_group, struct btrfs_path *path); +int btrfs_check_trunc_cache_free_space(struct btrfs_root *root, + struct btrfs_block_rsv *rsv); int btrfs_truncate_free_space_cache(struct btrfs_root *root, struct btrfs_trans_handle *trans, struct btrfs_path *path, diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index d26f67a59e3..2c66ddbbe67 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -429,11 +429,12 @@ int btrfs_save_ino_cache(struct btrfs_root *root, num_bytes = trans->bytes_reserved; /* * 1 item for inode item insertion if need - * 3 items for inode item update (in the worst case) + * 4 items for inode item update (in the worst case) + * 1 items for slack space if we need do truncation * 1 item for free space object * 3 items for pre-allocation */ - trans->bytes_reserved = btrfs_calc_trans_metadata_size(root, 8); + trans->bytes_reserved = btrfs_calc_trans_metadata_size(root, 10); ret = btrfs_block_rsv_add(root, trans->block_rsv, trans->bytes_reserved, BTRFS_RESERVE_NO_FLUSH); @@ -468,7 +469,8 @@ again: if (i_size_read(inode) > 0) { ret = btrfs_truncate_free_space_cache(root, trans, path, inode); if (ret) { - btrfs_abort_transaction(trans, root, ret); + if (ret != -ENOSPC) + btrfs_abort_transaction(trans, root, ret); goto out_put; } } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 9b31b3b091f..af978f7682b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -715,8 +715,10 @@ retry: async_extent->ram_size - 1, 0); em = alloc_extent_map(); - if (!em) + if (!em) { + ret = -ENOMEM; goto out_free_reserve; + } em->start = async_extent->start; em->len = async_extent->ram_size; em->orig_start = em->start; @@ -923,8 +925,10 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans, } em = alloc_extent_map(); - if (!em) + if (!em) { + ret = -ENOMEM; goto out_reserve; + } em->start = start; em->orig_start = em->start; ram_size = ins.offset; @@ -4724,6 +4728,7 @@ void btrfs_evict_inode(struct inode *inode) btrfs_end_transaction(trans, root); btrfs_btree_balance_dirty(root); no_delete: + btrfs_remove_delayed_node(inode); clear_inode(inode); return; } @@ -4839,14 +4844,13 @@ static void inode_tree_add(struct inode *inode) struct rb_node **p; struct rb_node *parent; u64 ino = btrfs_ino(inode); -again: - p = &root->inode_tree.rb_node; - parent = NULL; if (inode_unhashed(inode)) return; - +again: + parent = NULL; spin_lock(&root->inode_lock); + p = &root->inode_tree.rb_node; while (*p) { parent = *p; entry = rb_entry(parent, struct btrfs_inode, rb_node); @@ -6928,7 +6932,11 @@ struct btrfs_dio_private { /* IO errors */ int errors; + /* orig_bio is our btrfs_io_bio */ struct bio *orig_bio; + + /* dio_bio came from fs/direct-io.c */ + struct bio *dio_bio; }; static void btrfs_endio_direct_read(struct bio *bio, int err) @@ -6938,6 +6946,7 @@ static void btrfs_endio_direct_read(struct bio *bio, int err) struct bio_vec *bvec = bio->bi_io_vec; struct inode *inode = dip->inode; struct btrfs_root *root = BTRFS_I(inode)->root; + struct bio *dio_bio; u64 start; start = dip->logical_offset; @@ -6977,14 +6986,15 @@ failed: unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset, dip->logical_offset + dip->bytes - 1); - bio->bi_private = dip->private; + dio_bio = dip->dio_bio; kfree(dip); /* If we had a csum failure make sure to clear the uptodate flag */ if (err) - clear_bit(BIO_UPTODATE, &bio->bi_flags); - dio_end_io(bio, err); + clear_bit(BIO_UPTODATE, &dio_bio->bi_flags); + dio_end_io(dio_bio, err); + bio_put(bio); } static void btrfs_endio_direct_write(struct bio *bio, int err) @@ -6995,6 +7005,7 @@ static void btrfs_endio_direct_write(struct bio *bio, int err) struct btrfs_ordered_extent *ordered = NULL; u64 ordered_offset = dip->logical_offset; u64 ordered_bytes = dip->bytes; + struct bio *dio_bio; int ret; if (err) @@ -7022,14 +7033,15 @@ out_test: goto again; } out_done: - bio->bi_private = dip->private; + dio_bio = dip->dio_bio; kfree(dip); /* If we had an error make sure to clear the uptodate flag */ if (err) - clear_bit(BIO_UPTODATE, &bio->bi_flags); - dio_end_io(bio, err); + clear_bit(BIO_UPTODATE, &dio_bio->bi_flags); + dio_end_io(dio_bio, err); + bio_put(bio); } static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw, @@ -7065,10 +7077,10 @@ static void btrfs_end_dio_bio(struct bio *bio, int err) if (!atomic_dec_and_test(&dip->pending_bios)) goto out; - if (dip->errors) + if (dip->errors) { bio_io_error(dip->orig_bio); - else { - set_bit(BIO_UPTODATE, &dip->orig_bio->bi_flags); + } else { + set_bit(BIO_UPTODATE, &dip->dio_bio->bi_flags); bio_endio(dip->orig_bio, 0); } out: @@ -7243,25 +7255,34 @@ out_err: return 0; } -static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, - loff_t file_offset) +static void btrfs_submit_direct(int rw, struct bio *dio_bio, + struct inode *inode, loff_t file_offset) { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_dio_private *dip; - struct bio_vec *bvec = bio->bi_io_vec; + struct bio_vec *bvec = dio_bio->bi_io_vec; + struct bio *io_bio; int skip_sum; int write = rw & REQ_WRITE; int ret = 0; skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; - dip = kmalloc(sizeof(*dip), GFP_NOFS); - if (!dip) { + io_bio = btrfs_bio_clone(dio_bio, GFP_NOFS); + + if (!io_bio) { ret = -ENOMEM; goto free_ordered; } - dip->private = bio->bi_private; + dip = kmalloc(sizeof(*dip), GFP_NOFS); + if (!dip) { + ret = -ENOMEM; + goto free_io_bio; + } + + dip->private = dio_bio->bi_private; + io_bio->bi_private = dio_bio->bi_private; dip->inode = inode; dip->logical_offset = file_offset; @@ -7269,22 +7290,27 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, do { dip->bytes += bvec->bv_len; bvec++; - } while (bvec <= (bio->bi_io_vec + bio->bi_vcnt - 1)); + } while (bvec <= (dio_bio->bi_io_vec + dio_bio->bi_vcnt - 1)); - dip->disk_bytenr = (u64)bio->bi_sector << 9; - bio->bi_private = dip; + dip->disk_bytenr = (u64)dio_bio->bi_sector << 9; + io_bio->bi_private = dip; dip->errors = 0; - dip->orig_bio = bio; + dip->orig_bio = io_bio; + dip->dio_bio = dio_bio; atomic_set(&dip->pending_bios, 0); if (write) - bio->bi_end_io = btrfs_endio_direct_write; + io_bio->bi_end_io = btrfs_endio_direct_write; else - bio->bi_end_io = btrfs_endio_direct_read; + io_bio->bi_end_io = btrfs_endio_direct_read; ret = btrfs_submit_direct_hook(rw, dip, skip_sum); if (!ret) return; + +free_io_bio: + bio_put(io_bio); + free_ordered: /* * If this is a write, we need to clean up the reserved space and kill @@ -7300,7 +7326,7 @@ free_ordered: btrfs_put_ordered_extent(ordered); btrfs_put_ordered_extent(ordered); } - bio_endio(bio, ret); + bio_endio(dio_bio, ret); } static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *iocb, @@ -7979,7 +8005,6 @@ void btrfs_destroy_inode(struct inode *inode) inode_tree_del(inode); btrfs_drop_extent_cache(inode, 0, (u64)-1, 0); free: - btrfs_remove_delayed_node(inode); call_rcu(&inode->i_rcu, btrfs_i_callback); } diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 0de4a2fcfb2..0f81d67cdc8 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1801,7 +1801,11 @@ static noinline int copy_to_sk(struct btrfs_root *root, item_off = btrfs_item_ptr_offset(leaf, i); item_len = btrfs_item_size_nr(leaf, i); - if (item_len > BTRFS_SEARCH_ARGS_BUFSIZE) + btrfs_item_key_to_cpu(leaf, key, i); + if (!key_in_sk(key, sk)) + continue; + + if (sizeof(sh) + item_len > BTRFS_SEARCH_ARGS_BUFSIZE) item_len = 0; if (sizeof(sh) + item_len + *sk_offset > @@ -1810,10 +1814,6 @@ static noinline int copy_to_sk(struct btrfs_root *root, goto overflow; } - btrfs_item_key_to_cpu(leaf, key, i); - if (!key_in_sk(key, sk)) - continue; - sh.objectid = key->objectid; sh.offset = key->offset; sh.type = key->type; diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 0740621daf6..0525e1389f5 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -1050,7 +1050,7 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio, } /* put a new bio on the list */ - bio = bio_alloc(GFP_NOFS, bio_max_len >> PAGE_SHIFT?:1); + bio = btrfs_io_bio_alloc(GFP_NOFS, bio_max_len >> PAGE_SHIFT?:1); if (!bio) return -ENOMEM; diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 704a1b8d2a2..395b82031a4 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1773,7 +1773,7 @@ again: if (!eb || !extent_buffer_uptodate(eb)) { ret = (!eb) ? -ENOMEM : -EIO; free_extent_buffer(eb); - return ret; + break; } btrfs_tree_lock(eb); if (cow) { @@ -3350,6 +3350,11 @@ static int delete_block_group_cache(struct btrfs_fs_info *fs_info, } truncate: + ret = btrfs_check_trunc_cache_free_space(root, + &fs_info->global_block_rsv); + if (ret) + goto out; + path = btrfs_alloc_path(); if (!path) { ret = -ENOMEM; diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index f489e24659a..79bd479317c 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1296,7 +1296,7 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info, } WARN_ON(!page->page); - bio = bio_alloc(GFP_NOFS, 1); + bio = btrfs_io_bio_alloc(GFP_NOFS, 1); if (!bio) { page->io_error = 1; sblock->no_io_error_seen = 0; @@ -1431,7 +1431,7 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, return -EIO; } - bio = bio_alloc(GFP_NOFS, 1); + bio = btrfs_io_bio_alloc(GFP_NOFS, 1); if (!bio) return -EIO; bio->bi_bdev = page_bad->dev->bdev; @@ -1522,7 +1522,7 @@ again: sbio->dev = wr_ctx->tgtdev; bio = sbio->bio; if (!bio) { - bio = bio_alloc(GFP_NOFS, wr_ctx->pages_per_wr_bio); + bio = btrfs_io_bio_alloc(GFP_NOFS, wr_ctx->pages_per_wr_bio); if (!bio) { mutex_unlock(&wr_ctx->wr_lock); return -ENOMEM; @@ -1930,7 +1930,7 @@ again: sbio->dev = spage->dev; bio = sbio->bio; if (!bio) { - bio = bio_alloc(GFP_NOFS, sctx->pages_per_rd_bio); + bio = btrfs_io_bio_alloc(GFP_NOFS, sctx->pages_per_rd_bio); if (!bio) return -ENOMEM; sbio->bio = bio; @@ -3307,7 +3307,7 @@ static int write_page_nocow(struct scrub_ctx *sctx, "btrfs: scrub write_page_nocow(bdev == NULL) is unexpected!\n"); return -EIO; } - bio = bio_alloc(GFP_NOFS, 1); + bio = btrfs_io_bio_alloc(GFP_NOFS, 1); if (!bio) { spin_lock(&sctx->stat_lock); sctx->stat.malloc_errors++; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index a4807ced23c..f0857e092a3 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1263,6 +1263,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) btrfs_dev_replace_suspend_for_unmount(fs_info); btrfs_scrub_cancel(fs_info); + btrfs_pause_balance(fs_info); ret = btrfs_commit_super(root); if (ret) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 0e925ced971..8bffb9174af 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3120,14 +3120,13 @@ int btrfs_balance(struct btrfs_balance_control *bctl, allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE; if (num_devices == 1) allowed |= BTRFS_BLOCK_GROUP_DUP; - else if (num_devices < 4) + else if (num_devices > 1) allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1); - else - allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | - BTRFS_BLOCK_GROUP_RAID10 | - BTRFS_BLOCK_GROUP_RAID5 | - BTRFS_BLOCK_GROUP_RAID6); - + if (num_devices > 2) + allowed |= BTRFS_BLOCK_GROUP_RAID5; + if (num_devices > 3) + allowed |= (BTRFS_BLOCK_GROUP_RAID10 | + BTRFS_BLOCK_GROUP_RAID6); if ((bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) && (!alloc_profile_is_valid(bctl->data.target, 1) || (bctl->data.target & ~allowed))) { @@ -5019,42 +5018,16 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, return 0; } -static void *merge_stripe_index_into_bio_private(void *bi_private, - unsigned int stripe_index) -{ - /* - * with single, dup, RAID0, RAID1 and RAID10, stripe_index is - * at most 1. - * The alternative solution (instead of stealing bits from the - * pointer) would be to allocate an intermediate structure - * that contains the old private pointer plus the stripe_index. - */ - BUG_ON((((uintptr_t)bi_private) & 3) != 0); - BUG_ON(stripe_index > 3); - return (void *)(((uintptr_t)bi_private) | stripe_index); -} - -static struct btrfs_bio *extract_bbio_from_bio_private(void *bi_private) -{ - return (struct btrfs_bio *)(((uintptr_t)bi_private) & ~((uintptr_t)3)); -} - -static unsigned int extract_stripe_index_from_bio_private(void *bi_private) -{ - return (unsigned int)((uintptr_t)bi_private) & 3; -} - static void btrfs_end_bio(struct bio *bio, int err) { - struct btrfs_bio *bbio = extract_bbio_from_bio_private(bio->bi_private); + struct btrfs_bio *bbio = bio->bi_private; int is_orig_bio = 0; if (err) { atomic_inc(&bbio->error); if (err == -EIO || err == -EREMOTEIO) { unsigned int stripe_index = - extract_stripe_index_from_bio_private( - bio->bi_private); + btrfs_io_bio(bio)->stripe_index; struct btrfs_device *dev; BUG_ON(stripe_index >= bbio->num_stripes); @@ -5084,8 +5057,7 @@ static void btrfs_end_bio(struct bio *bio, int err) } bio->bi_private = bbio->private; bio->bi_end_io = bbio->end_io; - bio->bi_bdev = (struct block_device *) - (unsigned long)bbio->mirror_num; + btrfs_io_bio(bio)->mirror_num = bbio->mirror_num; /* only send an error to the higher layers if it is * beyond the tolerance of the btrfs bio */ @@ -5211,8 +5183,7 @@ static void submit_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio, struct btrfs_device *dev = bbio->stripes[dev_nr].dev; bio->bi_private = bbio; - bio->bi_private = merge_stripe_index_into_bio_private( - bio->bi_private, (unsigned int)dev_nr); + btrfs_io_bio(bio)->stripe_index = dev_nr; bio->bi_end_io = btrfs_end_bio; bio->bi_sector = physical >> 9; #ifdef DEBUG @@ -5273,8 +5244,7 @@ static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical) if (atomic_dec_and_test(&bbio->stripes_pending)) { bio->bi_private = bbio->private; bio->bi_end_io = bbio->end_io; - bio->bi_bdev = (struct block_device *) - (unsigned long)bbio->mirror_num; + btrfs_io_bio(bio)->mirror_num = bbio->mirror_num; bio->bi_sector = logical >> 9; kfree(bbio); bio_endio(bio, -EIO); @@ -5352,7 +5322,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, } if (dev_nr < total_devs - 1) { - bio = bio_clone(first_bio, GFP_NOFS); + bio = btrfs_bio_clone(first_bio, GFP_NOFS); BUG_ON(!bio); /* -ENOMEM */ } else { bio = first_bio; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 845ccbb0d2e..f6247e2a47f 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -152,6 +152,26 @@ struct btrfs_fs_devices { int rotating; }; +/* + * we need the mirror number and stripe index to be passed around + * the call chain while we are processing end_io (especially errors). + * Really, what we need is a btrfs_bio structure that has this info + * and is properly sized with its stripe array, but we're not there + * quite yet. We have our own btrfs bioset, and all of the bios + * we allocate are actually btrfs_io_bios. We'll cram as much of + * struct btrfs_bio as we can into this over time. + */ +struct btrfs_io_bio { + unsigned long mirror_num; + unsigned long stripe_index; + struct bio bio; +}; + +static inline struct btrfs_io_bio *btrfs_io_bio(struct bio *bio) +{ + return container_of(bio, struct btrfs_io_bio, bio); +} + struct btrfs_bio_stripe { struct btrfs_device *dev; u64 physical;