diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index d81c653b9bf6..7915f3b03736 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -181,6 +181,7 @@ struct rbd_image_header { struct rbd_spec { u64 pool_id; const char *pool_name; + const char *pool_ns; /* NULL if default, never "" */ const char *image_id; const char *image_name; @@ -735,6 +736,7 @@ enum { Opt_lock_timeout, Opt_last_int, /* int args above */ + Opt_pool_ns, Opt_last_string, /* string args above */ Opt_read_only, @@ -749,6 +751,7 @@ static match_table_t rbd_opts_tokens = { {Opt_queue_depth, "queue_depth=%d"}, {Opt_lock_timeout, "lock_timeout=%d"}, /* int args above */ + {Opt_pool_ns, "_pool_ns=%s"}, /* string args above */ {Opt_read_only, "read_only"}, {Opt_read_only, "ro"}, /* Alternate spelling */ @@ -776,9 +779,14 @@ struct rbd_options { #define RBD_EXCLUSIVE_DEFAULT false #define RBD_TRIM_DEFAULT true +struct parse_rbd_opts_ctx { + struct rbd_spec *spec; + struct rbd_options *opts; +}; + static int parse_rbd_opts_token(char *c, void *private) { - struct rbd_options *rbd_opts = private; + struct parse_rbd_opts_ctx *pctx = private; substring_t argstr[MAX_OPT_ARGS]; int token, intval, ret; @@ -786,7 +794,7 @@ static int parse_rbd_opts_token(char *c, void *private) if (token < Opt_last_int) { ret = match_int(&argstr[0], &intval); if (ret < 0) { - pr_err("bad mount option arg (not int) at '%s'\n", c); + pr_err("bad option arg (not int) at '%s'\n", c); return ret; } dout("got int token %d val %d\n", token, intval); @@ -802,7 +810,7 @@ static int parse_rbd_opts_token(char *c, void *private) pr_err("queue_depth out of range\n"); return -EINVAL; } - rbd_opts->queue_depth = intval; + pctx->opts->queue_depth = intval; break; case Opt_lock_timeout: /* 0 is "wait forever" (i.e. 
infinite timeout) */ @@ -810,22 +818,28 @@ static int parse_rbd_opts_token(char *c, void *private) pr_err("lock_timeout out of range\n"); return -EINVAL; } - rbd_opts->lock_timeout = msecs_to_jiffies(intval * 1000); + pctx->opts->lock_timeout = msecs_to_jiffies(intval * 1000); + break; + case Opt_pool_ns: + kfree(pctx->spec->pool_ns); + pctx->spec->pool_ns = match_strdup(argstr); + if (!pctx->spec->pool_ns) + return -ENOMEM; break; case Opt_read_only: - rbd_opts->read_only = true; + pctx->opts->read_only = true; break; case Opt_read_write: - rbd_opts->read_only = false; + pctx->opts->read_only = false; break; case Opt_lock_on_read: - rbd_opts->lock_on_read = true; + pctx->opts->lock_on_read = true; break; case Opt_exclusive: - rbd_opts->exclusive = true; + pctx->opts->exclusive = true; break; case Opt_notrim: - rbd_opts->trim = false; + pctx->opts->trim = false; break; default: /* libceph prints "bad option" msg */ @@ -1452,7 +1466,7 @@ static void rbd_osd_req_format_write(struct rbd_obj_request *obj_request) struct ceph_osd_request *osd_req = obj_request->osd_req; osd_req->r_flags = CEPH_OSD_FLAG_WRITE; - ktime_get_real_ts(&osd_req->r_mtime); + ktime_get_real_ts64(&osd_req->r_mtime); osd_req->r_data_offset = obj_request->ex.oe_off; } @@ -1475,7 +1489,13 @@ rbd_osd_req_create(struct rbd_obj_request *obj_req, unsigned int num_ops) req->r_callback = rbd_osd_req_callback; req->r_priv = obj_req; + /* + * Data objects may be stored in a separate pool, but always in + * the same namespace in that pool as the header in its pool. 
+ */ + ceph_oloc_copy(&req->r_base_oloc, &rbd_dev->header_oloc); req->r_base_oloc.pool = rbd_dev->layout.pool_id; + if (ceph_oid_aprintf(&req->r_base_oid, GFP_NOIO, name_format, rbd_dev->header.object_prefix, obj_req->ex.oe_objno)) goto err_req; @@ -4119,6 +4139,14 @@ static ssize_t rbd_pool_id_show(struct device *dev, (unsigned long long) rbd_dev->spec->pool_id); } +static ssize_t rbd_pool_ns_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); + + return sprintf(buf, "%s\n", rbd_dev->spec->pool_ns ?: ""); +} + static ssize_t rbd_name_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -4217,6 +4245,7 @@ static DEVICE_ATTR(cluster_fsid, 0444, rbd_cluster_fsid_show, NULL); static DEVICE_ATTR(config_info, 0400, rbd_config_info_show, NULL); static DEVICE_ATTR(pool, 0444, rbd_pool_show, NULL); static DEVICE_ATTR(pool_id, 0444, rbd_pool_id_show, NULL); +static DEVICE_ATTR(pool_ns, 0444, rbd_pool_ns_show, NULL); static DEVICE_ATTR(name, 0444, rbd_name_show, NULL); static DEVICE_ATTR(image_id, 0444, rbd_image_id_show, NULL); static DEVICE_ATTR(refresh, 0200, NULL, rbd_image_refresh); @@ -4235,6 +4264,7 @@ static struct attribute *rbd_attrs[] = { &dev_attr_config_info.attr, &dev_attr_pool.attr, &dev_attr_pool_id.attr, + &dev_attr_pool_ns.attr, &dev_attr_name.attr, &dev_attr_image_id.attr, &dev_attr_current_snap.attr, @@ -4295,6 +4325,7 @@ static void rbd_spec_free(struct kref *kref) struct rbd_spec *spec = container_of(kref, struct rbd_spec, kref); kfree(spec->pool_name); + kfree(spec->pool_ns); kfree(spec->image_id); kfree(spec->image_name); kfree(spec->snap_name); @@ -4353,6 +4384,12 @@ static struct rbd_device *__rbd_dev_create(struct rbd_client *rbdc, rbd_dev->header.data_pool_id = CEPH_NOPOOL; ceph_oid_init(&rbd_dev->header_oid); rbd_dev->header_oloc.pool = spec->pool_id; + if (spec->pool_ns) { + WARN_ON(!*spec->pool_ns); + rbd_dev->header_oloc.pool_ns = + 
ceph_find_or_create_string(spec->pool_ns, + strlen(spec->pool_ns)); + } mutex_init(&rbd_dev->watch_mutex); rbd_dev->watch_state = RBD_WATCH_STATE_UNREGISTERED; @@ -4633,6 +4670,17 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) parent_spec->pool_id = pool_id; parent_spec->image_id = image_id; parent_spec->snap_id = snap_id; + + /* TODO: support cloning across namespaces */ + if (rbd_dev->spec->pool_ns) { + parent_spec->pool_ns = kstrdup(rbd_dev->spec->pool_ns, + GFP_KERNEL); + if (!parent_spec->pool_ns) { + ret = -ENOMEM; + goto out_err; + } + } + rbd_dev->parent_spec = parent_spec; parent_spec = NULL; /* rbd_dev now owns this */ } else { @@ -5146,8 +5194,7 @@ static int rbd_add_parse_args(const char *buf, const char *mon_addrs; char *snap_name; size_t mon_addrs_size; - struct rbd_spec *spec = NULL; - struct rbd_options *rbd_opts = NULL; + struct parse_rbd_opts_ctx pctx = { 0 }; struct ceph_options *copts; int ret; @@ -5171,22 +5218,22 @@ static int rbd_add_parse_args(const char *buf, goto out_err; } - spec = rbd_spec_alloc(); - if (!spec) + pctx.spec = rbd_spec_alloc(); + if (!pctx.spec) goto out_mem; - spec->pool_name = dup_token(&buf, NULL); - if (!spec->pool_name) + pctx.spec->pool_name = dup_token(&buf, NULL); + if (!pctx.spec->pool_name) goto out_mem; - if (!*spec->pool_name) { + if (!*pctx.spec->pool_name) { rbd_warn(NULL, "no pool name provided"); goto out_err; } - spec->image_name = dup_token(&buf, NULL); - if (!spec->image_name) + pctx.spec->image_name = dup_token(&buf, NULL); + if (!pctx.spec->image_name) goto out_mem; - if (!*spec->image_name) { + if (!*pctx.spec->image_name) { rbd_warn(NULL, "no image name provided"); goto out_err; } @@ -5207,24 +5254,24 @@ static int rbd_add_parse_args(const char *buf, if (!snap_name) goto out_mem; *(snap_name + len) = '\0'; - spec->snap_name = snap_name; + pctx.spec->snap_name = snap_name; /* Initialize all rbd options to the defaults */ - rbd_opts = kzalloc(sizeof (*rbd_opts), GFP_KERNEL); - if 
(!rbd_opts) + pctx.opts = kzalloc(sizeof(*pctx.opts), GFP_KERNEL); + if (!pctx.opts) goto out_mem; - rbd_opts->read_only = RBD_READ_ONLY_DEFAULT; - rbd_opts->queue_depth = RBD_QUEUE_DEPTH_DEFAULT; - rbd_opts->lock_timeout = RBD_LOCK_TIMEOUT_DEFAULT; - rbd_opts->lock_on_read = RBD_LOCK_ON_READ_DEFAULT; - rbd_opts->exclusive = RBD_EXCLUSIVE_DEFAULT; - rbd_opts->trim = RBD_TRIM_DEFAULT; + pctx.opts->read_only = RBD_READ_ONLY_DEFAULT; + pctx.opts->queue_depth = RBD_QUEUE_DEPTH_DEFAULT; + pctx.opts->lock_timeout = RBD_LOCK_TIMEOUT_DEFAULT; + pctx.opts->lock_on_read = RBD_LOCK_ON_READ_DEFAULT; + pctx.opts->exclusive = RBD_EXCLUSIVE_DEFAULT; + pctx.opts->trim = RBD_TRIM_DEFAULT; copts = ceph_parse_options(options, mon_addrs, - mon_addrs + mon_addrs_size - 1, - parse_rbd_opts_token, rbd_opts); + mon_addrs + mon_addrs_size - 1, + parse_rbd_opts_token, &pctx); if (IS_ERR(copts)) { ret = PTR_ERR(copts); goto out_err; @@ -5232,15 +5279,15 @@ static int rbd_add_parse_args(const char *buf, kfree(options); *ceph_opts = copts; - *opts = rbd_opts; - *rbd_spec = spec; + *opts = pctx.opts; + *rbd_spec = pctx.spec; return 0; out_mem: ret = -ENOMEM; out_err: - kfree(rbd_opts); - rbd_spec_put(spec); + kfree(pctx.opts); + rbd_spec_put(pctx.spec); kfree(options); return ret; @@ -5586,8 +5633,10 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth) ret = rbd_register_watch(rbd_dev); if (ret) { if (ret == -ENOENT) - pr_info("image %s/%s does not exist\n", + pr_info("image %s/%s%s%s does not exist\n", rbd_dev->spec->pool_name, + rbd_dev->spec->pool_ns ?: "", + rbd_dev->spec->pool_ns ? 
"/" : "", rbd_dev->spec->image_name); goto err_out_format; } @@ -5609,8 +5658,10 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth) ret = rbd_spec_fill_names(rbd_dev); if (ret) { if (ret == -ENOENT) - pr_info("snap %s/%s@%s does not exist\n", + pr_info("snap %s/%s%s%s@%s does not exist\n", rbd_dev->spec->pool_name, + rbd_dev->spec->pool_ns ?: "", + rbd_dev->spec->pool_ns ? "/" : "", rbd_dev->spec->image_name, rbd_dev->spec->snap_name); goto err_out_probe; diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c index 59cb307b15fb..027408d55aee 100644 --- a/fs/ceph/acl.c +++ b/fs/ceph/acl.c @@ -45,6 +45,7 @@ static inline void ceph_set_cached_acl(struct inode *inode, struct posix_acl *ceph_get_acl(struct inode *inode, int type) { int size; + unsigned int retry_cnt = 0; const char *name; char *value = NULL; struct posix_acl *acl; @@ -60,6 +61,7 @@ struct posix_acl *ceph_get_acl(struct inode *inode, int type) BUG(); } +retry: size = __ceph_getxattr(inode, name, "", 0); if (size > 0) { value = kzalloc(size, GFP_NOFS); @@ -68,12 +70,22 @@ struct posix_acl *ceph_get_acl(struct inode *inode, int type) size = __ceph_getxattr(inode, name, value, size); } - if (size > 0) + if (size == -ERANGE && retry_cnt < 10) { + retry_cnt++; + kfree(value); + value = NULL; + goto retry; + } + + if (size > 0) { acl = posix_acl_from_xattr(&init_user_ns, value, size); - else if (size == -ERANGE || size == -ENODATA || size == 0) + } else if (size == -ENODATA || size == 0) { acl = NULL; - else + } else { + pr_err_ratelimited("get acl %llx.%llx failed, err=%d\n", + ceph_vinop(inode), size); acl = ERR_PTR(-EIO); + } kfree(value); @@ -89,6 +101,7 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type) const char *name = NULL; char *value = NULL; struct iattr newattrs; + struct timespec64 old_ctime = inode->i_ctime; umode_t new_mode = inode->i_mode, old_mode = inode->i_mode; switch (type) { @@ -133,7 +146,7 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, 
int type) if (new_mode != old_mode) { newattrs.ia_ctime = current_time(inode); newattrs.ia_mode = new_mode; - newattrs.ia_valid = ATTR_MODE; + newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; ret = __ceph_setattr(inode, &newattrs); if (ret) goto out_free; @@ -142,8 +155,9 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type) ret = __ceph_setxattr(inode, name, value, size, 0); if (ret) { if (new_mode != old_mode) { + newattrs.ia_ctime = old_ctime; newattrs.ia_mode = old_mode; - newattrs.ia_valid = ATTR_MODE; + newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; __ceph_setattr(inode, &newattrs); } goto out_free; @@ -171,10 +185,10 @@ int ceph_pre_init_acls(struct inode *dir, umode_t *mode, return err; if (acl) { - int ret = posix_acl_equiv_mode(acl, mode); - if (ret < 0) + err = posix_acl_equiv_mode(acl, mode); + if (err < 0) goto out_err; - if (ret == 0) { + if (err == 0) { posix_acl_release(acl); acl = NULL; } diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 292b3d72d725..9c332a6f6667 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -574,7 +574,6 @@ static u64 get_writepages_data_length(struct inode *inode, */ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) { - struct timespec ts; struct inode *inode; struct ceph_inode_info *ci; struct ceph_fs_client *fsc; @@ -625,12 +624,11 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) set_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC); set_page_writeback(page); - ts = timespec64_to_timespec(inode->i_mtime); err = ceph_osdc_writepages(&fsc->client->osdc, ceph_vino(inode), &ci->i_layout, snapc, page_off, len, ceph_wbc.truncate_seq, ceph_wbc.truncate_size, - &ts, &page, 1); + &inode->i_mtime, &page, 1); if (err < 0) { struct writeback_control tmp_wbc; if (!wbc) @@ -1134,7 +1132,7 @@ new_request: pages = NULL; } - req->r_mtime = timespec64_to_timespec(inode->i_mtime); + req->r_mtime = inode->i_mtime; rc = ceph_osdc_start_request(&fsc->client->osdc, 
req, true); BUG_ON(rc); req = NULL; @@ -1431,7 +1429,7 @@ static void ceph_restore_sigs(sigset_t *oldset) /* * vm ops */ -static int ceph_filemap_fault(struct vm_fault *vmf) +static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct inode *inode = file_inode(vma->vm_file); @@ -1439,8 +1437,9 @@ static int ceph_filemap_fault(struct vm_fault *vmf) struct ceph_file_info *fi = vma->vm_file->private_data; struct page *pinned_page = NULL; loff_t off = vmf->pgoff << PAGE_SHIFT; - int want, got, ret; + int want, got, err; sigset_t oldset; + vm_fault_t ret = VM_FAULT_SIGBUS; ceph_block_sigs(&oldset); @@ -1452,8 +1451,8 @@ static int ceph_filemap_fault(struct vm_fault *vmf) want = CEPH_CAP_FILE_CACHE; got = 0; - ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, -1, &got, &pinned_page); - if (ret < 0) + err = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, -1, &got, &pinned_page); + if (err < 0) goto out_restore; dout("filemap_fault %p %llu~%zd got cap refs on %s\n", @@ -1465,16 +1464,17 @@ static int ceph_filemap_fault(struct vm_fault *vmf) ceph_add_rw_context(fi, &rw_ctx); ret = filemap_fault(vmf); ceph_del_rw_context(fi, &rw_ctx); + dout("filemap_fault %p %llu~%zd drop cap refs %s ret %x\n", + inode, off, (size_t)PAGE_SIZE, + ceph_cap_string(got), ret); } else - ret = -EAGAIN; + err = -EAGAIN; - dout("filemap_fault %p %llu~%zd dropping cap refs on %s ret %d\n", - inode, off, (size_t)PAGE_SIZE, ceph_cap_string(got), ret); if (pinned_page) put_page(pinned_page); ceph_put_cap_refs(ci, got); - if (ret != -EAGAIN) + if (err != -EAGAIN) goto out_restore; /* read inline data */ @@ -1482,7 +1482,6 @@ static int ceph_filemap_fault(struct vm_fault *vmf) /* does not support inline data > PAGE_SIZE */ ret = VM_FAULT_SIGBUS; } else { - int ret1; struct address_space *mapping = inode->i_mapping; struct page *page = find_or_create_page(mapping, 0, mapping_gfp_constraint(mapping, @@ -1491,32 +1490,32 @@ static int ceph_filemap_fault(struct vm_fault 
*vmf) ret = VM_FAULT_OOM; goto out_inline; } - ret1 = __ceph_do_getattr(inode, page, + err = __ceph_do_getattr(inode, page, CEPH_STAT_CAP_INLINE_DATA, true); - if (ret1 < 0 || off >= i_size_read(inode)) { + if (err < 0 || off >= i_size_read(inode)) { unlock_page(page); put_page(page); - if (ret1 < 0) - ret = ret1; + if (err == -ENOMEM) + ret = VM_FAULT_OOM; else ret = VM_FAULT_SIGBUS; goto out_inline; } - if (ret1 < PAGE_SIZE) - zero_user_segment(page, ret1, PAGE_SIZE); + if (err < PAGE_SIZE) + zero_user_segment(page, err, PAGE_SIZE); else flush_dcache_page(page); SetPageUptodate(page); vmf->page = page; ret = VM_FAULT_MAJOR | VM_FAULT_LOCKED; out_inline: - dout("filemap_fault %p %llu~%zd read inline data ret %d\n", + dout("filemap_fault %p %llu~%zd read inline data ret %x\n", inode, off, (size_t)PAGE_SIZE, ret); } out_restore: ceph_restore_sigs(&oldset); - if (ret < 0) - ret = (ret == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS; + if (err < 0) + ret = vmf_error(err); return ret; } @@ -1524,7 +1523,7 @@ out_restore: /* * Reuse write_begin here for simplicity. 
*/ -static int ceph_page_mkwrite(struct vm_fault *vmf) +static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct inode *inode = file_inode(vma->vm_file); @@ -1535,8 +1534,9 @@ static int ceph_page_mkwrite(struct vm_fault *vmf) loff_t off = page_offset(page); loff_t size = i_size_read(inode); size_t len; - int want, got, ret; + int want, got, err; sigset_t oldset; + vm_fault_t ret = VM_FAULT_SIGBUS; prealloc_cf = ceph_alloc_cap_flush(); if (!prealloc_cf) @@ -1550,10 +1550,10 @@ static int ceph_page_mkwrite(struct vm_fault *vmf) lock_page(page); locked_page = page; } - ret = ceph_uninline_data(vma->vm_file, locked_page); + err = ceph_uninline_data(vma->vm_file, locked_page); if (locked_page) unlock_page(locked_page); - if (ret < 0) + if (err < 0) goto out_free; } @@ -1570,9 +1570,9 @@ static int ceph_page_mkwrite(struct vm_fault *vmf) want = CEPH_CAP_FILE_BUFFER; got = 0; - ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, off + len, + err = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, off + len, &got, NULL); - if (ret < 0) + if (err < 0) goto out_free; dout("page_mkwrite %p %llu~%zd got cap refs on %s\n", @@ -1590,13 +1590,13 @@ static int ceph_page_mkwrite(struct vm_fault *vmf) break; } - ret = ceph_update_writeable_page(vma->vm_file, off, len, page); - if (ret >= 0) { + err = ceph_update_writeable_page(vma->vm_file, off, len, page); + if (err >= 0) { /* success. we'll keep the page locked. 
*/ set_page_dirty(page); ret = VM_FAULT_LOCKED; } - } while (ret == -EAGAIN); + } while (err == -EAGAIN); if (ret == VM_FAULT_LOCKED || ci->i_inline_version != CEPH_INLINE_NONE) { @@ -1610,14 +1610,14 @@ static int ceph_page_mkwrite(struct vm_fault *vmf) __mark_inode_dirty(inode, dirty); } - dout("page_mkwrite %p %llu~%zd dropping cap refs on %s ret %d\n", + dout("page_mkwrite %p %llu~%zd dropping cap refs on %s ret %x\n", inode, off, len, ceph_cap_string(got), ret); ceph_put_cap_refs(ci, got); out_free: ceph_restore_sigs(&oldset); ceph_free_cap_flush(prealloc_cf); - if (ret < 0) - ret = (ret == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS; + if (err < 0) + ret = vmf_error(err); return ret; } @@ -1734,7 +1734,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page) goto out; } - req->r_mtime = timespec64_to_timespec(inode->i_mtime); + req->r_mtime = inode->i_mtime; err = ceph_osdc_start_request(&fsc->client->osdc, req, false); if (!err) err = ceph_osdc_wait_request(&fsc->client->osdc, req); @@ -1776,7 +1776,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page) goto out_put; } - req->r_mtime = timespec64_to_timespec(inode->i_mtime); + req->r_mtime = inode->i_mtime; err = ceph_osdc_start_request(&fsc->client->osdc, req, false); if (!err) err = ceph_osdc_wait_request(&fsc->client->osdc, req); @@ -1937,7 +1937,7 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci, 0, false, true); err = ceph_osdc_start_request(&fsc->client->osdc, rd_req, false); - wr_req->r_mtime = timespec64_to_timespec(ci->vfs_inode.i_mtime); + wr_req->r_mtime = ci->vfs_inode.i_mtime; err2 = ceph_osdc_start_request(&fsc->client->osdc, wr_req, false); if (!err) diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c index 362900e42424..1bf3502bdd6f 100644 --- a/fs/ceph/cache.c +++ b/fs/ceph/cache.c @@ -25,8 +25,9 @@ #include "cache.h" struct ceph_aux_inode { - u64 version; - struct timespec mtime; + u64 version; + u64 mtime_sec; + u64 mtime_nsec; }; struct 
fscache_netfs ceph_cache_netfs = { @@ -130,7 +131,8 @@ static enum fscache_checkaux ceph_fscache_inode_check_aux( memset(&aux, 0, sizeof(aux)); aux.version = ci->i_version; - aux.mtime = timespec64_to_timespec(inode->i_mtime); + aux.mtime_sec = inode->i_mtime.tv_sec; + aux.mtime_nsec = inode->i_mtime.tv_nsec; if (memcmp(data, &aux, sizeof(aux)) != 0) return FSCACHE_CHECKAUX_OBSOLETE; @@ -163,7 +165,8 @@ void ceph_fscache_register_inode_cookie(struct inode *inode) if (!ci->fscache) { memset(&aux, 0, sizeof(aux)); aux.version = ci->i_version; - aux.mtime = timespec64_to_timespec(inode->i_mtime); + aux.mtime_sec = inode->i_mtime.tv_sec; + aux.mtime_nsec = inode->i_mtime.tv_nsec; ci->fscache = fscache_acquire_cookie(fsc->fscache, &ceph_fscache_inode_object_def, &ci->i_vino, sizeof(ci->i_vino), diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 990258cbd836..dd7dfdd2ba13 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -156,6 +156,37 @@ void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta) spin_unlock(&mdsc->caps_list_lock); } +static void __ceph_unreserve_caps(struct ceph_mds_client *mdsc, int nr_caps) +{ + struct ceph_cap *cap; + int i; + + if (nr_caps) { + BUG_ON(mdsc->caps_reserve_count < nr_caps); + mdsc->caps_reserve_count -= nr_caps; + if (mdsc->caps_avail_count >= + mdsc->caps_reserve_count + mdsc->caps_min_count) { + mdsc->caps_total_count -= nr_caps; + for (i = 0; i < nr_caps; i++) { + cap = list_first_entry(&mdsc->caps_list, + struct ceph_cap, caps_item); + list_del(&cap->caps_item); + kmem_cache_free(ceph_cap_cachep, cap); + } + } else { + mdsc->caps_avail_count += nr_caps; + } + + dout("%s: caps %d = %d used + %d resv + %d avail\n", + __func__, + mdsc->caps_total_count, mdsc->caps_use_count, + mdsc->caps_reserve_count, mdsc->caps_avail_count); + BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + + mdsc->caps_reserve_count + + mdsc->caps_avail_count); + } +} + /* * Called under mdsc->mutex. 
*/ @@ -167,6 +198,7 @@ int ceph_reserve_caps(struct ceph_mds_client *mdsc, int have; int alloc = 0; int max_caps; + int err = 0; bool trimmed = false; struct ceph_mds_session *s; LIST_HEAD(newcaps); @@ -233,9 +265,14 @@ int ceph_reserve_caps(struct ceph_mds_client *mdsc, pr_warn("reserve caps ctx=%p ENOMEM need=%d got=%d\n", ctx, need, have + alloc); - goto out_nomem; + err = -ENOMEM; + break; + } + + if (!err) { + BUG_ON(have + alloc != need); + ctx->count = need; } - BUG_ON(have + alloc != need); spin_lock(&mdsc->caps_list_lock); mdsc->caps_total_count += alloc; @@ -245,77 +282,26 @@ int ceph_reserve_caps(struct ceph_mds_client *mdsc, BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + mdsc->caps_reserve_count + mdsc->caps_avail_count); + + if (err) + __ceph_unreserve_caps(mdsc, have + alloc); + spin_unlock(&mdsc->caps_list_lock); - ctx->count = need; dout("reserve caps ctx=%p %d = %d used + %d resv + %d avail\n", ctx, mdsc->caps_total_count, mdsc->caps_use_count, mdsc->caps_reserve_count, mdsc->caps_avail_count); - return 0; - -out_nomem: - - spin_lock(&mdsc->caps_list_lock); - mdsc->caps_avail_count += have; - mdsc->caps_reserve_count -= have; - - while (!list_empty(&newcaps)) { - cap = list_first_entry(&newcaps, - struct ceph_cap, caps_item); - list_del(&cap->caps_item); - - /* Keep some preallocated caps around (ceph_min_count), to - * avoid lots of free/alloc churn. 
*/ - if (mdsc->caps_avail_count >= - mdsc->caps_reserve_count + mdsc->caps_min_count) { - kmem_cache_free(ceph_cap_cachep, cap); - } else { - mdsc->caps_avail_count++; - mdsc->caps_total_count++; - list_add(&cap->caps_item, &mdsc->caps_list); - } - } - - BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + - mdsc->caps_reserve_count + - mdsc->caps_avail_count); - spin_unlock(&mdsc->caps_list_lock); - return -ENOMEM; + return err; } -int ceph_unreserve_caps(struct ceph_mds_client *mdsc, +void ceph_unreserve_caps(struct ceph_mds_client *mdsc, struct ceph_cap_reservation *ctx) { - int i; - struct ceph_cap *cap; - dout("unreserve caps ctx=%p count=%d\n", ctx, ctx->count); - if (ctx->count) { - spin_lock(&mdsc->caps_list_lock); - BUG_ON(mdsc->caps_reserve_count < ctx->count); - mdsc->caps_reserve_count -= ctx->count; - if (mdsc->caps_avail_count >= - mdsc->caps_reserve_count + mdsc->caps_min_count) { - mdsc->caps_total_count -= ctx->count; - for (i = 0; i < ctx->count; i++) { - cap = list_first_entry(&mdsc->caps_list, - struct ceph_cap, caps_item); - list_del(&cap->caps_item); - kmem_cache_free(ceph_cap_cachep, cap); - } - } else { - mdsc->caps_avail_count += ctx->count; - } - ctx->count = 0; - dout("unreserve caps %d = %d used + %d resv + %d avail\n", - mdsc->caps_total_count, mdsc->caps_use_count, - mdsc->caps_reserve_count, mdsc->caps_avail_count); - BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + - mdsc->caps_reserve_count + - mdsc->caps_avail_count); - spin_unlock(&mdsc->caps_list_lock); - } - return 0; + spin_lock(&mdsc->caps_list_lock); + __ceph_unreserve_caps(mdsc, ctx->count); + ctx->count = 0; + spin_unlock(&mdsc->caps_list_lock); } struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc, @@ -1125,7 +1111,7 @@ struct cap_msg_args { u64 flush_tid, oldest_flush_tid, size, max_size; u64 xattr_version; struct ceph_buffer *xattr_buf; - struct timespec atime, mtime, ctime; + struct timespec64 atime, mtime, ctime; int op, caps, wanted, dirty; u32 seq, 
issue_seq, mseq, time_warp_seq; u32 flags; @@ -1146,7 +1132,7 @@ static int send_cap_msg(struct cap_msg_args *arg) struct ceph_msg *msg; void *p; size_t extra_len; - struct timespec zerotime = {0}; + struct timespec64 zerotime = {0}; struct ceph_osd_client *osdc = &arg->session->s_mdsc->fsc->client->osdc; dout("send_cap_msg %s %llx %llx caps %s wanted %s dirty %s" @@ -1186,9 +1172,9 @@ static int send_cap_msg(struct cap_msg_args *arg) fc->size = cpu_to_le64(arg->size); fc->max_size = cpu_to_le64(arg->max_size); - ceph_encode_timespec(&fc->mtime, &arg->mtime); - ceph_encode_timespec(&fc->atime, &arg->atime); - ceph_encode_timespec(&fc->ctime, &arg->ctime); + ceph_encode_timespec64(&fc->mtime, &arg->mtime); + ceph_encode_timespec64(&fc->atime, &arg->atime); + ceph_encode_timespec64(&fc->ctime, &arg->ctime); fc->time_warp_seq = cpu_to_le32(arg->time_warp_seq); fc->uid = cpu_to_le32(from_kuid(&init_user_ns, arg->uid)); @@ -1237,7 +1223,7 @@ static int send_cap_msg(struct cap_msg_args *arg) * We just zero these out for now, as the MDS ignores them unless * the requisite feature flags are set (which we don't do yet). */ - ceph_encode_timespec(p, &zerotime); + ceph_encode_timespec64(p, &zerotime); p += sizeof(struct ceph_timespec); ceph_encode_64(&p, 0); @@ -1360,9 +1346,9 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, arg.xattr_buf = NULL; } - arg.mtime = timespec64_to_timespec(inode->i_mtime); - arg.atime = timespec64_to_timespec(inode->i_atime); - arg.ctime = timespec64_to_timespec(inode->i_ctime); + arg.mtime = inode->i_mtime; + arg.atime = inode->i_atime; + arg.ctime = inode->i_ctime; arg.op = op; arg.caps = cap->implemented; @@ -3148,11 +3134,11 @@ static void handle_cap_grant(struct inode *inode, } if (newcaps & CEPH_CAP_ANY_RD) { - struct timespec mtime, atime, ctime; + struct timespec64 mtime, atime, ctime; /* ctime/mtime/atime? 
*/ - ceph_decode_timespec(&mtime, &grant->mtime); - ceph_decode_timespec(&atime, &grant->atime); - ceph_decode_timespec(&ctime, &grant->ctime); + ceph_decode_timespec64(&mtime, &grant->mtime); + ceph_decode_timespec64(&atime, &grant->atime); + ceph_decode_timespec64(&ctime, &grant->ctime); ceph_fill_file_time(inode, extra_info->issued, le32_to_cpu(grant->time_warp_seq), &ctime, &mtime, &atime); diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 036ac0f3a393..82928cea0209 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -827,12 +827,14 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry, if (ceph_snap(dir) != CEPH_NOSNAP) return -EROFS; - if (ceph_quota_is_max_files_exceeded(dir)) - return -EDQUOT; + if (ceph_quota_is_max_files_exceeded(dir)) { + err = -EDQUOT; + goto out; + } err = ceph_pre_init_acls(dir, &mode, &acls); if (err < 0) - return err; + goto out; dout("mknod in dir %p dentry %p mode 0%ho rdev %d\n", dir, dentry, mode, rdev); @@ -883,8 +885,10 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry, if (ceph_snap(dir) != CEPH_NOSNAP) return -EROFS; - if (ceph_quota_is_max_files_exceeded(dir)) - return -EDQUOT; + if (ceph_quota_is_max_files_exceeded(dir)) { + err = -EDQUOT; + goto out; + } dout("symlink in dir %p dentry %p to '%s'\n", dir, dentry, dest); req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SYMLINK, USE_AUTH_MDS); @@ -1393,7 +1397,7 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, " rfiles: %20lld\n" " rsubdirs: %20lld\n" "rbytes: %20lld\n" - "rctime: %10ld.%09ld\n", + "rctime: %10lld.%09ld\n", ci->i_files + ci->i_subdirs, ci->i_files, ci->i_subdirs, @@ -1401,8 +1405,8 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, ci->i_rfiles, ci->i_rsubdirs, ci->i_rbytes, - (long)ci->i_rctime.tv_sec, - (long)ci->i_rctime.tv_nsec); + ci->i_rctime.tv_sec, + ci->i_rctime.tv_nsec); } if (*ppos >= dfi->dir_info_len) diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 
e2679e8a2535..92ab20433682 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -720,7 +720,7 @@ struct ceph_aio_request { struct list_head osd_reqs; unsigned num_reqs; atomic_t pending_reqs; - struct timespec mtime; + struct timespec64 mtime; struct ceph_cap_flush *prealloc_cf; }; @@ -922,7 +922,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, int num_pages = 0; int flags; int ret; - struct timespec mtime = timespec64_to_timespec(current_time(inode)); + struct timespec64 mtime = current_time(inode); size_t count = iov_iter_count(iter); loff_t pos = iocb->ki_pos; bool write = iov_iter_rw(iter) == WRITE; @@ -1130,7 +1130,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, int flags; int ret; bool check_caps = false; - struct timespec mtime = timespec64_to_timespec(current_time(inode)); + struct timespec64 mtime = current_time(inode); size_t count = iov_iter_count(from); if (ceph_snap(file_inode(file)) != CEPH_NOSNAP) @@ -1383,12 +1383,12 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) struct ceph_file_info *fi = file->private_data; struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_osd_client *osdc = - &ceph_sb_to_client(inode->i_sb)->client->osdc; + struct ceph_fs_client *fsc = ceph_inode_to_client(inode); struct ceph_cap_flush *prealloc_cf; ssize_t count, written = 0; int err, want, got; loff_t pos; + loff_t limit = max(i_size_read(inode), fsc->max_file_size); if (ceph_snap(inode) != CEPH_NOSNAP) return -EROFS; @@ -1414,6 +1414,13 @@ retry_snap: goto out; pos = iocb->ki_pos; + if (unlikely(pos >= limit)) { + err = -EFBIG; + goto out; + } else { + iov_iter_truncate(from, limit - pos); + } + count = iov_iter_count(from); if (ceph_quota_is_max_bytes_exceeded(inode, pos + count)) { err = -EDQUOT; @@ -1435,7 +1442,7 @@ retry_snap: } /* FIXME: not complete since it doesn't account for being at quota */ - if (ceph_osdmap_flag(osdc, 
CEPH_OSDMAP_FULL)) { + if (ceph_osdmap_flag(&fsc->client->osdc, CEPH_OSDMAP_FULL)) { err = -ENOSPC; goto out; } @@ -1525,7 +1532,7 @@ retry_snap: } if (written >= 0) { - if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_NEARFULL)) + if (ceph_osdmap_flag(&fsc->client->osdc, CEPH_OSDMAP_NEARFULL)) iocb->ki_flags |= IOCB_DSYNC; written = generic_write_sync(iocb, written); } @@ -1546,6 +1553,7 @@ out_unlocked: static loff_t ceph_llseek(struct file *file, loff_t offset, int whence) { struct inode *inode = file->f_mapping->host; + struct ceph_fs_client *fsc = ceph_inode_to_client(inode); loff_t i_size; loff_t ret; @@ -1590,7 +1598,7 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int whence) break; } - ret = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); + ret = vfs_setpos(file, offset, max(i_size, fsc->max_file_size)); out: inode_unlock(inode); @@ -1662,7 +1670,7 @@ static int ceph_zero_partial_object(struct inode *inode, goto out; } - req->r_mtime = timespec64_to_timespec(inode->i_mtime); + req->r_mtime = inode->i_mtime; ret = ceph_osdc_start_request(&fsc->client->osdc, req, false); if (!ret) { ret = ceph_osdc_wait_request(&fsc->client->osdc, req); @@ -1727,8 +1735,7 @@ static long ceph_fallocate(struct file *file, int mode, struct ceph_file_info *fi = file->private_data; struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_osd_client *osdc = - &ceph_inode_to_client(inode)->client->osdc; + struct ceph_fs_client *fsc = ceph_inode_to_client(inode); struct ceph_cap_flush *prealloc_cf; int want, got = 0; int dirty; @@ -1736,6 +1743,9 @@ static long ceph_fallocate(struct file *file, int mode, loff_t endoff = 0; loff_t size; + if ((offset + length) > max(i_size_read(inode), fsc->max_file_size)) + return -EFBIG; + if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) return -EOPNOTSUPP; @@ -1759,7 +1769,7 @@ static long ceph_fallocate(struct file *file, int mode, goto unlock; } - if (ceph_osdmap_flag(osdc, 
CEPH_OSDMAP_FULL) && + if (ceph_osdmap_flag(&fsc->client->osdc, CEPH_OSDMAP_FULL) && !(mode & FALLOC_FL_PUNCH_HOLE)) { ret = -ENOSPC; goto unlock; diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index a866be999216..ebc7bdaed2d0 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -658,13 +658,10 @@ int ceph_fill_file_size(struct inode *inode, int issued, } void ceph_fill_file_time(struct inode *inode, int issued, - u64 time_warp_seq, struct timespec *ctime, - struct timespec *mtime, struct timespec *atime) + u64 time_warp_seq, struct timespec64 *ctime, + struct timespec64 *mtime, struct timespec64 *atime) { struct ceph_inode_info *ci = ceph_inode(inode); - struct timespec64 ctime64 = timespec_to_timespec64(*ctime); - struct timespec64 mtime64 = timespec_to_timespec64(*mtime); - struct timespec64 atime64 = timespec_to_timespec64(*atime); int warn = 0; if (issued & (CEPH_CAP_FILE_EXCL| @@ -673,39 +670,39 @@ void ceph_fill_file_time(struct inode *inode, int issued, CEPH_CAP_AUTH_EXCL| CEPH_CAP_XATTR_EXCL)) { if (ci->i_version == 0 || - timespec64_compare(&ctime64, &inode->i_ctime) > 0) { + timespec64_compare(ctime, &inode->i_ctime) > 0) { dout("ctime %lld.%09ld -> %lld.%09ld inc w/ cap\n", - (long long)inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, - (long long)ctime->tv_sec, ctime->tv_nsec); - inode->i_ctime = ctime64; + inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, + ctime->tv_sec, ctime->tv_nsec); + inode->i_ctime = *ctime; } if (ci->i_version == 0 || ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) > 0) { /* the MDS did a utimes() */ dout("mtime %lld.%09ld -> %lld.%09ld " "tw %d -> %d\n", - (long long)inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec, - (long long)mtime->tv_sec, mtime->tv_nsec, + inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec, + mtime->tv_sec, mtime->tv_nsec, ci->i_time_warp_seq, (int)time_warp_seq); - inode->i_mtime = mtime64; - inode->i_atime = atime64; + inode->i_mtime = *mtime; + inode->i_atime = *atime; ci->i_time_warp_seq = time_warp_seq; } else 
if (time_warp_seq == ci->i_time_warp_seq) { /* nobody did utimes(); take the max */ - if (timespec64_compare(&mtime64, &inode->i_mtime) > 0) { + if (timespec64_compare(mtime, &inode->i_mtime) > 0) { dout("mtime %lld.%09ld -> %lld.%09ld inc\n", - (long long)inode->i_mtime.tv_sec, + inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec, - (long long)mtime->tv_sec, mtime->tv_nsec); - inode->i_mtime = mtime64; + mtime->tv_sec, mtime->tv_nsec); + inode->i_mtime = *mtime; } - if (timespec64_compare(&atime64, &inode->i_atime) > 0) { + if (timespec64_compare(atime, &inode->i_atime) > 0) { dout("atime %lld.%09ld -> %lld.%09ld inc\n", - (long long)inode->i_atime.tv_sec, + inode->i_atime.tv_sec, inode->i_atime.tv_nsec, - (long long)atime->tv_sec, atime->tv_nsec); - inode->i_atime = atime64; + atime->tv_sec, atime->tv_nsec); + inode->i_atime = *atime; } } else if (issued & CEPH_CAP_FILE_EXCL) { /* we did a utimes(); ignore mds values */ @@ -715,9 +712,9 @@ void ceph_fill_file_time(struct inode *inode, int issued, } else { /* we have no write|excl caps; whatever the MDS says is true */ if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) >= 0) { - inode->i_ctime = ctime64; - inode->i_mtime = mtime64; - inode->i_atime = atime64; + inode->i_ctime = *ctime; + inode->i_mtime = *mtime; + inode->i_atime = *atime; ci->i_time_warp_seq = time_warp_seq; } else { warn = 1; @@ -743,7 +740,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page, struct ceph_mds_reply_inode *info = iinfo->in; struct ceph_inode_info *ci = ceph_inode(inode); int issued, new_issued, info_caps; - struct timespec mtime, atime, ctime; + struct timespec64 mtime, atime, ctime; struct ceph_buffer *xattr_blob = NULL; struct ceph_string *pool_ns = NULL; struct ceph_cap *new_cap = NULL; @@ -823,9 +820,9 @@ static int fill_inode(struct inode *inode, struct page *locked_page, if (new_version || (new_issued & CEPH_CAP_ANY_RD)) { /* be careful with mtime, atime, size */ - ceph_decode_timespec(&atime, &info->atime); 
- ceph_decode_timespec(&mtime, &info->mtime); - ceph_decode_timespec(&ctime, &info->ctime); + ceph_decode_timespec64(&atime, &info->atime); + ceph_decode_timespec64(&mtime, &info->mtime); + ceph_decode_timespec64(&ctime, &info->ctime); ceph_fill_file_time(inode, issued, le32_to_cpu(info->time_warp_seq), &ctime, &mtime, &atime); @@ -872,7 +869,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page, ci->i_rbytes = le64_to_cpu(info->rbytes); ci->i_rfiles = le64_to_cpu(info->rfiles); ci->i_rsubdirs = le64_to_cpu(info->rsubdirs); - ceph_decode_timespec(&ci->i_rctime, &info->rctime); + ceph_decode_timespec64(&ci->i_rctime, &info->rctime); } } @@ -1954,7 +1951,6 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) int err = 0; int inode_dirty_flags = 0; bool lock_snap_rwsem = false; - struct timespec ts; prealloc_cf = ceph_alloc_cap_flush(); if (!prealloc_cf) @@ -2030,8 +2026,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) if (ia_valid & ATTR_ATIME) { dout("setattr %p atime %lld.%ld -> %lld.%ld\n", inode, - (long long)inode->i_atime.tv_sec, inode->i_atime.tv_nsec, - (long long)attr->ia_atime.tv_sec, attr->ia_atime.tv_nsec); + inode->i_atime.tv_sec, inode->i_atime.tv_nsec, + attr->ia_atime.tv_sec, attr->ia_atime.tv_nsec); if (issued & CEPH_CAP_FILE_EXCL) { ci->i_time_warp_seq++; inode->i_atime = attr->ia_atime; @@ -2043,8 +2039,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) dirtied |= CEPH_CAP_FILE_WR; } else if ((issued & CEPH_CAP_FILE_SHARED) == 0 || !timespec64_equal(&inode->i_atime, &attr->ia_atime)) { - ts = timespec64_to_timespec(attr->ia_atime); - ceph_encode_timespec(&req->r_args.setattr.atime, &ts); + ceph_encode_timespec64(&req->r_args.setattr.atime, + &attr->ia_atime); mask |= CEPH_SETATTR_ATIME; release |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR; @@ -2052,8 +2048,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) } if (ia_valid & ATTR_MTIME) { dout("setattr %p mtime 
%lld.%ld -> %lld.%ld\n", inode, - (long long)inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec, - (long long)attr->ia_mtime.tv_sec, attr->ia_mtime.tv_nsec); + inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec, + attr->ia_mtime.tv_sec, attr->ia_mtime.tv_nsec); if (issued & CEPH_CAP_FILE_EXCL) { ci->i_time_warp_seq++; inode->i_mtime = attr->ia_mtime; @@ -2065,8 +2061,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) dirtied |= CEPH_CAP_FILE_WR; } else if ((issued & CEPH_CAP_FILE_SHARED) == 0 || !timespec64_equal(&inode->i_mtime, &attr->ia_mtime)) { - ts = timespec64_to_timespec(attr->ia_mtime); - ceph_encode_timespec(&req->r_args.setattr.mtime, &ts); + ceph_encode_timespec64(&req->r_args.setattr.mtime, + &attr->ia_mtime); mask |= CEPH_SETATTR_MTIME; release |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR; @@ -2097,8 +2093,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) bool only = (ia_valid & (ATTR_SIZE|ATTR_MTIME|ATTR_ATIME| ATTR_MODE|ATTR_UID|ATTR_GID)) == 0; dout("setattr %p ctime %lld.%ld -> %lld.%ld (%s)\n", inode, - (long long)inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, - (long long)attr->ia_ctime.tv_sec, attr->ia_ctime.tv_nsec, + inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, + attr->ia_ctime.tv_sec, attr->ia_ctime.tv_nsec, only ? 
"ctime only" : "ignored"); if (only) { /* @@ -2140,7 +2136,7 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) req->r_inode_drop = release; req->r_args.setattr.mask = cpu_to_le32(mask); req->r_num_caps = 1; - req->r_stamp = timespec64_to_timespec(attr->ia_ctime); + req->r_stamp = attr->ia_ctime; err = ceph_mdsc_do_request(mdsc, NULL, req); } dout("setattr %p result=%d (%s locally, %d remote)\n", inode, err, @@ -2161,6 +2157,7 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) int ceph_setattr(struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); + struct ceph_fs_client *fsc = ceph_inode_to_client(inode); int err; if (ceph_snap(inode) != CEPH_NOSNAP) @@ -2170,6 +2167,10 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) if (err != 0) return err; + if ((attr->ia_valid & ATTR_SIZE) && + attr->ia_size > max(inode->i_size, fsc->max_file_size)) + return -EFBIG; + if ((attr->ia_valid & ATTR_SIZE) && ceph_quota_is_max_bytes_exceeded(inode, attr->ia_size)) return -EDQUOT; diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index dc8bc664a871..bc43c822426a 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -902,6 +902,27 @@ static struct ceph_msg *create_session_msg(u32 op, u64 seq) return msg; } +static void encode_supported_features(void **p, void *end) +{ + static const unsigned char bits[] = CEPHFS_FEATURES_CLIENT_SUPPORTED; + static const size_t count = ARRAY_SIZE(bits); + + if (count > 0) { + size_t i; + size_t size = ((size_t)bits[count - 1] + 64) / 64 * 8; + + BUG_ON(*p + 4 + size > end); + ceph_encode_32(p, size); + memset(*p, 0, size); + for (i = 0; i < count; i++) + ((unsigned char*)(*p))[bits[i] / 8] |= 1 << (bits[i] % 8); + *p += size; + } else { + BUG_ON(*p + 4 > end); + ceph_encode_32(p, 0); + } +} + /* * session message, specialization for CEPH_SESSION_REQUEST_OPEN * to include additional client metadata fields. 
@@ -911,11 +932,11 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 struct ceph_msg *msg; struct ceph_mds_session_head *h; int i = -1; - int metadata_bytes = 0; + int extra_bytes = 0; int metadata_key_count = 0; struct ceph_options *opt = mdsc->fsc->client->options; struct ceph_mount_options *fsopt = mdsc->fsc->mount_options; - void *p; + void *p, *end; const char* metadata[][2] = { {"hostname", mdsc->nodename}, @@ -926,21 +947,26 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 }; /* Calculate serialized length of metadata */ - metadata_bytes = 4; /* map length */ + extra_bytes = 4; /* map length */ for (i = 0; metadata[i][0]; ++i) { - metadata_bytes += 8 + strlen(metadata[i][0]) + + extra_bytes += 8 + strlen(metadata[i][0]) + strlen(metadata[i][1]); metadata_key_count++; } + /* supported feature */ + extra_bytes += 4 + 8; /* Allocate the message */ - msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h) + metadata_bytes, + msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h) + extra_bytes, GFP_NOFS, false); if (!msg) { pr_err("create_session_msg ENOMEM creating msg\n"); return NULL; } - h = msg->front.iov_base; + p = msg->front.iov_base; + end = p + msg->front.iov_len; + + h = p; h->op = cpu_to_le32(CEPH_SESSION_REQUEST_OPEN); h->seq = cpu_to_le64(seq); @@ -950,11 +976,11 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 * * ClientSession messages with metadata are v2 */ - msg->hdr.version = cpu_to_le16(2); + msg->hdr.version = cpu_to_le16(3); msg->hdr.compat_version = cpu_to_le16(1); /* The write pointer, following the session_head structure */ - p = msg->front.iov_base + sizeof(*h); + p += sizeof(*h); /* Number of entries in the map */ ceph_encode_32(&p, metadata_key_count); @@ -972,6 +998,10 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 p += val_len; } + encode_supported_features(&p, end); + msg->front.iov_len = p - 
msg->front.iov_base; + msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); + return msg; } @@ -1779,6 +1809,7 @@ struct ceph_mds_request * ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) { struct ceph_mds_request *req = kzalloc(sizeof(*req), GFP_NOFS); + struct timespec64 ts; if (!req) return ERR_PTR(-ENOMEM); @@ -1797,7 +1828,8 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) init_completion(&req->r_safe_completion); INIT_LIST_HEAD(&req->r_unsafe_item); - req->r_stamp = timespec_trunc(current_kernel_time(), mdsc->fsc->sb->s_time_gran); + ktime_get_coarse_real_ts64(&ts); + req->r_stamp = timespec64_trunc(ts, mdsc->fsc->sb->s_time_gran); req->r_op = op; req->r_direct_mode = mode; @@ -2094,7 +2126,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, /* time stamp */ { struct ceph_timespec ts; - ceph_encode_timespec(&ts, &req->r_stamp); + ceph_encode_timespec64(&ts, &req->r_stamp); ceph_encode_copy(&p, &ts, sizeof(ts)); } @@ -2187,7 +2219,7 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc, p = msg->front.iov_base + req->r_request_release_offset; { struct ceph_timespec ts; - ceph_encode_timespec(&ts, &req->r_stamp); + ceph_encode_timespec64(&ts, &req->r_stamp); ceph_encode_copy(&p, &ts, sizeof(ts)); } @@ -2225,7 +2257,7 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc, /* * send request, or put it on the appropriate wait list. 
*/ -static int __do_request(struct ceph_mds_client *mdsc, +static void __do_request(struct ceph_mds_client *mdsc, struct ceph_mds_request *req) { struct ceph_mds_session *session = NULL; @@ -2235,7 +2267,7 @@ static int __do_request(struct ceph_mds_client *mdsc, if (req->r_err || test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)) { if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) __unregister_request(mdsc, req); - goto out; + return; } if (req->r_timeout && @@ -2258,7 +2290,7 @@ static int __do_request(struct ceph_mds_client *mdsc, if (mdsc->mdsmap->m_epoch == 0) { dout("do_request no mdsmap, waiting for map\n"); list_add(&req->r_wait, &mdsc->waiting_for_map); - goto finish; + return; } if (!(mdsc->fsc->mount_options->flags & CEPH_MOUNT_OPT_MOUNTWAIT) && @@ -2276,7 +2308,7 @@ static int __do_request(struct ceph_mds_client *mdsc, ceph_mdsmap_get_state(mdsc->mdsmap, mds) < CEPH_MDS_STATE_ACTIVE) { dout("do_request no mds or not active, waiting for map\n"); list_add(&req->r_wait, &mdsc->waiting_for_map); - goto out; + return; } /* get, open session */ @@ -2326,8 +2358,7 @@ finish: complete_request(mdsc, req); __unregister_request(mdsc, req); } -out: - return err; + return; } /* @@ -2748,7 +2779,7 @@ static void handle_session(struct ceph_mds_session *session, int wake = 0; /* decode */ - if (msg->front.iov_len != sizeof(*h)) + if (msg->front.iov_len < sizeof(*h)) goto bad; op = le32_to_cpu(h->op); seq = le64_to_cpu(h->seq); @@ -2958,15 +2989,12 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, rec.v2.flock_len = (__force __le32) ((ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) ? 
0 : 1); } else { - struct timespec ts; rec.v1.cap_id = cpu_to_le64(cap->cap_id); rec.v1.wanted = cpu_to_le32(__ceph_caps_wanted(ci)); rec.v1.issued = cpu_to_le32(cap->issued); rec.v1.size = cpu_to_le64(inode->i_size); - ts = timespec64_to_timespec(inode->i_mtime); - ceph_encode_timespec(&rec.v1.mtime, &ts); - ts = timespec64_to_timespec(inode->i_atime); - ceph_encode_timespec(&rec.v1.atime, &ts); + ceph_encode_timespec64(&rec.v1.mtime, &inode->i_mtime); + ceph_encode_timespec64(&rec.v1.atime, &inode->i_atime); rec.v1.snaprealm = cpu_to_le64(ci->i_snap_realm->ino); rec.v1.pathbase = cpu_to_le64(pathbase); } @@ -3378,10 +3406,10 @@ static void handle_lease(struct ceph_mds_client *mdsc, vino.ino = le64_to_cpu(h->ino); vino.snap = CEPH_NOSNAP; seq = le32_to_cpu(h->seq); - dname.name = (void *)h + sizeof(*h) + sizeof(u32); - dname.len = msg->front.iov_len - sizeof(*h) - sizeof(u32); - if (dname.len != get_unaligned_le32(h+1)) + dname.len = get_unaligned_le32(h + 1); + if (msg->front.iov_len < sizeof(*h) + sizeof(u32) + dname.len) goto bad; + dname.name = (void *)(h + 1) + sizeof(u32); /* lookup inode */ inode = ceph_find_inode(sb, vino); @@ -3644,8 +3672,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) init_rwsem(&mdsc->pool_perm_rwsem); mdsc->pool_perm_tree = RB_ROOT; - strncpy(mdsc->nodename, utsname()->nodename, - sizeof(mdsc->nodename) - 1); + strscpy(mdsc->nodename, utsname()->nodename, + sizeof(mdsc->nodename)); return 0; } @@ -4019,7 +4047,8 @@ void ceph_mdsc_handle_mdsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg) } else { mdsc->mdsmap = newmap; /* first mds map */ } - mdsc->fsc->sb->s_maxbytes = mdsc->mdsmap->m_max_file_size; + mdsc->fsc->max_file_size = min((loff_t)mdsc->mdsmap->m_max_file_size, + MAX_LFS_FILESIZE); __wake_requests(mdsc, &mdsc->waiting_for_map); ceph_monc_got_map(&mdsc->fsc->client->monc, CEPH_SUB_MDSMAP, @@ -4155,6 +4184,16 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con, return auth; } +static int 
add_authorizer_challenge(struct ceph_connection *con, + void *challenge_buf, int challenge_buf_len) +{ + struct ceph_mds_session *s = con->private; + struct ceph_mds_client *mdsc = s->s_mdsc; + struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth; + + return ceph_auth_add_authorizer_challenge(ac, s->s_auth.authorizer, + challenge_buf, challenge_buf_len); +} static int verify_authorizer_reply(struct ceph_connection *con) { @@ -4218,6 +4257,7 @@ static const struct ceph_connection_operations mds_con_ops = { .put = con_put, .dispatch = dispatch, .get_authorizer = get_authorizer, + .add_authorizer_challenge = add_authorizer_challenge, .verify_authorizer_reply = verify_authorizer_reply, .invalidate_authorizer = invalidate_authorizer, .peer_reset = peer_reset, diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 2ec3b5b35067..32fcce0d4d3c 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -16,6 +16,18 @@ #include #include +/* The first 8 bits are reserved for old ceph releases */ +#define CEPHFS_FEATURE_MIMIC 8 + +#define CEPHFS_FEATURES_ALL { \ + 0, 1, 2, 3, 4, 5, 6, 7, \ + CEPHFS_FEATURE_MIMIC, \ +} + +#define CEPHFS_FEATURES_CLIENT_SUPPORTED CEPHFS_FEATURES_ALL +#define CEPHFS_FEATURES_CLIENT_REQUIRED {} + + /* * Some lock dependencies: * @@ -229,7 +241,7 @@ struct ceph_mds_request { int r_fmode; /* file mode, if expecting cap */ kuid_t r_uid; kgid_t r_gid; - struct timespec r_stamp; + struct timespec64 r_stamp; /* for choosing which mds to send this request to */ int r_direct_mode; diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c index 242bfa5c0539..32d4f13784ba 100644 --- a/fs/ceph/quota.c +++ b/fs/ceph/quota.c @@ -48,7 +48,7 @@ void ceph_handle_quota(struct ceph_mds_client *mdsc, struct inode *inode; struct ceph_inode_info *ci; - if (msg->front.iov_len != sizeof(*h)) { + if (msg->front.iov_len < sizeof(*h)) { pr_err("%s corrupt message mds%d len %d\n", __func__, session->s_mds, (int)msg->front.iov_len); ceph_msg_dump(msg); diff --git 
a/fs/ceph/snap.c b/fs/ceph/snap.c index af81555c14fd..041c27ea8de1 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -594,9 +594,9 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci, BUG_ON(capsnap->writing); capsnap->size = inode->i_size; - capsnap->mtime = timespec64_to_timespec(inode->i_mtime); - capsnap->atime = timespec64_to_timespec(inode->i_atime); - capsnap->ctime = timespec64_to_timespec(inode->i_ctime); + capsnap->mtime = inode->i_mtime; + capsnap->atime = inode->i_atime; + capsnap->ctime = inode->i_ctime; capsnap->time_warp_seq = ci->i_time_warp_seq; capsnap->truncate_size = ci->i_truncate_size; capsnap->truncate_seq = ci->i_truncate_seq; diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 95a3b3ac9b6e..43ca3b763875 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -219,8 +219,7 @@ static int parse_fsopt_token(char *c, void *private) if (token < Opt_last_int) { ret = match_int(&argstr[0], &intval); if (ret < 0) { - pr_err("bad mount option arg (not int) " - "at '%s'\n", c); + pr_err("bad option arg (not int) at '%s'\n", c); return ret; } dout("got int token %d val %d\n", token, intval); @@ -941,11 +940,12 @@ static int ceph_set_super(struct super_block *s, void *data) dout("set_super %p data %p\n", s, data); s->s_flags = fsc->mount_options->sb_flags; - s->s_maxbytes = 1ULL << 40; /* temp value until we get mdsmap */ + s->s_maxbytes = MAX_LFS_FILESIZE; s->s_xattr = ceph_xattr_handlers; s->s_fs_info = fsc; fsc->sb = s; + fsc->max_file_size = 1ULL << 40; /* temp value until we get mdsmap */ s->s_op = &ceph_super_ops; s->s_d_op = &ceph_dentry_ops; diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 971328b99ede..582e28fd1b7b 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -98,6 +98,7 @@ struct ceph_fs_client { unsigned long mount_state; int min_caps; /* min caps i added */ + loff_t max_file_size; struct ceph_mds_client *mdsc; @@ -193,7 +194,7 @@ struct ceph_cap_snap { u64 xattr_version; u64 size; - struct timespec mtime, atime, ctime; + 
struct timespec64 mtime, atime, ctime; u64 time_warp_seq; u64 truncate_size; u32 truncate_seq; @@ -307,7 +308,7 @@ struct ceph_inode_info { char *i_symlink; /* for dirs */ - struct timespec i_rctime; + struct timespec64 i_rctime; u64 i_rbytes, i_rfiles, i_rsubdirs; u64 i_files, i_subdirs; @@ -655,7 +656,7 @@ extern void ceph_caps_finalize(struct ceph_mds_client *mdsc); extern void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta); extern int ceph_reserve_caps(struct ceph_mds_client *mdsc, struct ceph_cap_reservation *ctx, int need); -extern int ceph_unreserve_caps(struct ceph_mds_client *mdsc, +extern void ceph_unreserve_caps(struct ceph_mds_client *mdsc, struct ceph_cap_reservation *ctx); extern void ceph_reservation_status(struct ceph_fs_client *client, int *total, int *avail, int *used, @@ -857,8 +858,9 @@ extern struct inode *ceph_get_snapdir(struct inode *parent); extern int ceph_fill_file_size(struct inode *inode, int issued, u32 truncate_seq, u64 truncate_size, u64 size); extern void ceph_fill_file_time(struct inode *inode, int issued, - u64 time_warp_seq, struct timespec *ctime, - struct timespec *mtime, struct timespec *atime); + u64 time_warp_seq, struct timespec64 *ctime, + struct timespec64 *mtime, + struct timespec64 *atime); extern int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req); extern int ceph_readdir_prepopulate(struct ceph_mds_request *req, diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 5bc8edb4c2a6..5cc8b94f8206 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -224,8 +224,8 @@ static size_t ceph_vxattrcb_dir_rbytes(struct ceph_inode_info *ci, char *val, static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val, size_t size) { - return snprintf(val, size, "%ld.09%ld", (long)ci->i_rctime.tv_sec, - (long)ci->i_rctime.tv_nsec); + return snprintf(val, size, "%lld.%09ld", ci->i_rctime.tv_sec, + ci->i_rctime.tv_nsec); } /* quotas */ diff --git a/include/linux/ceph/auth.h 
b/include/linux/ceph/auth.h index e931da8424a4..6728c2ee0205 100644 --- a/include/linux/ceph/auth.h +++ b/include/linux/ceph/auth.h @@ -64,6 +64,10 @@ struct ceph_auth_client_ops { /* ensure that an existing authorizer is up to date */ int (*update_authorizer)(struct ceph_auth_client *ac, int peer_type, struct ceph_auth_handshake *auth); + int (*add_authorizer_challenge)(struct ceph_auth_client *ac, + struct ceph_authorizer *a, + void *challenge_buf, + int challenge_buf_len); int (*verify_authorizer_reply)(struct ceph_auth_client *ac, struct ceph_authorizer *a); void (*invalidate_authorizer)(struct ceph_auth_client *ac, @@ -118,6 +122,10 @@ void ceph_auth_destroy_authorizer(struct ceph_authorizer *a); extern int ceph_auth_update_authorizer(struct ceph_auth_client *ac, int peer_type, struct ceph_auth_handshake *a); +int ceph_auth_add_authorizer_challenge(struct ceph_auth_client *ac, + struct ceph_authorizer *a, + void *challenge_buf, + int challenge_buf_len); extern int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac, struct ceph_authorizer *a); extern void ceph_auth_invalidate_authorizer(struct ceph_auth_client *ac, diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index 3901927cf6a0..6b92b3395fa9 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -165,9 +165,9 @@ DEFINE_CEPH_FEATURE(58, 1, FS_FILE_LAYOUT_V2) // overlap DEFINE_CEPH_FEATURE(59, 1, FS_BTIME) DEFINE_CEPH_FEATURE(59, 1, FS_CHANGE_ATTR) // overlap DEFINE_CEPH_FEATURE(59, 1, MSG_ADDR2) // overlap -DEFINE_CEPH_FEATURE(60, 1, BLKIN_TRACING) // *do not share this bit* +DEFINE_CEPH_FEATURE(60, 1, OSD_RECOVERY_DELETES) // *do not share this bit* +DEFINE_CEPH_FEATURE(61, 1, CEPHX_V2) // *do not share this bit* -DEFINE_CEPH_FEATURE(61, 1, RESERVED2) // unused, but slow down! 
DEFINE_CEPH_FEATURE(62, 1, RESERVED) // do not use; used as a sentinal DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facing @@ -210,7 +210,8 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin CEPH_FEATURE_SERVER_JEWEL | \ CEPH_FEATURE_MON_STATEFUL_SUB | \ CEPH_FEATURE_CRUSH_TUNABLES5 | \ - CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING) + CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING | \ + CEPH_FEATURE_CEPHX_V2) #define CEPH_FEATURES_REQUIRED_DEFAULT \ (CEPH_FEATURE_NOSRCADDR | \ diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h index d143ac8879c6..a6c2a48d42e0 100644 --- a/include/linux/ceph/decode.h +++ b/include/linux/ceph/decode.h @@ -194,16 +194,22 @@ ceph_decode_skip_n(p, end, sizeof(u8), bad) } while (0) /* - * struct ceph_timespec <-> struct timespec + * struct ceph_timespec <-> struct timespec64 */ -static inline void ceph_decode_timespec(struct timespec *ts, - const struct ceph_timespec *tv) +static inline void ceph_decode_timespec64(struct timespec64 *ts, + const struct ceph_timespec *tv) { - ts->tv_sec = (__kernel_time_t)le32_to_cpu(tv->tv_sec); + /* + * This will still overflow in year 2106. We could extend + * the protocol to steal two more bits from tv_nsec to + * add three more 136 year epochs after that the way ext4 + * does if necessary. 
+ */ + ts->tv_sec = (time64_t)le32_to_cpu(tv->tv_sec); ts->tv_nsec = (long)le32_to_cpu(tv->tv_nsec); } -static inline void ceph_encode_timespec(struct ceph_timespec *tv, - const struct timespec *ts) +static inline void ceph_encode_timespec64(struct ceph_timespec *tv, + const struct timespec64 *ts) { tv->tv_sec = cpu_to_le32((u32)ts->tv_sec); tv->tv_nsec = cpu_to_le32((u32)ts->tv_nsec); diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index c7dfcb8a1fb2..fc2b4491ee0a 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -31,6 +31,9 @@ struct ceph_connection_operations { struct ceph_auth_handshake *(*get_authorizer) ( struct ceph_connection *con, int *proto, int force_new); + int (*add_authorizer_challenge)(struct ceph_connection *con, + void *challenge_buf, + int challenge_buf_len); int (*verify_authorizer_reply) (struct ceph_connection *con); int (*invalidate_authorizer)(struct ceph_connection *con); @@ -286,9 +289,8 @@ struct ceph_connection { attempt for this connection, client */ u32 peer_global_seq; /* peer's global seq for this connection */ + struct ceph_auth_handshake *auth; int auth_retry; /* true if we need a newer authorizer */ - void *auth_reply_buf; /* where to put the authorizer reply */ - int auth_reply_buf_len; struct mutex mutex; @@ -330,7 +332,7 @@ struct ceph_connection { int in_base_pos; /* bytes read */ __le64 in_temp_ack; /* for reading an ack */ - struct timespec last_keepalive_ack; /* keepalive2 ack stamp */ + struct timespec64 last_keepalive_ack; /* keepalive2 ack stamp */ struct delayed_work work; /* send|recv work */ unsigned long delay; /* current delay interval */ diff --git a/include/linux/ceph/msgr.h b/include/linux/ceph/msgr.h index 73ae2a926548..9e50aede46c8 100644 --- a/include/linux/ceph/msgr.h +++ b/include/linux/ceph/msgr.h @@ -91,7 +91,7 @@ struct ceph_entity_inst { #define CEPH_MSGR_TAG_SEQ 13 /* 64-bit int follows with seen seq number */ #define CEPH_MSGR_TAG_KEEPALIVE2 
14 /* keepalive2 byte + ceph_timespec */ #define CEPH_MSGR_TAG_KEEPALIVE2_ACK 15 /* keepalive2 reply */ - +#define CEPH_MSGR_TAG_CHALLENGE_AUTHORIZER 16 /* cephx v2 doing server challenge */ /* * connection negotiation diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 0d6ee04b4c41..02096da01845 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -199,7 +199,7 @@ struct ceph_osd_request { /* set by submitter */ u64 r_snapid; /* for reads, CEPH_NOSNAP o/w */ struct ceph_snap_context *r_snapc; /* for writes */ - struct timespec r_mtime; /* ditto */ + struct timespec64 r_mtime; /* ditto */ u64 r_data_offset; /* ditto */ bool r_linger; /* don't resend on failure */ @@ -253,7 +253,7 @@ struct ceph_osd_linger_request { struct ceph_osd_request_target t; u32 map_dne_bound; - struct timespec mtime; + struct timespec64 mtime; struct kref kref; struct mutex lock; @@ -508,7 +508,7 @@ extern int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_snap_context *sc, u64 off, u64 len, u32 truncate_seq, u64 truncate_size, - struct timespec *mtime, + struct timespec64 *mtime, struct page **pages, int nr_pages); /* watch/notify */ @@ -528,12 +528,12 @@ int ceph_osdc_notify_ack(struct ceph_osd_client *osdc, u64 notify_id, u64 cookie, void *payload, - size_t payload_len); + u32 payload_len); int ceph_osdc_notify(struct ceph_osd_client *osdc, struct ceph_object_id *oid, struct ceph_object_locator *oloc, void *payload, - size_t payload_len, + u32 payload_len, u32 timeout, struct page ***preply_pages, size_t *preply_len); diff --git a/include/linux/ceph/pagelist.h b/include/linux/ceph/pagelist.h index 7edcded07641..d0223364349f 100644 --- a/include/linux/ceph/pagelist.h +++ b/include/linux/ceph/pagelist.h @@ -68,7 +68,7 @@ static inline int ceph_pagelist_encode_8(struct ceph_pagelist *pl, u8 v) return ceph_pagelist_append(pl, &v, 1); } static inline int ceph_pagelist_encode_string(struct ceph_pagelist *pl, - 
char *s, size_t len) + char *s, u32 len) { int ret = ceph_pagelist_encode_32(pl, len); if (ret) diff --git a/net/ceph/Kconfig b/net/ceph/Kconfig index f8cceb99e732..cd2d5b9301a1 100644 --- a/net/ceph/Kconfig +++ b/net/ceph/Kconfig @@ -41,4 +41,3 @@ config CEPH_LIB_USE_DNS_RESOLVER Documentation/networking/dns_resolver.txt If unsure, say N. - diff --git a/net/ceph/Makefile b/net/ceph/Makefile index 12bf49772d24..db09defe27d0 100644 --- a/net/ceph/Makefile +++ b/net/ceph/Makefile @@ -15,4 +15,3 @@ libceph-y := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \ auth_x.o \ ceph_fs.o ceph_strings.o ceph_hash.o \ pagevec.o snapshot.o string_table.o - diff --git a/net/ceph/auth.c b/net/ceph/auth.c index dbde2b3c3c15..fbeee068ea14 100644 --- a/net/ceph/auth.c +++ b/net/ceph/auth.c @@ -315,6 +315,22 @@ int ceph_auth_update_authorizer(struct ceph_auth_client *ac, } EXPORT_SYMBOL(ceph_auth_update_authorizer); +int ceph_auth_add_authorizer_challenge(struct ceph_auth_client *ac, + struct ceph_authorizer *a, + void *challenge_buf, + int challenge_buf_len) +{ + int ret = 0; + + mutex_lock(&ac->mutex); + if (ac->ops && ac->ops->add_authorizer_challenge) + ret = ac->ops->add_authorizer_challenge(ac, a, challenge_buf, + challenge_buf_len); + mutex_unlock(&ac->mutex); + return ret; +} +EXPORT_SYMBOL(ceph_auth_add_authorizer_challenge); + int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac, struct ceph_authorizer *a) { diff --git a/net/ceph/auth_none.c b/net/ceph/auth_none.c index 41d2a0c72236..edb7042479ed 100644 --- a/net/ceph/auth_none.c +++ b/net/ceph/auth_none.c @@ -142,4 +142,3 @@ int ceph_auth_none_init(struct ceph_auth_client *ac) ac->ops = &ceph_auth_none_ops; return 0; } - diff --git a/net/ceph/auth_none.h b/net/ceph/auth_none.h index 860ed9875791..4158f064302e 100644 --- a/net/ceph/auth_none.h +++ b/net/ceph/auth_none.h @@ -26,4 +26,3 @@ struct ceph_auth_none_info { int ceph_auth_none_init(struct ceph_auth_client *ac); #endif - diff --git 
a/net/ceph/auth_x.c b/net/ceph/auth_x.c index 2f4a1baf5f52..b52732337ca6 100644 --- a/net/ceph/auth_x.c +++ b/net/ceph/auth_x.c @@ -9,6 +9,7 @@ #include #include +#include #include #include @@ -70,25 +71,40 @@ static int ceph_x_encrypt(struct ceph_crypto_key *secret, void *buf, return sizeof(u32) + ciphertext_len; } +static int __ceph_x_decrypt(struct ceph_crypto_key *secret, void *p, + int ciphertext_len) +{ + struct ceph_x_encrypt_header *hdr = p; + int plaintext_len; + int ret; + + ret = ceph_crypt(secret, false, p, ciphertext_len, ciphertext_len, + &plaintext_len); + if (ret) + return ret; + + if (le64_to_cpu(hdr->magic) != CEPHX_ENC_MAGIC) { + pr_err("%s bad magic\n", __func__); + return -EINVAL; + } + + return plaintext_len - sizeof(*hdr); +} + static int ceph_x_decrypt(struct ceph_crypto_key *secret, void **p, void *end) { - struct ceph_x_encrypt_header *hdr = *p + sizeof(u32); - int ciphertext_len, plaintext_len; + int ciphertext_len; int ret; ceph_decode_32_safe(p, end, ciphertext_len, e_inval); ceph_decode_need(p, end, ciphertext_len, e_inval); - ret = ceph_crypt(secret, false, *p, end - *p, ciphertext_len, - &plaintext_len); - if (ret) + ret = __ceph_x_decrypt(secret, *p, ciphertext_len); + if (ret < 0) return ret; - if (hdr->struct_v != 1 || le64_to_cpu(hdr->magic) != CEPHX_ENC_MAGIC) - return -EPERM; - *p += ciphertext_len; - return plaintext_len - sizeof(struct ceph_x_encrypt_header); + return ret; e_inval: return -EINVAL; @@ -149,12 +165,12 @@ static int process_one_ticket(struct ceph_auth_client *ac, void *dp, *dend; int dlen; char is_enc; - struct timespec validity; + struct timespec64 validity; void *tp, *tpend; void **ptp; struct ceph_crypto_key new_session_key = { 0 }; struct ceph_buffer *new_ticket_blob; - unsigned long new_expires, new_renew_after; + time64_t new_expires, new_renew_after; u64 new_secret_id; int ret; @@ -189,11 +205,11 @@ static int process_one_ticket(struct ceph_auth_client *ac, if (ret) goto out; - 
ceph_decode_timespec(&validity, dp); + ceph_decode_timespec64(&validity, dp); dp += sizeof(struct ceph_timespec); - new_expires = get_seconds() + validity.tv_sec; + new_expires = ktime_get_real_seconds() + validity.tv_sec; new_renew_after = new_expires - (validity.tv_sec / 4); - dout(" expires=%lu renew_after=%lu\n", new_expires, + dout(" expires=%llu renew_after=%llu\n", new_expires, new_renew_after); /* ticket blob for service */ @@ -275,6 +291,51 @@ bad: return -EINVAL; } +/* + * Encode and encrypt the second part (ceph_x_authorize_b) of the + * authorizer. The first part (ceph_x_authorize_a) should already be + * encoded. + */ +static int encrypt_authorizer(struct ceph_x_authorizer *au, + u64 *server_challenge) +{ + struct ceph_x_authorize_a *msg_a; + struct ceph_x_authorize_b *msg_b; + void *p, *end; + int ret; + + msg_a = au->buf->vec.iov_base; + WARN_ON(msg_a->ticket_blob.secret_id != cpu_to_le64(au->secret_id)); + p = (void *)(msg_a + 1) + le32_to_cpu(msg_a->ticket_blob.blob_len); + end = au->buf->vec.iov_base + au->buf->vec.iov_len; + + msg_b = p + ceph_x_encrypt_offset(); + msg_b->struct_v = 2; + msg_b->nonce = cpu_to_le64(au->nonce); + if (server_challenge) { + msg_b->have_challenge = 1; + msg_b->server_challenge_plus_one = + cpu_to_le64(*server_challenge + 1); + } else { + msg_b->have_challenge = 0; + msg_b->server_challenge_plus_one = 0; + } + + ret = ceph_x_encrypt(&au->session_key, p, end - p, sizeof(*msg_b)); + if (ret < 0) + return ret; + + p += ret; + if (server_challenge) { + WARN_ON(p != end); + } else { + WARN_ON(p > end); + au->buf->vec.iov_len = p - au->buf->vec.iov_base; + } + + return 0; +} + static void ceph_x_authorizer_cleanup(struct ceph_x_authorizer *au) { ceph_crypto_key_destroy(&au->session_key); @@ -291,7 +352,6 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac, int maxlen; struct ceph_x_authorize_a *msg_a; struct ceph_x_authorize_b *msg_b; - void *p, *end; int ret; int ticket_blob_len = (th->ticket_blob ? 
th->ticket_blob->vec.iov_len : 0); @@ -335,21 +395,13 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac, dout(" th %p secret_id %lld %lld\n", th, th->secret_id, le64_to_cpu(msg_a->ticket_blob.secret_id)); - p = msg_a + 1; - p += ticket_blob_len; - end = au->buf->vec.iov_base + au->buf->vec.iov_len; - - msg_b = p + ceph_x_encrypt_offset(); - msg_b->struct_v = 1; get_random_bytes(&au->nonce, sizeof(au->nonce)); - msg_b->nonce = cpu_to_le64(au->nonce); - ret = ceph_x_encrypt(&au->session_key, p, end - p, sizeof(*msg_b)); - if (ret < 0) + ret = encrypt_authorizer(au, NULL); + if (ret) { + pr_err("failed to encrypt authorizer: %d", ret); goto out_au; + } - p += ret; - WARN_ON(p > end); - au->buf->vec.iov_len = p - au->buf->vec.iov_base; dout(" built authorizer nonce %llx len %d\n", au->nonce, (int)au->buf->vec.iov_len); return 0; @@ -385,13 +437,13 @@ static bool need_key(struct ceph_x_ticket_handler *th) if (!th->have_key) return true; - return get_seconds() >= th->renew_after; + return ktime_get_real_seconds() >= th->renew_after; } static bool have_key(struct ceph_x_ticket_handler *th) { if (th->have_key) { - if (get_seconds() >= th->expires) + if (ktime_get_real_seconds() >= th->expires) th->have_key = false; } @@ -626,6 +678,54 @@ static int ceph_x_update_authorizer( return 0; } +static int decrypt_authorize_challenge(struct ceph_x_authorizer *au, + void *challenge_buf, + int challenge_buf_len, + u64 *server_challenge) +{ + struct ceph_x_authorize_challenge *ch = + challenge_buf + sizeof(struct ceph_x_encrypt_header); + int ret; + + /* no leading len */ + ret = __ceph_x_decrypt(&au->session_key, challenge_buf, + challenge_buf_len); + if (ret < 0) + return ret; + if (ret < sizeof(*ch)) { + pr_err("bad size %d for ceph_x_authorize_challenge\n", ret); + return -EINVAL; + } + + *server_challenge = le64_to_cpu(ch->server_challenge); + return 0; +} + +static int ceph_x_add_authorizer_challenge(struct ceph_auth_client *ac, + struct ceph_authorizer *a, + 
void *challenge_buf, + int challenge_buf_len) +{ + struct ceph_x_authorizer *au = (void *)a; + u64 server_challenge; + int ret; + + ret = decrypt_authorize_challenge(au, challenge_buf, challenge_buf_len, + &server_challenge); + if (ret) { + pr_err("failed to decrypt authorize challenge: %d", ret); + return ret; + } + + ret = encrypt_authorizer(au, &server_challenge); + if (ret) { + pr_err("failed to encrypt authorizer w/ challenge: %d", ret); + return ret; + } + + return 0; +} + static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac, struct ceph_authorizer *a) { @@ -637,8 +737,10 @@ static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac, ret = ceph_x_decrypt(&au->session_key, &p, p + CEPHX_AU_ENC_BUF_LEN); if (ret < 0) return ret; - if (ret != sizeof(*reply)) - return -EPERM; + if (ret < sizeof(*reply)) { + pr_err("bad size %d for ceph_x_authorize_reply\n", ret); + return -EINVAL; + } if (au->nonce + 1 != le64_to_cpu(reply->nonce_plus_one)) ret = -EPERM; @@ -704,26 +806,64 @@ static int calc_signature(struct ceph_x_authorizer *au, struct ceph_msg *msg, __le64 *psig) { void *enc_buf = au->enc_buf; - struct { - __le32 len; - __le32 header_crc; - __le32 front_crc; - __le32 middle_crc; - __le32 data_crc; - } __packed *sigblock = enc_buf + ceph_x_encrypt_offset(); int ret; - sigblock->len = cpu_to_le32(4*sizeof(u32)); - sigblock->header_crc = msg->hdr.crc; - sigblock->front_crc = msg->footer.front_crc; - sigblock->middle_crc = msg->footer.middle_crc; - sigblock->data_crc = msg->footer.data_crc; - ret = ceph_x_encrypt(&au->session_key, enc_buf, CEPHX_AU_ENC_BUF_LEN, - sizeof(*sigblock)); - if (ret < 0) - return ret; + if (!CEPH_HAVE_FEATURE(msg->con->peer_features, CEPHX_V2)) { + struct { + __le32 len; + __le32 header_crc; + __le32 front_crc; + __le32 middle_crc; + __le32 data_crc; + } __packed *sigblock = enc_buf + ceph_x_encrypt_offset(); + + sigblock->len = cpu_to_le32(4*sizeof(u32)); + sigblock->header_crc = msg->hdr.crc; + 
sigblock->front_crc = msg->footer.front_crc; + sigblock->middle_crc = msg->footer.middle_crc; + sigblock->data_crc = msg->footer.data_crc; + + ret = ceph_x_encrypt(&au->session_key, enc_buf, + CEPHX_AU_ENC_BUF_LEN, sizeof(*sigblock)); + if (ret < 0) + return ret; + + *psig = *(__le64 *)(enc_buf + sizeof(u32)); + } else { + struct { + __le32 header_crc; + __le32 front_crc; + __le32 front_len; + __le32 middle_crc; + __le32 middle_len; + __le32 data_crc; + __le32 data_len; + __le32 seq_lower_word; + } __packed *sigblock = enc_buf; + struct { + __le64 a, b, c, d; + } __packed *penc = enc_buf; + int ciphertext_len; + + sigblock->header_crc = msg->hdr.crc; + sigblock->front_crc = msg->footer.front_crc; + sigblock->front_len = msg->hdr.front_len; + sigblock->middle_crc = msg->footer.middle_crc; + sigblock->middle_len = msg->hdr.middle_len; + sigblock->data_crc = msg->footer.data_crc; + sigblock->data_len = msg->hdr.data_len; + sigblock->seq_lower_word = *(__le32 *)&msg->hdr.seq; + + /* no leading len, no ceph_x_encrypt_header */ + ret = ceph_crypt(&au->session_key, true, enc_buf, + CEPHX_AU_ENC_BUF_LEN, sizeof(*sigblock), + &ciphertext_len); + if (ret) + return ret; + + *psig = penc->a ^ penc->b ^ penc->c ^ penc->d; + } - *psig = *(__le64 *)(enc_buf + sizeof(u32)); return 0; } @@ -778,6 +918,7 @@ static const struct ceph_auth_client_ops ceph_x_ops = { .handle_reply = ceph_x_handle_reply, .create_authorizer = ceph_x_create_authorizer, .update_authorizer = ceph_x_update_authorizer, + .add_authorizer_challenge = ceph_x_add_authorizer_challenge, .verify_authorizer_reply = ceph_x_verify_authorizer_reply, .invalidate_authorizer = ceph_x_invalidate_authorizer, .reset = ceph_x_reset, @@ -823,5 +964,3 @@ out_nomem: out: return ret; } - - diff --git a/net/ceph/auth_x.h b/net/ceph/auth_x.h index 454cb54568af..c03735f96df9 100644 --- a/net/ceph/auth_x.h +++ b/net/ceph/auth_x.h @@ -22,7 +22,7 @@ struct ceph_x_ticket_handler { u64 secret_id; struct ceph_buffer *ticket_blob; - unsigned 
long renew_after, expires; + time64_t renew_after, expires; }; #define CEPHX_AU_ENC_BUF_LEN 128 /* big enough for encrypted blob */ @@ -52,4 +52,3 @@ struct ceph_x_info { int ceph_x_init(struct ceph_auth_client *ac); #endif - diff --git a/net/ceph/auth_x_protocol.h b/net/ceph/auth_x_protocol.h index 32c13d763b9a..24b0b74564d0 100644 --- a/net/ceph/auth_x_protocol.h +++ b/net/ceph/auth_x_protocol.h @@ -70,6 +70,13 @@ struct ceph_x_authorize_a { struct ceph_x_authorize_b { __u8 struct_v; __le64 nonce; + __u8 have_challenge; + __le64 server_challenge_plus_one; +} __attribute__ ((packed)); + +struct ceph_x_authorize_challenge { + __u8 struct_v; + __le64 server_challenge; } __attribute__ ((packed)); struct ceph_x_authorize_reply { diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 584fdbef2088..87afb9ec4c68 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -304,7 +304,7 @@ static int get_secret(struct ceph_crypto_key *dst, const char *name) { struct ceph_crypto_key *ckey; ukey = request_key(&key_type_ceph, name, NULL); - if (!ukey || IS_ERR(ukey)) { + if (IS_ERR(ukey)) { /* request_key errors don't map nicely to mount(2) errors; don't even try, but still printk */ key_err = PTR_ERR(ukey); @@ -379,7 +379,7 @@ ceph_parse_options(char *options, const char *dev_name, /* parse mount options */ while ((c = strsep(&options, ",")) != NULL) { - int token, intval, ret; + int token, intval; if (!*c) continue; err = -EINVAL; @@ -394,11 +394,10 @@ ceph_parse_options(char *options, const char *dev_name, continue; } if (token < Opt_last_int) { - ret = match_int(&argstr[0], &intval); - if (ret < 0) { - pr_err("bad mount option arg (not int) " - "at '%s'\n", c); - continue; + err = match_int(&argstr[0], &intval); + if (err < 0) { + pr_err("bad option arg (not int) at '%s'\n", c); + goto out; } dout("got int token %d val %d\n", token, intval); } else if (token > Opt_last_int && token < Opt_last_string) { diff --git a/net/ceph/cls_lock_client.c 
b/net/ceph/cls_lock_client.c index 8d2032b2f225..2105a6eaa66c 100644 --- a/net/ceph/cls_lock_client.c +++ b/net/ceph/cls_lock_client.c @@ -32,7 +32,7 @@ int ceph_cls_lock(struct ceph_osd_client *osdc, int desc_len = strlen(desc); void *p, *end; struct page *lock_op_page; - struct timespec mtime; + struct timespec64 mtime; int ret; lock_op_buf_size = name_len + sizeof(__le32) + @@ -63,7 +63,7 @@ int ceph_cls_lock(struct ceph_osd_client *osdc, ceph_encode_string(&p, end, desc, desc_len); /* only support infinite duration */ memset(&mtime, 0, sizeof(mtime)); - ceph_encode_timespec(p, &mtime); + ceph_encode_timespec64(p, &mtime); p += sizeof(struct ceph_timespec); ceph_encode_8(&p, flags); diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c index 417df675c71b..3f323ed9df52 100644 --- a/net/ceph/crush/mapper.c +++ b/net/ceph/crush/mapper.c @@ -514,7 +514,7 @@ static int crush_choose_firstn(const struct crush_map *map, in, work->work[-1-in->id], x, r, (choose_args ? - &choose_args[-1-in->id] : 0), + &choose_args[-1-in->id] : NULL), outpos); if (item >= map->max_devices) { dprintk(" bad item %d\n", item); @@ -725,7 +725,7 @@ static void crush_choose_indep(const struct crush_map *map, in, work->work[-1-in->id], x, r, (choose_args ? 
- &choose_args[-1-in->id] : 0), + &choose_args[-1-in->id] : NULL), outpos); if (item >= map->max_devices) { dprintk(" bad item %d\n", item); diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index c6413c360771..0a187196aeed 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -1417,11 +1417,11 @@ static void prepare_write_keepalive(struct ceph_connection *con) dout("prepare_write_keepalive %p\n", con); con_out_kvec_reset(con); if (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2) { - struct timespec now; + struct timespec64 now; - ktime_get_real_ts(&now); + ktime_get_real_ts64(&now); con_out_kvec_add(con, sizeof(tag_keepalive2), &tag_keepalive2); - ceph_encode_timespec(&con->out_temp_keepalive2, &now); + ceph_encode_timespec64(&con->out_temp_keepalive2, &now); con_out_kvec_add(con, sizeof(con->out_temp_keepalive2), &con->out_temp_keepalive2); } else { @@ -1434,24 +1434,26 @@ static void prepare_write_keepalive(struct ceph_connection *con) * Connection negotiation. */ -static struct ceph_auth_handshake *get_connect_authorizer(struct ceph_connection *con, - int *auth_proto) +static int get_connect_authorizer(struct ceph_connection *con) { struct ceph_auth_handshake *auth; + int auth_proto; if (!con->ops->get_authorizer) { + con->auth = NULL; con->out_connect.authorizer_protocol = CEPH_AUTH_UNKNOWN; con->out_connect.authorizer_len = 0; - return NULL; + return 0; } - auth = con->ops->get_authorizer(con, auth_proto, con->auth_retry); + auth = con->ops->get_authorizer(con, &auth_proto, con->auth_retry); if (IS_ERR(auth)) - return auth; + return PTR_ERR(auth); - con->auth_reply_buf = auth->authorizer_reply_buf; - con->auth_reply_buf_len = auth->authorizer_reply_buf_len; - return auth; + con->auth = auth; + con->out_connect.authorizer_protocol = cpu_to_le32(auth_proto); + con->out_connect.authorizer_len = cpu_to_le32(auth->authorizer_buf_len); + return 0; } /* @@ -1467,12 +1469,22 @@ static void prepare_write_banner(struct ceph_connection *con) 
con_flag_set(con, CON_FLAG_WRITE_PENDING); } +static void __prepare_write_connect(struct ceph_connection *con) +{ + con_out_kvec_add(con, sizeof(con->out_connect), &con->out_connect); + if (con->auth) + con_out_kvec_add(con, con->auth->authorizer_buf_len, + con->auth->authorizer_buf); + + con->out_more = 0; + con_flag_set(con, CON_FLAG_WRITE_PENDING); +} + static int prepare_write_connect(struct ceph_connection *con) { unsigned int global_seq = get_global_seq(con->msgr, 0); int proto; - int auth_proto; - struct ceph_auth_handshake *auth; + int ret; switch (con->peer_name.type) { case CEPH_ENTITY_TYPE_MON: @@ -1499,24 +1511,11 @@ static int prepare_write_connect(struct ceph_connection *con) con->out_connect.protocol_version = cpu_to_le32(proto); con->out_connect.flags = 0; - auth_proto = CEPH_AUTH_UNKNOWN; - auth = get_connect_authorizer(con, &auth_proto); - if (IS_ERR(auth)) - return PTR_ERR(auth); - - con->out_connect.authorizer_protocol = cpu_to_le32(auth_proto); - con->out_connect.authorizer_len = auth ? 
- cpu_to_le32(auth->authorizer_buf_len) : 0; - - con_out_kvec_add(con, sizeof (con->out_connect), - &con->out_connect); - if (auth && auth->authorizer_buf_len) - con_out_kvec_add(con, auth->authorizer_buf_len, - auth->authorizer_buf); - - con->out_more = 0; - con_flag_set(con, CON_FLAG_WRITE_PENDING); + ret = get_connect_authorizer(con); + if (ret) + return ret; + __prepare_write_connect(con); return 0; } @@ -1781,11 +1780,21 @@ static int read_partial_connect(struct ceph_connection *con) if (ret <= 0) goto out; - size = le32_to_cpu(con->in_reply.authorizer_len); - end += size; - ret = read_partial(con, end, size, con->auth_reply_buf); - if (ret <= 0) - goto out; + if (con->auth) { + size = le32_to_cpu(con->in_reply.authorizer_len); + if (size > con->auth->authorizer_reply_buf_len) { + pr_err("authorizer reply too big: %d > %zu\n", size, + con->auth->authorizer_reply_buf_len); + ret = -EINVAL; + goto out; + } + + end += size; + ret = read_partial(con, end, size, + con->auth->authorizer_reply_buf); + if (ret <= 0) + goto out; + } dout("read_partial_connect %p tag %d, con_seq = %u, g_seq = %u\n", con, (int)con->in_reply.tag, @@ -1793,7 +1802,6 @@ static int read_partial_connect(struct ceph_connection *con) le32_to_cpu(con->in_reply.global_seq)); out: return ret; - } /* @@ -2076,12 +2084,27 @@ static int process_connect(struct ceph_connection *con) dout("process_connect on %p tag %d\n", con, (int)con->in_tag); - if (con->auth_reply_buf) { + if (con->auth) { /* * Any connection that defines ->get_authorizer() - * should also define ->verify_authorizer_reply(). + * should also define ->add_authorizer_challenge() and + * ->verify_authorizer_reply(). + * * See get_connect_authorizer(). 
*/ + if (con->in_reply.tag == CEPH_MSGR_TAG_CHALLENGE_AUTHORIZER) { + ret = con->ops->add_authorizer_challenge( + con, con->auth->authorizer_reply_buf, + le32_to_cpu(con->in_reply.authorizer_len)); + if (ret < 0) + return ret; + + con_out_kvec_reset(con); + __prepare_write_connect(con); + prepare_read_connect(con); + return 0; + } + ret = con->ops->verify_authorizer_reply(con); if (ret < 0) { con->error_msg = "bad authorize reply"; @@ -2555,7 +2578,7 @@ static int read_keepalive_ack(struct ceph_connection *con) int ret = read_partial(con, size, size, &ceph_ts); if (ret <= 0) return ret; - ceph_decode_timespec(&con->last_keepalive_ack, &ceph_ts); + ceph_decode_timespec64(&con->last_keepalive_ack, &ceph_ts); prepare_read_tag(con); return 1; } @@ -3223,12 +3246,12 @@ bool ceph_con_keepalive_expired(struct ceph_connection *con, { if (interval > 0 && (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2)) { - struct timespec now; - struct timespec ts; - ktime_get_real_ts(&now); - jiffies_to_timespec(interval, &ts); - ts = timespec_add(con->last_keepalive_ack, ts); - return timespec_compare(&now, &ts) >= 0; + struct timespec64 now; + struct timespec64 ts; + ktime_get_real_ts64(&now); + jiffies_to_timespec64(interval, &ts); + ts = timespec64_add(con->last_keepalive_ack, ts); + return timespec64_compare(&now, &ts) >= 0; } return false; } diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index d7a7a2330ef7..18deb3d889c4 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -1249,7 +1249,7 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg) if (monc->client->extra_mon_dispatch && monc->client->extra_mon_dispatch(monc->client, msg) == 0) break; - + pr_err("received unknown message type %d %s\n", type, ceph_msg_type_name(type)); } diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index a00c74f1154e..60934bd8796c 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1978,7 +1978,7 @@ static void 
encode_request_partial(struct ceph_osd_request *req, p += sizeof(struct ceph_blkin_trace_info); ceph_encode_32(&p, 0); /* client_inc, always 0 */ - ceph_encode_timespec(p, &req->r_mtime); + ceph_encode_timespec64(p, &req->r_mtime); p += sizeof(struct ceph_timespec); encode_oloc(&p, end, &req->r_t.target_oloc); @@ -4512,7 +4512,7 @@ ceph_osdc_watch(struct ceph_osd_client *osdc, ceph_oid_copy(&lreq->t.base_oid, oid); ceph_oloc_copy(&lreq->t.base_oloc, oloc); lreq->t.flags = CEPH_OSD_FLAG_WRITE; - ktime_get_real_ts(&lreq->mtime); + ktime_get_real_ts64(&lreq->mtime); lreq->reg_req = alloc_linger_request(lreq); if (!lreq->reg_req) { @@ -4570,7 +4570,7 @@ int ceph_osdc_unwatch(struct ceph_osd_client *osdc, ceph_oid_copy(&req->r_base_oid, &lreq->t.base_oid); ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc); req->r_flags = CEPH_OSD_FLAG_WRITE; - ktime_get_real_ts(&req->r_mtime); + ktime_get_real_ts64(&req->r_mtime); osd_req_op_watch_init(req, 0, lreq->linger_id, CEPH_OSD_WATCH_OP_UNWATCH); @@ -4591,7 +4591,7 @@ EXPORT_SYMBOL(ceph_osdc_unwatch); static int osd_req_op_notify_ack_init(struct ceph_osd_request *req, int which, u64 notify_id, u64 cookie, void *payload, - size_t payload_len) + u32 payload_len) { struct ceph_osd_req_op *op; struct ceph_pagelist *pl; @@ -4628,7 +4628,7 @@ int ceph_osdc_notify_ack(struct ceph_osd_client *osdc, u64 notify_id, u64 cookie, void *payload, - size_t payload_len) + u32 payload_len) { struct ceph_osd_request *req; int ret; @@ -4661,7 +4661,7 @@ EXPORT_SYMBOL(ceph_osdc_notify_ack); static int osd_req_op_notify_init(struct ceph_osd_request *req, int which, u64 cookie, u32 prot_ver, u32 timeout, - void *payload, size_t payload_len) + void *payload, u32 payload_len) { struct ceph_osd_req_op *op; struct ceph_pagelist *pl; @@ -4701,7 +4701,7 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc, struct ceph_object_id *oid, struct ceph_object_locator *oloc, void *payload, - size_t payload_len, + u32 payload_len, u32 timeout, struct page 
***preply_pages, size_t *preply_len) @@ -5136,7 +5136,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, struct ceph_snap_context *snapc, u64 off, u64 len, u32 truncate_seq, u64 truncate_size, - struct timespec *mtime, + struct timespec64 *mtime, struct page **pages, int num_pages) { struct ceph_osd_request *req; @@ -5393,6 +5393,16 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con, return auth; } +static int add_authorizer_challenge(struct ceph_connection *con, + void *challenge_buf, int challenge_buf_len) +{ + struct ceph_osd *o = con->private; + struct ceph_osd_client *osdc = o->o_osdc; + struct ceph_auth_client *ac = osdc->client->monc.auth; + + return ceph_auth_add_authorizer_challenge(ac, o->o_auth.authorizer, + challenge_buf, challenge_buf_len); +} static int verify_authorizer_reply(struct ceph_connection *con) { @@ -5442,6 +5452,7 @@ static const struct ceph_connection_operations osd_con_ops = { .put = put_osd_con, .dispatch = dispatch, .get_authorizer = get_authorizer, + .add_authorizer_challenge = add_authorizer_challenge, .verify_authorizer_reply = verify_authorizer_reply, .invalidate_authorizer = invalidate_authorizer, .alloc_msg = alloc_msg, diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c index e560d3975f41..d3736f5bffec 100644 --- a/net/ceph/pagevec.c +++ b/net/ceph/pagevec.c @@ -197,4 +197,3 @@ void ceph_zero_page_vector_range(int off, int len, struct page **pages) } } EXPORT_SYMBOL(ceph_zero_page_vector_range); -