From a1b879eefc2b34cd3f17187ef6fc1cf3960e9518 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 15 May 2019 12:09:17 +0100 Subject: [PATCH 01/19] afs: Fix key leak in afs_release() and afs_evict_inode() Fix afs_release() to go through the cleanup part of the function if FMODE_WRITE is set rather than exiting through vfs_fsync() (which skips the cleanup). The cleanup involves discarding the refs on the key used for file ops and the writeback key record. Also fix afs_evict_inode() to clean up any left over wb keys attached to the inode/vnode when it is removed. Fixes: 5a8132761609 ("afs: Do better accretion of small writes on newly created content") Signed-off-by: David Howells --- fs/afs/file.c | 7 ++++--- fs/afs/inode.c | 1 + 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/afs/file.c b/fs/afs/file.c index e8d6619890a9..b0a43e250a9b 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -170,11 +170,12 @@ int afs_release(struct inode *inode, struct file *file) { struct afs_vnode *vnode = AFS_FS_I(inode); struct afs_file *af = file->private_data; + int ret = 0; _enter("{%llx:%llu},", vnode->fid.vid, vnode->fid.vnode); if ((file->f_mode & FMODE_WRITE)) - return vfs_fsync(file, 0); + ret = vfs_fsync(file, 0); file->private_data = NULL; if (af->wb) @@ -182,8 +183,8 @@ int afs_release(struct inode *inode, struct file *file) key_put(af->key); kfree(af); afs_prune_wb_keys(vnode); - _leave(" = 0"); - return 0; + _leave(" = %d", ret); + return ret; } /* diff --git a/fs/afs/inode.c b/fs/afs/inode.c index c4652b42d545..f30aa5eacd39 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -573,6 +573,7 @@ void afs_evict_inode(struct inode *inode) } #endif + afs_prune_wb_keys(vnode); afs_put_permits(rcu_access_pointer(vnode->permit_cache)); key_put(vnode->silly_key); vnode->silly_key = NULL; From cc1dd5c85cb70ebe09ccf1cc34f29af65442a10f Mon Sep 17 00:00:00 2001 From: David Howells Date: Sun, 12 May 2019 08:05:10 +0100 Subject: [PATCH 02/19] afs: Fix incorrect error handling in afs_xattr_get_acl() Fix incorrect error handling in afs_xattr_get_acl() where there appears to be a redundant assignment before return, but in fact the return should be a goto to the error handling at the end of the function. Fixes: 260f082bae6d ("afs: Get an AFS3 ACL as an xattr") Addresses-Coverity: ("Unused Value") Reported-by: Colin Ian King Signed-off-by: David Howells cc: Joe Perches --- fs/afs/xattr.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/afs/xattr.c b/fs/afs/xattr.c index c81f85003fc7..b6c44e75b361 100644 --- a/fs/afs/xattr.c +++ b/fs/afs/xattr.c @@ -71,11 +71,10 @@ static int afs_xattr_get_acl(const struct xattr_handler *handler, if (ret == 0) { ret = acl->size; if (size > 0) { - ret = -ERANGE; - if (acl->size > size) - return -ERANGE; - memcpy(buffer, acl->data, acl->size); - ret = acl->size; + if (acl->size <= size) + memcpy(buffer, acl->data, acl->size); + else + ret = -ERANGE; } kfree(acl); } From 773e0c40253443e0ce5491cb0e414b62f7cc45ed Mon Sep 17 00:00:00 2001 From: David Howells Date: Sun, 12 May 2019 08:31:23 +0100 Subject: [PATCH 03/19] afs: Fix afs_xattr_get_yfs() to not try freeing an error value afs_xattr_get_yfs() tries to free yacl, which may hold an error value (say if yfs_fs_fetch_opaque_acl() failed and returned an error). Fix this by allocating yacl up front (since it's a fixed-length struct, unlike afs_acl) and passing it in to the RPC function. This also allows the flags to be placed in the object rather than passing them through to the RPC function. Fixes: ae46578b963f ("afs: Get YFS ACLs and information through xattrs") Signed-off-by: David Howells --- fs/afs/internal.h | 2 +- fs/afs/xattr.c | 88 +++++++++++++++++++++++++--------------------- fs/afs/yfsclient.c | 29 ++++----------- 3 files changed, 54 insertions(+), 65 deletions(-) diff --git a/fs/afs/internal.h b/fs/afs/internal.h index b3cd6e8ad59d..74ee0f8ef8dd 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -1382,7 +1382,7 @@ struct yfs_acl { }; extern void yfs_free_opaque_acl(struct yfs_acl *); -extern struct yfs_acl *yfs_fs_fetch_opaque_acl(struct afs_fs_cursor *, unsigned int); +extern struct yfs_acl *yfs_fs_fetch_opaque_acl(struct afs_fs_cursor *, struct yfs_acl *); extern int yfs_fs_store_opaque_acl2(struct afs_fs_cursor *, const struct afs_acl *); /* diff --git a/fs/afs/xattr.c b/fs/afs/xattr.c index b6c44e75b361..d12bcda911e1 100644 --- a/fs/afs/xattr.c +++ b/fs/afs/xattr.c @@ -148,9 +148,8 @@ static int afs_xattr_get_yfs(const struct xattr_handler *handler, struct afs_vnode *vnode = AFS_FS_I(inode); struct yfs_acl *yacl = NULL; struct key *key; - unsigned int flags = 0; char buf[16], *data; - int which = 0, dsize, ret; + int which = 0, dsize, ret = -ENOMEM; if (strcmp(name, "acl") == 0) which = 0; @@ -163,20 +162,26 @@ static int afs_xattr_get_yfs(const struct xattr_handler *handler, else return -EOPNOTSUPP; + yacl = kzalloc(sizeof(struct yfs_acl), GFP_KERNEL); + if (!yacl) + goto error; + if (which == 0) - flags |= YFS_ACL_WANT_ACL; + yacl->flags |= YFS_ACL_WANT_ACL; else if (which == 3) - flags |= YFS_ACL_WANT_VOL_ACL; + yacl->flags |= YFS_ACL_WANT_VOL_ACL; key = afs_request_key(vnode->volume->cell); - if (IS_ERR(key)) - return PTR_ERR(key); + if (IS_ERR(key)) { + ret = PTR_ERR(key); + goto error_yacl; + } ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, vnode, key)) { while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(vnode); - yacl = yfs_fs_fetch_opaque_acl(&fc, flags); + yfs_fs_fetch_opaque_acl(&fc, yacl); } afs_check_for_remote_deletion(&fc, fc.vnode); @@ -184,44 +189,45 @@ static int afs_xattr_get_yfs(const struct xattr_handler *handler, ret = afs_end_vnode_operation(&fc); } - if (ret == 0) { - switch (which) { - case 0: - data = yacl->acl->data; - dsize = yacl->acl->size; - break; - case 1: - data = buf; - dsize = snprintf(buf, sizeof(buf), "%u", - yacl->inherit_flag); - break; - case 2: - data = buf; - dsize = snprintf(buf, sizeof(buf), "%u", - yacl->num_cleaned); - break; - case 3: - data = yacl->vol_acl->data; - dsize = yacl->vol_acl->size; - break; - default: - ret = -EOPNOTSUPP; - goto out; - } + if (ret < 0) + goto error_key; - ret = dsize; - if (size > 0) { - if (dsize > size) { - ret = -ERANGE; - goto out; - } - memcpy(buffer, data, dsize); - } + switch (which) { + case 0: + data = yacl->acl->data; + dsize = yacl->acl->size; + break; + case 1: + data = buf; + dsize = snprintf(buf, sizeof(buf), "%u", yacl->inherit_flag); + break; + case 2: + data = buf; + dsize = snprintf(buf, sizeof(buf), "%u", yacl->num_cleaned); + break; + case 3: + data = yacl->vol_acl->data; + dsize = yacl->vol_acl->size; + break; + default: + ret = -EOPNOTSUPP; + goto error_key; } -out: - yfs_free_opaque_acl(yacl); + ret = dsize; + if (size > 0) { + if (dsize > size) { + ret = -ERANGE; + goto error_key; + } + memcpy(buffer, data, dsize); + } + +error_key: key_put(key); +error_yacl: + yfs_free_opaque_acl(yacl); +error: return ret; } diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c index 6cf7d161baa1..d3e9e3fe0b58 100644 --- a/fs/afs/yfsclient.c +++ b/fs/afs/yfsclient.c @@ -2333,12 +2333,6 @@ void yfs_free_opaque_acl(struct yfs_acl *yacl) } } -static void yfs_destroy_fs_fetch_opaque_acl(struct afs_call *call) -{ - yfs_free_opaque_acl(call->reply[0]); - afs_flat_call_destructor(call); -} - /* * YFS.FetchOpaqueACL operation type */ @@ -2346,18 +2340,17 @@ static const struct afs_call_type yfs_RXYFSFetchOpaqueACL = { .name = "YFS.FetchOpaqueACL", .op = yfs_FS_FetchOpaqueACL, .deliver = yfs_deliver_fs_fetch_opaque_acl, - .destructor = yfs_destroy_fs_fetch_opaque_acl, + .destructor = afs_flat_call_destructor, }; /* * Fetch the YFS advanced ACLs for a file. */ struct yfs_acl *yfs_fs_fetch_opaque_acl(struct afs_fs_cursor *fc, - unsigned int flags) + struct yfs_acl *yacl) { struct afs_vnode *vnode = fc->vnode; struct afs_call *call; - struct yfs_acl *yacl; struct afs_net *net = afs_v2net(vnode); __be32 *bp; @@ -2370,19 +2363,15 @@ struct yfs_acl *yfs_fs_fetch_opaque_acl(struct afs_fs_cursor *fc, sizeof(__be32) * 2 + sizeof(struct yfs_xdr_YFSFetchStatus) + sizeof(struct yfs_xdr_YFSVolSync)); - if (!call) - goto nomem; + if (!call) { + fc->ac.error = -ENOMEM; + return ERR_PTR(-ENOMEM); + } - yacl = kzalloc(sizeof(struct yfs_acl), GFP_KERNEL); - if (!yacl) - goto nomem_call; - - yacl->flags = flags; call->key = fc->key; call->reply[0] = yacl; call->reply[1] = vnode; call->reply[2] = NULL; /* volsync */ - call->ret_reply0 = true; /* marshall the parameters */ bp = call->request; @@ -2396,12 +2385,6 @@ struct yfs_acl *yfs_fs_fetch_opaque_acl(struct afs_fs_cursor *fc, trace_afs_make_fs_call(call, &vnode->fid); afs_make_call(&fc->ac, call, GFP_KERNEL); return (struct yfs_acl *)afs_wait_for_call_to_complete(call, &fc->ac); - -nomem_call: - afs_put_call(call); -nomem: - fc->ac.error = -ENOMEM; - return ERR_PTR(-ENOMEM); } /* From 6b8812fc8ec28c13c09c89f88ce3958f19238838 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 7 May 2019 15:16:26 +0100 Subject: [PATCH 04/19] afs: Fix missing lock when replacing VL server list When afs_update_cell() replaces the cell->vl_servers list, it uses RCU protocol so that proc is protected, but doesn't take ->vl_servers_lock to protect afs_start_vl_iteration() (which does actually take a shared lock). Fix this by making afs_update_cell() take an exclusive lock when replacing ->vl_servers. Fixes: 0a5143f2f89c ("afs: Implement VL server rotation") Signed-off-by: David Howells --- fs/afs/cell.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/afs/cell.c b/fs/afs/cell.c index 9de46116c749..9ca075e11239 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -404,12 +404,11 @@ static void afs_update_cell(struct afs_cell *cell) clear_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags); clear_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags); - /* Exclusion on changing vl_addrs is achieved by a - * non-reentrant work item. - */ + write_lock(&cell->vl_servers_lock); old = rcu_dereference_protected(cell->vl_servers, true); rcu_assign_pointer(cell->vl_servers, vllist); cell->dns_expiry = expiry; + write_unlock(&cell->vl_servers_lock); if (old) afs_put_vlserverlist(cell->net, old); From ca1cbbdce92bc2bfdc17e4f70ad41f6e6af2d03f Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 7 May 2019 15:30:34 +0100 Subject: [PATCH 05/19] afs: Fix afs_cell records to always have a VL server list record Fix it such that afs_cell records always have a VL server list record attached, even if it's a dummy one, so that various checks can be removed. Signed-off-by: David Howells --- fs/afs/cell.c | 19 +++++++++++-------- fs/afs/proc.c | 8 ++++---- fs/afs/vl_list.c | 18 ++++++++---------- fs/afs/vl_rotate.c | 2 +- 4 files changed, 24 insertions(+), 23 deletions(-) diff --git a/fs/afs/cell.c b/fs/afs/cell.c index 9ca075e11239..47f96be05163 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -123,6 +123,7 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net, const char *name, unsigned int namelen, const char *addresses) { + struct afs_vlserver_list *vllist; struct afs_cell *cell; int i, ret; @@ -157,12 +158,10 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net, rwlock_init(&cell->proc_lock); rwlock_init(&cell->vl_servers_lock); - /* Fill in the VL server list if we were given a list of addresses to - * use. + /* Provide a VL server list, filling it in if we were given a list of + * addresses to use. */ if (addresses) { - struct afs_vlserver_list *vllist; - vllist = afs_parse_text_addrs(net, addresses, strlen(addresses), ':', VL_SERVICE, AFS_VL_PORT); @@ -171,19 +170,24 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net, goto parse_failed; } - rcu_assign_pointer(cell->vl_servers, vllist); cell->dns_expiry = TIME64_MAX; - __clear_bit(AFS_CELL_FL_NO_LOOKUP_YET, &cell->flags); } else { + ret = -ENOMEM; + vllist = afs_alloc_vlserver_list(0); + if (!vllist) + goto error; cell->dns_expiry = ktime_get_real_seconds(); } + rcu_assign_pointer(cell->vl_servers, vllist); + _leave(" = %p", cell); return cell; parse_failed: if (ret == -EINVAL) printk(KERN_ERR "kAFS: bad VL server IP address\n"); +error: kfree(cell); _leave(" = %d", ret); return ERR_PTR(ret); @@ -410,8 +414,7 @@ static void afs_update_cell(struct afs_cell *cell) cell->dns_expiry = expiry; write_unlock(&cell->vl_servers_lock); - if (old) - afs_put_vlserverlist(cell->net, old); + afs_put_vlserverlist(cell->net, old); } if (test_and_clear_bit(AFS_CELL_FL_NO_LOOKUP_YET, &cell->flags)) diff --git a/fs/afs/proc.c b/fs/afs/proc.c index be2ee3bbd0a9..371501d28e08 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -53,7 +53,7 @@ static int afs_proc_cells_show(struct seq_file *m, void *v) seq_printf(m, "%3u %6lld %2u %s\n", atomic_read(&cell->usage), cell->dns_expiry - ktime_get_real_seconds(), - vllist ? vllist->nr_servers : 0, + vllist->nr_servers, cell->name); return 0; } @@ -296,8 +296,8 @@ static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v) if (v == SEQ_START_TOKEN) { seq_printf(m, "# source %s, status %s\n", - dns_record_sources[vllist->source], - dns_lookup_statuses[vllist->status]); + dns_record_sources[vllist ? vllist->source : 0], + dns_lookup_statuses[vllist ? vllist->status : 0]); return 0; } @@ -336,7 +336,7 @@ static void *afs_proc_cell_vlservers_start(struct seq_file *m, loff_t *_pos) if (pos == 0) return SEQ_START_TOKEN; - if (!vllist || pos - 1 >= vllist->nr_servers) + if (pos - 1 >= vllist->nr_servers) return NULL; return &vllist->servers[pos - 1]; diff --git a/fs/afs/vl_list.c b/fs/afs/vl_list.c index b4f1a84519b9..61e25010ff33 100644 --- a/fs/afs/vl_list.c +++ b/fs/afs/vl_list.c @@ -232,18 +232,16 @@ struct afs_vlserver_list *afs_extract_vlserver_list(struct afs_cell *cell, if (bs.status > NR__dns_lookup_status) bs.status = NR__dns_lookup_status; + /* See if we can update an old server record */ server = NULL; - if (previous) { - /* See if we can update an old server record */ - for (i = 0; i < previous->nr_servers; i++) { - struct afs_vlserver *p = previous->servers[i].server; + for (i = 0; i < previous->nr_servers; i++) { + struct afs_vlserver *p = previous->servers[i].server; - if (p->name_len == bs.name_len && - p->port == bs.port && - strncasecmp(b, p->name, bs.name_len) == 0) { - server = afs_get_vlserver(p); - break; - } + if (p->name_len == bs.name_len && + p->port == bs.port && + strncasecmp(b, p->name, bs.name_len) == 0) { + server = afs_get_vlserver(p); + break; } } diff --git a/fs/afs/vl_rotate.c b/fs/afs/vl_rotate.c index 7adde83a0648..65629d73ad9d 100644 --- a/fs/afs/vl_rotate.c +++ b/fs/afs/vl_rotate.c @@ -55,7 +55,7 @@ static bool afs_start_vl_iteration(struct afs_vl_cursor *vc) rcu_dereference_protected(cell->vl_servers, lockdep_is_held(&cell->vl_servers_lock))); read_unlock(&cell->vl_servers_lock); - if (!vc->server_list || !vc->server_list->nr_servers) + if (!vc->server_list->nr_servers) return false; vc->untried = (1UL << vc->server_list->nr_servers) - 1; From d0660f0b3b7d1760d1ab60ec8e9d0de52e885207 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 3 May 2019 18:26:55 +0100 Subject: [PATCH 06/19] dns_resolver: Allow used keys to be invalidated Allow used DNS resolver keys to be invalidated after use if the caller is doing its own caching of the results. This reduces the amount of resources required. Fix AFS to invalidate DNS results to kill off permanent failure records that get lodged in the resolver keyring and prevent future lookups from happening. Fixes: 0a5143f2f89c ("afs: Implement VL server rotation") Signed-off-by: David Howells --- fs/afs/addr_list.c | 2 +- fs/afs/dynroot.c | 2 +- fs/cifs/dns_resolve.c | 2 +- fs/nfs/dns_resolve.c | 2 +- include/linux/dns_resolver.h | 3 ++- net/ceph/messenger.c | 2 +- net/dns_resolver/dns_query.c | 6 +++++- 7 files changed, 12 insertions(+), 7 deletions(-) diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c index 967db336d11a..9eaff55df7b4 100644 --- a/fs/afs/addr_list.c +++ b/fs/afs/addr_list.c @@ -251,7 +251,7 @@ struct afs_vlserver_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry _enter("%s", cell->name); ret = dns_query("afsdb", cell->name, cell->name_len, "srv=1", - &result, _expiry); + &result, _expiry, true); if (ret < 0) { _leave(" = %d [dns]", ret); return ERR_PTR(ret); diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c index a9ba81ddf154..07484b5a3bbb 100644 --- a/fs/afs/dynroot.c +++ b/fs/afs/dynroot.c @@ -46,7 +46,7 @@ static int afs_probe_cell_name(struct dentry *dentry) return 0; } - ret = dns_query("afsdb", name, len, "srv=1", NULL, NULL); + ret = dns_query("afsdb", name, len, "srv=1", NULL, NULL, false); if (ret == -ENODATA) ret = -EDESTADDRREQ; return ret; diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c index 7ede7306599f..1e21b2528cfb 100644 --- a/fs/cifs/dns_resolve.c +++ b/fs/cifs/dns_resolve.c @@ -77,7 +77,7 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr) goto name_is_IP_address; /* Perform the upcall */ - rc = dns_query(NULL, hostname, len, NULL, ip_addr, NULL); + rc = dns_query(NULL, hostname, len, NULL, ip_addr, NULL, false); if (rc < 0) cifs_dbg(FYI, "%s: unable to resolve: %*.*s\n", __func__, len, len, hostname); diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index a7d3df85736d..e6a700f01452 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -22,7 +22,7 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen, char *ip_addr = NULL; int ip_len; - ip_len = dns_query(NULL, name, namelen, NULL, &ip_addr, NULL); + ip_len = dns_query(NULL, name, namelen, NULL, &ip_addr, NULL, false); if (ip_len > 0) ret = rpc_pton(net, ip_addr, ip_len, sa, salen); else diff --git a/include/linux/dns_resolver.h b/include/linux/dns_resolver.h index 34a744a1bafc..f2b3ae22e6b7 100644 --- a/include/linux/dns_resolver.h +++ b/include/linux/dns_resolver.h @@ -27,6 +27,7 @@ #include extern int dns_query(const char *type, const char *name, size_t namelen, - const char *options, char **_result, time64_t *_expiry); + const char *options, char **_result, time64_t *_expiry, + bool invalidate); #endif /* _LINUX_DNS_RESOLVER_H */ diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 3083988ce729..579d6a1ac7fe 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -1889,7 +1889,7 @@ static int ceph_dns_resolve_name(const char *name, size_t namelen, return -EINVAL; /* do dns_resolve upcall */ - ip_len = dns_query(NULL, name, end - name, NULL, &ip_addr, NULL); + ip_len = dns_query(NULL, name, end - name, NULL, &ip_addr, NULL, false); if (ip_len > 0) ret = ceph_pton(ip_addr, ip_len, ss, -1, NULL); else diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c index 19aa32fc1802..2d260432b3be 100644 --- a/net/dns_resolver/dns_query.c +++ b/net/dns_resolver/dns_query.c @@ -54,6 +54,7 @@ * @options: Request options (or NULL if no options) * @_result: Where to place the returned data (or NULL) * @_expiry: Where to store the result expiry time (or NULL) + * @invalidate: Always invalidate the key after use * * The data will be returned in the pointer at *result, if provided, and the * caller is responsible for freeing it. @@ -69,7 +70,8 @@ * Returns the size of the result on success, -ve error code otherwise. */ int dns_query(const char *type, const char *name, size_t namelen, - const char *options, char **_result, time64_t *_expiry) + const char *options, char **_result, time64_t *_expiry, + bool invalidate) { struct key *rkey; struct user_key_payload *upayload; @@ -157,6 +159,8 @@ int dns_query(const char *type, const char *name, size_t namelen, ret = len; put: up_read(&rkey->sem); + if (invalidate) + key_invalidate(rkey); key_put(rkey); out: kleave(" = %d", ret); From a49294eac27c7159cd8b89a96c3b1a857e37b683 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 3 May 2019 18:30:33 +0100 Subject: [PATCH 07/19] Add wait_var_event_interruptible() Add wait_var_event_interruptible() to allow interruptible waits for events. Signed-off-by: David Howells Acked-by: Peter Zijlstra (Intel) --- include/linux/wait_bit.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h index 2b0072fa5e92..7dec36aecbd9 100644 --- a/include/linux/wait_bit.h +++ b/include/linux/wait_bit.h @@ -305,6 +305,19 @@ do { \ __ret; \ }) +#define __wait_var_event_interruptible(var, condition) \ + ___wait_var_event(var, condition, TASK_INTERRUPTIBLE, 0, 0, \ + schedule()) + +#define wait_var_event_interruptible(var, condition) \ +({ \ + int __ret = 0; \ + might_sleep(); \ + if (!(condition)) \ + __ret = __wait_var_event_interruptible(var, condition); \ + __ret; \ +}) + /** * clear_and_wake_up_bit - clear a bit and wake up anyone waiting on that bit * From d5c32c89b208e39a39cd8639aa21c012ce0daf4d Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 7 May 2019 15:06:36 +0100 Subject: [PATCH 08/19] afs: Fix cell DNS lookup Currently, once configured, AFS cells are looked up in the DNS at regular intervals - which is a waste of resources if those cells aren't being used. It also leads to a problem where cells preloaded, but not configured, before the network is brought up end up effectively statically configured with no VL servers and are unable to get any. Fix this by not doing the DNS lookup until the first time a cell is touched. It is waited for if we don't have any cached records yet, otherwise the DNS lookup to maintain the record is done in the background. This has the downside that the first time you touch a cell, you now have to wait for the upcall to do the required DNS lookups rather than them already being cached. Further, the record is not replaced if the old record has at least one server in it and the new record doesn't have any. Fixes: 0a5143f2f89c ("afs: Implement VL server rotation") Signed-off-by: David Howells --- fs/afs/cell.c | 167 +++++++++++++++++++++++++++------------------ fs/afs/internal.h | 10 +-- fs/afs/vl_rotate.c | 26 +++++-- 3 files changed, 129 insertions(+), 74 deletions(-) diff --git a/fs/afs/cell.c b/fs/afs/cell.c index 47f96be05163..9c3b07ba2222 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -152,8 +152,6 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net, atomic_set(&cell->usage, 2); INIT_WORK(&cell->manager, afs_manage_cell); - cell->flags = ((1 << AFS_CELL_FL_NOT_READY) | - (1 << AFS_CELL_FL_NO_LOOKUP_YET)); INIT_LIST_HEAD(&cell->proc_volumes); rwlock_init(&cell->proc_lock); rwlock_init(&cell->vl_servers_lock); @@ -170,17 +168,25 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net, goto parse_failed; } + vllist->source = DNS_RECORD_FROM_CONFIG; + vllist->status = DNS_LOOKUP_NOT_DONE; cell->dns_expiry = TIME64_MAX; } else { ret = -ENOMEM; vllist = afs_alloc_vlserver_list(0); if (!vllist) goto error; + vllist->source = DNS_RECORD_UNAVAILABLE; + vllist->status = DNS_LOOKUP_NOT_DONE; cell->dns_expiry = ktime_get_real_seconds(); } rcu_assign_pointer(cell->vl_servers, vllist); + cell->dns_source = vllist->source; + cell->dns_status = vllist->status; + smp_store_release(&cell->dns_lookup_count, 1); /* vs source/status */ + _leave(" = %p", cell); return cell; @@ -212,6 +218,7 @@ struct afs_cell *afs_lookup_cell(struct afs_net *net, { struct afs_cell *cell, *candidate, *cursor; struct rb_node *parent, **pp; + enum afs_cell_state state; int ret, n; _enter("%s,%s", name, vllist); @@ -271,18 +278,16 @@ struct afs_cell *afs_lookup_cell(struct afs_net *net, wait_for_cell: _debug("wait_for_cell"); - ret = wait_on_bit(&cell->flags, AFS_CELL_FL_NOT_READY, TASK_INTERRUPTIBLE); - smp_rmb(); + wait_var_event(&cell->state, + ({ + state = smp_load_acquire(&cell->state); /* vs error */ + state == AFS_CELL_ACTIVE || state == AFS_CELL_FAILED; + })); - switch (READ_ONCE(cell->state)) { - case AFS_CELL_FAILED: + /* Check the state obtained from the wait check. */ + if (state == AFS_CELL_FAILED) { ret = cell->error; goto error; - default: - _debug("weird %u %d", cell->state, cell->error); - goto error; - case AFS_CELL_ACTIVE: - break; } _leave(" = %p [cell]", cell); @@ -364,16 +369,46 @@ int afs_cell_init(struct afs_net *net, const char *rootcell) /* * Update a cell's VL server address list from the DNS. */ -static void afs_update_cell(struct afs_cell *cell) +static int afs_update_cell(struct afs_cell *cell) { - struct afs_vlserver_list *vllist, *old; + struct afs_vlserver_list *vllist, *old = NULL, *p; unsigned int min_ttl = READ_ONCE(afs_cell_min_ttl); unsigned int max_ttl = READ_ONCE(afs_cell_max_ttl); time64_t now, expiry = 0; + int ret = 0; _enter("%s", cell->name); vllist = afs_dns_query(cell, &expiry); + if (IS_ERR(vllist)) { + ret = PTR_ERR(vllist); + + _debug("%s: fail %d", cell->name, ret); + if (ret == -ENOMEM) + goto out_wake; + + ret = -ENOMEM; + vllist = afs_alloc_vlserver_list(0); + if (!vllist) + goto out_wake; + + switch (ret) { + case -ENODATA: + case -EDESTADDRREQ: + vllist->status = DNS_LOOKUP_GOT_NOT_FOUND; + break; + case -EAGAIN: + case -ECONNREFUSED: + vllist->status = DNS_LOOKUP_GOT_TEMP_FAILURE; + break; + default: + vllist->status = DNS_LOOKUP_GOT_LOCAL_FAILURE; + break; + } + } + + _debug("%s: got list %d %d", cell->name, vllist->source, vllist->status); + cell->dns_status = vllist->status; now = ktime_get_real_seconds(); if (min_ttl > max_ttl) @@ -383,46 +418,47 @@ static void afs_update_cell(struct afs_cell *cell) else if (expiry > now + max_ttl) expiry = now + max_ttl; - if (IS_ERR(vllist)) { - switch (PTR_ERR(vllist)) { - case -ENODATA: - case -EDESTADDRREQ: + _debug("%s: status %d", cell->name, vllist->status); + if (vllist->source == DNS_RECORD_UNAVAILABLE) { + switch (vllist->status) { + case DNS_LOOKUP_GOT_NOT_FOUND: /* The DNS said that the cell does not exist or there * weren't any addresses to be had. */ - set_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags); - clear_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags); cell->dns_expiry = expiry; break; - case -EAGAIN: - case -ECONNREFUSED: + case DNS_LOOKUP_BAD: + case DNS_LOOKUP_GOT_LOCAL_FAILURE: + case DNS_LOOKUP_GOT_TEMP_FAILURE: + case DNS_LOOKUP_GOT_NS_FAILURE: default: - set_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags); cell->dns_expiry = now + 10; break; } - - cell->error = -EDESTADDRREQ; } else { - clear_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags); - clear_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags); - - write_lock(&cell->vl_servers_lock); - old = rcu_dereference_protected(cell->vl_servers, true); - rcu_assign_pointer(cell->vl_servers, vllist); cell->dns_expiry = expiry; - write_unlock(&cell->vl_servers_lock); - - afs_put_vlserverlist(cell->net, old); } - if (test_and_clear_bit(AFS_CELL_FL_NO_LOOKUP_YET, &cell->flags)) - wake_up_bit(&cell->flags, AFS_CELL_FL_NO_LOOKUP_YET); + /* Replace the VL server list if the new record has servers or the old + * record doesn't. + */ + write_lock(&cell->vl_servers_lock); + p = rcu_dereference_protected(cell->vl_servers, true); + if (vllist->nr_servers > 0 || p->nr_servers == 0) { + rcu_assign_pointer(cell->vl_servers, vllist); + cell->dns_source = vllist->source; + old = p; + } + write_unlock(&cell->vl_servers_lock); + afs_put_vlserverlist(cell->net, old); - now = ktime_get_real_seconds(); - afs_set_cell_timer(cell->net, cell->dns_expiry - now); - _leave(""); +out_wake: + smp_store_release(&cell->dns_lookup_count, + cell->dns_lookup_count + 1); /* vs source/status */ + wake_up_var(&cell->dns_lookup_count); + _leave(" = %d", ret); + return ret; } /* @@ -493,8 +529,7 @@ void afs_put_cell(struct afs_net *net, struct afs_cell *cell) now = ktime_get_real_seconds(); cell->last_inactive = now; expire_delay = 0; - if (!test_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags) && - !test_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags)) + if (cell->vl_servers->nr_servers) expire_delay = afs_cell_gc_delay; if (atomic_dec_return(&cell->usage) > 1) @@ -625,11 +660,13 @@ again: goto final_destruction; if (cell->state == AFS_CELL_FAILED) goto done; - cell->state = AFS_CELL_UNSET; + smp_store_release(&cell->state, AFS_CELL_UNSET); + wake_up_var(&cell->state); goto again; case AFS_CELL_UNSET: - cell->state = AFS_CELL_ACTIVATING; + smp_store_release(&cell->state, AFS_CELL_ACTIVATING); + wake_up_var(&cell->state); goto again; case AFS_CELL_ACTIVATING: @@ -637,28 +674,29 @@ again: if (ret < 0) goto activation_failed; - cell->state = AFS_CELL_ACTIVE; - smp_wmb(); - clear_bit(AFS_CELL_FL_NOT_READY, &cell->flags); - wake_up_bit(&cell->flags, AFS_CELL_FL_NOT_READY); + smp_store_release(&cell->state, AFS_CELL_ACTIVE); + wake_up_var(&cell->state); goto again; case AFS_CELL_ACTIVE: if (atomic_read(&cell->usage) > 1) { - time64_t now = ktime_get_real_seconds(); - if (cell->dns_expiry <= now && net->live) - afs_update_cell(cell); + if (test_and_clear_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags)) { + ret = afs_update_cell(cell); + if (ret < 0) + cell->error = ret; + } goto done; } - cell->state = AFS_CELL_DEACTIVATING; + smp_store_release(&cell->state, AFS_CELL_DEACTIVATING); + wake_up_var(&cell->state); goto again; case AFS_CELL_DEACTIVATING: - set_bit(AFS_CELL_FL_NOT_READY, &cell->flags); if (atomic_read(&cell->usage) > 1) goto reverse_deactivation; afs_deactivate_cell(net, cell); - cell->state = AFS_CELL_INACTIVE; + smp_store_release(&cell->state, AFS_CELL_INACTIVE); + wake_up_var(&cell->state); goto again; default: @@ -671,17 +709,13 @@ activation_failed: cell->error = ret; afs_deactivate_cell(net, cell); - cell->state = AFS_CELL_FAILED; - smp_wmb(); - if (test_and_clear_bit(AFS_CELL_FL_NOT_READY, &cell->flags)) - wake_up_bit(&cell->flags, AFS_CELL_FL_NOT_READY); + smp_store_release(&cell->state, AFS_CELL_FAILED); /* vs error */ + wake_up_var(&cell->state); goto again; reverse_deactivation: - cell->state = AFS_CELL_ACTIVE; - smp_wmb(); - clear_bit(AFS_CELL_FL_NOT_READY, &cell->flags); - wake_up_bit(&cell->flags, AFS_CELL_FL_NOT_READY); + smp_store_release(&cell->state, AFS_CELL_ACTIVE); + wake_up_var(&cell->state); _leave(" [deact->act]"); return; @@ -741,11 +775,16 @@ void afs_manage_cells(struct work_struct *work) } if (usage == 1) { + struct afs_vlserver_list *vllist; time64_t expire_at = cell->last_inactive; - if (!test_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags) && - !test_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags)) + read_lock(&cell->vl_servers_lock); + vllist = rcu_dereference_protected( + cell->vl_servers, + lockdep_is_held(&cell->vl_servers_lock)); + if (vllist->nr_servers > 0) expire_at += afs_cell_gc_delay; + read_unlock(&cell->vl_servers_lock); if (purging || expire_at <= now) sched_cell = true; else if (expire_at < next_manage) @@ -753,10 +792,8 @@ void afs_manage_cells(struct work_struct *work) } if (!purging) { - if (cell->dns_expiry <= now) + if (test_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags)) sched_cell = true; - else if (cell->dns_expiry <= next_manage) - next_manage = cell->dns_expiry; } if (sched_cell) diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 74ee0f8ef8dd..50d925f0a556 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -367,13 +367,13 @@ struct afs_cell { time64_t last_inactive; /* Time of last drop of usage count */ atomic_t usage; unsigned long flags; -#define AFS_CELL_FL_NOT_READY 0 /* The cell record is not ready for use */ -#define AFS_CELL_FL_NO_GC 1 /* The cell was added manually, don't auto-gc */ -#define AFS_CELL_FL_NOT_FOUND 2 /* Permanent DNS error */ -#define AFS_CELL_FL_DNS_FAIL 3 /* Failed to access DNS */ -#define AFS_CELL_FL_NO_LOOKUP_YET 4 /* Not completed first DNS lookup yet */ +#define AFS_CELL_FL_NO_GC 0 /* The cell was added manually, don't auto-gc */ +#define AFS_CELL_FL_DO_LOOKUP 1 /* DNS lookup requested */ enum afs_cell_state state; short error; + enum dns_record_source dns_source:8; /* Latest source of data from lookup */ + enum dns_lookup_status dns_status:8; /* Latest status of data from lookup */ + unsigned int dns_lookup_count; /* Counter of DNS lookups */ /* Active fileserver interaction state. */ struct list_head proc_volumes; /* procfs volume list */ diff --git a/fs/afs/vl_rotate.c b/fs/afs/vl_rotate.c index 65629d73ad9d..3f845489a9f0 100644 --- a/fs/afs/vl_rotate.c +++ b/fs/afs/vl_rotate.c @@ -43,11 +43,29 @@ bool afs_begin_vlserver_operation(struct afs_vl_cursor *vc, struct afs_cell *cel static bool afs_start_vl_iteration(struct afs_vl_cursor *vc) { struct afs_cell *cell = vc->cell; + unsigned int dns_lookup_count; - if (wait_on_bit(&cell->flags, AFS_CELL_FL_NO_LOOKUP_YET, - TASK_INTERRUPTIBLE)) { - vc->error = -ERESTARTSYS; - return false; + if (cell->dns_source == DNS_RECORD_UNAVAILABLE || + cell->dns_expiry <= ktime_get_real_seconds()) { + dns_lookup_count = smp_load_acquire(&cell->dns_lookup_count); + set_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags); + queue_work(afs_wq, &cell->manager); + + if (cell->dns_source == DNS_RECORD_UNAVAILABLE) { + if (wait_var_event_interruptible( + &cell->dns_lookup_count, + smp_load_acquire(&cell->dns_lookup_count) + != dns_lookup_count) < 0) { + vc->error = -ERESTARTSYS; + return false; + } + } + + /* Status load is ordered after lookup counter load */ + if (cell->dns_source == DNS_RECORD_UNAVAILABLE) { + vc->error = -EDESTADDRREQ; + return false; + } } read_lock(&cell->vl_servers_lock); From 51eba99970797229b3ac8527193171adea5222ed Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 15 May 2019 23:06:24 +0100 Subject: [PATCH 09/19] afs: Fix "kAFS: AFS vnode with undefined type 0" Under some circumstances afs_select_fileserver() can return without setting an error in fc->error. The problem is in the no_more_servers segment where the accumulated errors from attempts to contact various servers are integrated into an afs_error-type variable 'e'. The resultant error code is, however, then abandoned. Fix this by getting the error out of e.error and putting it in 'error' so that the next part will store it into fc->error. Not doing this causes a report like the following: kAFS: AFS vnode with undefined type 0 kAFS: A=0 m=0 s=0 v=0 kAFS: vnode 20000025:1:1 because the code following the server selection loop then sees what it thinks is a successful invocation because fc.error is 0. However, it can't apply the status record because it's all zeros. The report is followed on the first instance with a trace looking something like: dump_stack+0x67/0x8e afs_inode_init_from_status.isra.2+0x21b/0x487 afs_fetch_status+0x119/0x1df afs_iget+0x130/0x295 afs_get_tree+0x31d/0x595 vfs_get_tree+0x1f/0xe8 fc_mount+0xe/0x36 afs_d_automount+0x328/0x3c3 follow_managed+0x109/0x20a lookup_fast+0x3bf/0x3f8 do_last+0xc3/0x6a4 path_openat+0x1af/0x236 do_filp_open+0x51/0xae ? _raw_spin_unlock+0x24/0x2d ? __alloc_fd+0x1a5/0x1b7 do_sys_open+0x13b/0x1e8 do_syscall_64+0x7d/0x1b3 entry_SYSCALL_64_after_hwframe+0x49/0xbe Fixes: 4584ae96ae30 ("afs: Fix missing net error handling") Signed-off-by: David Howells --- fs/afs/rotate.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c index c3ae324781f8..838810da8d5c 100644 --- a/fs/afs/rotate.c +++ b/fs/afs/rotate.c @@ -459,6 +459,8 @@ no_more_servers: s->probe.abort_code); } + error = e.error; + failed_set_error: fc->error = error; failed: From bbd172e31696709b58eb492fafb574985b778326 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 May 2019 13:50:31 +0100 Subject: [PATCH 10/19] rxrpc: Provide kernel interface to set max lifespan on a call Provide an interface to set max lifespan on a call from inside of the kernel without having to call kernel_sendmsg(). Signed-off-by: David Howells --- Documentation/networking/rxrpc.txt | 10 ++++++++++ include/net/af_rxrpc.h | 2 ++ net/rxrpc/af_rxrpc.c | 25 +++++++++++++++++++++++++ 3 files changed, 37 insertions(+) diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt index cd7303d7fa25..ff035a6418e3 100644 --- a/Documentation/networking/rxrpc.txt +++ b/Documentation/networking/rxrpc.txt @@ -1056,6 +1056,16 @@ The kernel interface functions are as follows: This value can be used to determine if the remote client has been restarted as it shouldn't change otherwise. + (*) Set the maxmimum lifespan on a call. + + void rxrpc_kernel_set_max_life(struct socket *sock, + struct rxrpc_call *call, + unsigned long hard_timeout) + + This sets the maximum lifespan on a call to hard_timeout (which is in + jiffies). In the event of the timeout occurring, the call will be + aborted and -ETIME or -ETIMEDOUT will be returned. + ======================= CONFIGURABLE PARAMETERS diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index 78c856cba4f5..c04602ca4a55 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -68,5 +68,7 @@ u32 rxrpc_kernel_get_epoch(struct socket *, struct rxrpc_call *); bool rxrpc_kernel_get_reply_time(struct socket *, struct rxrpc_call *, ktime_t *); bool rxrpc_kernel_call_is_complete(struct rxrpc_call *); +void rxrpc_kernel_set_max_life(struct socket *, struct rxrpc_call *, + unsigned long); #endif /* _NET_RXRPC_H */ diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index ae8c5d7f3bf1..213935fbbbf7 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -443,6 +443,31 @@ void rxrpc_kernel_new_call_notification( } EXPORT_SYMBOL(rxrpc_kernel_new_call_notification); +/** + * rxrpc_kernel_set_max_life - Set maximum lifespan on a call + * @sock: The socket the call is on + * @call: The call to configure + * @hard_timeout: The maximum lifespan of the call in jiffies + * + * Set the maximum lifespan of a call. The call will end with ETIME or + * ETIMEDOUT if it takes longer than this. + */ +void rxrpc_kernel_set_max_life(struct socket *sock, struct rxrpc_call *call, + unsigned long hard_timeout) +{ + unsigned long now; + + mutex_lock(&call->user_mutex); + + now = jiffies; + hard_timeout += now; + WRITE_ONCE(call->expect_term_by, hard_timeout); + rxrpc_reduce_call_timer(call, hard_timeout, now, rxrpc_timer_set_for_hard); + + mutex_unlock(&call->user_mutex); +} +EXPORT_SYMBOL(rxrpc_kernel_set_max_life); + /* * connect an RxRPC socket * - this just targets it at a specific destination; no actual connection From 94f699c9cdb11b8f53cb70624b69aeae16f26db2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 May 2019 13:21:59 +0100 Subject: [PATCH 11/19] afs: Fix the maximum lifespan of VL and probe calls If an older AFS server doesn't support an operation, it may accept the call and then sit on it forever, happily responding to pings that make kafs think that the call is still alive. Fix this by setting the maximum lifespan of Volume Location service calls in particular and probe calls in general so that they don't run on endlessly if they're not supported. Signed-off-by: David Howells --- fs/afs/afs.h | 3 +++ fs/afs/fsclient.c | 1 + fs/afs/internal.h | 1 + fs/afs/rxrpc.c | 4 ++++ fs/afs/vlclient.c | 4 ++++ 5 files changed, 13 insertions(+) diff --git a/fs/afs/afs.h b/fs/afs/afs.h index d12ffb457e47..74913c707bba 100644 --- a/fs/afs/afs.h +++ b/fs/afs/afs.h @@ -23,6 +23,9 @@ #define AFSPATHMAX 1024 /* Maximum length of a pathname plus NUL */ #define AFSOPAQUEMAX 1024 /* Maximum length of an opaque field */ +#define AFS_VL_MAX_LIFESPAN (120 * HZ) +#define AFS_PROBE_MAX_LIFESPAN (30 * HZ) + typedef u64 afs_volid_t; typedef u64 afs_vnodeid_t; typedef u64 afs_dataversion_t; diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 1296f5dc4c1e..7051b58d8a55 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -2115,6 +2115,7 @@ struct afs_call *afs_fs_get_capabilities(struct afs_net *net, call->upgrade = true; call->want_reply_time = true; call->async = true; + call->max_lifespan = AFS_PROBE_MAX_LIFESPAN; /* marshall the parameters */ bp = call->request; diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 50d925f0a556..4765c6716242 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -131,6 +131,7 @@ struct afs_call { int error; /* error code */ u32 abort_code; /* Remote abort ID or 0 */ u32 epoch; + unsigned int max_lifespan; /* Maximum lifespan to set if not 0 */ unsigned request_size; /* size of request data */ unsigned reply_max; /* maximum size of reply */ unsigned first_offset; /* offset into mapping[first] */ diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index a34a89c75c6a..4974defb4592 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -426,6 +426,10 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp) call->rxcall = rxcall; + if (call->max_lifespan) + rxrpc_kernel_set_max_life(call->net->socket, rxcall, + call->max_lifespan); + /* send the request */ iov[0].iov_base = call->request; iov[0].iov_len = call->request_size; diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c index dd9ba4e96fb3..7c53768a360b 100644 --- a/fs/afs/vlclient.c +++ b/fs/afs/vlclient.c @@ -157,6 +157,7 @@ struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_vl_cursor *vc, call->key = vc->key; call->reply[0] = entry; call->ret_reply0 = true; + call->max_lifespan = AFS_VL_MAX_LIFESPAN; /* Marshall the parameters */ bp = call->request; @@ -289,6 +290,7 @@ struct afs_addr_list *afs_vl_get_addrs_u(struct afs_vl_cursor *vc, call->key = vc->key; call->reply[0] = NULL; call->ret_reply0 = true; + call->max_lifespan = AFS_VL_MAX_LIFESPAN; /* Marshall the parameters */ bp = call->request; @@ -403,6 +405,7 @@ struct afs_call *afs_vl_get_capabilities(struct afs_net *net, call->upgrade = true; call->want_reply_time = true; call->async = true; + call->max_lifespan = AFS_PROBE_MAX_LIFESPAN; /* marshall the parameters */ bp = call->request; @@ -646,6 +649,7 @@ struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_vl_cursor *vc, call->key = vc->key; call->reply[0] = NULL; call->ret_reply0 = true; + call->max_lifespan = AFS_VL_MAX_LIFESPAN; /* Marshall the parameters */ bp = call->request; From 0ab4c9594812c4bc5606daf0677ae304bf7ec8c8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 May 2019 14:51:48 +0100 Subject: [PATCH 12/19] afs: Fix error propagation from server record check/update afs_check/update_server_record() should be setting fc->error rather than fc->ac.error as they're called from within the cursor iteration function. afs_fs_cursor::error is where the error code of the attempt to call the operation on multiple servers is integrated and is the final result, whereas afs_addr_cursor::error is used to hold the error from individual iterations of the call loop. (Note there's also an afs_vl_cursor which also wraps afs_addr_cursor for accessing VL servers rather than file servers). Fix this by setting fc->error in the afs_check/update_server_record() so that any error incurred whilst talking to the VL server correctly propagates to the final result. This results in: kAFS: Unexpected error from FS.StoreData -512 being seen, even though the store-data op is non-interruptible. The error is actually coming from the server record update getting interrupted. Fixes: d2ddc776a458 ("afs: Overhaul volume and server record caching and fileserver rotation") Signed-off-by: David Howells --- fs/afs/server.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/afs/server.c b/fs/afs/server.c index 65b33b6da48b..454c6357f51f 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -521,8 +521,8 @@ static noinline bool afs_update_server_record(struct afs_fs_cursor *fc, struct a alist = afs_vl_lookup_addrs(fc->vnode->volume->cell, fc->key, &server->uuid); if (IS_ERR(alist)) { - fc->ac.error = PTR_ERR(alist); - _leave(" = f [%d]", fc->ac.error); + fc->error = PTR_ERR(alist); + _leave(" = f [%d]", fc->error); return false; } @@ -574,7 +574,7 @@ retry: ret = wait_on_bit(&server->flags, AFS_SERVER_FL_UPDATING, TASK_INTERRUPTIBLE); if (ret == -ERESTARTSYS) { - fc->ac.error = ret; + fc->error = ret; _leave(" = f [intr]"); return false; } From b960a34b73e4c1c972623bc2076e24b97588d09e Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 9 May 2019 08:21:21 +0100 Subject: [PATCH 13/19] rxrpc: Allow the kernel to mark a call as being non-interruptible Allow kernel services using AF_RXRPC to indicate that a call should be non-interruptible. This allows kafs to make things like lock-extension and writeback data storage calls non-interruptible. If this is set, signals will be ignored for operations on that call where possible - such as waiting to get a call channel on an rxrpc connection. It doesn't prevent UDP sendmsg from being interrupted, but that will be handled by packet retransmission. rxrpc_kernel_recv_data() isn't affected by this since that never waits, preferring instead to return -EAGAIN and leave the waiting to the caller. Userspace initiated calls can't be set to be uninterruptible at this time. Signed-off-by: David Howells --- Documentation/networking/rxrpc.txt | 11 ++++++++++- fs/afs/rxrpc.c | 1 + include/net/af_rxrpc.h | 1 + net/rxrpc/af_rxrpc.c | 3 +++ net/rxrpc/ar-internal.h | 2 ++ net/rxrpc/call_object.c | 2 ++ net/rxrpc/conn_client.c | 8 ++++++-- net/rxrpc/sendmsg.c | 4 +++- 8 files changed, 28 insertions(+), 4 deletions(-) diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt index ff035a6418e3..180e07d956a7 100644 --- a/Documentation/networking/rxrpc.txt +++ b/Documentation/networking/rxrpc.txt @@ -796,7 +796,9 @@ The kernel interface functions are as follows: s64 tx_total_len, gfp_t gfp, rxrpc_notify_rx_t notify_rx, - bool upgrade); + bool upgrade, + bool intr, + unsigned int debug_id); This allocates the infrastructure to make a new RxRPC call and assigns call and connection numbers. The call will be made on the UDP port that @@ -824,6 +826,13 @@ The kernel interface functions are as follows: the server upgrade the service to a better one. The resultant service ID is returned by rxrpc_kernel_recv_data(). + intr should be set to true if the call should be interruptible. If this + is not set, this function may not return until a channel has been + allocated; if it is set, the function may return -ERESTARTSYS. + + debug_id is the call debugging ID to be used for tracing. This can be + obtained by atomically incrementing rxrpc_debug_id. + If this function is successful, an opaque reference to the RxRPC call is returned. The caller now holds a reference on this and it must be properly ended. diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 4974defb4592..87763379952d 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -417,6 +417,7 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp) afs_wake_up_async_call : afs_wake_up_call_waiter), call->upgrade, + true, call->debug_id); if (IS_ERR(rxcall)) { ret = PTR_ERR(rxcall); diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index c04602ca4a55..93358bfc0e1b 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -45,6 +45,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *, gfp_t, rxrpc_notify_rx_t, bool, + bool, unsigned int); int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *, struct msghdr *, size_t, diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 213935fbbbf7..ffde5b187f5d 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -270,6 +270,7 @@ static int rxrpc_listen(struct socket *sock, int backlog) * @gfp: The allocation constraints * @notify_rx: Where to send notifications instead of socket queue * @upgrade: Request service upgrade for call + * @intr: The call is interruptible * @debug_id: The debug ID for tracing to be assigned to the call * * Allow a kernel service to begin a call on the nominated socket. This just @@ -287,6 +288,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, gfp_t gfp, rxrpc_notify_rx_t notify_rx, bool upgrade, + bool intr, unsigned int debug_id) { struct rxrpc_conn_parameters cp; @@ -311,6 +313,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, memset(&p, 0, sizeof(p)); p.user_call_ID = user_call_ID; p.tx_total_len = tx_total_len; + p.intr = intr; memset(&cp, 0, sizeof(cp)); cp.local = rx->local; diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 062ca9dc29b8..07fc1dfa4878 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -482,6 +482,7 @@ enum rxrpc_call_flag { RXRPC_CALL_BEGAN_RX_TIMER, /* We began the expect_rx_by timer */ RXRPC_CALL_RX_HEARD, /* The peer responded at least once to this call */ RXRPC_CALL_RX_UNDERRUN, /* Got data underrun */ + RXRPC_CALL_IS_INTR, /* The call is interruptible */ }; /* @@ -711,6 +712,7 @@ struct rxrpc_call_params { u32 normal; /* Max time since last call packet (msec) */ } timeouts; u8 nr_timeouts; /* Number of timeouts specified */ + bool intr; /* The call is interruptible */ }; struct rxrpc_send_params { diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index fe96881a334d..d0ca98d7aef5 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -241,6 +241,8 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, return call; } + if (p->intr) + __set_bit(RXRPC_CALL_IS_INTR, &call->flags); call->tx_total_len = p->tx_total_len; trace_rxrpc_call(call, rxrpc_call_new_client, atomic_read(&call->usage), here, (const void *)p->user_call_ID); diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 83797b3949e2..5cf5595a14d8 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -656,10 +656,14 @@ static int rxrpc_wait_for_channel(struct rxrpc_call *call, gfp_t gfp) add_wait_queue_exclusive(&call->waitq, &myself); for (;;) { - set_current_state(TASK_INTERRUPTIBLE); + if (test_bit(RXRPC_CALL_IS_INTR, &call->flags)) + set_current_state(TASK_INTERRUPTIBLE); + else + set_current_state(TASK_UNINTERRUPTIBLE); if (call->call_id) break; - if (signal_pending(current)) { + if (test_bit(RXRPC_CALL_IS_INTR, &call->flags) && + signal_pending(current)) { ret = -ERESTARTSYS; break; } diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index bec64deb7b0a..45a05d9a27fa 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -80,7 +80,8 @@ static int rxrpc_wait_for_tx_window_nonintr(struct rxrpc_sock *rx, if (call->state >= RXRPC_CALL_COMPLETE) return call->error; - if (timeout == 0 && + if (test_bit(RXRPC_CALL_IS_INTR, &call->flags) && + timeout == 0 && tx_win == tx_start && signal_pending(current)) return -EINTR; @@ -620,6 +621,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) .call.tx_total_len = -1, .call.user_call_ID = 0, .call.nr_timeouts = 0, + .call.intr = true, .abort_code = 0, .command = RXRPC_CMD_SEND_DATA, .exclusive = false, From 20b8391fff56f64893233a772a81adc392a69121 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 8 May 2019 16:16:31 +0100 Subject: [PATCH 14/19] afs: Make some RPC operations non-interruptible Make certain RPC operations non-interruptible, including: (*) Set attributes (*) Store data We don't want to get interrupted during a flush on close, flush on unlock, writeback or an inode update, leaving us in a state where we still need to do the writeback or update. (*) Extend lock (*) Release lock We don't want to get lock extension interrupted as the file locks on the server are time-limited. Interruption during lock release is less of an issue since the lock is time-limited, but it's better to complete the release to avoid a several-minute wait to recover it. *Setting* the lock isn't a problem if it's interrupted since we can just return to the user and tell them they were interrupted - at which point they can elect to retry. (*) Silly unlink We want to remove silly unlink files if we can, rather than leaving them for the salvager to clear up. Note that whilst these calls are no longer interruptible, they do have timeouts on them, so if the server stops responding the call will fail with something like ETIME or ECONNRESET. Without this, the following: kAFS: Unexpected error from FS.StoreData -512 appears in dmesg when a pending store data gets interrupted and some processes may just hang. Additionally, make the code that checks/updates the server record ignore failure due to interruption if the main call is uninterruptible and if the server has an address list. The next op will check it again since the expiration time on the old list has past. Fixes: d2ddc776a458 ("afs: Overhaul volume and server record caching and fileserver rotation") Reported-by: Jonathan Billings Reported-by: Marc Dionne Signed-off-by: David Howells --- fs/afs/dir.c | 18 +++++++++--------- fs/afs/dir_silly.c | 4 ++-- fs/afs/file.c | 2 +- fs/afs/flock.c | 6 +++--- fs/afs/fsclient.c | 19 +++++++++++++++++++ fs/afs/inode.c | 4 ++-- fs/afs/internal.h | 9 ++++++++- fs/afs/rotate.c | 27 ++++++++++++++++++--------- fs/afs/rxrpc.c | 4 ++-- fs/afs/server.c | 11 +++++++++++ fs/afs/super.c | 2 +- fs/afs/write.c | 2 +- fs/afs/xattr.c | 8 ++++---- fs/afs/yfsclient.c | 18 ++++++++++++++++++ 14 files changed, 99 insertions(+), 35 deletions(-) diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 9a466be583d2..c15550310f62 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -704,7 +704,7 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry, goto no_inline_bulk_status; inode = ERR_PTR(-ERESTARTSYS); - if (afs_begin_vnode_operation(&fc, dvnode, key)) { + if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { while (afs_select_fileserver(&fc)) { if (test_bit(AFS_SERVER_FL_NO_IBULK, &fc.cbi->server->flags)) { @@ -739,7 +739,7 @@ no_inline_bulk_status: */ cookie->nr_fids = 1; inode = ERR_PTR(-ERESTARTSYS); - if (afs_begin_vnode_operation(&fc, dvnode, key)) { + if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { while (afs_select_fileserver(&fc)) { afs_fs_fetch_status(&fc, afs_v2net(dvnode), @@ -1166,7 +1166,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) } ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, dvnode, key)) { + if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(dvnode); afs_fs_create(&fc, dentry->d_name.name, mode, data_version, @@ -1250,7 +1250,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) } ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, dvnode, key)) { + if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(dvnode); afs_fs_remove(&fc, vnode, dentry->d_name.name, true, @@ -1374,7 +1374,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry) spin_unlock(&dentry->d_lock); ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, dvnode, key)) { + if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(dvnode); @@ -1445,7 +1445,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, } ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, dvnode, key)) { + if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(dvnode); afs_fs_create(&fc, dentry->d_name.name, mode, data_version, @@ -1510,7 +1510,7 @@ static int afs_link(struct dentry *from, struct inode *dir, } ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, dvnode, key)) { + if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { if (mutex_lock_interruptible_nested(&vnode->io_lock, 1) < 0) { afs_end_vnode_operation(&fc); goto error_key; @@ -1584,7 +1584,7 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry, } ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, dvnode, key)) { + if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(dvnode); afs_fs_symlink(&fc, dentry->d_name.name, @@ -1696,7 +1696,7 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, } ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, orig_dvnode, key)) { + if (afs_begin_vnode_operation(&fc, orig_dvnode, key, true)) { if (orig_dvnode != new_dvnode) { if (mutex_lock_interruptible_nested(&new_dvnode->io_lock, 1) < 0) { afs_end_vnode_operation(&fc); diff --git a/fs/afs/dir_silly.c b/fs/afs/dir_silly.c index f6f89fdab6b2..fbc2d301ffe8 100644 --- a/fs/afs/dir_silly.c +++ b/fs/afs/dir_silly.c @@ -30,7 +30,7 @@ static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode _enter("%pd,%pd", old, new); trace_afs_silly_rename(vnode, false); - if (afs_begin_vnode_operation(&fc, dvnode, key)) { + if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(dvnode); afs_fs_rename(&fc, old->d_name.name, @@ -149,7 +149,7 @@ static int afs_do_silly_unlink(struct afs_vnode *dvnode, struct afs_vnode *vnode _enter(""); trace_afs_silly_rename(vnode, true); - if (afs_begin_vnode_operation(&fc, dvnode, key)) { + if (afs_begin_vnode_operation(&fc, dvnode, key, false)) { while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(dvnode); diff --git a/fs/afs/file.c b/fs/afs/file.c index b0a43e250a9b..f59c6149fa02 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -238,7 +238,7 @@ int afs_fetch_data(struct afs_vnode *vnode, struct key *key, struct afs_read *de key_serial(key)); ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, vnode, key)) { + if (afs_begin_vnode_operation(&fc, vnode, key, true)) { while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(vnode); afs_fs_fetch_data(&fc, desc); diff --git a/fs/afs/flock.c b/fs/afs/flock.c index adc88eff7849..3501ef7ddbb4 100644 --- a/fs/afs/flock.c +++ b/fs/afs/flock.c @@ -196,7 +196,7 @@ static int afs_set_lock(struct afs_vnode *vnode, struct key *key, key_serial(key), type); ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, vnode, key)) { + if (afs_begin_vnode_operation(&fc, vnode, key, true)) { while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(vnode); afs_fs_set_lock(&fc, type); @@ -227,7 +227,7 @@ static int afs_extend_lock(struct afs_vnode *vnode, struct key *key) key_serial(key)); ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, vnode, key)) { + if (afs_begin_vnode_operation(&fc, vnode, key, false)) { while (afs_select_current_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(vnode); afs_fs_extend_lock(&fc); @@ -258,7 +258,7 @@ static int afs_release_lock(struct afs_vnode *vnode, struct key *key) key_serial(key)); ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, vnode, key)) { + if (afs_begin_vnode_operation(&fc, vnode, key, false)) { while (afs_select_current_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(vnode); afs_fs_release_lock(&fc); diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 7051b58d8a55..d58848c357aa 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -469,6 +469,7 @@ int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsy afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call(call, &vnode->fid); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -664,6 +665,7 @@ static int afs_fs_fetch_data64(struct afs_fs_cursor *fc, struct afs_read *req) call->cb_break = fc->cb_break; afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call(call, &vnode->fid); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -712,6 +714,7 @@ int afs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req) call->cb_break = fc->cb_break; afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call(call, &vnode->fid); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -833,6 +836,7 @@ int afs_fs_create(struct afs_fs_cursor *fc, afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call1(call, &vnode->fid, name); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -930,6 +934,7 @@ int afs_fs_remove(struct afs_fs_cursor *fc, struct afs_vnode *vnode, afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call1(call, &dvnode->fid, name); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -1023,6 +1028,7 @@ int afs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode, afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call1(call, &vnode->fid, name); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -1138,6 +1144,7 @@ int afs_fs_symlink(struct afs_fs_cursor *fc, afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call1(call, &vnode->fid, name); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -1257,6 +1264,7 @@ int afs_fs_rename(struct afs_fs_cursor *fc, afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call2(call, &orig_dvnode->fid, orig_name, new_name); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -1362,6 +1370,7 @@ static int afs_fs_store_data64(struct afs_fs_cursor *fc, *bp++ = htonl((u32) i_size); trace_afs_make_fs_call(call, &vnode->fid); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -1439,6 +1448,7 @@ int afs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping, afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call(call, &vnode->fid); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -1538,6 +1548,7 @@ static int afs_fs_setattr_size64(struct afs_fs_cursor *fc, struct iattr *attr) afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call(call, &vnode->fid); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -1585,6 +1596,7 @@ static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr) afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call(call, &vnode->fid); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -1630,6 +1642,7 @@ int afs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr) afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call(call, &vnode->fid); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -1815,6 +1828,7 @@ int afs_fs_get_volume_status(struct afs_fs_cursor *fc, afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call(call, &vnode->fid); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -1906,6 +1920,7 @@ int afs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type) afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_calli(call, &vnode->fid, type); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -1942,6 +1957,7 @@ int afs_fs_extend_lock(struct afs_fs_cursor *fc) afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call(call, &vnode->fid); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -1977,6 +1993,7 @@ int afs_fs_release_lock(struct afs_fs_cursor *fc) afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call(call, &vnode->fid); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -2211,6 +2228,7 @@ int afs_fs_fetch_status(struct afs_fs_cursor *fc, call->cb_break = fc->cb_break; afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call(call, fid); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -2397,6 +2415,7 @@ int afs_fs_inline_bulk_status(struct afs_fs_cursor *fc, call->cb_break = fc->cb_break; afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call(call, &fids[0]); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } diff --git a/fs/afs/inode.c b/fs/afs/inode.c index f30aa5eacd39..1e630f016dc5 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -136,7 +136,7 @@ int afs_fetch_status(struct afs_vnode *vnode, struct key *key, bool new_inode) vnode->flags); ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, vnode, key)) { + if (afs_begin_vnode_operation(&fc, vnode, key, true)) { while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(vnode); afs_fs_fetch_file_status(&fc, NULL, new_inode); @@ -617,7 +617,7 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr) } ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, vnode, key)) { + if (afs_begin_vnode_operation(&fc, vnode, key, false)) { while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(vnode); afs_fs_setattr(&fc, attr); diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 4765c6716242..833fa39ee337 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -149,6 +149,7 @@ struct afs_call { bool ret_reply0; /* T if should return reply[0] on success */ bool upgrade; /* T to request service upgrade */ bool want_reply_time; /* T if want reply_time */ + bool intr; /* T if interruptible */ u16 service_id; /* Actual service ID (after upgrade) */ unsigned int debug_id; /* Trace ID */ u32 operation_ID; /* operation ID for an incoming call */ @@ -773,6 +774,7 @@ struct afs_fs_cursor { #define AFS_FS_CURSOR_VNOVOL 0x0008 /* Set if seen VNOVOL */ #define AFS_FS_CURSOR_CUR_ONLY 0x0010 /* Set if current server only (file lock held) */ #define AFS_FS_CURSOR_NO_VSLEEP 0x0020 /* Set to prevent sleep on VBUSY, VOFFLINE, ... */ +#define AFS_FS_CURSOR_INTR 0x0040 /* Set if op is interruptible */ unsigned short nr_iterations; /* Number of server iterations */ }; @@ -1097,7 +1099,7 @@ static inline void afs_put_sysnames(struct afs_sysnames *sysnames) {} * rotate.c */ extern bool afs_begin_vnode_operation(struct afs_fs_cursor *, struct afs_vnode *, - struct key *); + struct key *, bool); extern bool afs_select_fileserver(struct afs_fs_cursor *); extern bool afs_select_current_fileserver(struct afs_fs_cursor *); extern int afs_end_vnode_operation(struct afs_fs_cursor *); @@ -1122,6 +1124,11 @@ extern void afs_send_simple_reply(struct afs_call *, const void *, size_t); extern int afs_extract_data(struct afs_call *, bool); extern int afs_protocol_error(struct afs_call *, int, enum afs_eproto_cause); +static inline void afs_set_fc_call(struct afs_call *call, struct afs_fs_cursor *fc) +{ + call->intr = fc->flags & AFS_FS_CURSOR_INTR; +} + static inline void afs_extract_begin(struct afs_call *call, void *buf, size_t size) { call->kvec[0].iov_base = buf; diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c index 838810da8d5c..52f3a9910f0d 100644 --- a/fs/afs/rotate.c +++ b/fs/afs/rotate.c @@ -25,7 +25,7 @@ * them here also using the io_lock. */ bool afs_begin_vnode_operation(struct afs_fs_cursor *fc, struct afs_vnode *vnode, - struct key *key) + struct key *key, bool intr) { memset(fc, 0, sizeof(*fc)); fc->vnode = vnode; @@ -33,10 +33,15 @@ bool afs_begin_vnode_operation(struct afs_fs_cursor *fc, struct afs_vnode *vnode fc->ac.error = SHRT_MAX; fc->error = -EDESTADDRREQ; - if (mutex_lock_interruptible(&vnode->io_lock) < 0) { - fc->error = -EINTR; - fc->flags |= AFS_FS_CURSOR_STOP; - return false; + if (intr) { + fc->flags |= AFS_FS_CURSOR_INTR; + if (mutex_lock_interruptible(&vnode->io_lock) < 0) { + fc->error = -EINTR; + fc->flags |= AFS_FS_CURSOR_STOP; + return false; + } + } else { + mutex_lock(&vnode->io_lock); } if (vnode->lock_state != AFS_VNODE_LOCK_NONE) @@ -118,10 +123,14 @@ static void afs_busy(struct afs_volume *volume, u32 abort_code) */ static bool afs_sleep_and_retry(struct afs_fs_cursor *fc) { - msleep_interruptible(1000); - if (signal_pending(current)) { - fc->error = -ERESTARTSYS; - return false; + if (fc->flags & AFS_FS_CURSOR_INTR) { + msleep_interruptible(1000); + if (signal_pending(current)) { + fc->error = -ERESTARTSYS; + return false; + } + } else { + msleep(1000); } return true; diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 87763379952d..02a39e6adf63 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -417,7 +417,7 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp) afs_wake_up_async_call : afs_wake_up_call_waiter), call->upgrade, - true, + call->intr, call->debug_id); if (IS_ERR(rxcall)) { ret = PTR_ERR(rxcall); @@ -653,7 +653,7 @@ long afs_wait_for_call_to_complete(struct afs_call *call, break; } - if (timeout == 0 && + if (call->intr && timeout == 0 && life == last_life && signal_pending(current)) { if (stalled) break; diff --git a/fs/afs/server.c b/fs/afs/server.c index 454c6357f51f..52c170b59cfd 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -521,6 +521,13 @@ static noinline bool afs_update_server_record(struct afs_fs_cursor *fc, struct a alist = afs_vl_lookup_addrs(fc->vnode->volume->cell, fc->key, &server->uuid); if (IS_ERR(alist)) { + if ((PTR_ERR(alist) == -ERESTARTSYS || + PTR_ERR(alist) == -EINTR) && + !(fc->flags & AFS_FS_CURSOR_INTR) && + server->addresses) { + _leave(" = t [intr]"); + return true; + } fc->error = PTR_ERR(alist); _leave(" = f [%d]", fc->error); return false; @@ -574,6 +581,10 @@ retry: ret = wait_on_bit(&server->flags, AFS_SERVER_FL_UPDATING, TASK_INTERRUPTIBLE); if (ret == -ERESTARTSYS) { + if (!(fc->flags & AFS_FS_CURSOR_INTR) && server->addresses) { + _leave(" = t [intr]"); + return true; + } fc->error = ret; _leave(" = f [intr]"); return false; diff --git a/fs/afs/super.c b/fs/afs/super.c index 783c68cd1a35..3df11eede7f4 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -741,7 +741,7 @@ static int afs_statfs(struct dentry *dentry, struct kstatfs *buf) return PTR_ERR(key); ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, vnode, key)) { + if (afs_begin_vnode_operation(&fc, vnode, key, true)) { fc.flags |= AFS_FS_CURSOR_NO_VSLEEP; while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(vnode); diff --git a/fs/afs/write.c b/fs/afs/write.c index 0122d7445fba..e669f2fae873 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -361,7 +361,7 @@ found_key: _debug("USE WB KEY %u", key_serial(wbk->key)); ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, vnode, wbk->key)) { + if (afs_begin_vnode_operation(&fc, vnode, wbk->key, false)) { while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(vnode); afs_fs_store_data(&fc, mapping, first, last, offset, to); diff --git a/fs/afs/xattr.c b/fs/afs/xattr.c index d12bcda911e1..e13b005fac42 100644 --- a/fs/afs/xattr.c +++ b/fs/afs/xattr.c @@ -57,7 +57,7 @@ static int afs_xattr_get_acl(const struct xattr_handler *handler, return PTR_ERR(key); ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, vnode, key)) { + if (afs_begin_vnode_operation(&fc, vnode, key, true)) { while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(vnode); acl = afs_fs_fetch_acl(&fc); @@ -114,7 +114,7 @@ static int afs_xattr_set_acl(const struct xattr_handler *handler, memcpy(acl->data, buffer, size); ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, vnode, key)) { + if (afs_begin_vnode_operation(&fc, vnode, key, true)) { while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(vnode); afs_fs_store_acl(&fc, acl); @@ -178,7 +178,7 @@ static int afs_xattr_get_yfs(const struct xattr_handler *handler, } ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, vnode, key)) { + if (afs_begin_vnode_operation(&fc, vnode, key, true)) { while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(vnode); yfs_fs_fetch_opaque_acl(&fc, yacl); @@ -263,7 +263,7 @@ static int afs_xattr_set_yfs(const struct xattr_handler *handler, memcpy(acl->data, buffer, size); ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, vnode, key)) { + if (afs_begin_vnode_operation(&fc, vnode, key, true)) { while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(vnode); yfs_fs_store_opaque_acl2(&fc, acl); diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c index d3e9e3fe0b58..3ba33d415a74 100644 --- a/fs/afs/yfsclient.c +++ b/fs/afs/yfsclient.c @@ -519,6 +519,7 @@ int yfs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsy call->cb_break = fc->cb_break; afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call(call, &vnode->fid); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -712,6 +713,7 @@ int yfs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req) call->cb_break = fc->cb_break; afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call(call, &vnode->fid); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -813,6 +815,7 @@ int yfs_fs_create_file(struct afs_fs_cursor *fc, afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call1(call, &vnode->fid, name); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -877,6 +880,7 @@ int yfs_fs_make_dir(struct afs_fs_cursor *fc, afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call1(call, &vnode->fid, name); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -968,6 +972,7 @@ int yfs_fs_remove_file2(struct afs_fs_cursor *fc, struct afs_vnode *vnode, afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call1(call, &dvnode->fid, name); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -1056,6 +1061,7 @@ int yfs_fs_remove(struct afs_fs_cursor *fc, struct afs_vnode *vnode, afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call1(call, &dvnode->fid, name); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -1142,6 +1148,7 @@ int yfs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode, afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call1(call, &vnode->fid, name); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -1239,6 +1246,7 @@ int yfs_fs_symlink(struct afs_fs_cursor *fc, afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call1(call, &dvnode->fid, name); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -1338,6 +1346,7 @@ int yfs_fs_rename(struct afs_fs_cursor *fc, afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call2(call, &orig_dvnode->fid, orig_name, new_name); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -1445,6 +1454,7 @@ int yfs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping, afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call(call, &vnode->fid); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -1534,6 +1544,7 @@ static int yfs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr) afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call(call, &vnode->fid); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -1578,6 +1589,7 @@ int yfs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr) afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call(call, &vnode->fid); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -1767,6 +1779,7 @@ int yfs_fs_get_volume_status(struct afs_fs_cursor *fc, afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call(call, &vnode->fid); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -1866,6 +1879,7 @@ int yfs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type) afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_calli(call, &vnode->fid, type); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -1903,6 +1917,7 @@ int yfs_fs_extend_lock(struct afs_fs_cursor *fc) afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call(call, &vnode->fid); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -1939,6 +1954,7 @@ int yfs_fs_release_lock(struct afs_fs_cursor *fc) afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call(call, &vnode->fid); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -2028,6 +2044,7 @@ int yfs_fs_fetch_status(struct afs_fs_cursor *fc, call->cb_break = fc->cb_break; afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call(call, fid); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -2212,6 +2229,7 @@ int yfs_fs_inline_bulk_status(struct afs_fs_cursor *fc, call->cb_break = fc->cb_break; afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call(call, &fids[0]); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } From 3b05e528cb9ef1c7b251705acca46d8deb80183c Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 9 May 2019 09:17:08 +0100 Subject: [PATCH 15/19] afs: Make dynamic root population wait uninterruptibly for proc_cells_lock Make dynamic root population wait uninterruptibly for proc_cells_lock. Signed-off-by: David Howells --- fs/afs/dynroot.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c index 07484b5a3bbb..af1689d1f32e 100644 --- a/fs/afs/dynroot.c +++ b/fs/afs/dynroot.c @@ -261,8 +261,7 @@ int afs_dynroot_populate(struct super_block *sb) struct afs_net *net = afs_sb2net(sb); int ret; - if (mutex_lock_interruptible(&net->proc_cells_lock) < 0) - return -ERESTARTSYS; + mutex_lock(&net->proc_cells_lock); net->dynroot_sb = sb; hlist_for_each_entry(cell, &net->proc_cells, proc_link) { From 781070551c26def14784ce5ca14194d7ca234b04 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 9 May 2019 17:56:53 +0100 Subject: [PATCH 16/19] afs: Fix calculation of callback expiry time Fix the calculation of the expiry time of a callback promise, as obtained from operations like FS.FetchStatus and FS.FetchData. The time should be based on the timestamp of the first DATA packet in the reply and the calculation needs to turn the ktime_t timestamp into a time64_t. Fixes: c435ee34551e ("afs: Overhaul the callback handling") Signed-off-by: David Howells --- fs/afs/fsclient.c | 57 +++++++++++++++++++++------------------------- fs/afs/yfsclient.c | 51 ++++++++++++++++++++--------------------- 2 files changed, 51 insertions(+), 57 deletions(-) diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index d58848c357aa..388750d80cab 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -256,6 +256,23 @@ static int afs_decode_status(struct afs_call *call, return ret; } +static time64_t xdr_decode_expiry(struct afs_call *call, u32 expiry) +{ + return ktime_divns(call->reply_time, NSEC_PER_SEC) + expiry; +} + +static void xdr_decode_AFSCallBack_raw(struct afs_call *call, + struct afs_callback *cb, + const __be32 **_bp) +{ + const __be32 *bp = *_bp; + + cb->version = ntohl(*bp++); + cb->expires_at = xdr_decode_expiry(call, ntohl(*bp++)); + cb->type = ntohl(*bp++); + *_bp = bp; +} + /* * decode an AFSCallBack block */ @@ -264,46 +281,26 @@ static void xdr_decode_AFSCallBack(struct afs_call *call, const __be32 **_bp) { struct afs_cb_interest *old, *cbi = call->cbi; - const __be32 *bp = *_bp; - u32 cb_expiry; + struct afs_callback cb; + + xdr_decode_AFSCallBack_raw(call, &cb, _bp); write_seqlock(&vnode->cb_lock); if (!afs_cb_is_broken(call->cb_break, vnode, cbi)) { - vnode->cb_version = ntohl(*bp++); - cb_expiry = ntohl(*bp++); - vnode->cb_type = ntohl(*bp++); - vnode->cb_expires_at = cb_expiry + ktime_get_real_seconds(); + vnode->cb_version = cb.version; + vnode->cb_type = cb.type; + vnode->cb_expires_at = cb.expires_at; old = vnode->cb_interest; if (old != call->cbi) { vnode->cb_interest = cbi; cbi = old; } set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); - } else { - bp += 3; } write_sequnlock(&vnode->cb_lock); call->cbi = cbi; - *_bp = bp; -} - -static ktime_t xdr_decode_expiry(struct afs_call *call, u32 expiry) -{ - return ktime_add_ns(call->reply_time, expiry * NSEC_PER_SEC); -} - -static void xdr_decode_AFSCallBack_raw(struct afs_call *call, - const __be32 **_bp, - struct afs_callback *cb) -{ - const __be32 *bp = *_bp; - - cb->version = ntohl(*bp++); - cb->expires_at = xdr_decode_expiry(call, ntohl(*bp++)); - cb->type = ntohl(*bp++); - *_bp = bp; } /* @@ -744,7 +741,7 @@ static int afs_deliver_fs_create_vnode(struct afs_call *call) &call->expected_version, NULL); if (ret < 0) return ret; - xdr_decode_AFSCallBack_raw(call, &bp, call->reply[3]); + xdr_decode_AFSCallBack_raw(call, call->reply[3], &bp); /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ _leave(" = 0 [done]"); @@ -2168,7 +2165,7 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call) &call->expected_version, NULL); if (ret < 0) return ret; - xdr_decode_AFSCallBack_raw(call, &bp, callback); + xdr_decode_AFSCallBack_raw(call, callback, &bp); xdr_decode_AFSVolSync(&bp, volsync); _leave(" = 0 [done]"); @@ -2322,9 +2319,7 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call) _debug("unmarshall CB array"); bp = call->buffer; callbacks = call->reply[2]; - callbacks[call->count].version = ntohl(bp[0]); - callbacks[call->count].expires_at = xdr_decode_expiry(call, ntohl(bp[1])); - callbacks[call->count].type = ntohl(bp[2]); + xdr_decode_AFSCallBack_raw(call, &callbacks[call->count], &bp); statuses = call->reply[1]; if (call->count == 0 && vnode && statuses[0].abort_code == 0) xdr_decode_AFSCallBack(call, vnode, &bp); diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c index 3ba33d415a74..b42bd412dba1 100644 --- a/fs/afs/yfsclient.c +++ b/fs/afs/yfsclient.c @@ -311,6 +311,22 @@ static int yfs_decode_status(struct afs_call *call, return ret; } +static void xdr_decode_YFSCallBack_raw(struct afs_call *call, + struct afs_callback *cb, + const __be32 **_bp) +{ + struct yfs_xdr_YFSCallBack *x = (void *)*_bp; + ktime_t cb_expiry; + + cb_expiry = call->reply_time; + cb_expiry = ktime_add(cb_expiry, xdr_to_u64(x->expiration_time) * 100); + cb->expires_at = ktime_divns(cb_expiry, NSEC_PER_SEC); + cb->version = ntohl(x->version); + cb->type = ntohl(x->type); + + *_bp += xdr_size(x); +} + /* * Decode a YFSCallBack block */ @@ -318,18 +334,17 @@ static void xdr_decode_YFSCallBack(struct afs_call *call, struct afs_vnode *vnode, const __be32 **_bp) { - struct yfs_xdr_YFSCallBack *xdr = (void *)*_bp; struct afs_cb_interest *old, *cbi = call->cbi; - u64 cb_expiry; + struct afs_callback cb; + + xdr_decode_YFSCallBack_raw(call, &cb, _bp); write_seqlock(&vnode->cb_lock); if (!afs_cb_is_broken(call->cb_break, vnode, cbi)) { - cb_expiry = xdr_to_u64(xdr->expiration_time); - do_div(cb_expiry, 10 * 1000 * 1000); - vnode->cb_version = ntohl(xdr->version); - vnode->cb_type = ntohl(xdr->type); - vnode->cb_expires_at = cb_expiry + ktime_get_real_seconds(); + vnode->cb_version = cb.version; + vnode->cb_type = cb.type; + vnode->cb_expires_at = cb.expires_at; old = vnode->cb_interest; if (old != call->cbi) { vnode->cb_interest = cbi; @@ -340,22 +355,6 @@ static void xdr_decode_YFSCallBack(struct afs_call *call, write_sequnlock(&vnode->cb_lock); call->cbi = cbi; - *_bp += xdr_size(xdr); -} - -static void xdr_decode_YFSCallBack_raw(const __be32 **_bp, - struct afs_callback *cb) -{ - struct yfs_xdr_YFSCallBack *x = (void *)*_bp; - u64 cb_expiry; - - cb_expiry = xdr_to_u64(x->expiration_time); - do_div(cb_expiry, 10 * 1000 * 1000); - cb->version = ntohl(x->version); - cb->type = ntohl(x->type); - cb->expires_at = cb_expiry + ktime_get_real_seconds(); - - *_bp += xdr_size(x); } /* @@ -743,7 +742,7 @@ static int yfs_deliver_fs_create_vnode(struct afs_call *call) &call->expected_version, NULL); if (ret < 0) return ret; - xdr_decode_YFSCallBack_raw(&bp, call->reply[3]); + xdr_decode_YFSCallBack_raw(call, call->reply[3], &bp); xdr_decode_YFSVolSync(&bp, NULL); _leave(" = 0 [done]"); @@ -1983,7 +1982,7 @@ static int yfs_deliver_fs_fetch_status(struct afs_call *call) &call->expected_version, NULL); if (ret < 0) return ret; - xdr_decode_YFSCallBack_raw(&bp, callback); + xdr_decode_YFSCallBack_raw(call, callback, &bp); xdr_decode_YFSVolSync(&bp, volsync); _leave(" = 0 [done]"); @@ -2138,7 +2137,7 @@ static int yfs_deliver_fs_inline_bulk_status(struct afs_call *call) _debug("unmarshall CB array"); bp = call->buffer; callbacks = call->reply[2]; - xdr_decode_YFSCallBack_raw(&bp, &callbacks[call->count]); + xdr_decode_YFSCallBack_raw(call, &callbacks[call->count], &bp); statuses = call->reply[1]; if (call->count == 0 && vnode && statuses[0].abort_code == 0) { bp = call->buffer; From d9052dda8a39069312218f913d22d99c48d90004 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 14 May 2019 11:52:03 +0100 Subject: [PATCH 17/19] afs: Don't invalidate callback if AFS_VNODE_DIR_VALID not set Don't invalidate the callback promise on a directory if the AFS_VNODE_DIR_VALID flag is not set (which indicates that the directory contents are invalid, due to edit failure, callback break, page reclaim). The directory will be reloaded next time the directory is accessed, so clearing the callback flag at this point may race with a reload of the directory and cancel it's recorded callback promise. Fixes: f3ddee8dc4e2 ("afs: Fix directory handling") Signed-off-by: David Howells --- fs/afs/inode.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 1e630f016dc5..bf8f56e851df 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -430,12 +430,9 @@ int afs_validate(struct afs_vnode *vnode, struct key *key) vnode->cb_s_break = vnode->cb_interest->server->cb_s_break; vnode->cb_v_break = vnode->volume->cb_v_break; valid = false; - } else if (vnode->status.type == AFS_FTYPE_DIR && - (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags) || - vnode->cb_expires_at - 10 <= now)) { + } else if (test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) { valid = false; - } else if (test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags) || - vnode->cb_expires_at - 10 <= now) { + } else if (vnode->cb_expires_at - 10 <= now) { valid = false; } else { valid = true; From c7226e407b6065d3bda8bd9dc627663d2c505ea3 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 10 May 2019 23:03:31 +0100 Subject: [PATCH 18/19] afs: Fix lock-wait/callback-break double locking __afs_break_callback() holds vnode->lock around its call of afs_lock_may_be_available() - which also takes that lock. Fix this by not taking the lock in __afs_break_callback(). Also, there's no point checking the granted_locks and pending_locks queues; it's sufficient to check lock_state, so move that check out of afs_lock_may_be_available() into __afs_break_callback() to replace the queue checks. Fixes: e8d6c554126b ("AFS: implement file locking") Signed-off-by: David Howells --- fs/afs/callback.c | 8 +------- fs/afs/flock.c | 3 --- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/fs/afs/callback.c b/fs/afs/callback.c index 128f2dbe256a..4876079aa643 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -218,14 +218,8 @@ void __afs_break_callback(struct afs_vnode *vnode) vnode->cb_break++; afs_clear_permits(vnode); - spin_lock(&vnode->lock); - - _debug("break callback"); - - if (list_empty(&vnode->granted_locks) && - !list_empty(&vnode->pending_locks)) + if (vnode->lock_state == AFS_VNODE_LOCK_WAITING_FOR_CB) afs_lock_may_be_available(vnode); - spin_unlock(&vnode->lock); } } diff --git a/fs/afs/flock.c b/fs/afs/flock.c index 3501ef7ddbb4..c91cd201013f 100644 --- a/fs/afs/flock.c +++ b/fs/afs/flock.c @@ -41,9 +41,6 @@ void afs_lock_may_be_available(struct afs_vnode *vnode) { _enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode); - if (vnode->lock_state != AFS_VNODE_LOCK_WAITING_FOR_CB) - return; - spin_lock(&vnode->lock); if (vnode->lock_state == AFS_VNODE_LOCK_WAITING_FOR_CB) afs_next_locker(vnode, 0); From fd711586bb7d63f257da5eff234e68c446ac35ea Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 10 May 2019 23:14:41 +0100 Subject: [PATCH 19/19] afs: Fix double inc of vnode->cb_break When __afs_break_callback() clears the CB_PROMISED flag, it increments vnode->cb_break to trigger a future refetch of the status and callback - however it also calls afs_clear_permits(), which also increments vnode->cb_break. Fix this by removing the increment from afs_clear_permits(). Whilst we're at it, fix the conditional call to afs_put_permits() as the function checks to see if the argument is NULL, so the check is redundant. Fixes: be080a6f43c4 ("afs: Overhaul permit caching"); Signed-off-by: David Howells --- fs/afs/security.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/afs/security.c b/fs/afs/security.c index 5f58a9a17e69..db5529e47eb8 100644 --- a/fs/afs/security.c +++ b/fs/afs/security.c @@ -87,11 +87,9 @@ void afs_clear_permits(struct afs_vnode *vnode) permits = rcu_dereference_protected(vnode->permit_cache, lockdep_is_held(&vnode->lock)); RCU_INIT_POINTER(vnode->permit_cache, NULL); - vnode->cb_break++; spin_unlock(&vnode->lock); - if (permits) - afs_put_permits(permits); + afs_put_permits(permits); } /*