From 02c048b919455aaa38628563cdcc2e691c8a9f53 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 7 Dec 2010 20:16:56 +0100 Subject: [PATCH] fuse: allow batching of FORGET requests Terje Malmedal reports that a fuse filesystem with 32 million inodes on a machine with lots of memory can take up to 30 minutes to process FORGET requests when all those inodes are evicted from the icache. To solve this, create a BATCH_FORGET request that allows up to about 8000 FORGET requests to be sent in a single message. This request is only sent if userspace supports interface version 7.16 or later, otherwise fall back to sending individual FORGET messages. Reported-by: Terje Malmedal Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 92 ++++++++++++++++++++++++++++++++++++++------ fs/fuse/fuse_i.h | 3 +- include/linux/fuse.h | 16 +++++++- 3 files changed, 97 insertions(+), 14 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index fed65303eeeb..cf8d28d1fbad 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -254,8 +254,8 @@ static void queue_request(struct fuse_conn *fc, struct fuse_req *req) void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, u64 nodeid, u64 nlookup) { - forget->nodeid = nodeid; - forget->nlookup = nlookup; + forget->forget_one.nodeid = nodeid; + forget->forget_one.nlookup = nlookup; spin_lock(&fc->lock); fc->forget_list_tail->next = forget; @@ -974,15 +974,26 @@ __releases(fc->lock) return err ? err : reqsize; } -static struct fuse_forget_link *dequeue_forget(struct fuse_conn *fc) +static struct fuse_forget_link *dequeue_forget(struct fuse_conn *fc, + unsigned max, + unsigned *countp) { - struct fuse_forget_link *forget = fc->forget_list_head.next; + struct fuse_forget_link *head = fc->forget_list_head.next; + struct fuse_forget_link **newhead = &head; + unsigned count; - fc->forget_list_head.next = forget->next; + for (count = 0; *newhead != NULL && count < max; count++) + newhead = &(*newhead)->next; + + fc->forget_list_head.next = *newhead; + *newhead = NULL; if (fc->forget_list_head.next == NULL) fc->forget_list_tail = &fc->forget_list_head; - return forget; + if (countp != NULL) + *countp = count; + + return head; } static int fuse_read_single_forget(struct fuse_conn *fc, @@ -991,13 +1002,13 @@ static int fuse_read_single_forget(struct fuse_conn *fc, __releases(fc->lock) { int err; - struct fuse_forget_link *forget = dequeue_forget(fc); + struct fuse_forget_link *forget = dequeue_forget(fc, 1, NULL); struct fuse_forget_in arg = { - .nlookup = forget->nlookup, + .nlookup = forget->forget_one.nlookup, }; struct fuse_in_header ih = { .opcode = FUSE_FORGET, - .nodeid = forget->nodeid, + .nodeid = forget->forget_one.nodeid, .unique = fuse_get_unique(fc), .len = sizeof(ih) + sizeof(arg), }; @@ -1018,6 +1029,65 @@ __releases(fc->lock) return ih.len; } +static int fuse_read_batch_forget(struct fuse_conn *fc, + struct fuse_copy_state *cs, size_t nbytes) +__releases(fc->lock) +{ + int err; + unsigned max_forgets; + unsigned count; + struct fuse_forget_link *head; + struct fuse_batch_forget_in arg = { .count = 0 }; + struct fuse_in_header ih = { + .opcode = FUSE_BATCH_FORGET, + .unique = fuse_get_unique(fc), + .len = sizeof(ih) + sizeof(arg), + }; + + if (nbytes < ih.len) { + spin_unlock(&fc->lock); + return -EINVAL; + } + + max_forgets = (nbytes - ih.len) / sizeof(struct fuse_forget_one); + head = dequeue_forget(fc, max_forgets, &count); + spin_unlock(&fc->lock); + + arg.count = count; + ih.len += count * sizeof(struct fuse_forget_one); + err = fuse_copy_one(cs, &ih, sizeof(ih)); + if (!err) + err = fuse_copy_one(cs, &arg, sizeof(arg)); + + while (head) { + struct fuse_forget_link *forget = head; + + if (!err) { + err = fuse_copy_one(cs, &forget->forget_one, + sizeof(forget->forget_one)); + } + head = forget->next; + kfree(forget); + } + + fuse_copy_finish(cs); + + if (err) + return err; + + return ih.len; +} + +static int fuse_read_forget(struct fuse_conn *fc, struct fuse_copy_state *cs, + size_t nbytes) +__releases(fc->lock) +{ + if (fc->minor < 16 || fc->forget_list_head.next->next == NULL) + return fuse_read_single_forget(fc, cs, nbytes); + else + return fuse_read_batch_forget(fc, cs, nbytes); +} + /* * Read a single request into the userspace filesystem's buffer. This * function waits until a request is available, then removes it from @@ -1058,7 +1128,7 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, if (forget_pending(fc)) { if (list_empty(&fc->pending) || fc->forget_batch-- > 0) - return fuse_read_single_forget(fc, cs, nbytes); + return fuse_read_forget(fc, cs, nbytes); if (fc->forget_batch <= -8) fc->forget_batch = 16; @@ -1837,7 +1907,7 @@ __acquires(fc->lock) end_requests(fc, &fc->pending); end_requests(fc, &fc->processing); while (forget_pending(fc)) - kfree(dequeue_forget(fc)); + kfree(dequeue_forget(fc, 1, NULL)); } /* diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 33369c63a522..ae5744a2f9e9 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -55,8 +55,7 @@ extern unsigned max_user_congthresh; /* One forget request */ struct fuse_forget_link { - u64 nodeid; - u64 nlookup; + struct fuse_forget_one forget_one; struct fuse_forget_link *next; }; diff --git a/include/linux/fuse.h b/include/linux/fuse.h index c3c578e09833..cf11881f4938 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -41,6 +41,9 @@ * 7.15 * - add store notify * - add retrieve notify + * + * 7.16 + * - add BATCH_FORGET request */ #ifndef _LINUX_FUSE_H @@ -72,7 +75,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 15 +#define FUSE_KERNEL_MINOR_VERSION 16 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -256,6 +259,7 @@ enum fuse_opcode { FUSE_IOCTL = 39, FUSE_POLL = 40, FUSE_NOTIFY_REPLY = 41, + FUSE_BATCH_FORGET = 42, /* CUSE specific operations */ CUSE_INIT = 4096, @@ -290,6 +294,16 @@ struct fuse_forget_in { __u64 nlookup; }; +struct fuse_forget_one { + __u64 nodeid; + __u64 nlookup; +}; + +struct fuse_batch_forget_in { + __u32 count; + __u32 dummy; +}; + struct fuse_getattr_in { __u32 getattr_flags; __u32 dummy;