From 474a00ee1306eb7e82329fdc28b6471a99facba1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 17 Mar 2011 21:31:32 -0400 Subject: [PATCH 1/8] kill simple_set_mnt() not needed anymore, since all users (->get_sb() instances) are gone. Signed-off-by: Al Viro --- fs/namespace.c | 8 -------- include/linux/fs.h | 1 - 2 files changed, 9 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index d7513485c1f3..a2a01a104ab0 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -466,14 +466,6 @@ static void __mnt_unmake_readonly(struct vfsmount *mnt) br_write_unlock(vfsmount_lock); } -void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb) -{ - mnt->mnt_sb = sb; - mnt->mnt_root = dget(sb->s_root); -} - -EXPORT_SYMBOL(simple_set_mnt); - void free_vfsmnt(struct vfsmount *mnt) { kfree(mnt->mnt_devname); diff --git a/include/linux/fs.h b/include/linux/fs.h index 92f7e04aea11..7061a8587ee3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1839,7 +1839,6 @@ extern struct dentry *mount_pseudo(struct file_system_type *, char *, const struct super_operations *ops, const struct dentry_operations *dops, unsigned long); -extern void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb); static inline void sb_mark_dirty(struct super_block *sb) { From fbe0aa1f3d16fac5b641c0c1697371dcbe45b569 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Thu, 17 Mar 2011 16:29:15 -0700 Subject: [PATCH 2/8] Some fixes for pstore 1) Change from ->get_sb() to ->mount() 2) Use mount_single() instead of mount_nodev() 3) Pulled in ramfs_get_inode() & trimmed to what I need for pstore 4) Drop the ugly pstore_writefile() Just save data using kmalloc() and provide a pstore_file_read() that uses simple_read_from_buffer(). Signed-off-by: Al Viro --- fs/pstore/inode.c | 116 ++++++++++++++++++++++------------------------ 1 file changed, 56 insertions(+), 60 deletions(-) diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index 549d245d0b42..08342232cb1c 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -40,9 +40,29 @@ struct pstore_private { u64 id; int (*erase)(u64); + ssize_t size; + char data[]; }; -#define pstore_get_inode ramfs_get_inode +static int pstore_file_open(struct inode *inode, struct file *file) +{ + file->private_data = inode->i_private; + return 0; +} + +static ssize_t pstore_file_read(struct file *file, char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct pstore_private *ps = file->private_data; + + return simple_read_from_buffer(userbuf, count, ppos, ps->data, ps->size); +} + +static const struct file_operations pstore_file_operations = { + .open = pstore_file_open, + .read = pstore_file_read, + .llseek = default_llseek, +}; /* * When a file is unlinked from our file system we call the @@ -63,6 +83,30 @@ static const struct inode_operations pstore_dir_inode_operations = { .unlink = pstore_unlink, }; +static struct inode *pstore_get_inode(struct super_block *sb, + const struct inode *dir, int mode, dev_t dev) +{ + struct inode *inode = new_inode(sb); + + if (inode) { + inode->i_ino = get_next_ino(); + inode->i_uid = inode->i_gid = 0; + inode->i_mode = mode; + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + switch (mode & S_IFMT) { + case S_IFREG: + inode->i_fop = &pstore_file_operations; + break; + case S_IFDIR: + inode->i_op = &pstore_dir_inode_operations; + inode->i_fop = &simple_dir_operations; + inc_nlink(inode); + break; + } + } + return inode; +} + static const struct super_operations pstore_ops = { .statfs = simple_statfs, .drop_inode = generic_delete_inode, @@ -70,37 +114,10 @@ static const struct super_operations pstore_ops = { }; static struct super_block *pstore_sb; -static struct vfsmount *pstore_mnt; int pstore_is_mounted(void) { - return pstore_mnt != NULL; -} - -/* - * Set up a file structure as if we had opened this file and - * write our data to it. - */ -static int pstore_writefile(struct inode *inode, struct dentry *dentry, - char *data, size_t size) -{ - struct file f; - ssize_t n; - mm_segment_t old_fs = get_fs(); - - memset(&f, '0', sizeof f); - f.f_mapping = inode->i_mapping; - f.f_path.dentry = dentry; - f.f_path.mnt = pstore_mnt; - f.f_pos = 0; - f.f_op = inode->i_fop; - set_fs(KERNEL_DS); - n = do_sync_write(&f, data, size, &f.f_pos); - set_fs(old_fs); - - fsnotify_modify(&f); - - return n == size; + return pstore_sb != NULL; } /* @@ -123,8 +140,7 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, inode = pstore_get_inode(pstore_sb, root->d_inode, S_IFREG | 0444, 0); if (!inode) goto fail; - inode->i_uid = inode->i_gid = 0; - private = kmalloc(sizeof *private, GFP_KERNEL); + private = kmalloc(sizeof *private + size, GFP_KERNEL); if (!private) goto fail_alloc; private->id = id; @@ -152,28 +168,19 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, if (IS_ERR(dentry)) goto fail_lockedalloc; - d_add(dentry, inode); - - mutex_unlock(&root->d_inode->i_mutex); - - if (!pstore_writefile(inode, dentry, data, size)) - goto fail_write; + memcpy(private->data, data, size); + inode->i_size = private->size = size; inode->i_private = private; if (time.tv_sec) inode->i_mtime = inode->i_ctime = time; - return 0; + d_add(dentry, inode); -fail_write: - kfree(private); - inode->i_nlink--; - mutex_lock(&root->d_inode->i_mutex); - d_delete(dentry); - dput(dentry); mutex_unlock(&root->d_inode->i_mutex); - goto fail; + + return 0; fail_lockedalloc: mutex_unlock(&root->d_inode->i_mutex); @@ -225,32 +232,21 @@ fail: return err; } -static int pstore_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *pstore_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - struct dentry *root; - - root = mount_nodev(fs_type, flags, data, pstore_fill_super); - if (IS_ERR(root)) - return -ENOMEM; - - mnt->mnt_root = root; - mnt->mnt_sb = root->d_sb; - pstore_mnt = mnt; - - return 0; + return mount_single(fs_type, flags, data, pstore_fill_super); } static void pstore_kill_sb(struct super_block *sb) { kill_litter_super(sb); pstore_sb = NULL; - pstore_mnt = NULL; } static struct file_system_type pstore_fs_type = { .name = "pstore", - .get_sb = pstore_get_sb, + .mount = pstore_mount, .kill_sb = pstore_kill_sb, }; From 9d412a43c3b26e1e549319e5eec26f0829f9f74d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 17 Mar 2011 22:08:28 -0400 Subject: [PATCH 3/8] vfs: split off vfsmount-related parts of vfs_kern_mount() new function: mount_fs(). Does all work done by vfs_kern_mount() except the allocation and filling of vfsmount; returns root dentry or ERR_PTR(). vfs_kern_mount() switched to using it and taken to fs/namespace.c, along with its wrappers. alloc_vfsmnt()/free_vfsmnt() made static. functions in namespace.c slightly reordered. Signed-off-by: Al Viro --- fs/internal.h | 5 +- fs/namespace.c | 153 ++++++++++++++++++++++++++++++++++++------------- fs/super.c | 96 ++++++------------------------- 3 files changed, 132 insertions(+), 122 deletions(-) diff --git a/fs/internal.h b/fs/internal.h index f3d15de44b15..17191546d527 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -12,6 +12,7 @@ #include struct super_block; +struct file_system_type; struct linux_binprm; struct path; @@ -61,8 +62,6 @@ extern int check_unsafe_exec(struct linux_binprm *); extern int copy_mount_options(const void __user *, unsigned long *); extern int copy_mount_string(const void __user *, char **); -extern void free_vfsmnt(struct vfsmount *); -extern struct vfsmount *alloc_vfsmnt(const char *); extern unsigned int mnt_get_count(struct vfsmount *mnt); extern struct vfsmount *__lookup_mnt(struct vfsmount *, struct dentry *, int); extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *, @@ -99,6 +98,8 @@ extern struct file *get_empty_filp(void); extern int do_remount_sb(struct super_block *, int, void *, int); extern void __put_super(struct super_block *sb); extern void put_super(struct super_block *sb); +extern struct dentry *mount_fs(struct file_system_type *, + int, const char *, void *); /* * open.c diff --git a/fs/namespace.c b/fs/namespace.c index a2a01a104ab0..453529f72dff 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -196,7 +196,7 @@ unsigned int mnt_get_count(struct vfsmount *mnt) #endif } -struct vfsmount *alloc_vfsmnt(const char *name) +static struct vfsmount *alloc_vfsmnt(const char *name) { struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL); if (mnt) { @@ -466,7 +466,7 @@ static void __mnt_unmake_readonly(struct vfsmount *mnt) br_write_unlock(vfsmount_lock); } -void free_vfsmnt(struct vfsmount *mnt) +static void free_vfsmnt(struct vfsmount *mnt) { kfree(mnt->mnt_devname); mnt_free_id(mnt); @@ -670,6 +670,36 @@ static struct vfsmount *skip_mnt_tree(struct vfsmount *p) return p; } +struct vfsmount * +vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data) +{ + struct vfsmount *mnt; + struct dentry *root; + + if (!type) + return ERR_PTR(-ENODEV); + + mnt = alloc_vfsmnt(name); + if (!mnt) + return ERR_PTR(-ENOMEM); + + if (flags & MS_KERNMOUNT) + mnt->mnt_flags = MNT_INTERNAL; + + root = mount_fs(type, flags, name, data); + if (IS_ERR(root)) { + free_vfsmnt(mnt); + return ERR_CAST(root); + } + + mnt->mnt_root = root; + mnt->mnt_sb = root->d_sb; + mnt->mnt_mountpoint = mnt->mnt_root; + mnt->mnt_parent = mnt; + return mnt; +} +EXPORT_SYMBOL_GPL(vfs_kern_mount); + static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root, int flag) { @@ -1905,7 +1935,81 @@ out: return err; } -static int do_add_mount(struct vfsmount *, struct path *, int); +static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype) +{ + int err; + const char *subtype = strchr(fstype, '.'); + if (subtype) { + subtype++; + err = -EINVAL; + if (!subtype[0]) + goto err; + } else + subtype = ""; + + mnt->mnt_sb->s_subtype = kstrdup(subtype, GFP_KERNEL); + err = -ENOMEM; + if (!mnt->mnt_sb->s_subtype) + goto err; + return mnt; + + err: + mntput(mnt); + return ERR_PTR(err); +} + +struct vfsmount * +do_kern_mount(const char *fstype, int flags, const char *name, void *data) +{ + struct file_system_type *type = get_fs_type(fstype); + struct vfsmount *mnt; + if (!type) + return ERR_PTR(-ENODEV); + mnt = vfs_kern_mount(type, flags, name, data); + if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) && + !mnt->mnt_sb->s_subtype) + mnt = fs_set_subtype(mnt, fstype); + put_filesystem(type); + return mnt; +} +EXPORT_SYMBOL_GPL(do_kern_mount); + +/* + * add a mount into a namespace's mount tree + */ +static int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flags) +{ + int err; + + mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL); + + down_write(&namespace_sem); + /* Something was mounted here while we slept */ + err = follow_down(path, true); + if (err < 0) + goto unlock; + + err = -EINVAL; + if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(path->mnt)) + goto unlock; + + /* Refuse the same filesystem on the same mount point */ + err = -EBUSY; + if (path->mnt->mnt_sb == newmnt->mnt_sb && + path->mnt->mnt_root == path->dentry) + goto unlock; + + err = -EINVAL; + if (S_ISLNK(newmnt->mnt_root->d_inode->i_mode)) + goto unlock; + + newmnt->mnt_flags = mnt_flags; + err = graft_tree(newmnt, path); + +unlock: + up_write(&namespace_sem); + return err; +} /* * create a new mount for userspace and request it to be added into the @@ -1965,43 +2069,6 @@ fail: return err; } -/* - * add a mount into a namespace's mount tree - */ -static int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flags) -{ - int err; - - mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL); - - down_write(&namespace_sem); - /* Something was mounted here while we slept */ - err = follow_down(path, true); - if (err < 0) - goto unlock; - - err = -EINVAL; - if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(path->mnt)) - goto unlock; - - /* Refuse the same filesystem on the same mount point */ - err = -EBUSY; - if (path->mnt->mnt_sb == newmnt->mnt_sb && - path->mnt->mnt_root == path->dentry) - goto unlock; - - err = -EINVAL; - if (S_ISLNK(newmnt->mnt_root->d_inode->i_mode)) - goto unlock; - - newmnt->mnt_flags = mnt_flags; - err = graft_tree(newmnt, path); - -unlock: - up_write(&namespace_sem); - return err; -} - /** * mnt_set_expiry - Put a mount on an expiration list * @mnt: The mount to list. @@ -2660,3 +2727,9 @@ void put_mnt_ns(struct mnt_namespace *ns) kfree(ns); } EXPORT_SYMBOL(put_mnt_ns); + +struct vfsmount *kern_mount_data(struct file_system_type *type, void *data) +{ + return vfs_kern_mount(type, MS_KERNMOUNT, type->name, data); +} +EXPORT_SYMBOL_GPL(kern_mount_data); diff --git a/fs/super.c b/fs/super.c index 4bae0ef6110e..e84864908264 100644 --- a/fs/super.c +++ b/fs/super.c @@ -910,29 +910,18 @@ struct dentry *mount_single(struct file_system_type *fs_type, } EXPORT_SYMBOL(mount_single); -struct vfsmount * -vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data) +struct dentry * +mount_fs(struct file_system_type *type, int flags, const char *name, void *data) { - struct vfsmount *mnt; struct dentry *root; + struct super_block *sb; char *secdata = NULL; - int error; - - if (!type) - return ERR_PTR(-ENODEV); - - error = -ENOMEM; - mnt = alloc_vfsmnt(name); - if (!mnt) - goto out; - - if (flags & MS_KERNMOUNT) - mnt->mnt_flags = MNT_INTERNAL; + int error = -ENOMEM; if (data && !(type->fs_flags & FS_BINARY_MOUNTDATA)) { secdata = alloc_secdata(); if (!secdata) - goto out_mnt; + goto out; error = security_sb_copy_data(data, secdata); if (error) @@ -944,13 +933,12 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void error = PTR_ERR(root); goto out_free_secdata; } - mnt->mnt_root = root; - mnt->mnt_sb = root->d_sb; - BUG_ON(!mnt->mnt_sb); - WARN_ON(!mnt->mnt_sb->s_bdi); - mnt->mnt_sb->s_flags |= MS_BORN; + sb = root->d_sb; + BUG_ON(!sb); + WARN_ON(!sb->s_bdi); + sb->s_flags |= MS_BORN; - error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata); + error = security_sb_kern_mount(sb, flags, secdata); if (error) goto out_sb; @@ -961,27 +949,21 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void * violate this rule. This warning should be either removed or * converted to a BUG() in 2.6.34. */ - WARN((mnt->mnt_sb->s_maxbytes < 0), "%s set sb->s_maxbytes to " - "negative value (%lld)\n", type->name, mnt->mnt_sb->s_maxbytes); + WARN((sb->s_maxbytes < 0), "%s set sb->s_maxbytes to " + "negative value (%lld)\n", type->name, sb->s_maxbytes); - mnt->mnt_mountpoint = mnt->mnt_root; - mnt->mnt_parent = mnt; - up_write(&mnt->mnt_sb->s_umount); + up_write(&sb->s_umount); free_secdata(secdata); - return mnt; + return root; out_sb: - dput(mnt->mnt_root); - deactivate_locked_super(mnt->mnt_sb); + dput(root); + deactivate_locked_super(sb); out_free_secdata: free_secdata(secdata); -out_mnt: - free_vfsmnt(mnt); out: return ERR_PTR(error); } -EXPORT_SYMBOL_GPL(vfs_kern_mount); - /** * freeze_super - lock the filesystem and force it into a consistent state * @sb: the super to lock @@ -1071,49 +1053,3 @@ out: return 0; } EXPORT_SYMBOL(thaw_super); - -static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype) -{ - int err; - const char *subtype = strchr(fstype, '.'); - if (subtype) { - subtype++; - err = -EINVAL; - if (!subtype[0]) - goto err; - } else - subtype = ""; - - mnt->mnt_sb->s_subtype = kstrdup(subtype, GFP_KERNEL); - err = -ENOMEM; - if (!mnt->mnt_sb->s_subtype) - goto err; - return mnt; - - err: - mntput(mnt); - return ERR_PTR(err); -} - -struct vfsmount * -do_kern_mount(const char *fstype, int flags, const char *name, void *data) -{ - struct file_system_type *type = get_fs_type(fstype); - struct vfsmount *mnt; - if (!type) - return ERR_PTR(-ENODEV); - mnt = vfs_kern_mount(type, flags, name, data); - if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) && - !mnt->mnt_sb->s_subtype) - mnt = fs_set_subtype(mnt, fstype); - put_filesystem(type); - return mnt; -} -EXPORT_SYMBOL_GPL(do_kern_mount); - -struct vfsmount *kern_mount_data(struct file_system_type *type, void *data) -{ - return vfs_kern_mount(type, MS_KERNMOUNT, type->name, data); -} - -EXPORT_SYMBOL_GPL(kern_mount_data); From 27cb1572e3e6bb1f8cf6bb3d74c914a87b131792 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 18 Mar 2011 08:29:36 -0400 Subject: [PATCH 4/8] fix deadlock in pivot_root() Don't hold vfsmount_lock over the loop traversing ->mnt_parent; do check_mnt(new.mnt) under namespace_sem instead; combined with namespace_sem held over all that code it'll guarantee the stability of ->mnt_parent chain all the way to the root. Doing check_mnt() outside of namespace_sem in case of pivot_root() is wrong anyway. Signed-off-by: Al Viro --- fs/namespace.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 453529f72dff..46cc26b5aaf2 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2569,9 +2569,6 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, error = user_path_dir(new_root, &new); if (error) goto out0; - error = -EINVAL; - if (!check_mnt(new.mnt)) - goto out1; error = user_path_dir(put_old, &old); if (error) @@ -2591,7 +2588,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, IS_MNT_SHARED(new.mnt->mnt_parent) || IS_MNT_SHARED(root.mnt->mnt_parent)) goto out2; - if (!check_mnt(root.mnt)) + if (!check_mnt(root.mnt) || !check_mnt(new.mnt)) goto out2; error = -ENOENT; if (cant_mount(old.dentry)) @@ -2615,19 +2612,19 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, goto out2; /* not attached */ /* make sure we can reach put_old from new_root */ tmp = old.mnt; - br_write_lock(vfsmount_lock); if (tmp != new.mnt) { for (;;) { if (tmp->mnt_parent == tmp) - goto out3; /* already mounted on put_old */ + goto out2; /* already mounted on put_old */ if (tmp->mnt_parent == new.mnt) break; tmp = tmp->mnt_parent; } if (!is_subdir(tmp->mnt_mountpoint, new.dentry)) - goto out3; + goto out2; } else if (!is_subdir(old.dentry, new.dentry)) - goto out3; + goto out2; + br_write_lock(vfsmount_lock); detach_mnt(new.mnt, &parent_path); detach_mnt(root.mnt, &root_parent); /* mount old root on put_old */ @@ -2650,9 +2647,6 @@ out1: path_put(&new); out0: return error; -out3: - br_write_unlock(vfsmount_lock); - goto out2; } static void __init init_mount_tree(void) From b12cea9198fa99ffd3de1776c323bc7464d26b44 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 18 Mar 2011 08:55:38 -0400 Subject: [PATCH 5/8] change the locking order for namespace_sem Have it nested inside ->i_mutex. Instead of using follow_down() under namespace_sem, followed by grabbing i_mutex and checking that mountpoint to be is not dead, do the following: grab i_mutex check that it's not dead grab namespace_sem see if anything is mounted there if not, we've won otherwise drop locks put_path on what we had replace with what's mounted retry everything with new mountpoint to be New helper (lock_mount()) does that. do_add_mount(), do_move_mount(), do_loopback() and pivot_root() switched to it; in case of the last two that eliminates a race we used to have - original code didn't do follow_down(). Signed-off-by: Al Viro --- fs/namespace.c | 135 +++++++++++++++++++++++++++---------------------- 1 file changed, 74 insertions(+), 61 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 46cc26b5aaf2..9263995bf6a1 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1663,9 +1663,35 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt, return err; } +static int lock_mount(struct path *path) +{ + struct vfsmount *mnt; +retry: + mutex_lock(&path->dentry->d_inode->i_mutex); + if (unlikely(cant_mount(path->dentry))) { + mutex_unlock(&path->dentry->d_inode->i_mutex); + return -ENOENT; + } + down_write(&namespace_sem); + mnt = lookup_mnt(path); + if (likely(!mnt)) + return 0; + up_write(&namespace_sem); + mutex_unlock(&path->dentry->d_inode->i_mutex); + path_put(path); + path->mnt = mnt; + path->dentry = dget(mnt->mnt_root); + goto retry; +} + +static void unlock_mount(struct path *path) +{ + up_write(&namespace_sem); + mutex_unlock(&path->dentry->d_inode->i_mutex); +} + static int graft_tree(struct vfsmount *mnt, struct path *path) { - int err; if (mnt->mnt_sb->s_flags & MS_NOUSER) return -EINVAL; @@ -1673,16 +1699,10 @@ static int graft_tree(struct vfsmount *mnt, struct path *path) S_ISDIR(mnt->mnt_root->d_inode->i_mode)) return -ENOTDIR; - err = -ENOENT; - mutex_lock(&path->dentry->d_inode->i_mutex); - if (cant_mount(path->dentry)) - goto out_unlock; + if (d_unlinked(path->dentry)) + return -ENOENT; - if (!d_unlinked(path->dentry)) - err = attach_recursive_mnt(mnt, path, NULL); -out_unlock: - mutex_unlock(&path->dentry->d_inode->i_mutex); - return err; + return attach_recursive_mnt(mnt, path, NULL); } /* @@ -1745,6 +1765,7 @@ static int do_change_type(struct path *path, int flag) static int do_loopback(struct path *path, char *old_name, int recurse) { + LIST_HEAD(umount_list); struct path old_path; struct vfsmount *mnt = NULL; int err = mount_is_safe(path); @@ -1756,13 +1777,16 @@ static int do_loopback(struct path *path, char *old_name, if (err) return err; - down_write(&namespace_sem); - err = -EINVAL; - if (IS_MNT_UNBINDABLE(old_path.mnt)) + err = lock_mount(path); + if (err) goto out; + err = -EINVAL; + if (IS_MNT_UNBINDABLE(old_path.mnt)) + goto out2; + if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt)) - goto out; + goto out2; err = -ENOMEM; if (recurse) @@ -1771,20 +1795,18 @@ static int do_loopback(struct path *path, char *old_name, mnt = clone_mnt(old_path.mnt, old_path.dentry, 0); if (!mnt) - goto out; + goto out2; err = graft_tree(mnt, path); if (err) { - LIST_HEAD(umount_list); - br_write_lock(vfsmount_lock); umount_tree(mnt, 0, &umount_list); br_write_unlock(vfsmount_lock); - release_mounts(&umount_list); } - +out2: + unlock_mount(path); + release_mounts(&umount_list); out: - up_write(&namespace_sem); path_put(&old_path); return err; } @@ -1873,18 +1895,12 @@ static int do_move_mount(struct path *path, char *old_name) if (err) return err; - down_write(&namespace_sem); - err = follow_down(path, true); + err = lock_mount(path); if (err < 0) goto out; err = -EINVAL; if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt)) - goto out; - - err = -ENOENT; - mutex_lock(&path->dentry->d_inode->i_mutex); - if (cant_mount(path->dentry)) goto out1; if (d_unlinked(path->dentry)) @@ -1926,9 +1942,8 @@ static int do_move_mount(struct path *path, char *old_name) * automatically */ list_del_init(&old_path.mnt->mnt_expire); out1: - mutex_unlock(&path->dentry->d_inode->i_mutex); + unlock_mount(path); out: - up_write(&namespace_sem); if (!err) path_put(&parent_path); path_put(&old_path); @@ -1983,11 +1998,9 @@ static int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flag mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL); - down_write(&namespace_sem); - /* Something was mounted here while we slept */ - err = follow_down(path, true); - if (err < 0) - goto unlock; + err = lock_mount(path); + if (err) + return err; err = -EINVAL; if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(path->mnt)) @@ -2007,7 +2020,7 @@ static int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flag err = graft_tree(newmnt, path); unlock: - up_write(&namespace_sem); + unlock_mount(path); return err; } @@ -2575,55 +2588,53 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, goto out1; error = security_sb_pivotroot(&old, &new); - if (error) { - path_put(&old); - goto out1; - } + if (error) + goto out2; get_fs_root(current->fs, &root); - down_write(&namespace_sem); - mutex_lock(&old.dentry->d_inode->i_mutex); + error = lock_mount(&old); + if (error) + goto out3; + error = -EINVAL; if (IS_MNT_SHARED(old.mnt) || IS_MNT_SHARED(new.mnt->mnt_parent) || IS_MNT_SHARED(root.mnt->mnt_parent)) - goto out2; + goto out4; if (!check_mnt(root.mnt) || !check_mnt(new.mnt)) - goto out2; + goto out4; error = -ENOENT; - if (cant_mount(old.dentry)) - goto out2; if (d_unlinked(new.dentry)) - goto out2; + goto out4; if (d_unlinked(old.dentry)) - goto out2; + goto out4; error = -EBUSY; if (new.mnt == root.mnt || old.mnt == root.mnt) - goto out2; /* loop, on the same file system */ + goto out4; /* loop, on the same file system */ error = -EINVAL; if (root.mnt->mnt_root != root.dentry) - goto out2; /* not a mountpoint */ + goto out4; /* not a mountpoint */ if (root.mnt->mnt_parent == root.mnt) - goto out2; /* not attached */ + goto out4; /* not attached */ if (new.mnt->mnt_root != new.dentry) - goto out2; /* not a mountpoint */ + goto out4; /* not a mountpoint */ if (new.mnt->mnt_parent == new.mnt) - goto out2; /* not attached */ + goto out4; /* not attached */ /* make sure we can reach put_old from new_root */ tmp = old.mnt; if (tmp != new.mnt) { for (;;) { if (tmp->mnt_parent == tmp) - goto out2; /* already mounted on put_old */ + goto out4; /* already mounted on put_old */ if (tmp->mnt_parent == new.mnt) break; tmp = tmp->mnt_parent; } if (!is_subdir(tmp->mnt_mountpoint, new.dentry)) - goto out2; + goto out4; } else if (!is_subdir(old.dentry, new.dentry)) - goto out2; + goto out4; br_write_lock(vfsmount_lock); detach_mnt(new.mnt, &parent_path); detach_mnt(root.mnt, &root_parent); @@ -2634,14 +2645,16 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, touch_mnt_namespace(current->nsproxy->mnt_ns); br_write_unlock(vfsmount_lock); chroot_fs_refs(&root, &new); - error = 0; - path_put(&root_parent); - path_put(&parent_path); -out2: - mutex_unlock(&old.dentry->d_inode->i_mutex); - up_write(&namespace_sem); +out4: + unlock_mount(&old); + if (!error) { + path_put(&root_parent); + path_put(&parent_path); + } +out3: path_put(&root); +out2: path_put(&old); out1: path_put(&new); From 7cc90cc3ffe22a0d81b8d605b20a82ec7911012d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 18 Mar 2011 09:04:20 -0400 Subject: [PATCH 6/8] don't pass 'mounting_here' flag to follow_down() it's always false now Signed-off-by: Al Viro --- fs/namei.c | 4 ++-- fs/nfsd/vfs.c | 2 +- include/linux/namei.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index b912b7abe747..e092648a068c 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1065,7 +1065,7 @@ failed: * Care must be taken as namespace_sem may be held (indicated by mounting_here * being true). */ -int follow_down(struct path *path, bool mounting_here) +int follow_down(struct path *path) { unsigned managed; int ret; @@ -1086,7 +1086,7 @@ int follow_down(struct path *path, bool mounting_here) BUG_ON(!path->dentry->d_op); BUG_ON(!path->dentry->d_op->d_manage); ret = path->dentry->d_op->d_manage( - path->dentry, mounting_here, false); + path->dentry, false, false); if (ret < 0) return ret == -EISDIR ? 0 : ret; } diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index da1d9701f8e4..ff93025ae2f7 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -87,7 +87,7 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, .dentry = dget(dentry)}; int err = 0; - err = follow_down(&path, false); + err = follow_down(&path); if (err < 0) goto out; diff --git a/include/linux/namei.h b/include/linux/namei.h index 9c8603872c36..eba45ea10298 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -85,7 +85,7 @@ extern struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry extern struct dentry *lookup_one_len(const char *, struct dentry *, int); extern int follow_down_one(struct path *); -extern int follow_down(struct path *, bool); +extern int follow_down(struct path *); extern int follow_up(struct path *); extern struct dentry *lock_rename(struct dentry *, struct dentry *); From 1aed3e4204dd787d53b3cd6363eb63bb4900c38e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 18 Mar 2011 09:09:02 -0400 Subject: [PATCH 7/8] lose 'mounting_here' argument in ->d_manage() it's always false... Signed-off-by: Al Viro --- Documentation/filesystems/vfs.txt | 6 +----- fs/autofs4/root.c | 6 +++--- fs/namei.c | 7 +++---- include/linux/dcache.h | 2 +- 4 files changed, 8 insertions(+), 13 deletions(-) diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index ef0714aa8e40..306f0ae8df09 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -873,7 +873,7 @@ struct dentry_operations { void (*d_iput)(struct dentry *, struct inode *); char *(*d_dname)(struct dentry *, char *, int); struct vfsmount *(*d_automount)(struct path *); - int (*d_manage)(struct dentry *, bool, bool); + int (*d_manage)(struct dentry *, bool); }; d_revalidate: called when the VFS needs to revalidate a dentry. This @@ -969,10 +969,6 @@ struct dentry_operations { mounted on it and not to check the automount flag. Any other error code will abort pathwalk completely. - If the 'mounting_here' parameter is true, then namespace_sem is being - held by the caller and the function should not initiate any mounts or - unmounts that it will then wait for. - If the 'rcu_walk' parameter is true, then the caller is doing a pathwalk in RCU-walk mode. Sleeping is not permitted in this mode, and the caller can be asked to leave it and call again by returing diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 014e7aba3b08..e6f84d26f4cf 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -36,7 +36,7 @@ static long autofs4_root_compat_ioctl(struct file *,unsigned int,unsigned long); static int autofs4_dir_open(struct inode *inode, struct file *file); static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *); static struct vfsmount *autofs4_d_automount(struct path *); -static int autofs4_d_manage(struct dentry *, bool, bool); +static int autofs4_d_manage(struct dentry *, bool); static void autofs4_dentry_release(struct dentry *); const struct file_operations autofs4_root_operations = { @@ -446,7 +446,7 @@ done: return NULL; } -int autofs4_d_manage(struct dentry *dentry, bool mounting_here, bool rcu_walk) +int autofs4_d_manage(struct dentry *dentry, bool rcu_walk) { struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); @@ -454,7 +454,7 @@ int autofs4_d_manage(struct dentry *dentry, bool mounting_here, bool rcu_walk) dentry, dentry->d_name.len, dentry->d_name.name); /* The daemon never waits. */ - if (autofs4_oz_mode(sbi) || mounting_here) { + if (autofs4_oz_mode(sbi)) { if (!d_mountpoint(dentry)) return -EISDIR; return 0; diff --git a/fs/namei.c b/fs/namei.c index e092648a068c..5a9a6c3094da 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -933,8 +933,7 @@ static int follow_managed(struct path *path, unsigned flags) if (managed & DCACHE_MANAGE_TRANSIT) { BUG_ON(!path->dentry->d_op); BUG_ON(!path->dentry->d_op->d_manage); - ret = path->dentry->d_op->d_manage(path->dentry, - false, false); + ret = path->dentry->d_op->d_manage(path->dentry, false); if (ret < 0) return ret == -EISDIR ? 0 : ret; } @@ -999,7 +998,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, struct vfsmount *mounted; if (unlikely(path->dentry->d_flags & DCACHE_MANAGE_TRANSIT) && !reverse_transit && - path->dentry->d_op->d_manage(path->dentry, false, true) < 0) + path->dentry->d_op->d_manage(path->dentry, true) < 0) return false; mounted = __lookup_mnt(path->mnt, path->dentry, 1); if (!mounted) @@ -1086,7 +1085,7 @@ int follow_down(struct path *path) BUG_ON(!path->dentry->d_op); BUG_ON(!path->dentry->d_op->d_manage); ret = path->dentry->d_op->d_manage( - path->dentry, false, false); + path->dentry, false); if (ret < 0) return ret == -EISDIR ? 0 : ret; } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index f958c19e3ca5..1a87760d6532 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -168,7 +168,7 @@ struct dentry_operations { void (*d_iput)(struct dentry *, struct inode *); char *(*d_dname)(struct dentry *, char *, int); struct vfsmount *(*d_automount)(struct path *); - int (*d_manage)(struct dentry *, bool, bool); + int (*d_manage)(struct dentry *, bool); } ____cacheline_aligned; /* From 24ff6663ccfdaf088dfa7acae489cb11ed4f43c4 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 18 Nov 2010 20:52:55 -0500 Subject: [PATCH 8/8] fs: call security_d_instantiate in d_obtain_alias V2 While trying to track down some NFS problems with BTRFS, I kept noticing I was getting -EACCESS for no apparent reason. Eric Paris and printk() helped me figure out that it was SELinux that was giving me grief, with the following denial type=AVC msg=audit(1290013638.413:95): avc: denied { 0x800000 } for pid=1772 comm="nfsd" name="" dev=sda1 ino=256 scontext=system_u:system_r:kernel_t:s0 tcontext=system_u:object_r:unlabeled_t:s0 tclass=file Turns out this is because in d_obtain_alias if we can't find an alias we create one and do all the normal instantiation stuff, but we don't do the security_d_instantiate. Usually we are protected from getting a hashed dentry that hasn't yet run security_d_instantiate() by the parent's i_mutex, but obviously this isn't an option there, so in order to deal with the case that a second thread comes in and finds our new dentry before we get to run security_d_instantiate(), we go ahead and call it if we find a dentry already. Eric assures me that this is ok as the code checks to see if the dentry has been initialized already so calling security_d_instantiate() against the same dentry multiple times is ok. With this patch I'm no longer getting errant -EACCESS values. Signed-off-by: Josef Bacik Signed-off-by: Al Viro --- fs/dcache.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/dcache.c b/fs/dcache.c index a39fe47c466f..1baddc1cec48 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1612,10 +1612,13 @@ struct dentry *d_obtain_alias(struct inode *inode) __bit_spin_unlock(0, (unsigned long *)&tmp->d_sb->s_anon.first); spin_unlock(&tmp->d_lock); spin_unlock(&inode->i_lock); + security_d_instantiate(tmp, inode); return tmp; out_iput: + if (res && !IS_ERR(res)) + security_d_instantiate(res, inode); iput(inode); return res; }