linux/fs/sysfs/bin.c
Eric W. Biederman 3ff195b011 sysfs: Implement sysfs tagged directory support.
The problem.  When implementing a network namespace I need to be able
to have multiple network devices with the same name.  Currently this
is a problem for /sys/class/net/*, /sys/devices/virtual/net/*, and
potentially a few other directories of the form /sys/ ... /net/*.

What this patch does is to add an additional tag field to the
sysfs dirent structure.  For directories that should show different
contents depending on the context such as /sys/class/net/, and
/sys/devices/virtual/net/ this tag field is used to specify the
context in which those directories should be visible.  Effectively
this is the same as creating multiple distinct directories with
the same name but internally to sysfs the result is nicer.

I am calling the concept of a single directory that looks like multiple
directories all at the same path in the filesystem tagged directories.

For the networking namespace the set of directories whose contents I need
to filter with tags can depend on the presence or absence of hotplug
hardware or which modules are currently loaded.  Which means I need
a simple race free way to setup those directories as tagged.

To achieve a reace free design all tagged directories are created
and managed by sysfs itself.

Users of this interface:
- define a type in the sysfs_tag_type enumeration.
- call sysfs_register_ns_types with the type and it's operations
- sysfs_exit_ns when an individual tag is no longer valid

- Implement mount_ns() which returns the ns of the calling process
  so we can attach it to a sysfs superblock.
- Implement ktype.namespace() which returns the ns of a syfs kobject.

Everything else is left up to sysfs and the driver layer.

For the network namespace mount_ns and namespace() are essentially
one line functions, and look to remain that.

Tags are currently represented a const void * pointers as that is
both generic, prevides enough information for equality comparisons,
and is trivial to create for current users, as it is just the
existing namespace pointer.

The work needed in sysfs is more extensive.  At each directory
or symlink creating I need to check if the directory it is being
created in is a tagged directory and if so generate the appropriate
tag to place on the sysfs_dirent.  Likewise at each symlink or
directory removal I need to check if the sysfs directory it is
being removed from is a tagged directory and if so figure out
which tag goes along with the name I am deleting.

Currently only directories which hold kobjects, and
symlinks are supported.  There is not enough information
in the current file attribute interfaces to give us anything
to discriminate on which makes it useless, and there are
no potential users which makes it an uninteresting problem
to solve.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Benjamin Thery <benjamin.thery@bull.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
2010-05-21 09:37:31 -07:00

509 lines
11 KiB
C

/*
* fs/sysfs/bin.c - sysfs binary file implementation
*
* Copyright (c) 2003 Patrick Mochel
* Copyright (c) 2003 Matthew Wilcox
* Copyright (c) 2004 Silicon Graphics, Inc.
* Copyright (c) 2007 SUSE Linux Products GmbH
* Copyright (c) 2007 Tejun Heo <teheo@suse.de>
*
* This file is released under the GPLv2.
*
* Please see Documentation/filesystems/sysfs.txt for more information.
*/
#undef DEBUG
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/kobject.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/mm.h>
#include <asm/uaccess.h>
#include "sysfs.h"
/*
* There's one bin_buffer for each open file.
*
* filp->private_data points to bin_buffer and
* sysfs_dirent->s_bin_attr.buffers points to a the bin_buffer s
* sysfs_dirent->s_bin_attr.buffers is protected by sysfs_bin_lock
*/
static DEFINE_MUTEX(sysfs_bin_lock);
struct bin_buffer {
struct mutex mutex;
void *buffer;
int mmapped;
const struct vm_operations_struct *vm_ops;
struct file *file;
struct hlist_node list;
};
static int
fill_read(struct dentry *dentry, char *buffer, loff_t off, size_t count)
{
struct sysfs_dirent *attr_sd = dentry->d_fsdata;
struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr;
struct kobject *kobj = attr_sd->s_parent->s_dir.kobj;
int rc;
/* need attr_sd for attr, its parent for kobj */
if (!sysfs_get_active(attr_sd))
return -ENODEV;
rc = -EIO;
if (attr->read)
rc = attr->read(kobj, attr, buffer, off, count);
sysfs_put_active(attr_sd);
return rc;
}
static ssize_t
read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off)
{
struct bin_buffer *bb = file->private_data;
struct dentry *dentry = file->f_path.dentry;
int size = dentry->d_inode->i_size;
loff_t offs = *off;
int count = min_t(size_t, bytes, PAGE_SIZE);
char *temp;
if (!bytes)
return 0;
if (size) {
if (offs > size)
return 0;
if (offs + count > size)
count = size - offs;
}
temp = kmalloc(count, GFP_KERNEL);
if (!temp)
return -ENOMEM;
mutex_lock(&bb->mutex);
count = fill_read(dentry, bb->buffer, offs, count);
if (count < 0) {
mutex_unlock(&bb->mutex);
goto out_free;
}
memcpy(temp, bb->buffer, count);
mutex_unlock(&bb->mutex);
if (copy_to_user(userbuf, temp, count)) {
count = -EFAULT;
goto out_free;
}
pr_debug("offs = %lld, *off = %lld, count = %d\n", offs, *off, count);
*off = offs + count;
out_free:
kfree(temp);
return count;
}
static int
flush_write(struct dentry *dentry, char *buffer, loff_t offset, size_t count)
{
struct sysfs_dirent *attr_sd = dentry->d_fsdata;
struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr;
struct kobject *kobj = attr_sd->s_parent->s_dir.kobj;
int rc;
/* need attr_sd for attr, its parent for kobj */
if (!sysfs_get_active(attr_sd))
return -ENODEV;
rc = -EIO;
if (attr->write)
rc = attr->write(kobj, attr, buffer, offset, count);
sysfs_put_active(attr_sd);
return rc;
}
static ssize_t write(struct file *file, const char __user *userbuf,
size_t bytes, loff_t *off)
{
struct bin_buffer *bb = file->private_data;
struct dentry *dentry = file->f_path.dentry;
int size = dentry->d_inode->i_size;
loff_t offs = *off;
int count = min_t(size_t, bytes, PAGE_SIZE);
char *temp;
if (!bytes)
return 0;
if (size) {
if (offs > size)
return 0;
if (offs + count > size)
count = size - offs;
}
temp = memdup_user(userbuf, count);
if (IS_ERR(temp))
return PTR_ERR(temp);
mutex_lock(&bb->mutex);
memcpy(bb->buffer, temp, count);
count = flush_write(dentry, bb->buffer, offs, count);
mutex_unlock(&bb->mutex);
if (count > 0)
*off = offs + count;
kfree(temp);
return count;
}
static void bin_vma_open(struct vm_area_struct *vma)
{
struct file *file = vma->vm_file;
struct bin_buffer *bb = file->private_data;
struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
if (!bb->vm_ops || !bb->vm_ops->open)
return;
if (!sysfs_get_active(attr_sd))
return;
bb->vm_ops->open(vma);
sysfs_put_active(attr_sd);
}
static void bin_vma_close(struct vm_area_struct *vma)
{
struct file *file = vma->vm_file;
struct bin_buffer *bb = file->private_data;
struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
if (!bb->vm_ops || !bb->vm_ops->close)
return;
if (!sysfs_get_active(attr_sd))
return;
bb->vm_ops->close(vma);
sysfs_put_active(attr_sd);
}
static int bin_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
struct file *file = vma->vm_file;
struct bin_buffer *bb = file->private_data;
struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
int ret;
if (!bb->vm_ops || !bb->vm_ops->fault)
return VM_FAULT_SIGBUS;
if (!sysfs_get_active(attr_sd))
return VM_FAULT_SIGBUS;
ret = bb->vm_ops->fault(vma, vmf);
sysfs_put_active(attr_sd);
return ret;
}
static int bin_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
struct file *file = vma->vm_file;
struct bin_buffer *bb = file->private_data;
struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
int ret;
if (!bb->vm_ops)
return VM_FAULT_SIGBUS;
if (!bb->vm_ops->page_mkwrite)
return 0;
if (!sysfs_get_active(attr_sd))
return VM_FAULT_SIGBUS;
ret = bb->vm_ops->page_mkwrite(vma, vmf);
sysfs_put_active(attr_sd);
return ret;
}
static int bin_access(struct vm_area_struct *vma, unsigned long addr,
void *buf, int len, int write)
{
struct file *file = vma->vm_file;
struct bin_buffer *bb = file->private_data;
struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
int ret;
if (!bb->vm_ops || !bb->vm_ops->access)
return -EINVAL;
if (!sysfs_get_active(attr_sd))
return -EINVAL;
ret = bb->vm_ops->access(vma, addr, buf, len, write);
sysfs_put_active(attr_sd);
return ret;
}
#ifdef CONFIG_NUMA
static int bin_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
{
struct file *file = vma->vm_file;
struct bin_buffer *bb = file->private_data;
struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
int ret;
if (!bb->vm_ops || !bb->vm_ops->set_policy)
return 0;
if (!sysfs_get_active(attr_sd))
return -EINVAL;
ret = bb->vm_ops->set_policy(vma, new);
sysfs_put_active(attr_sd);
return ret;
}
static struct mempolicy *bin_get_policy(struct vm_area_struct *vma,
unsigned long addr)
{
struct file *file = vma->vm_file;
struct bin_buffer *bb = file->private_data;
struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
struct mempolicy *pol;
if (!bb->vm_ops || !bb->vm_ops->get_policy)
return vma->vm_policy;
if (!sysfs_get_active(attr_sd))
return vma->vm_policy;
pol = bb->vm_ops->get_policy(vma, addr);
sysfs_put_active(attr_sd);
return pol;
}
static int bin_migrate(struct vm_area_struct *vma, const nodemask_t *from,
const nodemask_t *to, unsigned long flags)
{
struct file *file = vma->vm_file;
struct bin_buffer *bb = file->private_data;
struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
int ret;
if (!bb->vm_ops || !bb->vm_ops->migrate)
return 0;
if (!sysfs_get_active(attr_sd))
return 0;
ret = bb->vm_ops->migrate(vma, from, to, flags);
sysfs_put_active(attr_sd);
return ret;
}
#endif
static const struct vm_operations_struct bin_vm_ops = {
.open = bin_vma_open,
.close = bin_vma_close,
.fault = bin_fault,
.page_mkwrite = bin_page_mkwrite,
.access = bin_access,
#ifdef CONFIG_NUMA
.set_policy = bin_set_policy,
.get_policy = bin_get_policy,
.migrate = bin_migrate,
#endif
};
static int mmap(struct file *file, struct vm_area_struct *vma)
{
struct bin_buffer *bb = file->private_data;
struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr;
struct kobject *kobj = attr_sd->s_parent->s_dir.kobj;
int rc;
mutex_lock(&bb->mutex);
/* need attr_sd for attr, its parent for kobj */
rc = -ENODEV;
if (!sysfs_get_active(attr_sd))
goto out_unlock;
rc = -EINVAL;
if (!attr->mmap)
goto out_put;
rc = attr->mmap(kobj, attr, vma);
if (rc)
goto out_put;
/*
* PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup()
* to satisfy versions of X which crash if the mmap fails: that
* substitutes a new vm_file, and we don't then want bin_vm_ops.
*/
if (vma->vm_file != file)
goto out_put;
rc = -EINVAL;
if (bb->mmapped && bb->vm_ops != vma->vm_ops)
goto out_put;
rc = 0;
bb->mmapped = 1;
bb->vm_ops = vma->vm_ops;
vma->vm_ops = &bin_vm_ops;
out_put:
sysfs_put_active(attr_sd);
out_unlock:
mutex_unlock(&bb->mutex);
return rc;
}
static int open(struct inode * inode, struct file * file)
{
struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr;
struct bin_buffer *bb = NULL;
int error;
/* binary file operations requires both @sd and its parent */
if (!sysfs_get_active(attr_sd))
return -ENODEV;
error = -EACCES;
if ((file->f_mode & FMODE_WRITE) && !(attr->write || attr->mmap))
goto err_out;
if ((file->f_mode & FMODE_READ) && !(attr->read || attr->mmap))
goto err_out;
error = -ENOMEM;
bb = kzalloc(sizeof(*bb), GFP_KERNEL);
if (!bb)
goto err_out;
bb->buffer = kmalloc(PAGE_SIZE, GFP_KERNEL);
if (!bb->buffer)
goto err_out;
mutex_init(&bb->mutex);
bb->file = file;
file->private_data = bb;
mutex_lock(&sysfs_bin_lock);
hlist_add_head(&bb->list, &attr_sd->s_bin_attr.buffers);
mutex_unlock(&sysfs_bin_lock);
/* open succeeded, put active references */
sysfs_put_active(attr_sd);
return 0;
err_out:
sysfs_put_active(attr_sd);
kfree(bb);
return error;
}
static int release(struct inode * inode, struct file * file)
{
struct bin_buffer *bb = file->private_data;
mutex_lock(&sysfs_bin_lock);
hlist_del(&bb->list);
mutex_unlock(&sysfs_bin_lock);
kfree(bb->buffer);
kfree(bb);
return 0;
}
const struct file_operations bin_fops = {
.read = read,
.write = write,
.mmap = mmap,
.llseek = generic_file_llseek,
.open = open,
.release = release,
};
void unmap_bin_file(struct sysfs_dirent *attr_sd)
{
struct bin_buffer *bb;
struct hlist_node *tmp;
if (sysfs_type(attr_sd) != SYSFS_KOBJ_BIN_ATTR)
return;
mutex_lock(&sysfs_bin_lock);
hlist_for_each_entry(bb, tmp, &attr_sd->s_bin_attr.buffers, list) {
struct inode *inode = bb->file->f_path.dentry->d_inode;
unmap_mapping_range(inode->i_mapping, 0, 0, 1);
}
mutex_unlock(&sysfs_bin_lock);
}
/**
* sysfs_create_bin_file - create binary file for object.
* @kobj: object.
* @attr: attribute descriptor.
*/
int sysfs_create_bin_file(struct kobject *kobj,
const struct bin_attribute *attr)
{
BUG_ON(!kobj || !kobj->sd || !attr);
return sysfs_add_file(kobj->sd, &attr->attr, SYSFS_KOBJ_BIN_ATTR);
}
/**
* sysfs_remove_bin_file - remove binary file for object.
* @kobj: object.
* @attr: attribute descriptor.
*/
void sysfs_remove_bin_file(struct kobject *kobj,
const struct bin_attribute *attr)
{
sysfs_hash_and_remove(kobj->sd, NULL, attr->attr.name);
}
EXPORT_SYMBOL_GPL(sysfs_create_bin_file);
EXPORT_SYMBOL_GPL(sysfs_remove_bin_file);