mirror of
https://github.com/FEX-Emu/linux.git
synced 2025-01-31 16:13:22 +00:00
dax: provide an iomap based dax read/write path
This is a much simpler implementation of the DAX read/write path that makes use of the iomap infrastructure. It does not try to mirror the direct I/O calling conventions and thus doesn't have to deal with i_dio_count or the end_io handler, but instead leaves locking and filesystem-specific I/O completion to the caller. Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
This commit is contained in:
parent
b0d5e82fcf
commit
a254e56812
114
fs/dax.c
114
fs/dax.c
@ -31,6 +31,8 @@
|
|||||||
#include <linux/vmstat.h>
|
#include <linux/vmstat.h>
|
||||||
#include <linux/pfn_t.h>
|
#include <linux/pfn_t.h>
|
||||||
#include <linux/sizes.h>
|
#include <linux/sizes.h>
|
||||||
|
#include <linux/iomap.h>
|
||||||
|
#include "internal.h"
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We use lowest available bit in exceptional entry for locking, other two
|
* We use lowest available bit in exceptional entry for locking, other two
|
||||||
@ -1241,3 +1243,115 @@ int dax_truncate_page(struct inode *inode, loff_t from, get_block_t get_block)
|
|||||||
return dax_zero_page_range(inode, from, length, get_block);
|
return dax_zero_page_range(inode, from, length, get_block);
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(dax_truncate_page);
|
EXPORT_SYMBOL_GPL(dax_truncate_page);
|
||||||
|
|
||||||
|
#ifdef CONFIG_FS_IOMAP
/*
 * iomap actor for DAX I/O: copy data between the iov_iter in @data and the
 * persistent-memory extent described by @iomap, starting at @pos for up to
 * @length bytes.  Returns the number of bytes transferred, or a negative
 * errno (with 0 transferred) on failure.
 */
static loff_t
iomap_dax_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
		struct iomap *iomap)
{
	struct iov_iter *iter = data;
	loff_t end = pos + length;
	loff_t copied = 0;
	ssize_t ret = 0;

	if (iov_iter_rw(iter) == READ) {
		/* Reads must not run past the end of the file. */
		end = min(end, i_size_read(inode));
		if (pos >= end)
			return 0;

		/* Holes and unwritten extents read back as zeroes. */
		if (iomap->type == IOMAP_HOLE || iomap->type == IOMAP_UNWRITTEN)
			return iov_iter_zero(min(length, end - pos), iter);
	}

	/* Writes must only ever see fully allocated, written extents. */
	if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED))
		return -EIO;

	while (pos < end) {
		unsigned pg_off = pos & (PAGE_SIZE - 1);
		struct blk_dax_ctl dax = { 0 };
		ssize_t len;

		/* Map the page-aligned start of this position; 512-byte sectors. */
		dax.sector = iomap->blkno +
			(((pos & PAGE_MASK) - iomap->offset) >> 9);
		dax.size = (length + pg_off + PAGE_SIZE - 1) & PAGE_MASK;
		len = dax_map_atomic(iomap->bdev, &dax);
		if (len < 0) {
			ret = len;
			break;
		}

		/* Advance into the mapping and clamp to the requested range. */
		dax.addr += pg_off;
		len -= pg_off;
		if (len > end - pos)
			len = end - pos;

		if (iov_iter_rw(iter) == WRITE)
			len = copy_from_iter_pmem(dax.addr, len, iter);
		else
			len = copy_to_iter(dax.addr, len, iter);
		dax_unmap_atomic(iomap->bdev, &dax);
		if (len <= 0) {
			/* A short copy with no error code means a bad iovec. */
			ret = len ? len : -EFAULT;
			break;
		}

		pos += len;
		length -= len;
		copied += len;
	}

	/* Report partial progress in preference to the error. */
	return copied ? copied : ret;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* iomap_dax_rw - Perform I/O to a DAX file
|
||||||
|
* @iocb: The control block for this I/O
|
||||||
|
* @iter: The addresses to do I/O from or to
|
||||||
|
* @ops: iomap ops passed from the file system
|
||||||
|
*
|
||||||
|
* This function performs read and write operations to directly mapped
|
||||||
|
* persistent memory. The callers needs to take care of read/write exclusion
|
||||||
|
* and evicting any page cache pages in the region under I/O.
|
||||||
|
*/
|
||||||
|
ssize_t
|
||||||
|
iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
|
||||||
|
struct iomap_ops *ops)
|
||||||
|
{
|
||||||
|
struct address_space *mapping = iocb->ki_filp->f_mapping;
|
||||||
|
struct inode *inode = mapping->host;
|
||||||
|
loff_t pos = iocb->ki_pos, ret = 0, done = 0;
|
||||||
|
unsigned flags = 0;
|
||||||
|
|
||||||
|
if (iov_iter_rw(iter) == WRITE)
|
||||||
|
flags |= IOMAP_WRITE;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Yes, even DAX files can have page cache attached to them: A zeroed
|
||||||
|
* page is inserted into the pagecache when we have to serve a write
|
||||||
|
* fault on a hole. It should never be dirtied and can simply be
|
||||||
|
* dropped from the pagecache once we get real data for the page.
|
||||||
|
*
|
||||||
|
* XXX: This is racy against mmap, and there's nothing we can do about
|
||||||
|
* it. We'll eventually need to shift this down even further so that
|
||||||
|
* we can check if we allocated blocks over a hole first.
|
||||||
|
*/
|
||||||
|
if (mapping->nrpages) {
|
||||||
|
ret = invalidate_inode_pages2_range(mapping,
|
||||||
|
pos >> PAGE_SHIFT,
|
||||||
|
(pos + iov_iter_count(iter) - 1) >> PAGE_SHIFT);
|
||||||
|
WARN_ON_ONCE(ret);
|
||||||
|
}
|
||||||
|
|
||||||
|
while (iov_iter_count(iter)) {
|
||||||
|
ret = iomap_apply(inode, pos, iov_iter_count(iter), flags, ops,
|
||||||
|
iter, iomap_dax_actor);
|
||||||
|
if (ret <= 0)
|
||||||
|
break;
|
||||||
|
pos += ret;
|
||||||
|
done += ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
iocb->ki_pos += done;
|
||||||
|
return done ? done : ret;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(iomap_dax_rw);
|
||||||
|
#endif /* CONFIG_FS_IOMAP */
|
||||||
|
@ -6,9 +6,13 @@
|
|||||||
#include <linux/radix-tree.h>
|
#include <linux/radix-tree.h>
|
||||||
#include <asm/pgtable.h>
|
#include <asm/pgtable.h>
|
||||||
|
|
||||||
|
struct iomap_ops;
|
||||||
|
|
||||||
/* We use lowest available exceptional entry bit for locking */
|
/* We use lowest available exceptional entry bit for locking */
|
||||||
#define RADIX_DAX_ENTRY_LOCK (1 << RADIX_TREE_EXCEPTIONAL_SHIFT)
|
#define RADIX_DAX_ENTRY_LOCK (1 << RADIX_TREE_EXCEPTIONAL_SHIFT)
|
||||||
|
|
||||||
|
ssize_t iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
|
||||||
|
struct iomap_ops *ops);
|
||||||
ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *,
|
ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *,
|
||||||
get_block_t, dio_iodone_t, int flags);
|
get_block_t, dio_iodone_t, int flags);
|
||||||
int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t);
|
int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user