linux/fs/jffs2/file.c
David Woodhouse cf5eba5334 [JFFS2] Reduce excessive node count for syslog files.
We currently get fairly poor behaviour with files which get many short
writes, such as system logs. This is because we end up with many tiny
data nodes, and the rbtree gets massive. None of these nodes are
actually obsolete, so they are counted as 'clean' space. Eraseblocks can
be entirely full of these nodes (which are REF_NORMAL instead of
REF_PRISTINE), and still they count entirely towards 'used_size' and the
eraseblocks can sit on the clean_list for a long time without being
picked for GC.

One way to alleviate this in the long term is to account REF_NORMAL
space separately from REF_PRISTINE space, rather than counting them both
towards used_size. Then these eraseblocks can be picked for GC and the
offending nodes will be garbage collected.

The short-term fix, though -- which probably makes sense even if we do
eventually implement the above -- is to merge these nodes as they're
written. When we write the last byte in a page, write the _whole_ page.
This obsoletes the earlier nodes in the page _immediately_ and we don't
even need to wait for the garbage collection to do it.

Original implementation from Ferenc Havasi <havasi@inf.u-szeged.hu>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
2006-05-14 04:06:24 +01:00

293 lines
8.4 KiB
C

/*
* JFFS2 -- Journalling Flash File System, Version 2.
*
* Copyright (C) 2001-2003 Red Hat, Inc.
*
* Created by David Woodhouse <dwmw2@infradead.org>
*
* For licensing information, see the file 'LICENCE' in this directory.
*
* $Id: file.c,v 1.104 2005/10/18 23:29:35 tpoynor Exp $
*
*/
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/time.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/crc32.h>
#include <linux/jffs2.h>
#include "nodelist.h"
static int jffs2_commit_write (struct file *filp, struct page *pg,
unsigned start, unsigned end);
static int jffs2_prepare_write (struct file *filp, struct page *pg,
unsigned start, unsigned end);
static int jffs2_readpage (struct file *filp, struct page *pg);
int jffs2_fsync(struct file *filp, struct dentry *dentry, int datasync)
{
struct inode *inode = dentry->d_inode;
struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
/* Trigger GC to flush any pending writes for this inode */
jffs2_flush_wbuf_gc(c, inode->i_ino);
return 0;
}
const struct file_operations jffs2_file_operations =
{
.llseek = generic_file_llseek,
.open = generic_file_open,
.read = generic_file_read,
.write = generic_file_write,
.ioctl = jffs2_ioctl,
.mmap = generic_file_readonly_mmap,
.fsync = jffs2_fsync,
.sendfile = generic_file_sendfile
};
/* jffs2_file_inode_operations */
struct inode_operations jffs2_file_inode_operations =
{
.setattr = jffs2_setattr
};
struct address_space_operations jffs2_file_address_operations =
{
.readpage = jffs2_readpage,
.prepare_write =jffs2_prepare_write,
.commit_write = jffs2_commit_write
};
static int jffs2_do_readpage_nolock (struct inode *inode, struct page *pg)
{
struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
unsigned char *pg_buf;
int ret;
D2(printk(KERN_DEBUG "jffs2_do_readpage_nolock(): ino #%lu, page at offset 0x%lx\n", inode->i_ino, pg->index << PAGE_CACHE_SHIFT));
BUG_ON(!PageLocked(pg));
pg_buf = kmap(pg);
/* FIXME: Can kmap fail? */
ret = jffs2_read_inode_range(c, f, pg_buf, pg->index << PAGE_CACHE_SHIFT, PAGE_CACHE_SIZE);
if (ret) {
ClearPageUptodate(pg);
SetPageError(pg);
} else {
SetPageUptodate(pg);
ClearPageError(pg);
}
flush_dcache_page(pg);
kunmap(pg);
D2(printk(KERN_DEBUG "readpage finished\n"));
return 0;
}
int jffs2_do_readpage_unlock(struct inode *inode, struct page *pg)
{
int ret = jffs2_do_readpage_nolock(inode, pg);
unlock_page(pg);
return ret;
}
static int jffs2_readpage (struct file *filp, struct page *pg)
{
struct jffs2_inode_info *f = JFFS2_INODE_INFO(pg->mapping->host);
int ret;
down(&f->sem);
ret = jffs2_do_readpage_unlock(pg->mapping->host, pg);
up(&f->sem);
return ret;
}
static int jffs2_prepare_write (struct file *filp, struct page *pg,
unsigned start, unsigned end)
{
struct inode *inode = pg->mapping->host;
struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
uint32_t pageofs = pg->index << PAGE_CACHE_SHIFT;
int ret = 0;
D1(printk(KERN_DEBUG "jffs2_prepare_write()\n"));
if (pageofs > inode->i_size) {
/* Make new hole frag from old EOF to new page */
struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
struct jffs2_raw_inode ri;
struct jffs2_full_dnode *fn;
uint32_t phys_ofs, alloc_len;
D1(printk(KERN_DEBUG "Writing new hole frag 0x%x-0x%x between current EOF and new page\n",
(unsigned int)inode->i_size, pageofs));
ret = jffs2_reserve_space(c, sizeof(ri), &phys_ofs, &alloc_len,
ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
if (ret)
return ret;
down(&f->sem);
memset(&ri, 0, sizeof(ri));
ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
ri.totlen = cpu_to_je32(sizeof(ri));
ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
ri.ino = cpu_to_je32(f->inocache->ino);
ri.version = cpu_to_je32(++f->highest_version);
ri.mode = cpu_to_jemode(inode->i_mode);
ri.uid = cpu_to_je16(inode->i_uid);
ri.gid = cpu_to_je16(inode->i_gid);
ri.isize = cpu_to_je32(max((uint32_t)inode->i_size, pageofs));
ri.atime = ri.ctime = ri.mtime = cpu_to_je32(get_seconds());
ri.offset = cpu_to_je32(inode->i_size);
ri.dsize = cpu_to_je32(pageofs - inode->i_size);
ri.csize = cpu_to_je32(0);
ri.compr = JFFS2_COMPR_ZERO;
ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
ri.data_crc = cpu_to_je32(0);
fn = jffs2_write_dnode(c, f, &ri, NULL, 0, phys_ofs, ALLOC_NORMAL);
if (IS_ERR(fn)) {
ret = PTR_ERR(fn);
jffs2_complete_reservation(c);
up(&f->sem);
return ret;
}
ret = jffs2_add_full_dnode_to_inode(c, f, fn);
if (f->metadata) {
jffs2_mark_node_obsolete(c, f->metadata->raw);
jffs2_free_full_dnode(f->metadata);
f->metadata = NULL;
}
if (ret) {
D1(printk(KERN_DEBUG "Eep. add_full_dnode_to_inode() failed in prepare_write, returned %d\n", ret));
jffs2_mark_node_obsolete(c, fn->raw);
jffs2_free_full_dnode(fn);
jffs2_complete_reservation(c);
up(&f->sem);
return ret;
}
jffs2_complete_reservation(c);
inode->i_size = pageofs;
up(&f->sem);
}
/* Read in the page if it wasn't already present, unless it's a whole page */
if (!PageUptodate(pg) && (start || end < PAGE_CACHE_SIZE)) {
down(&f->sem);
ret = jffs2_do_readpage_nolock(inode, pg);
up(&f->sem);
}
D1(printk(KERN_DEBUG "end prepare_write(). pg->flags %lx\n", pg->flags));
return ret;
}
static int jffs2_commit_write (struct file *filp, struct page *pg,
unsigned start, unsigned end)
{
/* Actually commit the write from the page cache page we're looking at.
* For now, we write the full page out each time. It sucks, but it's simple
*/
struct inode *inode = pg->mapping->host;
struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
struct jffs2_raw_inode *ri;
unsigned aligned_start = start & ~3;
int ret = 0;
uint32_t writtenlen = 0;
D1(printk(KERN_DEBUG "jffs2_commit_write(): ino #%lu, page at 0x%lx, range %d-%d, flags %lx\n",
inode->i_ino, pg->index << PAGE_CACHE_SHIFT, start, end, pg->flags));
if (end == PAGE_CACHE_SIZE) {
if (!start) {
/* We need to avoid deadlock with page_cache_read() in
jffs2_garbage_collect_pass(). So we have to mark the
page up to date, to prevent page_cache_read() from
trying to re-lock it. */
SetPageUptodate(pg);
} else {
/* When writing out the end of a page, write out the
_whole_ page. This helps to reduce the number of
nodes in files which have many short writes, like
syslog files. */
start = aligned_start = 0;
}
}
ri = jffs2_alloc_raw_inode();
if (!ri) {
D1(printk(KERN_DEBUG "jffs2_commit_write(): Allocation of raw inode failed\n"));
return -ENOMEM;
}
/* Set the fields that the generic jffs2_write_inode_range() code can't find */
ri->ino = cpu_to_je32(inode->i_ino);
ri->mode = cpu_to_jemode(inode->i_mode);
ri->uid = cpu_to_je16(inode->i_uid);
ri->gid = cpu_to_je16(inode->i_gid);
ri->isize = cpu_to_je32((uint32_t)inode->i_size);
ri->atime = ri->ctime = ri->mtime = cpu_to_je32(get_seconds());
/* In 2.4, it was already kmapped by generic_file_write(). Doesn't
hurt to do it again. The alternative is ifdefs, which are ugly. */
kmap(pg);
ret = jffs2_write_inode_range(c, f, ri, page_address(pg) + aligned_start,
(pg->index << PAGE_CACHE_SHIFT) + aligned_start,
end - aligned_start, &writtenlen);
kunmap(pg);
if (ret) {
/* There was an error writing. */
SetPageError(pg);
}
/* Adjust writtenlen for the padding we did, so we don't confuse our caller */
if (writtenlen < (start&3))
writtenlen = 0;
else
writtenlen -= (start&3);
if (writtenlen) {
if (inode->i_size < (pg->index << PAGE_CACHE_SHIFT) + start + writtenlen) {
inode->i_size = (pg->index << PAGE_CACHE_SHIFT) + start + writtenlen;
inode->i_blocks = (inode->i_size + 511) >> 9;
inode->i_ctime = inode->i_mtime = ITIME(je32_to_cpu(ri->ctime));
}
}
jffs2_free_raw_inode(ri);
if (start+writtenlen < end) {
/* generic_file_write has written more to the page cache than we've
actually written to the medium. Mark the page !Uptodate so that
it gets reread */
D1(printk(KERN_DEBUG "jffs2_commit_write(): Not all bytes written. Marking page !uptodate\n"));
SetPageError(pg);
ClearPageUptodate(pg);
}
D1(printk(KERN_DEBUG "jffs2_commit_write() returning %d\n",start+writtenlen==end?0:ret));
return start+writtenlen==end?0:ret;
}