f2fs-tools: Make sload.f2fs reproduce hard links

If sload.f2fs encounters a file with nr_links > 1, it will mark it
as a possible hard link by remembering the original device and
inode. When sload.f2fs creates the file, it will check if it has
already created a file for the same original device and inode. If
so, it will add the original inode to the directory and increment
the number of links to it, instead of writing a new inode.

This allows sload.f2fs to accurately reproduce a directory tree that
contains hard links, such as those created by ostree. Without this
patch, directory trees containing hard links result in the content of
the files being duplicated.

This is version 2 of the patch; it has been rebased against the dev
branch and includes a fix from Jaegeuk Kim to avoid building data
contents twice on hard linked files.

Co-authored-by: Jaegeuk Kim <jaegeuk@kernel.org>
Signed-off-by: Jordan Webb <jordan@getseam.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
This commit is contained in:
Jordan Webb 2020-12-10 09:28:11 -06:00 committed by Jaegeuk Kim
parent b585244e72
commit 747b74cb9c
5 changed files with 124 additions and 4 deletions

View File

@ -15,6 +15,7 @@
*/
#include "fsck.h"
#include "node.h"
#include <search.h>
static int room_for_filename(const u8 *bitmap, int slots, int max_slots)
{
@ -634,10 +635,43 @@ int convert_inline_dentry(struct f2fs_sb_info *sbi, struct f2fs_node *node,
return 0;
}
static int cmp_from_devino(const void *a, const void *b) {
u64 devino_a = ((struct hardlink_cache_entry*) a)->from_devino;
u64 devino_b = ((struct hardlink_cache_entry*) b)->from_devino;
return (devino_a > devino_b) - (devino_a < devino_b);
}
struct hardlink_cache_entry *f2fs_search_hardlink(struct f2fs_sb_info *sbi,
struct dentry *de)
{
struct hardlink_cache_entry *find_hardlink = NULL;
struct hardlink_cache_entry *found_hardlink = NULL;
void *search_result;
/* This might be a hardlink, try to find it in the cache */
find_hardlink = calloc(1, sizeof(struct hardlink_cache_entry));
find_hardlink->from_devino = de->from_devino;
search_result = tsearch(find_hardlink, &(sbi->hardlink_cache),
cmp_from_devino);
ASSERT(search_result != 0);
found_hardlink = *(struct hardlink_cache_entry**) search_result;
ASSERT(find_hardlink->from_devino == found_hardlink->from_devino);
/* If it was already in the cache, free the entry we just created */
if (found_hardlink != find_hardlink)
free(find_hardlink);
return found_hardlink;
}
int f2fs_create(struct f2fs_sb_info *sbi, struct dentry *de)
{
struct f2fs_node *parent, *child;
struct node_info ni;
struct hardlink_cache_entry *found_hardlink = NULL;
struct node_info ni, hardlink_ni;
struct f2fs_summary sum;
block_t blkaddr = NULL_ADDR;
int ret;
@ -649,6 +683,9 @@ int f2fs_create(struct f2fs_sb_info *sbi, struct dentry *de)
return -1;
}
if (de->from_devino)
found_hardlink = f2fs_search_hardlink(sbi, de);
parent = calloc(BLOCK_SZ, 1);
ASSERT(parent);
@ -674,7 +711,26 @@ int f2fs_create(struct f2fs_sb_info *sbi, struct dentry *de)
child = calloc(BLOCK_SZ, 1);
ASSERT(child);
if (found_hardlink && found_hardlink->to_ino) {
/*
* If we found this devino in the cache, we're creating a
* hard link.
*/
get_node_info(sbi, found_hardlink->to_ino, &hardlink_ni);
if (hardlink_ni.blk_addr == NULL_ADDR) {
MSG(1, "No original inode for hard link to_ino=%x\n",
found_hardlink->to_ino);
return -1;
}
/* Use previously-recorded inode */
de->ino = found_hardlink->to_ino;
blkaddr = hardlink_ni.blk_addr;
MSG(1, "Info: Creating \"%s\" as hard link to inode %d\n",
de->path, de->ino);
} else {
f2fs_alloc_nid(sbi, &de->ino);
}
init_inode_block(sbi, child, de);
@ -689,6 +745,30 @@ int f2fs_create(struct f2fs_sb_info *sbi, struct dentry *de)
goto free_child_dir;
}
if (found_hardlink) {
if (!found_hardlink->to_ino) {
MSG(2, "Adding inode %d from %s to hardlink cache\n",
de->ino, de->path);
found_hardlink->to_ino = de->ino;
} else {
/* Replace child with original block */
free(child);
child = calloc(BLOCK_SZ, 1);
ASSERT(child);
ret = dev_read_block(child, blkaddr);
ASSERT(ret >= 0);
/* Increment links and skip to writing block */
child->i.i_links = cpu_to_le32(
le32_to_cpu(child->i.i_links) + 1);
MSG(2, "Number of links on inode %d is now %d\n",
de->ino, le32_to_cpu(child->i.i_links));
goto write_child_dir;
}
}
/* write child */
set_summary(&sum, de->ino, 0, ni.version);
ret = reserve_new_block(sbi, &blkaddr, &sum, CURSEG_HOT_NODE, 1);
@ -697,16 +777,21 @@ int f2fs_create(struct f2fs_sb_info *sbi, struct dentry *de)
/* update nat info */
update_nat_blkaddr(sbi, de->ino, de->ino, blkaddr);
write_child_dir:
ret = dev_write_block(child, blkaddr);
ASSERT(ret >= 0);
update_free_segments(sbi);
MSG(1, "Info: Create %s -> %s\n"
" -- ino=%x, type=%x, mode=%x, uid=%x, "
"gid=%x, cap=%"PRIx64", size=%lu, pino=%x\n",
"gid=%x, cap=%"PRIx64", size=%lu, link=%u "
"blocks=%"PRIx64" pino=%x\n",
de->full_path, de->path,
de->ino, de->file_type, de->mode,
de->uid, de->gid, de->capabilities, de->size, de->pino);
de->uid, de->gid, de->capabilities, de->size,
le32_to_cpu(child->i.i_links),
le64_to_cpu(child->i.i_blocks),
de->pino);
free_child_dir:
free(child);
free_parent_dir:

View File

@ -221,6 +221,7 @@ struct dentry {
uint64_t capabilities;
nid_t ino;
nid_t pino;
u64 from_devino;
};
/* different from dnode_of_data in kernel */
@ -234,6 +235,12 @@ struct dnode_of_data {
int idirty, ndirty;
};
struct hardlink_cache_entry {
u64 from_devino;
nid_t to_ino;
int nbuild;
};
struct f2fs_sb_info {
struct f2fs_fsck *fsck;
@ -276,6 +283,9 @@ struct f2fs_sb_info {
/* true if late_build_segment_manger() is called */
bool seg_manager_done;
/* keep track of hardlinks so we can recreate them */
void *hardlink_cache;
};
static inline struct f2fs_super_block *F2FS_RAW_SUPER(struct f2fs_sb_info *sbi)

View File

@ -305,6 +305,8 @@ int f2fs_find_path(struct f2fs_sb_info *, char *, nid_t *);
nid_t f2fs_lookup(struct f2fs_sb_info *, struct f2fs_node *, u8 *, int);
int f2fs_add_link(struct f2fs_sb_info *, struct f2fs_node *,
const unsigned char *, int, nid_t, int, block_t, int);
struct hardlink_cache_entry *f2fs_search_hardlink(struct f2fs_sb_info *sbi,
struct dentry *de);
/* xattr.c */
void *read_all_xattrs(struct f2fs_sb_info *, struct f2fs_node *);

View File

@ -456,6 +456,17 @@ int f2fs_build_file(struct f2fs_sb_info *sbi, struct dentry *de)
if (de->ino == 0)
return -1;
if (de->from_devino) {
struct hardlink_cache_entry *found_hardlink;
found_hardlink = f2fs_search_hardlink(sbi, de);
if (found_hardlink && found_hardlink->to_ino &&
found_hardlink->nbuild)
return 0;
found_hardlink->nbuild++;
}
fd = open(de->full_path, O_RDONLY);
if (fd < 0) {
MSG(0, "Skip: Fail to open %s\n", de->full_path);

View File

@ -148,6 +148,15 @@ static void set_inode_metadata(struct dentry *de)
}
if (S_ISREG(stat.st_mode)) {
if (stat.st_nlink > 1) {
/*
* This file might have multiple links to it, so remember
* device and inode.
*/
de->from_devino = stat.st_dev;
de->from_devino <<= 32;
de->from_devino |= stat.st_ino;
}
de->file_type = F2FS_FT_REG_FILE;
} else if (S_ISDIR(stat.st_mode)) {
de->file_type = F2FS_FT_DIR;
@ -333,6 +342,9 @@ int f2fs_sload(struct f2fs_sb_info *sbi)
/* flush NAT/SIT journal entries */
flush_journal_entries(sbi);
/* initialize empty hardlink cache */
sbi->hardlink_cache = 0;
ret = build_directory(sbi, c.from_dir, "/",
c.target_out_dir, F2FS_ROOT_INO(sbi));
if (ret) {