fsck: Check write pointer consistency of open zones

On sudden f2fs shutdown, write pointers of zoned block devices can have
advanced while f2fs metadata still keeps the current segments at their
positions from before the write operations. After remounting f2fs, this
inconsistency causes write operations that are not at the write pointers,
and an "Unaligned write command" error is reported.

To avoid the error, have f2fs.fsck check consistency of write pointers
of open zones that current segments point to. Compare each current
segment's position and the write pointer position of the open zone. If
inconsistency is found and 'fix_on' flag is set, assign a new zone to the
current segment and check the newly assigned zone has write pointer at
the zone start. Leave the original zone as is to keep data recorded in
it.

To take care of fsync data, refer to each seg_entry's ckpt_valid_map to get
the last valid block in the zone. If the last valid block is beyond the
current segment's position, fsync data exists in the zone. In case fsync
data exists, do not assign a new zone to the current segment, so as not to
lose the fsync data. It is expected that the kernel replays the fsync data
and fixes the write pointer inconsistency at mount time.

Also check consistency between the write pointer of the zone the current
segment points to and the valid block maps of the zone. If the last valid
block is beyond the write pointer position, report it to indicate a bug. If
the 'fix_on' flag is set, assign a new zone to the current segment.

When inconsistencies are found, turn on 'bug_on' flag in fsck_verify() to
ask users to fix them or not. When inconsistencies get fixed, turn on
'force' flag in fsck_verify() to enforce fixes in following checks.

This check and fix is done twice. The first is done at the beginning of
do_fsck() function so that other fixes can reflect the current segment
modification. The second is done in fsck_verify() to reflect updated meta
data by other fixes.

Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
This commit is contained in:
Shin'ichiro Kawasaki 2019-11-28 16:59:29 +09:00 committed by Jaegeuk Kim
parent 75bdccdf0b
commit 42326f61e9
5 changed files with 218 additions and 1 deletions

View File

@ -430,6 +430,11 @@ static inline block_t __end_block_addr(struct f2fs_sb_info *sbi)
#define GET_BLKOFF_FROM_SEG0(sbi, blk_addr) \ #define GET_BLKOFF_FROM_SEG0(sbi, blk_addr) \
(GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & (sbi->blocks_per_seg - 1)) (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & (sbi->blocks_per_seg - 1))
/* Section number that segment number 'segno' belongs to. */
#define GET_SEC_FROM_SEG(sbi, segno)	((segno) / (sbi)->segs_per_sec)
/* Number of the first segment of section 'secno'. */
#define GET_SEG_FROM_SEC(sbi, secno)	((secno) * (sbi)->segs_per_sec)
#define FREE_I_START_SEGNO(sbi) \ #define FREE_I_START_SEGNO(sbi) \
GET_SEGNO_FROM_SEG0(sbi, SM_I(sbi)->main_blkaddr) GET_SEGNO_FROM_SEG0(sbi, SM_I(sbi)->main_blkaddr)
#define GET_R2L_SEGNO(sbi, segno) (segno + FREE_I_START_SEGNO(sbi)) #define GET_R2L_SEGNO(sbi, segno) (segno + FREE_I_START_SEGNO(sbi))

View File

@ -2181,6 +2181,130 @@ static void fix_checkpoints(struct f2fs_sb_info *sbi)
fix_checkpoint(sbi); fix_checkpoint(sbi);
} }
#ifdef HAVE_LINUX_BLKZONED_H
/*
 * Find the last valid block of a zone by walking the valid block maps of
 * its segments backwards, from the last block of the last segment.
 *
 * @sbi:	f2fs super block info
 * @zone_segno:	number of the first segment of the zone
 *
 * Returns the block offset, counted from the start of the zone, of the last
 * block whose bit is set in a ckpt_valid_map, or -1 when no bit is set in
 * any segment of the zone.
 */
static int last_vblk_off_in_zone(struct f2fs_sb_info *sbi,
					unsigned int zone_segno)
{
	unsigned int nr_segs = sbi->segs_per_sec * sbi->secs_per_zone;
	int seg_idx = nr_segs - 1;

	while (seg_idx >= 0) {
		struct seg_entry *entry;
		int blk_idx;

		entry = get_seg_entry(sbi, zone_segno + seg_idx);

		/*
		 * ckpt_valid_map is used on purpose instead of cur_valid_map:
		 * it is the map that reflects fsync data.
		 */
		ASSERT(entry->ckpt_valid_map);

		blk_idx = sbi->blocks_per_seg - 1;
		while (blk_idx >= 0) {
			if (f2fs_test_bit(blk_idx,
					(const char*)entry->ckpt_valid_map))
				return blk_idx +
					(seg_idx << sbi->log_blocks_per_seg);
			blk_idx--;
		}
		seg_idx--;
	}

	return -1;
}
/*
 * Check that the position of the current segment of the given type agrees
 * with the write pointer of the zone that position falls in.
 *
 * @sbi:	f2fs super block info
 * @type:	current segment type, passed to CURSEG_I()
 *
 * Returns 0 when the zone is not a sequential-write-required zone, when the
 * current segment position equals the write pointer, or when the valid
 * blocks found between the two are taken to be fsync data.
 * Returns -EINVAL when no device contains the current segment position, or
 * when an inconsistency is found; in the latter case
 * fsck->chk.wp_inconsistent_zones is incremented.
 * Returns the error of f2fs_report_zone() when the zone report fails.
 */
static int check_curseg_write_pointer(struct f2fs_sb_info *sbi, int type)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	struct f2fs_fsck *fsck = F2FS_FSCK(sbi);
	struct blk_zone blkz;
	block_t cs_block, wp_block, zone_last_vblock;
	u_int64_t cs_sector, wp_sector;
	int i, ret;
	unsigned int zone_segno;
	int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT;

	/* get the device the curseg points to */
	cs_block = START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff;
	for (i = 0; i < MAX_DEVICES; i++) {
		if (!c.devices[i].path)
			break;
		if (c.devices[i].start_blkaddr <= cs_block &&
		    cs_block <= c.devices[i].end_blkaddr)
			break;
	}

	/*
	 * The scan also stops at the first slot without a path. In that case
	 * no device contains cs_block either, so it must be rejected just
	 * like running off the end of the table; otherwise a zone report
	 * would be requested from a slot that has no device path.
	 */
	if (i >= MAX_DEVICES || !c.devices[i].path)
		return -EINVAL;

	/* get write pointer position of the zone the curseg points to */
	cs_sector = (cs_block - c.devices[i].start_blkaddr)
		<< log_sectors_per_block;
	ret = f2fs_report_zone(i, cs_sector, &blkz);
	if (ret)
		return ret;

	/* only sequential-write-required zones are checked */
	if (blk_zone_type(&blkz) != BLK_ZONE_TYPE_SEQWRITE_REQ)
		return 0;

	/* check consistency between the curseg and the write pointer */
	wp_block = c.devices[i].start_blkaddr +
		(blk_zone_wp_sector(&blkz) >> log_sectors_per_block);
	wp_sector = blk_zone_wp_sector(&blkz);

	if (cs_sector == wp_sector)
		return 0;

	/* the curseg is ahead of the write pointer */
	if (cs_sector > wp_sector) {
		MSG(0, "Inconsistent write pointer with curseg %d: "
		    "curseg %d[0x%x,0x%x] > wp[0x%x,0x%x]\n",
		    type, type, curseg->segno, curseg->next_blkoff,
		    GET_SEGNO(sbi, wp_block), OFFSET_IN_SEG(sbi, wp_block));
		fsck->chk.wp_inconsistent_zones++;
		return -EINVAL;
	}

	/* the write pointer is ahead of the curseg */
	MSG(0, "Write pointer goes advance from curseg %d: "
	    "curseg %d[0x%x,0x%x] wp[0x%x,0x%x]\n",
	    type, type, curseg->segno, curseg->next_blkoff,
	    GET_SEGNO(sbi, wp_block), OFFSET_IN_SEG(sbi, wp_block));

	zone_segno = GET_SEG_FROM_SEC(sbi,
				      GET_SEC_FROM_SEG(sbi, curseg->segno));
	zone_last_vblock = START_BLOCK(sbi, zone_segno) +
		last_vblk_off_in_zone(sbi, zone_segno);

	/*
	 * If valid blocks exist between the curseg position and the write
	 * pointer, they are fsync data. This is not an error to fix. Leave it
	 * for kernel to recover later.
	 * If valid blocks exist between the curseg's zone start and the curseg
	 * position, or if there is no valid block in the curseg's zone, fix
	 * the inconsistency between the curseg and the write pointer.
	 * Of Note is that if there is no valid block in the curseg's zone,
	 * last_vblk_off_in_zone() returns -1 and zone_last_vblock is always
	 * smaller than cs_block.
	 */
	if (cs_block <= zone_last_vblock && zone_last_vblock < wp_block) {
		MSG(0, "Curseg has fsync data: curseg %d[0x%x,0x%x] "
		    "last valid block in zone[0x%x,0x%x]\n",
		    type, curseg->segno, curseg->next_blkoff,
		    GET_SEGNO(sbi, zone_last_vblock),
		    OFFSET_IN_SEG(sbi, zone_last_vblock));
		return 0;
	}

	fsck->chk.wp_inconsistent_zones++;
	return -EINVAL;
}
#else
/*
 * Variant built when HAVE_LINUX_BLKZONED_H is not defined: no zone report
 * is performed and every current segment is reported as consistent.
 */
static int check_curseg_write_pointer(struct f2fs_sb_info *sbi, int type)
{
	(void)sbi;
	(void)type;

	return 0;
}
#endif
int check_curseg_offset(struct f2fs_sb_info *sbi, int type) int check_curseg_offset(struct f2fs_sb_info *sbi, int type)
{ {
struct curseg_info *curseg = CURSEG_I(sbi, type); struct curseg_info *curseg = CURSEG_I(sbi, type);
@ -2209,6 +2333,10 @@ int check_curseg_offset(struct f2fs_sb_info *sbi, int type)
return -EINVAL; return -EINVAL;
} }
} }
if (c.zoned_model == F2FS_ZONED_HM)
return check_curseg_write_pointer(sbi, type);
return 0; return 0;
} }
@ -2628,6 +2756,23 @@ out:
return cnt; return cnt;
} }
/*
 * Run the current segment checks before the rest of fsck and, when fixing
 * is enabled, repair the current segment info right away so that writes
 * issued later by fsck do not fail.
 *
 * Does nothing unless the zoned model is F2FS_ZONED_HM. When
 * check_curseg_offsets() returns non-zero and c.fix_on is set,
 * fix_curseg_info() is called and fsck->chk.wp_fixed is set to 1.
 */
void fsck_chk_and_fix_write_pointers(struct f2fs_sb_info *sbi)
{
	struct f2fs_fsck *fsck = F2FS_FSCK(sbi);

	if (c.zoned_model != F2FS_ZONED_HM)
		return;

	/* the check always runs; only the repair depends on fix_on */
	if (!check_curseg_offsets(sbi))
		return;
	if (!c.fix_on)
		return;

	fix_curseg_info(sbi);
	fsck->chk.wp_fixed = 1;
}
int fsck_chk_curseg_info(struct f2fs_sb_info *sbi) int fsck_chk_curseg_info(struct f2fs_sb_info *sbi)
{ {
struct curseg_info *curseg; struct curseg_info *curseg;
@ -2678,6 +2823,20 @@ int fsck_verify(struct f2fs_sb_info *sbi)
printf("\n"); printf("\n");
if (c.zoned_model == F2FS_ZONED_HM) {
printf("[FSCK] Write pointers consistency ");
if (fsck->chk.wp_inconsistent_zones == 0x0) {
printf(" [Ok..]\n");
} else {
printf(" [Fail] [0x%x]\n",
fsck->chk.wp_inconsistent_zones);
c.bug_on = 1;
}
if (fsck->chk.wp_fixed && c.fix_on)
force = 1;
}
if (c.feature & cpu_to_le32(F2FS_FEATURE_LOST_FOUND)) { if (c.feature & cpu_to_le32(F2FS_FEATURE_LOST_FOUND)) {
for (i = 0; i < fsck->nr_nat_entries; i++) for (i = 0; i < fsck->nr_nat_entries; i++)
if (f2fs_test_bit(i, fsck->nat_area_bitmap) != 0) if (f2fs_test_bit(i, fsck->nat_area_bitmap) != 0)

View File

@ -80,6 +80,8 @@ struct f2fs_fsck {
u32 multi_hard_link_files; u32 multi_hard_link_files;
u64 sit_valid_blocks; u64 sit_valid_blocks;
u32 sit_free_segs; u32 sit_free_segs;
u32 wp_fixed;
u32 wp_inconsistent_zones;
} chk; } chk;
struct hard_link_node *hard_link_list_head; struct hard_link_node *hard_link_list_head;
@ -162,6 +164,7 @@ int fsck_chk_inline_dentries(struct f2fs_sb_info *, struct f2fs_node *,
struct child_info *); struct child_info *);
void fsck_chk_checkpoint(struct f2fs_sb_info *sbi); void fsck_chk_checkpoint(struct f2fs_sb_info *sbi);
int fsck_chk_meta(struct f2fs_sb_info *sbi); int fsck_chk_meta(struct f2fs_sb_info *sbi);
void fsck_chk_and_fix_write_pointers(struct f2fs_sb_info *);
int fsck_chk_curseg_info(struct f2fs_sb_info *); int fsck_chk_curseg_info(struct f2fs_sb_info *);
void pretty_print_filename(const u8 *raw_name, u32 len, void pretty_print_filename(const u8 *raw_name, u32 len,
char out[F2FS_PRINT_NAMELEN], int enc_name); char out[F2FS_PRINT_NAMELEN], int enc_name);

View File

@ -602,6 +602,8 @@ static void do_fsck(struct f2fs_sb_info *sbi)
print_cp_state(flag); print_cp_state(flag);
fsck_chk_and_fix_write_pointers(sbi);
fsck_chk_curseg_info(sbi); fsck_chk_curseg_info(sbi);
if (!c.fix_on && !c.bug_on) { if (!c.fix_on && !c.bug_on) {

View File

@ -12,6 +12,7 @@
#include "node.h" #include "node.h"
#include "xattr.h" #include "xattr.h"
#include <locale.h> #include <locale.h>
#include <stdbool.h>
#ifdef HAVE_LINUX_POSIX_ACL_H #ifdef HAVE_LINUX_POSIX_ACL_H
#include <linux/posix_acl.h> #include <linux/posix_acl.h>
#endif #endif
@ -2480,6 +2481,52 @@ void set_section_type(struct f2fs_sb_info *sbi, unsigned int segno, int type)
} }
} }
#ifdef HAVE_LINUX_BLKZONED_H
/*
 * Tell whether the zone that starts at segment zone_segno has its write
 * pointer at the zone start.
 *
 * @sbi:	f2fs super block info
 * @zone_segno:	number of the first segment of the zone
 *
 * Returns true when the zoned model is not F2FS_ZONED_HM, when the zone is
 * not a sequential-write-required zone, or when the zone's write pointer
 * sector equals its start sector.
 * Returns false when no device contains the zone's first block, when the
 * zone report fails, or when the write pointer is not at the zone start.
 */
static bool write_pointer_at_zone_start(struct f2fs_sb_info *sbi,
					unsigned int zone_segno)
{
	u_int64_t sector;
	struct blk_zone blkz;
	block_t block = START_BLOCK(sbi, zone_segno);
	int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT;
	int ret, j;

	if (c.zoned_model != F2FS_ZONED_HM)
		return true;

	/* find the device that contains the first block of the zone */
	for (j = 0; j < MAX_DEVICES; j++) {
		if (!c.devices[j].path)
			break;
		if (c.devices[j].start_blkaddr <= block &&
		    block <= c.devices[j].end_blkaddr)
			break;
	}

	/*
	 * The scan also stops at the first slot without a path. In that case
	 * no device contains the block either, so it is handled like running
	 * off the end of the table instead of requesting a zone report from
	 * a slot that has no device path.
	 */
	if (j >= MAX_DEVICES || !c.devices[j].path)
		return false;

	sector = (block - c.devices[j].start_blkaddr) << log_sectors_per_block;
	ret = f2fs_report_zone(j, sector, &blkz);
	if (ret)
		return false;

	/* zones of other types are accepted without looking at the pointer */
	if (blk_zone_type(&blkz) != BLK_ZONE_TYPE_SEQWRITE_REQ)
		return true;

	return blk_zone_sector(&blkz) == blk_zone_wp_sector(&blkz);
}
#else
/*
 * Variant built when HAVE_LINUX_BLKZONED_H is not defined: no zone report
 * is performed and every zone is reported as having its write pointer at
 * the zone start.
 */
static bool write_pointer_at_zone_start(struct f2fs_sb_info *sbi,
					unsigned int zone_segno)
{
	(void)sbi;
	(void)zone_segno;

	return true;
}
#endif
int find_next_free_block(struct f2fs_sb_info *sbi, u64 *to, int left, int find_next_free_block(struct f2fs_sb_info *sbi, u64 *to, int left,
int want_type, bool new_sec) int want_type, bool new_sec)
{ {
@ -2533,7 +2580,8 @@ int find_next_free_block(struct f2fs_sb_info *sbi, u64 *to, int left,
break; break;
} }
if (i == sbi->segs_per_sec) { if (i == sbi->segs_per_sec &&
write_pointer_at_zone_start(sbi, segno)) {
set_section_type(sbi, segno, want_type); set_section_type(sbi, segno, want_type);
return 0; return 0;
} }