diff --git a/configure.ac b/configure.ac
index 9ac0c24..e9acd1a 100644
--- a/configure.ac
+++ b/configure.ac
@@ -213,6 +213,10 @@ AC_CONFIG_FILES([
 	tools/f2fs_io/Makefile
 ])
 
+AC_CHECK_MEMBER([struct blk_zone.capacity],
+	[AC_DEFINE(HAVE_BLK_ZONE_REP_V2, [1], [report zones includes zone capacity])],
+	[], [[#include <linux/blkzoned.h>]])
+
 # export library version info for mkfs/libf2fs_format_la
 AC_SUBST(FMT_CURRENT, 6)
 AC_SUBST(FMT_REVISION, 0)
diff --git a/fsck/fsck.c b/fsck/fsck.c
index 0f627eb..f97e9fb 100644
--- a/fsck/fsck.c
+++ b/fsck/fsck.c
@@ -1905,11 +1905,12 @@ int fsck_chk_meta(struct f2fs_sb_info *sbi)
 		if (IS_NODESEG(se->type))
 			sit_node_blks += se->valid_blocks;
 	}
-	if (fsck->chk.sit_free_segs + sit_valid_segs != TOTAL_SEGS(sbi)) {
+	if (fsck->chk.sit_free_segs + sit_valid_segs !=
+				get_usable_seg_count(sbi)) {
 		ASSERT_MSG("SIT usage does not match: sit_free_segs %u, "
 			"sit_valid_segs %u, total_segs %u",
 			fsck->chk.sit_free_segs, sit_valid_segs,
-			TOTAL_SEGS(sbi));
+			get_usable_seg_count(sbi));
 		return -EINVAL;
 	}
 
diff --git a/fsck/fsck.h b/fsck/fsck.h
index d8bab97..c5e85fe 100644
--- a/fsck/fsck.h
+++ b/fsck/fsck.h
@@ -235,6 +235,8 @@ extern u32 update_nat_bits_flags(struct f2fs_super_block *,
 					struct f2fs_checkpoint *, u32);
 extern void write_nat_bits(struct f2fs_sb_info *, struct f2fs_super_block *,
 					struct f2fs_checkpoint *, int);
+extern unsigned int get_usable_seg_count(struct f2fs_sb_info *);
+extern bool is_usable_seg(struct f2fs_sb_info *, unsigned int);
 
 /* dump.c */
 struct dump_option {
diff --git a/fsck/mount.c b/fsck/mount.c
index 2f55768..8ebc5b0 100644
--- a/fsck/mount.c
+++ b/fsck/mount.c
@@ -30,6 +30,76 @@
 #define ACL_OTHER		(0x20)
 #endif
 
+static int get_device_idx(struct f2fs_sb_info *sbi, u_int32_t segno)
+{
+	block_t seg_start_blkaddr;
+	int i;
+
+	seg_start_blkaddr = SM_I(sbi)->main_blkaddr +
+				segno * DEFAULT_BLOCKS_PER_SEGMENT;
+	for (i = 0; i < c.ndevs; i++)
+		if (c.devices[i].start_blkaddr <= seg_start_blkaddr &&
+			c.devices[i].end_blkaddr > seg_start_blkaddr)
+			return i;
+	return 0;
+}
+
+#ifdef HAVE_LINUX_BLKZONED_H
+
+static int get_zone_idx_from_dev(struct f2fs_sb_info *sbi,
+					u_int32_t segno, u_int32_t dev_idx)
+{
+	block_t seg_start_blkaddr = START_BLOCK(sbi, segno);
+
+	return (seg_start_blkaddr - c.devices[dev_idx].start_blkaddr) >>
+			log_base_2(sbi->segs_per_sec * sbi->blocks_per_seg);
+}
+
+bool is_usable_seg(struct f2fs_sb_info *sbi, unsigned int segno)
+{
+	unsigned int secno = segno / sbi->segs_per_sec;
+	block_t seg_start = START_BLOCK(sbi, segno);
+	block_t blocks_per_sec = sbi->blocks_per_seg * sbi->segs_per_sec;
+	unsigned int dev_idx = get_device_idx(sbi, segno);
+	unsigned int zone_idx = get_zone_idx_from_dev(sbi, segno, dev_idx);
+	unsigned int sec_off = SM_I(sbi)->main_blkaddr >>
+						log_base_2(blocks_per_sec);
+
+	if (zone_idx < c.devices[dev_idx].nr_rnd_zones)
+		return true;
+
+	if (c.devices[dev_idx].zoned_model != F2FS_ZONED_HM)
+		return true;
+
+	return seg_start < ((sec_off + secno) * blocks_per_sec) +
+				c.devices[dev_idx].zone_cap_blocks[zone_idx];
+}
+
+unsigned int get_usable_seg_count(struct f2fs_sb_info *sbi)
+{
+	unsigned int i, usable_seg_count = 0;
+
+	for (i = 0; i < TOTAL_SEGS(sbi); i++)
+		if (is_usable_seg(sbi, i))
+			usable_seg_count++;
+
+	return usable_seg_count;
+}
+
+#else
+
+bool is_usable_seg(struct f2fs_sb_info *UNUSED(sbi), unsigned int UNUSED(segno))
+{
+	return true;
+}
+
+unsigned int get_usable_seg_count(struct f2fs_sb_info *sbi)
+{
+	return TOTAL_SEGS(sbi);
+}
+
+#endif
+
 u32 get_free_segments(struct f2fs_sb_info *sbi)
 {
 	u32 i, free_segs = 0;
@@ -37,7 +107,8 @@ u32 get_free_segments(struct f2fs_sb_info *sbi)
 	for (i = 0; i < TOTAL_SEGS(sbi); i++) {
 		struct seg_entry *se = get_seg_entry(sbi, i);
 
-		if (se->valid_blocks == 0x0 && !IS_CUR_SEGNO(sbi, i))
+		if (se->valid_blocks == 0x0 && !IS_CUR_SEGNO(sbi, i) &&
+							is_usable_seg(sbi, i))
 			free_segs++;
 	}
 	return free_segs;
@@ -2351,7 +2422,7 @@ void build_sit_area_bitmap(struct f2fs_sb_info *sbi)
 		memcpy(ptr, se->cur_valid_map, SIT_VBLOCK_MAP_SIZE);
 		ptr += SIT_VBLOCK_MAP_SIZE;
 
-		if (se->valid_blocks == 0x0) {
+		if (se->valid_blocks == 0x0 && is_usable_seg(sbi, segno)) {
 			if (le32_to_cpu(sbi->ckpt->cur_node_segno[0]) == segno ||
 				le32_to_cpu(sbi->ckpt->cur_data_segno[0]) == segno ||
 				le32_to_cpu(sbi->ckpt->cur_node_segno[1]) == segno ||
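
The is_usable_seg() logic above treats a segment as usable only when its start block falls inside the written capacity of the zone that contains it; conventional (random-write) zones and non host-managed devices are always usable. Below is a minimal standalone sketch of that check with hypothetical geometry (2 MiB segments, one segment per section, 96 MiB zones with an 80 MiB capacity); it is not code from the patch.

/* Sketch only: hypothetical geometry, not the patch's helpers. */
#include <stdbool.h>
#include <stdio.h>

#define BLKS_PER_SEG    512U                    /* 4 KiB blocks, 2 MiB segment */
#define SEGS_PER_ZONE   48U                     /* 96 MiB zone */
#define ZONE_CAP_BLKS   (40U * BLKS_PER_SEG)    /* 80 MiB zone capacity */

static bool seg_is_usable(unsigned int segno)
{
        unsigned int zone = segno / SEGS_PER_ZONE;
        unsigned int zone_start_blk = zone * SEGS_PER_ZONE * BLKS_PER_SEG;
        unsigned int seg_start_blk = segno * BLKS_PER_SEG;

        /* usable only if the segment starts before the capacity runs out */
        return seg_start_blk < zone_start_blk + ZONE_CAP_BLKS;
}

int main(void)
{
        unsigned int segno, usable = 0, total = 2 * SEGS_PER_ZONE;

        for (segno = 0; segno < total; segno++)
                if (seg_is_usable(segno))
                        usable++;

        printf("%u of %u segments usable\n", usable, total);   /* 80 of 96 */
        return 0;
}
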
diff --git a/include/f2fs_fs.h b/include/f2fs_fs.h
index 3323be1..b6bd4a7 100644
--- a/include/f2fs_fs.h
+++ b/include/f2fs_fs.h
@@ -332,6 +332,7 @@ struct device_info {
 	u_int32_t nr_zones;
 	u_int32_t nr_rnd_zones;
 	size_t zone_blocks;
+	size_t *zone_cap_blocks;
 };
 
 typedef struct {
@@ -537,6 +538,7 @@ struct f2fs_configuration {
 	(void) (&_max1 == &_max2);		\
 	_max1 > _max2 ? _max1 : _max2; })
 
+#define round_up(x, y)	(((x) + (y) - 1) / (y))
 /*
  * Copied from fs/f2fs/f2fs.h
  */
@@ -1325,13 +1327,42 @@ blk_zone_cond_str(struct blk_zone *blkz)
 	return "Unknown-cond";
 }
 
-#define blk_zone_empty(z)	(blk_zone_cond(z) == BLK_ZONE_COND_EMPTY)
+/*
+ * Handle kernel zone capacity support
+ */
+#ifndef HAVE_BLK_ZONE_REP_V2
+#define BLK_ZONE_REP_CAPACITY	(1 << 0)
+struct blk_zone_v2 {
+	__u64	start;		/* Zone start sector */
+	__u64	len;		/* Zone length in number of sectors */
+	__u64	wp;		/* Zone write pointer position */
+	__u8	type;		/* Zone type */
+	__u8	cond;		/* Zone condition */
+	__u8	non_seq;	/* Non-sequential write resources active */
+	__u8	reset;		/* Reset write pointer recommended */
+	__u8	resv[4];
+	__u64	capacity;	/* Zone capacity in number of sectors */
+	__u8	reserved[24];
+};
+#define blk_zone blk_zone_v2
+struct blk_zone_report_v2 {
+	__u64	sector;
+	__u32	nr_zones;
+	__u32	flags;
+	struct blk_zone zones[0];
+};
+#define blk_zone_report blk_zone_report_v2
+#endif /* HAVE_BLK_ZONE_REP_V2 */
+
+#define blk_zone_empty(z)	(blk_zone_cond(z) == BLK_ZONE_COND_EMPTY)
 
 #define blk_zone_sector(z)	(z)->start
 #define blk_zone_length(z)	(z)->len
 #define blk_zone_wp_sector(z)	(z)->wp
 #define blk_zone_need_reset(z)	(int)(z)->reset
 #define blk_zone_non_seq(z)	(int)(z)->non_seq
+#define blk_zone_capacity(z, f) ((f & BLK_ZONE_REP_CAPACITY) ? \
+					(z)->capacity : (z)->len)
 
 #endif
@@ -1343,6 +1374,7 @@ extern int f2fs_report_zones(int, report_zones_cb_t *, void *);
 extern int f2fs_check_zones(int);
 int f2fs_reset_zone(int, void *);
 extern int f2fs_reset_zones(int);
+extern uint32_t f2fs_get_usable_segments(struct f2fs_super_block *sb);
 
 #define SIZE_ALIGN(val, size)	((val) + (size) - 1) / (size)
 #define SEG_ALIGN(blks)		SIZE_ALIGN(blks, c.blks_per_seg)
@@ -1353,6 +1385,7 @@ static inline double get_best_overprovision(struct f2fs_super_block *sb)
 {
 	double reserved, ovp, candidate, end, diff, space;
 	double max_ovp = 0, max_space = 0;
+	u_int32_t usable_main_segs = f2fs_get_usable_segments(sb);
 
 	if (get_sb(segment_count_main) < 256) {
 		candidate = 10;
@@ -1366,9 +1399,9 @@ static inline double get_best_overprovision(struct f2fs_super_block *sb)
 
 	for (; candidate <= end; candidate += diff) {
 		reserved = (2 * (100 / candidate + 1) + 6) *
-						get_sb(segs_per_sec);
-		ovp = (get_sb(segment_count_main) - reserved) * candidate / 100;
-		space = get_sb(segment_count_main) - reserved - ovp;
+			round_up(usable_main_segs, get_sb(section_count));
+		ovp = (usable_main_segs - reserved) * candidate / 100;
+		space = usable_main_segs - reserved - ovp;
 		if (max_space < space) {
 			max_space = space;
 			max_ovp = candidate;
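
The blk_zone_capacity() macro above falls back to the zone length whenever the kernel's zone report does not advertise BLK_ZONE_REP_CAPACITY, so on older kernels capacity silently equals zone size and the previous behaviour is preserved. A small self-contained sketch of that fallback, using renamed hypothetical types rather than the kernel's struct blk_zone:

/* Sketch only: renamed, hypothetical types, not <linux/blkzoned.h>. */
#include <stdio.h>

#define REP_CAPACITY    (1 << 0)

struct zone {
        unsigned long long len;         /* zone size, in 512-byte sectors */
        unsigned long long capacity;    /* zone capacity, in 512-byte sectors */
};

#define zone_capacity(z, f)     (((f) & REP_CAPACITY) ? (z)->capacity : (z)->len)

int main(void)
{
        /* 96 MiB zone of which only 80 MiB is writable */
        struct zone z = { .len = 196608, .capacity = 163840 };

        printf("capacity-aware report: %llu sectors\n", zone_capacity(&z, REP_CAPACITY));
        printf("legacy report:         %llu sectors\n", zone_capacity(&z, 0));
        return 0;
}
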
diff --git a/lib/libf2fs_io.c b/lib/libf2fs_io.c
index 1f597a9..138285d 100644
--- a/lib/libf2fs_io.c
+++ b/lib/libf2fs_io.c
@@ -784,6 +784,7 @@ int f2fs_finalize_device(void)
 			break;
 		}
 		free(c.devices[i].path);
+		free(c.devices[i].zone_cap_blocks);
 	}
 
 	close(c.kd);
diff --git a/lib/libf2fs_zoned.c b/lib/libf2fs_zoned.c
index efc687c..56c97d1 100644
--- a/lib/libf2fs_zoned.c
+++ b/lib/libf2fs_zoned.c
@@ -291,6 +291,13 @@ int f2fs_check_zones(int j)
 		return -ENOMEM;
 	}
 
+	dev->zone_cap_blocks = malloc(dev->nr_zones * sizeof(size_t));
+	if (!dev->zone_cap_blocks) {
+		ERR_MSG("No memory for zone capacity list.\n");
+		return -ENOMEM;
+	}
+	memset(dev->zone_cap_blocks, 0, (dev->nr_zones * sizeof(size_t)));
+
 	dev->nr_rnd_zones = 0;
 	sector = 0;
 	total_sectors = (dev->total_sectors * c.sector_size) >> 9;
@@ -335,10 +342,15 @@ int f2fs_check_zones(int j)
 			       blk_zone_cond_str(blkz),
 			       blk_zone_sector(blkz),
 			       blk_zone_length(blkz));
+			dev->zone_cap_blocks[n] =
+				blk_zone_length(blkz) >>
+				(F2FS_BLKSIZE_BITS - SECTOR_SHIFT);
 		} else {
 			DBG(2,
-			    "Zone %05u: type 0x%x (%s), cond 0x%x (%s), need_reset %d, "
-			    "non_seq %d, sector %llu, %llu sectors, wp sector %llu\n",
+			    "Zone %05u: type 0x%x (%s), cond 0x%x (%s),"
+			    " need_reset %d, non_seq %d, sector %llu,"
+			    " %llu sectors, capacity %llu,"
+			    " wp sector %llu\n",
 			    n,
 			    blk_zone_type(blkz),
 			    blk_zone_type_str(blkz),
@@ -348,7 +360,11 @@ int f2fs_check_zones(int j)
 			    blk_zone_non_seq(blkz),
 			    blk_zone_sector(blkz),
 			    blk_zone_length(blkz),
+			    blk_zone_capacity(blkz, rep->flags),
 			    blk_zone_wp_sector(blkz));
+			dev->zone_cap_blocks[n] =
+				blk_zone_capacity(blkz, rep->flags) >>
+				(F2FS_BLKSIZE_BITS - SECTOR_SHIFT);
 		}
 
 		sector = blk_zone_sector(blkz) + blk_zone_length(blkz);
@@ -473,6 +489,34 @@ out:
 	return ret;
 }
 
+uint32_t f2fs_get_usable_segments(struct f2fs_super_block *sb)
+{
+#ifdef HAVE_BLK_ZONE_REP_V2
+	int i, j;
+	uint32_t usable_segs = 0, zone_segs;
+
+	for (i = 0; i < c.ndevs; i++) {
+		if (c.devices[i].zoned_model != F2FS_ZONED_HM) {
+			usable_segs += c.devices[i].total_segments;
+			continue;
+		}
+		for (j = 0; j < c.devices[i].nr_zones; j++) {
+			zone_segs = c.devices[i].zone_cap_blocks[j] >>
+					get_sb(log_blocks_per_seg);
+			if (c.devices[i].zone_cap_blocks[j] %
+					DEFAULT_BLOCKS_PER_SEGMENT)
+				usable_segs += zone_segs + 1;
+			else
+				usable_segs += zone_segs;
+		}
+	}
+	usable_segs -= (get_sb(main_blkaddr) - get_sb(segment0_blkaddr)) >>
+			get_sb(log_blocks_per_seg);
+	return usable_segs;
+#endif
+	return get_sb(segment_count_main);
+}
+
 #else
 
 int f2fs_report_zone(int i, u_int64_t UNUSED(sector), void *UNUSED(blkzone))
@@ -527,5 +571,9 @@ int f2fs_reset_zones(int i)
 	return -1;
 }
 
+uint32_t f2fs_get_usable_segments(struct f2fs_super_block *sb)
+{
+	return get_sb(segment_count_main);
+}
 #endif
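
f2fs_get_usable_segments() above rounds the usable segment count up per zone: the full segments that fit in the zone capacity, plus one more when the capacity ends mid-segment. A standalone sketch of that arithmetic with hypothetical numbers (512-block segments and a 19200-block, i.e. 75 MiB, zone capacity):

/* Sketch only: hypothetical zone, not the patch's device structures. */
#include <stdio.h>

#define DEFAULT_BLOCKS_PER_SEGMENT      512U    /* 2 MiB segment in 4 KiB blocks */
#define LOG_BLOCKS_PER_SEG              9U      /* log2(512) */

int main(void)
{
        unsigned int zone_cap_blocks = 19200;   /* 75 MiB zone capacity */
        unsigned int zone_segs;

        zone_segs = zone_cap_blocks >> LOG_BLOCKS_PER_SEG;      /* 37 full segments */
        if (zone_cap_blocks % DEFAULT_BLOCKS_PER_SEGMENT)
                zone_segs++;            /* the partial 38th segment still counts */

        printf("usable segments in this zone: %u\n", zone_segs);        /* 38 */
        return 0;
}
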
diff --git a/mkfs/f2fs_format.c b/mkfs/f2fs_format.c
index be19c36..7c5d1eb 100644
--- a/mkfs/f2fs_format.c
+++ b/mkfs/f2fs_format.c
@@ -425,13 +425,19 @@ static int f2fs_prepare_super_block(void)
 
 	set_sb(segment_count_main, get_sb(section_count) * c.segs_per_sec);
 
-	/* Let's determine the best reserved and overprovisioned space */
+	/*
+	 * Let's determine the best reserved and overprovisioned space.
+	 * For Zoned device, if zone capacity less than zone size, the segments
+	 * starting after the zone capacity are unusable in each zone. So get
+	 * overprovision ratio and reserved seg count based on avg usable
+	 * segs_per_sec.
+	 */
 	if (c.overprovision == 0)
 		c.overprovision = get_best_overprovision(sb);
 
 	c.reserved_segments =
-			(2 * (100 / c.overprovision + 1) + NR_CURSEG_TYPE)
-			* c.segs_per_sec;
+		(2 * (100 / c.overprovision + 1) + NR_CURSEG_TYPE) *
+		round_up(f2fs_get_usable_segments(sb), get_sb(section_count));
 
 	if (c.overprovision == 0 || c.total_segments < F2FS_MIN_SEGMENTS ||
 		(c.devices[0].total_sectors *
@@ -679,19 +685,28 @@ static int f2fs_write_check_point_pack(void)
 	set_cp(valid_block_count, 2 + c.quota_inum + c.quota_dnum +
 			c.lpf_inum + c.lpf_dnum);
 	set_cp(rsvd_segment_count, c.reserved_segments);
-	set_cp(overprov_segment_count, (get_sb(segment_count_main) -
+
+	/*
+	 * For zoned devices, if zone capacity less than zone size, get
+	 * overprovision segment count based on usable segments in the device.
+	 */
+	set_cp(overprov_segment_count, (f2fs_get_usable_segments(sb) -
 			get_cp(rsvd_segment_count)) *
 			c.overprovision / 100);
 	set_cp(overprov_segment_count, get_cp(overprov_segment_count) +
 			get_cp(rsvd_segment_count));
 
+	if (f2fs_get_usable_segments(sb) <= get_cp(overprov_segment_count)) {
+		MSG(0, "\tError: Not enough segments to create F2FS Volume\n");
+		goto free_nat_bits;
+	}
 	MSG(0, "Info: Overprovision ratio = %.3lf%%\n", c.overprovision);
 	MSG(0, "Info: Overprovision segments = %u (GC reserved = %u)\n",
 					get_cp(overprov_segment_count),
 					c.reserved_segments);
 
 	/* main segments - reserved segments - (node + data segments) */
-	set_cp(free_segment_count, get_sb(segment_count_main) - 6);
+	set_cp(free_segment_count, f2fs_get_usable_segments(sb) - 6);
 	set_cp(user_block_count, ((get_cp(free_segment_count) + 6 -
 			get_cp(overprov_segment_count)) * c.blks_per_seg));
 	/* cp page (2), data summaries (1), node summaries (3) */
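
With the checkpoint now sized from usable segments, both the overprovision pool and the free segment count shrink when zone capacity is below zone size, and mkfs bails out if nothing usable would remain. A standalone sketch of that sizing with hypothetical inputs (60000 usable segments, 2880 reserved, 5% overprovision):

/* Sketch only: hypothetical inputs, mirrors the sizing steps above. */
#include <stdio.h>

int main(void)
{
        unsigned int usable_segs = 60000;       /* stand-in for f2fs_get_usable_segments(sb) */
        unsigned int rsvd_segs = 2880;          /* stand-in for c.reserved_segments */
        double overprovision = 5.0;             /* percent */
        unsigned int ovp_segs, free_segs;

        ovp_segs = (usable_segs - rsvd_segs) * overprovision / 100;
        ovp_segs += rsvd_segs;
        if (usable_segs <= ovp_segs) {
                printf("Error: Not enough segments to create F2FS Volume\n");
                return 1;
        }

        free_segs = usable_segs - 6;    /* minus the six current node/data segments */

        printf("overprov=%u free=%u\n", ovp_segs, free_segs);  /* 5736 and 59994 */
        return 0;
}
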