ext4: add online resizing support for meta_bg and 64-bit file systems

This patch adds support for resizing file systems with the meta_bg and
64bit features.

[ Added a fix by tytso to fix a divide by zero when resizing a
  filesystem from 14 TB to 18TB.  Also fixed overhead accounting for
  meta_bg file systems.]

Signed-off-by: Yongqiang Yang <xiaoqiangnk@gmail.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
This commit is contained in:
Yongqiang Yang 2012-09-05 01:33:50 -04:00 committed by Theodore Ts'o
parent 28623c2f5b
commit 01f795f9e0
2 changed files with 165 additions and 65 deletions

View File

@ -365,26 +365,11 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return -EOPNOTSUPP;
}
if (EXT4_HAS_INCOMPAT_FEATURE(sb,
EXT4_FEATURE_INCOMPAT_META_BG)) {
ext4_msg(sb, KERN_ERR,
"Online resizing not (yet) supported with meta_bg");
return -EOPNOTSUPP;
}
if (copy_from_user(&n_blocks_count, (__u64 __user *)arg,
sizeof(__u64))) {
return -EFAULT;
}
if (n_blocks_count > MAX_32_NUM &&
!EXT4_HAS_INCOMPAT_FEATURE(sb,
EXT4_FEATURE_INCOMPAT_64BIT)) {
ext4_msg(sb, KERN_ERR,
"File system only supports 32-bit block numbers");
return -EOPNOTSUPP;
}
err = ext4_resize_begin(sb);
if (err)
return err;

View File

@ -45,6 +45,28 @@ void ext4_resize_end(struct super_block *sb)
smp_mb__after_clear_bit();
}
static ext4_group_t ext4_meta_bg_first_group(struct super_block *sb,
ext4_group_t group) {
return (group >> EXT4_DESC_PER_BLOCK_BITS(sb)) <<
EXT4_DESC_PER_BLOCK_BITS(sb);
}
static ext4_fsblk_t ext4_meta_bg_first_block_no(struct super_block *sb,
ext4_group_t group) {
group = ext4_meta_bg_first_group(sb, group);
return ext4_group_first_block_no(sb, group);
}
static ext4_grpblk_t ext4_group_overhead_blocks(struct super_block *sb,
ext4_group_t group) {
ext4_grpblk_t overhead;
overhead = ext4_bg_num_gdb(sb, group);
if (ext4_bg_has_super(sb, group))
overhead += 1 +
le16_to_cpu(EXT4_SB(sb)->s_es->s_reserved_gdt_blocks);
return overhead;
}
#define outside(b, first, last) ((b) < (first) || (b) >= (last))
#define inside(b, first, last) ((b) >= (first) && (b) < (last))
@ -57,9 +79,7 @@ static int verify_group_input(struct super_block *sb,
ext4_fsblk_t end = start + input->blocks_count;
ext4_group_t group = input->group;
ext4_fsblk_t itend = input->inode_table + sbi->s_itb_per_group;
unsigned overhead = ext4_bg_has_super(sb, group) ?
(1 + ext4_bg_num_gdb(sb, group) +
le16_to_cpu(es->s_reserved_gdt_blocks)) : 0;
unsigned overhead = ext4_group_overhead_blocks(sb, group);
ext4_fsblk_t metaend = start + overhead;
struct buffer_head *bh = NULL;
ext4_grpblk_t free_blocks_count, offset;
@ -209,7 +229,6 @@ static int ext4_alloc_group_tables(struct super_block *sb,
int flexbg_size)
{
struct ext4_new_group_data *group_data = flex_gd->groups;
struct ext4_super_block *es = EXT4_SB(sb)->s_es;
ext4_fsblk_t start_blk;
ext4_fsblk_t last_blk;
ext4_group_t src_group;
@ -234,19 +253,19 @@ static int ext4_alloc_group_tables(struct super_block *sb,
start_blk = ext4_group_first_block_no(sb, src_group);
last_blk = start_blk + group_data[src_group - group].blocks_count;
overhead = ext4_bg_has_super(sb, src_group) ?
(1 + ext4_bg_num_gdb(sb, src_group) +
le16_to_cpu(es->s_reserved_gdt_blocks)) : 0;
overhead = ext4_group_overhead_blocks(sb, src_group);
start_blk += overhead;
/* We collect contiguous blocks as much as possible. */
src_group++;
for (; src_group <= last_group; src_group++)
if (!ext4_bg_has_super(sb, src_group))
for (; src_group <= last_group; src_group++) {
overhead = ext4_group_overhead_blocks(sb, src_group);
if (overhead != 0)
last_blk += group_data[src_group - group].blocks_count;
else
break;
}
/* Allocate block bitmaps */
for (; bb_index < flex_gd->count; bb_index++) {
@ -438,11 +457,13 @@ static int setup_new_flex_group_blocks(struct super_block *sb,
ext4_group_t group, count;
struct buffer_head *bh = NULL;
int reserved_gdb, i, j, err = 0, err2;
int meta_bg;
BUG_ON(!flex_gd->count || !group_data ||
group_data[0].group != sbi->s_groups_count);
reserved_gdb = le16_to_cpu(es->s_reserved_gdt_blocks);
meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG);
/* This transaction may be extended/restarted along the way */
handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA);
@ -452,15 +473,25 @@ static int setup_new_flex_group_blocks(struct super_block *sb,
group = group_data[0].group;
for (i = 0; i < flex_gd->count; i++, group++) {
unsigned long gdblocks;
ext4_grpblk_t overhead;
gdblocks = ext4_bg_num_gdb(sb, group);
start = ext4_group_first_block_no(sb, group);
if (!ext4_bg_has_super(sb, group))
if (meta_bg == 0 && !ext4_bg_has_super(sb, group))
goto handle_itb;
if (meta_bg == 1) {
ext4_group_t first_group;
first_group = ext4_meta_bg_first_group(sb, group);
if (first_group != group + 1 &&
first_group != group + EXT4_DESC_PER_BLOCK(sb) - 1)
goto handle_itb;
}
block = start + ext4_bg_has_super(sb, group);
/* Copy all of the GDT blocks into the backup in this group */
for (j = 0, block = start + 1; j < gdblocks; j++, block++) {
for (j = 0; j < gdblocks; j++, block++) {
struct buffer_head *gdb;
ext4_debug("update backup group %#04llx\n", block);
@ -530,11 +561,11 @@ static int setup_new_flex_group_blocks(struct super_block *sb,
err = PTR_ERR(bh);
goto out;
}
if (ext4_bg_has_super(sb, group)) {
overhead = ext4_group_overhead_blocks(sb, group);
if (overhead != 0) {
ext4_debug("mark backup superblock %#04llx (+0)\n",
start);
ext4_set_bits(bh->b_data, 0, gdblocks + reserved_gdb +
1);
ext4_set_bits(bh->b_data, 0, overhead);
}
ext4_mark_bitmap_end(group_data[i].blocks_count,
sb->s_blocksize * 8, bh->b_data);
@ -830,6 +861,45 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
return err;
}
/*
* add_new_gdb_meta_bg is the sister of add_new_gdb.
*/
static int add_new_gdb_meta_bg(struct super_block *sb,
handle_t *handle, ext4_group_t group) {
ext4_fsblk_t gdblock;
struct buffer_head *gdb_bh;
struct buffer_head **o_group_desc, **n_group_desc;
unsigned long gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
int err;
gdblock = ext4_meta_bg_first_block_no(sb, group) +
ext4_bg_has_super(sb, group);
gdb_bh = sb_bread(sb, gdblock);
if (!gdb_bh)
return -EIO;
n_group_desc = ext4_kvmalloc((gdb_num + 1) *
sizeof(struct buffer_head *),
GFP_NOFS);
if (!n_group_desc) {
err = -ENOMEM;
ext4_warning(sb, "not enough memory for %lu groups",
gdb_num + 1);
return err;
}
o_group_desc = EXT4_SB(sb)->s_group_desc;
memcpy(n_group_desc, o_group_desc,
EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *));
n_group_desc[gdb_num] = gdb_bh;
EXT4_SB(sb)->s_group_desc = n_group_desc;
EXT4_SB(sb)->s_gdb_count++;
ext4_kvfree(o_group_desc);
err = ext4_journal_get_write_access(handle, gdb_bh);
if (unlikely(err))
brelse(gdb_bh);
return err;
}
/*
* Called when we are adding a new group which has a backup copy of each of
* the GDT blocks (i.e. sparse group) and there are reserved GDT blocks.
@ -958,16 +1028,16 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
* do not copy the full number of backups at this time. The resize
* which changed s_groups_count will backup again.
*/
static void update_backups(struct super_block *sb,
int blk_off, char *data, int size)
static void update_backups(struct super_block *sb, int blk_off, char *data,
int size, int meta_bg)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
const ext4_group_t last = sbi->s_groups_count;
ext4_group_t last;
const int bpg = EXT4_BLOCKS_PER_GROUP(sb);
unsigned three = 1;
unsigned five = 5;
unsigned seven = 7;
ext4_group_t group;
ext4_group_t group = 0;
int rest = sb->s_blocksize - size;
handle_t *handle;
int err = 0, err2;
@ -981,8 +1051,17 @@ static void update_backups(struct super_block *sb,
ext4_superblock_csum_set(sb, (struct ext4_super_block *)data);
while ((group = ext4_list_backups(sb, &three, &five, &seven)) < last) {
if (meta_bg == 0) {
group = ext4_list_backups(sb, &three, &five, &seven);
last = sbi->s_groups_count;
} else {
group = ext4_meta_bg_first_group(sb, group) + 1;
last = (ext4_group_t)(group + EXT4_DESC_PER_BLOCK(sb) - 2);
}
while (group < sbi->s_groups_count) {
struct buffer_head *bh;
ext4_fsblk_t backup_block;
/* Out of journal space, and can't get more - abort - so sad */
if (ext4_handle_valid(handle) &&
@ -991,13 +1070,20 @@ static void update_backups(struct super_block *sb,
(err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA)))
break;
bh = sb_getblk(sb, group * bpg + blk_off);
if (meta_bg == 0)
backup_block = group * bpg + blk_off;
else
backup_block = (ext4_group_first_block_no(sb, group) +
ext4_bg_has_super(sb, group));
bh = sb_getblk(sb, backup_block);
if (!bh) {
err = -EIO;
break;
}
ext4_debug("update metadata backup %#04lx\n",
(unsigned long)bh->b_blocknr);
ext4_debug("update metadata backup %llu(+%llu)\n",
backup_block, backup_block -
ext4_group_first_block_no(sb, group));
if ((err = ext4_journal_get_write_access(handle, bh)))
break;
lock_buffer(bh);
@ -1010,6 +1096,13 @@ static void update_backups(struct super_block *sb,
if (unlikely(err))
ext4_std_error(sb, err);
brelse(bh);
if (meta_bg == 0)
group = ext4_list_backups(sb, &three, &five, &seven);
else if (group == last)
break;
else
group = last;
}
if ((err2 = ext4_journal_stop(handle)) && !err)
err = err2;
@ -1052,7 +1145,9 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb,
struct ext4_super_block *es = sbi->s_es;
struct buffer_head *gdb_bh;
int i, gdb_off, gdb_num, err = 0;
int meta_bg;
meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG);
for (i = 0; i < count; i++, group++) {
int reserved_gdb = ext4_bg_has_super(sb, group) ?
le16_to_cpu(es->s_reserved_gdt_blocks) : 0;
@ -1072,8 +1167,11 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb,
if (!err && reserved_gdb && ext4_bg_num_gdb(sb, group))
err = reserve_backup_gdb(handle, resize_inode, group);
} else
} else if (meta_bg != 0) {
err = add_new_gdb_meta_bg(sb, handle, group);
} else {
err = add_new_gdb(handle, resize_inode, group);
}
if (err)
break;
}
@ -1225,7 +1323,7 @@ static void ext4_update_super(struct super_block *sb,
}
reserved_blocks = ext4_r_blocks_count(es) * 100;
do_div(reserved_blocks, ext4_blocks_count(es));
reserved_blocks = div64_u64(reserved_blocks, ext4_blocks_count(es));
reserved_blocks *= blocks_count;
do_div(reserved_blocks, 100);
@ -1236,6 +1334,7 @@ static void ext4_update_super(struct super_block *sb,
le32_add_cpu(&es->s_free_inodes_count, EXT4_INODES_PER_GROUP(sb) *
flex_gd->count);
ext4_debug("free blocks count %llu", ext4_free_blocks_count(es));
/*
* We need to protect s_groups_count against other CPUs seeing
* inconsistent state in the superblock.
@ -1270,6 +1369,8 @@ static void ext4_update_super(struct super_block *sb,
percpu_counter_add(&sbi->s_freeinodes_counter,
EXT4_INODES_PER_GROUP(sb) * flex_gd->count);
ext4_debug("free blocks count %llu",
percpu_counter_read(&sbi->s_freeclusters_counter));
if (EXT4_HAS_INCOMPAT_FEATURE(sb,
EXT4_FEATURE_INCOMPAT_FLEX_BG) &&
sbi->s_log_groups_per_flex) {
@ -1361,15 +1462,17 @@ static int ext4_flex_group_add(struct super_block *sb,
int gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
int gdb_num_end = ((group + flex_gd->count - 1) /
EXT4_DESC_PER_BLOCK(sb));
int meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb,
EXT4_FEATURE_INCOMPAT_META_BG);
update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es,
sizeof(struct ext4_super_block));
sizeof(struct ext4_super_block), 0);
for (; gdb_num <= gdb_num_end; gdb_num++) {
struct buffer_head *gdb_bh;
gdb_bh = sbi->s_group_desc[gdb_num];
update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data,
gdb_bh->b_size);
gdb_bh->b_size, meta_bg);
}
}
exit:
@ -1413,9 +1516,7 @@ static int ext4_setup_next_flex_gd(struct super_block *sb,
group_data[i].group = group + i;
group_data[i].blocks_count = blocks_per_group;
overhead = ext4_bg_has_super(sb, group + i) ?
(1 + ext4_bg_num_gdb(sb, group + i) +
le16_to_cpu(es->s_reserved_gdt_blocks)) : 0;
overhead = ext4_group_overhead_blocks(sb, group + i);
group_data[i].free_blocks_count = blocks_per_group - overhead;
if (ext4_has_group_desc_csum(sb))
flex_gd->bg_flags[i] = EXT4_BG_BLOCK_UNINIT |
@ -1563,11 +1664,13 @@ static int ext4_group_extend_no_check(struct super_block *sb,
err = err2;
if (!err) {
ext4_fsblk_t first_block;
first_block = ext4_group_first_block_no(sb, 0);
if (test_opt(sb, DEBUG))
printk(KERN_DEBUG "EXT4-fs: extended group to %llu "
"blocks\n", ext4_blocks_count(es));
update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr, (char *)es,
sizeof(struct ext4_super_block));
update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr - first_block,
(char *)es, sizeof(struct ext4_super_block), 0);
}
return err;
}
@ -1662,15 +1765,16 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
struct buffer_head *bh;
struct inode *resize_inode;
ext4_fsblk_t o_blocks_count;
ext4_group_t o_group;
ext4_group_t n_group;
ext4_grpblk_t offset, add;
struct inode *resize_inode = NULL;
ext4_grpblk_t add, offset;
unsigned long n_desc_blocks;
unsigned long o_desc_blocks;
unsigned long desc_blocks;
ext4_group_t o_group;
ext4_group_t n_group;
ext4_fsblk_t o_blocks_count;
int err = 0, flexbg_size = 1 << sbi->s_log_groups_per_flex;
int meta_bg;
o_blocks_count = ext4_blocks_count(es);
@ -1692,22 +1796,33 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
ext4_get_group_no_and_offset(sb, o_blocks_count - 1, &o_group, &offset);
n_desc_blocks = (n_group + EXT4_DESC_PER_BLOCK(sb)) /
EXT4_DESC_PER_BLOCK(sb);
EXT4_DESC_PER_BLOCK(sb);
o_desc_blocks = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
EXT4_DESC_PER_BLOCK(sb);
EXT4_DESC_PER_BLOCK(sb);
desc_blocks = n_desc_blocks - o_desc_blocks;
if (desc_blocks &&
(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_RESIZE_INODE) ||
le16_to_cpu(es->s_reserved_gdt_blocks) < desc_blocks)) {
ext4_warning(sb, "No reserved GDT blocks, can't resize");
return -EPERM;
}
meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG);
resize_inode = ext4_iget(sb, EXT4_RESIZE_INO);
if (IS_ERR(resize_inode)) {
ext4_warning(sb, "Error opening resize inode");
return PTR_ERR(resize_inode);
if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_RESIZE_INODE)) {
if (meta_bg) {
ext4_error(sb, "resize_inode and meta_bg enabled "
"simultaneously");
return -EINVAL;
}
if (le16_to_cpu(es->s_reserved_gdt_blocks) < desc_blocks) {
ext4_warning(sb,
"No reserved GDT blocks, can't resize");
return -EPERM;
}
resize_inode = ext4_iget(sb, EXT4_RESIZE_INO);
if (IS_ERR(resize_inode)) {
ext4_warning(sb, "Error opening resize inode");
return PTR_ERR(resize_inode);
}
} else if (!meta_bg) {
ext4_warning(sb, "File system features do not permit "
"online resize");
return -EPERM;
}
/* See if the device is actually as big as what was requested */
@ -1761,8 +1876,8 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
out:
if (flex_gd)
free_flex_gd(flex_gd);
iput(resize_inode);
if (resize_inode != NULL)
iput(resize_inode);
if (test_opt(sb, DEBUG))
ext4_msg(sb, KERN_DEBUG, "resized filesystem from %llu "
"upto %llu blocks", o_blocks_count, n_blocks_count);