ext4: teach the inode allocator to use a goal inode number

Enhance the inode allocator to take a goal inode number as a
parameter; if it is specified, it takes precedence over Orlov or
parent directory inode allocation algorithms.

The extents migration function uses the goal inode number so that the
extent trees allocated by the migration function use the correct flex_bg.
In the future, the goal inode functionality will also be used to
allocate an adjacent inode for the extended attributes.

Also, for testing purposes the goal inode number can be specified via
/sys/fs/ext4/{dev}/inode_goal.  This can be useful for testing inode
allocation beyond 2^32 blocks on very large filesystems.

Signed-off-by: Andreas Dilger <adilger@sun.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
This commit is contained in:
Andreas Dilger 2009-06-13 11:45:35 -04:00 committed by Theodore Ts'o
parent f157a4aa98
commit 11013911da
6 changed files with 36 additions and 10 deletions

View File

@ -79,3 +79,13 @@ Description:
This file is read-only and shows the number of This file is read-only and shows the number of
kilobytes of data that have been written to this kilobytes of data that have been written to this
filesystem since it was mounted. filesystem since it was mounted.
What: /sys/fs/ext4/<disk>/inode_goal
Date: June 2008
Contact: "Theodore Ts'o" <tytso@mit.edu>
Description:
Tuning parameter which (if non-zero) controls the goal
inode used by the inode allocator in preference to
all other allocation heuristics. This is intended for
debugging use only, and should be 0 on production
systems.

View File

@ -863,6 +863,7 @@ struct ext4_sb_info {
int s_inode_size; int s_inode_size;
int s_first_ino; int s_first_ino;
unsigned int s_inode_readahead_blks; unsigned int s_inode_readahead_blks;
unsigned int s_inode_goal;
spinlock_t s_next_gen_lock; spinlock_t s_next_gen_lock;
u32 s_next_generation; u32 s_next_generation;
u32 s_hash_seed[4]; u32 s_hash_seed[4];
@ -1316,7 +1317,7 @@ extern int ext4fs_dirhash(const char *name, int len, struct
/* ialloc.c */ /* ialloc.c */
extern struct inode *ext4_new_inode(handle_t *, struct inode *, int, extern struct inode *ext4_new_inode(handle_t *, struct inode *, int,
const struct qstr *qstr); const struct qstr *qstr, __u32 goal);
extern void ext4_free_inode(handle_t *, struct inode *); extern void ext4_free_inode(handle_t *, struct inode *);
extern struct inode * ext4_orphan_get(struct super_block *, unsigned long); extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
extern unsigned long ext4_count_free_inodes(struct super_block *); extern unsigned long ext4_count_free_inodes(struct super_block *);

View File

@ -799,7 +799,7 @@ static int ext4_claim_inode(struct super_block *sb,
* group to find a free inode. * group to find a free inode.
*/ */
struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode, struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode,
const struct qstr *qstr) const struct qstr *qstr, __u32 goal)
{ {
struct super_block *sb; struct super_block *sb;
struct buffer_head *inode_bitmap_bh = NULL; struct buffer_head *inode_bitmap_bh = NULL;
@ -830,6 +830,16 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode,
ei = EXT4_I(inode); ei = EXT4_I(inode);
sbi = EXT4_SB(sb); sbi = EXT4_SB(sb);
if (!goal)
goal = sbi->s_inode_goal;
if (goal && goal < le32_to_cpu(sbi->s_es->s_inodes_count)) {
group = (goal - 1) / EXT4_INODES_PER_GROUP(sb);
ino = (goal - 1) % EXT4_INODES_PER_GROUP(sb);
ret2 = 0;
goto got_group;
}
if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) { if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) {
ret2 = find_group_flex(sb, dir, &group); ret2 = find_group_flex(sb, dir, &group);
if (ret2 == -1) { if (ret2 == -1) {
@ -858,7 +868,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode,
if (ret2 == -1) if (ret2 == -1)
goto out; goto out;
for (i = 0; i < ngroups; i++) { for (i = 0; i < ngroups; i++, ino = 0) {
err = -EIO; err = -EIO;
gdp = ext4_get_group_desc(sb, group, &group_desc_bh); gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
@ -870,8 +880,6 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode,
if (!inode_bitmap_bh) if (!inode_bitmap_bh)
goto fail; goto fail;
ino = 0;
repeat_in_this_group: repeat_in_this_group:
ino = ext4_find_next_zero_bit((unsigned long *) ino = ext4_find_next_zero_bit((unsigned long *)
inode_bitmap_bh->b_data, inode_bitmap_bh->b_data,

View File

@ -458,6 +458,7 @@ int ext4_ext_migrate(struct inode *inode)
struct inode *tmp_inode = NULL; struct inode *tmp_inode = NULL;
struct list_blocks_struct lb; struct list_blocks_struct lb;
unsigned long max_entries; unsigned long max_entries;
__u32 goal;
/* /*
* If the filesystem does not support extents, or the inode * If the filesystem does not support extents, or the inode
@ -483,8 +484,10 @@ int ext4_ext_migrate(struct inode *inode)
retval = PTR_ERR(handle); retval = PTR_ERR(handle);
return retval; return retval;
} }
goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) *
EXT4_INODES_PER_GROUP(inode->i_sb)) + 1;
tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode, tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode,
S_IFREG, 0); S_IFREG, 0, goal);
if (IS_ERR(tmp_inode)) { if (IS_ERR(tmp_inode)) {
retval = -ENOMEM; retval = -ENOMEM;
ext4_journal_stop(handle); ext4_journal_stop(handle);

View File

@ -1782,7 +1782,7 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, int mode,
if (IS_DIRSYNC(dir)) if (IS_DIRSYNC(dir))
ext4_handle_sync(handle); ext4_handle_sync(handle);
inode = ext4_new_inode(handle, dir, mode, &dentry->d_name); inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0);
err = PTR_ERR(inode); err = PTR_ERR(inode);
if (!IS_ERR(inode)) { if (!IS_ERR(inode)) {
inode->i_op = &ext4_file_inode_operations; inode->i_op = &ext4_file_inode_operations;
@ -1816,7 +1816,7 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry,
if (IS_DIRSYNC(dir)) if (IS_DIRSYNC(dir))
ext4_handle_sync(handle); ext4_handle_sync(handle);
inode = ext4_new_inode(handle, dir, mode, &dentry->d_name); inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0);
err = PTR_ERR(inode); err = PTR_ERR(inode);
if (!IS_ERR(inode)) { if (!IS_ERR(inode)) {
init_special_inode(inode, inode->i_mode, rdev); init_special_inode(inode, inode->i_mode, rdev);
@ -1853,7 +1853,8 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode)
if (IS_DIRSYNC(dir)) if (IS_DIRSYNC(dir))
ext4_handle_sync(handle); ext4_handle_sync(handle);
inode = ext4_new_inode(handle, dir, S_IFDIR | mode, &dentry->d_name); inode = ext4_new_inode(handle, dir, S_IFDIR | mode,
&dentry->d_name, 0);
err = PTR_ERR(inode); err = PTR_ERR(inode);
if (IS_ERR(inode)) if (IS_ERR(inode))
goto out_stop; goto out_stop;
@ -2264,7 +2265,8 @@ static int ext4_symlink(struct inode *dir,
if (IS_DIRSYNC(dir)) if (IS_DIRSYNC(dir))
ext4_handle_sync(handle); ext4_handle_sync(handle);
inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO, &dentry->d_name); inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO,
&dentry->d_name, 0);
err = PTR_ERR(inode); err = PTR_ERR(inode);
if (IS_ERR(inode)) if (IS_ERR(inode))
goto out_stop; goto out_stop;

View File

@ -2206,6 +2206,7 @@ EXT4_RO_ATTR(session_write_kbytes);
EXT4_RO_ATTR(lifetime_write_kbytes); EXT4_RO_ATTR(lifetime_write_kbytes);
EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show, EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show,
inode_readahead_blks_store, s_inode_readahead_blks); inode_readahead_blks_store, s_inode_readahead_blks);
EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal);
EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats); EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats);
EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan); EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan); EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
@ -2218,6 +2219,7 @@ static struct attribute *ext4_attrs[] = {
ATTR_LIST(session_write_kbytes), ATTR_LIST(session_write_kbytes),
ATTR_LIST(lifetime_write_kbytes), ATTR_LIST(lifetime_write_kbytes),
ATTR_LIST(inode_readahead_blks), ATTR_LIST(inode_readahead_blks),
ATTR_LIST(inode_goal),
ATTR_LIST(mb_stats), ATTR_LIST(mb_stats),
ATTR_LIST(mb_max_to_scan), ATTR_LIST(mb_max_to_scan),
ATTR_LIST(mb_min_to_scan), ATTR_LIST(mb_min_to_scan),