linux_dsm_epyc7002/fs/f2fs/dir.c
Jaegeuk Kim 354a3399dc f2fs: recover wrong pino after checkpoint during fsync
If a file is linked, f2fs loose its parent inode number so that fsync calls
for the linked file should do checkpoint all the time.
But, if we can recover its parent inode number after the checkpoint, we can
adjust roll-forward mechanism for the further fsync calls, which is able to
improve the fsync performance significatly.

Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
2013-06-14 09:04:45 +09:00

681 lines
16 KiB
C

/*
* fs/f2fs/dir.c
*
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
* http://www.samsung.com/
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include "f2fs.h"
#include "node.h"
#include "acl.h"
#include "xattr.h"
static unsigned long dir_blocks(struct inode *inode)
{
return ((unsigned long long) (i_size_read(inode) + PAGE_CACHE_SIZE - 1))
>> PAGE_CACHE_SHIFT;
}
static unsigned int dir_buckets(unsigned int level)
{
if (level < MAX_DIR_HASH_DEPTH / 2)
return 1 << level;
else
return 1 << ((MAX_DIR_HASH_DEPTH / 2) - 1);
}
static unsigned int bucket_blocks(unsigned int level)
{
if (level < MAX_DIR_HASH_DEPTH / 2)
return 2;
else
return 4;
}
static unsigned char f2fs_filetype_table[F2FS_FT_MAX] = {
[F2FS_FT_UNKNOWN] = DT_UNKNOWN,
[F2FS_FT_REG_FILE] = DT_REG,
[F2FS_FT_DIR] = DT_DIR,
[F2FS_FT_CHRDEV] = DT_CHR,
[F2FS_FT_BLKDEV] = DT_BLK,
[F2FS_FT_FIFO] = DT_FIFO,
[F2FS_FT_SOCK] = DT_SOCK,
[F2FS_FT_SYMLINK] = DT_LNK,
};
#define S_SHIFT 12
static unsigned char f2fs_type_by_mode[S_IFMT >> S_SHIFT] = {
[S_IFREG >> S_SHIFT] = F2FS_FT_REG_FILE,
[S_IFDIR >> S_SHIFT] = F2FS_FT_DIR,
[S_IFCHR >> S_SHIFT] = F2FS_FT_CHRDEV,
[S_IFBLK >> S_SHIFT] = F2FS_FT_BLKDEV,
[S_IFIFO >> S_SHIFT] = F2FS_FT_FIFO,
[S_IFSOCK >> S_SHIFT] = F2FS_FT_SOCK,
[S_IFLNK >> S_SHIFT] = F2FS_FT_SYMLINK,
};
static void set_de_type(struct f2fs_dir_entry *de, struct inode *inode)
{
umode_t mode = inode->i_mode;
de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT];
}
static unsigned long dir_block_index(unsigned int level, unsigned int idx)
{
unsigned long i;
unsigned long bidx = 0;
for (i = 0; i < level; i++)
bidx += dir_buckets(i) * bucket_blocks(i);
bidx += idx * bucket_blocks(level);
return bidx;
}
static bool early_match_name(const char *name, size_t namelen,
f2fs_hash_t namehash, struct f2fs_dir_entry *de)
{
if (le16_to_cpu(de->name_len) != namelen)
return false;
if (de->hash_code != namehash)
return false;
return true;
}
static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
const char *name, size_t namelen, int *max_slots,
f2fs_hash_t namehash, struct page **res_page)
{
struct f2fs_dir_entry *de;
unsigned long bit_pos, end_pos, next_pos;
struct f2fs_dentry_block *dentry_blk = kmap(dentry_page);
int slots;
bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
NR_DENTRY_IN_BLOCK, 0);
while (bit_pos < NR_DENTRY_IN_BLOCK) {
de = &dentry_blk->dentry[bit_pos];
slots = GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
if (early_match_name(name, namelen, namehash, de)) {
if (!memcmp(dentry_blk->filename[bit_pos],
name, namelen)) {
*res_page = dentry_page;
goto found;
}
}
next_pos = bit_pos + slots;
bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
NR_DENTRY_IN_BLOCK, next_pos);
if (bit_pos >= NR_DENTRY_IN_BLOCK)
end_pos = NR_DENTRY_IN_BLOCK;
else
end_pos = bit_pos;
if (*max_slots < end_pos - next_pos)
*max_slots = end_pos - next_pos;
}
de = NULL;
kunmap(dentry_page);
found:
return de;
}
static struct f2fs_dir_entry *find_in_level(struct inode *dir,
unsigned int level, const char *name, size_t namelen,
f2fs_hash_t namehash, struct page **res_page)
{
int s = GET_DENTRY_SLOTS(namelen);
unsigned int nbucket, nblock;
unsigned int bidx, end_block;
struct page *dentry_page;
struct f2fs_dir_entry *de = NULL;
bool room = false;
int max_slots = 0;
BUG_ON(level > MAX_DIR_HASH_DEPTH);
nbucket = dir_buckets(level);
nblock = bucket_blocks(level);
bidx = dir_block_index(level, le32_to_cpu(namehash) % nbucket);
end_block = bidx + nblock;
for (; bidx < end_block; bidx++) {
/* no need to allocate new dentry pages to all the indices */
dentry_page = find_data_page(dir, bidx, true);
if (IS_ERR(dentry_page)) {
room = true;
continue;
}
de = find_in_block(dentry_page, name, namelen,
&max_slots, namehash, res_page);
if (de)
break;
if (max_slots >= s)
room = true;
f2fs_put_page(dentry_page, 0);
}
if (!de && room && F2FS_I(dir)->chash != namehash) {
F2FS_I(dir)->chash = namehash;
F2FS_I(dir)->clevel = level;
}
return de;
}
/*
* Find an entry in the specified directory with the wanted name.
* It returns the page where the entry was found (as a parameter - res_page),
* and the entry itself. Page is returned mapped and unlocked.
* Entry is guaranteed to be valid.
*/
struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
struct qstr *child, struct page **res_page)
{
const char *name = child->name;
size_t namelen = child->len;
unsigned long npages = dir_blocks(dir);
struct f2fs_dir_entry *de = NULL;
f2fs_hash_t name_hash;
unsigned int max_depth;
unsigned int level;
if (namelen > F2FS_NAME_LEN)
return NULL;
if (npages == 0)
return NULL;
*res_page = NULL;
name_hash = f2fs_dentry_hash(name, namelen);
max_depth = F2FS_I(dir)->i_current_depth;
for (level = 0; level < max_depth; level++) {
de = find_in_level(dir, level, name,
namelen, name_hash, res_page);
if (de)
break;
}
if (!de && F2FS_I(dir)->chash != name_hash) {
F2FS_I(dir)->chash = name_hash;
F2FS_I(dir)->clevel = level - 1;
}
return de;
}
struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct page **p)
{
struct page *page;
struct f2fs_dir_entry *de;
struct f2fs_dentry_block *dentry_blk;
page = get_lock_data_page(dir, 0);
if (IS_ERR(page))
return NULL;
dentry_blk = kmap(page);
de = &dentry_blk->dentry[1];
*p = page;
unlock_page(page);
return de;
}
ino_t f2fs_inode_by_name(struct inode *dir, struct qstr *qstr)
{
ino_t res = 0;
struct f2fs_dir_entry *de;
struct page *page;
de = f2fs_find_entry(dir, qstr, &page);
if (de) {
res = le32_to_cpu(de->ino);
kunmap(page);
f2fs_put_page(page, 0);
}
return res;
}
void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
struct page *page, struct inode *inode)
{
lock_page(page);
wait_on_page_writeback(page);
de->ino = cpu_to_le32(inode->i_ino);
set_de_type(de, inode);
kunmap(page);
set_page_dirty(page);
dir->i_mtime = dir->i_ctime = CURRENT_TIME;
mark_inode_dirty(dir);
/* update parent inode number before releasing dentry page */
F2FS_I(inode)->i_pino = dir->i_ino;
f2fs_put_page(page, 1);
}
static void init_dent_inode(const struct qstr *name, struct page *ipage)
{
struct f2fs_node *rn;
/* copy name info. to this inode page */
rn = (struct f2fs_node *)page_address(ipage);
rn->i.i_namelen = cpu_to_le32(name->len);
memcpy(rn->i.i_name, name->name, name->len);
set_page_dirty(ipage);
}
static int make_empty_dir(struct inode *inode,
struct inode *parent, struct page *page)
{
struct page *dentry_page;
struct f2fs_dentry_block *dentry_blk;
struct f2fs_dir_entry *de;
void *kaddr;
dentry_page = get_new_data_page(inode, page, 0, true);
if (IS_ERR(dentry_page))
return PTR_ERR(dentry_page);
kaddr = kmap_atomic(dentry_page);
dentry_blk = (struct f2fs_dentry_block *)kaddr;
de = &dentry_blk->dentry[0];
de->name_len = cpu_to_le16(1);
de->hash_code = 0;
de->ino = cpu_to_le32(inode->i_ino);
memcpy(dentry_blk->filename[0], ".", 1);
set_de_type(de, inode);
de = &dentry_blk->dentry[1];
de->hash_code = 0;
de->name_len = cpu_to_le16(2);
de->ino = cpu_to_le32(parent->i_ino);
memcpy(dentry_blk->filename[1], "..", 2);
set_de_type(de, inode);
test_and_set_bit_le(0, &dentry_blk->dentry_bitmap);
test_and_set_bit_le(1, &dentry_blk->dentry_bitmap);
kunmap_atomic(kaddr);
set_page_dirty(dentry_page);
f2fs_put_page(dentry_page, 1);
return 0;
}
static struct page *init_inode_metadata(struct inode *inode,
struct inode *dir, const struct qstr *name)
{
struct page *page;
int err;
if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) {
page = new_inode_page(inode, name);
if (IS_ERR(page))
return page;
if (S_ISDIR(inode->i_mode)) {
err = make_empty_dir(inode, dir, page);
if (err)
goto error;
}
err = f2fs_init_acl(inode, dir);
if (err)
goto error;
err = f2fs_init_security(inode, dir, name, page);
if (err)
goto error;
wait_on_page_writeback(page);
} else {
page = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino);
if (IS_ERR(page))
return page;
wait_on_page_writeback(page);
set_cold_node(inode, page);
}
init_dent_inode(name, page);
/*
* This file should be checkpointed during fsync.
* We lost i_pino from now on.
*/
if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) {
file_lost_pino(inode);
inc_nlink(inode);
}
return page;
error:
f2fs_put_page(page, 1);
remove_inode_page(inode);
return ERR_PTR(err);
}
static void update_parent_metadata(struct inode *dir, struct inode *inode,
unsigned int current_depth)
{
if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) {
if (S_ISDIR(inode->i_mode)) {
inc_nlink(dir);
set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
}
clear_inode_flag(F2FS_I(inode), FI_NEW_INODE);
}
dir->i_mtime = dir->i_ctime = CURRENT_TIME;
if (F2FS_I(dir)->i_current_depth != current_depth) {
F2FS_I(dir)->i_current_depth = current_depth;
set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
}
if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR))
update_inode_page(dir);
else
mark_inode_dirty(dir);
if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK))
clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
}
static int room_for_filename(struct f2fs_dentry_block *dentry_blk, int slots)
{
int bit_start = 0;
int zero_start, zero_end;
next:
zero_start = find_next_zero_bit_le(&dentry_blk->dentry_bitmap,
NR_DENTRY_IN_BLOCK,
bit_start);
if (zero_start >= NR_DENTRY_IN_BLOCK)
return NR_DENTRY_IN_BLOCK;
zero_end = find_next_bit_le(&dentry_blk->dentry_bitmap,
NR_DENTRY_IN_BLOCK,
zero_start);
if (zero_end - zero_start >= slots)
return zero_start;
bit_start = zero_end + 1;
if (zero_end + 1 >= NR_DENTRY_IN_BLOCK)
return NR_DENTRY_IN_BLOCK;
goto next;
}
/*
* Caller should grab and release a mutex by calling mutex_lock_op() and
* mutex_unlock_op().
*/
int __f2fs_add_link(struct inode *dir, const struct qstr *name, struct inode *inode)
{
unsigned int bit_pos;
unsigned int level;
unsigned int current_depth;
unsigned long bidx, block;
f2fs_hash_t dentry_hash;
struct f2fs_dir_entry *de;
unsigned int nbucket, nblock;
size_t namelen = name->len;
struct page *dentry_page = NULL;
struct f2fs_dentry_block *dentry_blk = NULL;
int slots = GET_DENTRY_SLOTS(namelen);
struct page *page;
int err = 0;
int i;
dentry_hash = f2fs_dentry_hash(name->name, name->len);
level = 0;
current_depth = F2FS_I(dir)->i_current_depth;
if (F2FS_I(dir)->chash == dentry_hash) {
level = F2FS_I(dir)->clevel;
F2FS_I(dir)->chash = 0;
}
start:
if (current_depth == MAX_DIR_HASH_DEPTH)
return -ENOSPC;
/* Increase the depth, if required */
if (level == current_depth)
++current_depth;
nbucket = dir_buckets(level);
nblock = bucket_blocks(level);
bidx = dir_block_index(level, (le32_to_cpu(dentry_hash) % nbucket));
for (block = bidx; block <= (bidx + nblock - 1); block++) {
dentry_page = get_new_data_page(dir, NULL, block, true);
if (IS_ERR(dentry_page))
return PTR_ERR(dentry_page);
dentry_blk = kmap(dentry_page);
bit_pos = room_for_filename(dentry_blk, slots);
if (bit_pos < NR_DENTRY_IN_BLOCK)
goto add_dentry;
kunmap(dentry_page);
f2fs_put_page(dentry_page, 1);
}
/* Move to next level to find the empty slot for new dentry */
++level;
goto start;
add_dentry:
wait_on_page_writeback(dentry_page);
page = init_inode_metadata(inode, dir, name);
if (IS_ERR(page)) {
err = PTR_ERR(page);
goto fail;
}
de = &dentry_blk->dentry[bit_pos];
de->hash_code = dentry_hash;
de->name_len = cpu_to_le16(namelen);
memcpy(dentry_blk->filename[bit_pos], name->name, name->len);
de->ino = cpu_to_le32(inode->i_ino);
set_de_type(de, inode);
for (i = 0; i < slots; i++)
test_and_set_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
set_page_dirty(dentry_page);
/* we don't need to mark_inode_dirty now */
F2FS_I(inode)->i_pino = dir->i_ino;
update_inode(inode, page);
f2fs_put_page(page, 1);
update_parent_metadata(dir, inode, current_depth);
fail:
clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
kunmap(dentry_page);
f2fs_put_page(dentry_page, 1);
return err;
}
/*
* It only removes the dentry from the dentry page,corresponding name
* entry in name page does not need to be touched during deletion.
*/
void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
struct inode *inode)
{
struct f2fs_dentry_block *dentry_blk;
unsigned int bit_pos;
struct address_space *mapping = page->mapping;
struct inode *dir = mapping->host;
struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len));
void *kaddr = page_address(page);
int i;
lock_page(page);
wait_on_page_writeback(page);
dentry_blk = (struct f2fs_dentry_block *)kaddr;
bit_pos = dentry - (struct f2fs_dir_entry *)dentry_blk->dentry;
for (i = 0; i < slots; i++)
test_and_clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
/* Let's check and deallocate this dentry page */
bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
NR_DENTRY_IN_BLOCK,
0);
kunmap(page); /* kunmap - pair of f2fs_find_entry */
set_page_dirty(page);
dir->i_ctime = dir->i_mtime = CURRENT_TIME;
if (inode && S_ISDIR(inode->i_mode)) {
drop_nlink(dir);
update_inode_page(dir);
} else {
mark_inode_dirty(dir);
}
if (inode) {
inode->i_ctime = CURRENT_TIME;
drop_nlink(inode);
if (S_ISDIR(inode->i_mode)) {
drop_nlink(inode);
i_size_write(inode, 0);
}
update_inode_page(inode);
if (inode->i_nlink == 0)
add_orphan_inode(sbi, inode->i_ino);
}
if (bit_pos == NR_DENTRY_IN_BLOCK) {
truncate_hole(dir, page->index, page->index + 1);
clear_page_dirty_for_io(page);
ClearPageUptodate(page);
dec_page_count(sbi, F2FS_DIRTY_DENTS);
inode_dec_dirty_dents(dir);
}
f2fs_put_page(page, 1);
}
bool f2fs_empty_dir(struct inode *dir)
{
unsigned long bidx;
struct page *dentry_page;
unsigned int bit_pos;
struct f2fs_dentry_block *dentry_blk;
unsigned long nblock = dir_blocks(dir);
for (bidx = 0; bidx < nblock; bidx++) {
void *kaddr;
dentry_page = get_lock_data_page(dir, bidx);
if (IS_ERR(dentry_page)) {
if (PTR_ERR(dentry_page) == -ENOENT)
continue;
else
return false;
}
kaddr = kmap_atomic(dentry_page);
dentry_blk = (struct f2fs_dentry_block *)kaddr;
if (bidx == 0)
bit_pos = 2;
else
bit_pos = 0;
bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
NR_DENTRY_IN_BLOCK,
bit_pos);
kunmap_atomic(kaddr);
f2fs_put_page(dentry_page, 1);
if (bit_pos < NR_DENTRY_IN_BLOCK)
return false;
}
return true;
}
static int f2fs_readdir(struct file *file, void *dirent, filldir_t filldir)
{
unsigned long pos = file->f_pos;
struct inode *inode = file_inode(file);
unsigned long npages = dir_blocks(inode);
unsigned char *types = NULL;
unsigned int bit_pos = 0, start_bit_pos = 0;
int over = 0;
struct f2fs_dentry_block *dentry_blk = NULL;
struct f2fs_dir_entry *de = NULL;
struct page *dentry_page = NULL;
unsigned int n = 0;
unsigned char d_type = DT_UNKNOWN;
int slots;
types = f2fs_filetype_table;
bit_pos = (pos % NR_DENTRY_IN_BLOCK);
n = (pos / NR_DENTRY_IN_BLOCK);
for ( ; n < npages; n++) {
dentry_page = get_lock_data_page(inode, n);
if (IS_ERR(dentry_page))
continue;
start_bit_pos = bit_pos;
dentry_blk = kmap(dentry_page);
while (bit_pos < NR_DENTRY_IN_BLOCK) {
d_type = DT_UNKNOWN;
bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
NR_DENTRY_IN_BLOCK,
bit_pos);
if (bit_pos >= NR_DENTRY_IN_BLOCK)
break;
de = &dentry_blk->dentry[bit_pos];
if (types && de->file_type < F2FS_FT_MAX)
d_type = types[de->file_type];
over = filldir(dirent,
dentry_blk->filename[bit_pos],
le16_to_cpu(de->name_len),
(n * NR_DENTRY_IN_BLOCK) + bit_pos,
le32_to_cpu(de->ino), d_type);
if (over) {
file->f_pos += bit_pos - start_bit_pos;
goto success;
}
slots = GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
bit_pos += slots;
}
bit_pos = 0;
file->f_pos = (n + 1) * NR_DENTRY_IN_BLOCK;
kunmap(dentry_page);
f2fs_put_page(dentry_page, 1);
dentry_page = NULL;
}
success:
if (dentry_page && !IS_ERR(dentry_page)) {
kunmap(dentry_page);
f2fs_put_page(dentry_page, 1);
}
return 0;
}
const struct file_operations f2fs_dir_operations = {
.llseek = generic_file_llseek,
.read = generic_read_dir,
.readdir = f2fs_readdir,
.fsync = f2fs_sync_file,
.unlocked_ioctl = f2fs_ioctl,
};