2018-09-12 08:16:07 +07:00
|
|
|
// SPDX-License-Identifier: GPL-2.0
|
2012-11-29 11:28:09 +07:00
|
|
|
/*
|
2012-11-02 15:07:47 +07:00
|
|
|
* fs/f2fs/super.c
|
|
|
|
*
|
|
|
|
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
|
|
|
|
* http://www.samsung.com/
|
|
|
|
*/
|
|
|
|
#include <linux/module.h>
|
|
|
|
#include <linux/init.h>
|
|
|
|
#include <linux/fs.h>
|
|
|
|
#include <linux/statfs.h>
|
|
|
|
#include <linux/buffer_head.h>
|
|
|
|
#include <linux/backing-dev.h>
|
|
|
|
#include <linux/kthread.h>
|
|
|
|
#include <linux/parser.h>
|
|
|
|
#include <linux/mount.h>
|
|
|
|
#include <linux/seq_file.h>
|
2013-06-28 10:47:01 +07:00
|
|
|
#include <linux/proc_fs.h>
|
2012-11-02 15:07:47 +07:00
|
|
|
#include <linux/random.h>
|
|
|
|
#include <linux/exportfs.h>
|
2013-03-17 15:26:14 +07:00
|
|
|
#include <linux/blkdev.h>
|
2017-07-08 23:13:07 +07:00
|
|
|
#include <linux/quotaops.h>
|
2012-11-02 15:07:47 +07:00
|
|
|
#include <linux/f2fs_fs.h>
|
2013-08-04 21:09:40 +07:00
|
|
|
#include <linux/sysfs.h>
|
2017-08-08 09:54:31 +07:00
|
|
|
#include <linux/quota.h>
|
2012-11-02 15:07:47 +07:00
|
|
|
|
|
|
|
#include "f2fs.h"
|
|
|
|
#include "node.h"
|
2013-03-31 11:26:03 +07:00
|
|
|
#include "segment.h"
|
2012-11-02 15:07:47 +07:00
|
|
|
#include "xattr.h"
|
2013-08-04 21:09:40 +07:00
|
|
|
#include "gc.h"
|
2014-12-18 11:04:08 +07:00
|
|
|
#include "trace.h"
|
2012-11-02 15:07:47 +07:00
|
|
|
|
2013-04-19 23:28:40 +07:00
|
|
|
#define CREATE_TRACE_POINTS
|
|
|
|
#include <trace/events/f2fs.h>
|
|
|
|
|
2012-11-02 15:07:47 +07:00
|
|
|
static struct kmem_cache *f2fs_inode_cachep;
|
|
|
|
|
2016-04-30 05:34:32 +07:00
|
|
|
#ifdef CONFIG_F2FS_FAULT_INJECTION
|
2016-04-30 05:49:56 +07:00
|
|
|
|
2018-11-24 16:06:42 +07:00
|
|
|
const char *f2fs_fault_name[FAULT_MAX] = {
|
2016-04-30 05:49:56 +07:00
|
|
|
[FAULT_KMALLOC] = "kmalloc",
|
2017-11-30 18:28:18 +07:00
|
|
|
[FAULT_KVMALLOC] = "kvmalloc",
|
2016-04-30 06:17:09 +07:00
|
|
|
[FAULT_PAGE_ALLOC] = "page alloc",
|
2017-10-28 15:52:30 +07:00
|
|
|
[FAULT_PAGE_GET] = "page get",
|
2017-10-28 15:52:31 +07:00
|
|
|
[FAULT_ALLOC_BIO] = "alloc bio",
|
2016-04-30 06:29:22 +07:00
|
|
|
[FAULT_ALLOC_NID] = "alloc nid",
|
|
|
|
[FAULT_ORPHAN] = "orphan",
|
|
|
|
[FAULT_BLOCK] = "no more block",
|
|
|
|
[FAULT_DIR_DEPTH] = "too big dir depth",
|
2016-05-26 05:24:18 +07:00
|
|
|
[FAULT_EVICT_INODE] = "evict_inode fail",
|
2017-03-10 06:24:24 +07:00
|
|
|
[FAULT_TRUNCATE] = "truncate fail",
|
2018-09-12 08:22:29 +07:00
|
|
|
[FAULT_READ_IO] = "read IO error",
|
2016-09-26 18:45:55 +07:00
|
|
|
[FAULT_CHECKPOINT] = "checkpoint error",
|
2018-08-06 19:30:18 +07:00
|
|
|
[FAULT_DISCARD] = "discard error",
|
2018-09-12 08:22:29 +07:00
|
|
|
[FAULT_WRITE_IO] = "write IO error",
|
2016-04-30 05:49:56 +07:00
|
|
|
};
|
2016-05-16 11:38:50 +07:00
|
|
|
|
2018-08-08 16:36:41 +07:00
|
|
|
void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
|
|
|
|
unsigned int type)
|
2016-05-16 11:38:50 +07:00
|
|
|
{
|
2018-03-08 13:22:56 +07:00
|
|
|
struct f2fs_fault_info *ffi = &F2FS_OPTION(sbi).fault_info;
|
2016-09-23 20:30:09 +07:00
|
|
|
|
2016-05-16 11:38:50 +07:00
|
|
|
if (rate) {
|
2016-09-23 20:30:09 +07:00
|
|
|
atomic_set(&ffi->inject_ops, 0);
|
|
|
|
ffi->inject_rate = rate;
|
2016-05-16 11:38:50 +07:00
|
|
|
}
|
2018-08-08 16:36:41 +07:00
|
|
|
|
|
|
|
if (type)
|
|
|
|
ffi->inject_type = type;
|
|
|
|
|
|
|
|
if (!rate && !type)
|
|
|
|
memset(ffi, 0, sizeof(struct f2fs_fault_info));
|
2016-05-16 11:38:50 +07:00
|
|
|
}
|
2016-04-30 05:34:32 +07:00
|
|
|
#endif
|
|
|
|
|
2015-06-20 02:01:21 +07:00
|
|
|
/* f2fs-wide shrinker description */
|
|
|
|
static struct shrinker f2fs_shrinker_info = {
|
|
|
|
.scan_objects = f2fs_shrink_scan,
|
|
|
|
.count_objects = f2fs_shrink_count,
|
|
|
|
.seeks = DEFAULT_SEEKS,
|
|
|
|
};
|
|
|
|
|
2012-11-02 15:07:47 +07:00
|
|
|
/*
 * Mount option tokens.  Each value is paired with its textual pattern in
 * the f2fs_tokens match table; Opt_err terminates the list.  The order of
 * the enumerators fixes their numeric values, so new tokens should be
 * inserted with care.
 */
enum {
	Opt_gc_background,
	Opt_disable_roll_forward,
	Opt_norecovery,
	Opt_discard,
	Opt_nodiscard,
	Opt_noheap,
	Opt_heap,
	Opt_user_xattr,
	Opt_nouser_xattr,
	Opt_acl,
	Opt_noacl,
	Opt_active_logs,
	Opt_disable_ext_identify,
	Opt_inline_xattr,
	Opt_noinline_xattr,
	Opt_inline_xattr_size,
	Opt_inline_data,
	Opt_inline_dentry,
	Opt_noinline_dentry,
	Opt_flush_merge,
	Opt_noflush_merge,
	Opt_nobarrier,
	Opt_fastboot,
	Opt_extent_cache,
	Opt_noextent_cache,
	Opt_noinline_data,
	Opt_data_flush,
	Opt_reserve_root,
	Opt_resgid,
	Opt_resuid,
	Opt_mode,
	Opt_io_size_bits,
	Opt_fault_injection,
	Opt_fault_type,
	Opt_lazytime,
	Opt_nolazytime,
	Opt_quota,
	Opt_noquota,
	Opt_usrquota,
	Opt_grpquota,
	Opt_prjquota,
	Opt_usrjquota,
	Opt_grpjquota,
	Opt_prjjquota,
	Opt_offusrjquota,
	Opt_offgrpjquota,
	Opt_offprjjquota,
	Opt_jqfmt_vfsold,
	Opt_jqfmt_vfsv0,
	Opt_jqfmt_vfsv1,
	Opt_whint,
	Opt_alloc,
	Opt_fsync,
	Opt_test_dummy_encryption,
	Opt_checkpoint_disable,
	Opt_checkpoint_disable_cap,
	Opt_checkpoint_disable_cap_perc,
	Opt_checkpoint_enable,
	Opt_err,
};
|
|
|
|
|
|
|
|
static match_table_t f2fs_tokens = {
|
2013-06-16 07:48:48 +07:00
|
|
|
{Opt_gc_background, "background_gc=%s"},
|
2012-11-02 15:07:47 +07:00
|
|
|
{Opt_disable_roll_forward, "disable_roll_forward"},
|
2015-01-24 09:33:46 +07:00
|
|
|
{Opt_norecovery, "norecovery"},
|
2012-11-02 15:07:47 +07:00
|
|
|
{Opt_discard, "discard"},
|
2016-07-03 21:05:14 +07:00
|
|
|
{Opt_nodiscard, "nodiscard"},
|
2012-11-02 15:07:47 +07:00
|
|
|
{Opt_noheap, "no_heap"},
|
2017-03-25 07:41:45 +07:00
|
|
|
{Opt_heap, "heap"},
|
2013-10-07 09:36:20 +07:00
|
|
|
{Opt_user_xattr, "user_xattr"},
|
2012-11-02 15:07:47 +07:00
|
|
|
{Opt_nouser_xattr, "nouser_xattr"},
|
2013-10-07 09:36:20 +07:00
|
|
|
{Opt_acl, "acl"},
|
2012-11-02 15:07:47 +07:00
|
|
|
{Opt_noacl, "noacl"},
|
|
|
|
{Opt_active_logs, "active_logs=%u"},
|
|
|
|
{Opt_disable_ext_identify, "disable_ext_identify"},
|
2013-08-08 13:16:22 +07:00
|
|
|
{Opt_inline_xattr, "inline_xattr"},
|
2017-02-15 09:34:45 +07:00
|
|
|
{Opt_noinline_xattr, "noinline_xattr"},
|
f2fs: support flexible inline xattr size
Now, in product, more and more features based on file encryption were
introduced, their demand of xattr space is increasing, however, inline
xattr has fixed-size of 200 bytes, once inline xattr space is full, new
increased xattr data would occupy additional xattr block which may bring
us more space usage and performance regression during persisting.
In order to resolve above issue, it's better to expand inline xattr size
flexibly according to user's requirement.
So this patch introduces new filesystem feature 'flexible inline xattr',
and new mount option 'inline_xattr_size=%u', once mkfs enables the
feature, we can use the option to make f2fs supporting flexible inline
xattr size.
To support this feature, we add extra attribute i_inline_xattr_size in
inode layout, indicating that how many space inline xattr borrows from
block address mapping space in inode layout, by this, we can easily
locate and store flexible-sized inline xattr data in inode.
Inode disk layout:
+----------------------+
| .i_mode |
| ... |
| .i_ext |
+----------------------+
| .i_extra_isize |
| .i_inline_xattr_size |-----------+
| ... | |
+----------------------+ |
| .i_addr | |
| - block address or | |
| - inline data | |
+----------------------+<---+ v
| inline xattr | +---inline xattr range
+----------------------+<---+
| .i_nid |
+----------------------+
| node_footer |
| (nid, ino, offset) |
+----------------------+
Note that, we have to cnosider backward compatibility which reserved
inline_data space, 200 bytes, all the time, reported by Sheng Yong.
Previous inline data or directory always reserved 200 bytes in inode layout,
even if inline_xattr is disabled. In order to keep inline_dentry's structure
for backward compatibility, we get the space back only from inline_data.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Reported-by: Sheng Yong <shengyong1@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-09-06 20:59:50 +07:00
|
|
|
{Opt_inline_xattr_size, "inline_xattr_size=%u"},
|
2013-11-10 22:13:17 +07:00
|
|
|
{Opt_inline_data, "inline_data"},
|
2014-09-24 17:16:13 +07:00
|
|
|
{Opt_inline_dentry, "inline_dentry"},
|
2016-05-09 18:56:34 +07:00
|
|
|
{Opt_noinline_dentry, "noinline_dentry"},
|
2014-04-02 13:34:36 +07:00
|
|
|
{Opt_flush_merge, "flush_merge"},
|
2016-05-21 12:39:20 +07:00
|
|
|
{Opt_noflush_merge, "noflush_merge"},
|
2014-07-23 23:57:31 +07:00
|
|
|
{Opt_nobarrier, "nobarrier"},
|
2014-10-31 12:47:03 +07:00
|
|
|
{Opt_fastboot, "fastboot"},
|
2015-02-05 16:55:51 +07:00
|
|
|
{Opt_extent_cache, "extent_cache"},
|
2015-06-26 07:43:04 +07:00
|
|
|
{Opt_noextent_cache, "noextent_cache"},
|
2015-03-24 09:20:27 +07:00
|
|
|
{Opt_noinline_data, "noinline_data"},
|
2015-12-16 12:12:16 +07:00
|
|
|
{Opt_data_flush, "data_flush"},
|
2017-12-28 06:05:52 +07:00
|
|
|
{Opt_reserve_root, "reserve_root=%u"},
|
2018-01-05 12:36:09 +07:00
|
|
|
{Opt_resgid, "resgid=%u"},
|
|
|
|
{Opt_resuid, "resuid=%u"},
|
2016-06-04 09:29:38 +07:00
|
|
|
{Opt_mode, "mode=%s"},
|
2016-12-22 08:09:19 +07:00
|
|
|
{Opt_io_size_bits, "io_bits=%u"},
|
2016-04-30 05:34:32 +07:00
|
|
|
{Opt_fault_injection, "fault_injection=%u"},
|
2018-08-08 16:36:41 +07:00
|
|
|
{Opt_fault_type, "fault_type=%u"},
|
2016-05-21 11:47:24 +07:00
|
|
|
{Opt_lazytime, "lazytime"},
|
|
|
|
{Opt_nolazytime, "nolazytime"},
|
2017-08-08 09:54:31 +07:00
|
|
|
{Opt_quota, "quota"},
|
|
|
|
{Opt_noquota, "noquota"},
|
2017-07-08 23:13:07 +07:00
|
|
|
{Opt_usrquota, "usrquota"},
|
|
|
|
{Opt_grpquota, "grpquota"},
|
2017-07-25 23:01:41 +07:00
|
|
|
{Opt_prjquota, "prjquota"},
|
2017-08-08 09:54:31 +07:00
|
|
|
{Opt_usrjquota, "usrjquota=%s"},
|
|
|
|
{Opt_grpjquota, "grpjquota=%s"},
|
|
|
|
{Opt_prjjquota, "prjjquota=%s"},
|
|
|
|
{Opt_offusrjquota, "usrjquota="},
|
|
|
|
{Opt_offgrpjquota, "grpjquota="},
|
|
|
|
{Opt_offprjjquota, "prjjquota="},
|
|
|
|
{Opt_jqfmt_vfsold, "jqfmt=vfsold"},
|
|
|
|
{Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
|
|
|
|
{Opt_jqfmt_vfsv1, "jqfmt=vfsv1"},
|
2018-01-31 09:36:57 +07:00
|
|
|
{Opt_whint, "whint_mode=%s"},
|
2018-02-18 23:50:49 +07:00
|
|
|
{Opt_alloc, "alloc_mode=%s"},
|
2018-03-07 11:07:49 +07:00
|
|
|
{Opt_fsync, "fsync_mode=%s"},
|
2018-03-15 17:51:42 +07:00
|
|
|
{Opt_test_dummy_encryption, "test_dummy_encryption"},
|
2019-05-30 07:49:06 +07:00
|
|
|
{Opt_checkpoint_disable, "checkpoint=disable"},
|
|
|
|
{Opt_checkpoint_disable_cap, "checkpoint=disable:%u"},
|
|
|
|
{Opt_checkpoint_disable_cap_perc, "checkpoint=disable:%u%%"},
|
|
|
|
{Opt_checkpoint_enable, "checkpoint=enable"},
|
2012-11-02 15:07:47 +07:00
|
|
|
{Opt_err, NULL},
|
|
|
|
};
|
|
|
|
|
2019-06-18 16:48:42 +07:00
|
|
|
void f2fs_printk(struct f2fs_sb_info *sbi, const char *fmt, ...)
|
2012-12-30 12:52:05 +07:00
|
|
|
{
|
|
|
|
struct va_format vaf;
|
|
|
|
va_list args;
|
2019-06-18 16:48:42 +07:00
|
|
|
int level;
|
2012-12-30 12:52:05 +07:00
|
|
|
|
|
|
|
va_start(args, fmt);
|
2019-06-18 16:48:42 +07:00
|
|
|
|
|
|
|
level = printk_get_level(fmt);
|
|
|
|
vaf.fmt = printk_skip_level(fmt);
|
2012-12-30 12:52:05 +07:00
|
|
|
vaf.va = &args;
|
2019-06-18 16:48:42 +07:00
|
|
|
printk("%c%cF2FS-fs (%s): %pV\n",
|
|
|
|
KERN_SOH_ASCII, level, sbi->sb->s_id, &vaf);
|
|
|
|
|
2012-12-30 12:52:05 +07:00
|
|
|
va_end(args);
|
|
|
|
}
|
|
|
|
|
2017-12-28 06:05:52 +07:00
|
|
|
static inline void limit_reserve_root(struct f2fs_sb_info *sbi)
|
|
|
|
{
|
2019-05-30 07:49:04 +07:00
|
|
|
block_t limit = min((sbi->user_block_count << 1) / 1000,
|
|
|
|
sbi->user_block_count - sbi->reserved_blocks);
|
2017-12-28 06:05:52 +07:00
|
|
|
|
|
|
|
/* limit is 0.2% */
|
2018-03-08 13:22:56 +07:00
|
|
|
if (test_opt(sbi, RESERVE_ROOT) &&
|
|
|
|
F2FS_OPTION(sbi).root_reserved_blocks > limit) {
|
|
|
|
F2FS_OPTION(sbi).root_reserved_blocks = limit;
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_info(sbi, "Reduce reserved blocks for root = %u",
|
|
|
|
F2FS_OPTION(sbi).root_reserved_blocks);
|
2017-12-28 06:05:52 +07:00
|
|
|
}
|
2018-01-05 12:36:09 +07:00
|
|
|
if (!test_opt(sbi, RESERVE_ROOT) &&
|
2018-03-08 13:22:56 +07:00
|
|
|
(!uid_eq(F2FS_OPTION(sbi).s_resuid,
|
2018-01-05 12:36:09 +07:00
|
|
|
make_kuid(&init_user_ns, F2FS_DEF_RESUID)) ||
|
2018-03-08 13:22:56 +07:00
|
|
|
!gid_eq(F2FS_OPTION(sbi).s_resgid,
|
2018-01-05 12:36:09 +07:00
|
|
|
make_kgid(&init_user_ns, F2FS_DEF_RESGID))))
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_info(sbi, "Ignore s_resuid=%u, s_resgid=%u w/o reserve_root",
|
|
|
|
from_kuid_munged(&init_user_ns,
|
|
|
|
F2FS_OPTION(sbi).s_resuid),
|
|
|
|
from_kgid_munged(&init_user_ns,
|
|
|
|
F2FS_OPTION(sbi).s_resgid));
|
2017-12-28 06:05:52 +07:00
|
|
|
}
|
|
|
|
|
2012-11-02 15:07:47 +07:00
|
|
|
static void init_once(void *foo)
|
|
|
|
{
|
|
|
|
struct f2fs_inode_info *fi = (struct f2fs_inode_info *) foo;
|
|
|
|
|
|
|
|
inode_init_once(&fi->vfs_inode);
|
|
|
|
}
|
|
|
|
|
2017-08-08 09:54:31 +07:00
|
|
|
#ifdef CONFIG_QUOTA
|
|
|
|
/* Quota type names (from INITQFNAMES), used in log messages */
static const char * const quotatypes[] = INITQFNAMES;
#define QTYPE2NAME(t)	(quotatypes[t])
|
|
|
|
static int f2fs_set_qf_name(struct super_block *sb, int qtype,
|
|
|
|
substring_t *args)
|
|
|
|
{
|
|
|
|
struct f2fs_sb_info *sbi = F2FS_SB(sb);
|
|
|
|
char *qname;
|
|
|
|
int ret = -EINVAL;
|
|
|
|
|
2018-03-08 13:22:56 +07:00
|
|
|
if (sb_any_quota_loaded(sb) && !F2FS_OPTION(sbi).s_qf_names[qtype]) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_err(sbi, "Cannot change journaled quota options when quota turned on");
|
2017-08-08 09:54:31 +07:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
2018-10-24 17:34:26 +07:00
|
|
|
if (f2fs_sb_has_quota_ino(sbi)) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_info(sbi, "QUOTA feature is enabled, so ignore qf_name");
|
2017-10-06 23:14:28 +07:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-08-08 09:54:31 +07:00
|
|
|
qname = match_strdup(args);
|
|
|
|
if (!qname) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_err(sbi, "Not enough memory for storing quotafile name");
|
2019-01-01 20:33:11 +07:00
|
|
|
return -ENOMEM;
|
2017-08-08 09:54:31 +07:00
|
|
|
}
|
2018-03-08 13:22:56 +07:00
|
|
|
if (F2FS_OPTION(sbi).s_qf_names[qtype]) {
|
|
|
|
if (strcmp(F2FS_OPTION(sbi).s_qf_names[qtype], qname) == 0)
|
2017-08-08 09:54:31 +07:00
|
|
|
ret = 0;
|
|
|
|
else
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_err(sbi, "%s quota file already specified",
|
2017-08-08 09:54:31 +07:00
|
|
|
QTYPE2NAME(qtype));
|
|
|
|
goto errout;
|
|
|
|
}
|
|
|
|
if (strchr(qname, '/')) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_err(sbi, "quotafile must be on filesystem root");
|
2017-08-08 09:54:31 +07:00
|
|
|
goto errout;
|
|
|
|
}
|
2018-03-08 13:22:56 +07:00
|
|
|
F2FS_OPTION(sbi).s_qf_names[qtype] = qname;
|
2017-08-08 09:54:31 +07:00
|
|
|
set_opt(sbi, QUOTA);
|
|
|
|
return 0;
|
|
|
|
errout:
|
2018-12-14 09:38:33 +07:00
|
|
|
kvfree(qname);
|
2017-08-08 09:54:31 +07:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int f2fs_clear_qf_name(struct super_block *sb, int qtype)
|
|
|
|
{
|
|
|
|
struct f2fs_sb_info *sbi = F2FS_SB(sb);
|
|
|
|
|
2018-03-08 13:22:56 +07:00
|
|
|
if (sb_any_quota_loaded(sb) && F2FS_OPTION(sbi).s_qf_names[qtype]) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_err(sbi, "Cannot change journaled quota options when quota turned on");
|
2017-08-08 09:54:31 +07:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
2018-12-14 09:38:33 +07:00
|
|
|
kvfree(F2FS_OPTION(sbi).s_qf_names[qtype]);
|
2018-03-08 13:22:56 +07:00
|
|
|
F2FS_OPTION(sbi).s_qf_names[qtype] = NULL;
|
2017-08-08 09:54:31 +07:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int f2fs_check_quota_options(struct f2fs_sb_info *sbi)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* We do the test below only for project quotas. 'usrquota' and
|
|
|
|
* 'grpquota' mount options are allowed even without quota feature
|
|
|
|
* to support legacy quotas in quota files.
|
|
|
|
*/
|
2018-10-24 17:34:26 +07:00
|
|
|
if (test_opt(sbi, PRJQUOTA) && !f2fs_sb_has_project_quota(sbi)) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_err(sbi, "Project quota feature not enabled. Cannot enable project quota enforcement.");
|
2017-08-08 09:54:31 +07:00
|
|
|
return -1;
|
|
|
|
}
|
2018-03-08 13:22:56 +07:00
|
|
|
if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA] ||
|
|
|
|
F2FS_OPTION(sbi).s_qf_names[GRPQUOTA] ||
|
|
|
|
F2FS_OPTION(sbi).s_qf_names[PRJQUOTA]) {
|
|
|
|
if (test_opt(sbi, USRQUOTA) &&
|
|
|
|
F2FS_OPTION(sbi).s_qf_names[USRQUOTA])
|
2017-08-08 09:54:31 +07:00
|
|
|
clear_opt(sbi, USRQUOTA);
|
|
|
|
|
2018-03-08 13:22:56 +07:00
|
|
|
if (test_opt(sbi, GRPQUOTA) &&
|
|
|
|
F2FS_OPTION(sbi).s_qf_names[GRPQUOTA])
|
2017-08-08 09:54:31 +07:00
|
|
|
clear_opt(sbi, GRPQUOTA);
|
|
|
|
|
2018-03-08 13:22:56 +07:00
|
|
|
if (test_opt(sbi, PRJQUOTA) &&
|
|
|
|
F2FS_OPTION(sbi).s_qf_names[PRJQUOTA])
|
2017-08-08 09:54:31 +07:00
|
|
|
clear_opt(sbi, PRJQUOTA);
|
|
|
|
|
|
|
|
if (test_opt(sbi, GRPQUOTA) || test_opt(sbi, USRQUOTA) ||
|
|
|
|
test_opt(sbi, PRJQUOTA)) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_err(sbi, "old and new quota format mixing");
|
2017-08-08 09:54:31 +07:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2018-03-08 13:22:56 +07:00
|
|
|
if (!F2FS_OPTION(sbi).s_jquota_fmt) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_err(sbi, "journaled quota format not specified");
|
2017-08-08 09:54:31 +07:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
}
|
2017-10-06 23:14:28 +07:00
|
|
|
|
2018-10-24 17:34:26 +07:00
|
|
|
if (f2fs_sb_has_quota_ino(sbi) && F2FS_OPTION(sbi).s_jquota_fmt) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_info(sbi, "QUOTA feature is enabled, so ignore jquota_fmt");
|
2018-03-08 13:22:56 +07:00
|
|
|
F2FS_OPTION(sbi).s_jquota_fmt = 0;
|
2017-10-06 23:14:28 +07:00
|
|
|
}
|
2017-08-08 09:54:31 +07:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2013-06-16 07:48:48 +07:00
|
|
|
static int parse_options(struct super_block *sb, char *options)
|
|
|
|
{
|
|
|
|
struct f2fs_sb_info *sbi = F2FS_SB(sb);
|
|
|
|
substring_t args[MAX_OPT_ARGS];
|
|
|
|
char *p, *name;
|
|
|
|
int arg = 0;
|
2018-01-05 12:36:09 +07:00
|
|
|
kuid_t uid;
|
|
|
|
kgid_t gid;
|
2017-08-08 09:54:31 +07:00
|
|
|
#ifdef CONFIG_QUOTA
|
|
|
|
int ret;
|
|
|
|
#endif
|
2013-06-16 07:48:48 +07:00
|
|
|
|
|
|
|
if (!options)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
while ((p = strsep(&options, ",")) != NULL) {
|
|
|
|
int token;
|
|
|
|
if (!*p)
|
|
|
|
continue;
|
|
|
|
/*
|
|
|
|
* Initialize args struct so we know whether arg was
|
|
|
|
* found; some options take optional arguments.
|
|
|
|
*/
|
|
|
|
args[0].to = args[0].from = NULL;
|
|
|
|
token = match_token(p, f2fs_tokens, args);
|
|
|
|
|
|
|
|
switch (token) {
|
|
|
|
case Opt_gc_background:
|
|
|
|
name = match_strdup(&args[0]);
|
|
|
|
|
|
|
|
if (!name)
|
|
|
|
return -ENOMEM;
|
2015-10-06 01:02:54 +07:00
|
|
|
if (strlen(name) == 2 && !strncmp(name, "on", 2)) {
|
2013-06-16 07:48:48 +07:00
|
|
|
set_opt(sbi, BG_GC);
|
2015-10-06 01:02:54 +07:00
|
|
|
clear_opt(sbi, FORCE_FG_GC);
|
|
|
|
} else if (strlen(name) == 3 && !strncmp(name, "off", 3)) {
|
2013-06-16 07:48:48 +07:00
|
|
|
clear_opt(sbi, BG_GC);
|
2015-10-06 01:02:54 +07:00
|
|
|
clear_opt(sbi, FORCE_FG_GC);
|
|
|
|
} else if (strlen(name) == 4 && !strncmp(name, "sync", 4)) {
|
|
|
|
set_opt(sbi, BG_GC);
|
|
|
|
set_opt(sbi, FORCE_FG_GC);
|
|
|
|
} else {
|
2018-12-14 09:38:33 +07:00
|
|
|
kvfree(name);
|
2013-06-16 07:48:48 +07:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
2018-12-14 09:38:33 +07:00
|
|
|
kvfree(name);
|
2013-06-16 07:48:48 +07:00
|
|
|
break;
|
|
|
|
case Opt_disable_roll_forward:
|
|
|
|
set_opt(sbi, DISABLE_ROLL_FORWARD);
|
|
|
|
break;
|
2015-01-24 09:33:46 +07:00
|
|
|
case Opt_norecovery:
|
|
|
|
/* this option mounts f2fs with ro */
|
|
|
|
set_opt(sbi, DISABLE_ROLL_FORWARD);
|
|
|
|
if (!f2fs_readonly(sb))
|
|
|
|
return -EINVAL;
|
|
|
|
break;
|
2013-06-16 07:48:48 +07:00
|
|
|
case Opt_discard:
|
f2fs: fix to avoid NULL pointer dereference on se->discard_map
https://bugzilla.kernel.org/show_bug.cgi?id=200951
There is a NULL pointer dereference issue reported in bugzilla:
Hi,
in the setup there is a SATA SSD connected to a SATA-to-USB bridge.
The disc is "Samsung SSD 850 PRO 256G" which supports TRIM.
There are four partitions:
sda1: FAT /boot
sda2: F2FS /
sda3: F2FS /home
sda4: F2FS
The bridge is ASMT1153e which uses the "uas" driver.
There is no TRIM pass-through, so, when mounting it reports:
mounting with "discard" option, but the device does not support discard
The USB host is USB3.0 and UASP capable. It is the one on RK3399.
Given this everything works fine, except there is no TRIM support.
In order to enable TRIM a new UDEV rule is added [1]:
/etc/udev/rules.d/10-sata-bridge-trim.rules:
ACTION=="add|change", ATTRS{idVendor}=="174c", ATTRS{idProduct}=="55aa", SUBSYSTEM=="scsi_disk", ATTR{provisioning_mode}="unmap"
After reboot any F2FS write hangs forever and dmesg reports:
Unable to handle kernel NULL pointer dereference
Also tested on a x86_64 system: works fine even with TRIM enabled.
same disc
same bridge
different usb host controller
different cpu architecture
not root filesystem
Regards,
Vicenç.
[1] Post #5 in https://bbs.archlinux.org/viewtopic.php?id=236280
Unable to handle kernel NULL pointer dereference at virtual address 000000000000003e
Mem abort info:
ESR = 0x96000004
Exception class = DABT (current EL), IL = 32 bits
SET = 0, FnV = 0
EA = 0, S1PTW = 0
Data abort info:
ISV = 0, ISS = 0x00000004
CM = 0, WnR = 0
user pgtable: 4k pages, 48-bit VAs, pgdp = 00000000626e3122
[000000000000003e] pgd=0000000000000000
Internal error: Oops: 96000004 [#1] SMP
Modules linked in: overlay snd_soc_hdmi_codec rc_cec dw_hdmi_i2s_audio dw_hdmi_cec snd_soc_simple_card snd_soc_simple_card_utils snd_soc_rockchip_i2s rockchip_rga snd_soc_rockchip_pcm rockchipdrm videobuf2_dma_sg v4l2_mem2mem rtc_rk808 videobuf2_memops analogix_dp videobuf2_v4l2 videobuf2_common dw_hdmi dw_wdt cec rc_core videodev drm_kms_helper media drm rockchip_thermal rockchip_saradc realtek drm_panel_orientation_quirks syscopyarea sysfillrect sysimgblt fb_sys_fops dwmac_rk stmmac_platform stmmac pwm_bl squashfs loop crypto_user gpio_keys hid_kensington
CPU: 5 PID: 957 Comm: nvim Not tainted 4.19.0-rc1-1-ARCH #1
Hardware name: Sapphire-RK3399 Board (DT)
pstate: 00000005 (nzcv daif -PAN -UAO)
pc : update_sit_entry+0x304/0x4b0
lr : update_sit_entry+0x108/0x4b0
sp : ffff00000ca13bd0
x29: ffff00000ca13bd0 x28: 000000000000003e
x27: 0000000000000020 x26: 0000000000080000
x25: 0000000000000048 x24: ffff8000ebb85cf8
x23: 0000000000000253 x22: 00000000ffffffff
x21: 00000000000535f2 x20: 00000000ffffffdf
x19: ffff8000eb9e6800 x18: ffff8000eb9e6be8
x17: 0000000007ce6926 x16: 000000001c83ffa8
x15: 0000000000000000 x14: ffff8000f602df90
x13: 0000000000000006 x12: 0000000000000040
x11: 0000000000000228 x10: 0000000000000000
x9 : 0000000000000000 x8 : 0000000000000000
x7 : 00000000000535f2 x6 : ffff8000ebff3440
x5 : ffff8000ebff3440 x4 : ffff8000ebe3a6c8
x3 : 00000000ffffffff x2 : 0000000000000020
x1 : 0000000000000000 x0 : ffff8000eb9e5800
Process nvim (pid: 957, stack limit = 0x0000000063a78320)
Call trace:
update_sit_entry+0x304/0x4b0
f2fs_invalidate_blocks+0x98/0x140
truncate_node+0x90/0x400
f2fs_remove_inode_page+0xe8/0x340
f2fs_evict_inode+0x2b0/0x408
evict+0xe0/0x1e0
iput+0x160/0x260
do_unlinkat+0x214/0x298
__arm64_sys_unlinkat+0x3c/0x68
el0_svc_handler+0x94/0x118
el0_svc+0x8/0xc
Code: f9400800 b9488400 36080140 f9400f01 (387c4820)
---[ end trace a0f21a307118c477 ]---
The reason is it is possible to enable discard flag on block queue via
UDEV, but during mount, f2fs will initialize se->discard_map only if
this flag is set, once the flag is set after mount, f2fs may dereference
NULL pointer on se->discard_map.
So this patch does below changes to fix this issue:
- initialize and update se->discard_map all the time.
- don't clear DISCARD option if device has no QUEUE_FLAG_DISCARD flag
during mount.
- don't issue small discard on zoned block device.
- introduce some functions to enhance the readability.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Tested-by: Vicente Bergas <vicencb@gmail.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-09-04 02:52:17 +07:00
|
|
|
set_opt(sbi, DISCARD);
|
2013-06-16 07:48:48 +07:00
|
|
|
break;
|
2016-07-03 21:05:14 +07:00
|
|
|
case Opt_nodiscard:
|
2018-10-24 17:34:26 +07:00
|
|
|
if (f2fs_sb_has_blkzoned(sbi)) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_warn(sbi, "discard is required for zoned block devices");
|
2016-10-28 15:45:03 +07:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
2016-07-03 21:05:14 +07:00
|
|
|
clear_opt(sbi, DISCARD);
|
2016-10-28 15:44:59 +07:00
|
|
|
break;
|
2013-06-16 07:48:48 +07:00
|
|
|
case Opt_noheap:
|
|
|
|
set_opt(sbi, NOHEAP);
|
|
|
|
break;
|
2017-03-25 07:41:45 +07:00
|
|
|
case Opt_heap:
|
|
|
|
clear_opt(sbi, NOHEAP);
|
|
|
|
break;
|
2013-06-16 07:48:48 +07:00
|
|
|
#ifdef CONFIG_F2FS_FS_XATTR
|
2013-10-07 09:36:20 +07:00
|
|
|
case Opt_user_xattr:
|
|
|
|
set_opt(sbi, XATTR_USER);
|
|
|
|
break;
|
2013-06-16 07:48:48 +07:00
|
|
|
case Opt_nouser_xattr:
|
|
|
|
clear_opt(sbi, XATTR_USER);
|
|
|
|
break;
|
2013-08-08 13:16:22 +07:00
|
|
|
case Opt_inline_xattr:
|
|
|
|
set_opt(sbi, INLINE_XATTR);
|
|
|
|
break;
|
2017-02-15 09:34:45 +07:00
|
|
|
case Opt_noinline_xattr:
|
|
|
|
clear_opt(sbi, INLINE_XATTR);
|
|
|
|
break;
|
f2fs: support flexible inline xattr size
Now, in product, more and more features based on file encryption were
introduced, their demand of xattr space is increasing, however, inline
xattr has fixed-size of 200 bytes, once inline xattr space is full, new
increased xattr data would occupy additional xattr block which may bring
us more space usage and performance regression during persisting.
In order to resolve above issue, it's better to expand inline xattr size
flexibly according to user's requirement.
So this patch introduces new filesystem feature 'flexible inline xattr',
and new mount option 'inline_xattr_size=%u', once mkfs enables the
feature, we can use the option to make f2fs supporting flexible inline
xattr size.
To support this feature, we add extra attribute i_inline_xattr_size in
inode layout, indicating that how many space inline xattr borrows from
block address mapping space in inode layout, by this, we can easily
locate and store flexible-sized inline xattr data in inode.
Inode disk layout:
+----------------------+
| .i_mode |
| ... |
| .i_ext |
+----------------------+
| .i_extra_isize |
| .i_inline_xattr_size |-----------+
| ... | |
+----------------------+ |
| .i_addr | |
| - block address or | |
| - inline data | |
+----------------------+<---+ v
| inline xattr | +---inline xattr range
+----------------------+<---+
| .i_nid |
+----------------------+
| node_footer |
| (nid, ino, offset) |
+----------------------+
Note that, we have to consider backward compatibility which reserved
inline_data space, 200 bytes, all the time, reported by Sheng Yong.
Previous inline data or directory always reserved 200 bytes in inode layout,
even if inline_xattr is disabled. In order to keep inline_dentry's structure
for backward compatibility, we get the space back only from inline_data.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Reported-by: Sheng Yong <shengyong1@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-09-06 20:59:50 +07:00
|
|
|
case Opt_inline_xattr_size:
|
|
|
|
if (args->from && match_int(args, &arg))
|
|
|
|
return -EINVAL;
|
|
|
|
set_opt(sbi, INLINE_XATTR_SIZE);
|
2018-03-08 13:22:56 +07:00
|
|
|
F2FS_OPTION(sbi).inline_xattr_size = arg;
|
f2fs: support flexible inline xattr size
Now, in product, more and more features based on file encryption were
introduced, their demand of xattr space is increasing, however, inline
xattr has fixed-size of 200 bytes, once inline xattr space is full, new
increased xattr data would occupy additional xattr block which may bring
us more space usage and performance regression during persisting.
In order to resolve above issue, it's better to expand inline xattr size
flexibly according to user's requirement.
So this patch introduces new filesystem feature 'flexible inline xattr',
and new mount option 'inline_xattr_size=%u', once mkfs enables the
feature, we can use the option to make f2fs supporting flexible inline
xattr size.
To support this feature, we add extra attribute i_inline_xattr_size in
inode layout, indicating that how many space inline xattr borrows from
block address mapping space in inode layout, by this, we can easily
locate and store flexible-sized inline xattr data in inode.
Inode disk layout:
+----------------------+
| .i_mode |
| ... |
| .i_ext |
+----------------------+
| .i_extra_isize |
| .i_inline_xattr_size |-----------+
| ... | |
+----------------------+ |
| .i_addr | |
| - block address or | |
| - inline data | |
+----------------------+<---+ v
| inline xattr | +---inline xattr range
+----------------------+<---+
| .i_nid |
+----------------------+
| node_footer |
| (nid, ino, offset) |
+----------------------+
Note that, we have to consider backward compatibility which reserved
inline_data space, 200 bytes, all the time, reported by Sheng Yong.
Previous inline data or directory always reserved 200 bytes in inode layout,
even if inline_xattr is disabled. In order to keep inline_dentry's structure
for backward compatibility, we get the space back only from inline_data.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Reported-by: Sheng Yong <shengyong1@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-09-06 20:59:50 +07:00
|
|
|
break;
|
2013-06-16 07:48:48 +07:00
|
|
|
#else
|
2013-10-07 09:36:20 +07:00
|
|
|
case Opt_user_xattr:
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_info(sbi, "user_xattr options not supported");
|
2013-10-07 09:36:20 +07:00
|
|
|
break;
|
2013-06-16 07:48:48 +07:00
|
|
|
case Opt_nouser_xattr:
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_info(sbi, "nouser_xattr options not supported");
|
2013-06-16 07:48:48 +07:00
|
|
|
break;
|
2013-08-08 13:16:22 +07:00
|
|
|
case Opt_inline_xattr:
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_info(sbi, "inline_xattr options not supported");
|
2013-08-08 13:16:22 +07:00
|
|
|
break;
|
2017-02-15 09:34:45 +07:00
|
|
|
case Opt_noinline_xattr:
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_info(sbi, "noinline_xattr options not supported");
|
2017-02-15 09:34:45 +07:00
|
|
|
break;
|
2013-06-16 07:48:48 +07:00
|
|
|
#endif
|
|
|
|
#ifdef CONFIG_F2FS_FS_POSIX_ACL
|
2013-10-07 09:36:20 +07:00
|
|
|
case Opt_acl:
|
|
|
|
set_opt(sbi, POSIX_ACL);
|
|
|
|
break;
|
2013-06-16 07:48:48 +07:00
|
|
|
case Opt_noacl:
|
|
|
|
clear_opt(sbi, POSIX_ACL);
|
|
|
|
break;
|
|
|
|
#else
|
2013-10-07 09:36:20 +07:00
|
|
|
case Opt_acl:
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_info(sbi, "acl options not supported");
|
2013-10-07 09:36:20 +07:00
|
|
|
break;
|
2013-06-16 07:48:48 +07:00
|
|
|
case Opt_noacl:
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_info(sbi, "noacl options not supported");
|
2013-06-16 07:48:48 +07:00
|
|
|
break;
|
|
|
|
#endif
|
|
|
|
case Opt_active_logs:
|
|
|
|
if (args->from && match_int(args, &arg))
|
|
|
|
return -EINVAL;
|
|
|
|
if (arg != 2 && arg != 4 && arg != NR_CURSEG_TYPE)
|
|
|
|
return -EINVAL;
|
2018-03-08 13:22:56 +07:00
|
|
|
F2FS_OPTION(sbi).active_logs = arg;
|
2013-06-16 07:48:48 +07:00
|
|
|
break;
|
|
|
|
case Opt_disable_ext_identify:
|
|
|
|
set_opt(sbi, DISABLE_EXT_IDENTIFY);
|
|
|
|
break;
|
2013-11-10 22:13:17 +07:00
|
|
|
case Opt_inline_data:
|
|
|
|
set_opt(sbi, INLINE_DATA);
|
|
|
|
break;
|
2014-09-24 17:16:13 +07:00
|
|
|
case Opt_inline_dentry:
|
|
|
|
set_opt(sbi, INLINE_DENTRY);
|
|
|
|
break;
|
2016-05-09 18:56:34 +07:00
|
|
|
case Opt_noinline_dentry:
|
|
|
|
clear_opt(sbi, INLINE_DENTRY);
|
|
|
|
break;
|
2014-04-02 13:34:36 +07:00
|
|
|
case Opt_flush_merge:
|
|
|
|
set_opt(sbi, FLUSH_MERGE);
|
|
|
|
break;
|
2016-05-21 12:39:20 +07:00
|
|
|
case Opt_noflush_merge:
|
|
|
|
clear_opt(sbi, FLUSH_MERGE);
|
|
|
|
break;
|
2014-07-23 23:57:31 +07:00
|
|
|
case Opt_nobarrier:
|
|
|
|
set_opt(sbi, NOBARRIER);
|
|
|
|
break;
|
2014-10-31 12:47:03 +07:00
|
|
|
case Opt_fastboot:
|
|
|
|
set_opt(sbi, FASTBOOT);
|
|
|
|
break;
|
2015-02-05 16:55:51 +07:00
|
|
|
case Opt_extent_cache:
|
|
|
|
set_opt(sbi, EXTENT_CACHE);
|
|
|
|
break;
|
2015-06-26 07:43:04 +07:00
|
|
|
case Opt_noextent_cache:
|
|
|
|
clear_opt(sbi, EXTENT_CACHE);
|
|
|
|
break;
|
2015-03-24 09:20:27 +07:00
|
|
|
case Opt_noinline_data:
|
|
|
|
clear_opt(sbi, INLINE_DATA);
|
|
|
|
break;
|
2015-12-16 12:12:16 +07:00
|
|
|
case Opt_data_flush:
|
|
|
|
set_opt(sbi, DATA_FLUSH);
|
|
|
|
break;
|
2017-12-28 06:05:52 +07:00
|
|
|
case Opt_reserve_root:
|
|
|
|
if (args->from && match_int(args, &arg))
|
|
|
|
return -EINVAL;
|
|
|
|
if (test_opt(sbi, RESERVE_ROOT)) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_info(sbi, "Preserve previous reserve_root=%u",
|
|
|
|
F2FS_OPTION(sbi).root_reserved_blocks);
|
2017-12-28 06:05:52 +07:00
|
|
|
} else {
|
2018-03-08 13:22:56 +07:00
|
|
|
F2FS_OPTION(sbi).root_reserved_blocks = arg;
|
2017-12-28 06:05:52 +07:00
|
|
|
set_opt(sbi, RESERVE_ROOT);
|
|
|
|
}
|
|
|
|
break;
|
2018-01-05 12:36:09 +07:00
|
|
|
case Opt_resuid:
|
|
|
|
if (args->from && match_int(args, &arg))
|
|
|
|
return -EINVAL;
|
|
|
|
uid = make_kuid(current_user_ns(), arg);
|
|
|
|
if (!uid_valid(uid)) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_err(sbi, "Invalid uid value %d", arg);
|
2018-01-05 12:36:09 +07:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
2018-03-08 13:22:56 +07:00
|
|
|
F2FS_OPTION(sbi).s_resuid = uid;
|
2018-01-05 12:36:09 +07:00
|
|
|
break;
|
|
|
|
case Opt_resgid:
|
|
|
|
if (args->from && match_int(args, &arg))
|
|
|
|
return -EINVAL;
|
|
|
|
gid = make_kgid(current_user_ns(), arg);
|
|
|
|
if (!gid_valid(gid)) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_err(sbi, "Invalid gid value %d", arg);
|
2018-01-05 12:36:09 +07:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
2018-03-08 13:22:56 +07:00
|
|
|
F2FS_OPTION(sbi).s_resgid = gid;
|
2018-01-05 12:36:09 +07:00
|
|
|
break;
|
2016-06-04 09:29:38 +07:00
|
|
|
case Opt_mode:
|
|
|
|
name = match_strdup(&args[0]);
|
|
|
|
|
|
|
|
if (!name)
|
|
|
|
return -ENOMEM;
|
|
|
|
if (strlen(name) == 8 &&
|
|
|
|
!strncmp(name, "adaptive", 8)) {
|
2018-10-24 17:34:26 +07:00
|
|
|
if (f2fs_sb_has_blkzoned(sbi)) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_warn(sbi, "adaptive mode is not allowed with zoned block device feature");
|
2018-12-14 09:38:33 +07:00
|
|
|
kvfree(name);
|
2016-10-28 15:45:04 +07:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
2016-06-13 23:47:48 +07:00
|
|
|
set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE);
|
2016-06-04 09:29:38 +07:00
|
|
|
} else if (strlen(name) == 3 &&
|
|
|
|
!strncmp(name, "lfs", 3)) {
|
2016-06-13 23:47:48 +07:00
|
|
|
set_opt_mode(sbi, F2FS_MOUNT_LFS);
|
2016-06-04 09:29:38 +07:00
|
|
|
} else {
|
2018-12-14 09:38:33 +07:00
|
|
|
kvfree(name);
|
2016-06-04 09:29:38 +07:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
2018-12-14 09:38:33 +07:00
|
|
|
kvfree(name);
|
2016-06-04 09:29:38 +07:00
|
|
|
break;
|
2016-12-22 08:09:19 +07:00
|
|
|
case Opt_io_size_bits:
|
|
|
|
if (args->from && match_int(args, &arg))
|
|
|
|
return -EINVAL;
|
2019-02-14 23:16:15 +07:00
|
|
|
if (arg <= 0 || arg > __ilog2_u32(BIO_MAX_PAGES)) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_warn(sbi, "Not support %d, larger than %d",
|
|
|
|
1 << arg, BIO_MAX_PAGES);
|
2016-12-22 08:09:19 +07:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
2018-03-08 13:22:56 +07:00
|
|
|
F2FS_OPTION(sbi).write_io_size_bits = arg;
|
2016-12-22 08:09:19 +07:00
|
|
|
break;
|
2018-09-12 12:32:52 +07:00
|
|
|
#ifdef CONFIG_F2FS_FAULT_INJECTION
|
2016-04-30 05:34:32 +07:00
|
|
|
case Opt_fault_injection:
|
|
|
|
if (args->from && match_int(args, &arg))
|
|
|
|
return -EINVAL;
|
2018-08-08 16:36:41 +07:00
|
|
|
f2fs_build_fault_attr(sbi, arg, F2FS_ALL_FAULT_TYPE);
|
|
|
|
set_opt(sbi, FAULT_INJECTION);
|
|
|
|
break;
|
2018-09-12 12:32:52 +07:00
|
|
|
|
2018-08-08 16:36:41 +07:00
|
|
|
case Opt_fault_type:
|
|
|
|
if (args->from && match_int(args, &arg))
|
|
|
|
return -EINVAL;
|
|
|
|
f2fs_build_fault_attr(sbi, 0, arg);
|
2017-01-27 08:35:37 +07:00
|
|
|
set_opt(sbi, FAULT_INJECTION);
|
2018-09-12 12:32:52 +07:00
|
|
|
break;
|
2016-04-30 05:34:32 +07:00
|
|
|
#else
|
2018-09-12 12:32:52 +07:00
|
|
|
case Opt_fault_injection:
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_info(sbi, "fault_injection options not supported");
|
2016-04-30 05:34:32 +07:00
|
|
|
break;
|
2018-09-12 12:32:52 +07:00
|
|
|
|
|
|
|
case Opt_fault_type:
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_info(sbi, "fault_type options not supported");
|
2018-09-12 12:32:52 +07:00
|
|
|
break;
|
|
|
|
#endif
|
2016-05-21 11:47:24 +07:00
|
|
|
case Opt_lazytime:
|
2017-11-28 04:05:09 +07:00
|
|
|
sb->s_flags |= SB_LAZYTIME;
|
2016-05-21 11:47:24 +07:00
|
|
|
break;
|
|
|
|
case Opt_nolazytime:
|
2017-11-28 04:05:09 +07:00
|
|
|
sb->s_flags &= ~SB_LAZYTIME;
|
2016-05-21 11:47:24 +07:00
|
|
|
break;
|
2017-07-08 23:13:07 +07:00
|
|
|
#ifdef CONFIG_QUOTA
|
2017-08-08 09:54:31 +07:00
|
|
|
case Opt_quota:
|
2017-07-08 23:13:07 +07:00
|
|
|
case Opt_usrquota:
|
|
|
|
set_opt(sbi, USRQUOTA);
|
|
|
|
break;
|
|
|
|
case Opt_grpquota:
|
|
|
|
set_opt(sbi, GRPQUOTA);
|
|
|
|
break;
|
2017-07-25 23:01:41 +07:00
|
|
|
case Opt_prjquota:
|
|
|
|
set_opt(sbi, PRJQUOTA);
|
|
|
|
break;
|
2017-08-08 09:54:31 +07:00
|
|
|
case Opt_usrjquota:
|
|
|
|
ret = f2fs_set_qf_name(sb, USRQUOTA, &args[0]);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
break;
|
|
|
|
case Opt_grpjquota:
|
|
|
|
ret = f2fs_set_qf_name(sb, GRPQUOTA, &args[0]);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
break;
|
|
|
|
case Opt_prjjquota:
|
|
|
|
ret = f2fs_set_qf_name(sb, PRJQUOTA, &args[0]);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
break;
|
|
|
|
case Opt_offusrjquota:
|
|
|
|
ret = f2fs_clear_qf_name(sb, USRQUOTA);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
break;
|
|
|
|
case Opt_offgrpjquota:
|
|
|
|
ret = f2fs_clear_qf_name(sb, GRPQUOTA);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
break;
|
|
|
|
case Opt_offprjjquota:
|
|
|
|
ret = f2fs_clear_qf_name(sb, PRJQUOTA);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
break;
|
|
|
|
case Opt_jqfmt_vfsold:
|
2018-03-08 13:22:56 +07:00
|
|
|
F2FS_OPTION(sbi).s_jquota_fmt = QFMT_VFS_OLD;
|
2017-08-08 09:54:31 +07:00
|
|
|
break;
|
|
|
|
case Opt_jqfmt_vfsv0:
|
2018-03-08 13:22:56 +07:00
|
|
|
F2FS_OPTION(sbi).s_jquota_fmt = QFMT_VFS_V0;
|
2017-08-08 09:54:31 +07:00
|
|
|
break;
|
|
|
|
case Opt_jqfmt_vfsv1:
|
2018-03-08 13:22:56 +07:00
|
|
|
F2FS_OPTION(sbi).s_jquota_fmt = QFMT_VFS_V1;
|
2017-08-08 09:54:31 +07:00
|
|
|
break;
|
|
|
|
case Opt_noquota:
|
|
|
|
clear_opt(sbi, QUOTA);
|
|
|
|
clear_opt(sbi, USRQUOTA);
|
|
|
|
clear_opt(sbi, GRPQUOTA);
|
|
|
|
clear_opt(sbi, PRJQUOTA);
|
|
|
|
break;
|
2017-07-08 23:13:07 +07:00
|
|
|
#else
|
2017-08-08 09:54:31 +07:00
|
|
|
case Opt_quota:
|
2017-07-08 23:13:07 +07:00
|
|
|
case Opt_usrquota:
|
|
|
|
case Opt_grpquota:
|
2017-07-25 23:01:41 +07:00
|
|
|
case Opt_prjquota:
|
2017-08-08 09:54:31 +07:00
|
|
|
case Opt_usrjquota:
|
|
|
|
case Opt_grpjquota:
|
|
|
|
case Opt_prjjquota:
|
|
|
|
case Opt_offusrjquota:
|
|
|
|
case Opt_offgrpjquota:
|
|
|
|
case Opt_offprjjquota:
|
|
|
|
case Opt_jqfmt_vfsold:
|
|
|
|
case Opt_jqfmt_vfsv0:
|
|
|
|
case Opt_jqfmt_vfsv1:
|
|
|
|
case Opt_noquota:
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_info(sbi, "quota operations not supported");
|
2017-07-08 23:13:07 +07:00
|
|
|
break;
|
|
|
|
#endif
|
2018-01-31 09:36:57 +07:00
|
|
|
case Opt_whint:
|
|
|
|
name = match_strdup(&args[0]);
|
|
|
|
if (!name)
|
|
|
|
return -ENOMEM;
|
|
|
|
if (strlen(name) == 10 &&
|
|
|
|
!strncmp(name, "user-based", 10)) {
|
2018-03-08 13:22:56 +07:00
|
|
|
F2FS_OPTION(sbi).whint_mode = WHINT_MODE_USER;
|
2018-01-31 09:36:57 +07:00
|
|
|
} else if (strlen(name) == 3 &&
|
|
|
|
!strncmp(name, "off", 3)) {
|
2018-03-08 13:22:56 +07:00
|
|
|
F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF;
|
2018-01-31 09:36:58 +07:00
|
|
|
} else if (strlen(name) == 8 &&
|
|
|
|
!strncmp(name, "fs-based", 8)) {
|
2018-03-08 13:22:56 +07:00
|
|
|
F2FS_OPTION(sbi).whint_mode = WHINT_MODE_FS;
|
2018-01-31 09:36:57 +07:00
|
|
|
} else {
|
2018-12-14 09:38:33 +07:00
|
|
|
kvfree(name);
|
2018-01-31 09:36:57 +07:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
2018-12-14 09:38:33 +07:00
|
|
|
kvfree(name);
|
2018-01-31 09:36:57 +07:00
|
|
|
break;
|
2018-02-18 23:50:49 +07:00
|
|
|
case Opt_alloc:
|
|
|
|
name = match_strdup(&args[0]);
|
|
|
|
if (!name)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
if (strlen(name) == 7 &&
|
|
|
|
!strncmp(name, "default", 7)) {
|
2018-03-08 13:22:56 +07:00
|
|
|
F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT;
|
2018-02-18 23:50:49 +07:00
|
|
|
} else if (strlen(name) == 5 &&
|
|
|
|
!strncmp(name, "reuse", 5)) {
|
2018-03-08 13:22:56 +07:00
|
|
|
F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE;
|
2018-02-18 23:50:49 +07:00
|
|
|
} else {
|
2018-12-14 09:38:33 +07:00
|
|
|
kvfree(name);
|
2018-02-18 23:50:49 +07:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
2018-12-14 09:38:33 +07:00
|
|
|
kvfree(name);
|
2018-02-18 23:50:49 +07:00
|
|
|
break;
|
2018-03-07 11:07:49 +07:00
|
|
|
case Opt_fsync:
|
|
|
|
name = match_strdup(&args[0]);
|
|
|
|
if (!name)
|
|
|
|
return -ENOMEM;
|
|
|
|
if (strlen(name) == 5 &&
|
|
|
|
!strncmp(name, "posix", 5)) {
|
2018-03-08 13:22:56 +07:00
|
|
|
F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_POSIX;
|
2018-03-07 11:07:49 +07:00
|
|
|
} else if (strlen(name) == 6 &&
|
|
|
|
!strncmp(name, "strict", 6)) {
|
2018-03-08 13:22:56 +07:00
|
|
|
F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_STRICT;
|
2018-05-26 08:02:58 +07:00
|
|
|
} else if (strlen(name) == 9 &&
|
|
|
|
!strncmp(name, "nobarrier", 9)) {
|
|
|
|
F2FS_OPTION(sbi).fsync_mode =
|
|
|
|
FSYNC_MODE_NOBARRIER;
|
2018-03-07 11:07:49 +07:00
|
|
|
} else {
|
2018-12-14 09:38:33 +07:00
|
|
|
kvfree(name);
|
2018-03-07 11:07:49 +07:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
2018-12-14 09:38:33 +07:00
|
|
|
kvfree(name);
|
2018-03-07 11:07:49 +07:00
|
|
|
break;
|
2018-03-15 17:51:42 +07:00
|
|
|
case Opt_test_dummy_encryption:
|
2018-12-12 16:50:12 +07:00
|
|
|
#ifdef CONFIG_FS_ENCRYPTION
|
2018-10-24 17:34:26 +07:00
|
|
|
if (!f2fs_sb_has_encrypt(sbi)) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_err(sbi, "Encrypt feature is off");
|
2018-03-15 17:51:42 +07:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
F2FS_OPTION(sbi).test_dummy_encryption = true;
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_info(sbi, "Test dummy encryption mode enabled");
|
2018-03-15 17:51:42 +07:00
|
|
|
#else
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_info(sbi, "Test dummy encryption mount option ignored");
|
2018-03-15 17:51:42 +07:00
|
|
|
#endif
|
|
|
|
break;
|
2019-05-30 07:49:06 +07:00
|
|
|
case Opt_checkpoint_disable_cap_perc:
|
|
|
|
if (args->from && match_int(args, &arg))
|
2018-08-21 09:21:43 +07:00
|
|
|
return -EINVAL;
|
2019-05-30 07:49:06 +07:00
|
|
|
if (arg < 0 || arg > 100)
|
|
|
|
return -EINVAL;
|
|
|
|
if (arg == 100)
|
|
|
|
F2FS_OPTION(sbi).unusable_cap =
|
|
|
|
sbi->user_block_count;
|
|
|
|
else
|
|
|
|
F2FS_OPTION(sbi).unusable_cap =
|
|
|
|
(sbi->user_block_count / 100) * arg;
|
|
|
|
set_opt(sbi, DISABLE_CHECKPOINT);
|
|
|
|
break;
|
|
|
|
case Opt_checkpoint_disable_cap:
|
|
|
|
if (args->from && match_int(args, &arg))
|
|
|
|
return -EINVAL;
|
|
|
|
F2FS_OPTION(sbi).unusable_cap = arg;
|
|
|
|
set_opt(sbi, DISABLE_CHECKPOINT);
|
|
|
|
break;
|
|
|
|
case Opt_checkpoint_disable:
|
|
|
|
set_opt(sbi, DISABLE_CHECKPOINT);
|
|
|
|
break;
|
|
|
|
case Opt_checkpoint_enable:
|
|
|
|
clear_opt(sbi, DISABLE_CHECKPOINT);
|
2018-08-21 09:21:43 +07:00
|
|
|
break;
|
2013-06-16 07:48:48 +07:00
|
|
|
default:
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value",
|
|
|
|
p);
|
2013-06-16 07:48:48 +07:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
}
|
2017-08-08 09:54:31 +07:00
|
|
|
#ifdef CONFIG_QUOTA
|
|
|
|
if (f2fs_check_quota_options(sbi))
|
|
|
|
return -EINVAL;
|
2018-07-24 19:17:52 +07:00
|
|
|
#else
|
2018-10-24 17:34:26 +07:00
|
|
|
if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sbi->sb)) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_info(sbi, "Filesystem with quota feature cannot be mounted RDWR without CONFIG_QUOTA");
|
2018-07-24 19:17:52 +07:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
2018-10-24 17:34:26 +07:00
|
|
|
if (f2fs_sb_has_project_quota(sbi) && !f2fs_readonly(sbi->sb)) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_err(sbi, "Filesystem with project quota feature cannot be mounted RDWR without CONFIG_QUOTA");
|
2018-07-26 06:19:48 +07:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
2017-08-08 09:54:31 +07:00
|
|
|
#endif
|
2016-12-22 08:09:19 +07:00
|
|
|
|
|
|
|
if (F2FS_IO_SIZE_BITS(sbi) && !test_opt(sbi, LFS)) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_err(sbi, "Should set mode=lfs with %uKB-sized IO",
|
|
|
|
F2FS_IO_SIZE_KB(sbi));
|
2016-12-22 08:09:19 +07:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
f2fs: support flexible inline xattr size
Now, in product, more and more features based on file encryption were
introduced, their demand of xattr space is increasing, however, inline
xattr has fixed-size of 200 bytes, once inline xattr space is full, new
increased xattr data would occupy additional xattr block which may bring
us more space usage and performance regression during persisting.
In order to resolve above issue, it's better to expand inline xattr size
flexibly according to user's requirement.
So this patch introduces new filesystem feature 'flexible inline xattr',
and new mount option 'inline_xattr_size=%u', once mkfs enables the
feature, we can use the option to make f2fs supporting flexible inline
xattr size.
To support this feature, we add extra attribute i_inline_xattr_size in
inode layout, indicating that how many space inline xattr borrows from
block address mapping space in inode layout, by this, we can easily
locate and store flexible-sized inline xattr data in inode.
Inode disk layout:
+----------------------+
| .i_mode |
| ... |
| .i_ext |
+----------------------+
| .i_extra_isize |
| .i_inline_xattr_size |-----------+
| ... | |
+----------------------+ |
| .i_addr | |
| - block address or | |
| - inline data | |
+----------------------+<---+ v
| inline xattr | +---inline xattr range
+----------------------+<---+
| .i_nid |
+----------------------+
| node_footer |
| (nid, ino, offset) |
+----------------------+
Note that we have to consider backward compatibility, which reserved
inline_data space, 200 bytes, all the time, as reported by Sheng Yong.
Previous inline data or directory always reserved 200 bytes in inode layout,
even if inline_xattr is disabled. In order to keep inline_dentry's structure
for backward compatibility, we get the space back only from inline_data.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Reported-by: Sheng Yong <shengyong1@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-09-06 20:59:50 +07:00
|
|
|
|
|
|
|
if (test_opt(sbi, INLINE_XATTR_SIZE)) {
|
2019-03-13 01:49:53 +07:00
|
|
|
int min_size, max_size;
|
|
|
|
|
2018-10-24 17:34:26 +07:00
|
|
|
if (!f2fs_sb_has_extra_attr(sbi) ||
|
|
|
|
!f2fs_sb_has_flexible_inline_xattr(sbi)) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_err(sbi, "extra_attr or flexible_inline_xattr feature is off");
|
2018-01-27 16:29:48 +07:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
f2fs: support flexible inline xattr size
Now, in product, more and more features based on file encryption were
introduced, their demand of xattr space is increasing, however, inline
xattr has fixed-size of 200 bytes, once inline xattr space is full, new
increased xattr data would occupy additional xattr block which may bring
us more space usage and performance regression during persisting.
In order to resolve above issue, it's better to expand inline xattr size
flexibly according to user's requirement.
So this patch introduces new filesystem feature 'flexible inline xattr',
and new mount option 'inline_xattr_size=%u', once mkfs enables the
feature, we can use the option to make f2fs supporting flexible inline
xattr size.
To support this feature, we add extra attribute i_inline_xattr_size in
inode layout, indicating that how many space inline xattr borrows from
block address mapping space in inode layout, by this, we can easily
locate and store flexible-sized inline xattr data in inode.
Inode disk layout:
+----------------------+
| .i_mode |
| ... |
| .i_ext |
+----------------------+
| .i_extra_isize |
| .i_inline_xattr_size |-----------+
| ... | |
+----------------------+ |
| .i_addr | |
| - block address or | |
| - inline data | |
+----------------------+<---+ v
| inline xattr | +---inline xattr range
+----------------------+<---+
| .i_nid |
+----------------------+
| node_footer |
| (nid, ino, offset) |
+----------------------+
Note that we have to consider backward compatibility, which reserved
inline_data space, 200 bytes, all the time, as reported by Sheng Yong.
Previous inline data or directory always reserved 200 bytes in inode layout,
even if inline_xattr is disabled. In order to keep inline_dentry's structure
for backward compatibility, we get the space back only from inline_data.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Reported-by: Sheng Yong <shengyong1@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-09-06 20:59:50 +07:00
|
|
|
if (!test_opt(sbi, INLINE_XATTR)) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_err(sbi, "inline_xattr_size option should be set with inline_xattr option");
|
f2fs: support flexible inline xattr size
Now, in product, more and more features based on file encryption were
introduced, their demand of xattr space is increasing, however, inline
xattr has fixed-size of 200 bytes, once inline xattr space is full, new
increased xattr data would occupy additional xattr block which may bring
us more space usage and performance regression during persisting.
In order to resolve above issue, it's better to expand inline xattr size
flexibly according to user's requirement.
So this patch introduces new filesystem feature 'flexible inline xattr',
and new mount option 'inline_xattr_size=%u', once mkfs enables the
feature, we can use the option to make f2fs supporting flexible inline
xattr size.
To support this feature, we add extra attribute i_inline_xattr_size in
inode layout, indicating that how many space inline xattr borrows from
block address mapping space in inode layout, by this, we can easily
locate and store flexible-sized inline xattr data in inode.
Inode disk layout:
+----------------------+
| .i_mode |
| ... |
| .i_ext |
+----------------------+
| .i_extra_isize |
| .i_inline_xattr_size |-----------+
| ... | |
+----------------------+ |
| .i_addr | |
| - block address or | |
| - inline data | |
+----------------------+<---+ v
| inline xattr | +---inline xattr range
+----------------------+<---+
| .i_nid |
+----------------------+
| node_footer |
| (nid, ino, offset) |
+----------------------+
Note that we have to consider backward compatibility, which reserved
inline_data space, 200 bytes, all the time, as reported by Sheng Yong.
Previous inline data or directory always reserved 200 bytes in inode layout,
even if inline_xattr is disabled. In order to keep inline_dentry's structure
for backward compatibility, we get the space back only from inline_data.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Reported-by: Sheng Yong <shengyong1@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-09-06 20:59:50 +07:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
2019-03-13 01:49:53 +07:00
|
|
|
|
|
|
|
min_size = sizeof(struct f2fs_xattr_header) / sizeof(__le32);
|
2019-03-04 16:19:04 +07:00
|
|
|
max_size = MAX_INLINE_XATTR_SIZE;
|
2019-03-13 01:49:53 +07:00
|
|
|
|
|
|
|
if (F2FS_OPTION(sbi).inline_xattr_size < min_size ||
|
|
|
|
F2FS_OPTION(sbi).inline_xattr_size > max_size) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_err(sbi, "inline xattr size is out of range: %d ~ %d",
|
|
|
|
min_size, max_size);
|
f2fs: support flexible inline xattr size
Now, in product, more and more features based on file encryption were
introduced, their demand of xattr space is increasing, however, inline
xattr has fixed-size of 200 bytes, once inline xattr space is full, new
increased xattr data would occupy additional xattr block which may bring
us more space usage and performance regression during persisting.
In order to resolve above issue, it's better to expand inline xattr size
flexibly according to user's requirement.
So this patch introduces new filesystem feature 'flexible inline xattr',
and new mount option 'inline_xattr_size=%u', once mkfs enables the
feature, we can use the option to make f2fs supporting flexible inline
xattr size.
To support this feature, we add extra attribute i_inline_xattr_size in
inode layout, indicating that how many space inline xattr borrows from
block address mapping space in inode layout, by this, we can easily
locate and store flexible-sized inline xattr data in inode.
Inode disk layout:
+----------------------+
| .i_mode |
| ... |
| .i_ext |
+----------------------+
| .i_extra_isize |
| .i_inline_xattr_size |-----------+
| ... | |
+----------------------+ |
| .i_addr | |
| - block address or | |
| - inline data | |
+----------------------+<---+ v
| inline xattr | +---inline xattr range
+----------------------+<---+
| .i_nid |
+----------------------+
| node_footer |
| (nid, ino, offset) |
+----------------------+
Note that we have to consider backward compatibility, which reserved
inline_data space, 200 bytes, all the time, as reported by Sheng Yong.
Previous inline data or directory always reserved 200 bytes in inode layout,
even if inline_xattr is disabled. In order to keep inline_dentry's structure
for backward compatibility, we get the space back only from inline_data.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Reported-by: Sheng Yong <shengyong1@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-09-06 20:59:50 +07:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
}
|
2018-01-31 09:36:57 +07:00
|
|
|
|
2018-08-21 09:21:43 +07:00
|
|
|
if (test_opt(sbi, DISABLE_CHECKPOINT) && test_opt(sbi, LFS)) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_err(sbi, "LFS not compatible with checkpoint=disable\n");
|
2018-08-21 09:21:43 +07:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2018-01-31 09:36:57 +07:00
|
|
|
/* Not pass down write hints if the number of active logs is lesser
|
|
|
|
* than NR_CURSEG_TYPE.
|
|
|
|
*/
|
2018-03-08 13:22:56 +07:00
|
|
|
if (F2FS_OPTION(sbi).active_logs != NR_CURSEG_TYPE)
|
|
|
|
F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF;
|
2013-06-16 07:48:48 +07:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2012-11-02 15:07:47 +07:00
|
|
|
static struct inode *f2fs_alloc_inode(struct super_block *sb)
|
|
|
|
{
|
|
|
|
struct f2fs_inode_info *fi;
|
|
|
|
|
2013-12-05 08:54:00 +07:00
|
|
|
fi = kmem_cache_alloc(f2fs_inode_cachep, GFP_F2FS_ZERO);
|
2012-11-02 15:07:47 +07:00
|
|
|
if (!fi)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
init_once((void *) fi);
|
|
|
|
|
2013-03-19 06:03:35 +07:00
|
|
|
/* Initialize f2fs-specific inode info */
|
2016-12-03 06:11:32 +07:00
|
|
|
atomic_set(&fi->dirty_pages, 0);
|
2014-03-20 17:10:08 +07:00
|
|
|
init_rwsem(&fi->i_sem);
|
2015-12-15 12:30:45 +07:00
|
|
|
INIT_LIST_HEAD(&fi->dirty_list);
|
2016-05-21 01:10:10 +07:00
|
|
|
INIT_LIST_HEAD(&fi->gdirty_list);
|
2017-10-19 09:05:57 +07:00
|
|
|
INIT_LIST_HEAD(&fi->inmem_ilist);
|
2014-10-07 07:39:50 +07:00
|
|
|
INIT_LIST_HEAD(&fi->inmem_pages);
|
|
|
|
mutex_init(&fi->inmem_lock);
|
2018-04-24 09:55:28 +07:00
|
|
|
init_rwsem(&fi->i_gc_rwsem[READ]);
|
|
|
|
init_rwsem(&fi->i_gc_rwsem[WRITE]);
|
2017-05-18 10:06:45 +07:00
|
|
|
init_rwsem(&fi->i_mmap_sem);
|
2017-09-07 09:40:54 +07:00
|
|
|
init_rwsem(&fi->i_xattr_sem);
|
2012-11-02 15:07:47 +07:00
|
|
|
|
2014-02-27 18:09:05 +07:00
|
|
|
/* Will be used by directory only */
|
|
|
|
fi->i_dir_level = F2FS_SB(sb)->dir_level;
|
2017-07-18 23:19:05 +07:00
|
|
|
|
2012-11-02 15:07:47 +07:00
|
|
|
return &fi->vfs_inode;
|
|
|
|
}
|
|
|
|
|
2013-04-30 09:33:27 +07:00
|
|
|
static int f2fs_drop_inode(struct inode *inode)
|
|
|
|
{
|
2017-02-27 20:02:58 +07:00
|
|
|
int ret;
|
2013-04-30 09:33:27 +07:00
|
|
|
/*
|
|
|
|
* This is to avoid a deadlock condition like below.
|
|
|
|
* writeback_single_inode(inode)
|
|
|
|
* - f2fs_write_data_page
|
|
|
|
* - f2fs_gc -> iput -> evict
|
|
|
|
* - inode_wait_for_writeback(inode)
|
|
|
|
*/
|
2016-05-21 01:10:10 +07:00
|
|
|
if ((!inode_unhashed(inode) && inode->i_state & I_SYNC)) {
|
2015-05-14 04:35:14 +07:00
|
|
|
if (!inode->i_nlink && !is_bad_inode(inode)) {
|
2015-06-20 07:53:26 +07:00
|
|
|
/* to avoid evict_inode call simultaneously */
|
|
|
|
atomic_inc(&inode->i_count);
|
2015-05-14 04:35:14 +07:00
|
|
|
spin_unlock(&inode->i_lock);
|
|
|
|
|
|
|
|
/* some remained atomic pages should discarded */
|
|
|
|
if (f2fs_is_atomic_file(inode))
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
f2fs_drop_inmem_pages(inode);
|
2015-05-14 04:35:14 +07:00
|
|
|
|
2015-06-20 07:53:26 +07:00
|
|
|
/* should remain fi->extent_tree for writepage */
|
|
|
|
f2fs_destroy_extent_node(inode);
|
|
|
|
|
2015-05-14 04:35:14 +07:00
|
|
|
sb_start_intwrite(inode->i_sb);
|
2016-05-20 23:22:03 +07:00
|
|
|
f2fs_i_size_write(inode, 0);
|
2015-05-14 04:35:14 +07:00
|
|
|
|
2019-03-04 08:32:25 +07:00
|
|
|
f2fs_submit_merged_write_cond(F2FS_I_SB(inode),
|
|
|
|
inode, NULL, 0, DATA);
|
|
|
|
truncate_inode_pages_final(inode->i_mapping);
|
|
|
|
|
2015-05-14 04:35:14 +07:00
|
|
|
if (F2FS_HAS_BLOCKS(inode))
|
2016-06-03 03:49:38 +07:00
|
|
|
f2fs_truncate(inode);
|
2015-05-14 04:35:14 +07:00
|
|
|
|
|
|
|
sb_end_intwrite(inode->i_sb);
|
|
|
|
|
|
|
|
spin_lock(&inode->i_lock);
|
2015-06-20 07:53:26 +07:00
|
|
|
atomic_dec(&inode->i_count);
|
2015-05-14 04:35:14 +07:00
|
|
|
}
|
2017-02-27 20:02:58 +07:00
|
|
|
trace_f2fs_drop_inode(inode, 0);
|
2013-04-30 09:33:27 +07:00
|
|
|
return 0;
|
2015-05-14 04:35:14 +07:00
|
|
|
}
|
2017-02-27 20:02:58 +07:00
|
|
|
ret = generic_drop_inode(inode);
|
|
|
|
trace_f2fs_drop_inode(inode, ret);
|
|
|
|
return ret;
|
2013-04-30 09:33:27 +07:00
|
|
|
}
|
|
|
|
|
2016-10-15 01:51:23 +07:00
|
|
|
/*
 * Mark @inode as a dirty f2fs inode.
 *
 * Sets FI_DIRTY_INODE (bumping the DIRTY_META inode statistic) if it was
 * not set yet. When @sync is true and the inode is not already linked on a
 * global dirty list, it is appended to sbi->inode_list[DIRTY_META] and the
 * F2FS_DIRTY_IMETA page count is incremented. All of this happens under
 * sbi->inode_lock[DIRTY_META].
 *
 * Returns 1 if FI_DIRTY_INODE was already set on entry, 0 otherwise.
 */
int f2fs_inode_dirtied(struct inode *inode, bool sync)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct list_head *gdirty = &F2FS_I(inode)->gdirty_list;
	int was_dirty;

	spin_lock(&sbi->inode_lock[DIRTY_META]);

	was_dirty = is_inode_flag_set(inode, FI_DIRTY_INODE) ? 1 : 0;
	if (!was_dirty) {
		set_inode_flag(inode, FI_DIRTY_INODE);
		stat_inc_dirty_inode(sbi, DIRTY_META);
	}

	if (sync && list_empty(gdirty)) {
		list_add_tail(gdirty, &sbi->inode_list[DIRTY_META]);
		inc_page_count(sbi, F2FS_DIRTY_IMETA);
	}

	spin_unlock(&sbi->inode_lock[DIRTY_META]);

	return was_dirty;
}
|
|
|
|
|
|
|
|
void f2fs_inode_synced(struct inode *inode)
|
|
|
|
{
|
|
|
|
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
|
|
|
|
|
|
|
|
spin_lock(&sbi->inode_lock[DIRTY_META]);
|
|
|
|
if (!is_inode_flag_set(inode, FI_DIRTY_INODE)) {
|
|
|
|
spin_unlock(&sbi->inode_lock[DIRTY_META]);
|
|
|
|
return;
|
|
|
|
}
|
2016-10-15 01:51:23 +07:00
|
|
|
if (!list_empty(&F2FS_I(inode)->gdirty_list)) {
|
|
|
|
list_del_init(&F2FS_I(inode)->gdirty_list);
|
|
|
|
dec_page_count(sbi, F2FS_DIRTY_IMETA);
|
|
|
|
}
|
2016-05-21 01:10:10 +07:00
|
|
|
clear_inode_flag(inode, FI_DIRTY_INODE);
|
2016-05-21 10:42:37 +07:00
|
|
|
clear_inode_flag(inode, FI_AUTO_RECOVER);
|
2016-05-21 01:10:10 +07:00
|
|
|
stat_dec_dirty_inode(F2FS_I_SB(inode), DIRTY_META);
|
2016-06-03 01:08:56 +07:00
|
|
|
spin_unlock(&sbi->inode_lock[DIRTY_META]);
|
2013-06-10 07:17:01 +07:00
|
|
|
}
|
|
|
|
|
2016-07-01 09:09:37 +07:00
|
|
|
/*
|
|
|
|
* f2fs_dirty_inode() is called from __mark_inode_dirty()
|
|
|
|
*
|
|
|
|
* We should call set_dirty_inode to write the dirty inode through write_inode.
|
|
|
|
*/
|
|
|
|
static void f2fs_dirty_inode(struct inode *inode, int flags)
|
|
|
|
{
|
|
|
|
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
|
|
|
|
|
|
|
|
if (inode->i_ino == F2FS_NODE_INO(sbi) ||
|
|
|
|
inode->i_ino == F2FS_META_INO(sbi))
|
|
|
|
return;
|
|
|
|
|
|
|
|
if (flags == I_DIRTY_TIME)
|
|
|
|
return;
|
|
|
|
|
|
|
|
if (is_inode_flag_set(inode, FI_AUTO_RECOVER))
|
|
|
|
clear_inode_flag(inode, FI_AUTO_RECOVER);
|
|
|
|
|
2016-10-15 01:51:23 +07:00
|
|
|
f2fs_inode_dirtied(inode, false);
|
2016-07-01 09:09:37 +07:00
|
|
|
}
|
|
|
|
|
2019-04-16 06:29:14 +07:00
|
|
|
static void f2fs_free_inode(struct inode *inode)
|
2012-11-02 15:07:47 +07:00
|
|
|
{
|
2019-04-11 03:21:15 +07:00
|
|
|
fscrypt_free_inode(inode);
|
2012-11-02 15:07:47 +07:00
|
|
|
kmem_cache_free(f2fs_inode_cachep, F2FS_I(inode));
|
|
|
|
}
|
|
|
|
|
2016-05-14 02:36:58 +07:00
|
|
|
static void destroy_percpu_info(struct f2fs_sb_info *sbi)
|
|
|
|
{
|
2016-05-17 01:06:50 +07:00
|
|
|
percpu_counter_destroy(&sbi->alloc_valid_block_count);
|
2016-05-17 01:42:32 +07:00
|
|
|
percpu_counter_destroy(&sbi->total_valid_inode_count);
|
2016-05-14 02:36:58 +07:00
|
|
|
}
|
|
|
|
|
2016-10-07 09:02:05 +07:00
|
|
|
static void destroy_device_list(struct f2fs_sb_info *sbi)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < sbi->s_ndevs; i++) {
|
|
|
|
blkdev_put(FDEV(i).bdev, FMODE_EXCL);
|
|
|
|
#ifdef CONFIG_BLK_DEV_ZONED
|
2019-03-16 07:13:07 +07:00
|
|
|
kvfree(FDEV(i).blkz_seq);
|
2016-10-07 09:02:05 +07:00
|
|
|
#endif
|
|
|
|
}
|
2018-12-14 09:38:33 +07:00
|
|
|
kvfree(sbi->devs);
|
2016-10-07 09:02:05 +07:00
|
|
|
}
|
|
|
|
|
2012-11-02 15:07:47 +07:00
|
|
|
static void f2fs_put_super(struct super_block *sb)
|
|
|
|
{
|
|
|
|
struct f2fs_sb_info *sbi = F2FS_SB(sb);
|
2017-06-14 16:39:46 +07:00
|
|
|
int i;
|
2017-10-04 08:08:37 +07:00
|
|
|
bool dropped;
|
2012-11-02 15:07:47 +07:00
|
|
|
|
2017-07-08 23:13:07 +07:00
|
|
|
f2fs_quota_off_umount(sb);
|
2012-11-02 15:07:47 +07:00
|
|
|
|
2015-06-20 02:01:21 +07:00
|
|
|
/* prevent remaining shrinker jobs */
|
|
|
|
mutex_lock(&sbi->umount_mutex);
|
|
|
|
|
2015-01-15 08:41:41 +07:00
|
|
|
/*
|
|
|
|
* We don't need to do checkpoint when superblock is clean.
|
|
|
|
* But, the previous checkpoint was not done by umount, it needs to do
|
|
|
|
* clean checkpoint again.
|
|
|
|
*/
|
2018-08-21 09:21:43 +07:00
|
|
|
if ((is_sbi_flag_set(sbi, SBI_IS_DIRTY) ||
|
|
|
|
!is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG))) {
|
2014-09-21 11:57:51 +07:00
|
|
|
struct cp_control cpc = {
|
|
|
|
.reason = CP_UMOUNT,
|
|
|
|
};
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
f2fs_write_checkpoint(sbi, &cpc);
|
2014-09-21 11:57:51 +07:00
|
|
|
}
|
2012-11-02 15:07:47 +07:00
|
|
|
|
2016-12-30 05:07:53 +07:00
|
|
|
/* be sure to wait for any on-going discard commands */
|
2019-01-15 01:42:11 +07:00
|
|
|
dropped = f2fs_issue_discard_timeout(sbi);
|
2016-12-30 05:07:53 +07:00
|
|
|
|
f2fs: fix to avoid NULL pointer dereference on se->discard_map
https://bugzilla.kernel.org/show_bug.cgi?id=200951
These is a NULL pointer dereference issue reported in bugzilla:
Hi,
in the setup there is a SATA SSD connected to a SATA-to-USB bridge.
The disc is "Samsung SSD 850 PRO 256G" which supports TRIM.
There are four partitions:
sda1: FAT /boot
sda2: F2FS /
sda3: F2FS /home
sda4: F2FS
The bridge is ASMT1153e which uses the "uas" driver.
There is no TRIM pass-through, so, when mounting it reports:
mounting with "discard" option, but the device does not support discard
The USB host is USB3.0 and UASP capable. It is the one on RK3399.
Given this everything works fine, except there is no TRIM support.
In order to enable TRIM a new UDEV rule is added [1]:
/etc/udev/rules.d/10-sata-bridge-trim.rules:
ACTION=="add|change", ATTRS{idVendor}=="174c", ATTRS{idProduct}=="55aa", SUBSYSTEM=="scsi_disk", ATTR{provisioning_mode}="unmap"
After reboot any F2FS write hangs forever and dmesg reports:
Unable to handle kernel NULL pointer dereference
Also tested on a x86_64 system: works fine even with TRIM enabled.
same disc
same bridge
different usb host controller
different cpu architecture
not root filesystem
Regards,
Vicenç.
[1] Post #5 in https://bbs.archlinux.org/viewtopic.php?id=236280
Unable to handle kernel NULL pointer dereference at virtual address 000000000000003e
Mem abort info:
ESR = 0x96000004
Exception class = DABT (current EL), IL = 32 bits
SET = 0, FnV = 0
EA = 0, S1PTW = 0
Data abort info:
ISV = 0, ISS = 0x00000004
CM = 0, WnR = 0
user pgtable: 4k pages, 48-bit VAs, pgdp = 00000000626e3122
[000000000000003e] pgd=0000000000000000
Internal error: Oops: 96000004 [#1] SMP
Modules linked in: overlay snd_soc_hdmi_codec rc_cec dw_hdmi_i2s_audio dw_hdmi_cec snd_soc_simple_card snd_soc_simple_card_utils snd_soc_rockchip_i2s rockchip_rga snd_soc_rockchip_pcm rockchipdrm videobuf2_dma_sg v4l2_mem2mem rtc_rk808 videobuf2_memops analogix_dp videobuf2_v4l2 videobuf2_common dw_hdmi dw_wdt cec rc_core videodev drm_kms_helper media drm rockchip_thermal rockchip_saradc realtek drm_panel_orientation_quirks syscopyarea sysfillrect sysimgblt fb_sys_fops dwmac_rk stmmac_platform stmmac pwm_bl squashfs loop crypto_user gpio_keys hid_kensington
CPU: 5 PID: 957 Comm: nvim Not tainted 4.19.0-rc1-1-ARCH #1
Hardware name: Sapphire-RK3399 Board (DT)
pstate: 00000005 (nzcv daif -PAN -UAO)
pc : update_sit_entry+0x304/0x4b0
lr : update_sit_entry+0x108/0x4b0
sp : ffff00000ca13bd0
x29: ffff00000ca13bd0 x28: 000000000000003e
x27: 0000000000000020 x26: 0000000000080000
x25: 0000000000000048 x24: ffff8000ebb85cf8
x23: 0000000000000253 x22: 00000000ffffffff
x21: 00000000000535f2 x20: 00000000ffffffdf
x19: ffff8000eb9e6800 x18: ffff8000eb9e6be8
x17: 0000000007ce6926 x16: 000000001c83ffa8
x15: 0000000000000000 x14: ffff8000f602df90
x13: 0000000000000006 x12: 0000000000000040
x11: 0000000000000228 x10: 0000000000000000
x9 : 0000000000000000 x8 : 0000000000000000
x7 : 00000000000535f2 x6 : ffff8000ebff3440
x5 : ffff8000ebff3440 x4 : ffff8000ebe3a6c8
x3 : 00000000ffffffff x2 : 0000000000000020
x1 : 0000000000000000 x0 : ffff8000eb9e5800
Process nvim (pid: 957, stack limit = 0x0000000063a78320)
Call trace:
update_sit_entry+0x304/0x4b0
f2fs_invalidate_blocks+0x98/0x140
truncate_node+0x90/0x400
f2fs_remove_inode_page+0xe8/0x340
f2fs_evict_inode+0x2b0/0x408
evict+0xe0/0x1e0
iput+0x160/0x260
do_unlinkat+0x214/0x298
__arm64_sys_unlinkat+0x3c/0x68
el0_svc_handler+0x94/0x118
el0_svc+0x8/0xc
Code: f9400800 b9488400 36080140 f9400f01 (387c4820)
---[ end trace a0f21a307118c477 ]---
The reason is it is possible to enable discard flag on block queue via
UDEV, but during mount, f2fs will initialize se->discard_map only if
this flag is set, once the flag is set after mount, f2fs may dereference
NULL pointer on se->discard_map.
So this patch does below changes to fix this issue:
- initialize and update se->discard_map all the time.
- don't clear DISCARD option if device has no QUEUE_FLAG_DISCARD flag
during mount.
- don't issue small discard on zoned block device.
- introduce some functions to enhance the readability.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Tested-by: Vicente Bergas <vicencb@gmail.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-09-04 02:52:17 +07:00
|
|
|
if ((f2fs_hw_support_discard(sbi) || f2fs_hw_should_discard(sbi)) &&
|
|
|
|
!sbi->discard_blks && !dropped) {
|
2017-04-28 12:56:08 +07:00
|
|
|
struct cp_control cpc = {
|
|
|
|
.reason = CP_UMOUNT | CP_TRIMMED,
|
|
|
|
};
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
f2fs_write_checkpoint(sbi, &cpc);
|
2017-04-28 12:56:08 +07:00
|
|
|
}
|
|
|
|
|
2014-08-12 08:37:46 +07:00
|
|
|
/*
|
|
|
|
* normally superblock is clean, so we need to release this.
|
|
|
|
* In addition, EIO will skip do checkpoint, we need this as well.
|
|
|
|
*/
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
f2fs_release_ino_entry(sbi, true);
|
2014-08-19 23:48:22 +07:00
|
|
|
|
2015-06-20 02:01:21 +07:00
|
|
|
f2fs_leave_shrinker(sbi);
|
|
|
|
mutex_unlock(&sbi->umount_mutex);
|
|
|
|
|
2016-01-29 23:57:59 +07:00
|
|
|
/* our cp_error case, we can wait for any writeback page */
|
2017-05-11 01:28:38 +07:00
|
|
|
f2fs_flush_merged_writes(sbi);
|
2016-01-29 23:57:59 +07:00
|
|
|
|
f2fs: fix to avoid broken of dnode block list
f2fs recovery flow is relying on dnode block link list, it means fsynced
file recovery depends on previous dnode's persistence in the list, so
during fsync() we should wait on all regular inode's dnode writebacked
before issuing flush.
By this way, we can avoid dnode block list being broken by out-of-order
IO submission due to IO scheduler or driver.
Sheng Yong helps to do the test with this patch:
Target:/data (f2fs, -)
64MB / 32768KB / 4KB / 8
1 / PERSIST / Index
Base:
SEQ-RD(MB/s) SEQ-WR(MB/s) RND-RD(IOPS) RND-WR(IOPS) Insert(TPS) Update(TPS) Delete(TPS)
1 867.82 204.15 41440.03 41370.54 680.8 1025.94 1031.08
2 871.87 205.87 41370.3 40275.2 791.14 1065.84 1101.7
3 866.52 205.69 41795.67 40596.16 694.69 1037.16 1031.48
Avg 868.7366667 205.2366667 41535.33333 40747.3 722.21 1042.98 1054.753333
After:
SEQ-RD(MB/s) SEQ-WR(MB/s) RND-RD(IOPS) RND-WR(IOPS) Insert(TPS) Update(TPS) Delete(TPS)
1 798.81 202.5 41143 40613.87 602.71 838.08 913.83
2 805.79 206.47 40297.2 41291.46 604.44 840.75 924.27
3 814.83 206.17 41209.57 40453.62 602.85 834.66 927.91
Avg 806.4766667 205.0466667 40883.25667 40786.31667 603.3333333 837.83 922.0033333
Patched/Original:
0.928332713 0.999074239 0.984300676 1.000957528 0.835398753 0.803303994 0.874141189
It looks like atomic write will suffer performance regression.
I suspect that the criminal is that we forcing to wait all dnode being in
storage cache before we issue PREFLUSH+FUA.
BTW, will commit ("f2fs: don't need to wait for node writes for atomic write")
cause the problem: we will lose data of last transaction after SPO, even if
atomic write return no error:
- atomic_open();
- write() P1, P2, P3;
- atomic_commit();
- writeback data: P1, P2, P3;
- writeback node: N1, N2, N3; <--- If N1, N2 is not writebacked, N3 with fsync_mark is
writebacked, In SPOR, we won't find N3 since node chain is broken, turns out that losing
last transaction.
- preflush + fua;
- power-cut
If we don't wait dnode writeback for atomic_write:
SEQ-RD(MB/s) SEQ-WR(MB/s) RND-RD(IOPS) RND-WR(IOPS) Insert(TPS) Update(TPS) Delete(TPS)
1 779.91 206.03 41621.5 40333.16 716.9 1038.21 1034.85
2 848.51 204.35 40082.44 39486.17 791.83 1119.96 1083.77
3 772.12 206.27 41335.25 41599.65 723.29 1055.07 971.92
Avg 800.18 205.55 41013.06333 40472.99333 744.0066667 1071.08 1030.18
Patched/Original:
0.92108464 1.001526693 0.987425886 0.993268102 1.030180511 1.026942031 0.976702294
SQLite's performance recovers.
Jaegeuk:
"Practically, I don't see db corruption becase of this. We can excuse to lose
the last transaction."
Finally, we decide to keep original implementation of atomic write interface
sematics that we don't wait all dnode writeback before preflush+fua submission.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-08-02 22:03:19 +07:00
|
|
|
f2fs_wait_on_all_pages_writeback(sbi);
|
|
|
|
|
|
|
|
f2fs_bug_on(sbi, sbi->fsync_node_num);
|
|
|
|
|
2012-11-02 15:07:47 +07:00
|
|
|
iput(sbi->node_inode);
|
2019-01-01 15:11:30 +07:00
|
|
|
sbi->node_inode = NULL;
|
|
|
|
|
2012-11-02 15:07:47 +07:00
|
|
|
iput(sbi->meta_inode);
|
2019-01-01 15:11:30 +07:00
|
|
|
sbi->meta_inode = NULL;
|
2012-11-02 15:07:47 +07:00
|
|
|
|
2018-12-26 12:50:29 +07:00
|
|
|
/*
|
|
|
|
* iput() can update stat information, if f2fs_write_checkpoint()
|
|
|
|
* above failed with error.
|
|
|
|
*/
|
|
|
|
f2fs_destroy_stats(sbi);
|
|
|
|
|
2012-11-02 15:07:47 +07:00
|
|
|
/* destroy f2fs internal modules */
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
f2fs_destroy_node_manager(sbi);
|
|
|
|
f2fs_destroy_segment_manager(sbi);
|
2012-11-02 15:07:47 +07:00
|
|
|
|
2018-12-14 09:38:33 +07:00
|
|
|
kvfree(sbi->ckpt);
|
2017-06-14 16:39:46 +07:00
|
|
|
|
2017-07-27 01:24:13 +07:00
|
|
|
f2fs_unregister_sysfs(sbi);
|
2012-11-02 15:07:47 +07:00
|
|
|
|
|
|
|
sb->s_fs_info = NULL;
|
2016-03-03 03:04:24 +07:00
|
|
|
if (sbi->s_chksum_driver)
|
|
|
|
crypto_free_shash(sbi->s_chksum_driver);
|
2018-12-14 09:38:33 +07:00
|
|
|
kvfree(sbi->raw_super);
|
2016-05-14 02:36:58 +07:00
|
|
|
|
2016-10-07 09:02:05 +07:00
|
|
|
destroy_device_list(sbi);
|
2017-02-27 17:43:12 +07:00
|
|
|
mempool_destroy(sbi->write_io_dummy);
|
2017-08-08 09:54:31 +07:00
|
|
|
#ifdef CONFIG_QUOTA
|
|
|
|
for (i = 0; i < MAXQUOTAS; i++)
|
2018-12-14 09:38:33 +07:00
|
|
|
kvfree(F2FS_OPTION(sbi).s_qf_names[i]);
|
2017-08-08 09:54:31 +07:00
|
|
|
#endif
|
2016-05-14 02:36:58 +07:00
|
|
|
destroy_percpu_info(sbi);
|
2017-05-11 01:18:25 +07:00
|
|
|
for (i = 0; i < NR_PAGE_TYPE; i++)
|
2018-12-14 09:38:33 +07:00
|
|
|
kvfree(sbi->write_io[i]);
|
|
|
|
kvfree(sbi);
|
2012-11-02 15:07:47 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
int f2fs_sync_fs(struct super_block *sb, int sync)
|
|
|
|
{
|
|
|
|
struct f2fs_sb_info *sbi = F2FS_SB(sb);
|
2015-12-23 16:50:30 +07:00
|
|
|
int err = 0;
|
2012-11-02 15:07:47 +07:00
|
|
|
|
2017-10-24 04:48:49 +07:00
|
|
|
if (unlikely(f2fs_cp_error(sbi)))
|
|
|
|
return 0;
|
2018-08-21 09:21:43 +07:00
|
|
|
if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
|
|
|
|
return 0;
|
2017-10-24 04:48:49 +07:00
|
|
|
|
2013-04-19 23:28:40 +07:00
|
|
|
trace_f2fs_sync_fs(sb, sync);
|
|
|
|
|
2017-08-08 09:54:31 +07:00
|
|
|
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
|
|
|
|
return -EAGAIN;
|
|
|
|
|
2013-04-01 06:32:21 +07:00
|
|
|
if (sync) {
|
2014-10-31 12:47:03 +07:00
|
|
|
struct cp_control cpc;
|
|
|
|
|
2015-01-30 02:45:33 +07:00
|
|
|
cpc.reason = __get_cp_reason(sbi);
|
|
|
|
|
2013-04-01 06:32:21 +07:00
|
|
|
mutex_lock(&sbi->gc_mutex);
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
err = f2fs_write_checkpoint(sbi, &cpc);
|
2013-04-01 06:32:21 +07:00
|
|
|
mutex_unlock(&sbi->gc_mutex);
|
|
|
|
}
|
2015-04-24 04:38:15 +07:00
|
|
|
f2fs_trace_ios(NULL, 1);
|
2012-11-02 15:07:47 +07:00
|
|
|
|
2015-12-23 16:50:30 +07:00
|
|
|
return err;
|
2012-11-02 15:07:47 +07:00
|
|
|
}
|
|
|
|
|
2013-01-29 16:30:07 +07:00
|
|
|
static int f2fs_freeze(struct super_block *sb)
|
|
|
|
{
|
2013-05-20 18:28:47 +07:00
|
|
|
if (f2fs_readonly(sb))
|
2013-01-29 16:30:07 +07:00
|
|
|
return 0;
|
|
|
|
|
2016-11-05 04:59:15 +07:00
|
|
|
/* IO error happened before */
|
|
|
|
if (unlikely(f2fs_cp_error(F2FS_SB(sb))))
|
|
|
|
return -EIO;
|
|
|
|
|
|
|
|
/* must be clean, since sync_filesystem() was already called */
|
|
|
|
if (is_sbi_flag_set(F2FS_SB(sb), SBI_IS_DIRTY))
|
|
|
|
return -EINVAL;
|
|
|
|
return 0;
|
2013-01-29 16:30:07 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * f2fs_unfreeze() - unfreeze handler; nothing to undo, always returns 0.
 */
static int f2fs_unfreeze(struct super_block *sb)
{
	return 0;
}
|
|
|
|
|
2017-07-28 23:32:53 +07:00
|
|
|
#ifdef CONFIG_QUOTA
|
|
|
|
static int f2fs_statfs_project(struct super_block *sb,
|
|
|
|
kprojid_t projid, struct kstatfs *buf)
|
|
|
|
{
|
|
|
|
struct kqid qid;
|
|
|
|
struct dquot *dquot;
|
|
|
|
u64 limit;
|
|
|
|
u64 curblock;
|
|
|
|
|
|
|
|
qid = make_kqid_projid(projid);
|
|
|
|
dquot = dqget(sb, qid);
|
|
|
|
if (IS_ERR(dquot))
|
|
|
|
return PTR_ERR(dquot);
|
2018-07-24 19:17:53 +07:00
|
|
|
spin_lock(&dquot->dq_dqb_lock);
|
2017-07-28 23:32:53 +07:00
|
|
|
|
|
|
|
limit = (dquot->dq_dqb.dqb_bsoftlimit ?
|
|
|
|
dquot->dq_dqb.dqb_bsoftlimit :
|
|
|
|
dquot->dq_dqb.dqb_bhardlimit) >> sb->s_blocksize_bits;
|
|
|
|
if (limit && buf->f_blocks > limit) {
|
|
|
|
curblock = dquot->dq_dqb.dqb_curspace >> sb->s_blocksize_bits;
|
|
|
|
buf->f_blocks = limit;
|
|
|
|
buf->f_bfree = buf->f_bavail =
|
|
|
|
(buf->f_blocks > curblock) ?
|
|
|
|
(buf->f_blocks - curblock) : 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
limit = dquot->dq_dqb.dqb_isoftlimit ?
|
|
|
|
dquot->dq_dqb.dqb_isoftlimit :
|
|
|
|
dquot->dq_dqb.dqb_ihardlimit;
|
|
|
|
if (limit && buf->f_files > limit) {
|
|
|
|
buf->f_files = limit;
|
|
|
|
buf->f_ffree =
|
|
|
|
(buf->f_files > dquot->dq_dqb.dqb_curinodes) ?
|
|
|
|
(buf->f_files - dquot->dq_dqb.dqb_curinodes) : 0;
|
|
|
|
}
|
|
|
|
|
2018-07-24 19:17:53 +07:00
|
|
|
spin_unlock(&dquot->dq_dqb_lock);
|
2017-07-28 23:32:53 +07:00
|
|
|
dqput(dquot);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2012-11-02 15:07:47 +07:00
|
|
|
static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
|
|
|
|
{
|
|
|
|
struct super_block *sb = dentry->d_sb;
|
|
|
|
struct f2fs_sb_info *sbi = F2FS_SB(sb);
|
|
|
|
u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
|
2018-01-04 01:55:07 +07:00
|
|
|
block_t total_count, user_block_count, start_count;
|
2017-06-22 10:55:55 +07:00
|
|
|
u64 avail_node_count;
|
2012-11-02 15:07:47 +07:00
|
|
|
|
|
|
|
total_count = le64_to_cpu(sbi->raw_super->block_count);
|
|
|
|
user_block_count = sbi->user_block_count;
|
|
|
|
start_count = le32_to_cpu(sbi->raw_super->segment0_blkaddr);
|
|
|
|
buf->f_type = F2FS_SUPER_MAGIC;
|
|
|
|
buf->f_bsize = sbi->blocksize;
|
|
|
|
|
|
|
|
buf->f_blocks = total_count - start_count;
|
2018-01-04 01:55:07 +07:00
|
|
|
buf->f_bfree = user_block_count - valid_user_blocks(sbi) -
|
2017-10-27 19:45:05 +07:00
|
|
|
sbi->current_reserved_blocks;
|
2019-05-05 10:40:46 +07:00
|
|
|
|
|
|
|
spin_lock(&sbi->stat_lock);
|
2018-08-21 09:21:43 +07:00
|
|
|
if (unlikely(buf->f_bfree <= sbi->unusable_block_count))
|
|
|
|
buf->f_bfree = 0;
|
|
|
|
else
|
|
|
|
buf->f_bfree -= sbi->unusable_block_count;
|
2019-05-05 10:40:46 +07:00
|
|
|
spin_unlock(&sbi->stat_lock);
|
2018-08-21 09:21:43 +07:00
|
|
|
|
2018-03-08 13:22:56 +07:00
|
|
|
if (buf->f_bfree > F2FS_OPTION(sbi).root_reserved_blocks)
|
|
|
|
buf->f_bavail = buf->f_bfree -
|
|
|
|
F2FS_OPTION(sbi).root_reserved_blocks;
|
2017-12-28 06:05:52 +07:00
|
|
|
else
|
|
|
|
buf->f_bavail = 0;
|
2012-11-02 15:07:47 +07:00
|
|
|
|
2017-11-16 15:59:14 +07:00
|
|
|
avail_node_count = sbi->total_node_count - sbi->nquota_files -
|
|
|
|
F2FS_RESERVED_NODE_NUM;
|
2017-06-22 10:55:55 +07:00
|
|
|
|
|
|
|
if (avail_node_count > user_block_count) {
|
|
|
|
buf->f_files = user_block_count;
|
|
|
|
buf->f_ffree = buf->f_bavail;
|
|
|
|
} else {
|
|
|
|
buf->f_files = avail_node_count;
|
|
|
|
buf->f_ffree = min(avail_node_count - valid_node_count(sbi),
|
|
|
|
buf->f_bavail);
|
|
|
|
}
|
2012-11-02 15:07:47 +07:00
|
|
|
|
2013-03-03 11:58:05 +07:00
|
|
|
buf->f_namelen = F2FS_NAME_LEN;
|
2012-11-02 15:07:47 +07:00
|
|
|
buf->f_fsid.val[0] = (u32)id;
|
|
|
|
buf->f_fsid.val[1] = (u32)(id >> 32);
|
|
|
|
|
2017-07-28 23:32:53 +07:00
|
|
|
#ifdef CONFIG_QUOTA
|
|
|
|
if (is_inode_flag_set(dentry->d_inode, FI_PROJ_INHERIT) &&
|
|
|
|
sb_has_quota_limits_enabled(sb, PRJQUOTA)) {
|
|
|
|
f2fs_statfs_project(sb, F2FS_I(dentry->d_inode)->i_projid, buf);
|
|
|
|
}
|
|
|
|
#endif
|
2012-11-02 15:07:47 +07:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-08-08 09:54:31 +07:00
|
|
|
/*
 * f2fs_show_quota_options() - emit the journalled-quota mount options.
 *
 * With CONFIG_QUOTA, prints ",jqfmt=<name>" when a quota format is
 * configured (an unrecognised format prints an empty name), followed by the
 * usrjquota, grpjquota and prjjquota options for every quota file name that
 * is set.  Without CONFIG_QUOTA nothing is printed.
 */
static inline void f2fs_show_quota_options(struct seq_file *seq,
					   struct super_block *sb)
{
#ifdef CONFIG_QUOTA
	static const struct {
		int type;
		const char *opt;
	} jquota_opts[] = {
		{ USRQUOTA, "usrjquota" },
		{ GRPQUOTA, "grpjquota" },
		{ PRJQUOTA, "prjjquota" },
	};
	struct f2fs_sb_info *sbi = F2FS_SB(sb);
	int fmt = F2FS_OPTION(sbi).s_jquota_fmt;
	unsigned int idx;

	if (fmt) {
		const char *fmtname = "";

		if (fmt == QFMT_VFS_OLD)
			fmtname = "vfsold";
		else if (fmt == QFMT_VFS_V0)
			fmtname = "vfsv0";
		else if (fmt == QFMT_VFS_V1)
			fmtname = "vfsv1";

		seq_printf(seq, ",jqfmt=%s", fmtname);
	}

	/* one option per quota type that has a quota file name configured */
	for (idx = 0; idx < ARRAY_SIZE(jquota_opts); idx++) {
		const char *qf_name =
			F2FS_OPTION(sbi).s_qf_names[jquota_opts[idx].type];

		if (qf_name)
			seq_show_option(seq, jquota_opts[idx].opt, qf_name);
	}
#endif
}
|
|
|
|
|
2012-11-02 15:07:47 +07:00
|
|
|
static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
|
|
|
|
{
|
|
|
|
struct f2fs_sb_info *sbi = F2FS_SB(root->d_sb);
|
|
|
|
|
2015-10-06 01:02:54 +07:00
|
|
|
if (!f2fs_readonly(sbi->sb) && test_opt(sbi, BG_GC)) {
|
|
|
|
if (test_opt(sbi, FORCE_FG_GC))
|
|
|
|
seq_printf(seq, ",background_gc=%s", "sync");
|
|
|
|
else
|
|
|
|
seq_printf(seq, ",background_gc=%s", "on");
|
|
|
|
} else {
|
2013-06-16 07:48:48 +07:00
|
|
|
seq_printf(seq, ",background_gc=%s", "off");
|
2015-10-06 01:02:54 +07:00
|
|
|
}
|
2012-11-02 15:07:47 +07:00
|
|
|
if (test_opt(sbi, DISABLE_ROLL_FORWARD))
|
|
|
|
seq_puts(seq, ",disable_roll_forward");
|
|
|
|
if (test_opt(sbi, DISCARD))
|
|
|
|
seq_puts(seq, ",discard");
|
2019-05-24 16:08:39 +07:00
|
|
|
else
|
|
|
|
seq_puts(seq, ",nodiscard");
|
2012-11-02 15:07:47 +07:00
|
|
|
if (test_opt(sbi, NOHEAP))
|
2017-03-25 07:41:45 +07:00
|
|
|
seq_puts(seq, ",no_heap");
|
|
|
|
else
|
|
|
|
seq_puts(seq, ",heap");
|
2012-11-02 15:07:47 +07:00
|
|
|
#ifdef CONFIG_F2FS_FS_XATTR
|
|
|
|
if (test_opt(sbi, XATTR_USER))
|
|
|
|
seq_puts(seq, ",user_xattr");
|
|
|
|
else
|
|
|
|
seq_puts(seq, ",nouser_xattr");
|
2013-08-08 13:16:22 +07:00
|
|
|
if (test_opt(sbi, INLINE_XATTR))
|
|
|
|
seq_puts(seq, ",inline_xattr");
|
2017-02-15 09:34:45 +07:00
|
|
|
else
|
|
|
|
seq_puts(seq, ",noinline_xattr");
|
f2fs: support flexible inline xattr size
Now, in product, more and more features based on file encryption were
introduced, their demand of xattr space is increasing, however, inline
xattr has fixed-size of 200 bytes, once inline xattr space is full, new
increased xattr data would occupy additional xattr block which may bring
us more space usage and performance regression during persisting.
In order to resolve above issue, it's better to expand inline xattr size
flexibly according to user's requirement.
So this patch introduces new filesystem feature 'flexible inline xattr',
and new mount option 'inline_xattr_size=%u', once mkfs enables the
feature, we can use the option to make f2fs supporting flexible inline
xattr size.
To support this feature, we add extra attribute i_inline_xattr_size in
inode layout, indicating that how many space inline xattr borrows from
block address mapping space in inode layout, by this, we can easily
locate and store flexible-sized inline xattr data in inode.
Inode disk layout:
+----------------------+
| .i_mode |
| ... |
| .i_ext |
+----------------------+
| .i_extra_isize |
| .i_inline_xattr_size |-----------+
| ... | |
+----------------------+ |
| .i_addr | |
| - block address or | |
| - inline data | |
+----------------------+<---+ v
| inline xattr | +---inline xattr range
+----------------------+<---+
| .i_nid |
+----------------------+
| node_footer |
| (nid, ino, offset) |
+----------------------+
Note that, we have to consider backward compatibility which reserved
inline_data space, 200 bytes, all the time, reported by Sheng Yong.
Previous inline data or directory always reserved 200 bytes in inode layout,
even if inline_xattr is disabled. In order to keep inline_dentry's structure
for backward compatibility, we get the space back only from inline_data.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Reported-by: Sheng Yong <shengyong1@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-09-06 20:59:50 +07:00
|
|
|
if (test_opt(sbi, INLINE_XATTR_SIZE))
|
|
|
|
seq_printf(seq, ",inline_xattr_size=%u",
|
2018-03-08 13:22:56 +07:00
|
|
|
F2FS_OPTION(sbi).inline_xattr_size);
|
2012-11-02 15:07:47 +07:00
|
|
|
#endif
|
|
|
|
#ifdef CONFIG_F2FS_FS_POSIX_ACL
|
|
|
|
if (test_opt(sbi, POSIX_ACL))
|
|
|
|
seq_puts(seq, ",acl");
|
|
|
|
else
|
|
|
|
seq_puts(seq, ",noacl");
|
|
|
|
#endif
|
|
|
|
if (test_opt(sbi, DISABLE_EXT_IDENTIFY))
|
2013-01-26 01:08:59 +07:00
|
|
|
seq_puts(seq, ",disable_ext_identify");
|
2013-11-10 22:13:17 +07:00
|
|
|
if (test_opt(sbi, INLINE_DATA))
|
|
|
|
seq_puts(seq, ",inline_data");
|
2015-03-24 09:20:27 +07:00
|
|
|
else
|
|
|
|
seq_puts(seq, ",noinline_data");
|
2014-09-24 17:16:13 +07:00
|
|
|
if (test_opt(sbi, INLINE_DENTRY))
|
|
|
|
seq_puts(seq, ",inline_dentry");
|
2016-05-09 18:56:34 +07:00
|
|
|
else
|
|
|
|
seq_puts(seq, ",noinline_dentry");
|
2014-04-11 16:49:55 +07:00
|
|
|
if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE))
|
2014-04-02 13:34:36 +07:00
|
|
|
seq_puts(seq, ",flush_merge");
|
2014-07-23 23:57:31 +07:00
|
|
|
if (test_opt(sbi, NOBARRIER))
|
|
|
|
seq_puts(seq, ",nobarrier");
|
2014-10-31 12:47:03 +07:00
|
|
|
if (test_opt(sbi, FASTBOOT))
|
|
|
|
seq_puts(seq, ",fastboot");
|
2015-02-05 16:55:51 +07:00
|
|
|
if (test_opt(sbi, EXTENT_CACHE))
|
|
|
|
seq_puts(seq, ",extent_cache");
|
2015-06-26 07:43:04 +07:00
|
|
|
else
|
|
|
|
seq_puts(seq, ",noextent_cache");
|
2015-12-16 12:12:16 +07:00
|
|
|
if (test_opt(sbi, DATA_FLUSH))
|
|
|
|
seq_puts(seq, ",data_flush");
|
2016-06-04 09:29:38 +07:00
|
|
|
|
|
|
|
seq_puts(seq, ",mode=");
|
|
|
|
if (test_opt(sbi, ADAPTIVE))
|
|
|
|
seq_puts(seq, "adaptive");
|
|
|
|
else if (test_opt(sbi, LFS))
|
|
|
|
seq_puts(seq, "lfs");
|
2018-03-08 13:22:56 +07:00
|
|
|
seq_printf(seq, ",active_logs=%u", F2FS_OPTION(sbi).active_logs);
|
2017-12-28 06:05:52 +07:00
|
|
|
if (test_opt(sbi, RESERVE_ROOT))
|
2018-01-05 12:36:09 +07:00
|
|
|
seq_printf(seq, ",reserve_root=%u,resuid=%u,resgid=%u",
|
2018-03-08 13:22:56 +07:00
|
|
|
F2FS_OPTION(sbi).root_reserved_blocks,
|
|
|
|
from_kuid_munged(&init_user_ns,
|
|
|
|
F2FS_OPTION(sbi).s_resuid),
|
|
|
|
from_kgid_munged(&init_user_ns,
|
|
|
|
F2FS_OPTION(sbi).s_resgid));
|
2016-12-22 08:09:19 +07:00
|
|
|
if (F2FS_IO_SIZE_BITS(sbi))
|
2018-09-22 21:43:09 +07:00
|
|
|
seq_printf(seq, ",io_bits=%u",
|
|
|
|
F2FS_OPTION(sbi).write_io_size_bits);
|
2017-01-27 08:35:37 +07:00
|
|
|
#ifdef CONFIG_F2FS_FAULT_INJECTION
|
2018-08-08 16:36:41 +07:00
|
|
|
if (test_opt(sbi, FAULT_INJECTION)) {
|
2017-06-12 08:44:24 +07:00
|
|
|
seq_printf(seq, ",fault_injection=%u",
|
2018-03-08 13:22:56 +07:00
|
|
|
F2FS_OPTION(sbi).fault_info.inject_rate);
|
2018-08-08 16:36:41 +07:00
|
|
|
seq_printf(seq, ",fault_type=%u",
|
|
|
|
F2FS_OPTION(sbi).fault_info.inject_type);
|
|
|
|
}
|
2017-01-27 08:35:37 +07:00
|
|
|
#endif
|
2017-07-08 23:13:07 +07:00
|
|
|
#ifdef CONFIG_QUOTA
|
2017-08-08 09:54:31 +07:00
|
|
|
if (test_opt(sbi, QUOTA))
|
|
|
|
seq_puts(seq, ",quota");
|
2017-07-08 23:13:07 +07:00
|
|
|
if (test_opt(sbi, USRQUOTA))
|
|
|
|
seq_puts(seq, ",usrquota");
|
|
|
|
if (test_opt(sbi, GRPQUOTA))
|
|
|
|
seq_puts(seq, ",grpquota");
|
2017-07-25 23:01:41 +07:00
|
|
|
if (test_opt(sbi, PRJQUOTA))
|
|
|
|
seq_puts(seq, ",prjquota");
|
2017-01-27 08:35:37 +07:00
|
|
|
#endif
|
2017-08-08 09:54:31 +07:00
|
|
|
f2fs_show_quota_options(seq, sbi->sb);
|
2018-03-08 13:22:56 +07:00
|
|
|
if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_USER)
|
2018-01-31 09:36:57 +07:00
|
|
|
seq_printf(seq, ",whint_mode=%s", "user-based");
|
2018-03-08 13:22:56 +07:00
|
|
|
else if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_FS)
|
2018-01-31 09:36:58 +07:00
|
|
|
seq_printf(seq, ",whint_mode=%s", "fs-based");
|
2018-12-12 16:50:12 +07:00
|
|
|
#ifdef CONFIG_FS_ENCRYPTION
|
2018-03-15 17:51:42 +07:00
|
|
|
if (F2FS_OPTION(sbi).test_dummy_encryption)
|
|
|
|
seq_puts(seq, ",test_dummy_encryption");
|
|
|
|
#endif
|
2012-11-02 15:07:47 +07:00
|
|
|
|
2018-03-08 13:22:56 +07:00
|
|
|
if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_DEFAULT)
|
2018-02-18 23:50:49 +07:00
|
|
|
seq_printf(seq, ",alloc_mode=%s", "default");
|
2018-03-08 13:22:56 +07:00
|
|
|
else if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
|
2018-02-18 23:50:49 +07:00
|
|
|
seq_printf(seq, ",alloc_mode=%s", "reuse");
|
2018-03-07 11:07:49 +07:00
|
|
|
|
2018-08-21 09:21:43 +07:00
|
|
|
if (test_opt(sbi, DISABLE_CHECKPOINT))
|
2019-05-30 07:49:06 +07:00
|
|
|
seq_printf(seq, ",checkpoint=disable:%u",
|
|
|
|
F2FS_OPTION(sbi).unusable_cap);
|
2018-03-08 13:22:56 +07:00
|
|
|
if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_POSIX)
|
2018-03-07 11:07:49 +07:00
|
|
|
seq_printf(seq, ",fsync_mode=%s", "posix");
|
2018-03-08 13:22:56 +07:00
|
|
|
else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT)
|
2018-03-07 11:07:49 +07:00
|
|
|
seq_printf(seq, ",fsync_mode=%s", "strict");
|
2018-07-02 13:07:40 +07:00
|
|
|
else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_NOBARRIER)
|
|
|
|
seq_printf(seq, ",fsync_mode=%s", "nobarrier");
|
2012-11-02 15:07:47 +07:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2015-05-07 17:11:37 +07:00
|
|
|
static void default_options(struct f2fs_sb_info *sbi)
|
|
|
|
{
|
|
|
|
/* init some FS parameters */
|
2018-03-08 13:22:56 +07:00
|
|
|
F2FS_OPTION(sbi).active_logs = NR_CURSEG_TYPE;
|
|
|
|
F2FS_OPTION(sbi).inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS;
|
|
|
|
F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF;
|
|
|
|
F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT;
|
|
|
|
F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_POSIX;
|
2018-03-15 17:51:42 +07:00
|
|
|
F2FS_OPTION(sbi).test_dummy_encryption = false;
|
2018-06-06 22:55:02 +07:00
|
|
|
F2FS_OPTION(sbi).s_resuid = make_kuid(&init_user_ns, F2FS_DEF_RESUID);
|
|
|
|
F2FS_OPTION(sbi).s_resgid = make_kgid(&init_user_ns, F2FS_DEF_RESGID);
|
2015-05-07 17:11:37 +07:00
|
|
|
|
|
|
|
set_opt(sbi, BG_GC);
|
2017-02-08 16:39:44 +07:00
|
|
|
set_opt(sbi, INLINE_XATTR);
|
2015-05-07 17:11:37 +07:00
|
|
|
set_opt(sbi, INLINE_DATA);
|
2016-05-09 18:56:34 +07:00
|
|
|
set_opt(sbi, INLINE_DENTRY);
|
2015-06-20 07:53:26 +07:00
|
|
|
set_opt(sbi, EXTENT_CACHE);
|
2017-03-25 07:41:45 +07:00
|
|
|
set_opt(sbi, NOHEAP);
|
2018-08-21 09:21:43 +07:00
|
|
|
clear_opt(sbi, DISABLE_CHECKPOINT);
|
2019-05-30 07:49:06 +07:00
|
|
|
F2FS_OPTION(sbi).unusable_cap = 0;
|
2017-11-28 04:05:09 +07:00
|
|
|
sbi->sb->s_flags |= SB_LAZYTIME;
|
2016-05-21 12:39:20 +07:00
|
|
|
set_opt(sbi, FLUSH_MERGE);
|
f2fs: fix to avoid NULL pointer dereference on se->discard_map
https://bugzilla.kernel.org/show_bug.cgi?id=200951
There is a NULL pointer dereference issue reported in bugzilla:
Hi,
in the setup there is a SATA SSD connected to a SATA-to-USB bridge.
The disc is "Samsung SSD 850 PRO 256G" which supports TRIM.
There are four partitions:
sda1: FAT /boot
sda2: F2FS /
sda3: F2FS /home
sda4: F2FS
The bridge is ASMT1153e which uses the "uas" driver.
There is no TRIM pass-through, so, when mounting it reports:
mounting with "discard" option, but the device does not support discard
The USB host is USB3.0 and UASP capable. It is the one on RK3399.
Given this everything works fine, except there is no TRIM support.
In order to enable TRIM a new UDEV rule is added [1]:
/etc/udev/rules.d/10-sata-bridge-trim.rules:
ACTION=="add|change", ATTRS{idVendor}=="174c", ATTRS{idProduct}=="55aa", SUBSYSTEM=="scsi_disk", ATTR{provisioning_mode}="unmap"
After reboot any F2FS write hangs forever and dmesg reports:
Unable to handle kernel NULL pointer dereference
Also tested on a x86_64 system: works fine even with TRIM enabled.
same disc
same bridge
different usb host controller
different cpu architecture
not root filesystem
Regards,
Vicenç.
[1] Post #5 in https://bbs.archlinux.org/viewtopic.php?id=236280
Unable to handle kernel NULL pointer dereference at virtual address 000000000000003e
Mem abort info:
ESR = 0x96000004
Exception class = DABT (current EL), IL = 32 bits
SET = 0, FnV = 0
EA = 0, S1PTW = 0
Data abort info:
ISV = 0, ISS = 0x00000004
CM = 0, WnR = 0
user pgtable: 4k pages, 48-bit VAs, pgdp = 00000000626e3122
[000000000000003e] pgd=0000000000000000
Internal error: Oops: 96000004 [#1] SMP
Modules linked in: overlay snd_soc_hdmi_codec rc_cec dw_hdmi_i2s_audio dw_hdmi_cec snd_soc_simple_card snd_soc_simple_card_utils snd_soc_rockchip_i2s rockchip_rga snd_soc_rockchip_pcm rockchipdrm videobuf2_dma_sg v4l2_mem2mem rtc_rk808 videobuf2_memops analogix_dp videobuf2_v4l2 videobuf2_common dw_hdmi dw_wdt cec rc_core videodev drm_kms_helper media drm rockchip_thermal rockchip_saradc realtek drm_panel_orientation_quirks syscopyarea sysfillrect sysimgblt fb_sys_fops dwmac_rk stmmac_platform stmmac pwm_bl squashfs loop crypto_user gpio_keys hid_kensington
CPU: 5 PID: 957 Comm: nvim Not tainted 4.19.0-rc1-1-ARCH #1
Hardware name: Sapphire-RK3399 Board (DT)
pstate: 00000005 (nzcv daif -PAN -UAO)
pc : update_sit_entry+0x304/0x4b0
lr : update_sit_entry+0x108/0x4b0
sp : ffff00000ca13bd0
x29: ffff00000ca13bd0 x28: 000000000000003e
x27: 0000000000000020 x26: 0000000000080000
x25: 0000000000000048 x24: ffff8000ebb85cf8
x23: 0000000000000253 x22: 00000000ffffffff
x21: 00000000000535f2 x20: 00000000ffffffdf
x19: ffff8000eb9e6800 x18: ffff8000eb9e6be8
x17: 0000000007ce6926 x16: 000000001c83ffa8
x15: 0000000000000000 x14: ffff8000f602df90
x13: 0000000000000006 x12: 0000000000000040
x11: 0000000000000228 x10: 0000000000000000
x9 : 0000000000000000 x8 : 0000000000000000
x7 : 00000000000535f2 x6 : ffff8000ebff3440
x5 : ffff8000ebff3440 x4 : ffff8000ebe3a6c8
x3 : 00000000ffffffff x2 : 0000000000000020
x1 : 0000000000000000 x0 : ffff8000eb9e5800
Process nvim (pid: 957, stack limit = 0x0000000063a78320)
Call trace:
update_sit_entry+0x304/0x4b0
f2fs_invalidate_blocks+0x98/0x140
truncate_node+0x90/0x400
f2fs_remove_inode_page+0xe8/0x340
f2fs_evict_inode+0x2b0/0x408
evict+0xe0/0x1e0
iput+0x160/0x260
do_unlinkat+0x214/0x298
__arm64_sys_unlinkat+0x3c/0x68
el0_svc_handler+0x94/0x118
el0_svc+0x8/0xc
Code: f9400800 b9488400 36080140 f9400f01 (387c4820)
---[ end trace a0f21a307118c477 ]---
The reason is that it is possible to enable the discard flag on the block
queue via UDEV, but during mount, f2fs will initialize se->discard_map only
if this flag is set; once the flag is set after mount, f2fs may dereference
a NULL pointer on se->discard_map.
So this patch does below changes to fix this issue:
- initialize and update se->discard_map all the time.
- don't clear DISCARD option if device has no QUEUE_FLAG_DISCARD flag
during mount.
- don't issue small discard on zoned block device.
- introduce some functions to enhance the readability.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Tested-by: Vicente Bergas <vicencb@gmail.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-09-04 02:52:17 +07:00
|
|
|
set_opt(sbi, DISCARD);
|
2018-10-24 17:34:26 +07:00
|
|
|
if (f2fs_sb_has_blkzoned(sbi))
|
2018-07-05 13:24:11 +07:00
|
|
|
set_opt_mode(sbi, F2FS_MOUNT_LFS);
|
|
|
|
else
|
2016-06-13 23:47:48 +07:00
|
|
|
set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE);
|
2015-05-07 17:11:37 +07:00
|
|
|
|
|
|
|
#ifdef CONFIG_F2FS_FS_XATTR
|
|
|
|
set_opt(sbi, XATTR_USER);
|
|
|
|
#endif
|
|
|
|
#ifdef CONFIG_F2FS_FS_POSIX_ACL
|
|
|
|
set_opt(sbi, POSIX_ACL);
|
|
|
|
#endif
|
2016-09-26 18:45:05 +07:00
|
|
|
|
2018-08-08 16:36:41 +07:00
|
|
|
f2fs_build_fault_attr(sbi, 0, 0);
|
2015-05-07 17:11:37 +07:00
|
|
|
}
|
|
|
|
|
2017-10-06 23:14:28 +07:00
|
|
|
#ifdef CONFIG_QUOTA
|
|
|
|
static int f2fs_enable_quotas(struct super_block *sb);
|
|
|
|
#endif
|
2018-08-21 09:21:43 +07:00
|
|
|
|
|
|
|
static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
|
|
|
|
{
|
2019-01-23 05:04:33 +07:00
|
|
|
unsigned int s_flags = sbi->sb->s_flags;
|
2018-08-21 09:21:43 +07:00
|
|
|
struct cp_control cpc;
|
2019-01-23 05:04:33 +07:00
|
|
|
int err = 0;
|
|
|
|
int ret;
|
2019-05-30 07:49:06 +07:00
|
|
|
block_t unusable;
|
2018-08-21 09:21:43 +07:00
|
|
|
|
2019-01-23 05:04:33 +07:00
|
|
|
if (s_flags & SB_RDONLY) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_err(sbi, "checkpoint=disable on readonly fs");
|
2019-01-23 05:04:33 +07:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
2018-08-21 09:21:43 +07:00
|
|
|
sbi->sb->s_flags |= SB_ACTIVE;
|
|
|
|
|
|
|
|
f2fs_update_time(sbi, DISABLE_TIME);
|
|
|
|
|
|
|
|
while (!f2fs_time_over(sbi, DISABLE_TIME)) {
|
2018-12-18 08:08:26 +07:00
|
|
|
mutex_lock(&sbi->gc_mutex);
|
2018-08-21 09:21:43 +07:00
|
|
|
err = f2fs_gc(sbi, true, false, NULL_SEGNO);
|
2019-01-23 05:04:33 +07:00
|
|
|
if (err == -ENODATA) {
|
|
|
|
err = 0;
|
2018-08-21 09:21:43 +07:00
|
|
|
break;
|
2019-01-23 05:04:33 +07:00
|
|
|
}
|
2018-12-18 08:08:26 +07:00
|
|
|
if (err && err != -EAGAIN)
|
2019-01-23 05:04:33 +07:00
|
|
|
break;
|
2018-08-21 09:21:43 +07:00
|
|
|
}
|
|
|
|
|
2019-01-23 05:04:33 +07:00
|
|
|
ret = sync_filesystem(sbi->sb);
|
|
|
|
if (ret || err) {
|
|
|
|
err = ret ? ret: err;
|
|
|
|
goto restore_flag;
|
|
|
|
}
|
2018-08-21 09:21:43 +07:00
|
|
|
|
2019-05-30 07:49:06 +07:00
|
|
|
unusable = f2fs_get_unusable_blocks(sbi);
|
|
|
|
if (f2fs_disable_cp_again(sbi, unusable)) {
|
2019-01-23 05:04:33 +07:00
|
|
|
err = -EAGAIN;
|
|
|
|
goto restore_flag;
|
|
|
|
}
|
2018-08-21 09:21:43 +07:00
|
|
|
|
|
|
|
mutex_lock(&sbi->gc_mutex);
|
|
|
|
cpc.reason = CP_PAUSE;
|
|
|
|
set_sbi_flag(sbi, SBI_CP_DISABLED);
|
2019-04-26 16:57:54 +07:00
|
|
|
err = f2fs_write_checkpoint(sbi, &cpc);
|
|
|
|
if (err)
|
|
|
|
goto out_unlock;
|
2018-08-21 09:21:43 +07:00
|
|
|
|
2019-05-05 10:40:46 +07:00
|
|
|
spin_lock(&sbi->stat_lock);
|
2019-05-30 07:49:06 +07:00
|
|
|
sbi->unusable_block_count = unusable;
|
2019-05-05 10:40:46 +07:00
|
|
|
spin_unlock(&sbi->stat_lock);
|
|
|
|
|
2019-04-26 16:57:54 +07:00
|
|
|
out_unlock:
|
2018-08-21 09:21:43 +07:00
|
|
|
mutex_unlock(&sbi->gc_mutex);
|
2019-01-23 05:04:33 +07:00
|
|
|
restore_flag:
|
|
|
|
sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */
|
|
|
|
return err;
|
2018-08-21 09:21:43 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi)
|
|
|
|
{
|
|
|
|
mutex_lock(&sbi->gc_mutex);
|
|
|
|
f2fs_dirty_to_prefree(sbi);
|
|
|
|
|
|
|
|
clear_sbi_flag(sbi, SBI_CP_DISABLED);
|
|
|
|
set_sbi_flag(sbi, SBI_IS_DIRTY);
|
|
|
|
mutex_unlock(&sbi->gc_mutex);
|
|
|
|
|
|
|
|
f2fs_sync_fs(sbi->sb, 1);
|
|
|
|
}
|
|
|
|
|
2013-06-16 07:48:48 +07:00
|
|
|
static int f2fs_remount(struct super_block *sb, int *flags, char *data)
|
|
|
|
{
|
|
|
|
struct f2fs_sb_info *sbi = F2FS_SB(sb);
|
|
|
|
struct f2fs_mount_info org_mount_opt;
|
2017-07-08 23:13:07 +07:00
|
|
|
unsigned long old_sb_flags;
|
2018-03-08 13:22:56 +07:00
|
|
|
int err;
|
2014-04-11 16:50:00 +07:00
|
|
|
bool need_restart_gc = false;
|
|
|
|
bool need_stop_gc = false;
|
2015-09-18 15:55:26 +07:00
|
|
|
bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE);
|
2018-08-21 09:21:43 +07:00
|
|
|
bool disable_checkpoint = test_opt(sbi, DISABLE_CHECKPOINT);
|
|
|
|
bool checkpoint_changed;
|
2017-08-08 09:54:31 +07:00
|
|
|
#ifdef CONFIG_QUOTA
|
|
|
|
int i, j;
|
|
|
|
#endif
|
2013-06-16 07:48:48 +07:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Save the old mount options in case we
|
|
|
|
* need to restore them.
|
|
|
|
*/
|
|
|
|
org_mount_opt = sbi->mount_opt;
|
2017-07-08 23:13:07 +07:00
|
|
|
old_sb_flags = sb->s_flags;
|
2013-06-16 07:48:48 +07:00
|
|
|
|
2017-08-08 09:54:31 +07:00
|
|
|
#ifdef CONFIG_QUOTA
|
2018-03-08 13:22:56 +07:00
|
|
|
org_mount_opt.s_jquota_fmt = F2FS_OPTION(sbi).s_jquota_fmt;
|
2017-08-08 09:54:31 +07:00
|
|
|
for (i = 0; i < MAXQUOTAS; i++) {
|
2018-03-08 13:22:56 +07:00
|
|
|
if (F2FS_OPTION(sbi).s_qf_names[i]) {
|
|
|
|
org_mount_opt.s_qf_names[i] =
|
|
|
|
kstrdup(F2FS_OPTION(sbi).s_qf_names[i],
|
|
|
|
GFP_KERNEL);
|
|
|
|
if (!org_mount_opt.s_qf_names[i]) {
|
2017-08-08 09:54:31 +07:00
|
|
|
for (j = 0; j < i; j++)
|
2018-12-14 09:38:33 +07:00
|
|
|
kvfree(org_mount_opt.s_qf_names[j]);
|
2017-08-08 09:54:31 +07:00
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
} else {
|
2018-03-08 13:22:56 +07:00
|
|
|
org_mount_opt.s_qf_names[i] = NULL;
|
2017-08-08 09:54:31 +07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2016-03-24 07:05:27 +07:00
|
|
|
/* recover superblocks we couldn't write due to previous RO mount */
|
2017-11-28 04:05:09 +07:00
|
|
|
if (!(*flags & SB_RDONLY) && is_sbi_flag_set(sbi, SBI_NEED_SB_WRITE)) {
|
2016-03-24 07:05:27 +07:00
|
|
|
err = f2fs_commit_super(sbi, false);
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_info(sbi, "Try to recover all the superblocks, ret: %d",
|
|
|
|
err);
|
2016-03-24 07:05:27 +07:00
|
|
|
if (!err)
|
|
|
|
clear_sbi_flag(sbi, SBI_NEED_SB_WRITE);
|
|
|
|
}
|
|
|
|
|
2015-05-07 17:11:37 +07:00
|
|
|
default_options(sbi);
|
2014-09-15 17:04:44 +07:00
|
|
|
|
2013-06-16 07:48:48 +07:00
|
|
|
/* parse mount options */
|
|
|
|
err = parse_options(sb, data);
|
|
|
|
if (err)
|
|
|
|
goto restore_opts;
|
2018-08-21 09:21:43 +07:00
|
|
|
checkpoint_changed =
|
|
|
|
disable_checkpoint != test_opt(sbi, DISABLE_CHECKPOINT);
|
2013-06-16 07:48:48 +07:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Previous and new state of filesystem is RO,
|
2014-04-11 16:50:00 +07:00
|
|
|
* so skip checking GC and FLUSH_MERGE conditions.
|
2013-06-16 07:48:48 +07:00
|
|
|
*/
|
2017-11-28 04:05:09 +07:00
|
|
|
if (f2fs_readonly(sb) && (*flags & SB_RDONLY))
|
2013-06-16 07:48:48 +07:00
|
|
|
goto skip;
|
|
|
|
|
2017-10-06 23:14:28 +07:00
|
|
|
#ifdef CONFIG_QUOTA
|
2017-11-28 04:05:09 +07:00
|
|
|
if (!f2fs_readonly(sb) && (*flags & SB_RDONLY)) {
|
2017-07-08 23:13:07 +07:00
|
|
|
err = dquot_suspend(sb, -1);
|
|
|
|
if (err < 0)
|
|
|
|
goto restore_opts;
|
2018-08-21 09:21:43 +07:00
|
|
|
} else if (f2fs_readonly(sb) && !(*flags & SB_RDONLY)) {
|
2017-07-08 23:13:07 +07:00
|
|
|
/* dquot_resume needs RW */
|
2017-11-28 04:05:09 +07:00
|
|
|
sb->s_flags &= ~SB_RDONLY;
|
2017-10-06 23:14:28 +07:00
|
|
|
if (sb_any_quota_suspended(sb)) {
|
|
|
|
dquot_resume(sb, -1);
|
2018-10-24 17:34:26 +07:00
|
|
|
} else if (f2fs_sb_has_quota_ino(sbi)) {
|
2017-10-06 23:14:28 +07:00
|
|
|
err = f2fs_enable_quotas(sb);
|
|
|
|
if (err)
|
|
|
|
goto restore_opts;
|
|
|
|
}
|
2017-07-08 23:13:07 +07:00
|
|
|
}
|
2017-10-06 23:14:28 +07:00
|
|
|
#endif
|
2015-09-18 15:55:26 +07:00
|
|
|
/* disallow enable/disable extent_cache dynamically */
|
|
|
|
if (no_extent_cache == !!test_opt(sbi, EXTENT_CACHE)) {
|
|
|
|
err = -EINVAL;
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_warn(sbi, "switch extent_cache option is not allowed");
|
2015-09-18 15:55:26 +07:00
|
|
|
goto restore_opts;
|
|
|
|
}
|
|
|
|
|
2018-08-21 09:21:43 +07:00
|
|
|
if ((*flags & SB_RDONLY) && test_opt(sbi, DISABLE_CHECKPOINT)) {
|
|
|
|
err = -EINVAL;
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_warn(sbi, "disabling checkpoint not compatible with read-only");
|
2018-08-21 09:21:43 +07:00
|
|
|
goto restore_opts;
|
|
|
|
}
|
|
|
|
|
2013-06-16 07:48:48 +07:00
|
|
|
/*
|
|
|
|
* We stop the GC thread if FS is mounted as RO
|
|
|
|
* or if background_gc = off is passed in mount
|
|
|
|
* option. Also sync the filesystem.
|
|
|
|
*/
|
2017-11-28 04:05:09 +07:00
|
|
|
if ((*flags & SB_RDONLY) || !test_opt(sbi, BG_GC)) {
|
2013-06-16 07:48:48 +07:00
|
|
|
if (sbi->gc_thread) {
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
f2fs_stop_gc_thread(sbi);
|
2014-04-11 16:50:00 +07:00
|
|
|
need_restart_gc = true;
|
2013-06-16 07:48:48 +07:00
|
|
|
}
|
2014-11-18 10:17:20 +07:00
|
|
|
} else if (!sbi->gc_thread) {
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
err = f2fs_start_gc_thread(sbi);
|
2013-06-16 07:48:48 +07:00
|
|
|
if (err)
|
|
|
|
goto restore_opts;
|
2014-04-11 16:50:00 +07:00
|
|
|
need_stop_gc = true;
|
|
|
|
}
|
|
|
|
|
2018-03-08 13:22:56 +07:00
|
|
|
if (*flags & SB_RDONLY ||
|
|
|
|
F2FS_OPTION(sbi).whint_mode != org_mount_opt.whint_mode) {
|
2016-03-25 00:29:39 +07:00
|
|
|
writeback_inodes_sb(sb, WB_REASON_SYNC);
|
|
|
|
sync_inodes_sb(sb);
|
|
|
|
|
|
|
|
set_sbi_flag(sbi, SBI_IS_DIRTY);
|
|
|
|
set_sbi_flag(sbi, SBI_IS_CLOSE);
|
|
|
|
f2fs_sync_fs(sb, 1);
|
|
|
|
clear_sbi_flag(sbi, SBI_IS_CLOSE);
|
|
|
|
}
|
|
|
|
|
2018-08-21 09:21:43 +07:00
|
|
|
if (checkpoint_changed) {
|
|
|
|
if (test_opt(sbi, DISABLE_CHECKPOINT)) {
|
|
|
|
err = f2fs_disable_checkpoint(sbi);
|
|
|
|
if (err)
|
|
|
|
goto restore_gc;
|
|
|
|
} else {
|
|
|
|
f2fs_enable_checkpoint(sbi);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-04-11 16:50:00 +07:00
|
|
|
/*
|
|
|
|
* We stop issue flush thread if FS is mounted as RO
|
|
|
|
* or if flush_merge is not passed in mount option.
|
|
|
|
*/
|
2017-11-28 04:05:09 +07:00
|
|
|
if ((*flags & SB_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) {
|
2016-12-08 07:23:32 +07:00
|
|
|
clear_opt(sbi, FLUSH_MERGE);
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
f2fs_destroy_flush_cmd_control(sbi, false);
|
2016-12-08 07:23:32 +07:00
|
|
|
} else {
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
err = f2fs_create_flush_cmd_control(sbi);
|
2014-04-27 13:21:33 +07:00
|
|
|
if (err)
|
2014-04-27 13:21:21 +07:00
|
|
|
goto restore_gc;
|
2013-06-16 07:48:48 +07:00
|
|
|
}
|
|
|
|
skip:
|
2017-08-08 09:54:31 +07:00
|
|
|
#ifdef CONFIG_QUOTA
|
|
|
|
/* Release old quota file names */
|
|
|
|
for (i = 0; i < MAXQUOTAS; i++)
|
2018-12-14 09:38:33 +07:00
|
|
|
kvfree(org_mount_opt.s_qf_names[i]);
|
2017-08-08 09:54:31 +07:00
|
|
|
#endif
|
2013-06-16 07:48:48 +07:00
|
|
|
/* Update the POSIXACL Flag */
|
2017-11-28 04:05:09 +07:00
|
|
|
sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
|
|
|
|
(test_opt(sbi, POSIX_ACL) ? SB_POSIXACL : 0);
|
2016-03-24 07:05:27 +07:00
|
|
|
|
2017-12-28 06:05:52 +07:00
|
|
|
limit_reserve_root(sbi);
|
2018-09-28 14:24:39 +07:00
|
|
|
*flags = (*flags & ~SB_LAZYTIME) | (sb->s_flags & SB_LAZYTIME);
|
2013-06-16 07:48:48 +07:00
|
|
|
return 0;
|
2014-04-11 16:50:00 +07:00
|
|
|
restore_gc:
|
|
|
|
if (need_restart_gc) {
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
if (f2fs_start_gc_thread(sbi))
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_warn(sbi, "background gc thread has stopped");
|
2014-04-11 16:50:00 +07:00
|
|
|
} else if (need_stop_gc) {
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
f2fs_stop_gc_thread(sbi);
|
2014-04-11 16:50:00 +07:00
|
|
|
}
|
2013-06-16 07:48:48 +07:00
|
|
|
restore_opts:
|
2017-08-08 09:54:31 +07:00
|
|
|
#ifdef CONFIG_QUOTA
|
2018-03-08 13:22:56 +07:00
|
|
|
F2FS_OPTION(sbi).s_jquota_fmt = org_mount_opt.s_jquota_fmt;
|
2017-08-08 09:54:31 +07:00
|
|
|
for (i = 0; i < MAXQUOTAS; i++) {
|
2018-12-14 09:38:33 +07:00
|
|
|
kvfree(F2FS_OPTION(sbi).s_qf_names[i]);
|
2018-03-08 13:22:56 +07:00
|
|
|
F2FS_OPTION(sbi).s_qf_names[i] = org_mount_opt.s_qf_names[i];
|
2017-08-08 09:54:31 +07:00
|
|
|
}
|
|
|
|
#endif
|
2013-06-16 07:48:48 +07:00
|
|
|
sbi->mount_opt = org_mount_opt;
|
2017-07-08 23:13:07 +07:00
|
|
|
sb->s_flags = old_sb_flags;
|
2013-06-16 07:48:48 +07:00
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2017-07-08 23:13:07 +07:00
|
|
|
#ifdef CONFIG_QUOTA
|
|
|
|
/* Read data from quotafile */
|
|
|
|
static ssize_t f2fs_quota_read(struct super_block *sb, int type, char *data,
|
|
|
|
size_t len, loff_t off)
|
|
|
|
{
|
|
|
|
struct inode *inode = sb_dqopt(sb)->files[type];
|
|
|
|
struct address_space *mapping = inode->i_mapping;
|
|
|
|
block_t blkidx = F2FS_BYTES_TO_BLK(off);
|
|
|
|
int offset = off & (sb->s_blocksize - 1);
|
|
|
|
int tocopy;
|
|
|
|
size_t toread;
|
|
|
|
loff_t i_size = i_size_read(inode);
|
|
|
|
struct page *page;
|
|
|
|
char *kaddr;
|
|
|
|
|
|
|
|
if (off > i_size)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (off + len > i_size)
|
|
|
|
len = i_size - off;
|
|
|
|
toread = len;
|
|
|
|
while (toread > 0) {
|
|
|
|
tocopy = min_t(unsigned long, sb->s_blocksize - offset, toread);
|
|
|
|
repeat:
|
2018-03-16 20:23:53 +07:00
|
|
|
page = read_cache_page_gfp(mapping, blkidx, GFP_NOFS);
|
2017-10-19 23:43:56 +07:00
|
|
|
if (IS_ERR(page)) {
|
|
|
|
if (PTR_ERR(page) == -ENOMEM) {
|
|
|
|
congestion_wait(BLK_RW_ASYNC, HZ/50);
|
|
|
|
goto repeat;
|
|
|
|
}
|
f2fs: guarantee journalled quota data by checkpoint
For journalled quota mode, let checkpoint to flush dquot dirty data
and quota file data to guarntee persistence of all quota sysfile in
last checkpoint, by this way, we can avoid corrupting quota sysfile
when encountering SPO.
The implementation is as below:
1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there is
cached dquot metadata changes in quota subsystem, and later checkpoint
should:
a) flush dquot metadata into quota file.
b) flush quota file to storage to keep file usage be consistent.
2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that quota
operation failed due to -EIO or -ENOSPC, so later,
a) checkpoint will skip syncing dquot metadata.
b) CP_QUOTA_NEED_FSCK_FLAG will be set in last cp pack to give a
hint for fsck repairing.
3. add a global state SBI_QUOTA_SKIP_FLUSH, in checkpoint, if quota
data updating is very heavy, it may cause hungtask in block_operation().
To avoid this, if our retry time exceed threshold, let's just skip
flushing and retry in next checkpoint().
Signed-off-by: Weichao Guo <guoweichao@huawei.com>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: avoid warnings and set fsck flag]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-09-20 19:05:00 +07:00
|
|
|
set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
|
2017-07-08 23:13:07 +07:00
|
|
|
return PTR_ERR(page);
|
2017-10-19 23:43:56 +07:00
|
|
|
}
|
2017-07-08 23:13:07 +07:00
|
|
|
|
|
|
|
lock_page(page);
|
|
|
|
|
|
|
|
if (unlikely(page->mapping != mapping)) {
|
|
|
|
f2fs_put_page(page, 1);
|
|
|
|
goto repeat;
|
|
|
|
}
|
|
|
|
if (unlikely(!PageUptodate(page))) {
|
|
|
|
f2fs_put_page(page, 1);
|
f2fs: guarantee journalled quota data by checkpoint
For journalled quota mode, let checkpoint to flush dquot dirty data
and quota file data to guarntee persistence of all quota sysfile in
last checkpoint, by this way, we can avoid corrupting quota sysfile
when encountering SPO.
The implementation is as below:
1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there is
cached dquot metadata changes in quota subsystem, and later checkpoint
should:
a) flush dquot metadata into quota file.
b) flush quota file to storage to keep file usage be consistent.
2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that quota
operation failed due to -EIO or -ENOSPC, so later,
a) checkpoint will skip syncing dquot metadata.
b) CP_QUOTA_NEED_FSCK_FLAG will be set in last cp pack to give a
hint for fsck repairing.
3. add a global state SBI_QUOTA_SKIP_FLUSH, in checkpoint, if quota
data updating is very heavy, it may cause hungtask in block_operation().
To avoid this, if our retry time exceed threshold, let's just skip
flushing and retry in next checkpoint().
Signed-off-by: Weichao Guo <guoweichao@huawei.com>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: avoid warnings and set fsck flag]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-09-20 19:05:00 +07:00
|
|
|
set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
|
2017-07-08 23:13:07 +07:00
|
|
|
return -EIO;
|
|
|
|
}
|
|
|
|
|
|
|
|
kaddr = kmap_atomic(page);
|
|
|
|
memcpy(data, kaddr + offset, tocopy);
|
|
|
|
kunmap_atomic(kaddr);
|
|
|
|
f2fs_put_page(page, 1);
|
|
|
|
|
|
|
|
offset = 0;
|
|
|
|
toread -= tocopy;
|
|
|
|
data += tocopy;
|
|
|
|
blkidx++;
|
|
|
|
}
|
|
|
|
return len;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Write to quotafile */
|
|
|
|
static ssize_t f2fs_quota_write(struct super_block *sb, int type,
|
|
|
|
const char *data, size_t len, loff_t off)
|
|
|
|
{
|
|
|
|
struct inode *inode = sb_dqopt(sb)->files[type];
|
|
|
|
struct address_space *mapping = inode->i_mapping;
|
|
|
|
const struct address_space_operations *a_ops = mapping->a_ops;
|
|
|
|
int offset = off & (sb->s_blocksize - 1);
|
|
|
|
size_t towrite = len;
|
|
|
|
struct page *page;
|
|
|
|
char *kaddr;
|
|
|
|
int err = 0;
|
|
|
|
int tocopy;
|
|
|
|
|
|
|
|
while (towrite > 0) {
|
|
|
|
tocopy = min_t(unsigned long, sb->s_blocksize - offset,
|
|
|
|
towrite);
|
2017-10-19 23:43:56 +07:00
|
|
|
retry:
|
2017-07-08 23:13:07 +07:00
|
|
|
err = a_ops->write_begin(NULL, mapping, off, tocopy, 0,
|
|
|
|
&page, NULL);
|
2017-10-19 23:43:56 +07:00
|
|
|
if (unlikely(err)) {
|
|
|
|
if (err == -ENOMEM) {
|
|
|
|
congestion_wait(BLK_RW_ASYNC, HZ/50);
|
|
|
|
goto retry;
|
|
|
|
}
|
f2fs: guarantee journalled quota data by checkpoint
For journalled quota mode, let checkpoint to flush dquot dirty data
and quota file data to guarntee persistence of all quota sysfile in
last checkpoint, by this way, we can avoid corrupting quota sysfile
when encountering SPO.
The implementation is as below:
1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there is
cached dquot metadata changes in quota subsystem, and later checkpoint
should:
a) flush dquot metadata into quota file.
b) flush quota file to storage to keep file usage be consistent.
2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that quota
operation failed due to -EIO or -ENOSPC, so later,
a) checkpoint will skip syncing dquot metadata.
b) CP_QUOTA_NEED_FSCK_FLAG will be set in last cp pack to give a
hint for fsck repairing.
3. add a global state SBI_QUOTA_SKIP_FLUSH, in checkpoint, if quota
data updating is very heavy, it may cause hungtask in block_operation().
To avoid this, if our retry time exceed threshold, let's just skip
flushing and retry in next checkpoint().
Signed-off-by: Weichao Guo <guoweichao@huawei.com>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: avoid warnings and set fsck flag]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-09-20 19:05:00 +07:00
|
|
|
set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
|
2017-07-08 23:13:07 +07:00
|
|
|
break;
|
2017-10-19 23:43:56 +07:00
|
|
|
}
|
2017-07-08 23:13:07 +07:00
|
|
|
|
|
|
|
kaddr = kmap_atomic(page);
|
|
|
|
memcpy(kaddr + offset, data, tocopy);
|
|
|
|
kunmap_atomic(kaddr);
|
|
|
|
flush_dcache_page(page);
|
|
|
|
|
|
|
|
a_ops->write_end(NULL, mapping, off, tocopy, tocopy,
|
|
|
|
page, NULL);
|
|
|
|
offset = 0;
|
|
|
|
towrite -= tocopy;
|
|
|
|
off += tocopy;
|
|
|
|
data += tocopy;
|
|
|
|
cond_resched();
|
|
|
|
}
|
|
|
|
|
|
|
|
if (len == towrite)
|
2017-10-20 02:07:11 +07:00
|
|
|
return err;
|
2017-07-08 23:13:07 +07:00
|
|
|
inode->i_mtime = inode->i_ctime = current_time(inode);
|
|
|
|
f2fs_mark_inode_dirty_sync(inode, false);
|
|
|
|
return len - towrite;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct dquot **f2fs_get_dquots(struct inode *inode)
|
|
|
|
{
|
|
|
|
return F2FS_I(inode)->i_dquot;
|
|
|
|
}
|
|
|
|
|
|
|
|
static qsize_t *f2fs_get_reserved_space(struct inode *inode)
|
|
|
|
{
|
|
|
|
return &F2FS_I(inode)->i_reserved_quota;
|
|
|
|
}
|
|
|
|
|
2017-08-08 09:54:31 +07:00
|
|
|
static int f2fs_quota_on_mount(struct f2fs_sb_info *sbi, int type)
|
|
|
|
{
|
f2fs: guarantee journalled quota data by checkpoint
For journalled quota mode, let checkpoint to flush dquot dirty data
and quota file data to guarntee persistence of all quota sysfile in
last checkpoint, by this way, we can avoid corrupting quota sysfile
when encountering SPO.
The implementation is as below:
1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there is
cached dquot metadata changes in quota subsystem, and later checkpoint
should:
a) flush dquot metadata into quota file.
b) flush quota file to storage to keep file usage be consistent.
2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that quota
operation failed due to -EIO or -ENOSPC, so later,
a) checkpoint will skip syncing dquot metadata.
b) CP_QUOTA_NEED_FSCK_FLAG will be set in last cp pack to give a
hint for fsck repairing.
3. add a global state SBI_QUOTA_SKIP_FLUSH, in checkpoint, if quota
data updating is very heavy, it may cause hungtask in block_operation().
To avoid this, if our retry time exceed threshold, let's just skip
flushing and retry in next checkpoint().
Signed-off-by: Weichao Guo <guoweichao@huawei.com>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: avoid warnings and set fsck flag]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-09-20 19:05:00 +07:00
|
|
|
if (is_set_ckpt_flags(sbi, CP_QUOTA_NEED_FSCK_FLAG)) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_err(sbi, "quota sysfile may be corrupted, skip loading it");
|
f2fs: guarantee journalled quota data by checkpoint
For journalled quota mode, let checkpoint to flush dquot dirty data
and quota file data to guarntee persistence of all quota sysfile in
last checkpoint, by this way, we can avoid corrupting quota sysfile
when encountering SPO.
The implementation is as below:
1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there is
cached dquot metadata changes in quota subsystem, and later checkpoint
should:
a) flush dquot metadata into quota file.
b) flush quota file to storage to keep file usage be consistent.
2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that quota
operation failed due to -EIO or -ENOSPC, so later,
a) checkpoint will skip syncing dquot metadata.
b) CP_QUOTA_NEED_FSCK_FLAG will be set in last cp pack to give a
hint for fsck repairing.
3. add a global state SBI_QUOTA_SKIP_FLUSH, in checkpoint, if quota
data updating is very heavy, it may cause hungtask in block_operation().
To avoid this, if our retry time exceed threshold, let's just skip
flushing and retry in next checkpoint().
Signed-off-by: Weichao Guo <guoweichao@huawei.com>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: avoid warnings and set fsck flag]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-09-20 19:05:00 +07:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-03-08 13:22:56 +07:00
|
|
|
return dquot_quota_on_mount(sbi->sb, F2FS_OPTION(sbi).s_qf_names[type],
|
|
|
|
F2FS_OPTION(sbi).s_jquota_fmt, type);
|
2017-08-08 09:54:31 +07:00
|
|
|
}
|
|
|
|
|
2017-10-06 23:14:28 +07:00
|
|
|
/*
 * Turn on quota files for this mount.
 *
 * When the filesystem has the quota_ino feature and @rdonly is set, the
 * quota sysfiles are enabled through f2fs_enable_quotas().  Otherwise every
 * quota type that has a file name in the mount options is enabled through
 * f2fs_quota_on_mount().
 *
 * Returns 1 if quota was enabled (for at least one type in the per-name
 * case), 0 otherwise.  Failures are logged, not propagated.
 */
int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly)
{
	int enabled = 0;
	int i, err;

	if (f2fs_sb_has_quota_ino(sbi) && rdonly) {
		err = f2fs_enable_quotas(sbi->sb);
		if (!err)
			return 1;

		f2fs_err(sbi, "Cannot turn on quota_ino: %d", err);
		return 0;
	}

	for (i = 0; i < MAXQUOTAS; i++) {
		/* no quota file configured for this type */
		if (!F2FS_OPTION(sbi).s_qf_names[i])
			continue;

		err = f2fs_quota_on_mount(sbi, i);
		if (err)
			f2fs_err(sbi, "Cannot turn on quotas: %d on %d",
				 err, i);
		else
			enabled = 1;
	}
	return enabled;
}
|
|
|
|
|
|
|
|
static int f2fs_quota_enable(struct super_block *sb, int type, int format_id,
|
|
|
|
unsigned int flags)
|
|
|
|
{
|
|
|
|
struct inode *qf_inode;
|
|
|
|
unsigned long qf_inum;
|
|
|
|
int err;
|
|
|
|
|
2018-10-24 17:34:26 +07:00
|
|
|
BUG_ON(!f2fs_sb_has_quota_ino(F2FS_SB(sb)));
|
2017-10-06 23:14:28 +07:00
|
|
|
|
|
|
|
qf_inum = f2fs_qf_ino(sb, type);
|
|
|
|
if (!qf_inum)
|
|
|
|
return -EPERM;
|
|
|
|
|
|
|
|
qf_inode = f2fs_iget(sb, qf_inum);
|
|
|
|
if (IS_ERR(qf_inode)) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_err(F2FS_SB(sb), "Bad quota inode %u:%lu", type, qf_inum);
|
2017-10-06 23:14:28 +07:00
|
|
|
return PTR_ERR(qf_inode);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Don't account quota for quota files to avoid recursion */
|
|
|
|
qf_inode->i_flags |= S_NOQUOTA;
|
|
|
|
err = dquot_enable(qf_inode, type, format_id, flags);
|
|
|
|
iput(qf_inode);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int f2fs_enable_quotas(struct super_block *sb)
|
|
|
|
{
|
2019-06-18 16:48:42 +07:00
|
|
|
struct f2fs_sb_info *sbi = F2FS_SB(sb);
|
2017-10-06 23:14:28 +07:00
|
|
|
int type, err = 0;
|
|
|
|
unsigned long qf_inum;
|
|
|
|
bool quota_mopt[MAXQUOTAS] = {
|
2019-06-18 16:48:42 +07:00
|
|
|
test_opt(sbi, USRQUOTA),
|
|
|
|
test_opt(sbi, GRPQUOTA),
|
|
|
|
test_opt(sbi, PRJQUOTA),
|
2017-10-06 23:14:28 +07:00
|
|
|
};
|
|
|
|
|
f2fs: guarantee journalled quota data by checkpoint
For journalled quota mode, let checkpoint to flush dquot dirty data
and quota file data to guarntee persistence of all quota sysfile in
last checkpoint, by this way, we can avoid corrupting quota sysfile
when encountering SPO.
The implementation is as below:
1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there is
cached dquot metadata changes in quota subsystem, and later checkpoint
should:
a) flush dquot metadata into quota file.
b) flush quota file to storage to keep file usage be consistent.
2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that quota
operation failed due to -EIO or -ENOSPC, so later,
a) checkpoint will skip syncing dquot metadata.
b) CP_QUOTA_NEED_FSCK_FLAG will be set in last cp pack to give a
hint for fsck repairing.
3. add a global state SBI_QUOTA_SKIP_FLUSH, in checkpoint, if quota
data updating is very heavy, it may cause hungtask in block_operation().
To avoid this, if our retry time exceed threshold, let's just skip
flushing and retry in next checkpoint().
Signed-off-by: Weichao Guo <guoweichao@huawei.com>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: avoid warnings and set fsck flag]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-09-20 19:05:00 +07:00
|
|
|
if (is_set_ckpt_flags(F2FS_SB(sb), CP_QUOTA_NEED_FSCK_FLAG)) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_err(sbi, "quota file may be corrupted, skip loading it");
|
f2fs: guarantee journalled quota data by checkpoint
For journalled quota mode, let checkpoint to flush dquot dirty data
and quota file data to guarntee persistence of all quota sysfile in
last checkpoint, by this way, we can avoid corrupting quota sysfile
when encountering SPO.
The implementation is as below:
1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there is
cached dquot metadata changes in quota subsystem, and later checkpoint
should:
a) flush dquot metadata into quota file.
b) flush quota file to storage to keep file usage be consistent.
2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that quota
operation failed due to -EIO or -ENOSPC, so later,
a) checkpoint will skip syncing dquot metadata.
b) CP_QUOTA_NEED_FSCK_FLAG will be set in last cp pack to give a
hint for fsck repairing.
3. add a global state SBI_QUOTA_SKIP_FLUSH, in checkpoint, if quota
data updating is very heavy, it may cause hungtask in block_operation().
To avoid this, if our retry time exceed threshold, let's just skip
flushing and retry in next checkpoint().
Signed-off-by: Weichao Guo <guoweichao@huawei.com>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: avoid warnings and set fsck flag]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-09-20 19:05:00 +07:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
|
|
|
|
|
2017-10-06 23:14:28 +07:00
|
|
|
for (type = 0; type < MAXQUOTAS; type++) {
|
|
|
|
qf_inum = f2fs_qf_ino(sb, type);
|
|
|
|
if (qf_inum) {
|
|
|
|
err = f2fs_quota_enable(sb, type, QFMT_VFS_V1,
|
|
|
|
DQUOT_USAGE_ENABLED |
|
|
|
|
(quota_mopt[type] ? DQUOT_LIMITS_ENABLED : 0));
|
|
|
|
if (err) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_err(sbi, "Failed to enable quota tracking (type=%d, err=%d). Please run fsck to fix.",
|
|
|
|
type, err);
|
2017-10-06 23:14:28 +07:00
|
|
|
for (type--; type >= 0; type--)
|
|
|
|
dquot_quota_off(sb, type);
|
f2fs: guarantee journalled quota data by checkpoint
For journalled quota mode, let checkpoint to flush dquot dirty data
and quota file data to guarntee persistence of all quota sysfile in
last checkpoint, by this way, we can avoid corrupting quota sysfile
when encountering SPO.
The implementation is as below:
1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there is
cached dquot metadata changes in quota subsystem, and later checkpoint
should:
a) flush dquot metadata into quota file.
b) flush quota file to storage to keep file usage be consistent.
2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that quota
operation failed due to -EIO or -ENOSPC, so later,
a) checkpoint will skip syncing dquot metadata.
b) CP_QUOTA_NEED_FSCK_FLAG will be set in last cp pack to give a
hint for fsck repairing.
3. add a global state SBI_QUOTA_SKIP_FLUSH, in checkpoint, if quota
data updating is very heavy, it may cause hungtask in block_operation().
To avoid this, if our retry time exceed threshold, let's just skip
flushing and retry in next checkpoint().
Signed-off-by: Weichao Guo <guoweichao@huawei.com>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: avoid warnings and set fsck flag]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-09-20 19:05:00 +07:00
|
|
|
set_sbi_flag(F2FS_SB(sb),
|
|
|
|
SBI_QUOTA_NEED_REPAIR);
|
2017-10-06 23:14:28 +07:00
|
|
|
return err;
|
|
|
|
}
|
2017-08-08 09:54:31 +07:00
|
|
|
}
|
|
|
|
}
|
2017-10-06 23:14:28 +07:00
|
|
|
return 0;
|
2017-08-08 09:54:31 +07:00
|
|
|
}
|
|
|
|
|
f2fs: guarantee journalled quota data by checkpoint
For journalled quota mode, let checkpoint flush dquot dirty data
and quota file data to guarantee persistence of all quota sysfiles in
the last checkpoint; this way, we can avoid corrupting the quota sysfile
when encountering SPO.
The implementation is as below:
1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there is
cached dquot metadata changes in quota subsystem, and later checkpoint
should:
a) flush dquot metadata into quota file.
b) flush quota file to storage to keep file usage be consistent.
2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that quota
operation failed due to -EIO or -ENOSPC, so later,
a) checkpoint will skip syncing dquot metadata.
b) CP_QUOTA_NEED_FSCK_FLAG will be set in last cp pack to give a
hint for fsck repairing.
3. add a global state SBI_QUOTA_SKIP_FLUSH, in checkpoint, if quota
data updating is very heavy, it may cause hungtask in block_operation().
To avoid this, if our retry time exceed threshold, let's just skip
flushing and retry in next checkpoint().
Signed-off-by: Weichao Guo <guoweichao@huawei.com>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: avoid warnings and set fsck flag]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-09-20 19:05:00 +07:00
|
|
|
int f2fs_quota_sync(struct super_block *sb, int type)
|
2017-07-08 23:13:07 +07:00
|
|
|
{
|
f2fs: guarantee journalled quota data by checkpoint
For journalled quota mode, let checkpoint to flush dquot dirty data
and quota file data to guarntee persistence of all quota sysfile in
last checkpoint, by this way, we can avoid corrupting quota sysfile
when encountering SPO.
The implementation is as below:
1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there is
cached dquot metadata changes in quota subsystem, and later checkpoint
should:
a) flush dquot metadata into quota file.
b) flush quota file to storage to keep file usage be consistent.
2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that quota
operation failed due to -EIO or -ENOSPC, so later,
a) checkpoint will skip syncing dquot metadata.
b) CP_QUOTA_NEED_FSCK_FLAG will be set in last cp pack to give a
hint for fsck repairing.
3. add a global state SBI_QUOTA_SKIP_FLUSH, in checkpoint, if quota
data updating is very heavy, it may cause hungtask in block_operation().
To avoid this, if our retry time exceed threshold, let's just skip
flushing and retry in next checkpoint().
Signed-off-by: Weichao Guo <guoweichao@huawei.com>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: avoid warnings and set fsck flag]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-09-20 19:05:00 +07:00
|
|
|
struct f2fs_sb_info *sbi = F2FS_SB(sb);
|
2017-07-08 23:13:07 +07:00
|
|
|
struct quota_info *dqopt = sb_dqopt(sb);
|
|
|
|
int cnt;
|
|
|
|
int ret;
|
|
|
|
|
2019-05-30 00:58:45 +07:00
|
|
|
/*
|
|
|
|
* do_quotactl
|
|
|
|
* f2fs_quota_sync
|
|
|
|
* down_read(quota_sem)
|
|
|
|
* dquot_writeback_dquots()
|
|
|
|
* f2fs_dquot_commit
|
|
|
|
* block_operation
|
|
|
|
* down_read(quota_sem)
|
|
|
|
*/
|
|
|
|
f2fs_lock_op(sbi);
|
|
|
|
|
|
|
|
down_read(&sbi->quota_sem);
|
2017-07-08 23:13:07 +07:00
|
|
|
ret = dquot_writeback_dquots(sb, type);
|
|
|
|
if (ret)
|
f2fs: guarantee journalled quota data by checkpoint
For journalled quota mode, let checkpoint to flush dquot dirty data
and quota file data to guarntee persistence of all quota sysfile in
last checkpoint, by this way, we can avoid corrupting quota sysfile
when encountering SPO.
The implementation is as below:
1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there is
cached dquot metadata changes in quota subsystem, and later checkpoint
should:
a) flush dquot metadata into quota file.
b) flush quota file to storage to keep file usage be consistent.
2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that quota
operation failed due to -EIO or -ENOSPC, so later,
a) checkpoint will skip syncing dquot metadata.
b) CP_QUOTA_NEED_FSCK_FLAG will be set in last cp pack to give a
hint for fsck repairing.
3. add a global state SBI_QUOTA_SKIP_FLUSH, in checkpoint, if quota
data updating is very heavy, it may cause hungtask in block_operation().
To avoid this, if our retry time exceed threshold, let's just skip
flushing and retry in next checkpoint().
Signed-off-by: Weichao Guo <guoweichao@huawei.com>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: avoid warnings and set fsck flag]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-09-20 19:05:00 +07:00
|
|
|
goto out;
|
2017-07-08 23:13:07 +07:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Now when everything is written we can discard the pagecache so
|
|
|
|
* that userspace sees the changes.
|
|
|
|
*/
|
|
|
|
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
|
f2fs: guarantee journalled quota data by checkpoint
For journalled quota mode, let checkpoint to flush dquot dirty data
and quota file data to guarntee persistence of all quota sysfile in
last checkpoint, by this way, we can avoid corrupting quota sysfile
when encountering SPO.
The implementation is as below:
1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there is
cached dquot metadata changes in quota subsystem, and later checkpoint
should:
a) flush dquot metadata into quota file.
b) flush quota file to storage to keep file usage be consistent.
2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that quota
operation failed due to -EIO or -ENOSPC, so later,
a) checkpoint will skip syncing dquot metadata.
b) CP_QUOTA_NEED_FSCK_FLAG will be set in last cp pack to give a
hint for fsck repairing.
3. add a global state SBI_QUOTA_SKIP_FLUSH, in checkpoint, if quota
data updating is very heavy, it may cause hungtask in block_operation().
To avoid this, if our retry time exceed threshold, let's just skip
flushing and retry in next checkpoint().
Signed-off-by: Weichao Guo <guoweichao@huawei.com>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: avoid warnings and set fsck flag]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-09-20 19:05:00 +07:00
|
|
|
struct address_space *mapping;
|
|
|
|
|
2017-07-08 23:13:07 +07:00
|
|
|
if (type != -1 && cnt != type)
|
|
|
|
continue;
|
|
|
|
if (!sb_has_quota_active(sb, cnt))
|
|
|
|
continue;
|
|
|
|
|
f2fs: guarantee journalled quota data by checkpoint
For journalled quota mode, let checkpoint to flush dquot dirty data
and quota file data to guarntee persistence of all quota sysfile in
last checkpoint, by this way, we can avoid corrupting quota sysfile
when encountering SPO.
The implementation is as below:
1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there is
cached dquot metadata changes in quota subsystem, and later checkpoint
should:
a) flush dquot metadata into quota file.
b) flush quota file to storage to keep file usage be consistent.
2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that quota
operation failed due to -EIO or -ENOSPC, so later,
a) checkpoint will skip syncing dquot metadata.
b) CP_QUOTA_NEED_FSCK_FLAG will be set in last cp pack to give a
hint for fsck repairing.
3. add a global state SBI_QUOTA_SKIP_FLUSH, in checkpoint, if quota
data updating is very heavy, it may cause hungtask in block_operation().
To avoid this, if our retry time exceed threshold, let's just skip
flushing and retry in next checkpoint().
Signed-off-by: Weichao Guo <guoweichao@huawei.com>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: avoid warnings and set fsck flag]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-09-20 19:05:00 +07:00
|
|
|
mapping = dqopt->files[cnt]->i_mapping;
|
|
|
|
|
|
|
|
ret = filemap_fdatawrite(mapping);
|
2017-07-08 23:13:07 +07:00
|
|
|
if (ret)
|
f2fs: guarantee journalled quota data by checkpoint
For journalled quota mode, let checkpoint to flush dquot dirty data
and quota file data to guarntee persistence of all quota sysfile in
last checkpoint, by this way, we can avoid corrupting quota sysfile
when encountering SPO.
The implementation is as below:
1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there is
cached dquot metadata changes in quota subsystem, and later checkpoint
should:
a) flush dquot metadata into quota file.
b) flush quota file to storage to keep file usage be consistent.
2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that quota
operation failed due to -EIO or -ENOSPC, so later,
a) checkpoint will skip syncing dquot metadata.
b) CP_QUOTA_NEED_FSCK_FLAG will be set in last cp pack to give a
hint for fsck repairing.
3. add a global state SBI_QUOTA_SKIP_FLUSH, in checkpoint, if quota
data updating is very heavy, it may cause hungtask in block_operation().
To avoid this, if our retry time exceed threshold, let's just skip
flushing and retry in next checkpoint().
Signed-off-by: Weichao Guo <guoweichao@huawei.com>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: avoid warnings and set fsck flag]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-09-20 19:05:00 +07:00
|
|
|
goto out;
|
|
|
|
|
|
|
|
/* if we are using journalled quota */
|
|
|
|
if (is_journalled_quota(sbi))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
ret = filemap_fdatawait(mapping);
|
|
|
|
if (ret)
|
|
|
|
set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
|
2017-07-08 23:13:07 +07:00
|
|
|
|
|
|
|
inode_lock(dqopt->files[cnt]);
|
|
|
|
truncate_inode_pages(&dqopt->files[cnt]->i_data, 0);
|
|
|
|
inode_unlock(dqopt->files[cnt]);
|
|
|
|
}
|
f2fs: guarantee journalled quota data by checkpoint
For journalled quota mode, let checkpoint to flush dquot dirty data
and quota file data to guarntee persistence of all quota sysfile in
last checkpoint, by this way, we can avoid corrupting quota sysfile
when encountering SPO.
The implementation is as below:
1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there is
cached dquot metadata changes in quota subsystem, and later checkpoint
should:
a) flush dquot metadata into quota file.
b) flush quota file to storage to keep file usage be consistent.
2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that quota
operation failed due to -EIO or -ENOSPC, so later,
a) checkpoint will skip syncing dquot metadata.
b) CP_QUOTA_NEED_FSCK_FLAG will be set in last cp pack to give a
hint for fsck repairing.
3. add a global state SBI_QUOTA_SKIP_FLUSH, in checkpoint, if quota
data updating is very heavy, it may cause hungtask in block_operation().
To avoid this, if our retry time exceed threshold, let's just skip
flushing and retry in next checkpoint().
Signed-off-by: Weichao Guo <guoweichao@huawei.com>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: avoid warnings and set fsck flag]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-09-20 19:05:00 +07:00
|
|
|
out:
|
|
|
|
if (ret)
|
|
|
|
set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
|
2019-05-30 00:58:45 +07:00
|
|
|
up_read(&sbi->quota_sem);
|
|
|
|
f2fs_unlock_op(sbi);
|
f2fs: guarantee journalled quota data by checkpoint
For journalled quota mode, let checkpoint to flush dquot dirty data
and quota file data to guarntee persistence of all quota sysfile in
last checkpoint, by this way, we can avoid corrupting quota sysfile
when encountering SPO.
The implementation is as below:
1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there is
cached dquot metadata changes in quota subsystem, and later checkpoint
should:
a) flush dquot metadata into quota file.
b) flush quota file to storage to keep file usage be consistent.
2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that quota
operation failed due to -EIO or -ENOSPC, so later,
a) checkpoint will skip syncing dquot metadata.
b) CP_QUOTA_NEED_FSCK_FLAG will be set in last cp pack to give a
hint for fsck repairing.
3. add a global state SBI_QUOTA_SKIP_FLUSH, in checkpoint, if quota
data updating is very heavy, it may cause hungtask in block_operation().
To avoid this, if our retry time exceed threshold, let's just skip
flushing and retry in next checkpoint().
Signed-off-by: Weichao Guo <guoweichao@huawei.com>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: avoid warnings and set fsck flag]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-09-20 19:05:00 +07:00
|
|
|
return ret;
|
2017-07-08 23:13:07 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
static int f2fs_quota_on(struct super_block *sb, int type, int format_id,
|
|
|
|
const struct path *path)
|
|
|
|
{
|
|
|
|
struct inode *inode;
|
|
|
|
int err;
|
|
|
|
|
2017-08-07 15:37:59 +07:00
|
|
|
err = f2fs_quota_sync(sb, type);
|
2017-07-08 23:13:07 +07:00
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
err = dquot_quota_on(sb, type, format_id, path);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
inode = d_inode(path->dentry);
|
|
|
|
|
|
|
|
inode_lock(inode);
|
2018-04-03 14:08:17 +07:00
|
|
|
F2FS_I(inode)->i_flags |= F2FS_NOATIME_FL | F2FS_IMMUTABLE_FL;
|
2018-10-07 18:06:15 +07:00
|
|
|
f2fs_set_inode_flags(inode);
|
2017-07-08 23:13:07 +07:00
|
|
|
inode_unlock(inode);
|
|
|
|
f2fs_mark_inode_dirty_sync(inode, false);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int f2fs_quota_off(struct super_block *sb, int type)
|
|
|
|
{
|
|
|
|
struct inode *inode = sb_dqopt(sb)->files[type];
|
|
|
|
int err;
|
|
|
|
|
|
|
|
if (!inode || !igrab(inode))
|
|
|
|
return dquot_quota_off(sb, type);
|
|
|
|
|
2018-06-26 12:12:43 +07:00
|
|
|
err = f2fs_quota_sync(sb, type);
|
|
|
|
if (err)
|
|
|
|
goto out_put;
|
2017-07-08 23:13:07 +07:00
|
|
|
|
|
|
|
err = dquot_quota_off(sb, type);
|
2018-10-24 17:34:26 +07:00
|
|
|
if (err || f2fs_sb_has_quota_ino(F2FS_SB(sb)))
|
2017-07-08 23:13:07 +07:00
|
|
|
goto out_put;
|
|
|
|
|
|
|
|
inode_lock(inode);
|
2018-04-03 14:08:17 +07:00
|
|
|
F2FS_I(inode)->i_flags &= ~(F2FS_NOATIME_FL | F2FS_IMMUTABLE_FL);
|
2018-10-07 18:06:15 +07:00
|
|
|
f2fs_set_inode_flags(inode);
|
2017-07-08 23:13:07 +07:00
|
|
|
inode_unlock(inode);
|
|
|
|
f2fs_mark_inode_dirty_sync(inode, false);
|
|
|
|
out_put:
|
|
|
|
iput(inode);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2017-08-08 09:54:31 +07:00
|
|
|
void f2fs_quota_off_umount(struct super_block *sb)
|
2017-07-08 23:13:07 +07:00
|
|
|
{
|
|
|
|
int type;
|
2018-06-26 12:12:43 +07:00
|
|
|
int err;
|
|
|
|
|
|
|
|
for (type = 0; type < MAXQUOTAS; type++) {
|
|
|
|
err = f2fs_quota_off(sb, type);
|
|
|
|
if (err) {
|
|
|
|
int ret = dquot_quota_off(sb, type);
|
2017-07-08 23:13:07 +07:00
|
|
|
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_err(F2FS_SB(sb), "Fail to turn off disk quota (type: %d, err: %d, ret:%d), Please run fsck to fix it.",
|
|
|
|
type, err, ret);
|
f2fs: guarantee journalled quota data by checkpoint
For journalled quota mode, let checkpoint to flush dquot dirty data
and quota file data to guarntee persistence of all quota sysfile in
last checkpoint, by this way, we can avoid corrupting quota sysfile
when encountering SPO.
The implementation is as below:
1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there is
cached dquot metadata changes in quota subsystem, and later checkpoint
should:
a) flush dquot metadata into quota file.
b) flush quota file to storage to keep file usage be consistent.
2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that quota
operation failed due to -EIO or -ENOSPC, so later,
a) checkpoint will skip syncing dquot metadata.
b) CP_QUOTA_NEED_FSCK_FLAG will be set in last cp pack to give a
hint for fsck repairing.
3. add a global state SBI_QUOTA_SKIP_FLUSH, in checkpoint, if quota
data updating is very heavy, it may cause hungtask in block_operation().
To avoid this, if our retry time exceed threshold, let's just skip
flushing and retry in next checkpoint().
Signed-off-by: Weichao Guo <guoweichao@huawei.com>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: avoid warnings and set fsck flag]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-09-20 19:05:00 +07:00
|
|
|
set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
|
2018-06-26 12:12:43 +07:00
|
|
|
}
|
|
|
|
}
|
2019-01-28 08:59:53 +07:00
|
|
|
/*
|
|
|
|
* In case of checkpoint=disable, we must flush quota blocks.
|
|
|
|
* This can cause NULL exception for node_inode in end_io, since
|
|
|
|
* put_super already dropped it.
|
|
|
|
*/
|
|
|
|
sync_filesystem(sb);
|
2017-07-08 23:13:07 +07:00
|
|
|
}
|
|
|
|
|
2018-10-12 17:49:26 +07:00
|
|
|
static void f2fs_truncate_quota_inode_pages(struct super_block *sb)
|
|
|
|
{
|
|
|
|
struct quota_info *dqopt = sb_dqopt(sb);
|
|
|
|
int type;
|
|
|
|
|
|
|
|
for (type = 0; type < MAXQUOTAS; type++) {
|
|
|
|
if (!dqopt->files[type])
|
|
|
|
continue;
|
|
|
|
f2fs_inode_synced(dqopt->files[type]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
f2fs: guarantee journalled quota data by checkpoint
For journalled quota mode, let checkpoint flush dquot dirty data
and quota file data to guarantee persistence of all quota sysfiles in
the last checkpoint; this way, we can avoid corrupting quota sysfiles
when encountering SPO (sudden power-off).
The implementation is as below:
1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there is
cached dquot metadata changes in quota subsystem, and later checkpoint
should:
a) flush dquot metadata into quota file.
b) flush quota file to storage to keep file usage be consistent.
2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that quota
operation failed due to -EIO or -ENOSPC, so later,
a) checkpoint will skip syncing dquot metadata.
b) CP_QUOTA_NEED_FSCK_FLAG will be set in last cp pack to give a
hint for fsck repairing.
3. add a global state SBI_QUOTA_SKIP_FLUSH, in checkpoint, if quota
data updating is very heavy, it may cause hungtask in block_operation().
To avoid this, if our retry time exceed threshold, let's just skip
flushing and retry in next checkpoint().
Signed-off-by: Weichao Guo <guoweichao@huawei.com>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: avoid warnings and set fsck flag]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-09-20 19:05:00 +07:00
|
|
|
static int f2fs_dquot_commit(struct dquot *dquot)
|
|
|
|
{
|
2019-05-30 00:58:45 +07:00
|
|
|
struct f2fs_sb_info *sbi = F2FS_SB(dquot->dq_sb);
|
f2fs: guarantee journalled quota data by checkpoint
For journalled quota mode, let checkpoint to flush dquot dirty data
and quota file data to guarntee persistence of all quota sysfile in
last checkpoint, by this way, we can avoid corrupting quota sysfile
when encountering SPO.
The implementation is as below:
1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there is
cached dquot metadata changes in quota subsystem, and later checkpoint
should:
a) flush dquot metadata into quota file.
b) flush quota file to storage to keep file usage be consistent.
2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that quota
operation failed due to -EIO or -ENOSPC, so later,
a) checkpoint will skip syncing dquot metadata.
b) CP_QUOTA_NEED_FSCK_FLAG will be set in last cp pack to give a
hint for fsck repairing.
3. add a global state SBI_QUOTA_SKIP_FLUSH, in checkpoint, if quota
data updating is very heavy, it may cause hungtask in block_operation().
To avoid this, if our retry time exceed threshold, let's just skip
flushing and retry in next checkpoint().
Signed-off-by: Weichao Guo <guoweichao@huawei.com>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: avoid warnings and set fsck flag]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-09-20 19:05:00 +07:00
|
|
|
int ret;
|
|
|
|
|
2019-05-30 00:58:45 +07:00
|
|
|
down_read(&sbi->quota_sem);
|
f2fs: guarantee journalled quota data by checkpoint
For journalled quota mode, let checkpoint to flush dquot dirty data
and quota file data to guarntee persistence of all quota sysfile in
last checkpoint, by this way, we can avoid corrupting quota sysfile
when encountering SPO.
The implementation is as below:
1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there is
cached dquot metadata changes in quota subsystem, and later checkpoint
should:
a) flush dquot metadata into quota file.
b) flush quota file to storage to keep file usage be consistent.
2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that quota
operation failed due to -EIO or -ENOSPC, so later,
a) checkpoint will skip syncing dquot metadata.
b) CP_QUOTA_NEED_FSCK_FLAG will be set in last cp pack to give a
hint for fsck repairing.
3. add a global state SBI_QUOTA_SKIP_FLUSH, in checkpoint, if quota
data updating is very heavy, it may cause hungtask in block_operation().
To avoid this, if our retry time exceed threshold, let's just skip
flushing and retry in next checkpoint().
Signed-off-by: Weichao Guo <guoweichao@huawei.com>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: avoid warnings and set fsck flag]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-09-20 19:05:00 +07:00
|
|
|
ret = dquot_commit(dquot);
|
|
|
|
if (ret < 0)
|
2019-05-30 00:58:45 +07:00
|
|
|
set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
|
|
|
|
up_read(&sbi->quota_sem);
|
f2fs: guarantee journalled quota data by checkpoint
For journalled quota mode, let checkpoint to flush dquot dirty data
and quota file data to guarntee persistence of all quota sysfile in
last checkpoint, by this way, we can avoid corrupting quota sysfile
when encountering SPO.
The implementation is as below:
1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there is
cached dquot metadata changes in quota subsystem, and later checkpoint
should:
a) flush dquot metadata into quota file.
b) flush quota file to storage to keep file usage be consistent.
2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that quota
operation failed due to -EIO or -ENOSPC, so later,
a) checkpoint will skip syncing dquot metadata.
b) CP_QUOTA_NEED_FSCK_FLAG will be set in last cp pack to give a
hint for fsck repairing.
3. add a global state SBI_QUOTA_SKIP_FLUSH, in checkpoint, if quota
data updating is very heavy, it may cause hungtask in block_operation().
To avoid this, if our retry time exceed threshold, let's just skip
flushing and retry in next checkpoint().
Signed-off-by: Weichao Guo <guoweichao@huawei.com>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: avoid warnings and set fsck flag]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-09-20 19:05:00 +07:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int f2fs_dquot_acquire(struct dquot *dquot)
|
|
|
|
{
|
2019-05-30 00:58:45 +07:00
|
|
|
struct f2fs_sb_info *sbi = F2FS_SB(dquot->dq_sb);
|
f2fs: guarantee journalled quota data by checkpoint
For journalled quota mode, let checkpoint to flush dquot dirty data
and quota file data to guarntee persistence of all quota sysfile in
last checkpoint, by this way, we can avoid corrupting quota sysfile
when encountering SPO.
The implementation is as below:
1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there is
cached dquot metadata changes in quota subsystem, and later checkpoint
should:
a) flush dquot metadata into quota file.
b) flush quota file to storage to keep file usage be consistent.
2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that quota
operation failed due to -EIO or -ENOSPC, so later,
a) checkpoint will skip syncing dquot metadata.
b) CP_QUOTA_NEED_FSCK_FLAG will be set in last cp pack to give a
hint for fsck repairing.
3. add a global state SBI_QUOTA_SKIP_FLUSH, in checkpoint, if quota
data updating is very heavy, it may cause hungtask in block_operation().
To avoid this, if our retry time exceed threshold, let's just skip
flushing and retry in next checkpoint().
Signed-off-by: Weichao Guo <guoweichao@huawei.com>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: avoid warnings and set fsck flag]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-09-20 19:05:00 +07:00
|
|
|
int ret;
|
|
|
|
|
2019-05-30 00:58:45 +07:00
|
|
|
down_read(&sbi->quota_sem);
|
f2fs: guarantee journalled quota data by checkpoint
For journalled quota mode, let checkpoint to flush dquot dirty data
and quota file data to guarntee persistence of all quota sysfile in
last checkpoint, by this way, we can avoid corrupting quota sysfile
when encountering SPO.
The implementation is as below:
1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there is
cached dquot metadata changes in quota subsystem, and later checkpoint
should:
a) flush dquot metadata into quota file.
b) flush quota file to storage to keep file usage be consistent.
2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that quota
operation failed due to -EIO or -ENOSPC, so later,
a) checkpoint will skip syncing dquot metadata.
b) CP_QUOTA_NEED_FSCK_FLAG will be set in last cp pack to give a
hint for fsck repairing.
3. add a global state SBI_QUOTA_SKIP_FLUSH, in checkpoint, if quota
data updating is very heavy, it may cause hungtask in block_operation().
To avoid this, if our retry time exceed threshold, let's just skip
flushing and retry in next checkpoint().
Signed-off-by: Weichao Guo <guoweichao@huawei.com>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: avoid warnings and set fsck flag]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-09-20 19:05:00 +07:00
|
|
|
ret = dquot_acquire(dquot);
|
|
|
|
if (ret < 0)
|
2019-05-30 00:58:45 +07:00
|
|
|
set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
|
|
|
|
up_read(&sbi->quota_sem);
|
f2fs: guarantee journalled quota data by checkpoint
For journalled quota mode, let checkpoint to flush dquot dirty data
and quota file data to guarntee persistence of all quota sysfile in
last checkpoint, by this way, we can avoid corrupting quota sysfile
when encountering SPO.
The implementation is as below:
1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there is
cached dquot metadata changes in quota subsystem, and later checkpoint
should:
a) flush dquot metadata into quota file.
b) flush quota file to storage to keep file usage be consistent.
2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that quota
operation failed due to -EIO or -ENOSPC, so later,
a) checkpoint will skip syncing dquot metadata.
b) CP_QUOTA_NEED_FSCK_FLAG will be set in last cp pack to give a
hint for fsck repairing.
3. add a global state SBI_QUOTA_SKIP_FLUSH, in checkpoint, if quota
data updating is very heavy, it may cause hungtask in block_operation().
To avoid this, if our retry time exceed threshold, let's just skip
flushing and retry in next checkpoint().
Signed-off-by: Weichao Guo <guoweichao@huawei.com>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: avoid warnings and set fsck flag]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-09-20 19:05:00 +07:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int f2fs_dquot_release(struct dquot *dquot)
|
|
|
|
{
|
2019-05-30 00:58:45 +07:00
|
|
|
struct f2fs_sb_info *sbi = F2FS_SB(dquot->dq_sb);
|
f2fs: guarantee journalled quota data by checkpoint
For journalled quota mode, let checkpoint to flush dquot dirty data
and quota file data to guarntee persistence of all quota sysfile in
last checkpoint, by this way, we can avoid corrupting quota sysfile
when encountering SPO.
The implementation is as below:
1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there is
cached dquot metadata changes in quota subsystem, and later checkpoint
should:
a) flush dquot metadata into quota file.
b) flush quota file to storage to keep file usage be consistent.
2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that quota
operation failed due to -EIO or -ENOSPC, so later,
a) checkpoint will skip syncing dquot metadata.
b) CP_QUOTA_NEED_FSCK_FLAG will be set in last cp pack to give a
hint for fsck repairing.
3. add a global state SBI_QUOTA_SKIP_FLUSH, in checkpoint, if quota
data updating is very heavy, it may cause hungtask in block_operation().
To avoid this, if our retry time exceed threshold, let's just skip
flushing and retry in next checkpoint().
Signed-off-by: Weichao Guo <guoweichao@huawei.com>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: avoid warnings and set fsck flag]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-09-20 19:05:00 +07:00
|
|
|
int ret;
|
|
|
|
|
2019-05-30 00:58:45 +07:00
|
|
|
down_read(&sbi->quota_sem);
|
f2fs: guarantee journalled quota data by checkpoint
For journalled quota mode, let checkpoint to flush dquot dirty data
and quota file data to guarntee persistence of all quota sysfile in
last checkpoint, by this way, we can avoid corrupting quota sysfile
when encountering SPO.
The implementation is as below:
1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there is
cached dquot metadata changes in quota subsystem, and later checkpoint
should:
a) flush dquot metadata into quota file.
b) flush quota file to storage to keep file usage be consistent.
2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that quota
operation failed due to -EIO or -ENOSPC, so later,
a) checkpoint will skip syncing dquot metadata.
b) CP_QUOTA_NEED_FSCK_FLAG will be set in last cp pack to give a
hint for fsck repairing.
3. add a global state SBI_QUOTA_SKIP_FLUSH, in checkpoint, if quota
data updating is very heavy, it may cause hungtask in block_operation().
To avoid this, if our retry time exceed threshold, let's just skip
flushing and retry in next checkpoint().
Signed-off-by: Weichao Guo <guoweichao@huawei.com>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: avoid warnings and set fsck flag]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-09-20 19:05:00 +07:00
|
|
|
ret = dquot_release(dquot);
|
|
|
|
if (ret < 0)
|
2019-05-30 00:58:45 +07:00
|
|
|
set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
|
|
|
|
up_read(&sbi->quota_sem);
|
f2fs: guarantee journalled quota data by checkpoint
For journalled quota mode, let checkpoint to flush dquot dirty data
and quota file data to guarntee persistence of all quota sysfile in
last checkpoint, by this way, we can avoid corrupting quota sysfile
when encountering SPO.
The implementation is as below:
1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there is
cached dquot metadata changes in quota subsystem, and later checkpoint
should:
a) flush dquot metadata into quota file.
b) flush quota file to storage to keep file usage be consistent.
2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that quota
operation failed due to -EIO or -ENOSPC, so later,
a) checkpoint will skip syncing dquot metadata.
b) CP_QUOTA_NEED_FSCK_FLAG will be set in last cp pack to give a
hint for fsck repairing.
3. add a global state SBI_QUOTA_SKIP_FLUSH, in checkpoint, if quota
data updating is very heavy, it may cause hungtask in block_operation().
To avoid this, if our retry time exceed threshold, let's just skip
flushing and retry in next checkpoint().
Signed-off-by: Weichao Guo <guoweichao@huawei.com>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: avoid warnings and set fsck flag]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-09-20 19:05:00 +07:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int f2fs_dquot_mark_dquot_dirty(struct dquot *dquot)
|
|
|
|
{
|
|
|
|
struct super_block *sb = dquot->dq_sb;
|
|
|
|
struct f2fs_sb_info *sbi = F2FS_SB(sb);
|
|
|
|
int ret;
|
|
|
|
|
2019-05-30 00:58:45 +07:00
|
|
|
down_read(&sbi->quota_sem);
|
f2fs: guarantee journalled quota data by checkpoint
For journalled quota mode, let checkpoint to flush dquot dirty data
and quota file data to guarntee persistence of all quota sysfile in
last checkpoint, by this way, we can avoid corrupting quota sysfile
when encountering SPO.
The implementation is as below:
1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there is
cached dquot metadata changes in quota subsystem, and later checkpoint
should:
a) flush dquot metadata into quota file.
b) flush quota file to storage to keep file usage be consistent.
2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that quota
operation failed due to -EIO or -ENOSPC, so later,
a) checkpoint will skip syncing dquot metadata.
b) CP_QUOTA_NEED_FSCK_FLAG will be set in last cp pack to give a
hint for fsck repairing.
3. add a global state SBI_QUOTA_SKIP_FLUSH, in checkpoint, if quota
data updating is very heavy, it may cause hungtask in block_operation().
To avoid this, if our retry time exceed threshold, let's just skip
flushing and retry in next checkpoint().
Signed-off-by: Weichao Guo <guoweichao@huawei.com>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: avoid warnings and set fsck flag]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-09-20 19:05:00 +07:00
|
|
|
ret = dquot_mark_dquot_dirty(dquot);
|
|
|
|
|
|
|
|
/* if we are using journalled quota */
|
|
|
|
if (is_journalled_quota(sbi))
|
|
|
|
set_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
|
|
|
|
|
2019-05-30 00:58:45 +07:00
|
|
|
up_read(&sbi->quota_sem);
|
f2fs: guarantee journalled quota data by checkpoint
For journalled quota mode, let checkpoint to flush dquot dirty data
and quota file data to guarntee persistence of all quota sysfile in
last checkpoint, by this way, we can avoid corrupting quota sysfile
when encountering SPO.
The implementation is as below:
1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there is
cached dquot metadata changes in quota subsystem, and later checkpoint
should:
a) flush dquot metadata into quota file.
b) flush quota file to storage to keep file usage be consistent.
2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that quota
operation failed due to -EIO or -ENOSPC, so later,
a) checkpoint will skip syncing dquot metadata.
b) CP_QUOTA_NEED_FSCK_FLAG will be set in last cp pack to give a
hint for fsck repairing.
3. add a global state SBI_QUOTA_SKIP_FLUSH, in checkpoint, if quota
data updating is very heavy, it may cause hungtask in block_operation().
To avoid this, if our retry time exceed threshold, let's just skip
flushing and retry in next checkpoint().
Signed-off-by: Weichao Guo <guoweichao@huawei.com>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: avoid warnings and set fsck flag]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-09-20 19:05:00 +07:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int f2fs_dquot_commit_info(struct super_block *sb, int type)
|
|
|
|
{
|
2019-05-30 00:58:45 +07:00
|
|
|
struct f2fs_sb_info *sbi = F2FS_SB(sb);
|
f2fs: guarantee journalled quota data by checkpoint
For journalled quota mode, let checkpoint to flush dquot dirty data
and quota file data to guarntee persistence of all quota sysfile in
last checkpoint, by this way, we can avoid corrupting quota sysfile
when encountering SPO.
The implementation is as below:
1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there is
cached dquot metadata changes in quota subsystem, and later checkpoint
should:
a) flush dquot metadata into quota file.
b) flush quota file to storage to keep file usage be consistent.
2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that quota
operation failed due to -EIO or -ENOSPC, so later,
a) checkpoint will skip syncing dquot metadata.
b) CP_QUOTA_NEED_FSCK_FLAG will be set in last cp pack to give a
hint for fsck repairing.
3. add a global state SBI_QUOTA_SKIP_FLUSH, in checkpoint, if quota
data updating is very heavy, it may cause hungtask in block_operation().
To avoid this, if our retry time exceed threshold, let's just skip
flushing and retry in next checkpoint().
Signed-off-by: Weichao Guo <guoweichao@huawei.com>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: avoid warnings and set fsck flag]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-09-20 19:05:00 +07:00
|
|
|
int ret;
|
|
|
|
|
2019-05-30 00:58:45 +07:00
|
|
|
down_read(&sbi->quota_sem);
|
f2fs: guarantee journalled quota data by checkpoint
For journalled quota mode, let checkpoint to flush dquot dirty data
and quota file data to guarntee persistence of all quota sysfile in
last checkpoint, by this way, we can avoid corrupting quota sysfile
when encountering SPO.
The implementation is as below:
1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there is
cached dquot metadata changes in quota subsystem, and later checkpoint
should:
a) flush dquot metadata into quota file.
b) flush quota file to storage to keep file usage be consistent.
2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that quota
operation failed due to -EIO or -ENOSPC, so later,
a) checkpoint will skip syncing dquot metadata.
b) CP_QUOTA_NEED_FSCK_FLAG will be set in last cp pack to give a
hint for fsck repairing.
3. add a global state SBI_QUOTA_SKIP_FLUSH, in checkpoint, if quota
data updating is very heavy, it may cause hungtask in block_operation().
To avoid this, if our retry time exceed threshold, let's just skip
flushing and retry in next checkpoint().
Signed-off-by: Weichao Guo <guoweichao@huawei.com>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: avoid warnings and set fsck flag]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-09-20 19:05:00 +07:00
|
|
|
ret = dquot_commit_info(sb, type);
|
|
|
|
if (ret < 0)
|
2019-05-30 00:58:45 +07:00
|
|
|
set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
|
|
|
|
up_read(&sbi->quota_sem);
|
f2fs: guarantee journalled quota data by checkpoint
For journalled quota mode, let checkpoint to flush dquot dirty data
and quota file data to guarntee persistence of all quota sysfile in
last checkpoint, by this way, we can avoid corrupting quota sysfile
when encountering SPO.
The implementation is as below:
1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there is
cached dquot metadata changes in quota subsystem, and later checkpoint
should:
a) flush dquot metadata into quota file.
b) flush quota file to storage to keep file usage be consistent.
2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that quota
operation failed due to -EIO or -ENOSPC, so later,
a) checkpoint will skip syncing dquot metadata.
b) CP_QUOTA_NEED_FSCK_FLAG will be set in last cp pack to give a
hint for fsck repairing.
3. add a global state SBI_QUOTA_SKIP_FLUSH, in checkpoint, if quota
data updating is very heavy, it may cause hungtask in block_operation().
To avoid this, if our retry time exceed threshold, let's just skip
flushing and retry in next checkpoint().
Signed-off-by: Weichao Guo <guoweichao@huawei.com>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: avoid warnings and set fsck flag]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-09-20 19:05:00 +07:00
|
|
|
return ret;
|
|
|
|
}
|
2018-10-12 17:49:26 +07:00
|
|
|
|
2018-01-05 16:41:20 +07:00
|
|
|
/*
 * ->get_projid hook: store the project id kept in the f2fs inode info of
 * @inode into @projid. Always returns 0.
 */
static int f2fs_get_projid(struct inode *inode, kprojid_t *projid)
{
	struct f2fs_inode_info *fi = F2FS_I(inode);

	*projid = fi->i_projid;

	return 0;
}
|
|
|
|
|
2017-07-08 23:13:07 +07:00
|
|
|
static const struct dquot_operations f2fs_quota_operations = {
|
|
|
|
.get_reserved_space = f2fs_get_reserved_space,
|
f2fs: guarantee journalled quota data by checkpoint
For journalled quota mode, let checkpoint to flush dquot dirty data
and quota file data to guarntee persistence of all quota sysfile in
last checkpoint, by this way, we can avoid corrupting quota sysfile
when encountering SPO.
The implementation is as below:
1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there is
cached dquot metadata changes in quota subsystem, and later checkpoint
should:
a) flush dquot metadata into quota file.
b) flush quota file to storage to keep file usage be consistent.
2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that quota
operation failed due to -EIO or -ENOSPC, so later,
a) checkpoint will skip syncing dquot metadata.
b) CP_QUOTA_NEED_FSCK_FLAG will be set in last cp pack to give a
hint for fsck repairing.
3. add a global state SBI_QUOTA_SKIP_FLUSH, in checkpoint, if quota
data updating is very heavy, it may cause hungtask in block_operation().
To avoid this, if our retry time exceed threshold, let's just skip
flushing and retry in next checkpoint().
Signed-off-by: Weichao Guo <guoweichao@huawei.com>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: avoid warnings and set fsck flag]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-09-20 19:05:00 +07:00
|
|
|
.write_dquot = f2fs_dquot_commit,
|
|
|
|
.acquire_dquot = f2fs_dquot_acquire,
|
|
|
|
.release_dquot = f2fs_dquot_release,
|
|
|
|
.mark_dirty = f2fs_dquot_mark_dquot_dirty,
|
|
|
|
.write_info = f2fs_dquot_commit_info,
|
2017-07-08 23:13:07 +07:00
|
|
|
.alloc_dquot = dquot_alloc,
|
|
|
|
.destroy_dquot = dquot_destroy,
|
2017-07-25 23:01:41 +07:00
|
|
|
.get_projid = f2fs_get_projid,
|
2017-07-08 23:13:07 +07:00
|
|
|
.get_next_id = dquot_get_next_id,
|
|
|
|
};
|
|
|
|
|
|
|
|
static const struct quotactl_ops f2fs_quotactl_ops = {
|
|
|
|
.quota_on = f2fs_quota_on,
|
|
|
|
.quota_off = f2fs_quota_off,
|
|
|
|
.quota_sync = f2fs_quota_sync,
|
|
|
|
.get_state = dquot_get_state,
|
|
|
|
.set_info = dquot_set_dqinfo,
|
|
|
|
.get_dqblk = dquot_get_dqblk,
|
|
|
|
.set_dqblk = dquot_set_dqblk,
|
|
|
|
.get_nextdqblk = dquot_get_next_dqblk,
|
|
|
|
};
|
|
|
|
#else
|
f2fs: guarantee journalled quota data by checkpoint
For journalled quota mode, let checkpoint flush dquot dirty data
and quota file data to guarantee persistence of all quota sysfiles in
the last checkpoint; this way, we can avoid corrupting quota sysfiles
when encountering SPO (sudden power-off).
The implementation is as below:
1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there is
cached dquot metadata changes in quota subsystem, and later checkpoint
should:
a) flush dquot metadata into quota file.
b) flush quota file to storage to keep file usage be consistent.
2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that quota
operation failed due to -EIO or -ENOSPC, so later,
a) checkpoint will skip syncing dquot metadata.
b) CP_QUOTA_NEED_FSCK_FLAG will be set in last cp pack to give a
hint for fsck repairing.
3. add a global state SBI_QUOTA_SKIP_FLUSH, in checkpoint, if quota
data updating is very heavy, it may cause hungtask in block_operation().
To avoid this, if our retry time exceed threshold, let's just skip
flushing and retry in next checkpoint().
Signed-off-by: Weichao Guo <guoweichao@huawei.com>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: avoid warnings and set fsck flag]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-09-20 19:05:00 +07:00
|
|
|
/*
 * Stub used in the #else branch of the quota code: there is nothing to
 * sync, so this always reports success.
 */
int f2fs_quota_sync(struct super_block *sb, int type)
{
	return 0;
}
|
|
|
|
|
2017-08-08 09:54:31 +07:00
|
|
|
/*
 * Stub used in the #else branch of the quota code: nothing to turn off.
 */
void f2fs_quota_off_umount(struct super_block *sb)
{
}
|
|
|
|
#endif
|
|
|
|
|
2017-08-31 16:36:24 +07:00
|
|
|
static const struct super_operations f2fs_sops = {
|
2012-11-02 15:07:47 +07:00
|
|
|
.alloc_inode = f2fs_alloc_inode,
|
2019-04-16 06:29:14 +07:00
|
|
|
.free_inode = f2fs_free_inode,
|
2013-04-30 09:33:27 +07:00
|
|
|
.drop_inode = f2fs_drop_inode,
|
2012-11-02 15:07:47 +07:00
|
|
|
.write_inode = f2fs_write_inode,
|
2013-06-10 07:17:01 +07:00
|
|
|
.dirty_inode = f2fs_dirty_inode,
|
2012-11-02 15:07:47 +07:00
|
|
|
.show_options = f2fs_show_options,
|
2017-07-08 23:13:07 +07:00
|
|
|
#ifdef CONFIG_QUOTA
|
|
|
|
.quota_read = f2fs_quota_read,
|
|
|
|
.quota_write = f2fs_quota_write,
|
|
|
|
.get_dquots = f2fs_get_dquots,
|
|
|
|
#endif
|
2012-11-02 15:07:47 +07:00
|
|
|
.evict_inode = f2fs_evict_inode,
|
|
|
|
.put_super = f2fs_put_super,
|
|
|
|
.sync_fs = f2fs_sync_fs,
|
2013-01-29 16:30:07 +07:00
|
|
|
.freeze_fs = f2fs_freeze,
|
|
|
|
.unfreeze_fs = f2fs_unfreeze,
|
2012-11-02 15:07:47 +07:00
|
|
|
.statfs = f2fs_statfs,
|
2013-06-16 07:48:48 +07:00
|
|
|
.remount_fs = f2fs_remount,
|
2012-11-02 15:07:47 +07:00
|
|
|
};
|
|
|
|
|
2018-12-12 16:50:12 +07:00
|
|
|
#ifdef CONFIG_FS_ENCRYPTION
|
2015-05-16 06:26:10 +07:00
|
|
|
/*
 * fscrypt ->get_context hook: read the encryption context xattr of
 * @inode into @ctx (at most @len bytes) via f2fs_getxattr() and return
 * its result.
 */
static int f2fs_get_context(struct inode *inode, void *ctx, size_t len)
{
	int xattr_ret;

	xattr_ret = f2fs_getxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION,
				F2FS_XATTR_NAME_ENCRYPTION_CONTEXT,
				ctx, len, NULL);

	return xattr_ret;
}
|
|
|
|
|
|
|
|
/*
 * fscrypt ->set_context hook: store @ctx (@len bytes) as the encryption
 * context xattr of @inode, creating it with XATTR_CREATE.
 *
 * Returns -EPERM for the root directory when the LOST_FOUND feature is
 * enabled, otherwise the result of f2fs_setxattr().
 */
static int f2fs_set_context(struct inode *inode, const void *ctx, size_t len,
							void *fs_data)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	bool root_must_stay_plain;

	/*
	 * Encrypting the root directory is not allowed because fsck
	 * expects lost+found directory to exist and remain unencrypted
	 * if LOST_FOUND feature is enabled.
	 */
	root_must_stay_plain = f2fs_sb_has_lost_found(sbi) &&
				inode->i_ino == F2FS_ROOT_INO(sbi);
	if (root_must_stay_plain)
		return -EPERM;

	return f2fs_setxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION,
				F2FS_XATTR_NAME_ENCRYPTION_CONTEXT,
				ctx, len, fs_data, XATTR_CREATE);
}
|
|
|
|
|
2018-03-15 17:51:42 +07:00
|
|
|
/*
 * fscrypt ->dummy_context hook: report whether DUMMY_ENCRYPTION_ENABLED()
 * holds for the filesystem that @inode belongs to.
 */
static bool f2fs_dummy_context(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

	return DUMMY_ENCRYPTION_ENABLED(sbi);
}
|
|
|
|
|
2017-02-08 03:42:10 +07:00
|
|
|
static const struct fscrypt_operations f2fs_cryptops = {
|
2017-01-06 04:51:18 +07:00
|
|
|
.key_prefix = "f2fs:",
|
2015-05-16 06:26:10 +07:00
|
|
|
.get_context = f2fs_get_context,
|
|
|
|
.set_context = f2fs_set_context,
|
2018-03-15 17:51:42 +07:00
|
|
|
.dummy_context = f2fs_dummy_context,
|
2015-05-16 06:26:10 +07:00
|
|
|
.empty_dir = f2fs_empty_dir,
|
2018-05-01 05:51:44 +07:00
|
|
|
.max_namelen = F2FS_NAME_LEN,
|
2015-05-16 06:26:10 +07:00
|
|
|
};
|
|
|
|
#endif
|
|
|
|
|
2012-11-02 15:07:47 +07:00
|
|
|
static struct inode *f2fs_nfs_get_inode(struct super_block *sb,
|
|
|
|
u64 ino, u32 generation)
|
|
|
|
{
|
|
|
|
struct f2fs_sb_info *sbi = F2FS_SB(sb);
|
|
|
|
struct inode *inode;
|
|
|
|
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
if (f2fs_check_nid_range(sbi, ino))
|
2014-03-12 16:08:36 +07:00
|
|
|
return ERR_PTR(-ESTALE);
|
2012-11-02 15:07:47 +07:00
|
|
|
|
|
|
|
/*
|
|
|
|
* f2fs_iget isn't quite right if the inode is currently unallocated!
|
|
|
|
* However f2fs_iget currently does appropriate checks to handle stale
|
|
|
|
* inodes so everything is OK.
|
|
|
|
*/
|
|
|
|
inode = f2fs_iget(sb, ino);
|
|
|
|
if (IS_ERR(inode))
|
|
|
|
return ERR_CAST(inode);
|
2013-12-06 13:00:58 +07:00
|
|
|
if (unlikely(generation && inode->i_generation != generation)) {
|
2012-11-02 15:07:47 +07:00
|
|
|
/* we didn't find the right inode.. */
|
|
|
|
iput(inode);
|
|
|
|
return ERR_PTR(-ESTALE);
|
|
|
|
}
|
|
|
|
return inode;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct dentry *f2fs_fh_to_dentry(struct super_block *sb, struct fid *fid,
|
|
|
|
int fh_len, int fh_type)
|
|
|
|
{
|
|
|
|
return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
|
|
|
|
f2fs_nfs_get_inode);
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct dentry *f2fs_fh_to_parent(struct super_block *sb, struct fid *fid,
|
|
|
|
int fh_len, int fh_type)
|
|
|
|
{
|
|
|
|
return generic_fh_to_parent(sb, fid, fh_len, fh_type,
|
|
|
|
f2fs_nfs_get_inode);
|
|
|
|
}
|
|
|
|
|
|
|
|
static const struct export_operations f2fs_export_ops = {
|
|
|
|
.fh_to_dentry = f2fs_fh_to_dentry,
|
|
|
|
.fh_to_parent = f2fs_fh_to_parent,
|
|
|
|
.get_parent = f2fs_get_parent,
|
|
|
|
};
|
|
|
|
|
2015-12-31 13:35:37 +07:00
|
|
|
static loff_t max_file_blocks(void)
|
2012-11-02 15:07:47 +07:00
|
|
|
{
|
2017-07-18 23:19:06 +07:00
|
|
|
loff_t result = 0;
|
2019-03-25 20:08:19 +07:00
|
|
|
loff_t leaf_count = DEF_ADDRS_PER_BLOCK;
|
2012-11-02 15:07:47 +07:00
|
|
|
|
2017-07-18 23:19:06 +07:00
|
|
|
/*
|
|
|
|
* note: previously, result is equal to (DEF_ADDRS_PER_INODE -
|
f2fs: support flexible inline xattr size
Now, in product, more and more features based on file encryption were
introduced, their demand of xattr space is increasing, however, inline
xattr has fixed-size of 200 bytes, once inline xattr space is full, new
increased xattr data would occupy additional xattr block which may bring
us more space usage and performance regression during persisting.
In order to resolve above issue, it's better to expand inline xattr size
flexibly according to user's requirement.
So this patch introduces new filesystem feature 'flexible inline xattr',
and new mount option 'inline_xattr_size=%u', once mkfs enables the
feature, we can use the option to make f2fs supporting flexible inline
xattr size.
To support this feature, we add extra attribute i_inline_xattr_size in
inode layout, indicating that how many space inline xattr borrows from
block address mapping space in inode layout, by this, we can easily
locate and store flexible-sized inline xattr data in inode.
Inode disk layout:
+----------------------+
| .i_mode |
| ... |
| .i_ext |
+----------------------+
| .i_extra_isize |
| .i_inline_xattr_size |-----------+
| ... | |
+----------------------+ |
| .i_addr | |
| - block address or | |
| - inline data | |
+----------------------+<---+ v
| inline xattr | +---inline xattr range
+----------------------+<---+
| .i_nid |
+----------------------+
| node_footer |
| (nid, ino, offset) |
+----------------------+
Note that, we have to cnosider backward compatibility which reserved
inline_data space, 200 bytes, all the time, reported by Sheng Yong.
Previous inline data or directory always reserved 200 bytes in inode layout,
even if inline_xattr is disabled. In order to keep inline_dentry's structure
for backward compatibility, we get the space back only from inline_data.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Reported-by: Sheng Yong <shengyong1@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-09-06 20:59:50 +07:00
|
|
|
* DEFAULT_INLINE_XATTR_ADDRS), but now f2fs try to reserve more
|
2017-07-18 23:19:06 +07:00
|
|
|
* space in inode.i_addr, it will be more safe to reassign
|
|
|
|
* result as zero.
|
|
|
|
*/
|
|
|
|
|
2012-11-02 15:07:47 +07:00
|
|
|
/* two direct node blocks */
|
|
|
|
result += (leaf_count * 2);
|
|
|
|
|
|
|
|
/* two indirect node blocks */
|
|
|
|
leaf_count *= NIDS_PER_BLOCK;
|
|
|
|
result += (leaf_count * 2);
|
|
|
|
|
|
|
|
/* one double indirect node block */
|
|
|
|
leaf_count *= NIDS_PER_BLOCK;
|
|
|
|
result += leaf_count;
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2016-03-21 05:33:20 +07:00
|
|
|
static int __f2fs_commit_super(struct buffer_head *bh,
|
|
|
|
struct f2fs_super_block *super)
|
|
|
|
{
|
|
|
|
lock_buffer(bh);
|
|
|
|
if (super)
|
|
|
|
memcpy(bh->b_data + F2FS_SUPER_OFFSET, super, sizeof(*super));
|
|
|
|
set_buffer_dirty(bh);
|
|
|
|
unlock_buffer(bh);
|
|
|
|
|
|
|
|
/* it's rare case, we can do fua all the time */
|
2017-05-02 22:03:47 +07:00
|
|
|
return __sync_dirty_buffer(bh, REQ_SYNC | REQ_PREFLUSH | REQ_FUA);
|
2016-03-21 05:33:20 +07:00
|
|
|
}
|
|
|
|
|
2016-03-24 07:05:27 +07:00
|
|
|
static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi,
|
2016-03-21 05:33:20 +07:00
|
|
|
struct buffer_head *bh)
|
2015-12-15 08:58:18 +07:00
|
|
|
{
|
2016-03-21 05:33:20 +07:00
|
|
|
struct f2fs_super_block *raw_super = (struct f2fs_super_block *)
|
|
|
|
(bh->b_data + F2FS_SUPER_OFFSET);
|
2016-03-24 07:05:27 +07:00
|
|
|
struct super_block *sb = sbi->sb;
|
2015-12-15 08:58:18 +07:00
|
|
|
u32 segment0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
|
|
|
|
u32 cp_blkaddr = le32_to_cpu(raw_super->cp_blkaddr);
|
|
|
|
u32 sit_blkaddr = le32_to_cpu(raw_super->sit_blkaddr);
|
|
|
|
u32 nat_blkaddr = le32_to_cpu(raw_super->nat_blkaddr);
|
|
|
|
u32 ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
|
|
|
|
u32 main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
|
|
|
|
u32 segment_count_ckpt = le32_to_cpu(raw_super->segment_count_ckpt);
|
|
|
|
u32 segment_count_sit = le32_to_cpu(raw_super->segment_count_sit);
|
|
|
|
u32 segment_count_nat = le32_to_cpu(raw_super->segment_count_nat);
|
|
|
|
u32 segment_count_ssa = le32_to_cpu(raw_super->segment_count_ssa);
|
|
|
|
u32 segment_count_main = le32_to_cpu(raw_super->segment_count_main);
|
|
|
|
u32 segment_count = le32_to_cpu(raw_super->segment_count);
|
|
|
|
u32 log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);
|
2016-03-21 05:33:20 +07:00
|
|
|
u64 main_end_blkaddr = main_blkaddr +
|
|
|
|
(segment_count_main << log_blocks_per_seg);
|
|
|
|
u64 seg_end_blkaddr = segment0_blkaddr +
|
|
|
|
(segment_count << log_blocks_per_seg);
|
2015-12-15 08:58:18 +07:00
|
|
|
|
|
|
|
if (segment0_blkaddr != cp_blkaddr) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_info(sbi, "Mismatch start address, segment0(%u) cp_blkaddr(%u)",
|
|
|
|
segment0_blkaddr, cp_blkaddr);
|
2015-12-15 08:58:18 +07:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (cp_blkaddr + (segment_count_ckpt << log_blocks_per_seg) !=
|
|
|
|
sit_blkaddr) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_info(sbi, "Wrong CP boundary, start(%u) end(%u) blocks(%u)",
|
|
|
|
cp_blkaddr, sit_blkaddr,
|
|
|
|
segment_count_ckpt << log_blocks_per_seg);
|
2015-12-15 08:58:18 +07:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (sit_blkaddr + (segment_count_sit << log_blocks_per_seg) !=
|
|
|
|
nat_blkaddr) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_info(sbi, "Wrong SIT boundary, start(%u) end(%u) blocks(%u)",
|
|
|
|
sit_blkaddr, nat_blkaddr,
|
|
|
|
segment_count_sit << log_blocks_per_seg);
|
2015-12-15 08:58:18 +07:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (nat_blkaddr + (segment_count_nat << log_blocks_per_seg) !=
|
|
|
|
ssa_blkaddr) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_info(sbi, "Wrong NAT boundary, start(%u) end(%u) blocks(%u)",
|
|
|
|
nat_blkaddr, ssa_blkaddr,
|
|
|
|
segment_count_nat << log_blocks_per_seg);
|
2015-12-15 08:58:18 +07:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (ssa_blkaddr + (segment_count_ssa << log_blocks_per_seg) !=
|
|
|
|
main_blkaddr) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_info(sbi, "Wrong SSA boundary, start(%u) end(%u) blocks(%u)",
|
|
|
|
ssa_blkaddr, main_blkaddr,
|
|
|
|
segment_count_ssa << log_blocks_per_seg);
|
2015-12-15 08:58:18 +07:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2016-03-21 05:33:20 +07:00
|
|
|
if (main_end_blkaddr > seg_end_blkaddr) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_info(sbi, "Wrong MAIN_AREA boundary, start(%u) end(%u) block(%u)",
|
|
|
|
main_blkaddr,
|
|
|
|
segment0_blkaddr +
|
|
|
|
(segment_count << log_blocks_per_seg),
|
|
|
|
segment_count_main << log_blocks_per_seg);
|
2015-12-15 08:58:18 +07:00
|
|
|
return true;
|
2016-03-21 05:33:20 +07:00
|
|
|
} else if (main_end_blkaddr < seg_end_blkaddr) {
|
|
|
|
int err = 0;
|
|
|
|
char *res;
|
|
|
|
|
|
|
|
/* fix in-memory information all the time */
|
|
|
|
raw_super->segment_count = cpu_to_le32((main_end_blkaddr -
|
|
|
|
segment0_blkaddr) >> log_blocks_per_seg);
|
|
|
|
|
|
|
|
if (f2fs_readonly(sb) || bdev_read_only(sb->s_bdev)) {
|
2016-03-24 07:05:27 +07:00
|
|
|
set_sbi_flag(sbi, SBI_NEED_SB_WRITE);
|
2016-03-21 05:33:20 +07:00
|
|
|
res = "internally";
|
|
|
|
} else {
|
|
|
|
err = __f2fs_commit_super(bh, NULL);
|
|
|
|
res = err ? "failed" : "done";
|
|
|
|
}
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_info(sbi, "Fix alignment : %s, start(%u) end(%u) block(%u)",
|
|
|
|
res, main_blkaddr,
|
|
|
|
segment0_blkaddr +
|
|
|
|
(segment_count << log_blocks_per_seg),
|
|
|
|
segment_count_main << log_blocks_per_seg);
|
2016-03-21 05:33:20 +07:00
|
|
|
if (err)
|
|
|
|
return true;
|
2015-12-15 08:58:18 +07:00
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2016-03-24 07:05:27 +07:00
|
|
|
static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
|
2016-03-21 05:33:20 +07:00
|
|
|
struct buffer_head *bh)
|
2012-11-02 15:07:47 +07:00
|
|
|
{
|
2018-04-28 09:03:22 +07:00
|
|
|
block_t segment_count, segs_per_sec, secs_per_zone;
|
|
|
|
block_t total_sections, blocks_per_seg;
|
2016-03-21 05:33:20 +07:00
|
|
|
struct f2fs_super_block *raw_super = (struct f2fs_super_block *)
|
|
|
|
(bh->b_data + F2FS_SUPER_OFFSET);
|
2012-11-02 15:07:47 +07:00
|
|
|
unsigned int blocksize;
|
2018-09-28 19:25:56 +07:00
|
|
|
size_t crc_offset = 0;
|
|
|
|
__u32 crc = 0;
|
|
|
|
|
2019-07-25 10:08:52 +07:00
|
|
|
if (le32_to_cpu(raw_super->magic) != F2FS_SUPER_MAGIC) {
|
|
|
|
f2fs_info(sbi, "Magic Mismatch, valid(0x%x) - read(0x%x)",
|
|
|
|
F2FS_SUPER_MAGIC, le32_to_cpu(raw_super->magic));
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2018-09-28 19:25:56 +07:00
|
|
|
/* Check checksum_offset and crc in superblock */
|
2018-10-24 17:34:26 +07:00
|
|
|
if (__F2FS_HAS_FEATURE(raw_super, F2FS_FEATURE_SB_CHKSUM)) {
|
2018-09-28 19:25:56 +07:00
|
|
|
crc_offset = le32_to_cpu(raw_super->checksum_offset);
|
|
|
|
if (crc_offset !=
|
|
|
|
offsetof(struct f2fs_super_block, crc)) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_info(sbi, "Invalid SB checksum offset: %zu",
|
|
|
|
crc_offset);
|
2019-07-25 10:08:52 +07:00
|
|
|
return -EFSCORRUPTED;
|
2018-09-28 19:25:56 +07:00
|
|
|
}
|
|
|
|
crc = le32_to_cpu(raw_super->crc);
|
|
|
|
if (!f2fs_crc_valid(sbi, crc, raw_super, crc_offset)) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_info(sbi, "Invalid SB checksum value: %u", crc);
|
2019-07-25 10:08:52 +07:00
|
|
|
return -EFSCORRUPTED;
|
2018-09-28 19:25:56 +07:00
|
|
|
}
|
|
|
|
}
|
2012-11-02 15:07:47 +07:00
|
|
|
|
2013-02-01 18:07:57 +07:00
|
|
|
/* Currently, support only 4KB page cache size */
|
mm, fs: get rid of PAGE_CACHE_* and page_cache_{get,release} macros
PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} macros were introduced *long* time
ago with promise that one day it will be possible to implement page
cache with bigger chunks than PAGE_SIZE.
This promise never materialized. And unlikely will.
We have many places where PAGE_CACHE_SIZE assumed to be equal to
PAGE_SIZE. And it's constant source of confusion on whether
PAGE_CACHE_* or PAGE_* constant should be used in a particular case,
especially on the border between fs and mm.
Global switching to PAGE_CACHE_SIZE != PAGE_SIZE would cause to much
breakage to be doable.
Let's stop pretending that pages in page cache are special. They are
not.
The changes are pretty straight-forward:
- <foo> << (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- <foo> >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} -> PAGE_{SIZE,SHIFT,MASK,ALIGN};
- page_cache_get() -> get_page();
- page_cache_release() -> put_page();
This patch contains automated changes generated with coccinelle using
script below. For some reason, coccinelle doesn't patch header files.
I've called spatch for them manually.
The only adjustment after coccinelle is revert of changes to
PAGE_CAHCE_ALIGN definition: we are going to drop it later.
There are few places in the code where coccinelle didn't reach. I'll
fix them manually in a separate patch. Comments and documentation also
will be addressed with the separate patch.
virtual patch
@@
expression E;
@@
- E << (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
expression E;
@@
- E >> (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
@@
- PAGE_CACHE_SHIFT
+ PAGE_SHIFT
@@
@@
- PAGE_CACHE_SIZE
+ PAGE_SIZE
@@
@@
- PAGE_CACHE_MASK
+ PAGE_MASK
@@
expression E;
@@
- PAGE_CACHE_ALIGN(E)
+ PAGE_ALIGN(E)
@@
expression E;
@@
- page_cache_get(E)
+ get_page(E)
@@
expression E;
@@
- page_cache_release(E)
+ put_page(E)
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-04-01 19:29:47 +07:00
|
|
|
if (F2FS_BLKSIZE != PAGE_SIZE) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_info(sbi, "Invalid page_cache_size (%lu), supports only 4KB",
|
|
|
|
PAGE_SIZE);
|
2019-07-25 10:08:52 +07:00
|
|
|
return -EFSCORRUPTED;
|
2013-02-01 18:07:57 +07:00
|
|
|
}
|
|
|
|
|
2012-11-02 15:07:47 +07:00
|
|
|
/* Currently, support only 4KB block size */
|
|
|
|
blocksize = 1 << le32_to_cpu(raw_super->log_blocksize);
|
2013-02-01 18:07:57 +07:00
|
|
|
if (blocksize != F2FS_BLKSIZE) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_info(sbi, "Invalid blocksize (%u), supports only 4KB",
|
|
|
|
blocksize);
|
2019-07-25 10:08:52 +07:00
|
|
|
return -EFSCORRUPTED;
|
2012-12-30 12:52:05 +07:00
|
|
|
}
|
2013-02-01 18:07:57 +07:00
|
|
|
|
2015-12-15 08:58:18 +07:00
|
|
|
/* check log blocks per segment */
|
|
|
|
if (le32_to_cpu(raw_super->log_blocks_per_seg) != 9) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_info(sbi, "Invalid log blocks per segment (%u)",
|
|
|
|
le32_to_cpu(raw_super->log_blocks_per_seg));
|
2019-07-25 10:08:52 +07:00
|
|
|
return -EFSCORRUPTED;
|
2015-12-15 08:58:18 +07:00
|
|
|
}
|
|
|
|
|
2014-09-15 17:01:10 +07:00
|
|
|
/* Currently, support 512/1024/2048/4096 bytes sector size */
|
|
|
|
if (le32_to_cpu(raw_super->log_sectorsize) >
|
|
|
|
F2FS_MAX_LOG_SECTOR_SIZE ||
|
|
|
|
le32_to_cpu(raw_super->log_sectorsize) <
|
|
|
|
F2FS_MIN_LOG_SECTOR_SIZE) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_info(sbi, "Invalid log sectorsize (%u)",
|
|
|
|
le32_to_cpu(raw_super->log_sectorsize));
|
2019-07-25 10:08:52 +07:00
|
|
|
return -EFSCORRUPTED;
|
2012-12-30 12:52:05 +07:00
|
|
|
}
|
2014-09-15 17:01:10 +07:00
|
|
|
if (le32_to_cpu(raw_super->log_sectors_per_block) +
|
|
|
|
le32_to_cpu(raw_super->log_sectorsize) !=
|
|
|
|
F2FS_MAX_LOG_SECTOR_SIZE) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_info(sbi, "Invalid log sectors per block(%u) log sectorsize(%u)",
|
|
|
|
le32_to_cpu(raw_super->log_sectors_per_block),
|
|
|
|
le32_to_cpu(raw_super->log_sectorsize));
|
2019-07-25 10:08:52 +07:00
|
|
|
return -EFSCORRUPTED;
|
2012-12-30 12:52:05 +07:00
|
|
|
}
|
2015-12-15 08:58:18 +07:00
|
|
|
|
2018-04-28 09:03:22 +07:00
|
|
|
segment_count = le32_to_cpu(raw_super->segment_count);
|
|
|
|
segs_per_sec = le32_to_cpu(raw_super->segs_per_sec);
|
|
|
|
secs_per_zone = le32_to_cpu(raw_super->secs_per_zone);
|
|
|
|
total_sections = le32_to_cpu(raw_super->section_count);
|
|
|
|
|
|
|
|
/* blocks_per_seg should be 512, given the above check */
|
|
|
|
blocks_per_seg = 1 << le32_to_cpu(raw_super->log_blocks_per_seg);
|
|
|
|
|
|
|
|
if (segment_count > F2FS_MAX_SEGMENT ||
|
|
|
|
segment_count < F2FS_MIN_SEGMENTS) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_info(sbi, "Invalid segment count (%u)", segment_count);
|
2019-07-25 10:08:52 +07:00
|
|
|
return -EFSCORRUPTED;
|
2018-04-28 09:03:22 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
if (total_sections > segment_count ||
|
|
|
|
total_sections < F2FS_MIN_SEGMENTS ||
|
|
|
|
segs_per_sec > segment_count || !segs_per_sec) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_info(sbi, "Invalid segment/section count (%u, %u x %u)",
|
|
|
|
segment_count, total_sections, segs_per_sec);
|
2019-07-25 10:08:52 +07:00
|
|
|
return -EFSCORRUPTED;
|
2018-04-28 09:03:22 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
if ((segment_count / segs_per_sec) < total_sections) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_info(sbi, "Small segment_count (%u < %u * %u)",
|
|
|
|
segment_count, segs_per_sec, total_sections);
|
2019-07-25 10:08:52 +07:00
|
|
|
return -EFSCORRUPTED;
|
2018-04-28 09:03:22 +07:00
|
|
|
}
|
|
|
|
|
2018-12-22 17:22:26 +07:00
|
|
|
if (segment_count > (le64_to_cpu(raw_super->block_count) >> 9)) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_info(sbi, "Wrong segment_count / block_count (%u > %llu)",
|
|
|
|
segment_count, le64_to_cpu(raw_super->block_count));
|
2019-07-25 10:08:52 +07:00
|
|
|
return -EFSCORRUPTED;
|
2018-04-28 09:03:22 +07:00
|
|
|
}
|
|
|
|
|
2018-06-22 23:12:36 +07:00
|
|
|
if (secs_per_zone > total_sections || !secs_per_zone) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_info(sbi, "Wrong secs_per_zone / total_sections (%u, %u)",
|
|
|
|
secs_per_zone, total_sections);
|
2019-07-25 10:08:52 +07:00
|
|
|
return -EFSCORRUPTED;
|
2018-04-28 09:03:22 +07:00
|
|
|
}
|
|
|
|
if (le32_to_cpu(raw_super->extension_count) > F2FS_MAX_EXTENSION ||
|
|
|
|
raw_super->hot_ext_count > F2FS_MAX_EXTENSION ||
|
|
|
|
(le32_to_cpu(raw_super->extension_count) +
|
|
|
|
raw_super->hot_ext_count) > F2FS_MAX_EXTENSION) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_info(sbi, "Corrupted extension count (%u + %u > %u)",
|
|
|
|
le32_to_cpu(raw_super->extension_count),
|
|
|
|
raw_super->hot_ext_count,
|
|
|
|
F2FS_MAX_EXTENSION);
|
2019-07-25 10:08:52 +07:00
|
|
|
return -EFSCORRUPTED;
|
2018-04-28 09:03:22 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
if (le32_to_cpu(raw_super->cp_payload) >
|
|
|
|
(blocks_per_seg - F2FS_CP_PACKS)) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_info(sbi, "Insane cp_payload (%u > %u)",
|
|
|
|
le32_to_cpu(raw_super->cp_payload),
|
|
|
|
blocks_per_seg - F2FS_CP_PACKS);
|
2019-07-25 10:08:52 +07:00
|
|
|
return -EFSCORRUPTED;
|
2018-04-28 09:03:22 +07:00
|
|
|
}
|
|
|
|
|
2015-12-15 08:58:18 +07:00
|
|
|
/* check reserved ino info */
|
|
|
|
if (le32_to_cpu(raw_super->node_ino) != 1 ||
|
|
|
|
le32_to_cpu(raw_super->meta_ino) != 2 ||
|
|
|
|
le32_to_cpu(raw_super->root_ino) != 3) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_info(sbi, "Invalid Fs Meta Ino: node(%u) meta(%u) root(%u)",
|
|
|
|
le32_to_cpu(raw_super->node_ino),
|
|
|
|
le32_to_cpu(raw_super->meta_ino),
|
|
|
|
le32_to_cpu(raw_super->root_ino));
|
2019-07-25 10:08:52 +07:00
|
|
|
return -EFSCORRUPTED;
|
2015-12-15 08:58:18 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
/* check CP/SIT/NAT/SSA/MAIN_AREA area boundary */
|
2016-03-24 07:05:27 +07:00
|
|
|
if (sanity_check_area_boundary(sbi, bh))
|
2019-07-25 10:08:52 +07:00
|
|
|
return -EFSCORRUPTED;
|
2015-12-15 08:58:18 +07:00
|
|
|
|
2012-11-02 15:07:47 +07:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using generically named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
|
2012-11-02 15:07:47 +07:00
|
|
|
{
|
|
|
|
unsigned int total, fsmeta;
|
f2fs: prevent checkpoint once any IO failure is detected
This patch enhances the checkpoint routine to cope with IO errors.
Basically f2fs detects IO errors from end_io_write, and the errors can
occur during any of the data, node, and meta page writes.
In the previous code, when an IO error occurs during writes, f2fs sets a
flag, CP_ERROR_FLAG, in the raw checkpoint buffer which will be written to disk.
Afterwards, write_checkpoint() will check the flag and remount f2fs as a
read-only (ro) mode.
However, even once f2fs is remounted as a ro mode, dirty checkpoint pages are
freely able to be written to disk by flusher or kswapd in background.
In such a case, after cold reboot, f2fs would restore the checkpoint data having
CP_ERROR_FLAG, resulting in disabling write_checkpoint and remounting f2fs as
a ro mode again.
Therefore, let's prevent any checkpoint page (meta) writes once an IO error is
occurred, and remount f2fs as a ro mode right away at that moment.
Reported-by: Oliver Winker <oliver@oli1170.net>
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
Reviewed-by: Namjae Jeon <namjae.jeon@samsung.com>
2013-01-24 17:56:11 +07:00
|
|
|
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
|
|
|
|
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
|
2016-12-06 04:56:04 +07:00
|
|
|
unsigned int ovp_segments, reserved_segments;
|
2017-05-16 00:45:08 +07:00
|
|
|
unsigned int main_segs, blocks_per_seg;
|
f2fs: fix to do sanity check with {sit,nat}_ver_bitmap_bytesize
This patch adds to do sanity check with {sit,nat}_ver_bitmap_bytesize
during mount, in order to avoid accessing across cache boundary with
this abnormal bitmap size.
- Overview
buffer overrun in build_sit_info() when mounting a crafted f2fs image
- Reproduce
- Kernel message
[ 548.580867] F2FS-fs (loop0): Invalid log blocks per segment (8201)
[ 548.580877] F2FS-fs (loop0): Can't find valid F2FS filesystem in 1th superblock
[ 548.584979] ==================================================================
[ 548.586568] BUG: KASAN: use-after-free in kmemdup+0x36/0x50
[ 548.587715] Read of size 64 at addr ffff8801e9c265ff by task mount/1295
[ 548.589428] CPU: 1 PID: 1295 Comm: mount Not tainted 4.18.0-rc1+ #4
[ 548.589432] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
[ 548.589438] Call Trace:
[ 548.589474] dump_stack+0x7b/0xb5
[ 548.589487] print_address_description+0x70/0x290
[ 548.589492] kasan_report+0x291/0x390
[ 548.589496] ? kmemdup+0x36/0x50
[ 548.589509] check_memory_region+0x139/0x190
[ 548.589514] memcpy+0x23/0x50
[ 548.589518] kmemdup+0x36/0x50
[ 548.589545] f2fs_build_segment_manager+0x8fa/0x3410
[ 548.589551] ? __asan_loadN+0xf/0x20
[ 548.589560] ? f2fs_sanity_check_ckpt+0x1be/0x240
[ 548.589566] ? f2fs_flush_sit_entries+0x10c0/0x10c0
[ 548.589587] ? __put_user_ns+0x40/0x40
[ 548.589604] ? find_next_bit+0x57/0x90
[ 548.589610] f2fs_fill_super+0x194b/0x2b40
[ 548.589617] ? f2fs_commit_super+0x1b0/0x1b0
[ 548.589637] ? set_blocksize+0x90/0x140
[ 548.589651] mount_bdev+0x1c5/0x210
[ 548.589655] ? f2fs_commit_super+0x1b0/0x1b0
[ 548.589667] f2fs_mount+0x15/0x20
[ 548.589672] mount_fs+0x60/0x1a0
[ 548.589683] ? alloc_vfsmnt+0x309/0x360
[ 548.589688] vfs_kern_mount+0x6b/0x1a0
[ 548.589699] do_mount+0x34a/0x18c0
[ 548.589710] ? lockref_put_or_lock+0xcf/0x160
[ 548.589716] ? copy_mount_string+0x20/0x20
[ 548.589728] ? memcg_kmem_put_cache+0x1b/0xa0
[ 548.589734] ? kasan_check_write+0x14/0x20
[ 548.589740] ? _copy_from_user+0x6a/0x90
[ 548.589744] ? memdup_user+0x42/0x60
[ 548.589750] ksys_mount+0x83/0xd0
[ 548.589755] __x64_sys_mount+0x67/0x80
[ 548.589781] do_syscall_64+0x78/0x170
[ 548.589797] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 548.589820] RIP: 0033:0x7f76fc331b9a
[ 548.589821] Code: 48 8b 0d 01 c3 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ce c2 2b 00 f7 d8 64 89 01 48
[ 548.589880] RSP: 002b:00007ffd4f0a0e48 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5
[ 548.589890] RAX: ffffffffffffffda RBX: 000000000146c030 RCX: 00007f76fc331b9a
[ 548.589892] RDX: 000000000146c210 RSI: 000000000146df30 RDI: 0000000001474ec0
[ 548.589895] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000013
[ 548.589897] R10: 00000000c0ed0000 R11: 0000000000000206 R12: 0000000001474ec0
[ 548.589900] R13: 000000000146c210 R14: 0000000000000000 R15: 0000000000000003
[ 548.590242] The buggy address belongs to the page:
[ 548.591243] page:ffffea0007a70980 count:0 mapcount:0 mapping:0000000000000000 index:0x0
[ 548.592886] flags: 0x2ffff0000000000()
[ 548.593665] raw: 02ffff0000000000 dead000000000100 dead000000000200 0000000000000000
[ 548.595258] raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000
[ 548.603713] page dumped because: kasan: bad access detected
[ 548.605203] Memory state around the buggy address:
[ 548.606198] ffff8801e9c26480: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[ 548.607676] ffff8801e9c26500: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[ 548.609157] >ffff8801e9c26580: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[ 548.610629] ^
[ 548.612088] ffff8801e9c26600: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[ 548.613674] ffff8801e9c26680: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[ 548.615141] ==================================================================
[ 548.616613] Disabling lock debugging due to kernel taint
[ 548.622871] WARNING: CPU: 1 PID: 1295 at mm/page_alloc.c:4065 __alloc_pages_slowpath+0xe4a/0x1420
[ 548.622878] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_timer snd mac_hid i2c_piix4 soundcore ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx raid1 raid0 multipath linear 8139too crct10dif_pclmul crc32_pclmul qxl drm_kms_helper syscopyarea aesni_intel sysfillrect sysimgblt fb_sys_fops ttm drm aes_x86_64 crypto_simd cryptd 8139cp glue_helper mii pata_acpi floppy
[ 548.623217] CPU: 1 PID: 1295 Comm: mount Tainted: G B 4.18.0-rc1+ #4
[ 548.623219] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
[ 548.623226] RIP: 0010:__alloc_pages_slowpath+0xe4a/0x1420
[ 548.623227] Code: ff ff 01 89 85 c8 fe ff ff e9 91 fc ff ff 41 89 c5 e9 5c fc ff ff 0f 0b 89 f8 25 ff ff f7 ff 89 85 8c fe ff ff e9 d5 f2 ff ff <0f> 0b e9 65 f2 ff ff 65 8b 05 38 81 d2 47 f6 c4 01 74 1c 65 48 8b
[ 548.623281] RSP: 0018:ffff8801f28c7678 EFLAGS: 00010246
[ 548.623284] RAX: 0000000000000000 RBX: 00000000006040c0 RCX: ffffffffb82f73b7
[ 548.623287] RDX: 1ffff1003e518eeb RSI: 000000000000000c RDI: 0000000000000000
[ 548.623290] RBP: ffff8801f28c7880 R08: 0000000000000000 R09: ffffed0047fff2c5
[ 548.623292] R10: 0000000000000001 R11: ffffed0047fff2c4 R12: ffff8801e88de040
[ 548.623295] R13: 00000000006040c0 R14: 000000000000000c R15: ffff8801f28c7938
[ 548.623299] FS: 00007f76fca51840(0000) GS:ffff8801f6f00000(0000) knlGS:0000000000000000
[ 548.623302] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 548.623304] CR2: 00007f19b9171760 CR3: 00000001ed952000 CR4: 00000000000006e0
[ 548.623317] Call Trace:
[ 548.623325] ? kasan_check_read+0x11/0x20
[ 548.623330] ? __zone_watermark_ok+0x92/0x240
[ 548.623336] ? get_page_from_freelist+0x1c3/0x1d90
[ 548.623347] ? _raw_spin_lock_irqsave+0x2a/0x60
[ 548.623353] ? warn_alloc+0x250/0x250
[ 548.623358] ? save_stack+0x46/0xd0
[ 548.623361] ? kasan_kmalloc+0xad/0xe0
[ 548.623366] ? __isolate_free_page+0x2a0/0x2a0
[ 548.623370] ? mount_fs+0x60/0x1a0
[ 548.623374] ? vfs_kern_mount+0x6b/0x1a0
[ 548.623378] ? do_mount+0x34a/0x18c0
[ 548.623383] ? ksys_mount+0x83/0xd0
[ 548.623387] ? __x64_sys_mount+0x67/0x80
[ 548.623391] ? do_syscall_64+0x78/0x170
[ 548.623396] ? entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 548.623401] __alloc_pages_nodemask+0x3c5/0x400
[ 548.623407] ? __alloc_pages_slowpath+0x1420/0x1420
[ 548.623412] ? __mutex_lock_slowpath+0x20/0x20
[ 548.623417] ? kvmalloc_node+0x31/0x80
[ 548.623424] alloc_pages_current+0x75/0x110
[ 548.623436] kmalloc_order+0x24/0x60
[ 548.623442] kmalloc_order_trace+0x24/0xb0
[ 548.623448] __kmalloc_track_caller+0x207/0x220
[ 548.623455] ? f2fs_build_node_manager+0x399/0xbb0
[ 548.623460] kmemdup+0x20/0x50
[ 548.623465] f2fs_build_node_manager+0x399/0xbb0
[ 548.623470] f2fs_fill_super+0x195e/0x2b40
[ 548.623477] ? f2fs_commit_super+0x1b0/0x1b0
[ 548.623481] ? set_blocksize+0x90/0x140
[ 548.623486] mount_bdev+0x1c5/0x210
[ 548.623489] ? f2fs_commit_super+0x1b0/0x1b0
[ 548.623495] f2fs_mount+0x15/0x20
[ 548.623498] mount_fs+0x60/0x1a0
[ 548.623503] ? alloc_vfsmnt+0x309/0x360
[ 548.623508] vfs_kern_mount+0x6b/0x1a0
[ 548.623513] do_mount+0x34a/0x18c0
[ 548.623518] ? lockref_put_or_lock+0xcf/0x160
[ 548.623523] ? copy_mount_string+0x20/0x20
[ 548.623528] ? memcg_kmem_put_cache+0x1b/0xa0
[ 548.623533] ? kasan_check_write+0x14/0x20
[ 548.623537] ? _copy_from_user+0x6a/0x90
[ 548.623542] ? memdup_user+0x42/0x60
[ 548.623547] ksys_mount+0x83/0xd0
[ 548.623552] __x64_sys_mount+0x67/0x80
[ 548.623557] do_syscall_64+0x78/0x170
[ 548.623562] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 548.623566] RIP: 0033:0x7f76fc331b9a
[ 548.623567] Code: 48 8b 0d 01 c3 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ce c2 2b 00 f7 d8 64 89 01 48
[ 548.623632] RSP: 002b:00007ffd4f0a0e48 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5
[ 548.623636] RAX: ffffffffffffffda RBX: 000000000146c030 RCX: 00007f76fc331b9a
[ 548.623639] RDX: 000000000146c210 RSI: 000000000146df30 RDI: 0000000001474ec0
[ 548.623641] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000013
[ 548.623643] R10: 00000000c0ed0000 R11: 0000000000000206 R12: 0000000001474ec0
[ 548.623646] R13: 000000000146c210 R14: 0000000000000000 R15: 0000000000000003
[ 548.623650] ---[ end trace 4ce02f25ff7d3df5 ]---
[ 548.623656] F2FS-fs (loop0): Failed to initialize F2FS node manager
[ 548.627936] F2FS-fs (loop0): Invalid log blocks per segment (8201)
[ 548.627940] F2FS-fs (loop0): Can't find valid F2FS filesystem in 1th superblock
[ 548.635835] F2FS-fs (loop0): Failed to initialize F2FS node manager
- Location
https://elixir.bootlin.com/linux/v4.18-rc1/source/fs/f2fs/segment.c#L3578
sit_i->sit_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
Buffer overrun happens when doing memcpy. I suspect there is missing (inconsistent) checks on bitmap_size.
Reported by Wen Xu (wen.xu@gatech.edu) from SSLab, Gatech.
Reported-by: Wen Xu <wen.xu@gatech.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-06-23 10:25:19 +07:00
|
|
|
unsigned int sit_segs, nat_segs;
|
|
|
|
unsigned int sit_bitmap_size, nat_bitmap_size;
|
|
|
|
unsigned int log_blocks_per_seg;
|
2018-06-27 17:05:54 +07:00
|
|
|
unsigned int segment_count_main;
|
f2fs: fix to do sanity check with cp_pack_start_sum
After fuzzing, cp_pack_start_sum could be corrupted, so the current log's
summary info could be wrong due to loading an incorrect summary block.
Then, if a segment's type in the current log exceeds NR_CURSEG_TYPE, it
can finally lead to accessing an invalid dirty_i->dirty_segmap bitmap.
Add sanity check for cp_pack_start_sum to fix this issue.
https://bugzilla.kernel.org/show_bug.cgi?id=200419
- Reproduce
- Kernel message (f2fs-dev w/ KASAN)
[ 3117.578432] F2FS-fs (loop0): Invalid log blocks per segment (8)
[ 3117.578445] F2FS-fs (loop0): Can't find valid F2FS filesystem in 2th superblock
[ 3117.581364] F2FS-fs (loop0): invalid crc_offset: 30716
[ 3117.583564] WARNING: CPU: 1 PID: 1225 at fs/f2fs/checkpoint.c:90 __get_meta_page+0x448/0x4b0
[ 3117.583570] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_timer joydev input_leds serio_raw snd soundcore mac_hid i2c_piix4 ib_iser rdma_cm iw_cm ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi btrfs zstd_decompress zstd_compress xxhash raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear 8139too qxl ttm drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc aesni_intel psmouse aes_x86_64 8139cp crypto_simd cryptd mii glue_helper pata_acpi floppy
[ 3117.584014] CPU: 1 PID: 1225 Comm: mount Not tainted 4.17.0+ #1
[ 3117.584017] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
[ 3117.584022] RIP: 0010:__get_meta_page+0x448/0x4b0
[ 3117.584023] Code: 00 49 8d bc 24 84 00 00 00 e8 74 54 da ff 41 83 8c 24 84 00 00 00 08 4c 89 f6 4c 89 ef e8 c0 d9 95 00 48 89 ef e8 18 e3 00 00 <0f> 0b f0 80 4d 48 04 e9 0f fe ff ff 0f 0b 48 89 c7 48 89 04 24 e8
[ 3117.584072] RSP: 0018:ffff88018eb678c0 EFLAGS: 00010286
[ 3117.584082] RAX: ffff88018f0a6a78 RBX: ffffea0007a46600 RCX: ffffffff9314d1b2
[ 3117.584085] RDX: ffffffff00000001 RSI: 0000000000000000 RDI: ffff88018f0a6a98
[ 3117.584087] RBP: ffff88018ebe9980 R08: 0000000000000002 R09: 0000000000000001
[ 3117.584090] R10: 0000000000000001 R11: ffffed00326e4450 R12: ffff880193722200
[ 3117.584092] R13: ffff88018ebe9afc R14: 0000000000000206 R15: ffff88018eb67900
[ 3117.584096] FS: 00007f5694636840(0000) GS:ffff8801f3b00000(0000) knlGS:0000000000000000
[ 3117.584098] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 3117.584101] CR2: 00000000016f21b8 CR3: 0000000191c22000 CR4: 00000000000006e0
[ 3117.584112] Call Trace:
[ 3117.584121] ? f2fs_set_meta_page_dirty+0x150/0x150
[ 3117.584127] ? f2fs_build_segment_manager+0xbf9/0x3190
[ 3117.584133] ? f2fs_npages_for_summary_flush+0x75/0x120
[ 3117.584145] f2fs_build_segment_manager+0xda8/0x3190
[ 3117.584151] ? f2fs_get_valid_checkpoint+0x298/0xa00
[ 3117.584156] ? f2fs_flush_sit_entries+0x10e0/0x10e0
[ 3117.584184] ? map_id_range_down+0x17c/0x1b0
[ 3117.584188] ? __put_user_ns+0x30/0x30
[ 3117.584206] ? find_next_bit+0x53/0x90
[ 3117.584237] ? cpumask_next+0x16/0x20
[ 3117.584249] f2fs_fill_super+0x1948/0x2b40
[ 3117.584258] ? f2fs_commit_super+0x1a0/0x1a0
[ 3117.584279] ? sget_userns+0x65e/0x690
[ 3117.584296] ? set_blocksize+0x88/0x130
[ 3117.584302] ? f2fs_commit_super+0x1a0/0x1a0
[ 3117.584305] mount_bdev+0x1c0/0x200
[ 3117.584310] mount_fs+0x5c/0x190
[ 3117.584320] vfs_kern_mount+0x64/0x190
[ 3117.584330] do_mount+0x2e4/0x1450
[ 3117.584343] ? lockref_put_return+0x130/0x130
[ 3117.584347] ? copy_mount_string+0x20/0x20
[ 3117.584357] ? kasan_unpoison_shadow+0x31/0x40
[ 3117.584362] ? kasan_kmalloc+0xa6/0xd0
[ 3117.584373] ? memcg_kmem_put_cache+0x16/0x90
[ 3117.584377] ? __kmalloc_track_caller+0x196/0x210
[ 3117.584383] ? _copy_from_user+0x61/0x90
[ 3117.584396] ? memdup_user+0x3e/0x60
[ 3117.584401] ksys_mount+0x7e/0xd0
[ 3117.584405] __x64_sys_mount+0x62/0x70
[ 3117.584427] do_syscall_64+0x73/0x160
[ 3117.584440] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 3117.584455] RIP: 0033:0x7f5693f14b9a
[ 3117.584456] Code: 48 8b 0d 01 c3 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ce c2 2b 00 f7 d8 64 89 01 48
[ 3117.584505] RSP: 002b:00007fff27346488 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5
[ 3117.584510] RAX: ffffffffffffffda RBX: 00000000016e2030 RCX: 00007f5693f14b9a
[ 3117.584512] RDX: 00000000016e2210 RSI: 00000000016e3f30 RDI: 00000000016ee040
[ 3117.584514] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000013
[ 3117.584516] R10: 00000000c0ed0000 R11: 0000000000000206 R12: 00000000016ee040
[ 3117.584519] R13: 00000000016e2210 R14: 0000000000000000 R15: 0000000000000003
[ 3117.584523] ---[ end trace a8e0d899985faf31 ]---
[ 3117.685663] F2FS-fs (loop0): f2fs_check_nid_range: out-of-range nid=2, run fsck to fix.
[ 3117.685673] F2FS-fs (loop0): recover_data: ino = 2 (i_size: recover) recovered = 1, err = 0
[ 3117.685707] ==================================================================
[ 3117.685955] BUG: KASAN: slab-out-of-bounds in __remove_dirty_segment+0xdd/0x1e0
[ 3117.686175] Read of size 8 at addr ffff88018f0a63d0 by task mount/1225
[ 3117.686477] CPU: 0 PID: 1225 Comm: mount Tainted: G W 4.17.0+ #1
[ 3117.686481] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
[ 3117.686483] Call Trace:
[ 3117.686494] dump_stack+0x71/0xab
[ 3117.686512] print_address_description+0x6b/0x290
[ 3117.686517] kasan_report+0x28e/0x390
[ 3117.686522] ? __remove_dirty_segment+0xdd/0x1e0
[ 3117.686527] __remove_dirty_segment+0xdd/0x1e0
[ 3117.686532] locate_dirty_segment+0x189/0x190
[ 3117.686538] f2fs_allocate_new_segments+0xa9/0xe0
[ 3117.686543] recover_data+0x703/0x2c20
[ 3117.686547] ? f2fs_recover_fsync_data+0x48f/0xd50
[ 3117.686553] ? ksys_mount+0x7e/0xd0
[ 3117.686564] ? policy_nodemask+0x1a/0x90
[ 3117.686567] ? policy_node+0x56/0x70
[ 3117.686571] ? add_fsync_inode+0xf0/0xf0
[ 3117.686592] ? blk_finish_plug+0x44/0x60
[ 3117.686597] ? f2fs_ra_meta_pages+0x38b/0x5e0
[ 3117.686602] ? find_inode_fast+0xac/0xc0
[ 3117.686606] ? f2fs_is_valid_blkaddr+0x320/0x320
[ 3117.686618] ? __radix_tree_lookup+0x150/0x150
[ 3117.686633] ? dqget+0x670/0x670
[ 3117.686648] ? pagecache_get_page+0x29/0x410
[ 3117.686656] ? kmem_cache_alloc+0x176/0x1e0
[ 3117.686660] ? f2fs_is_valid_blkaddr+0x11d/0x320
[ 3117.686664] f2fs_recover_fsync_data+0xc23/0xd50
[ 3117.686670] ? f2fs_space_for_roll_forward+0x60/0x60
[ 3117.686674] ? rb_insert_color+0x323/0x3d0
[ 3117.686678] ? f2fs_recover_orphan_inodes+0xa5/0x700
[ 3117.686683] ? proc_register+0x153/0x1d0
[ 3117.686686] ? f2fs_remove_orphan_inode+0x10/0x10
[ 3117.686695] ? f2fs_attr_store+0x50/0x50
[ 3117.686700] ? proc_create_single_data+0x52/0x60
[ 3117.686707] f2fs_fill_super+0x1d06/0x2b40
[ 3117.686728] ? f2fs_commit_super+0x1a0/0x1a0
[ 3117.686735] ? sget_userns+0x65e/0x690
[ 3117.686740] ? set_blocksize+0x88/0x130
[ 3117.686745] ? f2fs_commit_super+0x1a0/0x1a0
[ 3117.686748] mount_bdev+0x1c0/0x200
[ 3117.686753] mount_fs+0x5c/0x190
[ 3117.686758] vfs_kern_mount+0x64/0x190
[ 3117.686762] do_mount+0x2e4/0x1450
[ 3117.686769] ? lockref_put_return+0x130/0x130
[ 3117.686773] ? copy_mount_string+0x20/0x20
[ 3117.686777] ? kasan_unpoison_shadow+0x31/0x40
[ 3117.686780] ? kasan_kmalloc+0xa6/0xd0
[ 3117.686786] ? memcg_kmem_put_cache+0x16/0x90
[ 3117.686790] ? __kmalloc_track_caller+0x196/0x210
[ 3117.686795] ? _copy_from_user+0x61/0x90
[ 3117.686801] ? memdup_user+0x3e/0x60
[ 3117.686804] ksys_mount+0x7e/0xd0
[ 3117.686809] __x64_sys_mount+0x62/0x70
[ 3117.686816] do_syscall_64+0x73/0x160
[ 3117.686824] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 3117.686829] RIP: 0033:0x7f5693f14b9a
[ 3117.686830] Code: 48 8b 0d 01 c3 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ce c2 2b 00 f7 d8 64 89 01 48
[ 3117.686887] RSP: 002b:00007fff27346488 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5
[ 3117.686892] RAX: ffffffffffffffda RBX: 00000000016e2030 RCX: 00007f5693f14b9a
[ 3117.686894] RDX: 00000000016e2210 RSI: 00000000016e3f30 RDI: 00000000016ee040
[ 3117.686896] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000013
[ 3117.686899] R10: 00000000c0ed0000 R11: 0000000000000206 R12: 00000000016ee040
[ 3117.686901] R13: 00000000016e2210 R14: 0000000000000000 R15: 0000000000000003
[ 3117.687005] Allocated by task 1225:
[ 3117.687152] kasan_kmalloc+0xa6/0xd0
[ 3117.687157] kmem_cache_alloc_trace+0xfd/0x200
[ 3117.687161] f2fs_build_segment_manager+0x2d09/0x3190
[ 3117.687165] f2fs_fill_super+0x1948/0x2b40
[ 3117.687168] mount_bdev+0x1c0/0x200
[ 3117.687171] mount_fs+0x5c/0x190
[ 3117.687174] vfs_kern_mount+0x64/0x190
[ 3117.687177] do_mount+0x2e4/0x1450
[ 3117.687180] ksys_mount+0x7e/0xd0
[ 3117.687182] __x64_sys_mount+0x62/0x70
[ 3117.687186] do_syscall_64+0x73/0x160
[ 3117.687190] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 3117.687285] Freed by task 19:
[ 3117.687412] __kasan_slab_free+0x137/0x190
[ 3117.687416] kfree+0x8b/0x1b0
[ 3117.687460] ttm_bo_man_put_node+0x61/0x80 [ttm]
[ 3117.687476] ttm_bo_cleanup_refs+0x15f/0x250 [ttm]
[ 3117.687492] ttm_bo_delayed_delete+0x2f0/0x300 [ttm]
[ 3117.687507] ttm_bo_delayed_workqueue+0x17/0x50 [ttm]
[ 3117.687528] process_one_work+0x2f9/0x740
[ 3117.687531] worker_thread+0x78/0x6b0
[ 3117.687541] kthread+0x177/0x1c0
[ 3117.687545] ret_from_fork+0x35/0x40
[ 3117.687638] The buggy address belongs to the object at ffff88018f0a6300
which belongs to the cache kmalloc-192 of size 192
[ 3117.688014] The buggy address is located 16 bytes to the right of
192-byte region [ffff88018f0a6300, ffff88018f0a63c0)
[ 3117.688382] The buggy address belongs to the page:
[ 3117.688554] page:ffffea00063c2980 count:1 mapcount:0 mapping:ffff8801f3403180 index:0x0
[ 3117.688788] flags: 0x17fff8000000100(slab)
[ 3117.688944] raw: 017fff8000000100 ffffea00063c2840 0000000e0000000e ffff8801f3403180
[ 3117.689166] raw: 0000000000000000 0000000080100010 00000001ffffffff 0000000000000000
[ 3117.689386] page dumped because: kasan: bad access detected
[ 3117.689653] Memory state around the buggy address:
[ 3117.689816] ffff88018f0a6280: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc
[ 3117.690027] ffff88018f0a6300: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[ 3117.690239] >ffff88018f0a6380: 00 00 fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[ 3117.690448] ^
[ 3117.690644] ffff88018f0a6400: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[ 3117.690868] ffff88018f0a6480: 00 00 fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[ 3117.691077] ==================================================================
[ 3117.691290] Disabling lock debugging due to kernel taint
[ 3117.693893] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
[ 3117.694120] PGD 80000001f01bc067 P4D 80000001f01bc067 PUD 1d9638067 PMD 0
[ 3117.694338] Oops: 0002 [#1] SMP KASAN PTI
[ 3117.694490] CPU: 1 PID: 1225 Comm: mount Tainted: G B W 4.17.0+ #1
[ 3117.694703] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
[ 3117.695073] RIP: 0010:__remove_dirty_segment+0xe2/0x1e0
[ 3117.695246] Code: c4 48 89 c7 e8 cf bb d7 ff 45 0f b6 24 24 41 83 e4 3f 44 88 64 24 07 41 83 e4 3f 4a 8d 7c e3 08 e8 b3 bc d7 ff 4a 8b 4c e3 08 <f0> 4c 0f b3 29 0f 82 94 00 00 00 48 8d bd 20 04 00 00 e8 97 bb d7
[ 3117.695793] RSP: 0018:ffff88018eb67638 EFLAGS: 00010292
[ 3117.695969] RAX: 0000000000000000 RBX: ffff88018f0a6300 RCX: 0000000000000000
[ 3117.696182] RDX: 0000000000000000 RSI: 0000000000000297 RDI: 0000000000000297
[ 3117.696391] RBP: ffff88018ebe9980 R08: ffffed003e743ebb R09: ffffed003e743ebb
[ 3117.696604] R10: 0000000000000001 R11: ffffed003e743eba R12: 0000000000000019
[ 3117.696813] R13: 0000000000000014 R14: 0000000000000320 R15: ffff88018ebe99e0
[ 3117.697032] FS: 00007f5694636840(0000) GS:ffff8801f3b00000(0000) knlGS:0000000000000000
[ 3117.697280] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 3117.702357] CR2: 00007fe89bb1a000 CR3: 0000000191c22000 CR4: 00000000000006e0
[ 3117.707235] Call Trace:
[ 3117.712077] locate_dirty_segment+0x189/0x190
[ 3117.716891] f2fs_allocate_new_segments+0xa9/0xe0
[ 3117.721617] recover_data+0x703/0x2c20
[ 3117.726316] ? f2fs_recover_fsync_data+0x48f/0xd50
[ 3117.730957] ? ksys_mount+0x7e/0xd0
[ 3117.735573] ? policy_nodemask+0x1a/0x90
[ 3117.740198] ? policy_node+0x56/0x70
[ 3117.744829] ? add_fsync_inode+0xf0/0xf0
[ 3117.749487] ? blk_finish_plug+0x44/0x60
[ 3117.754152] ? f2fs_ra_meta_pages+0x38b/0x5e0
[ 3117.758831] ? find_inode_fast+0xac/0xc0
[ 3117.763448] ? f2fs_is_valid_blkaddr+0x320/0x320
[ 3117.768046] ? __radix_tree_lookup+0x150/0x150
[ 3117.772603] ? dqget+0x670/0x670
[ 3117.777159] ? pagecache_get_page+0x29/0x410
[ 3117.781648] ? kmem_cache_alloc+0x176/0x1e0
[ 3117.786067] ? f2fs_is_valid_blkaddr+0x11d/0x320
[ 3117.790476] f2fs_recover_fsync_data+0xc23/0xd50
[ 3117.794790] ? f2fs_space_for_roll_forward+0x60/0x60
[ 3117.799086] ? rb_insert_color+0x323/0x3d0
[ 3117.803304] ? f2fs_recover_orphan_inodes+0xa5/0x700
[ 3117.807563] ? proc_register+0x153/0x1d0
[ 3117.811766] ? f2fs_remove_orphan_inode+0x10/0x10
[ 3117.815947] ? f2fs_attr_store+0x50/0x50
[ 3117.820087] ? proc_create_single_data+0x52/0x60
[ 3117.824262] f2fs_fill_super+0x1d06/0x2b40
[ 3117.828367] ? f2fs_commit_super+0x1a0/0x1a0
[ 3117.832432] ? sget_userns+0x65e/0x690
[ 3117.836500] ? set_blocksize+0x88/0x130
[ 3117.840501] ? f2fs_commit_super+0x1a0/0x1a0
[ 3117.844420] mount_bdev+0x1c0/0x200
[ 3117.848275] mount_fs+0x5c/0x190
[ 3117.852053] vfs_kern_mount+0x64/0x190
[ 3117.855810] do_mount+0x2e4/0x1450
[ 3117.859441] ? lockref_put_return+0x130/0x130
[ 3117.862996] ? copy_mount_string+0x20/0x20
[ 3117.866417] ? kasan_unpoison_shadow+0x31/0x40
[ 3117.869719] ? kasan_kmalloc+0xa6/0xd0
[ 3117.872948] ? memcg_kmem_put_cache+0x16/0x90
[ 3117.876121] ? __kmalloc_track_caller+0x196/0x210
[ 3117.879333] ? _copy_from_user+0x61/0x90
[ 3117.882467] ? memdup_user+0x3e/0x60
[ 3117.885604] ksys_mount+0x7e/0xd0
[ 3117.888700] __x64_sys_mount+0x62/0x70
[ 3117.891742] do_syscall_64+0x73/0x160
[ 3117.894692] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 3117.897669] RIP: 0033:0x7f5693f14b9a
[ 3117.900563] Code: 48 8b 0d 01 c3 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ce c2 2b 00 f7 d8 64 89 01 48
[ 3117.906922] RSP: 002b:00007fff27346488 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5
[ 3117.910159] RAX: ffffffffffffffda RBX: 00000000016e2030 RCX: 00007f5693f14b9a
[ 3117.913469] RDX: 00000000016e2210 RSI: 00000000016e3f30 RDI: 00000000016ee040
[ 3117.916764] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000013
[ 3117.920071] R10: 00000000c0ed0000 R11: 0000000000000206 R12: 00000000016ee040
[ 3117.923393] R13: 00000000016e2210 R14: 0000000000000000 R15: 0000000000000003
[ 3117.926680] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_timer joydev input_leds serio_raw snd soundcore mac_hid i2c_piix4 ib_iser rdma_cm iw_cm ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi btrfs zstd_decompress zstd_compress xxhash raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear 8139too qxl ttm drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc aesni_intel psmouse aes_x86_64 8139cp crypto_simd cryptd mii glue_helper pata_acpi floppy
[ 3117.949979] CR2: 0000000000000000
[ 3117.954283] ---[ end trace a8e0d899985faf32 ]---
[ 3117.958575] RIP: 0010:__remove_dirty_segment+0xe2/0x1e0
[ 3117.962810] Code: c4 48 89 c7 e8 cf bb d7 ff 45 0f b6 24 24 41 83 e4 3f 44 88 64 24 07 41 83 e4 3f 4a 8d 7c e3 08 e8 b3 bc d7 ff 4a 8b 4c e3 08 <f0> 4c 0f b3 29 0f 82 94 00 00 00 48 8d bd 20 04 00 00 e8 97 bb d7
[ 3117.971789] RSP: 0018:ffff88018eb67638 EFLAGS: 00010292
[ 3117.976333] RAX: 0000000000000000 RBX: ffff88018f0a6300 RCX: 0000000000000000
[ 3117.980926] RDX: 0000000000000000 RSI: 0000000000000297 RDI: 0000000000000297
[ 3117.985497] RBP: ffff88018ebe9980 R08: ffffed003e743ebb R09: ffffed003e743ebb
[ 3117.990098] R10: 0000000000000001 R11: ffffed003e743eba R12: 0000000000000019
[ 3117.994761] R13: 0000000000000014 R14: 0000000000000320 R15: ffff88018ebe99e0
[ 3117.999392] FS: 00007f5694636840(0000) GS:ffff8801f3b00000(0000) knlGS:0000000000000000
[ 3118.004096] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 3118.008816] CR2: 00007fe89bb1a000 CR3: 0000000191c22000 CR4: 00000000000006e0
- Location
https://elixir.bootlin.com/linux/v4.18-rc3/source/fs/f2fs/segment.c#L775
if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
dirty_i->nr_dirty[t]--;
Here dirty_i->dirty_segmap[t] can be NULL, which leads to a crash in test_and_clear_bit().
Reported-by: Wen Xu <wen.xu@gatech.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-08-01 18:16:11 +07:00
|
|
|
unsigned int cp_pack_start_sum, cp_payload;
|
2019-04-15 14:30:50 +07:00
|
|
|
block_t user_block_count, valid_user_blocks;
|
|
|
|
block_t avail_node_count, valid_node_count;
|
f2fs: fix to do sanity check with current segment number
https://bugzilla.kernel.org/show_bug.cgi?id=200219
Reproduction way:
- mount image
- run poc code
- umount image
F2FS-fs (loop1): Bitmap was wrongly set, blk:15364
------------[ cut here ]------------
kernel BUG at /home/yuchao/git/devf2fs/segment.c:2061!
invalid opcode: 0000 [#1] PREEMPT SMP
CPU: 2 PID: 17686 Comm: umount Tainted: G W O 4.18.0-rc2+ #39
Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
EIP: update_sit_entry+0x459/0x4e0 [f2fs]
Code: e8 1c b5 fd ff 0f 0b 0f 0b 8b 45 e4 c7 44 24 08 9c 7a 6c f8 c7 44 24 04 bc 4a 6c f8 89 44 24 0c 8b 06 89 04 24 e8 f7 b4 fd ff <0f> 0b 8b 45 e4 0f b6 d2 89 54 24 10 c7 44 24 08 60 7a 6c f8 c7 44
EAX: 00000032 EBX: 000000f8 ECX: 00000002 EDX: 00000001
ESI: d7177000 EDI: f520fe68 EBP: d6477c6c ESP: d6477c34
DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 EFLAGS: 00010282
CR0: 80050033 CR2: b7fbe000 CR3: 2a99b3c0 CR4: 000406f0
Call Trace:
f2fs_allocate_data_block+0x124/0x580 [f2fs]
do_write_page+0x78/0x150 [f2fs]
f2fs_do_write_node_page+0x25/0xa0 [f2fs]
__write_node_page+0x2bf/0x550 [f2fs]
f2fs_sync_node_pages+0x60e/0x6d0 [f2fs]
? sync_inode_metadata+0x2f/0x40
? f2fs_write_checkpoint+0x28f/0x7d0 [f2fs]
? up_write+0x1e/0x80
f2fs_write_checkpoint+0x2a9/0x7d0 [f2fs]
? mark_held_locks+0x5d/0x80
? _raw_spin_unlock_irq+0x27/0x50
kill_f2fs_super+0x68/0x90 [f2fs]
deactivate_locked_super+0x3d/0x70
deactivate_super+0x40/0x60
cleanup_mnt+0x39/0x70
__cleanup_mnt+0x10/0x20
task_work_run+0x81/0xa0
exit_to_usermode_loop+0x59/0xa7
do_fast_syscall_32+0x1f5/0x22c
entry_SYSENTER_32+0x53/0x86
EIP: 0xb7f95c51
Code: c1 1e f7 ff ff 89 e5 8b 55 08 85 d2 8b 81 64 cd ff ff 74 02 89 02 5d c3 8b 0c 24 c3 8b 1c 24 c3 90 51 52 55 89 e5 0f 34 cd 80 <5d> 5a 59 c3 90 90 90 90 8d 76 00 58 b8 77 00 00 00 cd 80 90 8d 76
EAX: 00000000 EBX: 0871ab90 ECX: bfb2cd00 EDX: 00000000
ESI: 00000000 EDI: 0871ab90 EBP: 0871ab90 ESP: bfb2cd7c
DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 007b EFLAGS: 00000246
Modules linked in: f2fs(O) crc32_generic bnep rfcomm bluetooth ecdh_generic snd_intel8x0 snd_ac97_codec ac97_bus snd_pcm snd_seq_midi snd_seq_midi_event snd_rawmidi snd_seq pcbc joydev aesni_intel snd_seq_device aes_i586 snd_timer crypto_simd snd cryptd soundcore mac_hid serio_raw video i2c_piix4 parport_pc ppdev lp parport hid_generic psmouse usbhid hid e1000 [last unloaded: f2fs]
---[ end trace d423f83982cfcdc5 ]---
The reason is that different log headers are using the same segment: once
one log's next block address is used by another log, it will cause a
panic as above.
Main area: 24 segs, 24 secs 24 zones
- COLD data: 0, 0, 0
- WARM data: 1, 1, 1
- HOT data: 20, 20, 20
- Dir dnode: 22, 22, 22
- File dnode: 22, 22, 22
- Indir nodes: 21, 21, 21
So this patch adds a sanity check to detect such a condition and avoid
this issue.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-09-06 19:34:12 +07:00
|
|
|
int i, j;
|
2012-11-02 15:07:47 +07:00
|
|
|
|
|
|
|
total = le32_to_cpu(raw_super->segment_count);
|
|
|
|
fsmeta = le32_to_cpu(raw_super->segment_count_ckpt);
|
f2fs: fix to do sanity check with {sit,nat}_ver_bitmap_bytesize
This patch adds a sanity check on {sit,nat}_ver_bitmap_bytesize
during mount, in order to avoid accessing memory across the cache boundary
with an abnormal bitmap size.
- Overview
buffer overrun in build_sit_info() when mounting a crafted f2fs image
- Reproduce
- Kernel message
[ 548.580867] F2FS-fs (loop0): Invalid log blocks per segment (8201)
[ 548.580877] F2FS-fs (loop0): Can't find valid F2FS filesystem in 1th superblock
[ 548.584979] ==================================================================
[ 548.586568] BUG: KASAN: use-after-free in kmemdup+0x36/0x50
[ 548.587715] Read of size 64 at addr ffff8801e9c265ff by task mount/1295
[ 548.589428] CPU: 1 PID: 1295 Comm: mount Not tainted 4.18.0-rc1+ #4
[ 548.589432] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
[ 548.589438] Call Trace:
[ 548.589474] dump_stack+0x7b/0xb5
[ 548.589487] print_address_description+0x70/0x290
[ 548.589492] kasan_report+0x291/0x390
[ 548.589496] ? kmemdup+0x36/0x50
[ 548.589509] check_memory_region+0x139/0x190
[ 548.589514] memcpy+0x23/0x50
[ 548.589518] kmemdup+0x36/0x50
[ 548.589545] f2fs_build_segment_manager+0x8fa/0x3410
[ 548.589551] ? __asan_loadN+0xf/0x20
[ 548.589560] ? f2fs_sanity_check_ckpt+0x1be/0x240
[ 548.589566] ? f2fs_flush_sit_entries+0x10c0/0x10c0
[ 548.589587] ? __put_user_ns+0x40/0x40
[ 548.589604] ? find_next_bit+0x57/0x90
[ 548.589610] f2fs_fill_super+0x194b/0x2b40
[ 548.589617] ? f2fs_commit_super+0x1b0/0x1b0
[ 548.589637] ? set_blocksize+0x90/0x140
[ 548.589651] mount_bdev+0x1c5/0x210
[ 548.589655] ? f2fs_commit_super+0x1b0/0x1b0
[ 548.589667] f2fs_mount+0x15/0x20
[ 548.589672] mount_fs+0x60/0x1a0
[ 548.589683] ? alloc_vfsmnt+0x309/0x360
[ 548.589688] vfs_kern_mount+0x6b/0x1a0
[ 548.589699] do_mount+0x34a/0x18c0
[ 548.589710] ? lockref_put_or_lock+0xcf/0x160
[ 548.589716] ? copy_mount_string+0x20/0x20
[ 548.589728] ? memcg_kmem_put_cache+0x1b/0xa0
[ 548.589734] ? kasan_check_write+0x14/0x20
[ 548.589740] ? _copy_from_user+0x6a/0x90
[ 548.589744] ? memdup_user+0x42/0x60
[ 548.589750] ksys_mount+0x83/0xd0
[ 548.589755] __x64_sys_mount+0x67/0x80
[ 548.589781] do_syscall_64+0x78/0x170
[ 548.589797] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 548.589820] RIP: 0033:0x7f76fc331b9a
[ 548.589821] Code: 48 8b 0d 01 c3 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ce c2 2b 00 f7 d8 64 89 01 48
[ 548.589880] RSP: 002b:00007ffd4f0a0e48 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5
[ 548.589890] RAX: ffffffffffffffda RBX: 000000000146c030 RCX: 00007f76fc331b9a
[ 548.589892] RDX: 000000000146c210 RSI: 000000000146df30 RDI: 0000000001474ec0
[ 548.589895] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000013
[ 548.589897] R10: 00000000c0ed0000 R11: 0000000000000206 R12: 0000000001474ec0
[ 548.589900] R13: 000000000146c210 R14: 0000000000000000 R15: 0000000000000003
[ 548.590242] The buggy address belongs to the page:
[ 548.591243] page:ffffea0007a70980 count:0 mapcount:0 mapping:0000000000000000 index:0x0
[ 548.592886] flags: 0x2ffff0000000000()
[ 548.593665] raw: 02ffff0000000000 dead000000000100 dead000000000200 0000000000000000
[ 548.595258] raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000
[ 548.603713] page dumped because: kasan: bad access detected
[ 548.605203] Memory state around the buggy address:
[ 548.606198] ffff8801e9c26480: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[ 548.607676] ffff8801e9c26500: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[ 548.609157] >ffff8801e9c26580: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[ 548.610629] ^
[ 548.612088] ffff8801e9c26600: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[ 548.613674] ffff8801e9c26680: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[ 548.615141] ==================================================================
[ 548.616613] Disabling lock debugging due to kernel taint
[ 548.622871] WARNING: CPU: 1 PID: 1295 at mm/page_alloc.c:4065 __alloc_pages_slowpath+0xe4a/0x1420
[ 548.622878] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_timer snd mac_hid i2c_piix4 soundcore ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx raid1 raid0 multipath linear 8139too crct10dif_pclmul crc32_pclmul qxl drm_kms_helper syscopyarea aesni_intel sysfillrect sysimgblt fb_sys_fops ttm drm aes_x86_64 crypto_simd cryptd 8139cp glue_helper mii pata_acpi floppy
[ 548.623217] CPU: 1 PID: 1295 Comm: mount Tainted: G B 4.18.0-rc1+ #4
[ 548.623219] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
[ 548.623226] RIP: 0010:__alloc_pages_slowpath+0xe4a/0x1420
[ 548.623227] Code: ff ff 01 89 85 c8 fe ff ff e9 91 fc ff ff 41 89 c5 e9 5c fc ff ff 0f 0b 89 f8 25 ff ff f7 ff 89 85 8c fe ff ff e9 d5 f2 ff ff <0f> 0b e9 65 f2 ff ff 65 8b 05 38 81 d2 47 f6 c4 01 74 1c 65 48 8b
[ 548.623281] RSP: 0018:ffff8801f28c7678 EFLAGS: 00010246
[ 548.623284] RAX: 0000000000000000 RBX: 00000000006040c0 RCX: ffffffffb82f73b7
[ 548.623287] RDX: 1ffff1003e518eeb RSI: 000000000000000c RDI: 0000000000000000
[ 548.623290] RBP: ffff8801f28c7880 R08: 0000000000000000 R09: ffffed0047fff2c5
[ 548.623292] R10: 0000000000000001 R11: ffffed0047fff2c4 R12: ffff8801e88de040
[ 548.623295] R13: 00000000006040c0 R14: 000000000000000c R15: ffff8801f28c7938
[ 548.623299] FS: 00007f76fca51840(0000) GS:ffff8801f6f00000(0000) knlGS:0000000000000000
[ 548.623302] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 548.623304] CR2: 00007f19b9171760 CR3: 00000001ed952000 CR4: 00000000000006e0
[ 548.623317] Call Trace:
[ 548.623325] ? kasan_check_read+0x11/0x20
[ 548.623330] ? __zone_watermark_ok+0x92/0x240
[ 548.623336] ? get_page_from_freelist+0x1c3/0x1d90
[ 548.623347] ? _raw_spin_lock_irqsave+0x2a/0x60
[ 548.623353] ? warn_alloc+0x250/0x250
[ 548.623358] ? save_stack+0x46/0xd0
[ 548.623361] ? kasan_kmalloc+0xad/0xe0
[ 548.623366] ? __isolate_free_page+0x2a0/0x2a0
[ 548.623370] ? mount_fs+0x60/0x1a0
[ 548.623374] ? vfs_kern_mount+0x6b/0x1a0
[ 548.623378] ? do_mount+0x34a/0x18c0
[ 548.623383] ? ksys_mount+0x83/0xd0
[ 548.623387] ? __x64_sys_mount+0x67/0x80
[ 548.623391] ? do_syscall_64+0x78/0x170
[ 548.623396] ? entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 548.623401] __alloc_pages_nodemask+0x3c5/0x400
[ 548.623407] ? __alloc_pages_slowpath+0x1420/0x1420
[ 548.623412] ? __mutex_lock_slowpath+0x20/0x20
[ 548.623417] ? kvmalloc_node+0x31/0x80
[ 548.623424] alloc_pages_current+0x75/0x110
[ 548.623436] kmalloc_order+0x24/0x60
[ 548.623442] kmalloc_order_trace+0x24/0xb0
[ 548.623448] __kmalloc_track_caller+0x207/0x220
[ 548.623455] ? f2fs_build_node_manager+0x399/0xbb0
[ 548.623460] kmemdup+0x20/0x50
[ 548.623465] f2fs_build_node_manager+0x399/0xbb0
[ 548.623470] f2fs_fill_super+0x195e/0x2b40
[ 548.623477] ? f2fs_commit_super+0x1b0/0x1b0
[ 548.623481] ? set_blocksize+0x90/0x140
[ 548.623486] mount_bdev+0x1c5/0x210
[ 548.623489] ? f2fs_commit_super+0x1b0/0x1b0
[ 548.623495] f2fs_mount+0x15/0x20
[ 548.623498] mount_fs+0x60/0x1a0
[ 548.623503] ? alloc_vfsmnt+0x309/0x360
[ 548.623508] vfs_kern_mount+0x6b/0x1a0
[ 548.623513] do_mount+0x34a/0x18c0
[ 548.623518] ? lockref_put_or_lock+0xcf/0x160
[ 548.623523] ? copy_mount_string+0x20/0x20
[ 548.623528] ? memcg_kmem_put_cache+0x1b/0xa0
[ 548.623533] ? kasan_check_write+0x14/0x20
[ 548.623537] ? _copy_from_user+0x6a/0x90
[ 548.623542] ? memdup_user+0x42/0x60
[ 548.623547] ksys_mount+0x83/0xd0
[ 548.623552] __x64_sys_mount+0x67/0x80
[ 548.623557] do_syscall_64+0x78/0x170
[ 548.623562] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 548.623566] RIP: 0033:0x7f76fc331b9a
[ 548.623567] Code: 48 8b 0d 01 c3 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ce c2 2b 00 f7 d8 64 89 01 48
[ 548.623632] RSP: 002b:00007ffd4f0a0e48 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5
[ 548.623636] RAX: ffffffffffffffda RBX: 000000000146c030 RCX: 00007f76fc331b9a
[ 548.623639] RDX: 000000000146c210 RSI: 000000000146df30 RDI: 0000000001474ec0
[ 548.623641] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000013
[ 548.623643] R10: 00000000c0ed0000 R11: 0000000000000206 R12: 0000000001474ec0
[ 548.623646] R13: 000000000146c210 R14: 0000000000000000 R15: 0000000000000003
[ 548.623650] ---[ end trace 4ce02f25ff7d3df5 ]---
[ 548.623656] F2FS-fs (loop0): Failed to initialize F2FS node manager
[ 548.627936] F2FS-fs (loop0): Invalid log blocks per segment (8201)
[ 548.627940] F2FS-fs (loop0): Can't find valid F2FS filesystem in 1th superblock
[ 548.635835] F2FS-fs (loop0): Failed to initialize F2FS node manager
- Location
https://elixir.bootlin.com/linux/v4.18-rc1/source/fs/f2fs/segment.c#L3578
sit_i->sit_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
Buffer overrun happens when doing memcpy. I suspect there are missing (inconsistent) checks on bitmap_size.
Reported by Wen Xu (wen.xu@gatech.edu) from SSLab, Gatech.
Reported-by: Wen Xu <wen.xu@gatech.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-06-23 10:25:19 +07:00
|
|
|
sit_segs = le32_to_cpu(raw_super->segment_count_sit);
|
|
|
|
fsmeta += sit_segs;
|
|
|
|
nat_segs = le32_to_cpu(raw_super->segment_count_nat);
|
|
|
|
fsmeta += nat_segs;
|
2012-11-02 15:07:47 +07:00
|
|
|
fsmeta += le32_to_cpu(ckpt->rsvd_segment_count);
|
|
|
|
fsmeta += le32_to_cpu(raw_super->segment_count_ssa);
|
|
|
|
|
2013-12-06 13:00:58 +07:00
|
|
|
if (unlikely(fsmeta >= total))
|
2012-11-02 15:07:47 +07:00
|
|
|
return 1;
|
f2fs: prevent checkpoint once any IO failure is detected
This patch enhances the checkpoint routine to cope with IO errors.
Basically, f2fs detects IO errors from end_io_write, and the errors can
occur during any of the data, node, and meta page writes.
In the previous code, when an IO error occurs during writes, f2fs sets a
flag, CP_ERROR_FLAG, in the raw checkpoint buffer which will be written to disk.
Afterwards, write_checkpoint() will check the flag and remount f2fs in
read-only (ro) mode.
However, even once f2fs is remounted in ro mode, dirty checkpoint pages can
still be freely written to disk by the flusher or kswapd in the background.
In such a case, after a cold reboot, f2fs would restore the checkpoint data having
CP_ERROR_FLAG, resulting in disabling write_checkpoint and remounting f2fs in
ro mode again.
Therefore, let's prevent any checkpoint page (meta) writes once an IO error
occurs, and remount f2fs in ro mode right away at that moment.
Reported-by: Oliver Winker <oliver@oli1170.net>
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
Reviewed-by: Namjae Jeon <namjae.jeon@samsung.com>
2013-01-24 17:56:11 +07:00
|
|
|
|
2016-12-06 04:56:04 +07:00
|
|
|
ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
|
|
|
|
reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
|
|
|
|
|
|
|
|
if (unlikely(fsmeta < F2FS_MIN_SEGMENTS ||
|
|
|
|
ovp_segments == 0 || reserved_segments == 0)) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_err(sbi, "Wrong layout: check mkfs.f2fs version");
|
2016-12-06 04:56:04 +07:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2018-06-27 17:05:54 +07:00
|
|
|
user_block_count = le64_to_cpu(ckpt->user_block_count);
|
|
|
|
segment_count_main = le32_to_cpu(raw_super->segment_count_main);
|
|
|
|
log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);
|
|
|
|
if (!user_block_count || user_block_count >=
|
|
|
|
segment_count_main << log_blocks_per_seg) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_err(sbi, "Wrong user_block_count: %u",
|
|
|
|
user_block_count);
|
2018-06-27 17:05:54 +07:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2019-04-15 14:30:50 +07:00
|
|
|
valid_user_blocks = le64_to_cpu(ckpt->valid_block_count);
|
|
|
|
if (valid_user_blocks > user_block_count) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_err(sbi, "Wrong valid_user_blocks: %u, user_block_count: %u",
|
|
|
|
valid_user_blocks, user_block_count);
|
2019-04-15 14:30:50 +07:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
valid_node_count = le32_to_cpu(ckpt->valid_node_count);
|
|
|
|
avail_node_count = sbi->total_node_count - sbi->nquota_files -
|
|
|
|
F2FS_RESERVED_NODE_NUM;
|
|
|
|
if (valid_node_count > avail_node_count) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_err(sbi, "Wrong valid_node_count: %u, avail_node_count: %u",
|
|
|
|
valid_node_count, avail_node_count);
|
2019-04-15 14:30:50 +07:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2017-05-16 00:45:08 +07:00
|
|
|
main_segs = le32_to_cpu(raw_super->segment_count_main);
|
|
|
|
blocks_per_seg = sbi->blocks_per_seg;
|
|
|
|
|
|
|
|
for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
|
|
|
|
if (le32_to_cpu(ckpt->cur_node_segno[i]) >= main_segs ||
|
|
|
|
le16_to_cpu(ckpt->cur_node_blkoff[i]) >= blocks_per_seg)
|
|
|
|
return 1;
|
f2fs: fix to do sanity check with current segment number
https://bugzilla.kernel.org/show_bug.cgi?id=200219
Reproduction way:
- mount image
- run poc code
- umount image
F2FS-fs (loop1): Bitmap was wrongly set, blk:15364
------------[ cut here ]------------
kernel BUG at /home/yuchao/git/devf2fs/segment.c:2061!
invalid opcode: 0000 [#1] PREEMPT SMP
CPU: 2 PID: 17686 Comm: umount Tainted: G W O 4.18.0-rc2+ #39
Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
EIP: update_sit_entry+0x459/0x4e0 [f2fs]
Code: e8 1c b5 fd ff 0f 0b 0f 0b 8b 45 e4 c7 44 24 08 9c 7a 6c f8 c7 44 24 04 bc 4a 6c f8 89 44 24 0c 8b 06 89 04 24 e8 f7 b4 fd ff <0f> 0b 8b 45 e4 0f b6 d2 89 54 24 10 c7 44 24 08 60 7a 6c f8 c7 44
EAX: 00000032 EBX: 000000f8 ECX: 00000002 EDX: 00000001
ESI: d7177000 EDI: f520fe68 EBP: d6477c6c ESP: d6477c34
DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 EFLAGS: 00010282
CR0: 80050033 CR2: b7fbe000 CR3: 2a99b3c0 CR4: 000406f0
Call Trace:
f2fs_allocate_data_block+0x124/0x580 [f2fs]
do_write_page+0x78/0x150 [f2fs]
f2fs_do_write_node_page+0x25/0xa0 [f2fs]
__write_node_page+0x2bf/0x550 [f2fs]
f2fs_sync_node_pages+0x60e/0x6d0 [f2fs]
? sync_inode_metadata+0x2f/0x40
? f2fs_write_checkpoint+0x28f/0x7d0 [f2fs]
? up_write+0x1e/0x80
f2fs_write_checkpoint+0x2a9/0x7d0 [f2fs]
? mark_held_locks+0x5d/0x80
? _raw_spin_unlock_irq+0x27/0x50
kill_f2fs_super+0x68/0x90 [f2fs]
deactivate_locked_super+0x3d/0x70
deactivate_super+0x40/0x60
cleanup_mnt+0x39/0x70
__cleanup_mnt+0x10/0x20
task_work_run+0x81/0xa0
exit_to_usermode_loop+0x59/0xa7
do_fast_syscall_32+0x1f5/0x22c
entry_SYSENTER_32+0x53/0x86
EIP: 0xb7f95c51
Code: c1 1e f7 ff ff 89 e5 8b 55 08 85 d2 8b 81 64 cd ff ff 74 02 89 02 5d c3 8b 0c 24 c3 8b 1c 24 c3 90 51 52 55 89 e5 0f 34 cd 80 <5d> 5a 59 c3 90 90 90 90 8d 76 00 58 b8 77 00 00 00 cd 80 90 8d 76
EAX: 00000000 EBX: 0871ab90 ECX: bfb2cd00 EDX: 00000000
ESI: 00000000 EDI: 0871ab90 EBP: 0871ab90 ESP: bfb2cd7c
DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 007b EFLAGS: 00000246
Modules linked in: f2fs(O) crc32_generic bnep rfcomm bluetooth ecdh_generic snd_intel8x0 snd_ac97_codec ac97_bus snd_pcm snd_seq_midi snd_seq_midi_event snd_rawmidi snd_seq pcbc joydev aesni_intel snd_seq_device aes_i586 snd_timer crypto_simd snd cryptd soundcore mac_hid serio_raw video i2c_piix4 parport_pc ppdev lp parport hid_generic psmouse usbhid hid e1000 [last unloaded: f2fs]
---[ end trace d423f83982cfcdc5 ]---
The reason is that different log headers are using the same segment: once
one log's next block address is used by another log, it will cause a
panic as above.
Main area: 24 segs, 24 secs 24 zones
- COLD data: 0, 0, 0
- WARM data: 1, 1, 1
- HOT data: 20, 20, 20
- Dir dnode: 22, 22, 22
- File dnode: 22, 22, 22
- Indir nodes: 21, 21, 21
So this patch adds a sanity check to detect such a condition and avoid
this issue.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-09-06 19:34:12 +07:00
|
|
|
for (j = i + 1; j < NR_CURSEG_NODE_TYPE; j++) {
|
|
|
|
if (le32_to_cpu(ckpt->cur_node_segno[i]) ==
|
|
|
|
le32_to_cpu(ckpt->cur_node_segno[j])) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_err(sbi, "Node segment (%u, %u) has the same segno: %u",
|
|
|
|
i, j,
|
|
|
|
le32_to_cpu(ckpt->cur_node_segno[i]));
|
f2fs: fix to do sanity check with current segment number
https://bugzilla.kernel.org/show_bug.cgi?id=200219
Reproduction way:
- mount image
- run poc code
- umount image
F2FS-fs (loop1): Bitmap was wrongly set, blk:15364
------------[ cut here ]------------
kernel BUG at /home/yuchao/git/devf2fs/segment.c:2061!
invalid opcode: 0000 [#1] PREEMPT SMP
CPU: 2 PID: 17686 Comm: umount Tainted: G W O 4.18.0-rc2+ #39
Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
EIP: update_sit_entry+0x459/0x4e0 [f2fs]
Code: e8 1c b5 fd ff 0f 0b 0f 0b 8b 45 e4 c7 44 24 08 9c 7a 6c f8 c7 44 24 04 bc 4a 6c f8 89 44 24 0c 8b 06 89 04 24 e8 f7 b4 fd ff <0f> 0b 8b 45 e4 0f b6 d2 89 54 24 10 c7 44 24 08 60 7a 6c f8 c7 44
EAX: 00000032 EBX: 000000f8 ECX: 00000002 EDX: 00000001
ESI: d7177000 EDI: f520fe68 EBP: d6477c6c ESP: d6477c34
DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 EFLAGS: 00010282
CR0: 80050033 CR2: b7fbe000 CR3: 2a99b3c0 CR4: 000406f0
Call Trace:
f2fs_allocate_data_block+0x124/0x580 [f2fs]
do_write_page+0x78/0x150 [f2fs]
f2fs_do_write_node_page+0x25/0xa0 [f2fs]
__write_node_page+0x2bf/0x550 [f2fs]
f2fs_sync_node_pages+0x60e/0x6d0 [f2fs]
? sync_inode_metadata+0x2f/0x40
? f2fs_write_checkpoint+0x28f/0x7d0 [f2fs]
? up_write+0x1e/0x80
f2fs_write_checkpoint+0x2a9/0x7d0 [f2fs]
? mark_held_locks+0x5d/0x80
? _raw_spin_unlock_irq+0x27/0x50
kill_f2fs_super+0x68/0x90 [f2fs]
deactivate_locked_super+0x3d/0x70
deactivate_super+0x40/0x60
cleanup_mnt+0x39/0x70
__cleanup_mnt+0x10/0x20
task_work_run+0x81/0xa0
exit_to_usermode_loop+0x59/0xa7
do_fast_syscall_32+0x1f5/0x22c
entry_SYSENTER_32+0x53/0x86
EIP: 0xb7f95c51
Code: c1 1e f7 ff ff 89 e5 8b 55 08 85 d2 8b 81 64 cd ff ff 74 02 89 02 5d c3 8b 0c 24 c3 8b 1c 24 c3 90 51 52 55 89 e5 0f 34 cd 80 <5d> 5a 59 c3 90 90 90 90 8d 76 00 58 b8 77 00 00 00 cd 80 90 8d 76
EAX: 00000000 EBX: 0871ab90 ECX: bfb2cd00 EDX: 00000000
ESI: 00000000 EDI: 0871ab90 EBP: 0871ab90 ESP: bfb2cd7c
DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 007b EFLAGS: 00000246
Modules linked in: f2fs(O) crc32_generic bnep rfcomm bluetooth ecdh_generic snd_intel8x0 snd_ac97_codec ac97_bus snd_pcm snd_seq_midi snd_seq_midi_event snd_rawmidi snd_seq pcbc joydev aesni_intel snd_seq_device aes_i586 snd_timer crypto_simd snd cryptd soundcore mac_hid serio_raw video i2c_piix4 parport_pc ppdev lp parport hid_generic psmouse usbhid hid e1000 [last unloaded: f2fs]
---[ end trace d423f83982cfcdc5 ]---
The reason is that different log headers are using the same segment: once
one log's next block address is used by another log, it will cause a
panic as above.
Main area: 24 segs, 24 secs 24 zones
- COLD data: 0, 0, 0
- WARM data: 1, 1, 1
- HOT data: 20, 20, 20
- Dir dnode: 22, 22, 22
- File dnode: 22, 22, 22
- Indir nodes: 21, 21, 21
So this patch adds a sanity check to detect such a condition and avoid
this issue.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-09-06 19:34:12 +07:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
}
|
2017-05-16 00:45:08 +07:00
|
|
|
}
|
|
|
|
for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) {
|
|
|
|
if (le32_to_cpu(ckpt->cur_data_segno[i]) >= main_segs ||
|
|
|
|
le16_to_cpu(ckpt->cur_data_blkoff[i]) >= blocks_per_seg)
|
|
|
|
return 1;
|
f2fs: fix to do sanity check with current segment number
https://bugzilla.kernel.org/show_bug.cgi?id=200219
Reproduction way:
- mount image
- run poc code
- umount image
F2FS-fs (loop1): Bitmap was wrongly set, blk:15364
------------[ cut here ]------------
kernel BUG at /home/yuchao/git/devf2fs/segment.c:2061!
invalid opcode: 0000 [#1] PREEMPT SMP
CPU: 2 PID: 17686 Comm: umount Tainted: G W O 4.18.0-rc2+ #39
Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
EIP: update_sit_entry+0x459/0x4e0 [f2fs]
Code: e8 1c b5 fd ff 0f 0b 0f 0b 8b 45 e4 c7 44 24 08 9c 7a 6c f8 c7 44 24 04 bc 4a 6c f8 89 44 24 0c 8b 06 89 04 24 e8 f7 b4 fd ff <0f> 0b 8b 45 e4 0f b6 d2 89 54 24 10 c7 44 24 08 60 7a 6c f8 c7 44
EAX: 00000032 EBX: 000000f8 ECX: 00000002 EDX: 00000001
ESI: d7177000 EDI: f520fe68 EBP: d6477c6c ESP: d6477c34
DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 EFLAGS: 00010282
CR0: 80050033 CR2: b7fbe000 CR3: 2a99b3c0 CR4: 000406f0
Call Trace:
f2fs_allocate_data_block+0x124/0x580 [f2fs]
do_write_page+0x78/0x150 [f2fs]
f2fs_do_write_node_page+0x25/0xa0 [f2fs]
__write_node_page+0x2bf/0x550 [f2fs]
f2fs_sync_node_pages+0x60e/0x6d0 [f2fs]
? sync_inode_metadata+0x2f/0x40
? f2fs_write_checkpoint+0x28f/0x7d0 [f2fs]
? up_write+0x1e/0x80
f2fs_write_checkpoint+0x2a9/0x7d0 [f2fs]
? mark_held_locks+0x5d/0x80
? _raw_spin_unlock_irq+0x27/0x50
kill_f2fs_super+0x68/0x90 [f2fs]
deactivate_locked_super+0x3d/0x70
deactivate_super+0x40/0x60
cleanup_mnt+0x39/0x70
__cleanup_mnt+0x10/0x20
task_work_run+0x81/0xa0
exit_to_usermode_loop+0x59/0xa7
do_fast_syscall_32+0x1f5/0x22c
entry_SYSENTER_32+0x53/0x86
EIP: 0xb7f95c51
Code: c1 1e f7 ff ff 89 e5 8b 55 08 85 d2 8b 81 64 cd ff ff 74 02 89 02 5d c3 8b 0c 24 c3 8b 1c 24 c3 90 51 52 55 89 e5 0f 34 cd 80 <5d> 5a 59 c3 90 90 90 90 8d 76 00 58 b8 77 00 00 00 cd 80 90 8d 76
EAX: 00000000 EBX: 0871ab90 ECX: bfb2cd00 EDX: 00000000
ESI: 00000000 EDI: 0871ab90 EBP: 0871ab90 ESP: bfb2cd7c
DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 007b EFLAGS: 00000246
Modules linked in: f2fs(O) crc32_generic bnep rfcomm bluetooth ecdh_generic snd_intel8x0 snd_ac97_codec ac97_bus snd_pcm snd_seq_midi snd_seq_midi_event snd_rawmidi snd_seq pcbc joydev aesni_intel snd_seq_device aes_i586 snd_timer crypto_simd snd cryptd soundcore mac_hid serio_raw video i2c_piix4 parport_pc ppdev lp parport hid_generic psmouse usbhid hid e1000 [last unloaded: f2fs]
---[ end trace d423f83982cfcdc5 ]---
The reason is that different log headers are using the same segment: once
one log's next block address is used by another log, it will cause a
panic as above.
Main area: 24 segs, 24 secs 24 zones
- COLD data: 0, 0, 0
- WARM data: 1, 1, 1
- HOT data: 20, 20, 20
- Dir dnode: 22, 22, 22
- File dnode: 22, 22, 22
- Indir nodes: 21, 21, 21
So this patch adds a sanity check to detect such a condition and avoid
this issue.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-09-06 19:34:12 +07:00
|
|
|
for (j = i + 1; j < NR_CURSEG_DATA_TYPE; j++) {
|
|
|
|
if (le32_to_cpu(ckpt->cur_data_segno[i]) ==
|
|
|
|
le32_to_cpu(ckpt->cur_data_segno[j])) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_err(sbi, "Data segment (%u, %u) has the same segno: %u",
|
|
|
|
i, j,
|
|
|
|
le32_to_cpu(ckpt->cur_data_segno[i]));
|
f2fs: fix to do sanity check with current segment number
https://bugzilla.kernel.org/show_bug.cgi?id=200219
Reproduction way:
- mount image
- run poc code
- umount image
F2FS-fs (loop1): Bitmap was wrongly set, blk:15364
------------[ cut here ]------------
kernel BUG at /home/yuchao/git/devf2fs/segment.c:2061!
invalid opcode: 0000 [#1] PREEMPT SMP
CPU: 2 PID: 17686 Comm: umount Tainted: G W O 4.18.0-rc2+ #39
Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
EIP: update_sit_entry+0x459/0x4e0 [f2fs]
Code: e8 1c b5 fd ff 0f 0b 0f 0b 8b 45 e4 c7 44 24 08 9c 7a 6c f8 c7 44 24 04 bc 4a 6c f8 89 44 24 0c 8b 06 89 04 24 e8 f7 b4 fd ff <0f> 0b 8b 45 e4 0f b6 d2 89 54 24 10 c7 44 24 08 60 7a 6c f8 c7 44
EAX: 00000032 EBX: 000000f8 ECX: 00000002 EDX: 00000001
ESI: d7177000 EDI: f520fe68 EBP: d6477c6c ESP: d6477c34
DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 EFLAGS: 00010282
CR0: 80050033 CR2: b7fbe000 CR3: 2a99b3c0 CR4: 000406f0
Call Trace:
f2fs_allocate_data_block+0x124/0x580 [f2fs]
do_write_page+0x78/0x150 [f2fs]
f2fs_do_write_node_page+0x25/0xa0 [f2fs]
__write_node_page+0x2bf/0x550 [f2fs]
f2fs_sync_node_pages+0x60e/0x6d0 [f2fs]
? sync_inode_metadata+0x2f/0x40
? f2fs_write_checkpoint+0x28f/0x7d0 [f2fs]
? up_write+0x1e/0x80
f2fs_write_checkpoint+0x2a9/0x7d0 [f2fs]
? mark_held_locks+0x5d/0x80
? _raw_spin_unlock_irq+0x27/0x50
kill_f2fs_super+0x68/0x90 [f2fs]
deactivate_locked_super+0x3d/0x70
deactivate_super+0x40/0x60
cleanup_mnt+0x39/0x70
__cleanup_mnt+0x10/0x20
task_work_run+0x81/0xa0
exit_to_usermode_loop+0x59/0xa7
do_fast_syscall_32+0x1f5/0x22c
entry_SYSENTER_32+0x53/0x86
EIP: 0xb7f95c51
Code: c1 1e f7 ff ff 89 e5 8b 55 08 85 d2 8b 81 64 cd ff ff 74 02 89 02 5d c3 8b 0c 24 c3 8b 1c 24 c3 90 51 52 55 89 e5 0f 34 cd 80 <5d> 5a 59 c3 90 90 90 90 8d 76 00 58 b8 77 00 00 00 cd 80 90 8d 76
EAX: 00000000 EBX: 0871ab90 ECX: bfb2cd00 EDX: 00000000
ESI: 00000000 EDI: 0871ab90 EBP: 0871ab90 ESP: bfb2cd7c
DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 007b EFLAGS: 00000246
Modules linked in: f2fs(O) crc32_generic bnep rfcomm bluetooth ecdh_generic snd_intel8x0 snd_ac97_codec ac97_bus snd_pcm snd_seq_midi snd_seq_midi_event snd_rawmidi snd_seq pcbc joydev aesni_intel snd_seq_device aes_i586 snd_timer crypto_simd snd cryptd soundcore mac_hid serio_raw video i2c_piix4 parport_pc ppdev lp parport hid_generic psmouse usbhid hid e1000 [last unloaded: f2fs]
---[ end trace d423f83982cfcdc5 ]---
The reason is that different log headers are using the same segment: once
one log's next block address is used by another log, it will cause a
panic as above.
Main area: 24 segs, 24 secs 24 zones
- COLD data: 0, 0, 0
- WARM data: 1, 1, 1
- HOT data: 20, 20, 20
- Dir dnode: 22, 22, 22
- File dnode: 22, 22, 22
- Indir nodes: 21, 21, 21
So this patch adds a sanity check to detect such a condition and avoid
this issue.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-09-06 19:34:12 +07:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
|
|
|
|
/*
 * Node-vs-data is a cross comparison between two different arrays, so it
 * is not symmetric: every node log (i) must be checked against every data
 * log, which means j has to start at 0 rather than at i.
 */
for (j = 0; j < NR_CURSEG_DATA_TYPE; j++) {
|
|
|
|
if (le32_to_cpu(ckpt->cur_node_segno[i]) ==
|
|
|
|
le32_to_cpu(ckpt->cur_data_segno[j])) {
|
2019-06-18 16:48:42 +07:00
|
|
|
/* i indexes cur_node_segno and j indexes cur_data_segno. */
f2fs_err(sbi, "Node segment (%u) and Data segment (%u) has the same segno: %u",
|
|
|
|
i, j,
|
|
|
|
le32_to_cpu(ckpt->cur_node_segno[i]));
|
f2fs: fix to do sanity check with current segment number
https://bugzilla.kernel.org/show_bug.cgi?id=200219
Reproduction way:
- mount image
- run poc code
- umount image
F2FS-fs (loop1): Bitmap was wrongly set, blk:15364
------------[ cut here ]------------
kernel BUG at /home/yuchao/git/devf2fs/segment.c:2061!
invalid opcode: 0000 [#1] PREEMPT SMP
CPU: 2 PID: 17686 Comm: umount Tainted: G W O 4.18.0-rc2+ #39
Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
EIP: update_sit_entry+0x459/0x4e0 [f2fs]
Code: e8 1c b5 fd ff 0f 0b 0f 0b 8b 45 e4 c7 44 24 08 9c 7a 6c f8 c7 44 24 04 bc 4a 6c f8 89 44 24 0c 8b 06 89 04 24 e8 f7 b4 fd ff <0f> 0b 8b 45 e4 0f b6 d2 89 54 24 10 c7 44 24 08 60 7a 6c f8 c7 44
EAX: 00000032 EBX: 000000f8 ECX: 00000002 EDX: 00000001
ESI: d7177000 EDI: f520fe68 EBP: d6477c6c ESP: d6477c34
DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 EFLAGS: 00010282
CR0: 80050033 CR2: b7fbe000 CR3: 2a99b3c0 CR4: 000406f0
Call Trace:
f2fs_allocate_data_block+0x124/0x580 [f2fs]
do_write_page+0x78/0x150 [f2fs]
f2fs_do_write_node_page+0x25/0xa0 [f2fs]
__write_node_page+0x2bf/0x550 [f2fs]
f2fs_sync_node_pages+0x60e/0x6d0 [f2fs]
? sync_inode_metadata+0x2f/0x40
? f2fs_write_checkpoint+0x28f/0x7d0 [f2fs]
? up_write+0x1e/0x80
f2fs_write_checkpoint+0x2a9/0x7d0 [f2fs]
? mark_held_locks+0x5d/0x80
? _raw_spin_unlock_irq+0x27/0x50
kill_f2fs_super+0x68/0x90 [f2fs]
deactivate_locked_super+0x3d/0x70
deactivate_super+0x40/0x60
cleanup_mnt+0x39/0x70
__cleanup_mnt+0x10/0x20
task_work_run+0x81/0xa0
exit_to_usermode_loop+0x59/0xa7
do_fast_syscall_32+0x1f5/0x22c
entry_SYSENTER_32+0x53/0x86
EIP: 0xb7f95c51
Code: c1 1e f7 ff ff 89 e5 8b 55 08 85 d2 8b 81 64 cd ff ff 74 02 89 02 5d c3 8b 0c 24 c3 8b 1c 24 c3 90 51 52 55 89 e5 0f 34 cd 80 <5d> 5a 59 c3 90 90 90 90 8d 76 00 58 b8 77 00 00 00 cd 80 90 8d 76
EAX: 00000000 EBX: 0871ab90 ECX: bfb2cd00 EDX: 00000000
ESI: 00000000 EDI: 0871ab90 EBP: 0871ab90 ESP: bfb2cd7c
DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 007b EFLAGS: 00000246
Modules linked in: f2fs(O) crc32_generic bnep rfcomm bluetooth ecdh_generic snd_intel8x0 snd_ac97_codec ac97_bus snd_pcm snd_seq_midi snd_seq_midi_event snd_rawmidi snd_seq pcbc joydev aesni_intel snd_seq_device aes_i586 snd_timer crypto_simd snd cryptd soundcore mac_hid serio_raw video i2c_piix4 parport_pc ppdev lp parport hid_generic psmouse usbhid hid e1000 [last unloaded: f2fs]
---[ end trace d423f83982cfcdc5 ]---
The reason is that different log headers are using the same segment: once
one log's next block address is used by another log, it will cause a
panic as above.
Main area: 24 segs, 24 secs 24 zones
- COLD data: 0, 0, 0
- WARM data: 1, 1, 1
- HOT data: 20, 20, 20
- Dir dnode: 22, 22, 22
- File dnode: 22, 22, 22
- Indir nodes: 21, 21, 21
So this patch adds a sanity check to detect such a condition and avoid
this issue.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-09-06 19:34:12 +07:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
}
|
2017-05-16 00:45:08 +07:00
|
|
|
}
|
|
|
|
|
f2fs: fix to do sanity check with {sit,nat}_ver_bitmap_bytesize
This patch adds a sanity check on {sit,nat}_ver_bitmap_bytesize
during mount, in order to avoid accessing memory across the cache boundary
with an abnormal bitmap size.
- Overview
buffer overrun in build_sit_info() when mounting a crafted f2fs image
- Reproduce
- Kernel message
[ 548.580867] F2FS-fs (loop0): Invalid log blocks per segment (8201)
[ 548.580877] F2FS-fs (loop0): Can't find valid F2FS filesystem in 1th superblock
[ 548.584979] ==================================================================
[ 548.586568] BUG: KASAN: use-after-free in kmemdup+0x36/0x50
[ 548.587715] Read of size 64 at addr ffff8801e9c265ff by task mount/1295
[ 548.589428] CPU: 1 PID: 1295 Comm: mount Not tainted 4.18.0-rc1+ #4
[ 548.589432] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
[ 548.589438] Call Trace:
[ 548.589474] dump_stack+0x7b/0xb5
[ 548.589487] print_address_description+0x70/0x290
[ 548.589492] kasan_report+0x291/0x390
[ 548.589496] ? kmemdup+0x36/0x50
[ 548.589509] check_memory_region+0x139/0x190
[ 548.589514] memcpy+0x23/0x50
[ 548.589518] kmemdup+0x36/0x50
[ 548.589545] f2fs_build_segment_manager+0x8fa/0x3410
[ 548.589551] ? __asan_loadN+0xf/0x20
[ 548.589560] ? f2fs_sanity_check_ckpt+0x1be/0x240
[ 548.589566] ? f2fs_flush_sit_entries+0x10c0/0x10c0
[ 548.589587] ? __put_user_ns+0x40/0x40
[ 548.589604] ? find_next_bit+0x57/0x90
[ 548.589610] f2fs_fill_super+0x194b/0x2b40
[ 548.589617] ? f2fs_commit_super+0x1b0/0x1b0
[ 548.589637] ? set_blocksize+0x90/0x140
[ 548.589651] mount_bdev+0x1c5/0x210
[ 548.589655] ? f2fs_commit_super+0x1b0/0x1b0
[ 548.589667] f2fs_mount+0x15/0x20
[ 548.589672] mount_fs+0x60/0x1a0
[ 548.589683] ? alloc_vfsmnt+0x309/0x360
[ 548.589688] vfs_kern_mount+0x6b/0x1a0
[ 548.589699] do_mount+0x34a/0x18c0
[ 548.589710] ? lockref_put_or_lock+0xcf/0x160
[ 548.589716] ? copy_mount_string+0x20/0x20
[ 548.589728] ? memcg_kmem_put_cache+0x1b/0xa0
[ 548.589734] ? kasan_check_write+0x14/0x20
[ 548.589740] ? _copy_from_user+0x6a/0x90
[ 548.589744] ? memdup_user+0x42/0x60
[ 548.589750] ksys_mount+0x83/0xd0
[ 548.589755] __x64_sys_mount+0x67/0x80
[ 548.589781] do_syscall_64+0x78/0x170
[ 548.589797] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 548.589820] RIP: 0033:0x7f76fc331b9a
[ 548.589821] Code: 48 8b 0d 01 c3 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ce c2 2b 00 f7 d8 64 89 01 48
[ 548.589880] RSP: 002b:00007ffd4f0a0e48 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5
[ 548.589890] RAX: ffffffffffffffda RBX: 000000000146c030 RCX: 00007f76fc331b9a
[ 548.589892] RDX: 000000000146c210 RSI: 000000000146df30 RDI: 0000000001474ec0
[ 548.589895] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000013
[ 548.589897] R10: 00000000c0ed0000 R11: 0000000000000206 R12: 0000000001474ec0
[ 548.589900] R13: 000000000146c210 R14: 0000000000000000 R15: 0000000000000003
[ 548.590242] The buggy address belongs to the page:
[ 548.591243] page:ffffea0007a70980 count:0 mapcount:0 mapping:0000000000000000 index:0x0
[ 548.592886] flags: 0x2ffff0000000000()
[ 548.593665] raw: 02ffff0000000000 dead000000000100 dead000000000200 0000000000000000
[ 548.595258] raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000
[ 548.603713] page dumped because: kasan: bad access detected
[ 548.605203] Memory state around the buggy address:
[ 548.606198] ffff8801e9c26480: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[ 548.607676] ffff8801e9c26500: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[ 548.609157] >ffff8801e9c26580: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[ 548.610629] ^
[ 548.612088] ffff8801e9c26600: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[ 548.613674] ffff8801e9c26680: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[ 548.615141] ==================================================================
[ 548.616613] Disabling lock debugging due to kernel taint
[ 548.622871] WARNING: CPU: 1 PID: 1295 at mm/page_alloc.c:4065 __alloc_pages_slowpath+0xe4a/0x1420
[ 548.622878] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_timer snd mac_hid i2c_piix4 soundcore ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx raid1 raid0 multipath linear 8139too crct10dif_pclmul crc32_pclmul qxl drm_kms_helper syscopyarea aesni_intel sysfillrect sysimgblt fb_sys_fops ttm drm aes_x86_64 crypto_simd cryptd 8139cp glue_helper mii pata_acpi floppy
[ 548.623217] CPU: 1 PID: 1295 Comm: mount Tainted: G B 4.18.0-rc1+ #4
[ 548.623219] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
[ 548.623226] RIP: 0010:__alloc_pages_slowpath+0xe4a/0x1420
[ 548.623227] Code: ff ff 01 89 85 c8 fe ff ff e9 91 fc ff ff 41 89 c5 e9 5c fc ff ff 0f 0b 89 f8 25 ff ff f7 ff 89 85 8c fe ff ff e9 d5 f2 ff ff <0f> 0b e9 65 f2 ff ff 65 8b 05 38 81 d2 47 f6 c4 01 74 1c 65 48 8b
[ 548.623281] RSP: 0018:ffff8801f28c7678 EFLAGS: 00010246
[ 548.623284] RAX: 0000000000000000 RBX: 00000000006040c0 RCX: ffffffffb82f73b7
[ 548.623287] RDX: 1ffff1003e518eeb RSI: 000000000000000c RDI: 0000000000000000
[ 548.623290] RBP: ffff8801f28c7880 R08: 0000000000000000 R09: ffffed0047fff2c5
[ 548.623292] R10: 0000000000000001 R11: ffffed0047fff2c4 R12: ffff8801e88de040
[ 548.623295] R13: 00000000006040c0 R14: 000000000000000c R15: ffff8801f28c7938
[ 548.623299] FS: 00007f76fca51840(0000) GS:ffff8801f6f00000(0000) knlGS:0000000000000000
[ 548.623302] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 548.623304] CR2: 00007f19b9171760 CR3: 00000001ed952000 CR4: 00000000000006e0
[ 548.623317] Call Trace:
[ 548.623325] ? kasan_check_read+0x11/0x20
[ 548.623330] ? __zone_watermark_ok+0x92/0x240
[ 548.623336] ? get_page_from_freelist+0x1c3/0x1d90
[ 548.623347] ? _raw_spin_lock_irqsave+0x2a/0x60
[ 548.623353] ? warn_alloc+0x250/0x250
[ 548.623358] ? save_stack+0x46/0xd0
[ 548.623361] ? kasan_kmalloc+0xad/0xe0
[ 548.623366] ? __isolate_free_page+0x2a0/0x2a0
[ 548.623370] ? mount_fs+0x60/0x1a0
[ 548.623374] ? vfs_kern_mount+0x6b/0x1a0
[ 548.623378] ? do_mount+0x34a/0x18c0
[ 548.623383] ? ksys_mount+0x83/0xd0
[ 548.623387] ? __x64_sys_mount+0x67/0x80
[ 548.623391] ? do_syscall_64+0x78/0x170
[ 548.623396] ? entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 548.623401] __alloc_pages_nodemask+0x3c5/0x400
[ 548.623407] ? __alloc_pages_slowpath+0x1420/0x1420
[ 548.623412] ? __mutex_lock_slowpath+0x20/0x20
[ 548.623417] ? kvmalloc_node+0x31/0x80
[ 548.623424] alloc_pages_current+0x75/0x110
[ 548.623436] kmalloc_order+0x24/0x60
[ 548.623442] kmalloc_order_trace+0x24/0xb0
[ 548.623448] __kmalloc_track_caller+0x207/0x220
[ 548.623455] ? f2fs_build_node_manager+0x399/0xbb0
[ 548.623460] kmemdup+0x20/0x50
[ 548.623465] f2fs_build_node_manager+0x399/0xbb0
[ 548.623470] f2fs_fill_super+0x195e/0x2b40
[ 548.623477] ? f2fs_commit_super+0x1b0/0x1b0
[ 548.623481] ? set_blocksize+0x90/0x140
[ 548.623486] mount_bdev+0x1c5/0x210
[ 548.623489] ? f2fs_commit_super+0x1b0/0x1b0
[ 548.623495] f2fs_mount+0x15/0x20
[ 548.623498] mount_fs+0x60/0x1a0
[ 548.623503] ? alloc_vfsmnt+0x309/0x360
[ 548.623508] vfs_kern_mount+0x6b/0x1a0
[ 548.623513] do_mount+0x34a/0x18c0
[ 548.623518] ? lockref_put_or_lock+0xcf/0x160
[ 548.623523] ? copy_mount_string+0x20/0x20
[ 548.623528] ? memcg_kmem_put_cache+0x1b/0xa0
[ 548.623533] ? kasan_check_write+0x14/0x20
[ 548.623537] ? _copy_from_user+0x6a/0x90
[ 548.623542] ? memdup_user+0x42/0x60
[ 548.623547] ksys_mount+0x83/0xd0
[ 548.623552] __x64_sys_mount+0x67/0x80
[ 548.623557] do_syscall_64+0x78/0x170
[ 548.623562] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 548.623566] RIP: 0033:0x7f76fc331b9a
[ 548.623567] Code: 48 8b 0d 01 c3 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ce c2 2b 00 f7 d8 64 89 01 48
[ 548.623632] RSP: 002b:00007ffd4f0a0e48 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5
[ 548.623636] RAX: ffffffffffffffda RBX: 000000000146c030 RCX: 00007f76fc331b9a
[ 548.623639] RDX: 000000000146c210 RSI: 000000000146df30 RDI: 0000000001474ec0
[ 548.623641] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000013
[ 548.623643] R10: 00000000c0ed0000 R11: 0000000000000206 R12: 0000000001474ec0
[ 548.623646] R13: 000000000146c210 R14: 0000000000000000 R15: 0000000000000003
[ 548.623650] ---[ end trace 4ce02f25ff7d3df5 ]---
[ 548.623656] F2FS-fs (loop0): Failed to initialize F2FS node manager
[ 548.627936] F2FS-fs (loop0): Invalid log blocks per segment (8201)
[ 548.627940] F2FS-fs (loop0): Can't find valid F2FS filesystem in 1th superblock
[ 548.635835] F2FS-fs (loop0): Failed to initialize F2FS node manager
- Location
https://elixir.bootlin.com/linux/v4.18-rc1/source/fs/f2fs/segment.c#L3578
sit_i->sit_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
Buffer overrun happens when doing memcpy. I suspect there are missing (inconsistent) checks on bitmap_size.
Reported by Wen Xu (wen.xu@gatech.edu) from SSLab, Gatech.
Reported-by: Wen Xu <wen.xu@gatech.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-06-23 10:25:19 +07:00
|
|
|
sit_bitmap_size = le32_to_cpu(ckpt->sit_ver_bitmap_bytesize);
|
|
|
|
nat_bitmap_size = le32_to_cpu(ckpt->nat_ver_bitmap_bytesize);
|
|
|
|
|
|
|
|
if (sit_bitmap_size != ((sit_segs / 2) << log_blocks_per_seg) / 8 ||
|
|
|
|
nat_bitmap_size != ((nat_segs / 2) << log_blocks_per_seg) / 8) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_err(sbi, "Wrong bitmap size: sit: %u, nat:%u",
|
|
|
|
sit_bitmap_size, nat_bitmap_size);
|
f2fs: fix to do sanity check with {sit,nat}_ver_bitmap_bytesize
This patch adds a sanity check on {sit,nat}_ver_bitmap_bytesize
during mount, in order to avoid accessing across the cache boundary with
an abnormal bitmap size.
- Overview
buffer overrun in build_sit_info() when mounting a crafted f2fs image
- Reproduce
- Kernel message
[ 548.580867] F2FS-fs (loop0): Invalid log blocks per segment (8201)
[ 548.580877] F2FS-fs (loop0): Can't find valid F2FS filesystem in 1th superblock
[ 548.584979] ==================================================================
[ 548.586568] BUG: KASAN: use-after-free in kmemdup+0x36/0x50
[ 548.587715] Read of size 64 at addr ffff8801e9c265ff by task mount/1295
[ 548.589428] CPU: 1 PID: 1295 Comm: mount Not tainted 4.18.0-rc1+ #4
[ 548.589432] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
[ 548.589438] Call Trace:
[ 548.589474] dump_stack+0x7b/0xb5
[ 548.589487] print_address_description+0x70/0x290
[ 548.589492] kasan_report+0x291/0x390
[ 548.589496] ? kmemdup+0x36/0x50
[ 548.589509] check_memory_region+0x139/0x190
[ 548.589514] memcpy+0x23/0x50
[ 548.589518] kmemdup+0x36/0x50
[ 548.589545] f2fs_build_segment_manager+0x8fa/0x3410
[ 548.589551] ? __asan_loadN+0xf/0x20
[ 548.589560] ? f2fs_sanity_check_ckpt+0x1be/0x240
[ 548.589566] ? f2fs_flush_sit_entries+0x10c0/0x10c0
[ 548.589587] ? __put_user_ns+0x40/0x40
[ 548.589604] ? find_next_bit+0x57/0x90
[ 548.589610] f2fs_fill_super+0x194b/0x2b40
[ 548.589617] ? f2fs_commit_super+0x1b0/0x1b0
[ 548.589637] ? set_blocksize+0x90/0x140
[ 548.589651] mount_bdev+0x1c5/0x210
[ 548.589655] ? f2fs_commit_super+0x1b0/0x1b0
[ 548.589667] f2fs_mount+0x15/0x20
[ 548.589672] mount_fs+0x60/0x1a0
[ 548.589683] ? alloc_vfsmnt+0x309/0x360
[ 548.589688] vfs_kern_mount+0x6b/0x1a0
[ 548.589699] do_mount+0x34a/0x18c0
[ 548.589710] ? lockref_put_or_lock+0xcf/0x160
[ 548.589716] ? copy_mount_string+0x20/0x20
[ 548.589728] ? memcg_kmem_put_cache+0x1b/0xa0
[ 548.589734] ? kasan_check_write+0x14/0x20
[ 548.589740] ? _copy_from_user+0x6a/0x90
[ 548.589744] ? memdup_user+0x42/0x60
[ 548.589750] ksys_mount+0x83/0xd0
[ 548.589755] __x64_sys_mount+0x67/0x80
[ 548.589781] do_syscall_64+0x78/0x170
[ 548.589797] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 548.589820] RIP: 0033:0x7f76fc331b9a
[ 548.589821] Code: 48 8b 0d 01 c3 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ce c2 2b 00 f7 d8 64 89 01 48
[ 548.589880] RSP: 002b:00007ffd4f0a0e48 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5
[ 548.589890] RAX: ffffffffffffffda RBX: 000000000146c030 RCX: 00007f76fc331b9a
[ 548.589892] RDX: 000000000146c210 RSI: 000000000146df30 RDI: 0000000001474ec0
[ 548.589895] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000013
[ 548.589897] R10: 00000000c0ed0000 R11: 0000000000000206 R12: 0000000001474ec0
[ 548.589900] R13: 000000000146c210 R14: 0000000000000000 R15: 0000000000000003
[ 548.590242] The buggy address belongs to the page:
[ 548.591243] page:ffffea0007a70980 count:0 mapcount:0 mapping:0000000000000000 index:0x0
[ 548.592886] flags: 0x2ffff0000000000()
[ 548.593665] raw: 02ffff0000000000 dead000000000100 dead000000000200 0000000000000000
[ 548.595258] raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000
[ 548.603713] page dumped because: kasan: bad access detected
[ 548.605203] Memory state around the buggy address:
[ 548.606198] ffff8801e9c26480: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[ 548.607676] ffff8801e9c26500: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[ 548.609157] >ffff8801e9c26580: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[ 548.610629] ^
[ 548.612088] ffff8801e9c26600: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[ 548.613674] ffff8801e9c26680: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[ 548.615141] ==================================================================
[ 548.616613] Disabling lock debugging due to kernel taint
[ 548.622871] WARNING: CPU: 1 PID: 1295 at mm/page_alloc.c:4065 __alloc_pages_slowpath+0xe4a/0x1420
[ 548.622878] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_timer snd mac_hid i2c_piix4 soundcore ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx raid1 raid0 multipath linear 8139too crct10dif_pclmul crc32_pclmul qxl drm_kms_helper syscopyarea aesni_intel sysfillrect sysimgblt fb_sys_fops ttm drm aes_x86_64 crypto_simd cryptd 8139cp glue_helper mii pata_acpi floppy
[ 548.623217] CPU: 1 PID: 1295 Comm: mount Tainted: G B 4.18.0-rc1+ #4
[ 548.623219] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
[ 548.623226] RIP: 0010:__alloc_pages_slowpath+0xe4a/0x1420
[ 548.623227] Code: ff ff 01 89 85 c8 fe ff ff e9 91 fc ff ff 41 89 c5 e9 5c fc ff ff 0f 0b 89 f8 25 ff ff f7 ff 89 85 8c fe ff ff e9 d5 f2 ff ff <0f> 0b e9 65 f2 ff ff 65 8b 05 38 81 d2 47 f6 c4 01 74 1c 65 48 8b
[ 548.623281] RSP: 0018:ffff8801f28c7678 EFLAGS: 00010246
[ 548.623284] RAX: 0000000000000000 RBX: 00000000006040c0 RCX: ffffffffb82f73b7
[ 548.623287] RDX: 1ffff1003e518eeb RSI: 000000000000000c RDI: 0000000000000000
[ 548.623290] RBP: ffff8801f28c7880 R08: 0000000000000000 R09: ffffed0047fff2c5
[ 548.623292] R10: 0000000000000001 R11: ffffed0047fff2c4 R12: ffff8801e88de040
[ 548.623295] R13: 00000000006040c0 R14: 000000000000000c R15: ffff8801f28c7938
[ 548.623299] FS: 00007f76fca51840(0000) GS:ffff8801f6f00000(0000) knlGS:0000000000000000
[ 548.623302] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 548.623304] CR2: 00007f19b9171760 CR3: 00000001ed952000 CR4: 00000000000006e0
[ 548.623317] Call Trace:
[ 548.623325] ? kasan_check_read+0x11/0x20
[ 548.623330] ? __zone_watermark_ok+0x92/0x240
[ 548.623336] ? get_page_from_freelist+0x1c3/0x1d90
[ 548.623347] ? _raw_spin_lock_irqsave+0x2a/0x60
[ 548.623353] ? warn_alloc+0x250/0x250
[ 548.623358] ? save_stack+0x46/0xd0
[ 548.623361] ? kasan_kmalloc+0xad/0xe0
[ 548.623366] ? __isolate_free_page+0x2a0/0x2a0
[ 548.623370] ? mount_fs+0x60/0x1a0
[ 548.623374] ? vfs_kern_mount+0x6b/0x1a0
[ 548.623378] ? do_mount+0x34a/0x18c0
[ 548.623383] ? ksys_mount+0x83/0xd0
[ 548.623387] ? __x64_sys_mount+0x67/0x80
[ 548.623391] ? do_syscall_64+0x78/0x170
[ 548.623396] ? entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 548.623401] __alloc_pages_nodemask+0x3c5/0x400
[ 548.623407] ? __alloc_pages_slowpath+0x1420/0x1420
[ 548.623412] ? __mutex_lock_slowpath+0x20/0x20
[ 548.623417] ? kvmalloc_node+0x31/0x80
[ 548.623424] alloc_pages_current+0x75/0x110
[ 548.623436] kmalloc_order+0x24/0x60
[ 548.623442] kmalloc_order_trace+0x24/0xb0
[ 548.623448] __kmalloc_track_caller+0x207/0x220
[ 548.623455] ? f2fs_build_node_manager+0x399/0xbb0
[ 548.623460] kmemdup+0x20/0x50
[ 548.623465] f2fs_build_node_manager+0x399/0xbb0
[ 548.623470] f2fs_fill_super+0x195e/0x2b40
[ 548.623477] ? f2fs_commit_super+0x1b0/0x1b0
[ 548.623481] ? set_blocksize+0x90/0x140
[ 548.623486] mount_bdev+0x1c5/0x210
[ 548.623489] ? f2fs_commit_super+0x1b0/0x1b0
[ 548.623495] f2fs_mount+0x15/0x20
[ 548.623498] mount_fs+0x60/0x1a0
[ 548.623503] ? alloc_vfsmnt+0x309/0x360
[ 548.623508] vfs_kern_mount+0x6b/0x1a0
[ 548.623513] do_mount+0x34a/0x18c0
[ 548.623518] ? lockref_put_or_lock+0xcf/0x160
[ 548.623523] ? copy_mount_string+0x20/0x20
[ 548.623528] ? memcg_kmem_put_cache+0x1b/0xa0
[ 548.623533] ? kasan_check_write+0x14/0x20
[ 548.623537] ? _copy_from_user+0x6a/0x90
[ 548.623542] ? memdup_user+0x42/0x60
[ 548.623547] ksys_mount+0x83/0xd0
[ 548.623552] __x64_sys_mount+0x67/0x80
[ 548.623557] do_syscall_64+0x78/0x170
[ 548.623562] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 548.623566] RIP: 0033:0x7f76fc331b9a
[ 548.623567] Code: 48 8b 0d 01 c3 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ce c2 2b 00 f7 d8 64 89 01 48
[ 548.623632] RSP: 002b:00007ffd4f0a0e48 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5
[ 548.623636] RAX: ffffffffffffffda RBX: 000000000146c030 RCX: 00007f76fc331b9a
[ 548.623639] RDX: 000000000146c210 RSI: 000000000146df30 RDI: 0000000001474ec0
[ 548.623641] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000013
[ 548.623643] R10: 00000000c0ed0000 R11: 0000000000000206 R12: 0000000001474ec0
[ 548.623646] R13: 000000000146c210 R14: 0000000000000000 R15: 0000000000000003
[ 548.623650] ---[ end trace 4ce02f25ff7d3df5 ]---
[ 548.623656] F2FS-fs (loop0): Failed to initialize F2FS node manager
[ 548.627936] F2FS-fs (loop0): Invalid log blocks per segment (8201)
[ 548.627940] F2FS-fs (loop0): Can't find valid F2FS filesystem in 1th superblock
[ 548.635835] F2FS-fs (loop0): Failed to initialize F2FS node manager
- Location
https://elixir.bootlin.com/linux/v4.18-rc1/source/fs/f2fs/segment.c#L3578
sit_i->sit_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
Buffer overrun happens when doing memcpy. I suspect there are missing (inconsistent) checks on bitmap_size.
Reported by Wen Xu (wen.xu@gatech.edu) from SSLab, Gatech.
Reported-by: Wen Xu <wen.xu@gatech.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-06-23 10:25:19 +07:00
|
|
|
return 1;
|
|
|
|
}
|
f2fs: fix to do sanity check with cp_pack_start_sum
After fuzzing, cp_pack_start_sum could be corrupted, so the current log's
summary info will be wrong due to loading an incorrect summary block.
Then, if a segment's type in the current log exceeds NR_CURSEG_TYPE, it
can finally lead to accessing an invalid dirty_i->dirty_segmap bitmap.
Add a sanity check for cp_pack_start_sum to fix this issue.
https://bugzilla.kernel.org/show_bug.cgi?id=200419
- Reproduce
- Kernel message (f2fs-dev w/ KASAN)
[ 3117.578432] F2FS-fs (loop0): Invalid log blocks per segment (8)
[ 3117.578445] F2FS-fs (loop0): Can't find valid F2FS filesystem in 2th superblock
[ 3117.581364] F2FS-fs (loop0): invalid crc_offset: 30716
[ 3117.583564] WARNING: CPU: 1 PID: 1225 at fs/f2fs/checkpoint.c:90 __get_meta_page+0x448/0x4b0
[ 3117.583570] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_timer joydev input_leds serio_raw snd soundcore mac_hid i2c_piix4 ib_iser rdma_cm iw_cm ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi btrfs zstd_decompress zstd_compress xxhash raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear 8139too qxl ttm drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc aesni_intel psmouse aes_x86_64 8139cp crypto_simd cryptd mii glue_helper pata_acpi floppy
[ 3117.584014] CPU: 1 PID: 1225 Comm: mount Not tainted 4.17.0+ #1
[ 3117.584017] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
[ 3117.584022] RIP: 0010:__get_meta_page+0x448/0x4b0
[ 3117.584023] Code: 00 49 8d bc 24 84 00 00 00 e8 74 54 da ff 41 83 8c 24 84 00 00 00 08 4c 89 f6 4c 89 ef e8 c0 d9 95 00 48 89 ef e8 18 e3 00 00 <0f> 0b f0 80 4d 48 04 e9 0f fe ff ff 0f 0b 48 89 c7 48 89 04 24 e8
[ 3117.584072] RSP: 0018:ffff88018eb678c0 EFLAGS: 00010286
[ 3117.584082] RAX: ffff88018f0a6a78 RBX: ffffea0007a46600 RCX: ffffffff9314d1b2
[ 3117.584085] RDX: ffffffff00000001 RSI: 0000000000000000 RDI: ffff88018f0a6a98
[ 3117.584087] RBP: ffff88018ebe9980 R08: 0000000000000002 R09: 0000000000000001
[ 3117.584090] R10: 0000000000000001 R11: ffffed00326e4450 R12: ffff880193722200
[ 3117.584092] R13: ffff88018ebe9afc R14: 0000000000000206 R15: ffff88018eb67900
[ 3117.584096] FS: 00007f5694636840(0000) GS:ffff8801f3b00000(0000) knlGS:0000000000000000
[ 3117.584098] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 3117.584101] CR2: 00000000016f21b8 CR3: 0000000191c22000 CR4: 00000000000006e0
[ 3117.584112] Call Trace:
[ 3117.584121] ? f2fs_set_meta_page_dirty+0x150/0x150
[ 3117.584127] ? f2fs_build_segment_manager+0xbf9/0x3190
[ 3117.584133] ? f2fs_npages_for_summary_flush+0x75/0x120
[ 3117.584145] f2fs_build_segment_manager+0xda8/0x3190
[ 3117.584151] ? f2fs_get_valid_checkpoint+0x298/0xa00
[ 3117.584156] ? f2fs_flush_sit_entries+0x10e0/0x10e0
[ 3117.584184] ? map_id_range_down+0x17c/0x1b0
[ 3117.584188] ? __put_user_ns+0x30/0x30
[ 3117.584206] ? find_next_bit+0x53/0x90
[ 3117.584237] ? cpumask_next+0x16/0x20
[ 3117.584249] f2fs_fill_super+0x1948/0x2b40
[ 3117.584258] ? f2fs_commit_super+0x1a0/0x1a0
[ 3117.584279] ? sget_userns+0x65e/0x690
[ 3117.584296] ? set_blocksize+0x88/0x130
[ 3117.584302] ? f2fs_commit_super+0x1a0/0x1a0
[ 3117.584305] mount_bdev+0x1c0/0x200
[ 3117.584310] mount_fs+0x5c/0x190
[ 3117.584320] vfs_kern_mount+0x64/0x190
[ 3117.584330] do_mount+0x2e4/0x1450
[ 3117.584343] ? lockref_put_return+0x130/0x130
[ 3117.584347] ? copy_mount_string+0x20/0x20
[ 3117.584357] ? kasan_unpoison_shadow+0x31/0x40
[ 3117.584362] ? kasan_kmalloc+0xa6/0xd0
[ 3117.584373] ? memcg_kmem_put_cache+0x16/0x90
[ 3117.584377] ? __kmalloc_track_caller+0x196/0x210
[ 3117.584383] ? _copy_from_user+0x61/0x90
[ 3117.584396] ? memdup_user+0x3e/0x60
[ 3117.584401] ksys_mount+0x7e/0xd0
[ 3117.584405] __x64_sys_mount+0x62/0x70
[ 3117.584427] do_syscall_64+0x73/0x160
[ 3117.584440] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 3117.584455] RIP: 0033:0x7f5693f14b9a
[ 3117.584456] Code: 48 8b 0d 01 c3 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ce c2 2b 00 f7 d8 64 89 01 48
[ 3117.584505] RSP: 002b:00007fff27346488 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5
[ 3117.584510] RAX: ffffffffffffffda RBX: 00000000016e2030 RCX: 00007f5693f14b9a
[ 3117.584512] RDX: 00000000016e2210 RSI: 00000000016e3f30 RDI: 00000000016ee040
[ 3117.584514] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000013
[ 3117.584516] R10: 00000000c0ed0000 R11: 0000000000000206 R12: 00000000016ee040
[ 3117.584519] R13: 00000000016e2210 R14: 0000000000000000 R15: 0000000000000003
[ 3117.584523] ---[ end trace a8e0d899985faf31 ]---
[ 3117.685663] F2FS-fs (loop0): f2fs_check_nid_range: out-of-range nid=2, run fsck to fix.
[ 3117.685673] F2FS-fs (loop0): recover_data: ino = 2 (i_size: recover) recovered = 1, err = 0
[ 3117.685707] ==================================================================
[ 3117.685955] BUG: KASAN: slab-out-of-bounds in __remove_dirty_segment+0xdd/0x1e0
[ 3117.686175] Read of size 8 at addr ffff88018f0a63d0 by task mount/1225
[ 3117.686477] CPU: 0 PID: 1225 Comm: mount Tainted: G W 4.17.0+ #1
[ 3117.686481] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
[ 3117.686483] Call Trace:
[ 3117.686494] dump_stack+0x71/0xab
[ 3117.686512] print_address_description+0x6b/0x290
[ 3117.686517] kasan_report+0x28e/0x390
[ 3117.686522] ? __remove_dirty_segment+0xdd/0x1e0
[ 3117.686527] __remove_dirty_segment+0xdd/0x1e0
[ 3117.686532] locate_dirty_segment+0x189/0x190
[ 3117.686538] f2fs_allocate_new_segments+0xa9/0xe0
[ 3117.686543] recover_data+0x703/0x2c20
[ 3117.686547] ? f2fs_recover_fsync_data+0x48f/0xd50
[ 3117.686553] ? ksys_mount+0x7e/0xd0
[ 3117.686564] ? policy_nodemask+0x1a/0x90
[ 3117.686567] ? policy_node+0x56/0x70
[ 3117.686571] ? add_fsync_inode+0xf0/0xf0
[ 3117.686592] ? blk_finish_plug+0x44/0x60
[ 3117.686597] ? f2fs_ra_meta_pages+0x38b/0x5e0
[ 3117.686602] ? find_inode_fast+0xac/0xc0
[ 3117.686606] ? f2fs_is_valid_blkaddr+0x320/0x320
[ 3117.686618] ? __radix_tree_lookup+0x150/0x150
[ 3117.686633] ? dqget+0x670/0x670
[ 3117.686648] ? pagecache_get_page+0x29/0x410
[ 3117.686656] ? kmem_cache_alloc+0x176/0x1e0
[ 3117.686660] ? f2fs_is_valid_blkaddr+0x11d/0x320
[ 3117.686664] f2fs_recover_fsync_data+0xc23/0xd50
[ 3117.686670] ? f2fs_space_for_roll_forward+0x60/0x60
[ 3117.686674] ? rb_insert_color+0x323/0x3d0
[ 3117.686678] ? f2fs_recover_orphan_inodes+0xa5/0x700
[ 3117.686683] ? proc_register+0x153/0x1d0
[ 3117.686686] ? f2fs_remove_orphan_inode+0x10/0x10
[ 3117.686695] ? f2fs_attr_store+0x50/0x50
[ 3117.686700] ? proc_create_single_data+0x52/0x60
[ 3117.686707] f2fs_fill_super+0x1d06/0x2b40
[ 3117.686728] ? f2fs_commit_super+0x1a0/0x1a0
[ 3117.686735] ? sget_userns+0x65e/0x690
[ 3117.686740] ? set_blocksize+0x88/0x130
[ 3117.686745] ? f2fs_commit_super+0x1a0/0x1a0
[ 3117.686748] mount_bdev+0x1c0/0x200
[ 3117.686753] mount_fs+0x5c/0x190
[ 3117.686758] vfs_kern_mount+0x64/0x190
[ 3117.686762] do_mount+0x2e4/0x1450
[ 3117.686769] ? lockref_put_return+0x130/0x130
[ 3117.686773] ? copy_mount_string+0x20/0x20
[ 3117.686777] ? kasan_unpoison_shadow+0x31/0x40
[ 3117.686780] ? kasan_kmalloc+0xa6/0xd0
[ 3117.686786] ? memcg_kmem_put_cache+0x16/0x90
[ 3117.686790] ? __kmalloc_track_caller+0x196/0x210
[ 3117.686795] ? _copy_from_user+0x61/0x90
[ 3117.686801] ? memdup_user+0x3e/0x60
[ 3117.686804] ksys_mount+0x7e/0xd0
[ 3117.686809] __x64_sys_mount+0x62/0x70
[ 3117.686816] do_syscall_64+0x73/0x160
[ 3117.686824] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 3117.686829] RIP: 0033:0x7f5693f14b9a
[ 3117.686830] Code: 48 8b 0d 01 c3 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ce c2 2b 00 f7 d8 64 89 01 48
[ 3117.686887] RSP: 002b:00007fff27346488 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5
[ 3117.686892] RAX: ffffffffffffffda RBX: 00000000016e2030 RCX: 00007f5693f14b9a
[ 3117.686894] RDX: 00000000016e2210 RSI: 00000000016e3f30 RDI: 00000000016ee040
[ 3117.686896] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000013
[ 3117.686899] R10: 00000000c0ed0000 R11: 0000000000000206 R12: 00000000016ee040
[ 3117.686901] R13: 00000000016e2210 R14: 0000000000000000 R15: 0000000000000003
[ 3117.687005] Allocated by task 1225:
[ 3117.687152] kasan_kmalloc+0xa6/0xd0
[ 3117.687157] kmem_cache_alloc_trace+0xfd/0x200
[ 3117.687161] f2fs_build_segment_manager+0x2d09/0x3190
[ 3117.687165] f2fs_fill_super+0x1948/0x2b40
[ 3117.687168] mount_bdev+0x1c0/0x200
[ 3117.687171] mount_fs+0x5c/0x190
[ 3117.687174] vfs_kern_mount+0x64/0x190
[ 3117.687177] do_mount+0x2e4/0x1450
[ 3117.687180] ksys_mount+0x7e/0xd0
[ 3117.687182] __x64_sys_mount+0x62/0x70
[ 3117.687186] do_syscall_64+0x73/0x160
[ 3117.687190] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 3117.687285] Freed by task 19:
[ 3117.687412] __kasan_slab_free+0x137/0x190
[ 3117.687416] kfree+0x8b/0x1b0
[ 3117.687460] ttm_bo_man_put_node+0x61/0x80 [ttm]
[ 3117.687476] ttm_bo_cleanup_refs+0x15f/0x250 [ttm]
[ 3117.687492] ttm_bo_delayed_delete+0x2f0/0x300 [ttm]
[ 3117.687507] ttm_bo_delayed_workqueue+0x17/0x50 [ttm]
[ 3117.687528] process_one_work+0x2f9/0x740
[ 3117.687531] worker_thread+0x78/0x6b0
[ 3117.687541] kthread+0x177/0x1c0
[ 3117.687545] ret_from_fork+0x35/0x40
[ 3117.687638] The buggy address belongs to the object at ffff88018f0a6300
which belongs to the cache kmalloc-192 of size 192
[ 3117.688014] The buggy address is located 16 bytes to the right of
192-byte region [ffff88018f0a6300, ffff88018f0a63c0)
[ 3117.688382] The buggy address belongs to the page:
[ 3117.688554] page:ffffea00063c2980 count:1 mapcount:0 mapping:ffff8801f3403180 index:0x0
[ 3117.688788] flags: 0x17fff8000000100(slab)
[ 3117.688944] raw: 017fff8000000100 ffffea00063c2840 0000000e0000000e ffff8801f3403180
[ 3117.689166] raw: 0000000000000000 0000000080100010 00000001ffffffff 0000000000000000
[ 3117.689386] page dumped because: kasan: bad access detected
[ 3117.689653] Memory state around the buggy address:
[ 3117.689816] ffff88018f0a6280: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc
[ 3117.690027] ffff88018f0a6300: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[ 3117.690239] >ffff88018f0a6380: 00 00 fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[ 3117.690448] ^
[ 3117.690644] ffff88018f0a6400: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[ 3117.690868] ffff88018f0a6480: 00 00 fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[ 3117.691077] ==================================================================
[ 3117.691290] Disabling lock debugging due to kernel taint
[ 3117.693893] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
[ 3117.694120] PGD 80000001f01bc067 P4D 80000001f01bc067 PUD 1d9638067 PMD 0
[ 3117.694338] Oops: 0002 [#1] SMP KASAN PTI
[ 3117.694490] CPU: 1 PID: 1225 Comm: mount Tainted: G B W 4.17.0+ #1
[ 3117.694703] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
[ 3117.695073] RIP: 0010:__remove_dirty_segment+0xe2/0x1e0
[ 3117.695246] Code: c4 48 89 c7 e8 cf bb d7 ff 45 0f b6 24 24 41 83 e4 3f 44 88 64 24 07 41 83 e4 3f 4a 8d 7c e3 08 e8 b3 bc d7 ff 4a 8b 4c e3 08 <f0> 4c 0f b3 29 0f 82 94 00 00 00 48 8d bd 20 04 00 00 e8 97 bb d7
[ 3117.695793] RSP: 0018:ffff88018eb67638 EFLAGS: 00010292
[ 3117.695969] RAX: 0000000000000000 RBX: ffff88018f0a6300 RCX: 0000000000000000
[ 3117.696182] RDX: 0000000000000000 RSI: 0000000000000297 RDI: 0000000000000297
[ 3117.696391] RBP: ffff88018ebe9980 R08: ffffed003e743ebb R09: ffffed003e743ebb
[ 3117.696604] R10: 0000000000000001 R11: ffffed003e743eba R12: 0000000000000019
[ 3117.696813] R13: 0000000000000014 R14: 0000000000000320 R15: ffff88018ebe99e0
[ 3117.697032] FS: 00007f5694636840(0000) GS:ffff8801f3b00000(0000) knlGS:0000000000000000
[ 3117.697280] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 3117.702357] CR2: 00007fe89bb1a000 CR3: 0000000191c22000 CR4: 00000000000006e0
[ 3117.707235] Call Trace:
[ 3117.712077] locate_dirty_segment+0x189/0x190
[ 3117.716891] f2fs_allocate_new_segments+0xa9/0xe0
[ 3117.721617] recover_data+0x703/0x2c20
[ 3117.726316] ? f2fs_recover_fsync_data+0x48f/0xd50
[ 3117.730957] ? ksys_mount+0x7e/0xd0
[ 3117.735573] ? policy_nodemask+0x1a/0x90
[ 3117.740198] ? policy_node+0x56/0x70
[ 3117.744829] ? add_fsync_inode+0xf0/0xf0
[ 3117.749487] ? blk_finish_plug+0x44/0x60
[ 3117.754152] ? f2fs_ra_meta_pages+0x38b/0x5e0
[ 3117.758831] ? find_inode_fast+0xac/0xc0
[ 3117.763448] ? f2fs_is_valid_blkaddr+0x320/0x320
[ 3117.768046] ? __radix_tree_lookup+0x150/0x150
[ 3117.772603] ? dqget+0x670/0x670
[ 3117.777159] ? pagecache_get_page+0x29/0x410
[ 3117.781648] ? kmem_cache_alloc+0x176/0x1e0
[ 3117.786067] ? f2fs_is_valid_blkaddr+0x11d/0x320
[ 3117.790476] f2fs_recover_fsync_data+0xc23/0xd50
[ 3117.794790] ? f2fs_space_for_roll_forward+0x60/0x60
[ 3117.799086] ? rb_insert_color+0x323/0x3d0
[ 3117.803304] ? f2fs_recover_orphan_inodes+0xa5/0x700
[ 3117.807563] ? proc_register+0x153/0x1d0
[ 3117.811766] ? f2fs_remove_orphan_inode+0x10/0x10
[ 3117.815947] ? f2fs_attr_store+0x50/0x50
[ 3117.820087] ? proc_create_single_data+0x52/0x60
[ 3117.824262] f2fs_fill_super+0x1d06/0x2b40
[ 3117.828367] ? f2fs_commit_super+0x1a0/0x1a0
[ 3117.832432] ? sget_userns+0x65e/0x690
[ 3117.836500] ? set_blocksize+0x88/0x130
[ 3117.840501] ? f2fs_commit_super+0x1a0/0x1a0
[ 3117.844420] mount_bdev+0x1c0/0x200
[ 3117.848275] mount_fs+0x5c/0x190
[ 3117.852053] vfs_kern_mount+0x64/0x190
[ 3117.855810] do_mount+0x2e4/0x1450
[ 3117.859441] ? lockref_put_return+0x130/0x130
[ 3117.862996] ? copy_mount_string+0x20/0x20
[ 3117.866417] ? kasan_unpoison_shadow+0x31/0x40
[ 3117.869719] ? kasan_kmalloc+0xa6/0xd0
[ 3117.872948] ? memcg_kmem_put_cache+0x16/0x90
[ 3117.876121] ? __kmalloc_track_caller+0x196/0x210
[ 3117.879333] ? _copy_from_user+0x61/0x90
[ 3117.882467] ? memdup_user+0x3e/0x60
[ 3117.885604] ksys_mount+0x7e/0xd0
[ 3117.888700] __x64_sys_mount+0x62/0x70
[ 3117.891742] do_syscall_64+0x73/0x160
[ 3117.894692] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 3117.897669] RIP: 0033:0x7f5693f14b9a
[ 3117.900563] Code: 48 8b 0d 01 c3 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ce c2 2b 00 f7 d8 64 89 01 48
[ 3117.906922] RSP: 002b:00007fff27346488 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5
[ 3117.910159] RAX: ffffffffffffffda RBX: 00000000016e2030 RCX: 00007f5693f14b9a
[ 3117.913469] RDX: 00000000016e2210 RSI: 00000000016e3f30 RDI: 00000000016ee040
[ 3117.916764] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000013
[ 3117.920071] R10: 00000000c0ed0000 R11: 0000000000000206 R12: 00000000016ee040
[ 3117.923393] R13: 00000000016e2210 R14: 0000000000000000 R15: 0000000000000003
[ 3117.926680] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_timer joydev input_leds serio_raw snd soundcore mac_hid i2c_piix4 ib_iser rdma_cm iw_cm ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi btrfs zstd_decompress zstd_compress xxhash raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear 8139too qxl ttm drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc aesni_intel psmouse aes_x86_64 8139cp crypto_simd cryptd mii glue_helper pata_acpi floppy
[ 3117.949979] CR2: 0000000000000000
[ 3117.954283] ---[ end trace a8e0d899985faf32 ]---
[ 3117.958575] RIP: 0010:__remove_dirty_segment+0xe2/0x1e0
[ 3117.962810] Code: c4 48 89 c7 e8 cf bb d7 ff 45 0f b6 24 24 41 83 e4 3f 44 88 64 24 07 41 83 e4 3f 4a 8d 7c e3 08 e8 b3 bc d7 ff 4a 8b 4c e3 08 <f0> 4c 0f b3 29 0f 82 94 00 00 00 48 8d bd 20 04 00 00 e8 97 bb d7
[ 3117.971789] RSP: 0018:ffff88018eb67638 EFLAGS: 00010292
[ 3117.976333] RAX: 0000000000000000 RBX: ffff88018f0a6300 RCX: 0000000000000000
[ 3117.980926] RDX: 0000000000000000 RSI: 0000000000000297 RDI: 0000000000000297
[ 3117.985497] RBP: ffff88018ebe9980 R08: ffffed003e743ebb R09: ffffed003e743ebb
[ 3117.990098] R10: 0000000000000001 R11: ffffed003e743eba R12: 0000000000000019
[ 3117.994761] R13: 0000000000000014 R14: 0000000000000320 R15: ffff88018ebe99e0
[ 3117.999392] FS: 00007f5694636840(0000) GS:ffff8801f3b00000(0000) knlGS:0000000000000000
[ 3118.004096] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 3118.008816] CR2: 00007fe89bb1a000 CR3: 0000000191c22000 CR4: 00000000000006e0
- Location
https://elixir.bootlin.com/linux/v4.18-rc3/source/fs/f2fs/segment.c#L775
if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
dirty_i->nr_dirty[t]--;
Here dirty_i->dirty_segmap[t] can be NULL which leads to crash in test_and_clear_bit()
Reported-by: Wen Xu <wen.xu@gatech.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-08-01 18:16:11 +07:00
|
|
|
|
|
|
|
cp_pack_start_sum = __start_sum_addr(sbi);
|
|
|
|
cp_payload = __cp_payload(sbi);
|
|
|
|
if (cp_pack_start_sum < cp_payload + 1 ||
|
|
|
|
cp_pack_start_sum > blocks_per_seg - 1 -
|
|
|
|
NR_CURSEG_TYPE) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_err(sbi, "Wrong cp_pack_start_sum: %u",
|
|
|
|
cp_pack_start_sum);
|
f2fs: fix to do sanity check with cp_pack_start_sum
After fuzzing, cp_pack_start_sum can be corrupted, so the current log's
summary info will be wrong because an incorrect summary block is loaded.
Then, if a segment type in the current log exceeds NR_CURSEG_TYPE, this
can eventually lead to accessing an invalid dirty_i->dirty_segmap bitmap.
Add a sanity check for cp_pack_start_sum to fix this issue.
https://bugzilla.kernel.org/show_bug.cgi?id=200419
- Reproduce
- Kernel message (f2fs-dev w/ KASAN)
[ 3117.578432] F2FS-fs (loop0): Invalid log blocks per segment (8)
[ 3117.578445] F2FS-fs (loop0): Can't find valid F2FS filesystem in 2th superblock
[ 3117.581364] F2FS-fs (loop0): invalid crc_offset: 30716
[ 3117.583564] WARNING: CPU: 1 PID: 1225 at fs/f2fs/checkpoint.c:90 __get_meta_page+0x448/0x4b0
[ 3117.583570] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_timer joydev input_leds serio_raw snd soundcore mac_hid i2c_piix4 ib_iser rdma_cm iw_cm ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi btrfs zstd_decompress zstd_compress xxhash raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear 8139too qxl ttm drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc aesni_intel psmouse aes_x86_64 8139cp crypto_simd cryptd mii glue_helper pata_acpi floppy
[ 3117.584014] CPU: 1 PID: 1225 Comm: mount Not tainted 4.17.0+ #1
[ 3117.584017] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
[ 3117.584022] RIP: 0010:__get_meta_page+0x448/0x4b0
[ 3117.584023] Code: 00 49 8d bc 24 84 00 00 00 e8 74 54 da ff 41 83 8c 24 84 00 00 00 08 4c 89 f6 4c 89 ef e8 c0 d9 95 00 48 89 ef e8 18 e3 00 00 <0f> 0b f0 80 4d 48 04 e9 0f fe ff ff 0f 0b 48 89 c7 48 89 04 24 e8
[ 3117.584072] RSP: 0018:ffff88018eb678c0 EFLAGS: 00010286
[ 3117.584082] RAX: ffff88018f0a6a78 RBX: ffffea0007a46600 RCX: ffffffff9314d1b2
[ 3117.584085] RDX: ffffffff00000001 RSI: 0000000000000000 RDI: ffff88018f0a6a98
[ 3117.584087] RBP: ffff88018ebe9980 R08: 0000000000000002 R09: 0000000000000001
[ 3117.584090] R10: 0000000000000001 R11: ffffed00326e4450 R12: ffff880193722200
[ 3117.584092] R13: ffff88018ebe9afc R14: 0000000000000206 R15: ffff88018eb67900
[ 3117.584096] FS: 00007f5694636840(0000) GS:ffff8801f3b00000(0000) knlGS:0000000000000000
[ 3117.584098] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 3117.584101] CR2: 00000000016f21b8 CR3: 0000000191c22000 CR4: 00000000000006e0
[ 3117.584112] Call Trace:
[ 3117.584121] ? f2fs_set_meta_page_dirty+0x150/0x150
[ 3117.584127] ? f2fs_build_segment_manager+0xbf9/0x3190
[ 3117.584133] ? f2fs_npages_for_summary_flush+0x75/0x120
[ 3117.584145] f2fs_build_segment_manager+0xda8/0x3190
[ 3117.584151] ? f2fs_get_valid_checkpoint+0x298/0xa00
[ 3117.584156] ? f2fs_flush_sit_entries+0x10e0/0x10e0
[ 3117.584184] ? map_id_range_down+0x17c/0x1b0
[ 3117.584188] ? __put_user_ns+0x30/0x30
[ 3117.584206] ? find_next_bit+0x53/0x90
[ 3117.584237] ? cpumask_next+0x16/0x20
[ 3117.584249] f2fs_fill_super+0x1948/0x2b40
[ 3117.584258] ? f2fs_commit_super+0x1a0/0x1a0
[ 3117.584279] ? sget_userns+0x65e/0x690
[ 3117.584296] ? set_blocksize+0x88/0x130
[ 3117.584302] ? f2fs_commit_super+0x1a0/0x1a0
[ 3117.584305] mount_bdev+0x1c0/0x200
[ 3117.584310] mount_fs+0x5c/0x190
[ 3117.584320] vfs_kern_mount+0x64/0x190
[ 3117.584330] do_mount+0x2e4/0x1450
[ 3117.584343] ? lockref_put_return+0x130/0x130
[ 3117.584347] ? copy_mount_string+0x20/0x20
[ 3117.584357] ? kasan_unpoison_shadow+0x31/0x40
[ 3117.584362] ? kasan_kmalloc+0xa6/0xd0
[ 3117.584373] ? memcg_kmem_put_cache+0x16/0x90
[ 3117.584377] ? __kmalloc_track_caller+0x196/0x210
[ 3117.584383] ? _copy_from_user+0x61/0x90
[ 3117.584396] ? memdup_user+0x3e/0x60
[ 3117.584401] ksys_mount+0x7e/0xd0
[ 3117.584405] __x64_sys_mount+0x62/0x70
[ 3117.584427] do_syscall_64+0x73/0x160
[ 3117.584440] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 3117.584455] RIP: 0033:0x7f5693f14b9a
[ 3117.584456] Code: 48 8b 0d 01 c3 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ce c2 2b 00 f7 d8 64 89 01 48
[ 3117.584505] RSP: 002b:00007fff27346488 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5
[ 3117.584510] RAX: ffffffffffffffda RBX: 00000000016e2030 RCX: 00007f5693f14b9a
[ 3117.584512] RDX: 00000000016e2210 RSI: 00000000016e3f30 RDI: 00000000016ee040
[ 3117.584514] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000013
[ 3117.584516] R10: 00000000c0ed0000 R11: 0000000000000206 R12: 00000000016ee040
[ 3117.584519] R13: 00000000016e2210 R14: 0000000000000000 R15: 0000000000000003
[ 3117.584523] ---[ end trace a8e0d899985faf31 ]---
[ 3117.685663] F2FS-fs (loop0): f2fs_check_nid_range: out-of-range nid=2, run fsck to fix.
[ 3117.685673] F2FS-fs (loop0): recover_data: ino = 2 (i_size: recover) recovered = 1, err = 0
[ 3117.685707] ==================================================================
[ 3117.685955] BUG: KASAN: slab-out-of-bounds in __remove_dirty_segment+0xdd/0x1e0
[ 3117.686175] Read of size 8 at addr ffff88018f0a63d0 by task mount/1225
[ 3117.686477] CPU: 0 PID: 1225 Comm: mount Tainted: G W 4.17.0+ #1
[ 3117.686481] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
[ 3117.686483] Call Trace:
[ 3117.686494] dump_stack+0x71/0xab
[ 3117.686512] print_address_description+0x6b/0x290
[ 3117.686517] kasan_report+0x28e/0x390
[ 3117.686522] ? __remove_dirty_segment+0xdd/0x1e0
[ 3117.686527] __remove_dirty_segment+0xdd/0x1e0
[ 3117.686532] locate_dirty_segment+0x189/0x190
[ 3117.686538] f2fs_allocate_new_segments+0xa9/0xe0
[ 3117.686543] recover_data+0x703/0x2c20
[ 3117.686547] ? f2fs_recover_fsync_data+0x48f/0xd50
[ 3117.686553] ? ksys_mount+0x7e/0xd0
[ 3117.686564] ? policy_nodemask+0x1a/0x90
[ 3117.686567] ? policy_node+0x56/0x70
[ 3117.686571] ? add_fsync_inode+0xf0/0xf0
[ 3117.686592] ? blk_finish_plug+0x44/0x60
[ 3117.686597] ? f2fs_ra_meta_pages+0x38b/0x5e0
[ 3117.686602] ? find_inode_fast+0xac/0xc0
[ 3117.686606] ? f2fs_is_valid_blkaddr+0x320/0x320
[ 3117.686618] ? __radix_tree_lookup+0x150/0x150
[ 3117.686633] ? dqget+0x670/0x670
[ 3117.686648] ? pagecache_get_page+0x29/0x410
[ 3117.686656] ? kmem_cache_alloc+0x176/0x1e0
[ 3117.686660] ? f2fs_is_valid_blkaddr+0x11d/0x320
[ 3117.686664] f2fs_recover_fsync_data+0xc23/0xd50
[ 3117.686670] ? f2fs_space_for_roll_forward+0x60/0x60
[ 3117.686674] ? rb_insert_color+0x323/0x3d0
[ 3117.686678] ? f2fs_recover_orphan_inodes+0xa5/0x700
[ 3117.686683] ? proc_register+0x153/0x1d0
[ 3117.686686] ? f2fs_remove_orphan_inode+0x10/0x10
[ 3117.686695] ? f2fs_attr_store+0x50/0x50
[ 3117.686700] ? proc_create_single_data+0x52/0x60
[ 3117.686707] f2fs_fill_super+0x1d06/0x2b40
[ 3117.686728] ? f2fs_commit_super+0x1a0/0x1a0
[ 3117.686735] ? sget_userns+0x65e/0x690
[ 3117.686740] ? set_blocksize+0x88/0x130
[ 3117.686745] ? f2fs_commit_super+0x1a0/0x1a0
[ 3117.686748] mount_bdev+0x1c0/0x200
[ 3117.686753] mount_fs+0x5c/0x190
[ 3117.686758] vfs_kern_mount+0x64/0x190
[ 3117.686762] do_mount+0x2e4/0x1450
[ 3117.686769] ? lockref_put_return+0x130/0x130
[ 3117.686773] ? copy_mount_string+0x20/0x20
[ 3117.686777] ? kasan_unpoison_shadow+0x31/0x40
[ 3117.686780] ? kasan_kmalloc+0xa6/0xd0
[ 3117.686786] ? memcg_kmem_put_cache+0x16/0x90
[ 3117.686790] ? __kmalloc_track_caller+0x196/0x210
[ 3117.686795] ? _copy_from_user+0x61/0x90
[ 3117.686801] ? memdup_user+0x3e/0x60
[ 3117.686804] ksys_mount+0x7e/0xd0
[ 3117.686809] __x64_sys_mount+0x62/0x70
[ 3117.686816] do_syscall_64+0x73/0x160
[ 3117.686824] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 3117.686829] RIP: 0033:0x7f5693f14b9a
[ 3117.686830] Code: 48 8b 0d 01 c3 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ce c2 2b 00 f7 d8 64 89 01 48
[ 3117.686887] RSP: 002b:00007fff27346488 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5
[ 3117.686892] RAX: ffffffffffffffda RBX: 00000000016e2030 RCX: 00007f5693f14b9a
[ 3117.686894] RDX: 00000000016e2210 RSI: 00000000016e3f30 RDI: 00000000016ee040
[ 3117.686896] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000013
[ 3117.686899] R10: 00000000c0ed0000 R11: 0000000000000206 R12: 00000000016ee040
[ 3117.686901] R13: 00000000016e2210 R14: 0000000000000000 R15: 0000000000000003
[ 3117.687005] Allocated by task 1225:
[ 3117.687152] kasan_kmalloc+0xa6/0xd0
[ 3117.687157] kmem_cache_alloc_trace+0xfd/0x200
[ 3117.687161] f2fs_build_segment_manager+0x2d09/0x3190
[ 3117.687165] f2fs_fill_super+0x1948/0x2b40
[ 3117.687168] mount_bdev+0x1c0/0x200
[ 3117.687171] mount_fs+0x5c/0x190
[ 3117.687174] vfs_kern_mount+0x64/0x190
[ 3117.687177] do_mount+0x2e4/0x1450
[ 3117.687180] ksys_mount+0x7e/0xd0
[ 3117.687182] __x64_sys_mount+0x62/0x70
[ 3117.687186] do_syscall_64+0x73/0x160
[ 3117.687190] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 3117.687285] Freed by task 19:
[ 3117.687412] __kasan_slab_free+0x137/0x190
[ 3117.687416] kfree+0x8b/0x1b0
[ 3117.687460] ttm_bo_man_put_node+0x61/0x80 [ttm]
[ 3117.687476] ttm_bo_cleanup_refs+0x15f/0x250 [ttm]
[ 3117.687492] ttm_bo_delayed_delete+0x2f0/0x300 [ttm]
[ 3117.687507] ttm_bo_delayed_workqueue+0x17/0x50 [ttm]
[ 3117.687528] process_one_work+0x2f9/0x740
[ 3117.687531] worker_thread+0x78/0x6b0
[ 3117.687541] kthread+0x177/0x1c0
[ 3117.687545] ret_from_fork+0x35/0x40
[ 3117.687638] The buggy address belongs to the object at ffff88018f0a6300
which belongs to the cache kmalloc-192 of size 192
[ 3117.688014] The buggy address is located 16 bytes to the right of
192-byte region [ffff88018f0a6300, ffff88018f0a63c0)
[ 3117.688382] The buggy address belongs to the page:
[ 3117.688554] page:ffffea00063c2980 count:1 mapcount:0 mapping:ffff8801f3403180 index:0x0
[ 3117.688788] flags: 0x17fff8000000100(slab)
[ 3117.688944] raw: 017fff8000000100 ffffea00063c2840 0000000e0000000e ffff8801f3403180
[ 3117.689166] raw: 0000000000000000 0000000080100010 00000001ffffffff 0000000000000000
[ 3117.689386] page dumped because: kasan: bad access detected
[ 3117.689653] Memory state around the buggy address:
[ 3117.689816] ffff88018f0a6280: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc
[ 3117.690027] ffff88018f0a6300: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[ 3117.690239] >ffff88018f0a6380: 00 00 fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[ 3117.690448] ^
[ 3117.690644] ffff88018f0a6400: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[ 3117.690868] ffff88018f0a6480: 00 00 fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[ 3117.691077] ==================================================================
[ 3117.691290] Disabling lock debugging due to kernel taint
[ 3117.693893] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
[ 3117.694120] PGD 80000001f01bc067 P4D 80000001f01bc067 PUD 1d9638067 PMD 0
[ 3117.694338] Oops: 0002 [#1] SMP KASAN PTI
[ 3117.694490] CPU: 1 PID: 1225 Comm: mount Tainted: G B W 4.17.0+ #1
[ 3117.694703] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
[ 3117.695073] RIP: 0010:__remove_dirty_segment+0xe2/0x1e0
[ 3117.695246] Code: c4 48 89 c7 e8 cf bb d7 ff 45 0f b6 24 24 41 83 e4 3f 44 88 64 24 07 41 83 e4 3f 4a 8d 7c e3 08 e8 b3 bc d7 ff 4a 8b 4c e3 08 <f0> 4c 0f b3 29 0f 82 94 00 00 00 48 8d bd 20 04 00 00 e8 97 bb d7
[ 3117.695793] RSP: 0018:ffff88018eb67638 EFLAGS: 00010292
[ 3117.695969] RAX: 0000000000000000 RBX: ffff88018f0a6300 RCX: 0000000000000000
[ 3117.696182] RDX: 0000000000000000 RSI: 0000000000000297 RDI: 0000000000000297
[ 3117.696391] RBP: ffff88018ebe9980 R08: ffffed003e743ebb R09: ffffed003e743ebb
[ 3117.696604] R10: 0000000000000001 R11: ffffed003e743eba R12: 0000000000000019
[ 3117.696813] R13: 0000000000000014 R14: 0000000000000320 R15: ffff88018ebe99e0
[ 3117.697032] FS: 00007f5694636840(0000) GS:ffff8801f3b00000(0000) knlGS:0000000000000000
[ 3117.697280] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 3117.702357] CR2: 00007fe89bb1a000 CR3: 0000000191c22000 CR4: 00000000000006e0
[ 3117.707235] Call Trace:
[ 3117.712077] locate_dirty_segment+0x189/0x190
[ 3117.716891] f2fs_allocate_new_segments+0xa9/0xe0
[ 3117.721617] recover_data+0x703/0x2c20
[ 3117.726316] ? f2fs_recover_fsync_data+0x48f/0xd50
[ 3117.730957] ? ksys_mount+0x7e/0xd0
[ 3117.735573] ? policy_nodemask+0x1a/0x90
[ 3117.740198] ? policy_node+0x56/0x70
[ 3117.744829] ? add_fsync_inode+0xf0/0xf0
[ 3117.749487] ? blk_finish_plug+0x44/0x60
[ 3117.754152] ? f2fs_ra_meta_pages+0x38b/0x5e0
[ 3117.758831] ? find_inode_fast+0xac/0xc0
[ 3117.763448] ? f2fs_is_valid_blkaddr+0x320/0x320
[ 3117.768046] ? __radix_tree_lookup+0x150/0x150
[ 3117.772603] ? dqget+0x670/0x670
[ 3117.777159] ? pagecache_get_page+0x29/0x410
[ 3117.781648] ? kmem_cache_alloc+0x176/0x1e0
[ 3117.786067] ? f2fs_is_valid_blkaddr+0x11d/0x320
[ 3117.790476] f2fs_recover_fsync_data+0xc23/0xd50
[ 3117.794790] ? f2fs_space_for_roll_forward+0x60/0x60
[ 3117.799086] ? rb_insert_color+0x323/0x3d0
[ 3117.803304] ? f2fs_recover_orphan_inodes+0xa5/0x700
[ 3117.807563] ? proc_register+0x153/0x1d0
[ 3117.811766] ? f2fs_remove_orphan_inode+0x10/0x10
[ 3117.815947] ? f2fs_attr_store+0x50/0x50
[ 3117.820087] ? proc_create_single_data+0x52/0x60
[ 3117.824262] f2fs_fill_super+0x1d06/0x2b40
[ 3117.828367] ? f2fs_commit_super+0x1a0/0x1a0
[ 3117.832432] ? sget_userns+0x65e/0x690
[ 3117.836500] ? set_blocksize+0x88/0x130
[ 3117.840501] ? f2fs_commit_super+0x1a0/0x1a0
[ 3117.844420] mount_bdev+0x1c0/0x200
[ 3117.848275] mount_fs+0x5c/0x190
[ 3117.852053] vfs_kern_mount+0x64/0x190
[ 3117.855810] do_mount+0x2e4/0x1450
[ 3117.859441] ? lockref_put_return+0x130/0x130
[ 3117.862996] ? copy_mount_string+0x20/0x20
[ 3117.866417] ? kasan_unpoison_shadow+0x31/0x40
[ 3117.869719] ? kasan_kmalloc+0xa6/0xd0
[ 3117.872948] ? memcg_kmem_put_cache+0x16/0x90
[ 3117.876121] ? __kmalloc_track_caller+0x196/0x210
[ 3117.879333] ? _copy_from_user+0x61/0x90
[ 3117.882467] ? memdup_user+0x3e/0x60
[ 3117.885604] ksys_mount+0x7e/0xd0
[ 3117.888700] __x64_sys_mount+0x62/0x70
[ 3117.891742] do_syscall_64+0x73/0x160
[ 3117.894692] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 3117.897669] RIP: 0033:0x7f5693f14b9a
[ 3117.900563] Code: 48 8b 0d 01 c3 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ce c2 2b 00 f7 d8 64 89 01 48
[ 3117.906922] RSP: 002b:00007fff27346488 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5
[ 3117.910159] RAX: ffffffffffffffda RBX: 00000000016e2030 RCX: 00007f5693f14b9a
[ 3117.913469] RDX: 00000000016e2210 RSI: 00000000016e3f30 RDI: 00000000016ee040
[ 3117.916764] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000013
[ 3117.920071] R10: 00000000c0ed0000 R11: 0000000000000206 R12: 00000000016ee040
[ 3117.923393] R13: 00000000016e2210 R14: 0000000000000000 R15: 0000000000000003
[ 3117.926680] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_timer joydev input_leds serio_raw snd soundcore mac_hid i2c_piix4 ib_iser rdma_cm iw_cm ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi btrfs zstd_decompress zstd_compress xxhash raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear 8139too qxl ttm drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc aesni_intel psmouse aes_x86_64 8139cp crypto_simd cryptd mii glue_helper pata_acpi floppy
[ 3117.949979] CR2: 0000000000000000
[ 3117.954283] ---[ end trace a8e0d899985faf32 ]---
[ 3117.958575] RIP: 0010:__remove_dirty_segment+0xe2/0x1e0
[ 3117.962810] Code: c4 48 89 c7 e8 cf bb d7 ff 45 0f b6 24 24 41 83 e4 3f 44 88 64 24 07 41 83 e4 3f 4a 8d 7c e3 08 e8 b3 bc d7 ff 4a 8b 4c e3 08 <f0> 4c 0f b3 29 0f 82 94 00 00 00 48 8d bd 20 04 00 00 e8 97 bb d7
[ 3117.971789] RSP: 0018:ffff88018eb67638 EFLAGS: 00010292
[ 3117.976333] RAX: 0000000000000000 RBX: ffff88018f0a6300 RCX: 0000000000000000
[ 3117.980926] RDX: 0000000000000000 RSI: 0000000000000297 RDI: 0000000000000297
[ 3117.985497] RBP: ffff88018ebe9980 R08: ffffed003e743ebb R09: ffffed003e743ebb
[ 3117.990098] R10: 0000000000000001 R11: ffffed003e743eba R12: 0000000000000019
[ 3117.994761] R13: 0000000000000014 R14: 0000000000000320 R15: ffff88018ebe99e0
[ 3117.999392] FS: 00007f5694636840(0000) GS:ffff8801f3b00000(0000) knlGS:0000000000000000
[ 3118.004096] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 3118.008816] CR2: 00007fe89bb1a000 CR3: 0000000191c22000 CR4: 00000000000006e0
- Location
https://elixir.bootlin.com/linux/v4.18-rc3/source/fs/f2fs/segment.c#L775
if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
dirty_i->nr_dirty[t]--;
Here dirty_i->dirty_segmap[t] can be NULL which leads to crash in test_and_clear_bit()
Reported-by: Wen Xu <wen.xu@gatech.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-08-01 18:16:11 +07:00
|
|
|
return 1;
|
|
|
|
}
|
f2fs: fix to do sanity check with {sit,nat}_ver_bitmap_bytesize
This patch adds a sanity check on {sit,nat}_ver_bitmap_bytesize
during mount, in order to avoid accessing beyond the cache boundary
when the bitmap size is abnormal.
- Overview
buffer overrun in build_sit_info() when mounting a crafted f2fs image
- Reproduce
- Kernel message
[ 548.580867] F2FS-fs (loop0): Invalid log blocks per segment (8201)
[ 548.580877] F2FS-fs (loop0): Can't find valid F2FS filesystem in 1th superblock
[ 548.584979] ==================================================================
[ 548.586568] BUG: KASAN: use-after-free in kmemdup+0x36/0x50
[ 548.587715] Read of size 64 at addr ffff8801e9c265ff by task mount/1295
[ 548.589428] CPU: 1 PID: 1295 Comm: mount Not tainted 4.18.0-rc1+ #4
[ 548.589432] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
[ 548.589438] Call Trace:
[ 548.589474] dump_stack+0x7b/0xb5
[ 548.589487] print_address_description+0x70/0x290
[ 548.589492] kasan_report+0x291/0x390
[ 548.589496] ? kmemdup+0x36/0x50
[ 548.589509] check_memory_region+0x139/0x190
[ 548.589514] memcpy+0x23/0x50
[ 548.589518] kmemdup+0x36/0x50
[ 548.589545] f2fs_build_segment_manager+0x8fa/0x3410
[ 548.589551] ? __asan_loadN+0xf/0x20
[ 548.589560] ? f2fs_sanity_check_ckpt+0x1be/0x240
[ 548.589566] ? f2fs_flush_sit_entries+0x10c0/0x10c0
[ 548.589587] ? __put_user_ns+0x40/0x40
[ 548.589604] ? find_next_bit+0x57/0x90
[ 548.589610] f2fs_fill_super+0x194b/0x2b40
[ 548.589617] ? f2fs_commit_super+0x1b0/0x1b0
[ 548.589637] ? set_blocksize+0x90/0x140
[ 548.589651] mount_bdev+0x1c5/0x210
[ 548.589655] ? f2fs_commit_super+0x1b0/0x1b0
[ 548.589667] f2fs_mount+0x15/0x20
[ 548.589672] mount_fs+0x60/0x1a0
[ 548.589683] ? alloc_vfsmnt+0x309/0x360
[ 548.589688] vfs_kern_mount+0x6b/0x1a0
[ 548.589699] do_mount+0x34a/0x18c0
[ 548.589710] ? lockref_put_or_lock+0xcf/0x160
[ 548.589716] ? copy_mount_string+0x20/0x20
[ 548.589728] ? memcg_kmem_put_cache+0x1b/0xa0
[ 548.589734] ? kasan_check_write+0x14/0x20
[ 548.589740] ? _copy_from_user+0x6a/0x90
[ 548.589744] ? memdup_user+0x42/0x60
[ 548.589750] ksys_mount+0x83/0xd0
[ 548.589755] __x64_sys_mount+0x67/0x80
[ 548.589781] do_syscall_64+0x78/0x170
[ 548.589797] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 548.589820] RIP: 0033:0x7f76fc331b9a
[ 548.589821] Code: 48 8b 0d 01 c3 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ce c2 2b 00 f7 d8 64 89 01 48
[ 548.589880] RSP: 002b:00007ffd4f0a0e48 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5
[ 548.589890] RAX: ffffffffffffffda RBX: 000000000146c030 RCX: 00007f76fc331b9a
[ 548.589892] RDX: 000000000146c210 RSI: 000000000146df30 RDI: 0000000001474ec0
[ 548.589895] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000013
[ 548.589897] R10: 00000000c0ed0000 R11: 0000000000000206 R12: 0000000001474ec0
[ 548.589900] R13: 000000000146c210 R14: 0000000000000000 R15: 0000000000000003
[ 548.590242] The buggy address belongs to the page:
[ 548.591243] page:ffffea0007a70980 count:0 mapcount:0 mapping:0000000000000000 index:0x0
[ 548.592886] flags: 0x2ffff0000000000()
[ 548.593665] raw: 02ffff0000000000 dead000000000100 dead000000000200 0000000000000000
[ 548.595258] raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000
[ 548.603713] page dumped because: kasan: bad access detected
[ 548.605203] Memory state around the buggy address:
[ 548.606198] ffff8801e9c26480: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[ 548.607676] ffff8801e9c26500: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[ 548.609157] >ffff8801e9c26580: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[ 548.610629] ^
[ 548.612088] ffff8801e9c26600: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[ 548.613674] ffff8801e9c26680: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[ 548.615141] ==================================================================
[ 548.616613] Disabling lock debugging due to kernel taint
[ 548.622871] WARNING: CPU: 1 PID: 1295 at mm/page_alloc.c:4065 __alloc_pages_slowpath+0xe4a/0x1420
[ 548.622878] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_timer snd mac_hid i2c_piix4 soundcore ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx raid1 raid0 multipath linear 8139too crct10dif_pclmul crc32_pclmul qxl drm_kms_helper syscopyarea aesni_intel sysfillrect sysimgblt fb_sys_fops ttm drm aes_x86_64 crypto_simd cryptd 8139cp glue_helper mii pata_acpi floppy
[ 548.623217] CPU: 1 PID: 1295 Comm: mount Tainted: G B 4.18.0-rc1+ #4
[ 548.623219] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
[ 548.623226] RIP: 0010:__alloc_pages_slowpath+0xe4a/0x1420
[ 548.623227] Code: ff ff 01 89 85 c8 fe ff ff e9 91 fc ff ff 41 89 c5 e9 5c fc ff ff 0f 0b 89 f8 25 ff ff f7 ff 89 85 8c fe ff ff e9 d5 f2 ff ff <0f> 0b e9 65 f2 ff ff 65 8b 05 38 81 d2 47 f6 c4 01 74 1c 65 48 8b
[ 548.623281] RSP: 0018:ffff8801f28c7678 EFLAGS: 00010246
[ 548.623284] RAX: 0000000000000000 RBX: 00000000006040c0 RCX: ffffffffb82f73b7
[ 548.623287] RDX: 1ffff1003e518eeb RSI: 000000000000000c RDI: 0000000000000000
[ 548.623290] RBP: ffff8801f28c7880 R08: 0000000000000000 R09: ffffed0047fff2c5
[ 548.623292] R10: 0000000000000001 R11: ffffed0047fff2c4 R12: ffff8801e88de040
[ 548.623295] R13: 00000000006040c0 R14: 000000000000000c R15: ffff8801f28c7938
[ 548.623299] FS: 00007f76fca51840(0000) GS:ffff8801f6f00000(0000) knlGS:0000000000000000
[ 548.623302] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 548.623304] CR2: 00007f19b9171760 CR3: 00000001ed952000 CR4: 00000000000006e0
[ 548.623317] Call Trace:
[ 548.623325] ? kasan_check_read+0x11/0x20
[ 548.623330] ? __zone_watermark_ok+0x92/0x240
[ 548.623336] ? get_page_from_freelist+0x1c3/0x1d90
[ 548.623347] ? _raw_spin_lock_irqsave+0x2a/0x60
[ 548.623353] ? warn_alloc+0x250/0x250
[ 548.623358] ? save_stack+0x46/0xd0
[ 548.623361] ? kasan_kmalloc+0xad/0xe0
[ 548.623366] ? __isolate_free_page+0x2a0/0x2a0
[ 548.623370] ? mount_fs+0x60/0x1a0
[ 548.623374] ? vfs_kern_mount+0x6b/0x1a0
[ 548.623378] ? do_mount+0x34a/0x18c0
[ 548.623383] ? ksys_mount+0x83/0xd0
[ 548.623387] ? __x64_sys_mount+0x67/0x80
[ 548.623391] ? do_syscall_64+0x78/0x170
[ 548.623396] ? entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 548.623401] __alloc_pages_nodemask+0x3c5/0x400
[ 548.623407] ? __alloc_pages_slowpath+0x1420/0x1420
[ 548.623412] ? __mutex_lock_slowpath+0x20/0x20
[ 548.623417] ? kvmalloc_node+0x31/0x80
[ 548.623424] alloc_pages_current+0x75/0x110
[ 548.623436] kmalloc_order+0x24/0x60
[ 548.623442] kmalloc_order_trace+0x24/0xb0
[ 548.623448] __kmalloc_track_caller+0x207/0x220
[ 548.623455] ? f2fs_build_node_manager+0x399/0xbb0
[ 548.623460] kmemdup+0x20/0x50
[ 548.623465] f2fs_build_node_manager+0x399/0xbb0
[ 548.623470] f2fs_fill_super+0x195e/0x2b40
[ 548.623477] ? f2fs_commit_super+0x1b0/0x1b0
[ 548.623481] ? set_blocksize+0x90/0x140
[ 548.623486] mount_bdev+0x1c5/0x210
[ 548.623489] ? f2fs_commit_super+0x1b0/0x1b0
[ 548.623495] f2fs_mount+0x15/0x20
[ 548.623498] mount_fs+0x60/0x1a0
[ 548.623503] ? alloc_vfsmnt+0x309/0x360
[ 548.623508] vfs_kern_mount+0x6b/0x1a0
[ 548.623513] do_mount+0x34a/0x18c0
[ 548.623518] ? lockref_put_or_lock+0xcf/0x160
[ 548.623523] ? copy_mount_string+0x20/0x20
[ 548.623528] ? memcg_kmem_put_cache+0x1b/0xa0
[ 548.623533] ? kasan_check_write+0x14/0x20
[ 548.623537] ? _copy_from_user+0x6a/0x90
[ 548.623542] ? memdup_user+0x42/0x60
[ 548.623547] ksys_mount+0x83/0xd0
[ 548.623552] __x64_sys_mount+0x67/0x80
[ 548.623557] do_syscall_64+0x78/0x170
[ 548.623562] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 548.623566] RIP: 0033:0x7f76fc331b9a
[ 548.623567] Code: 48 8b 0d 01 c3 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ce c2 2b 00 f7 d8 64 89 01 48
[ 548.623632] RSP: 002b:00007ffd4f0a0e48 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5
[ 548.623636] RAX: ffffffffffffffda RBX: 000000000146c030 RCX: 00007f76fc331b9a
[ 548.623639] RDX: 000000000146c210 RSI: 000000000146df30 RDI: 0000000001474ec0
[ 548.623641] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000013
[ 548.623643] R10: 00000000c0ed0000 R11: 0000000000000206 R12: 0000000001474ec0
[ 548.623646] R13: 000000000146c210 R14: 0000000000000000 R15: 0000000000000003
[ 548.623650] ---[ end trace 4ce02f25ff7d3df5 ]---
[ 548.623656] F2FS-fs (loop0): Failed to initialize F2FS node manager
[ 548.627936] F2FS-fs (loop0): Invalid log blocks per segment (8201)
[ 548.627940] F2FS-fs (loop0): Can't find valid F2FS filesystem in 1th superblock
[ 548.635835] F2FS-fs (loop0): Failed to initialize F2FS node manager
- Location
https://elixir.bootlin.com/linux/v4.18-rc1/source/fs/f2fs/segment.c#L3578
sit_i->sit_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
Buffer overrun happens when doing memcpy. I suspect there are missing (inconsistent) checks on bitmap_size.
Reported by Wen Xu (wen.xu@gatech.edu) from SSLab, Gatech.
Reported-by: Wen Xu <wen.xu@gatech.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-06-23 10:25:19 +07:00
|
|
|
|
f2fs: fix to check layout on last valid checkpoint park
As Ju Hyung reported:
"
I was semi-forced today to use the new kernel and test f2fs.
My Ubuntu initramfs got a bit wonky and I had to boot into live CD and
fix some stuffs. The live CD was using 4.15 kernel, and just mounting
the f2fs partition there corrupted f2fs and my 4.19(with 5.1-rc1-4.19
f2fs-stable merged) refused to mount with "SIT is corrupted node"
message.
I used the latest f2fs-tools sent by Chao including "fsck.f2fs: fix to
repair cp_loads blocks at correct position"
It spit out 140M worth of output, but at least I didn't have to run it
twice. Everything returned "Ok" in the 2nd run.
The new log is at
http://arter97.com/f2fs/final
After fixing the image, I used my 4.19 kernel with 5.2-rc1-4.19
f2fs-stable merged and it mounted.
But, I got this:
[ 1.047791] F2FS-fs (nvme0n1p3): layout of large_nat_bitmap is
deprecated, run fsck to repair, chksum_offset: 4092
[ 1.081307] F2FS-fs (nvme0n1p3): Found nat_bits in checkpoint
[ 1.161520] F2FS-fs (nvme0n1p3): recover fsync data on readonly fs
[ 1.162418] F2FS-fs (nvme0n1p3): Mounted with checkpoint version = 761c7e00
But after doing a reboot, the message is gone:
[ 1.098423] F2FS-fs (nvme0n1p3): Found nat_bits in checkpoint
[ 1.177771] F2FS-fs (nvme0n1p3): recover fsync data on readonly fs
[ 1.178365] F2FS-fs (nvme0n1p3): Mounted with checkpoint version = 761c7eda
I'm not exactly sure why the kernel detected that I'm still using the
old layout on the first boot. Maybe fsck didn't fix it properly, or
the check from the kernel is improper.
"
Although we rebuild the old deprecated checkpoint with the new layout
during repair, we only repair the last checkpoint pack; the other, older
one remains unchanged.
When the image is mounted, we 1) sanity check the layout and 2) decide
which checkpoint pack to use according to cp_ver. As a result, we print
the reported message unnecessarily at step 1). To avoid this, simply move
the layout check into f2fs_sanity_check_ckpt(), after step 2).
Reported-by: Park Ju Hyung <qkrwngud825@gmail.com>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2019-05-20 09:09:22 +07:00
|
|
|
if (__is_set_ckpt_flags(ckpt, CP_LARGE_NAT_BITMAP_FLAG) &&
|
|
|
|
le32_to_cpu(ckpt->checksum_offset) != CP_MIN_CHKSUM_OFFSET) {
|
2019-07-11 08:29:15 +07:00
|
|
|
f2fs_warn(sbi, "using deprecated layout of large_nat_bitmap, "
|
|
|
|
"please run fsck v1.13.0 or higher to repair, chksum_offset: %u, "
|
|
|
|
"fixed with patch: \"f2fs-tools: relocate chksum_offset for large_nat_bitmap feature\"",
|
2019-06-18 16:48:42 +07:00
|
|
|
le32_to_cpu(ckpt->checksum_offset));
|
f2fs: fix to check layout on last valid checkpoint park
As Ju Hyung reported:
"
I was semi-forced today to use the new kernel and test f2fs.
My Ubuntu initramfs got a bit wonky and I had to boot into live CD and
fix some stuffs. The live CD was using 4.15 kernel, and just mounting
the f2fs partition there corrupted f2fs and my 4.19(with 5.1-rc1-4.19
f2fs-stable merged) refused to mount with "SIT is corrupted node"
message.
I used the latest f2fs-tools sent by Chao including "fsck.f2fs: fix to
repair cp_loads blocks at correct position"
It spit out 140M worth of output, but at least I didn't have to run it
twice. Everything returned "Ok" in the 2nd run.
The new log is at
http://arter97.com/f2fs/final
After fixing the image, I used my 4.19 kernel with 5.2-rc1-4.19
f2fs-stable merged and it mounted.
But, I got this:
[ 1.047791] F2FS-fs (nvme0n1p3): layout of large_nat_bitmap is
deprecated, run fsck to repair, chksum_offset: 4092
[ 1.081307] F2FS-fs (nvme0n1p3): Found nat_bits in checkpoint
[ 1.161520] F2FS-fs (nvme0n1p3): recover fsync data on readonly fs
[ 1.162418] F2FS-fs (nvme0n1p3): Mounted with checkpoint version = 761c7e00
But after doing a reboot, the message is gone:
[ 1.098423] F2FS-fs (nvme0n1p3): Found nat_bits in checkpoint
[ 1.177771] F2FS-fs (nvme0n1p3): recover fsync data on readonly fs
[ 1.178365] F2FS-fs (nvme0n1p3): Mounted with checkpoint version = 761c7eda
I'm not exactly sure why the kernel detected that I'm still using the
old layout on the first boot. Maybe fsck didn't fix it properly, or
the check from the kernel is improper.
"
Although we rebuild the old deprecated checkpoint with the new layout
during repair, we only repair the last checkpoint pack; the other, older
one remains unchanged.
When the image is mounted, we 1) sanity check the layout and 2) decide
which checkpoint pack to use according to cp_ver. As a result, we print
the reported message unnecessarily at step 1). To avoid this, simply move
the layout check into f2fs_sanity_check_ckpt(), after step 2).
Reported-by: Park Ju Hyung <qkrwngud825@gmail.com>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2019-05-20 09:09:22 +07:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2014-08-12 06:49:25 +07:00
|
|
|
if (unlikely(f2fs_cp_error(sbi))) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_err(sbi, "A bug case: need to run fsck");
|
f2fs: prevent checkpoint once any IO failure is detected
This patch enhances the checkpoint routine to cope with IO errors.
Basically, f2fs detects IO errors in end_io_write, and the errors can
occur during any of the data, node, and meta page writes.
In the previous code, when an IO error occurred during writes, f2fs set a
flag, CP_ERROR_FLAG, in the raw checkpoint buffer, which would be written to disk.
Afterwards, write_checkpoint() would check the flag and remount f2fs in
read-only (ro) mode.
However, even once f2fs is remounted in ro mode, dirty checkpoint pages can
still be freely written to disk by the flusher or kswapd in the background.
In such a case, after a cold reboot, f2fs would restore the checkpoint data having
CP_ERROR_FLAG, resulting in write_checkpoint being disabled and f2fs being
remounted in ro mode again.
Therefore, let's prevent any checkpoint page (meta) writes once an IO error has
occurred, and remount f2fs in ro mode right away at that moment.
Reported-by: Oliver Winker <oliver@oli1170.net>
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
Reviewed-by: Namjae Jeon <namjae.jeon@samsung.com>
2013-01-24 17:56:11 +07:00
|
|
|
return 1;
|
|
|
|
}
|
2012-11-02 15:07:47 +07:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void init_sb_info(struct f2fs_sb_info *sbi)
|
|
|
|
{
|
|
|
|
struct f2fs_super_block *raw_super = sbi->raw_super;
|
2018-10-24 15:09:42 +07:00
|
|
|
int i;
|
2012-11-02 15:07:47 +07:00
|
|
|
|
|
|
|
sbi->log_sectors_per_block =
|
|
|
|
le32_to_cpu(raw_super->log_sectors_per_block);
|
|
|
|
sbi->log_blocksize = le32_to_cpu(raw_super->log_blocksize);
|
|
|
|
sbi->blocksize = 1 << sbi->log_blocksize;
|
|
|
|
sbi->log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);
|
|
|
|
sbi->blocks_per_seg = 1 << sbi->log_blocks_per_seg;
|
|
|
|
sbi->segs_per_sec = le32_to_cpu(raw_super->segs_per_sec);
|
|
|
|
sbi->secs_per_zone = le32_to_cpu(raw_super->secs_per_zone);
|
|
|
|
sbi->total_sections = le32_to_cpu(raw_super->section_count);
|
|
|
|
sbi->total_node_count =
|
|
|
|
(le32_to_cpu(raw_super->segment_count_nat) / 2)
|
|
|
|
* sbi->blocks_per_seg * NAT_ENTRY_PER_BLOCK;
|
|
|
|
sbi->root_ino_num = le32_to_cpu(raw_super->root_ino);
|
|
|
|
sbi->node_ino_num = le32_to_cpu(raw_super->node_ino);
|
|
|
|
sbi->meta_ino_num = le32_to_cpu(raw_super->meta_ino);
|
2013-03-31 11:26:03 +07:00
|
|
|
sbi->cur_victim_sec = NULL_SECNO;
|
f2fs: support subsectional garbage collection
Section is minimal garbage collection unit of f2fs, in zoned block
device, or ancient block mapping flash device, in order to improve
GC efficiency, we can align GC unit to lower device erase unit,
normally, it consists of multiple of segments.
Once background or foreground GC triggers, it brings a large number
of IOs, which will impact user IO, and also occupy cpu/memory resource
intensively.
So, to reduce impact of GC on large size section, this patch supports
subsectional GC, in one cycle of GC, it only migrate partial segment{s}
in victim section. Currently, by default, we use sbi->segs_per_sec as
migration granularity.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-10-24 17:37:27 +07:00
|
|
|
sbi->next_victim_seg[BG_GC] = NULL_SEGNO;
|
|
|
|
sbi->next_victim_seg[FG_GC] = NULL_SEGNO;
|
2014-01-08 11:45:08 +07:00
|
|
|
sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH;
|
f2fs: support subsectional garbage collection
Section is minimal garbage collection unit of f2fs, in zoned block
device, or ancient block mapping flash device, in order to improve
GC efficiency, we can align GC unit to lower device erase unit,
normally, it consists of multiple of segments.
Once background or foreground GC triggers, it brings a large number
of IOs, which will impact user IO, and also occupy cpu/memory resource
intensively.
So, to reduce impact of GC on large size section, this patch supports
subsectional GC, in one cycle of GC, it only migrate partial segment{s}
in victim section. Currently, by default, we use sbi->segs_per_sec as
migration granularity.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-10-24 17:37:27 +07:00
|
|
|
sbi->migration_granularity = sbi->segs_per_sec;
|
2012-11-02 15:07:47 +07:00
|
|
|
|
2014-02-27 18:09:05 +07:00
|
|
|
sbi->dir_level = DEF_DIR_LEVEL;
|
2016-01-09 06:51:50 +07:00
|
|
|
sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL;
|
2016-01-09 07:57:48 +07:00
|
|
|
sbi->interval_time[REQ_TIME] = DEF_IDLE_INTERVAL;
|
2018-09-19 15:48:47 +07:00
|
|
|
sbi->interval_time[DISCARD_TIME] = DEF_IDLE_INTERVAL;
|
|
|
|
sbi->interval_time[GC_TIME] = DEF_IDLE_INTERVAL;
|
2018-08-21 09:21:43 +07:00
|
|
|
sbi->interval_time[DISABLE_TIME] = DEF_DISABLE_INTERVAL;
|
2019-01-15 01:42:11 +07:00
|
|
|
sbi->interval_time[UMOUNT_DISCARD_TIMEOUT] =
|
|
|
|
DEF_UMOUNT_DISCARD_TIMEOUT;
|
2015-01-28 16:48:42 +07:00
|
|
|
clear_sbi_flag(sbi, SBI_NEED_FSCK);
|
2015-06-20 02:01:21 +07:00
|
|
|
|
2016-10-21 09:09:57 +07:00
|
|
|
for (i = 0; i < NR_COUNT_TYPE; i++)
|
|
|
|
atomic_set(&sbi->nr_pages[i], 0);
|
|
|
|
|
2018-06-04 22:20:36 +07:00
|
|
|
for (i = 0; i < META; i++)
|
|
|
|
atomic_set(&sbi->wb_sync_req[i], 0);
|
2017-03-29 08:07:38 +07:00
|
|
|
|
2015-06-20 02:01:21 +07:00
|
|
|
INIT_LIST_HEAD(&sbi->s_list);
|
|
|
|
mutex_init(&sbi->umount_mutex);
|
2018-05-26 08:00:13 +07:00
|
|
|
init_rwsem(&sbi->io_order_lock);
|
2016-09-20 10:04:18 +07:00
|
|
|
spin_lock_init(&sbi->cp_lock);
|
2017-09-29 12:59:39 +07:00
|
|
|
|
|
|
|
sbi->dirty_device = 0;
|
|
|
|
spin_lock_init(&sbi->dev_lock);
|
2018-02-11 21:53:20 +07:00
|
|
|
|
2018-02-26 21:04:13 +07:00
|
|
|
init_rwsem(&sbi->sb_lock);
|
2012-11-02 15:07:47 +07:00
|
|
|
}
|
|
|
|
|
2016-05-14 02:36:58 +07:00
|
|
|
static int init_percpu_info(struct f2fs_sb_info *sbi)
|
|
|
|
{
|
2016-10-21 09:09:57 +07:00
|
|
|
int err;
|
2016-05-17 01:06:50 +07:00
|
|
|
|
2016-05-17 01:42:32 +07:00
|
|
|
err = percpu_counter_init(&sbi->alloc_valid_block_count, 0, GFP_KERNEL);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
2018-09-05 13:54:02 +07:00
|
|
|
err = percpu_counter_init(&sbi->total_valid_inode_count, 0,
|
2016-05-17 01:06:50 +07:00
|
|
|
GFP_KERNEL);
|
2018-09-05 13:54:02 +07:00
|
|
|
if (err)
|
|
|
|
percpu_counter_destroy(&sbi->alloc_valid_block_count);
|
|
|
|
|
|
|
|
return err;
|
2016-05-14 02:36:58 +07:00
|
|
|
}
|
|
|
|
|
2016-10-28 15:45:05 +07:00
|
|
|
#ifdef CONFIG_BLK_DEV_ZONED
/*
 * Collect zone information for device @devi of @sbi.
 *
 * Checks that the device's zone size agrees with any zone size already
 * recorded in @sbi, computes the number of zones on the device, and builds
 * FDEV(devi).blkz_seq: a bitmap with one bit per zone, set for every zone
 * whose type is not BLK_ZONE_TYPE_CONVENTIONAL.
 *
 * Returns 0 when the blkzoned feature is off or on success, -EINVAL on a
 * zone size mismatch, -ENOMEM on allocation failure, -EIO when no zone is
 * reported, or the error returned by blkdev_report_zones().
 */
static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
{
	struct block_device *bdev = FDEV(devi).bdev;
	sector_t nr_sectors = bdev->bd_part->nr_sects;
	sector_t pos = 0;
	struct blk_zone *zone_buf;
	unsigned int k, got;
	unsigned int zone_idx = 0;
	int zone_shift;
	int ret = -EIO;

	if (!f2fs_sb_has_blkzoned(sbi))
		return 0;

	/* every device must use the same zone size */
	if (sbi->blocks_per_blkz && sbi->blocks_per_blkz !=
				SECTOR_TO_BLOCK(bdev_zone_sectors(bdev)))
		return -EINVAL;
	sbi->blocks_per_blkz = SECTOR_TO_BLOCK(bdev_zone_sectors(bdev));

	zone_shift = __ilog2_u32(sbi->blocks_per_blkz);
	if (sbi->log_blocks_per_blkz && sbi->log_blocks_per_blkz != zone_shift)
		return -EINVAL;
	sbi->log_blocks_per_blkz = zone_shift;

	/* number of zones, counting a trailing partial zone as one more */
	FDEV(devi).nr_blkz = SECTOR_TO_BLOCK(nr_sectors) >>
					sbi->log_blocks_per_blkz;
	if (nr_sectors & (bdev_zone_sectors(bdev) - 1))
		FDEV(devi).nr_blkz++;

	FDEV(devi).blkz_seq = f2fs_kzalloc(sbi,
					BITS_TO_LONGS(FDEV(devi).nr_blkz)
					* sizeof(unsigned long),
					GFP_KERNEL);
	if (!FDEV(devi).blkz_seq)
		return -ENOMEM;

#define F2FS_REPORT_NR_ZONES   4096

	zone_buf = f2fs_kzalloc(sbi,
				array_size(F2FS_REPORT_NR_ZONES,
					   sizeof(struct blk_zone)),
				GFP_KERNEL);
	if (!zone_buf)
		return -ENOMEM;

	/* Get block zones type */
	while (pos < nr_sectors) {
		got = F2FS_REPORT_NR_ZONES;
		ret = blkdev_report_zones(bdev, pos, zone_buf, &got);
		if (ret)
			break;

		/* an empty report would never advance pos */
		if (!got) {
			ret = -EIO;
			break;
		}

		for (k = 0; k < got; k++, zone_idx++) {
			if (zone_buf[k].type != BLK_ZONE_TYPE_CONVENTIONAL)
				set_bit(zone_idx, FDEV(devi).blkz_seq);
			pos += zone_buf[k].len;
		}
	}

	kvfree(zone_buf);

	return ret;
}
#endif
|
|
|
|
|
2013-10-14 17:47:11 +07:00
|
|
|
/*
|
|
|
|
* Read f2fs raw super block.
|
2016-02-17 07:59:01 +07:00
|
|
|
* Because we have two copies of super block, so read both of them
|
|
|
|
* to get the first valid one. If any one of them is broken, we pass
|
|
|
|
* them recovery flag back to the caller.
|
2013-10-14 17:47:11 +07:00
|
|
|
*/
|
2016-03-24 07:05:27 +07:00
|
|
|
static int read_raw_super_block(struct f2fs_sb_info *sbi,
|
2013-10-14 17:47:11 +07:00
|
|
|
struct f2fs_super_block **raw_super,
|
2015-12-15 16:19:26 +07:00
|
|
|
int *valid_super_block, int *recovery)
|
2013-02-01 18:07:03 +07:00
|
|
|
{
|
2016-03-24 07:05:27 +07:00
|
|
|
struct super_block *sb = sbi->sb;
|
2016-02-17 07:59:01 +07:00
|
|
|
int block;
|
2015-12-15 16:19:26 +07:00
|
|
|
struct buffer_head *bh;
|
2016-03-21 05:33:20 +07:00
|
|
|
struct f2fs_super_block *super;
|
2015-05-21 13:42:53 +07:00
|
|
|
int err = 0;
|
2013-02-01 18:07:03 +07:00
|
|
|
|
2015-12-15 16:17:20 +07:00
|
|
|
super = kzalloc(sizeof(struct f2fs_super_block), GFP_KERNEL);
|
|
|
|
if (!super)
|
|
|
|
return -ENOMEM;
|
2016-02-17 07:59:01 +07:00
|
|
|
|
|
|
|
for (block = 0; block < 2; block++) {
|
|
|
|
bh = sb_bread(sb, block);
|
|
|
|
if (!bh) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_err(sbi, "Unable to read %dth superblock",
|
|
|
|
block + 1);
|
2016-02-17 07:59:01 +07:00
|
|
|
err = -EIO;
|
|
|
|
continue;
|
|
|
|
}
|
2013-02-01 18:07:03 +07:00
|
|
|
|
2016-02-17 07:59:01 +07:00
|
|
|
/* sanity checking of raw super */
|
2019-07-25 10:08:52 +07:00
|
|
|
err = sanity_check_raw_super(sbi, bh);
|
|
|
|
if (err) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_err(sbi, "Can't find valid F2FS filesystem in %dth superblock",
|
|
|
|
block + 1);
|
2016-02-17 07:59:01 +07:00
|
|
|
brelse(bh);
|
|
|
|
continue;
|
|
|
|
}
|
2013-02-01 18:07:03 +07:00
|
|
|
|
2016-02-17 07:59:01 +07:00
|
|
|
if (!*raw_super) {
|
2016-03-21 05:33:20 +07:00
|
|
|
memcpy(super, bh->b_data + F2FS_SUPER_OFFSET,
|
|
|
|
sizeof(*super));
|
2016-02-17 07:59:01 +07:00
|
|
|
*valid_super_block = block;
|
|
|
|
*raw_super = super;
|
|
|
|
}
|
|
|
|
brelse(bh);
|
2015-05-21 13:42:53 +07:00
|
|
|
}
|
|
|
|
|
2016-02-17 07:59:01 +07:00
|
|
|
/* Fail to read any one of the superblocks*/
|
|
|
|
if (err < 0)
|
|
|
|
*recovery = 1;
|
2015-05-21 13:42:53 +07:00
|
|
|
|
|
|
|
/* No valid superblock */
|
2016-02-17 07:59:01 +07:00
|
|
|
if (!*raw_super)
|
2018-12-14 09:38:33 +07:00
|
|
|
kvfree(super);
|
2016-02-17 07:59:01 +07:00
|
|
|
else
|
|
|
|
err = 0;
|
2015-05-21 13:42:53 +07:00
|
|
|
|
2016-02-17 07:59:01 +07:00
|
|
|
return err;
|
2013-02-01 18:07:03 +07:00
|
|
|
}
|
|
|
|
|
2016-03-21 05:33:20 +07:00
|
|
|
/*
 * Write the in-memory raw superblock of @sbi to disk.
 *
 * The backup copy (the block that is not sbi->valid_super_block) is written
 * first. When @recover is true, or when that first write fails, the
 * function stops there; otherwise the currently valid copy is written too.
 * The superblock checksum is refreshed beforehand when not recovering and
 * the sb_chksum feature is enabled.
 *
 * Returns 0 on success, -EROFS if the write is not permitted (in which case
 * SBI_NEED_SB_WRITE is set), -EIO if a superblock block cannot be read, or
 * the error from __f2fs_commit_super().
 */
int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
{
	struct super_block *sb = sbi->sb;
	struct buffer_head *bh;
	int backup_blk;
	int ret;

	if ((recover && f2fs_readonly(sb)) || bdev_read_only(sb->s_bdev)) {
		/* remember that the superblock still has to be written */
		set_sbi_flag(sbi, SBI_NEED_SB_WRITE);
		return -EROFS;
	}

	/* we should update superblock crc here */
	if (!recover && f2fs_sb_has_sb_chksum(sbi)) {
		__u32 crc;

		crc = f2fs_crc32(sbi, F2FS_RAW_SUPER(sbi),
				offsetof(struct f2fs_super_block, crc));
		F2FS_RAW_SUPER(sbi)->crc = cpu_to_le32(crc);
	}

	/* write back-up superblock first */
	backup_blk = sbi->valid_super_block ? 0 : 1;
	bh = sb_bread(sb, backup_blk);
	if (!bh)
		return -EIO;
	ret = __f2fs_commit_super(bh, F2FS_RAW_SUPER(sbi));
	brelse(bh);

	/* if we are in recovery path, skip writing valid superblock */
	if (ret || recover)
		return ret;

	/* write current valid superblock */
	bh = sb_bread(sb, sbi->valid_super_block);
	if (!bh)
		return -EIO;
	ret = __f2fs_commit_super(bh, F2FS_RAW_SUPER(sbi));
	brelse(bh);

	return ret;
}
|
|
|
|
|
2016-10-07 09:02:05 +07:00
|
|
|
static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
|
|
|
|
{
|
|
|
|
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
|
2017-02-27 18:52:49 +07:00
|
|
|
unsigned int max_devices = MAX_DEVICES;
|
2016-10-07 09:02:05 +07:00
|
|
|
int i;
|
|
|
|
|
2017-02-27 18:52:49 +07:00
|
|
|
/* Initialize single device information */
|
|
|
|
if (!RDEV(0).path[0]) {
|
|
|
|
if (!bdev_is_zoned(sbi->sb->s_bdev))
|
2016-10-07 09:02:05 +07:00
|
|
|
return 0;
|
2017-02-27 18:52:49 +07:00
|
|
|
max_devices = 1;
|
|
|
|
}
|
2016-10-07 09:02:05 +07:00
|
|
|
|
2017-02-27 18:52:49 +07:00
|
|
|
/*
|
|
|
|
* Initialize multiple devices information, or single
|
|
|
|
* zoned block device information.
|
|
|
|
*/
|
treewide: Use array_size() in f2fs_kzalloc()
The f2fs_kzalloc() function has no 2-factor argument form, so
multiplication factors need to be wrapped in array_size(). This patch
replaces cases of:
f2fs_kzalloc(handle, a * b, gfp)
with:
f2fs_kzalloc(handle, array_size(a, b), gfp)
as well as handling cases of:
f2fs_kzalloc(handle, a * b * c, gfp)
with:
f2fs_kzalloc(handle, array3_size(a, b, c), gfp)
This does, however, attempt to ignore constant size factors like:
f2fs_kzalloc(handle, 4 * 1024, gfp)
though any constants defined via macros get caught up in the conversion.
Any factors with a sizeof() of "unsigned char", "char", and "u8" were
dropped, since they're redundant.
The Coccinelle script used for this was:
// Fix redundant parens around sizeof().
@@
expression HANDLE;
type TYPE;
expression THING, E;
@@
(
f2fs_kzalloc(HANDLE,
- (sizeof(TYPE)) * E
+ sizeof(TYPE) * E
, ...)
|
f2fs_kzalloc(HANDLE,
- (sizeof(THING)) * E
+ sizeof(THING) * E
, ...)
)
// Drop single-byte sizes and redundant parens.
@@
expression HANDLE;
expression COUNT;
typedef u8;
typedef __u8;
@@
(
f2fs_kzalloc(HANDLE,
- sizeof(u8) * (COUNT)
+ COUNT
, ...)
|
f2fs_kzalloc(HANDLE,
- sizeof(__u8) * (COUNT)
+ COUNT
, ...)
|
f2fs_kzalloc(HANDLE,
- sizeof(char) * (COUNT)
+ COUNT
, ...)
|
f2fs_kzalloc(HANDLE,
- sizeof(unsigned char) * (COUNT)
+ COUNT
, ...)
|
f2fs_kzalloc(HANDLE,
- sizeof(u8) * COUNT
+ COUNT
, ...)
|
f2fs_kzalloc(HANDLE,
- sizeof(__u8) * COUNT
+ COUNT
, ...)
|
f2fs_kzalloc(HANDLE,
- sizeof(char) * COUNT
+ COUNT
, ...)
|
f2fs_kzalloc(HANDLE,
- sizeof(unsigned char) * COUNT
+ COUNT
, ...)
)
// 2-factor product with sizeof(type/expression) and identifier or constant.
@@
expression HANDLE;
type TYPE;
expression THING;
identifier COUNT_ID;
constant COUNT_CONST;
@@
(
f2fs_kzalloc(HANDLE,
- sizeof(TYPE) * (COUNT_ID)
+ array_size(COUNT_ID, sizeof(TYPE))
, ...)
|
f2fs_kzalloc(HANDLE,
- sizeof(TYPE) * COUNT_ID
+ array_size(COUNT_ID, sizeof(TYPE))
, ...)
|
f2fs_kzalloc(HANDLE,
- sizeof(TYPE) * (COUNT_CONST)
+ array_size(COUNT_CONST, sizeof(TYPE))
, ...)
|
f2fs_kzalloc(HANDLE,
- sizeof(TYPE) * COUNT_CONST
+ array_size(COUNT_CONST, sizeof(TYPE))
, ...)
|
f2fs_kzalloc(HANDLE,
- sizeof(THING) * (COUNT_ID)
+ array_size(COUNT_ID, sizeof(THING))
, ...)
|
f2fs_kzalloc(HANDLE,
- sizeof(THING) * COUNT_ID
+ array_size(COUNT_ID, sizeof(THING))
, ...)
|
f2fs_kzalloc(HANDLE,
- sizeof(THING) * (COUNT_CONST)
+ array_size(COUNT_CONST, sizeof(THING))
, ...)
|
f2fs_kzalloc(HANDLE,
- sizeof(THING) * COUNT_CONST
+ array_size(COUNT_CONST, sizeof(THING))
, ...)
)
// 2-factor product, only identifiers.
@@
expression HANDLE;
identifier SIZE, COUNT;
@@
f2fs_kzalloc(HANDLE,
- SIZE * COUNT
+ array_size(COUNT, SIZE)
, ...)
// 3-factor product with 1 sizeof(type) or sizeof(expression), with
// redundant parens removed.
@@
expression HANDLE;
expression THING;
identifier STRIDE, COUNT;
type TYPE;
@@
(
f2fs_kzalloc(HANDLE,
- sizeof(TYPE) * (COUNT) * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
f2fs_kzalloc(HANDLE,
- sizeof(TYPE) * (COUNT) * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
f2fs_kzalloc(HANDLE,
- sizeof(TYPE) * COUNT * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
f2fs_kzalloc(HANDLE,
- sizeof(TYPE) * COUNT * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
f2fs_kzalloc(HANDLE,
- sizeof(THING) * (COUNT) * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
f2fs_kzalloc(HANDLE,
- sizeof(THING) * (COUNT) * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
f2fs_kzalloc(HANDLE,
- sizeof(THING) * COUNT * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
f2fs_kzalloc(HANDLE,
- sizeof(THING) * COUNT * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
)
// 3-factor product with 2 sizeof(variable), with redundant parens removed.
@@
expression HANDLE;
expression THING1, THING2;
identifier COUNT;
type TYPE1, TYPE2;
@@
(
f2fs_kzalloc(HANDLE,
- sizeof(TYPE1) * sizeof(TYPE2) * COUNT
+ array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
, ...)
|
f2fs_kzalloc(HANDLE,
- sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
, ...)
|
f2fs_kzalloc(HANDLE,
- sizeof(THING1) * sizeof(THING2) * COUNT
+ array3_size(COUNT, sizeof(THING1), sizeof(THING2))
, ...)
|
f2fs_kzalloc(HANDLE,
- sizeof(THING1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(THING1), sizeof(THING2))
, ...)
|
f2fs_kzalloc(HANDLE,
- sizeof(TYPE1) * sizeof(THING2) * COUNT
+ array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
, ...)
|
f2fs_kzalloc(HANDLE,
- sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
, ...)
)
// 3-factor product, only identifiers, with redundant parens removed.
@@
expression HANDLE;
identifier STRIDE, SIZE, COUNT;
@@
(
f2fs_kzalloc(HANDLE,
- (COUNT) * STRIDE * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
f2fs_kzalloc(HANDLE,
- COUNT * (STRIDE) * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
f2fs_kzalloc(HANDLE,
- COUNT * STRIDE * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
f2fs_kzalloc(HANDLE,
- (COUNT) * (STRIDE) * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
f2fs_kzalloc(HANDLE,
- COUNT * (STRIDE) * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
f2fs_kzalloc(HANDLE,
- (COUNT) * STRIDE * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
f2fs_kzalloc(HANDLE,
- (COUNT) * (STRIDE) * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
f2fs_kzalloc(HANDLE,
- COUNT * STRIDE * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
)
// Any remaining multi-factor products, first at least 3-factor products
// when they're not all constants...
@@
expression HANDLE;
expression E1, E2, E3;
constant C1, C2, C3;
@@
(
f2fs_kzalloc(HANDLE, C1 * C2 * C3, ...)
|
f2fs_kzalloc(HANDLE,
- E1 * E2 * E3
+ array3_size(E1, E2, E3)
, ...)
)
// And then all remaining 2 factors products when they're not all constants.
@@
expression HANDLE;
expression E1, E2;
constant C1, C2;
@@
(
f2fs_kzalloc(HANDLE, C1 * C2, ...)
|
f2fs_kzalloc(HANDLE,
- E1 * E2
+ array_size(E1, E2)
, ...)
)
Signed-off-by: Kees Cook <keescook@chromium.org>
2018-06-13 04:28:23 +07:00
|
|
|
sbi->devs = f2fs_kzalloc(sbi,
|
|
|
|
array_size(max_devices,
|
|
|
|
sizeof(struct f2fs_dev_info)),
|
|
|
|
GFP_KERNEL);
|
2017-02-27 18:52:49 +07:00
|
|
|
if (!sbi->devs)
|
|
|
|
return -ENOMEM;
|
2016-10-07 09:02:05 +07:00
|
|
|
|
2017-02-27 18:52:49 +07:00
|
|
|
for (i = 0; i < max_devices; i++) {
|
2016-10-07 09:02:05 +07:00
|
|
|
|
2017-02-27 18:52:49 +07:00
|
|
|
if (i > 0 && !RDEV(i).path[0])
|
|
|
|
break;
|
|
|
|
|
|
|
|
if (max_devices == 1) {
|
|
|
|
/* Single zoned block device mount */
|
|
|
|
FDEV(0).bdev =
|
|
|
|
blkdev_get_by_dev(sbi->sb->s_bdev->bd_dev,
|
2016-10-07 09:02:05 +07:00
|
|
|
sbi->sb->s_mode, sbi->sb->s_type);
|
2017-02-27 18:52:49 +07:00
|
|
|
} else {
|
|
|
|
/* Multi-device mount */
|
|
|
|
memcpy(FDEV(i).path, RDEV(i).path, MAX_PATH_LEN);
|
|
|
|
FDEV(i).total_segments =
|
|
|
|
le32_to_cpu(RDEV(i).total_segments);
|
|
|
|
if (i == 0) {
|
|
|
|
FDEV(i).start_blk = 0;
|
|
|
|
FDEV(i).end_blk = FDEV(i).start_blk +
|
|
|
|
(FDEV(i).total_segments <<
|
|
|
|
sbi->log_blocks_per_seg) - 1 +
|
|
|
|
le32_to_cpu(raw_super->segment0_blkaddr);
|
|
|
|
} else {
|
|
|
|
FDEV(i).start_blk = FDEV(i - 1).end_blk + 1;
|
|
|
|
FDEV(i).end_blk = FDEV(i).start_blk +
|
|
|
|
(FDEV(i).total_segments <<
|
|
|
|
sbi->log_blocks_per_seg) - 1;
|
|
|
|
}
|
|
|
|
FDEV(i).bdev = blkdev_get_by_path(FDEV(i).path,
|
2016-10-07 09:02:05 +07:00
|
|
|
sbi->sb->s_mode, sbi->sb->s_type);
|
2017-02-27 18:52:49 +07:00
|
|
|
}
|
2016-10-07 09:02:05 +07:00
|
|
|
if (IS_ERR(FDEV(i).bdev))
|
|
|
|
return PTR_ERR(FDEV(i).bdev);
|
|
|
|
|
|
|
|
/* to release errored devices */
|
|
|
|
sbi->s_ndevs = i + 1;
|
|
|
|
|
|
|
|
#ifdef CONFIG_BLK_DEV_ZONED
|
|
|
|
if (bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HM &&
|
2018-10-24 17:34:26 +07:00
|
|
|
!f2fs_sb_has_blkzoned(sbi)) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_err(sbi, "Zoned block device feature not enabled\n");
|
2016-10-07 09:02:05 +07:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
if (bdev_zoned_model(FDEV(i).bdev) != BLK_ZONED_NONE) {
|
|
|
|
if (init_blkz_info(sbi, i)) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_err(sbi, "Failed to initialize F2FS blkzone information");
|
2016-10-07 09:02:05 +07:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
2017-02-27 18:52:49 +07:00
|
|
|
if (max_devices == 1)
|
|
|
|
break;
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_info(sbi, "Mount Device [%2d]: %20s, %8u, %8x - %8x (zone: %s)",
|
|
|
|
i, FDEV(i).path,
|
|
|
|
FDEV(i).total_segments,
|
|
|
|
FDEV(i).start_blk, FDEV(i).end_blk,
|
|
|
|
bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HA ?
|
|
|
|
"Host-aware" : "Host-managed");
|
2016-10-07 09:02:05 +07:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
#endif
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_info(sbi, "Mount Device [%2d]: %20s, %8u, %8x - %8x",
|
|
|
|
i, FDEV(i).path,
|
|
|
|
FDEV(i).total_segments,
|
|
|
|
FDEV(i).start_blk, FDEV(i).end_blk);
|
|
|
|
}
|
|
|
|
f2fs_info(sbi,
|
|
|
|
"IO Block Size: %8d KB", F2FS_IO_SIZE_KB(sbi));
|
2016-10-07 09:02:05 +07:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-02-23 05:09:30 +07:00
|
|
|
static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi)
|
|
|
|
{
|
|
|
|
struct f2fs_sm_info *sm_i = SM_I(sbi);
|
|
|
|
|
|
|
|
/* adjust parameters according to the volume size */
|
|
|
|
if (sm_i->main_segments <= SMALL_VOLUME_SEGMENTS) {
|
2018-03-08 13:22:56 +07:00
|
|
|
F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE;
|
2018-02-23 05:09:30 +07:00
|
|
|
sm_i->dcc_info->discard_granularity = 1;
|
|
|
|
sm_i->ipu_policy = 1 << F2FS_IPU_FORCE;
|
|
|
|
}
|
2018-06-11 17:02:01 +07:00
|
|
|
|
|
|
|
sbi->readdir_ra = 1;
|
2018-02-23 05:09:30 +07:00
|
|
|
}
|
|
|
|
|
2012-11-02 15:07:47 +07:00
|
|
|
static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
|
|
|
|
{
|
|
|
|
struct f2fs_sb_info *sbi;
|
2015-05-21 13:42:53 +07:00
|
|
|
struct f2fs_super_block *raw_super;
|
2012-11-02 15:07:47 +07:00
|
|
|
struct inode *root;
|
2016-05-11 16:08:14 +07:00
|
|
|
int err;
|
2019-02-19 15:23:53 +07:00
|
|
|
bool skip_recovery = false, need_fsck = false;
|
2015-01-24 08:41:39 +07:00
|
|
|
char *options = NULL;
|
2015-12-15 16:19:26 +07:00
|
|
|
int recovery, i, valid_super_block;
|
2016-01-27 08:57:30 +07:00
|
|
|
struct curseg_info *seg_i;
|
2019-02-19 15:23:53 +07:00
|
|
|
int retry_cnt = 1;
|
2012-11-02 15:07:47 +07:00
|
|
|
|
2014-08-09 05:37:41 +07:00
|
|
|
try_onemore:
|
2015-05-21 13:42:53 +07:00
|
|
|
err = -EINVAL;
|
|
|
|
raw_super = NULL;
|
2015-12-15 16:19:26 +07:00
|
|
|
valid_super_block = -1;
|
2015-05-21 13:42:53 +07:00
|
|
|
recovery = 0;
|
|
|
|
|
2012-11-02 15:07:47 +07:00
|
|
|
/* allocate memory for f2fs-specific super block info */
|
|
|
|
sbi = kzalloc(sizeof(struct f2fs_sb_info), GFP_KERNEL);
|
|
|
|
if (!sbi)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
2016-03-24 07:05:27 +07:00
|
|
|
sbi->sb = sb;
|
|
|
|
|
2016-03-03 03:04:24 +07:00
|
|
|
/* Load the checksum driver */
|
|
|
|
sbi->s_chksum_driver = crypto_alloc_shash("crc32", 0, 0);
|
|
|
|
if (IS_ERR(sbi->s_chksum_driver)) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_err(sbi, "Cannot load crc32 driver.");
|
2016-03-03 03:04:24 +07:00
|
|
|
err = PTR_ERR(sbi->s_chksum_driver);
|
|
|
|
sbi->s_chksum_driver = NULL;
|
|
|
|
goto free_sbi;
|
|
|
|
}
|
|
|
|
|
2013-01-12 12:41:13 +07:00
|
|
|
/* set a block size */
|
2013-12-06 13:00:58 +07:00
|
|
|
if (unlikely(!sb_set_blocksize(sb, F2FS_BLKSIZE))) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_err(sbi, "unable to set blocksize");
|
2012-11-02 15:07:47 +07:00
|
|
|
goto free_sbi;
|
2012-12-30 12:52:05 +07:00
|
|
|
}
|
2012-11-02 15:07:47 +07:00
|
|
|
|
2016-03-24 07:05:27 +07:00
|
|
|
err = read_raw_super_block(sbi, &raw_super, &valid_super_block,
|
2015-12-15 16:19:26 +07:00
|
|
|
&recovery);
|
2013-10-14 17:47:11 +07:00
|
|
|
if (err)
|
|
|
|
goto free_sbi;
|
|
|
|
|
2013-06-07 13:16:53 +07:00
|
|
|
sb->s_fs_info = sbi;
|
2016-06-13 23:47:48 +07:00
|
|
|
sbi->raw_super = raw_super;
|
|
|
|
|
2017-07-31 19:19:09 +07:00
|
|
|
/* precompute checksum seed for metadata */
|
2018-10-24 17:34:26 +07:00
|
|
|
if (f2fs_sb_has_inode_chksum(sbi))
|
2017-07-31 19:19:09 +07:00
|
|
|
sbi->s_chksum_seed = f2fs_chksum(sbi, ~0, raw_super->uuid,
|
|
|
|
sizeof(raw_super->uuid));
|
|
|
|
|
2016-10-28 15:45:01 +07:00
|
|
|
/*
|
|
|
|
* The BLKZONED feature indicates that the drive was formatted with
|
|
|
|
* zone alignment optimization. This is optional for host-aware
|
|
|
|
* devices, but mandatory for host-managed zoned block devices.
|
|
|
|
*/
|
|
|
|
#ifndef CONFIG_BLK_DEV_ZONED
|
2018-10-24 17:34:26 +07:00
|
|
|
if (f2fs_sb_has_blkzoned(sbi)) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_err(sbi, "Zoned block device support is not enabled");
|
2017-06-12 08:44:27 +07:00
|
|
|
err = -EOPNOTSUPP;
|
2016-10-28 15:45:01 +07:00
|
|
|
goto free_sb_buf;
|
|
|
|
}
|
|
|
|
#endif
|
2015-05-07 17:11:37 +07:00
|
|
|
default_options(sbi);
|
2012-11-02 15:07:47 +07:00
|
|
|
/* parse mount options */
|
2015-01-24 08:41:39 +07:00
|
|
|
options = kstrdup((const char *)data, GFP_KERNEL);
|
|
|
|
if (data && !options) {
|
|
|
|
err = -ENOMEM;
|
2012-11-02 15:07:47 +07:00
|
|
|
goto free_sb_buf;
|
2015-01-24 08:41:39 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
err = parse_options(sb, options);
|
|
|
|
if (err)
|
|
|
|
goto free_options;
|
2012-11-02 15:07:47 +07:00
|
|
|
|
2015-12-31 13:35:37 +07:00
|
|
|
sbi->max_file_blocks = max_file_blocks();
|
|
|
|
sb->s_maxbytes = sbi->max_file_blocks <<
|
|
|
|
le32_to_cpu(raw_super->log_blocksize);
|
2012-11-02 15:07:47 +07:00
|
|
|
sb->s_max_links = F2FS_LINK_MAX;
|
|
|
|
|
2017-07-08 23:13:07 +07:00
|
|
|
#ifdef CONFIG_QUOTA
|
|
|
|
sb->dq_op = &f2fs_quota_operations;
|
2019-05-21 06:17:56 +07:00
|
|
|
sb->s_qcop = &f2fs_quotactl_ops;
|
2017-07-25 23:01:41 +07:00
|
|
|
sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
|
2017-11-16 15:59:14 +07:00
|
|
|
|
2018-10-24 17:34:26 +07:00
|
|
|
if (f2fs_sb_has_quota_ino(sbi)) {
|
2017-11-16 15:59:14 +07:00
|
|
|
for (i = 0; i < MAXQUOTAS; i++) {
|
|
|
|
if (f2fs_qf_ino(sbi->sb, i))
|
|
|
|
sbi->nquota_files++;
|
|
|
|
}
|
|
|
|
}
|
2017-07-08 23:13:07 +07:00
|
|
|
#endif
|
|
|
|
|
2012-11-02 15:07:47 +07:00
|
|
|
sb->s_op = &f2fs_sops;
|
2018-12-12 16:50:12 +07:00
|
|
|
#ifdef CONFIG_FS_ENCRYPTION
|
2015-05-16 06:26:10 +07:00
|
|
|
sb->s_cop = &f2fs_cryptops;
|
2017-10-10 02:15:38 +07:00
|
|
|
#endif
|
2012-11-02 15:07:47 +07:00
|
|
|
sb->s_xattr = f2fs_xattr_handlers;
|
|
|
|
sb->s_export_op = &f2fs_export_ops;
|
|
|
|
sb->s_magic = F2FS_SUPER_MAGIC;
|
|
|
|
sb->s_time_gran = 1;
|
2017-11-28 04:05:09 +07:00
|
|
|
sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
|
|
|
|
(test_opt(sbi, POSIX_ACL) ? SB_POSIXACL : 0);
|
2017-05-10 20:06:33 +07:00
|
|
|
memcpy(&sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid));
|
2018-01-09 18:33:39 +07:00
|
|
|
sb->s_iflags |= SB_I_CGROUPWB;
|
2012-11-02 15:07:47 +07:00
|
|
|
|
|
|
|
/* init f2fs-specific super block info */
|
2015-12-15 16:19:26 +07:00
|
|
|
sbi->valid_super_block = valid_super_block;
|
2012-11-02 15:07:47 +07:00
|
|
|
mutex_init(&sbi->gc_mutex);
|
2018-08-10 07:53:34 +07:00
|
|
|
mutex_init(&sbi->writepages);
|
2012-11-02 15:07:47 +07:00
|
|
|
mutex_init(&sbi->cp_mutex);
|
2019-06-05 10:33:25 +07:00
|
|
|
mutex_init(&sbi->resize_mutex);
|
2014-07-03 17:58:39 +07:00
|
|
|
init_rwsem(&sbi->node_write);
|
2017-03-13 19:22:18 +07:00
|
|
|
init_rwsem(&sbi->node_change);
|
2015-08-12 02:45:39 +07:00
|
|
|
|
|
|
|
/* disallow all the data/node/meta page writes */
|
|
|
|
set_sbi_flag(sbi, SBI_POR_DOING);
|
2012-11-02 15:07:47 +07:00
|
|
|
spin_lock_init(&sbi->stat_lock);
|
2013-11-18 15:16:17 +07:00
|
|
|
|
2017-08-02 22:21:48 +07:00
|
|
|
/* init iostat info */
|
|
|
|
spin_lock_init(&sbi->iostat_lock);
|
|
|
|
sbi->iostat_enable = false;
|
|
|
|
|
2013-12-11 11:54:01 +07:00
|
|
|
for (i = 0; i < NR_PAGE_TYPE; i++) {
|
2017-05-11 01:18:25 +07:00
|
|
|
int n = (i == META) ? 1: NR_TEMP_TYPE;
|
|
|
|
int j;
|
|
|
|
|
treewide: Use array_size() in f2fs_kmalloc()
The f2fs_kmalloc() function has no 2-factor argument form, so
multiplication factors need to be wrapped in array_size(). This patch
replaces cases of:
f2fs_kmalloc(handle, a * b, gfp)
with:
f2fs_kmalloc(handle, array_size(a, b), gfp)
as well as handling cases of:
f2fs_kmalloc(handle, a * b * c, gfp)
with:
f2fs_kmalloc(handle, array3_size(a, b, c), gfp)
This does, however, attempt to ignore constant size factors like:
f2fs_kmalloc(handle, 4 * 1024, gfp)
though any constants defined via macros get caught up in the conversion.
Any factors with a sizeof() of "unsigned char", "char", and "u8" were
dropped, since they're redundant.
The Coccinelle script used for this was:
// Fix redundant parens around sizeof().
@@
expression HANDLE;
type TYPE;
expression THING, E;
@@
(
f2fs_kmalloc(HANDLE,
- (sizeof(TYPE)) * E
+ sizeof(TYPE) * E
, ...)
|
f2fs_kmalloc(HANDLE,
- (sizeof(THING)) * E
+ sizeof(THING) * E
, ...)
)
// Drop single-byte sizes and redundant parens.
@@
expression HANDLE;
expression COUNT;
typedef u8;
typedef __u8;
@@
(
f2fs_kmalloc(HANDLE,
- sizeof(u8) * (COUNT)
+ COUNT
, ...)
|
f2fs_kmalloc(HANDLE,
- sizeof(__u8) * (COUNT)
+ COUNT
, ...)
|
f2fs_kmalloc(HANDLE,
- sizeof(char) * (COUNT)
+ COUNT
, ...)
|
f2fs_kmalloc(HANDLE,
- sizeof(unsigned char) * (COUNT)
+ COUNT
, ...)
|
f2fs_kmalloc(HANDLE,
- sizeof(u8) * COUNT
+ COUNT
, ...)
|
f2fs_kmalloc(HANDLE,
- sizeof(__u8) * COUNT
+ COUNT
, ...)
|
f2fs_kmalloc(HANDLE,
- sizeof(char) * COUNT
+ COUNT
, ...)
|
f2fs_kmalloc(HANDLE,
- sizeof(unsigned char) * COUNT
+ COUNT
, ...)
)
// 2-factor product with sizeof(type/expression) and identifier or constant.
@@
expression HANDLE;
type TYPE;
expression THING;
identifier COUNT_ID;
constant COUNT_CONST;
@@
(
f2fs_kmalloc(HANDLE,
- sizeof(TYPE) * (COUNT_ID)
+ array_size(COUNT_ID, sizeof(TYPE))
, ...)
|
f2fs_kmalloc(HANDLE,
- sizeof(TYPE) * COUNT_ID
+ array_size(COUNT_ID, sizeof(TYPE))
, ...)
|
f2fs_kmalloc(HANDLE,
- sizeof(TYPE) * (COUNT_CONST)
+ array_size(COUNT_CONST, sizeof(TYPE))
, ...)
|
f2fs_kmalloc(HANDLE,
- sizeof(TYPE) * COUNT_CONST
+ array_size(COUNT_CONST, sizeof(TYPE))
, ...)
|
f2fs_kmalloc(HANDLE,
- sizeof(THING) * (COUNT_ID)
+ array_size(COUNT_ID, sizeof(THING))
, ...)
|
f2fs_kmalloc(HANDLE,
- sizeof(THING) * COUNT_ID
+ array_size(COUNT_ID, sizeof(THING))
, ...)
|
f2fs_kmalloc(HANDLE,
- sizeof(THING) * (COUNT_CONST)
+ array_size(COUNT_CONST, sizeof(THING))
, ...)
|
f2fs_kmalloc(HANDLE,
- sizeof(THING) * COUNT_CONST
+ array_size(COUNT_CONST, sizeof(THING))
, ...)
)
// 2-factor product, only identifiers.
@@
expression HANDLE;
identifier SIZE, COUNT;
@@
f2fs_kmalloc(HANDLE,
- SIZE * COUNT
+ array_size(COUNT, SIZE)
, ...)
// 3-factor product with 1 sizeof(type) or sizeof(expression), with
// redundant parens removed.
@@
expression HANDLE;
expression THING;
identifier STRIDE, COUNT;
type TYPE;
@@
(
f2fs_kmalloc(HANDLE,
- sizeof(TYPE) * (COUNT) * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
f2fs_kmalloc(HANDLE,
- sizeof(TYPE) * (COUNT) * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
f2fs_kmalloc(HANDLE,
- sizeof(TYPE) * COUNT * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
f2fs_kmalloc(HANDLE,
- sizeof(TYPE) * COUNT * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
f2fs_kmalloc(HANDLE,
- sizeof(THING) * (COUNT) * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
f2fs_kmalloc(HANDLE,
- sizeof(THING) * (COUNT) * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
f2fs_kmalloc(HANDLE,
- sizeof(THING) * COUNT * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
f2fs_kmalloc(HANDLE,
- sizeof(THING) * COUNT * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
)
// 3-factor product with 2 sizeof(variable), with redundant parens removed.
@@
expression HANDLE;
expression THING1, THING2;
identifier COUNT;
type TYPE1, TYPE2;
@@
(
f2fs_kmalloc(HANDLE,
- sizeof(TYPE1) * sizeof(TYPE2) * COUNT
+ array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
, ...)
|
f2fs_kmalloc(HANDLE,
- sizeof(TYPE1) * sizeof(TYPE2) * (COUNT)
+ array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
, ...)
|
f2fs_kmalloc(HANDLE,
- sizeof(THING1) * sizeof(THING2) * COUNT
+ array3_size(COUNT, sizeof(THING1), sizeof(THING2))
, ...)
|
f2fs_kmalloc(HANDLE,
- sizeof(THING1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(THING1), sizeof(THING2))
, ...)
|
f2fs_kmalloc(HANDLE,
- sizeof(TYPE1) * sizeof(THING2) * COUNT
+ array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
, ...)
|
f2fs_kmalloc(HANDLE,
- sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
, ...)
)
// 3-factor product, only identifiers, with redundant parens removed.
@@
expression HANDLE;
identifier STRIDE, SIZE, COUNT;
@@
(
f2fs_kmalloc(HANDLE,
- (COUNT) * STRIDE * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
f2fs_kmalloc(HANDLE,
- COUNT * (STRIDE) * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
f2fs_kmalloc(HANDLE,
- COUNT * STRIDE * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
f2fs_kmalloc(HANDLE,
- (COUNT) * (STRIDE) * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
f2fs_kmalloc(HANDLE,
- COUNT * (STRIDE) * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
f2fs_kmalloc(HANDLE,
- (COUNT) * STRIDE * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
f2fs_kmalloc(HANDLE,
- (COUNT) * (STRIDE) * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
f2fs_kmalloc(HANDLE,
- COUNT * STRIDE * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
)
// Any remaining multi-factor products, first at least 3-factor products
// when they're not all constants...
@@
expression HANDLE;
expression E1, E2, E3;
constant C1, C2, C3;
@@
(
f2fs_kmalloc(HANDLE, C1 * C2 * C3, ...)
|
f2fs_kmalloc(HANDLE,
- E1 * E2 * E3
+ array3_size(E1, E2, E3)
, ...)
)
// And then all remaining 2 factors products when they're not all constants.
@@
expression HANDLE;
expression E1, E2;
constant C1, C2;
@@
(
f2fs_kmalloc(HANDLE, C1 * C2, ...)
|
f2fs_kmalloc(HANDLE,
- E1 * E2
+ array_size(E1, E2)
, ...)
)
Signed-off-by: Kees Cook <keescook@chromium.org>
2018-06-13 04:28:16 +07:00
|
|
|
sbi->write_io[i] =
|
|
|
|
f2fs_kmalloc(sbi,
|
|
|
|
array_size(n,
|
|
|
|
sizeof(struct f2fs_bio_info)),
|
|
|
|
GFP_KERNEL);
|
2017-06-11 14:21:11 +07:00
|
|
|
if (!sbi->write_io[i]) {
|
|
|
|
err = -ENOMEM;
|
2018-09-05 13:54:01 +07:00
|
|
|
goto free_bio_info;
|
2017-06-11 14:21:11 +07:00
|
|
|
}
|
2017-05-11 01:18:25 +07:00
|
|
|
|
|
|
|
for (j = HOT; j < n; j++) {
|
|
|
|
init_rwsem(&sbi->write_io[i][j].io_rwsem);
|
|
|
|
sbi->write_io[i][j].sbi = sbi;
|
|
|
|
sbi->write_io[i][j].bio = NULL;
|
2017-05-19 22:37:01 +07:00
|
|
|
spin_lock_init(&sbi->write_io[i][j].io_lock);
|
|
|
|
INIT_LIST_HEAD(&sbi->write_io[i][j].io_list);
|
2017-05-11 01:18:25 +07:00
|
|
|
}
|
2013-12-11 11:54:01 +07:00
|
|
|
}
|
2013-11-18 15:16:17 +07:00
|
|
|
|
2016-08-05 01:38:25 +07:00
|
|
|
init_rwsem(&sbi->cp_rwsem);
|
2019-05-30 00:58:45 +07:00
|
|
|
init_rwsem(&sbi->quota_sem);
|
2013-11-07 10:48:25 +07:00
|
|
|
init_waitqueue_head(&sbi->cp_wait);
|
2012-11-02 15:07:47 +07:00
|
|
|
init_sb_info(sbi);
|
|
|
|
|
2016-05-14 02:36:58 +07:00
|
|
|
err = init_percpu_info(sbi);
|
|
|
|
if (err)
|
2018-01-17 15:31:35 +07:00
|
|
|
goto free_bio_info;
|
2016-05-14 02:36:58 +07:00
|
|
|
|
2016-12-15 01:12:56 +07:00
|
|
|
if (F2FS_IO_SIZE(sbi) > 1) {
|
|
|
|
sbi->write_io_dummy =
|
2017-02-27 17:43:13 +07:00
|
|
|
mempool_create_page_pool(2 * (F2FS_IO_SIZE(sbi) - 1), 0);
|
2017-06-12 08:44:27 +07:00
|
|
|
if (!sbi->write_io_dummy) {
|
|
|
|
err = -ENOMEM;
|
2018-01-17 15:31:35 +07:00
|
|
|
goto free_percpu;
|
2017-06-12 08:44:27 +07:00
|
|
|
}
|
2016-12-15 01:12:56 +07:00
|
|
|
}
|
|
|
|
|
2012-11-02 15:07:47 +07:00
|
|
|
/* get an inode for meta space */
|
|
|
|
sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi));
|
|
|
|
if (IS_ERR(sbi->meta_inode)) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_err(sbi, "Failed to read F2FS meta data inode");
|
2012-11-02 15:07:47 +07:00
|
|
|
err = PTR_ERR(sbi->meta_inode);
|
2016-12-15 01:12:56 +07:00
|
|
|
goto free_io_dummy;
|
2012-11-02 15:07:47 +07:00
|
|
|
}
|
|
|
|
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking at f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using generically named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
err = f2fs_get_valid_checkpoint(sbi);
|
2012-12-30 12:52:05 +07:00
|
|
|
if (err) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_err(sbi, "Failed to get valid F2FS checkpoint");
|
2012-11-02 15:07:47 +07:00
|
|
|
goto free_meta_inode;
|
2012-12-30 12:52:05 +07:00
|
|
|
}
|
2012-11-02 15:07:47 +07:00
|
|
|
|
f2fs: guarantee journalled quota data by checkpoint
For journalled quota mode, let checkpoint flush dquot dirty data
and quota file data to guarantee persistence of all quota sysfiles in
the last checkpoint; this way, we can avoid corrupting quota sysfiles
when encountering SPO.
The implementation is as below:
1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there is
cached dquot metadata changes in quota subsystem, and later checkpoint
should:
a) flush dquot metadata into quota file.
b) flush quota file to storage to keep file usage be consistent.
2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that quota
operation failed due to -EIO or -ENOSPC, so later,
a) checkpoint will skip syncing dquot metadata.
b) CP_QUOTA_NEED_FSCK_FLAG will be set in last cp pack to give a
hint for fsck repairing.
3. add a global state SBI_QUOTA_SKIP_FLUSH: in checkpoint, if quota
data updating is very heavy, it may cause a hung task in block_operation().
To avoid this, if our retry times exceed the threshold, let's just skip
flushing and retry in the next checkpoint().
Signed-off-by: Weichao Guo <guoweichao@huawei.com>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: avoid warnings and set fsck flag]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-09-20 19:05:00 +07:00
|
|
|
if (__is_set_ckpt_flags(F2FS_CKPT(sbi), CP_QUOTA_NEED_FSCK_FLAG))
|
|
|
|
set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
|
2019-01-25 08:48:38 +07:00
|
|
|
if (__is_set_ckpt_flags(F2FS_CKPT(sbi), CP_DISABLED_QUICK_FLAG)) {
|
|
|
|
set_sbi_flag(sbi, SBI_CP_DISABLED_QUICK);
|
|
|
|
sbi->interval_time[DISABLE_TIME] = DEF_DISABLE_QUICK_INTERVAL;
|
|
|
|
}
|
f2fs: guarantee journalled quota data by checkpoint
For journalled quota mode, let checkpoint flush dquot dirty data
and quota file data to guarantee persistence of all quota sysfiles in
the last checkpoint; this way, we can avoid corrupting quota sysfiles
when encountering SPO.
The implementation is as below:
1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there is
cached dquot metadata changes in quota subsystem, and later checkpoint
should:
a) flush dquot metadata into quota file.
b) flush quota file to storage to keep file usage be consistent.
2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that quota
operation failed due to -EIO or -ENOSPC, so later,
a) checkpoint will skip syncing dquot metadata.
b) CP_QUOTA_NEED_FSCK_FLAG will be set in last cp pack to give a
hint for fsck repairing.
3. add a global state SBI_QUOTA_SKIP_FLUSH: in checkpoint, if quota
data updating is very heavy, it may cause a hung task in block_operation().
To avoid this, if our retry times exceed the threshold, let's just skip
flushing and retry in the next checkpoint().
Signed-off-by: Weichao Guo <guoweichao@huawei.com>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: avoid warnings and set fsck flag]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-09-20 19:05:00 +07:00
|
|
|
|
2019-06-05 10:33:25 +07:00
|
|
|
if (__is_set_ckpt_flags(F2FS_CKPT(sbi), CP_FSCK_FLAG))
|
|
|
|
set_sbi_flag(sbi, SBI_NEED_FSCK);
|
|
|
|
|
2016-10-07 09:02:05 +07:00
|
|
|
/* Initialize device list */
|
|
|
|
err = f2fs_scan_devices(sbi);
|
|
|
|
if (err) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_err(sbi, "Failed to find devices");
|
2016-10-07 09:02:05 +07:00
|
|
|
goto free_devices;
|
|
|
|
}
|
|
|
|
|
2012-11-02 15:07:47 +07:00
|
|
|
sbi->total_valid_node_count =
|
|
|
|
le32_to_cpu(sbi->ckpt->valid_node_count);
|
2016-05-17 01:42:32 +07:00
|
|
|
percpu_counter_set(&sbi->total_valid_inode_count,
|
|
|
|
le32_to_cpu(sbi->ckpt->valid_inode_count));
|
2012-11-02 15:07:47 +07:00
|
|
|
sbi->user_block_count = le64_to_cpu(sbi->ckpt->user_block_count);
|
|
|
|
sbi->total_valid_block_count =
|
|
|
|
le64_to_cpu(sbi->ckpt->valid_block_count);
|
|
|
|
sbi->last_valid_block_count = sbi->total_valid_block_count;
|
2017-06-26 15:24:41 +07:00
|
|
|
sbi->reserved_blocks = 0;
|
2017-10-27 19:45:05 +07:00
|
|
|
sbi->current_reserved_blocks = 0;
|
2017-12-28 06:05:52 +07:00
|
|
|
limit_reserve_root(sbi);
|
2016-05-17 01:06:50 +07:00
|
|
|
|
2015-12-16 12:09:20 +07:00
|
|
|
for (i = 0; i < NR_INODE_TYPE; i++) {
|
|
|
|
INIT_LIST_HEAD(&sbi->inode_list[i]);
|
|
|
|
spin_lock_init(&sbi->inode_lock[i]);
|
|
|
|
}
|
2019-05-20 16:36:59 +07:00
|
|
|
mutex_init(&sbi->flush_lock);
|
2012-11-02 15:07:47 +07:00
|
|
|
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking at f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using generically named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
f2fs_init_extent_cache_info(sbi);
|
f2fs: enable rb-tree extent cache
This patch enables rb-tree based extent cache in f2fs.
When we mount with "-o extent_cache", f2fs will try to add recently accessed
page-block mappings into rb-tree based extent cache as much as possible, instead
of original one extent info cache.
This way, f2fs can support a more effective cache between the dnode page cache and
disk. It will supply a high hit ratio in the cache with less memory when dnode
page cache is reclaimed in a low-memory environment.
Storage: Sandisk sd card 64g
1.append write file (offset: 0, size: 128M);
2.override write file (offset: 2M, size: 1M);
3.override write file (offset: 4M, size: 1M);
...
4.override write file (offset: 48M, size: 1M);
...
5.override write file (offset: 112M, size: 1M);
6.sync
7.echo 3 > /proc/sys/vm/drop_caches
8.read file (size:128M, unit: 4k, count: 32768)
(time dd if=/mnt/f2fs/128m bs=4k count=32768)
Extent Hit Ratio:
before patched
Hit Ratio 121 / 1071 1071 / 1071
Performance:
before patched
real 0m37.051s 0m35.556s
user 0m0.040s 0m0.026s
sys 0m2.990s 0m2.251s
Memory Cost:
before patched
Tree Count: 0 1 (size: 24 bytes)
Node Count: 0 45 (size: 1440 bytes)
v3:
o retest and given more details of test result.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-02-05 16:57:31 +07:00
|
|
|
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking at f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using generically named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
f2fs_init_ino_entry_info(sbi);
|
2012-11-02 15:07:47 +07:00
|
|
|
|
f2fs: fix to avoid broken of dnode block list
f2fs recovery flow is relying on dnode block link list, it means fsynced
file recovery depends on previous dnode's persistence in the list, so
during fsync() we should wait on all regular inode's dnode writebacked
before issuing flush.
By this way, we can avoid dnode block list being broken by out-of-order
IO submission due to IO scheduler or driver.
Sheng Yong helps to do the test with this patch:
Target:/data (f2fs, -)
64MB / 32768KB / 4KB / 8
1 / PERSIST / Index
Base:
SEQ-RD(MB/s) SEQ-WR(MB/s) RND-RD(IOPS) RND-WR(IOPS) Insert(TPS) Update(TPS) Delete(TPS)
1 867.82 204.15 41440.03 41370.54 680.8 1025.94 1031.08
2 871.87 205.87 41370.3 40275.2 791.14 1065.84 1101.7
3 866.52 205.69 41795.67 40596.16 694.69 1037.16 1031.48
Avg 868.7366667 205.2366667 41535.33333 40747.3 722.21 1042.98 1054.753333
After:
SEQ-RD(MB/s) SEQ-WR(MB/s) RND-RD(IOPS) RND-WR(IOPS) Insert(TPS) Update(TPS) Delete(TPS)
1 798.81 202.5 41143 40613.87 602.71 838.08 913.83
2 805.79 206.47 40297.2 41291.46 604.44 840.75 924.27
3 814.83 206.17 41209.57 40453.62 602.85 834.66 927.91
Avg 806.4766667 205.0466667 40883.25667 40786.31667 603.3333333 837.83 922.0033333
Patched/Original:
0.928332713 0.999074239 0.984300676 1.000957528 0.835398753 0.803303994 0.874141189
It looks like atomic write will suffer performance regression.
I suspect that the culprit is that we are forcing a wait for all dnodes to be in
the storage cache before we issue PREFLUSH+FUA.
BTW, will commit ("f2fs: don't need to wait for node writes for atomic write")
cause the problem: we will lose data of last transaction after SPO, even if
atomic write return no error:
- atomic_open();
- write() P1, P2, P3;
- atomic_commit();
- writeback data: P1, P2, P3;
- writeback node: N1, N2, N3; <--- If N1, N2 is not writebacked, N3 with fsync_mark is
writebacked, In SPOR, we won't find N3 since node chain is broken, turns out that losing
last transaction.
- preflush + fua;
- power-cut
If we don't wait dnode writeback for atomic_write:
SEQ-RD(MB/s) SEQ-WR(MB/s) RND-RD(IOPS) RND-WR(IOPS) Insert(TPS) Update(TPS) Delete(TPS)
1 779.91 206.03 41621.5 40333.16 716.9 1038.21 1034.85
2 848.51 204.35 40082.44 39486.17 791.83 1119.96 1083.77
3 772.12 206.27 41335.25 41599.65 723.29 1055.07 971.92
Avg 800.18 205.55 41013.06333 40472.99333 744.0066667 1071.08 1030.18
Patched/Original:
0.92108464 1.001526693 0.987425886 0.993268102 1.030180511 1.026942031 0.976702294
SQLite's performance recovers.
Jaegeuk:
"Practically, I don't see db corruption because of this. We can excuse losing
the last transaction."
Finally, we decided to keep the original implementation of the atomic write interface
semantics, in which we don't wait for all dnode writeback before preflush+fua submission.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-08-02 22:03:19 +07:00
|
|
|
f2fs_init_fsync_node_info(sbi);
|
|
|
|
|
2012-11-02 15:07:47 +07:00
|
|
|
/* setup f2fs internal modules */
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking at f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using generically named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
err = f2fs_build_segment_manager(sbi);
|
2012-12-30 12:52:05 +07:00
|
|
|
if (err) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_err(sbi, "Failed to initialize F2FS segment manager (%d)",
|
|
|
|
err);
|
2012-11-02 15:07:47 +07:00
|
|
|
goto free_sm;
|
2012-12-30 12:52:05 +07:00
|
|
|
}
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking at f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using generically named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
err = f2fs_build_node_manager(sbi);
|
2012-12-30 12:52:05 +07:00
|
|
|
if (err) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_err(sbi, "Failed to initialize F2FS node manager (%d)",
|
|
|
|
err);
|
2012-11-02 15:07:47 +07:00
|
|
|
goto free_nm;
|
2012-12-30 12:52:05 +07:00
|
|
|
}
|
2012-11-02 15:07:47 +07:00
|
|
|
|
2016-01-27 08:57:30 +07:00
|
|
|
/* For write statistics */
|
|
|
|
if (sb->s_bdev->bd_part)
|
|
|
|
sbi->sectors_written_start =
|
2018-07-18 18:47:38 +07:00
|
|
|
(u64)part_stat_read(sb->s_bdev->bd_part,
|
|
|
|
sectors[STAT_WRITE]);
|
2016-01-27 08:57:30 +07:00
|
|
|
|
|
|
|
/* Read accumulated write IO statistics if exists */
|
|
|
|
seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE);
|
|
|
|
if (__exist_node_summaries(sbi))
|
|
|
|
sbi->kbytes_written =
|
2016-03-29 17:00:15 +07:00
|
|
|
le64_to_cpu(seg_i->journal->info.kbytes_written);
|
2016-01-27 08:57:30 +07:00
|
|
|
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking at f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using generically named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
f2fs_build_gc_manager(sbi);
|
2012-11-02 15:07:47 +07:00
|
|
|
|
2018-12-26 12:50:29 +07:00
|
|
|
err = f2fs_build_stats(sbi);
|
|
|
|
if (err)
|
|
|
|
goto free_nm;
|
|
|
|
|
2012-11-02 15:07:47 +07:00
|
|
|
/* get an inode for node space */
|
|
|
|
sbi->node_inode = f2fs_iget(sb, F2FS_NODE_INO(sbi));
|
|
|
|
if (IS_ERR(sbi->node_inode)) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_err(sbi, "Failed to read node inode");
|
2012-11-02 15:07:47 +07:00
|
|
|
err = PTR_ERR(sbi->node_inode);
|
2018-12-26 12:50:29 +07:00
|
|
|
goto free_stats;
|
2012-11-02 15:07:47 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
/* read root inode and dentry */
|
|
|
|
root = f2fs_iget(sb, F2FS_ROOT_INO(sbi));
|
|
|
|
if (IS_ERR(root)) {
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_err(sbi, "Failed to read root inode");
|
2012-11-02 15:07:47 +07:00
|
|
|
err = PTR_ERR(root);
|
2018-12-26 12:50:29 +07:00
|
|
|
goto free_node_inode;
|
2012-11-02 15:07:47 +07:00
|
|
|
}
|
f2fs: fix to do sanity check with inline flags
https://bugzilla.kernel.org/show_bug.cgi?id=200221
- Overview
BUG() in clear_inode() when mounting and un-mounting a corrupted f2fs image
- Reproduce
- Kernel message
[ 538.601448] F2FS-fs (loop0): Invalid segment/section count (31, 24 x 1376257)
[ 538.601458] F2FS-fs (loop0): Can't find valid F2FS filesystem in 2th superblock
[ 538.724091] F2FS-fs (loop0): Try to recover 2th superblock, ret: 0
[ 538.724102] F2FS-fs (loop0): Mounted with checkpoint version = 2
[ 540.970834] ------------[ cut here ]------------
[ 540.970838] kernel BUG at fs/inode.c:512!
[ 540.971750] invalid opcode: 0000 [#1] SMP KASAN PTI
[ 540.972755] CPU: 1 PID: 1305 Comm: umount Not tainted 4.18.0-rc1+ #4
[ 540.974034] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
[ 540.982913] RIP: 0010:clear_inode+0xc0/0xd0
[ 540.983774] Code: 8d a3 30 01 00 00 4c 89 e7 e8 1c ec f8 ff 48 8b 83 30 01 00 00 49 39 c4 75 1a 48 c7 83 a0 00 00 00 60 00 00 00 5b 41 5c 5d c3 <0f> 0b 0f 0b 0f 0b 0f 0b 0f 0b 0f 0b 0f 1f 40 00 66 66 66 66 90 55
[ 540.987570] RSP: 0018:ffff8801e34a7b70 EFLAGS: 00010002
[ 540.988636] RAX: 0000000000000000 RBX: ffff8801e9b744e8 RCX: ffffffffb840eb3a
[ 540.990063] RDX: dffffc0000000000 RSI: 0000000000000004 RDI: ffff8801e9b746b8
[ 540.991499] RBP: ffff8801e34a7b80 R08: ffffed003d36e8ce R09: ffffed003d36e8ce
[ 540.992923] R10: 0000000000000001 R11: ffffed003d36e8cd R12: ffff8801e9b74668
[ 540.994360] R13: ffff8801e9b74760 R14: ffff8801e9b74528 R15: ffff8801e9b74530
[ 540.995786] FS: 00007f4662bdf840(0000) GS:ffff8801f6f00000(0000) knlGS:0000000000000000
[ 540.997403] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 540.998571] CR2: 000000000175c568 CR3: 00000001dcfe6000 CR4: 00000000000006e0
[ 541.000015] Call Trace:
[ 541.000554] f2fs_evict_inode+0x253/0x630
[ 541.001381] evict+0x16f/0x290
[ 541.002015] iput+0x280/0x300
[ 541.002654] dentry_unlink_inode+0x165/0x1e0
[ 541.003528] __dentry_kill+0x16a/0x260
[ 541.004300] dentry_kill+0x70/0x250
[ 541.005018] dput+0x154/0x1d0
[ 541.005635] do_one_tree+0x34/0x40
[ 541.006354] shrink_dcache_for_umount+0x3f/0xa0
[ 541.007285] generic_shutdown_super+0x43/0x1c0
[ 541.008192] kill_block_super+0x52/0x80
[ 541.008978] kill_f2fs_super+0x62/0x70
[ 541.009750] deactivate_locked_super+0x6f/0xa0
[ 541.010664] deactivate_super+0x5e/0x80
[ 541.011450] cleanup_mnt+0x61/0xa0
[ 541.012151] __cleanup_mnt+0x12/0x20
[ 541.012893] task_work_run+0xc8/0xf0
[ 541.013635] exit_to_usermode_loop+0x125/0x130
[ 541.014555] do_syscall_64+0x138/0x170
[ 541.015340] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 541.016375] RIP: 0033:0x7f46624bf487
[ 541.017104] Code: 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 31 f6 e9 09 00 00 00 66 0f 1f 84 00 00 00 00 00 b8 a6 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d e1 c9 2b 00 f7 d8 64 89 01 48
[ 541.020923] RSP: 002b:00007fff5e12e9a8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6
[ 541.022452] RAX: 0000000000000000 RBX: 0000000001753030 RCX: 00007f46624bf487
[ 541.023885] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 000000000175a1e0
[ 541.025318] RBP: 000000000175a1e0 R08: 0000000000000000 R09: 0000000000000014
[ 541.026755] R10: 00000000000006b2 R11: 0000000000000246 R12: 00007f46629c883c
[ 541.028186] R13: 0000000000000000 R14: 0000000001753210 R15: 00007fff5e12ec30
[ 541.029626] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_timer snd mac_hid i2c_piix4 soundcore ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx raid1 raid0 multipath linear 8139too crct10dif_pclmul crc32_pclmul qxl drm_kms_helper syscopyarea aesni_intel sysfillrect sysimgblt fb_sys_fops ttm drm aes_x86_64 crypto_simd cryptd 8139cp glue_helper mii pata_acpi floppy
[ 541.039445] ---[ end trace 4ce02f25ff7d3df5 ]---
[ 541.040392] RIP: 0010:clear_inode+0xc0/0xd0
[ 541.041240] Code: 8d a3 30 01 00 00 4c 89 e7 e8 1c ec f8 ff 48 8b 83 30 01 00 00 49 39 c4 75 1a 48 c7 83 a0 00 00 00 60 00 00 00 5b 41 5c 5d c3 <0f> 0b 0f 0b 0f 0b 0f 0b 0f 0b 0f 0b 0f 1f 40 00 66 66 66 66 90 55
[ 541.045042] RSP: 0018:ffff8801e34a7b70 EFLAGS: 00010002
[ 541.046099] RAX: 0000000000000000 RBX: ffff8801e9b744e8 RCX: ffffffffb840eb3a
[ 541.047537] RDX: dffffc0000000000 RSI: 0000000000000004 RDI: ffff8801e9b746b8
[ 541.048965] RBP: ffff8801e34a7b80 R08: ffffed003d36e8ce R09: ffffed003d36e8ce
[ 541.050402] R10: 0000000000000001 R11: ffffed003d36e8cd R12: ffff8801e9b74668
[ 541.051832] R13: ffff8801e9b74760 R14: ffff8801e9b74528 R15: ffff8801e9b74530
[ 541.053263] FS: 00007f4662bdf840(0000) GS:ffff8801f6f00000(0000) knlGS:0000000000000000
[ 541.054891] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 541.056039] CR2: 000000000175c568 CR3: 00000001dcfe6000 CR4: 00000000000006e0
[ 541.058506] ==================================================================
[ 541.059991] BUG: KASAN: stack-out-of-bounds in update_stack_state+0x38c/0x3e0
[ 541.061513] Read of size 8 at addr ffff8801e34a7970 by task umount/1305
[ 541.063302] CPU: 1 PID: 1305 Comm: umount Tainted: G D 4.18.0-rc1+ #4
[ 541.064838] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
[ 541.066778] Call Trace:
[ 541.067294] dump_stack+0x7b/0xb5
[ 541.067986] print_address_description+0x70/0x290
[ 541.068941] kasan_report+0x291/0x390
[ 541.069692] ? update_stack_state+0x38c/0x3e0
[ 541.070598] __asan_load8+0x54/0x90
[ 541.071315] update_stack_state+0x38c/0x3e0
[ 541.072172] ? __read_once_size_nocheck.constprop.7+0x20/0x20
[ 541.073340] ? vprintk_func+0x27/0x60
[ 541.074096] ? printk+0xa3/0xd3
[ 541.074762] ? __save_stack_trace+0x5e/0x100
[ 541.075634] unwind_next_frame.part.5+0x18e/0x490
[ 541.076594] ? unwind_dump+0x290/0x290
[ 541.077368] ? __show_regs+0x2c4/0x330
[ 541.078142] __unwind_start+0x106/0x190
[ 541.085422] __save_stack_trace+0x5e/0x100
[ 541.086268] ? __save_stack_trace+0x5e/0x100
[ 541.087161] ? unlink_anon_vmas+0xba/0x2c0
[ 541.087997] save_stack_trace+0x1f/0x30
[ 541.088782] save_stack+0x46/0xd0
[ 541.089475] ? __alloc_pages_slowpath+0x1420/0x1420
[ 541.090477] ? flush_tlb_mm_range+0x15e/0x220
[ 541.091364] ? __dec_node_state+0x24/0xb0
[ 541.092180] ? lock_page_memcg+0x85/0xf0
[ 541.092979] ? unlock_page_memcg+0x16/0x80
[ 541.093812] ? page_remove_rmap+0x198/0x520
[ 541.094674] ? mark_page_accessed+0x133/0x200
[ 541.095559] ? _cond_resched+0x1a/0x50
[ 541.096326] ? unmap_page_range+0xcd4/0xe50
[ 541.097179] ? rb_next+0x58/0x80
[ 541.097845] ? rb_next+0x58/0x80
[ 541.098518] __kasan_slab_free+0x13c/0x1a0
[ 541.099352] ? unlink_anon_vmas+0xba/0x2c0
[ 541.100184] kasan_slab_free+0xe/0x10
[ 541.100934] kmem_cache_free+0x89/0x1e0
[ 541.101724] unlink_anon_vmas+0xba/0x2c0
[ 541.102534] free_pgtables+0x101/0x1b0
[ 541.103299] exit_mmap+0x146/0x2a0
[ 541.103996] ? __ia32_sys_munmap+0x50/0x50
[ 541.104829] ? kasan_check_read+0x11/0x20
[ 541.105649] ? mm_update_next_owner+0x322/0x380
[ 541.106578] mmput+0x8b/0x1d0
[ 541.107191] do_exit+0x43a/0x1390
[ 541.107876] ? mm_update_next_owner+0x380/0x380
[ 541.108791] ? deactivate_super+0x5e/0x80
[ 541.109610] ? cleanup_mnt+0x61/0xa0
[ 541.110351] ? __cleanup_mnt+0x12/0x20
[ 541.111115] ? task_work_run+0xc8/0xf0
[ 541.111879] ? exit_to_usermode_loop+0x125/0x130
[ 541.112817] rewind_stack_do_exit+0x17/0x20
[ 541.113666] RIP: 0033:0x7f46624bf487
[ 541.114404] Code: Bad RIP value.
[ 541.115094] RSP: 002b:00007fff5e12e9a8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6
[ 541.116605] RAX: 0000000000000000 RBX: 0000000001753030 RCX: 00007f46624bf487
[ 541.118034] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 000000000175a1e0
[ 541.119472] RBP: 000000000175a1e0 R08: 0000000000000000 R09: 0000000000000014
[ 541.120890] R10: 00000000000006b2 R11: 0000000000000246 R12: 00007f46629c883c
[ 541.122321] R13: 0000000000000000 R14: 0000000001753210 R15: 00007fff5e12ec30
[ 541.124061] The buggy address belongs to the page:
[ 541.125042] page:ffffea00078d29c0 count:0 mapcount:0 mapping:0000000000000000 index:0x0
[ 541.126651] flags: 0x2ffff0000000000()
[ 541.127418] raw: 02ffff0000000000 dead000000000100 dead000000000200 0000000000000000
[ 541.128963] raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000
[ 541.130516] page dumped because: kasan: bad access detected
[ 541.131954] Memory state around the buggy address:
[ 541.132924] ffff8801e34a7800: 00 f1 f1 f1 f1 00 f4 f4 f4 f3 f3 f3 f3 00 00 00
[ 541.134378] ffff8801e34a7880: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[ 541.135814] >ffff8801e34a7900: 00 00 00 00 00 00 00 00 00 00 00 00 00 f1 f1 f1
[ 541.137253] ^
[ 541.138637] ffff8801e34a7980: f1 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[ 541.140075] ffff8801e34a7a00: 00 00 00 00 00 00 00 00 f3 00 00 00 00 00 00 00
[ 541.141509] ==================================================================
- Location
https://elixir.bootlin.com/linux/v4.18-rc1/source/fs/inode.c#L512
BUG_ON(inode->i_data.nrpages);
The root cause is root directory inode is corrupted, it has both
inline_data and inline_dentry flag, and its nlink is zero, so in
->evict(), after dropping all page cache, it grabs page #0 for inline
data truncation, result in panic in later clear_inode() where we will
check inode->i_data.nrpages value.
This patch adds inline flags check in sanity_check_inode, in addition,
do sanity check with root inode's nlink.
Reported-by: Wen Xu <wen.xu@gatech.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-06-28 23:19:25 +07:00
|
|
|
if (!S_ISDIR(root->i_mode) || !root->i_blocks ||
|
|
|
|
!root->i_size || !root->i_nlink) {
|
2014-07-25 11:55:09 +07:00
|
|
|
iput(root);
|
2013-11-28 14:43:43 +07:00
|
|
|
err = -EINVAL;
|
2018-12-26 12:50:29 +07:00
|
|
|
goto free_node_inode;
|
2013-11-28 14:43:43 +07:00
|
|
|
}
|
2012-11-02 15:07:47 +07:00
|
|
|
|
|
|
|
sb->s_root = d_make_root(root); /* allocate root dentry */
|
|
|
|
if (!sb->s_root) {
|
|
|
|
err = -ENOMEM;
|
2019-01-23 14:49:44 +07:00
|
|
|
goto free_node_inode;
|
2012-11-02 15:07:47 +07:00
|
|
|
}
|
|
|
|
|
2017-07-27 01:24:13 +07:00
|
|
|
err = f2fs_register_sysfs(sbi);
|
2013-08-04 21:09:40 +07:00
|
|
|
if (err)
|
2017-06-14 16:39:46 +07:00
|
|
|
goto free_root_inode;
|
2013-08-04 21:09:40 +07:00
|
|
|
|
2017-10-06 23:14:28 +07:00
|
|
|
#ifdef CONFIG_QUOTA
|
2018-07-26 18:24:25 +07:00
|
|
|
/* Enable quota usage during mount */
|
2018-10-24 17:34:26 +07:00
|
|
|
if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sb)) {
|
2017-10-06 23:14:28 +07:00
|
|
|
err = f2fs_enable_quotas(sb);
|
2018-10-03 07:20:58 +07:00
|
|
|
if (err)
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_err(sbi, "Cannot turn on quotas: error %d", err);
|
2017-10-06 23:14:28 +07:00
|
|
|
}
|
|
|
|
#endif
|
2017-08-08 09:54:31 +07:00
|
|
|
/* if there are any orphan nodes, free them */
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking at f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using generically named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
err = f2fs_recover_orphan_inodes(sbi);
|
2017-08-08 09:54:31 +07:00
|
|
|
if (err)
|
2017-10-06 23:14:28 +07:00
|
|
|
goto free_meta;
|
2017-08-08 09:54:31 +07:00
|
|
|
|
2018-08-21 09:21:43 +07:00
|
|
|
if (unlikely(is_set_ckpt_flags(sbi, CP_DISABLED_FLAG)))
|
2019-02-19 15:23:53 +07:00
|
|
|
goto reset_checkpoint;
|
2018-08-21 09:21:43 +07:00
|
|
|
|
2014-02-19 16:23:32 +07:00
|
|
|
/* recover fsynced data */
|
|
|
|
if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
|
2015-01-24 10:16:59 +07:00
|
|
|
/*
|
|
|
|
* mount should fail when the device is read-only and the
|
|
|
|
* previous checkpoint was not done by clean system shutdown.
|
|
|
|
*/
|
2019-04-22 19:22:37 +07:00
|
|
|
if (f2fs_hw_is_readonly(sbi)) {
|
|
|
|
if (!is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
|
|
|
|
err = -EROFS;
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_err(sbi, "Need to recover fsync data, but write access unavailable");
|
2019-04-22 19:22:37 +07:00
|
|
|
goto free_meta;
|
|
|
|
}
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_info(sbi, "write access unavailable, skipping recovery");
|
2019-04-22 19:22:37 +07:00
|
|
|
goto reset_checkpoint;
|
2015-01-24 10:16:59 +07:00
|
|
|
}
|
2015-03-16 20:08:44 +07:00
|
|
|
|
|
|
|
if (need_fsck)
|
|
|
|
set_sbi_flag(sbi, SBI_NEED_FSCK);
|
|
|
|
|
2019-02-19 15:23:53 +07:00
|
|
|
if (skip_recovery)
|
|
|
|
goto reset_checkpoint;
|
2016-09-20 07:55:10 +07:00
|
|
|
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking at f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using generically named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
err = f2fs_recover_fsync_data(sbi, false);
|
2016-03-24 06:12:58 +07:00
|
|
|
if (err < 0) {
|
2019-02-19 15:23:53 +07:00
|
|
|
if (err != -ENOMEM)
|
|
|
|
skip_recovery = true;
|
2015-03-16 20:08:44 +07:00
|
|
|
need_fsck = true;
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_err(sbi, "Cannot recover all fsync data errno=%d",
|
|
|
|
err);
|
2017-08-08 09:54:31 +07:00
|
|
|
goto free_meta;
|
2014-08-09 05:37:41 +07:00
|
|
|
}
|
2016-03-24 06:12:58 +07:00
|
|
|
} else {
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking at f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using generically named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
err = f2fs_recover_fsync_data(sbi, true);
|
2016-03-24 06:12:58 +07:00
|
|
|
|
|
|
|
if (!f2fs_readonly(sb) && err > 0) {
|
|
|
|
err = -EINVAL;
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_err(sbi, "Need to recover fsync data");
|
2017-10-06 23:14:28 +07:00
|
|
|
goto free_meta;
|
2016-03-24 06:12:58 +07:00
|
|
|
}
|
2014-02-19 16:23:32 +07:00
|
|
|
}
|
2019-02-19 15:23:53 +07:00
|
|
|
reset_checkpoint:
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking at f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using generically named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
/* f2fs_recover_fsync_data() cleared this already */
|
2015-08-12 02:45:39 +07:00
|
|
|
clear_sbi_flag(sbi, SBI_POR_DOING);
|
2013-08-04 21:09:40 +07:00
|
|
|
|
2018-08-21 09:21:43 +07:00
|
|
|
if (test_opt(sbi, DISABLE_CHECKPOINT)) {
|
|
|
|
err = f2fs_disable_checkpoint(sbi);
|
|
|
|
if (err)
|
2019-01-23 05:04:33 +07:00
|
|
|
goto sync_free_meta;
|
2018-08-21 09:21:43 +07:00
|
|
|
} else if (is_set_ckpt_flags(sbi, CP_DISABLED_FLAG)) {
|
|
|
|
f2fs_enable_checkpoint(sbi);
|
|
|
|
}
|
|
|
|
|
2014-02-19 16:23:32 +07:00
|
|
|
/*
|
|
|
|
* If filesystem is not mounted as read-only then
|
|
|
|
* do start the gc_thread.
|
|
|
|
*/
|
2014-11-18 10:16:01 +07:00
|
|
|
if (test_opt(sbi, BG_GC) && !f2fs_readonly(sb)) {
|
2014-02-19 16:23:32 +07:00
|
|
|
/* After POR, we can run background GC thread.*/
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking at f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using generically named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
err = f2fs_start_gc_thread(sbi);
|
2014-02-19 16:23:32 +07:00
|
|
|
if (err)
|
2019-01-23 05:04:33 +07:00
|
|
|
goto sync_free_meta;
|
2014-02-19 16:23:32 +07:00
|
|
|
}
|
2018-12-14 09:38:33 +07:00
|
|
|
kvfree(options);
|
2015-05-21 13:42:53 +07:00
|
|
|
|
|
|
|
/* recover broken superblock */
|
2016-03-24 00:42:01 +07:00
|
|
|
if (recovery) {
|
2016-02-22 17:33:20 +07:00
|
|
|
err = f2fs_commit_super(sbi, true);
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_info(sbi, "Try to recover %dth superblock, ret: %d",
|
|
|
|
sbi->valid_super_block ? 1 : 2, err);
|
2015-05-21 13:42:53 +07:00
|
|
|
}
|
|
|
|
|
2017-11-30 18:28:20 +07:00
|
|
|
f2fs_join_shrinker(sbi);
|
|
|
|
|
2018-02-23 05:09:30 +07:00
|
|
|
f2fs_tuning_parameters(sbi);
|
|
|
|
|
2019-06-18 16:48:42 +07:00
|
|
|
f2fs_notice(sbi, "Mounted with checkpoint version = %llx",
|
|
|
|
cur_cp_version(F2FS_CKPT(sbi)));
|
2016-01-09 06:51:50 +07:00
|
|
|
f2fs_update_time(sbi, CP_TIME);
|
2016-01-09 07:57:48 +07:00
|
|
|
f2fs_update_time(sbi, REQ_TIME);
|
2019-01-25 08:48:38 +07:00
|
|
|
clear_sbi_flag(sbi, SBI_CP_DISABLED_QUICK);
|
2012-11-02 15:07:47 +07:00
|
|
|
return 0;
|
2014-02-19 16:23:32 +07:00
|
|
|
|
2019-01-23 05:04:33 +07:00
|
|
|
sync_free_meta:
|
|
|
|
/* safe to flush all the data */
|
|
|
|
sync_filesystem(sbi->sb);
|
2019-02-19 15:23:53 +07:00
|
|
|
retry_cnt = 0;
|
2019-01-23 05:04:33 +07:00
|
|
|
|
2017-08-08 09:54:31 +07:00
|
|
|
free_meta:
|
2017-10-06 23:14:28 +07:00
|
|
|
#ifdef CONFIG_QUOTA
|
2018-10-12 17:49:26 +07:00
|
|
|
f2fs_truncate_quota_inode_pages(sb);
|
2018-10-24 17:34:26 +07:00
|
|
|
if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sb))
|
2017-10-06 23:14:28 +07:00
|
|
|
f2fs_quota_off_umount(sbi->sb);
|
|
|
|
#endif
|
2017-08-08 09:54:31 +07:00
|
|
|
/*
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking at f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using generically named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
* Some dirty meta pages can be produced by f2fs_recover_orphan_inodes()
|
2017-08-08 09:54:31 +07:00
|
|
|
* failed by EIO. Then, iput(node_inode) can trigger balance_fs_bg()
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking at f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using generically named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
* followed by f2fs_write_checkpoint() through f2fs_write_node_pages(), which
|
|
|
|
* falls into an infinite loop in f2fs_sync_meta_pages().
|
2017-08-08 09:54:31 +07:00
|
|
|
*/
|
|
|
|
truncate_inode_pages_final(META_MAPPING(sbi));
|
2019-01-23 05:04:33 +07:00
|
|
|
/* evict some inodes being cached by GC */
|
|
|
|
evict_inodes(sb);
|
2017-07-27 01:24:13 +07:00
|
|
|
f2fs_unregister_sysfs(sbi);
|
2012-11-02 15:07:47 +07:00
|
|
|
free_root_inode:
|
|
|
|
dput(sb->s_root);
|
|
|
|
sb->s_root = NULL;
|
|
|
|
free_node_inode:
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking at f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using generically named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
f2fs_release_ino_entry(sbi, true);
|
2017-11-30 18:28:20 +07:00
|
|
|
truncate_inode_pages_final(NODE_MAPPING(sbi));
|
2012-11-02 15:07:47 +07:00
|
|
|
iput(sbi->node_inode);
|
2019-01-01 15:11:30 +07:00
|
|
|
sbi->node_inode = NULL;
|
2018-12-26 12:50:29 +07:00
|
|
|
free_stats:
|
|
|
|
f2fs_destroy_stats(sbi);
|
2012-11-02 15:07:47 +07:00
|
|
|
free_nm:
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking at f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using generically named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
f2fs_destroy_node_manager(sbi);
|
2012-11-02 15:07:47 +07:00
|
|
|
free_sm:
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking at f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using generically named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
f2fs_destroy_segment_manager(sbi);
|
2016-10-07 09:02:05 +07:00
|
|
|
free_devices:
|
|
|
|
destroy_device_list(sbi);
|
2018-12-14 09:38:33 +07:00
|
|
|
kvfree(sbi->ckpt);
|
2012-11-02 15:07:47 +07:00
|
|
|
free_meta_inode:
|
|
|
|
make_bad_inode(sbi->meta_inode);
|
|
|
|
iput(sbi->meta_inode);
|
2019-01-01 15:11:30 +07:00
|
|
|
sbi->meta_inode = NULL;
|
2016-12-15 01:12:56 +07:00
|
|
|
free_io_dummy:
|
|
|
|
mempool_destroy(sbi->write_io_dummy);
|
2018-01-17 15:31:35 +07:00
|
|
|
free_percpu:
|
|
|
|
destroy_percpu_info(sbi);
|
|
|
|
free_bio_info:
|
2017-05-11 01:18:25 +07:00
|
|
|
for (i = 0; i < NR_PAGE_TYPE; i++)
|
2018-12-14 09:38:33 +07:00
|
|
|
kvfree(sbi->write_io[i]);
|
2018-01-17 15:31:35 +07:00
|
|
|
free_options:
|
2017-08-08 09:54:31 +07:00
|
|
|
#ifdef CONFIG_QUOTA
|
|
|
|
for (i = 0; i < MAXQUOTAS; i++)
|
2018-12-14 09:38:33 +07:00
|
|
|
kvfree(F2FS_OPTION(sbi).s_qf_names[i]);
|
2017-08-08 09:54:31 +07:00
|
|
|
#endif
|
2018-12-14 09:38:33 +07:00
|
|
|
kvfree(options);
|
2012-11-02 15:07:47 +07:00
|
|
|
free_sb_buf:
|
2018-12-14 09:38:33 +07:00
|
|
|
kvfree(raw_super);
|
2012-11-02 15:07:47 +07:00
|
|
|
free_sbi:
|
2016-03-03 03:04:24 +07:00
|
|
|
if (sbi->s_chksum_driver)
|
|
|
|
crypto_free_shash(sbi->s_chksum_driver);
|
2018-12-14 09:38:33 +07:00
|
|
|
kvfree(sbi);
|
2014-08-09 05:37:41 +07:00
|
|
|
|
|
|
|
/* give only one another chance */
|
2019-02-19 15:23:53 +07:00
|
|
|
if (retry_cnt > 0 && skip_recovery) {
|
|
|
|
retry_cnt--;
|
2014-08-09 05:37:41 +07:00
|
|
|
shrink_dcache_sb(sb);
|
|
|
|
goto try_onemore;
|
|
|
|
}
|
2012-11-02 15:07:47 +07:00
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Mount callback installed in f2fs_fs_type.mount.
 * Forwards all of its arguments to mount_bdev() and passes f2fs_fill_super
 * as the superblock fill routine. Returns whatever mount_bdev() returns.
 */
static struct dentry *f2fs_mount(struct file_system_type *fs_type, int flags,
|
|
|
|
const char *dev_name, void *data)
|
|
|
|
{
|
|
|
|
return mount_bdev(fs_type, flags, dev_name, data, f2fs_fill_super);
|
|
|
|
}
|
|
|
|
|
2015-01-15 07:34:24 +07:00
|
|
|
/*
 * kill_sb callback installed in f2fs_fs_type.kill_sb.
 * When sb->s_root is set, this flags the f2fs_sb_info as closing, stops the
 * GC and discard threads, writes a CP_UMOUNT checkpoint if one is needed and
 * may clear SB_RDONLY. kill_block_super() is then called unconditionally.
 */
static void kill_f2fs_super(struct super_block *sb)
|
|
|
|
{
|
2017-06-29 22:17:45 +07:00
|
|
|
/* The f2fs specific teardown below only runs when a root dentry exists. */
if (sb->s_root) {
|
2018-07-07 06:47:34 +07:00
|
|
|
struct f2fs_sb_info *sbi = F2FS_SB(sb);
|
|
|
|
|
|
|
|
set_sbi_flag(sbi, SBI_IS_CLOSE);
|
|
|
|
f2fs_stop_gc_thread(sbi);
|
|
|
|
f2fs_stop_discard_thread(sbi);
|
|
|
|
|
|
|
|
/*
 * Write an umount checkpoint only if sbi is flagged dirty or the
 * checkpoint does not carry CP_UMOUNT_FLAG.
 */
if (is_sbi_flag_set(sbi, SBI_IS_DIRTY) ||
|
|
|
|
!is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
|
|
|
|
struct cp_control cpc = {
|
|
|
|
.reason = CP_UMOUNT,
|
|
|
|
};
|
|
|
|
f2fs_write_checkpoint(sbi, &cpc);
|
|
|
|
}
|
f2fs: fix to flush all dirty inodes recovered in readonly fs
generic/417 reported as blow:
------------[ cut here ]------------
kernel BUG at /home/yuchao/git/devf2fs/inode.c:695!
invalid opcode: 0000 [#1] PREEMPT SMP
CPU: 1 PID: 21697 Comm: umount Tainted: G W O 4.18.0-rc2+ #39
Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
EIP: f2fs_evict_inode+0x556/0x580 [f2fs]
Call Trace:
? _raw_spin_unlock+0x2c/0x50
evict+0xa8/0x170
dispose_list+0x34/0x40
evict_inodes+0x118/0x120
generic_shutdown_super+0x41/0x100
? rcu_read_lock_sched_held+0x97/0xa0
kill_block_super+0x22/0x50
kill_f2fs_super+0x6f/0x80 [f2fs]
deactivate_locked_super+0x3d/0x70
deactivate_super+0x40/0x60
cleanup_mnt+0x39/0x70
__cleanup_mnt+0x10/0x20
task_work_run+0x81/0xa0
exit_to_usermode_loop+0x59/0xa7
do_fast_syscall_32+0x1f5/0x22c
entry_SYSENTER_32+0x53/0x86
EIP: f2fs_evict_inode+0x556/0x580 [f2fs]
It can simply reproduced with scripts:
Enable quota feature during mkfs.
Testcase1:
1. mkfs.f2fs /dev/zram0
2. mount -t f2fs /dev/zram0 /mnt/f2fs
3. xfs_io -f /mnt/f2fs/file -c "pwrite 0 4k" -c "fsync"
4. godown /mnt/f2fs
5. umount /mnt/f2fs
6. mount -t f2fs -o ro /dev/zram0 /mnt/f2fs
7. umount /mnt/f2fs
Testcase2:
1. mkfs.f2fs /dev/zram0
2. mount -t f2fs /dev/zram0 /mnt/f2fs
3. touch /mnt/f2fs/file
4. create process[pid = x] do:
a) open /mnt/f2fs/file;
b) unlink /mnt/f2fs/file
5. godown -f /mnt/f2fs
6. kill process[pid = x]
7. umount /mnt/f2fs
8. mount -t f2fs -o ro /dev/zram0 /mnt/f2fs
9. umount /mnt/f2fs
The reason is: during recovery, i_{c,m}time of inode will be updated, then
the inode can be set dirty w/o being tracked in sbi->inode_list[DIRTY_META]
global list, so later write_checkpoint will not flush such dirty inode into
node page.
Once umount is called, sync_filesystem() in generic_shutdown_super() will
skip syncng dirty inodes due to sb_rdonly check, leaving dirty inodes
there.
To solve this issue, during umount, add remove SB_RDONLY flag in
sb->s_flags, to make sure sync_filesystem() will not be skipped.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-08-22 16:11:05 +07:00
|
|
|
|
|
|
|
/*
 * Per the commit note above: after recovery on a read-only mount, drop
 * SB_RDONLY so that sync_filesystem() in generic_shutdown_super() does
 * not skip the inodes dirtied by recovery.
 */
if (is_sbi_flag_set(sbi, SBI_IS_RECOVERED) && f2fs_readonly(sb))
|
|
|
|
sb->s_flags &= ~SB_RDONLY;
|
2017-06-29 22:17:45 +07:00
|
|
|
}
|
2015-01-15 07:34:24 +07:00
|
|
|
kill_block_super(sb);
|
|
|
|
}
|
|
|
|
|
2012-11-02 15:07:47 +07:00
|
|
|
/*
 * Filesystem type descriptor for f2fs. It names the filesystem, installs
 * f2fs_mount and kill_f2fs_super as the mount and kill_sb callbacks, and
 * sets FS_REQUIRES_DEV. init_f2fs_fs() passes it to register_filesystem().
 */
static struct file_system_type f2fs_fs_type = {
|
|
|
|
.owner = THIS_MODULE,
|
|
|
|
.name = "f2fs",
|
|
|
|
.mount = f2fs_mount,
|
2015-01-15 07:34:24 +07:00
|
|
|
.kill_sb = kill_f2fs_super,
|
2012-11-02 15:07:47 +07:00
|
|
|
.fs_flags = FS_REQUIRES_DEV,
|
|
|
|
};
|
2013-03-03 10:39:14 +07:00
|
|
|
MODULE_ALIAS_FS("f2fs");
|
2012-11-02 15:07:47 +07:00
|
|
|
|
2013-01-16 22:08:30 +07:00
|
|
|
/*
 * Create the slab cache named f2fs_inode_cache, sized for
 * struct f2fs_inode_info, and store it in f2fs_inode_cachep.
 * Returns 0 on success, or -ENOMEM if kmem_cache_create() returns NULL.
 */
static int __init init_inodecache(void)
|
2012-11-02 15:07:47 +07:00
|
|
|
{
|
2016-01-15 06:18:21 +07:00
|
|
|
f2fs_inode_cachep = kmem_cache_create("f2fs_inode_cache",
|
|
|
|
sizeof(struct f2fs_inode_info), 0,
|
|
|
|
SLAB_RECLAIM_ACCOUNT|SLAB_ACCOUNT, NULL);
|
2013-12-06 13:00:58 +07:00
|
|
|
if (!f2fs_inode_cachep)
|
2012-11-02 15:07:47 +07:00
|
|
|
return -ENOMEM;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Tear down the slab cache held in f2fs_inode_cachep.
 * rcu_barrier() is called first, then kmem_cache_destroy().
 */
static void destroy_inodecache(void)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* Make sure all delayed rcu free inodes are flushed before we
|
|
|
|
* destroy cache.
|
|
|
|
*/
|
|
|
|
rcu_barrier();
|
|
|
|
kmem_cache_destroy(f2fs_inode_cachep);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int __init init_f2fs_fs(void)
|
|
|
|
{
|
|
|
|
int err;
|
|
|
|
|
disable loading f2fs module on PAGE_SIZE > 4KB
The following patch disables loading of the f2fs module on architectures
which have PAGE_SIZE > 4096, since it is impossible to mount f2fs on
such architectures; the log messages are:
mount: /mnt: wrong fs type, bad option, bad superblock on
/dev/vdiskb1, missing codepage or helper program, or other error.
/dev/vdiskb1: F2FS filesystem,
UUID=1d8b9ca4-2389-4910-af3b-10998969f09c, volume name ""
May 15 18:03:13 ttip kernel: F2FS-fs (vdiskb1): Invalid
page_cache_size (8192), supports only 4KB
May 15 18:03:13 ttip kernel: F2FS-fs (vdiskb1): Can't find valid F2FS
filesystem in 1th superblock
May 15 18:03:13 ttip kernel: F2FS-fs (vdiskb1): Invalid
page_cache_size (8192), supports only 4KB
May 15 18:03:13 ttip kernel: F2FS-fs (vdiskb1): Can't find valid F2FS
filesystem in 2th superblock
May 15 18:03:13 ttip kernel: F2FS-fs (vdiskb1): Invalid
page_cache_size (8192), supports only 4KB
which was introduced by git commit 5c9b469295fb6b10d98923eab5e79c4edb80ed20
tested on git kernel 4.17.0-rc6-00309-gec30dcf7f425
with patch applied:
modprobe: ERROR: could not insert 'f2fs': Invalid argument
May 28 01:40:28 v215 kernel: F2FS not supported on PAGE_SIZE(8192) != 4096
Signed-off-by: Anatoly Pugachev <matorola@gmail.com>
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-28 06:06:37 +07:00
|
|
|
if (PAGE_SIZE != F2FS_BLKSIZE) {
|
|
|
|
printk("F2FS not supported on PAGE_SIZE(%lu) != %d\n",
|
|
|
|
PAGE_SIZE, F2FS_BLKSIZE);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2015-01-08 05:07:36 +07:00
|
|
|
f2fs_build_trace_ios();
|
|
|
|
|
2012-11-02 15:07:47 +07:00
|
|
|
err = init_inodecache();
|
|
|
|
if (err)
|
|
|
|
goto fail;
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking at f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using generically named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
err = f2fs_create_node_manager_caches();
|
2012-11-02 15:07:47 +07:00
|
|
|
if (err)
|
2013-08-20 15:49:51 +07:00
|
|
|
goto free_inodecache;
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking at f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using generically named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
err = f2fs_create_segment_manager_caches();
|
2012-11-02 15:07:47 +07:00
|
|
|
if (err)
|
2013-08-20 15:49:51 +07:00
|
|
|
goto free_node_manager_caches;
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking at f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using generically named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
err = f2fs_create_checkpoint_caches();
|
2012-11-02 15:07:47 +07:00
|
|
|
if (err)
|
2014-12-29 14:56:18 +07:00
|
|
|
goto free_segment_manager_caches;
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking at f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using generically named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
err = f2fs_create_extent_cache();
|
f2fs: enable rb-tree extent cache
This patch enables rb-tree based extent cache in f2fs.
When we mount with "-o extent_cache", f2fs will try to add recently accessed
page-block mappings into rb-tree based extent cache as much as possible, instead
of original one extent info cache.
In this way, f2fs can support a more effective cache between the dnode page cache and
disk. It will supply a high hit ratio in the cache with less memory when the dnode
page cache is reclaimed in a low-memory environment.
Storage: Sandisk sd card 64g
1.append write file (offset: 0, size: 128M);
2.override write file (offset: 2M, size: 1M);
3.override write file (offset: 4M, size: 1M);
...
4.override write file (offset: 48M, size: 1M);
...
5.override write file (offset: 112M, size: 1M);
6.sync
7.echo 3 > /proc/sys/vm/drop_caches
8.read file (size:128M, unit: 4k, count: 32768)
(time dd if=/mnt/f2fs/128m bs=4k count=32768)
Extent Hit Ratio:
before patched
Hit Ratio 121 / 1071 1071 / 1071
Performance:
before patched
real 0m37.051s 0m35.556s
user 0m0.040s 0m0.026s
sys 0m2.990s 0m2.251s
Memory Cost:
before patched
Tree Count: 0 1 (size: 24 bytes)
Node Count: 0 45 (size: 1440 bytes)
v3:
o retest and given more details of test result.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-02-05 16:57:31 +07:00
|
|
|
if (err)
|
|
|
|
goto free_checkpoint_caches;
|
2017-07-27 01:24:13 +07:00
|
|
|
err = f2fs_init_sysfs();
|
2017-06-14 16:39:46 +07:00
|
|
|
if (err)
|
f2fs: enable rb-tree extent cache
This patch enables rb-tree based extent cache in f2fs.
When we mount with "-o extent_cache", f2fs will try to add recently accessed
page-block mappings into rb-tree based extent cache as much as possible, instead
of original one extent info cache.
In this way, f2fs can support a more effective cache between the dnode page cache and
disk. It will supply a high hit ratio in the cache with less memory when the dnode
page cache is reclaimed in a low-memory environment.
Storage: Sandisk sd card 64g
1.append write file (offset: 0, size: 128M);
2.override write file (offset: 2M, size: 1M);
3.override write file (offset: 4M, size: 1M);
...
4.override write file (offset: 48M, size: 1M);
...
5.override write file (offset: 112M, size: 1M);
6.sync
7.echo 3 > /proc/sys/vm/drop_caches
8.read file (size:128M, unit: 4k, count: 32768)
(time dd if=/mnt/f2fs/128m bs=4k count=32768)
Extent Hit Ratio:
before patched
Hit Ratio 121 / 1071 1071 / 1071
Performance:
before patched
real 0m37.051s 0m35.556s
user 0m0.040s 0m0.026s
sys 0m2.990s 0m2.251s
Memory Cost:
before patched
Tree Count: 0 1 (size: 24 bytes)
Node Count: 0 45 (size: 1440 bytes)
v3:
o retest and given more details of test result.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-02-05 16:57:31 +07:00
|
|
|
goto free_extent_cache;
|
2015-06-20 02:01:21 +07:00
|
|
|
err = register_shrinker(&f2fs_shrinker_info);
|
2015-05-16 05:37:24 +07:00
|
|
|
if (err)
|
2017-06-14 16:39:46 +07:00
|
|
|
goto free_sysfs;
|
2015-06-20 02:01:21 +07:00
|
|
|
err = register_filesystem(&f2fs_fs_type);
|
|
|
|
if (err)
|
|
|
|
goto free_shrinker;
|
2019-01-04 20:26:18 +07:00
|
|
|
f2fs_create_root_stats();
|
f2fs: refactor read path to allow multiple postprocessing steps
Currently f2fs's ->readpage() and ->readpages() assume that either the
data undergoes no postprocessing, or decryption only. But with
fs-verity, there will be an additional authenticity verification step,
and it may be needed either by itself, or combined with decryption.
To support this, store a 'struct bio_post_read_ctx' in ->bi_private
which contains a work struct, a bitmask of postprocessing steps that are
enabled, and an indicator of the current step. The bio completion
routine, if there was no I/O error, enqueues the first postprocessing
step. When that completes, it continues to the next step. Pages that
fail any postprocessing step have PageError set. Once all steps have
completed, pages without PageError set are set Uptodate, and all pages
are unlocked.
Also replace f2fs_encrypted_file() with a new function
f2fs_post_read_required() in places like direct I/O and garbage
collection that really should be testing whether the file needs special
I/O processing, not whether it is encrypted specifically.
This may also be useful for other future f2fs features such as
compression.
Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-04-19 01:09:48 +07:00
|
|
|
err = f2fs_init_post_read_processing();
|
|
|
|
if (err)
|
|
|
|
goto free_root_stats;
|
2013-08-20 15:49:51 +07:00
|
|
|
return 0;
|
|
|
|
|
f2fs: refactor read path to allow multiple postprocessing steps
Currently f2fs's ->readpage() and ->readpages() assume that either the
data undergoes no postprocessing, or decryption only. But with
fs-verity, there will be an additional authenticity verification step,
and it may be needed either by itself, or combined with decryption.
To support this, store a 'struct bio_post_read_ctx' in ->bi_private
which contains a work struct, a bitmask of postprocessing steps that are
enabled, and an indicator of the current step. The bio completion
routine, if there was no I/O error, enqueues the first postprocessing
step. When that completes, it continues to the next step. Pages that
fail any postprocessing step have PageError set. Once all steps have
completed, pages without PageError set are set Uptodate, and all pages
are unlocked.
Also replace f2fs_encrypted_file() with a new function
f2fs_post_read_required() in places like direct I/O and garbage
collection that really should be testing whether the file needs special
I/O processing, not whether it is encrypted specifically.
This may also be useful for other future f2fs features such as
compression.
Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-04-19 01:09:48 +07:00
|
|
|
free_root_stats:
|
|
|
|
f2fs_destroy_root_stats();
|
2015-10-29 08:13:04 +07:00
|
|
|
unregister_filesystem(&f2fs_fs_type);
|
2015-06-20 02:01:21 +07:00
|
|
|
free_shrinker:
|
|
|
|
unregister_shrinker(&f2fs_shrinker_info);
|
2017-06-14 16:39:46 +07:00
|
|
|
free_sysfs:
|
2017-07-27 01:24:13 +07:00
|
|
|
f2fs_exit_sysfs();
|
f2fs: enable rb-tree extent cache
This patch enables rb-tree based extent cache in f2fs.
When we mount with "-o extent_cache", f2fs will try to add recently accessed
page-block mappings into rb-tree based extent cache as much as possible, instead
of original one extent info cache.
In this way, f2fs can support a more effective cache between the dnode page cache and
disk. It will supply a high hit ratio in the cache with less memory when the dnode
page cache is reclaimed in a low-memory environment.
Storage: Sandisk sd card 64g
1.append write file (offset: 0, size: 128M);
2.override write file (offset: 2M, size: 1M);
3.override write file (offset: 4M, size: 1M);
...
4.override write file (offset: 48M, size: 1M);
...
5.override write file (offset: 112M, size: 1M);
6.sync
7.echo 3 > /proc/sys/vm/drop_caches
8.read file (size:128M, unit: 4k, count: 32768)
(time dd if=/mnt/f2fs/128m bs=4k count=32768)
Extent Hit Ratio:
before patched
Hit Ratio 121 / 1071 1071 / 1071
Performance:
before patched
real 0m37.051s 0m35.556s
user 0m0.040s 0m0.026s
sys 0m2.990s 0m2.251s
Memory Cost:
before patched
Tree Count: 0 1 (size: 24 bytes)
Node Count: 0 45 (size: 1440 bytes)
v3:
o retest and given more details of test result.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-02-05 16:57:31 +07:00
|
|
|
free_extent_cache:
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking at f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using generically named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
f2fs_destroy_extent_cache();
|
2013-08-20 15:49:51 +07:00
|
|
|
free_checkpoint_caches:
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking at f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using generically named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
f2fs_destroy_checkpoint_caches();
|
2013-11-15 11:55:58 +07:00
|
|
|
free_segment_manager_caches:
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking at f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using generically named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds an "f2fs_" prefix to all non-static symbols in order to:
a) avoid conflicts with other generic kernel symbols;
b) indicate that a function is f2fs-specific rather than a generic
one.
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
f2fs_destroy_segment_manager_caches();
|
2013-08-20 15:49:51 +07:00
|
|
|
free_node_manager_caches:
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking at f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using generically named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds an "f2fs_" prefix to all non-static symbols in order to:
a) avoid conflicts with other generic kernel symbols;
b) indicate that a function is f2fs-specific rather than a generic
one.
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
f2fs_destroy_node_manager_caches();
|
2013-08-20 15:49:51 +07:00
|
|
|
free_inodecache:
|
|
|
|
destroy_inodecache();
|
2012-11-02 15:07:47 +07:00
|
|
|
fail:
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * exit_f2fs_fs - module exit handler, registered through module_exit()
 * further down in this file.
 *
 * Takes no arguments and returns nothing. It issues the following calls,
 * in this order:
 *   f2fs_destroy_post_read_processing()
 *   f2fs_destroy_root_stats()
 *   unregister_filesystem(&f2fs_fs_type)
 *   unregister_shrinker(&f2fs_shrinker_info)
 *   f2fs_exit_sysfs()
 *   f2fs_destroy_extent_cache()
 *   f2fs_destroy_checkpoint_caches()
 *   f2fs_destroy_segment_manager_caches()
 *   f2fs_destroy_node_manager_caches()
 *   destroy_inodecache()
 *   f2fs_destroy_trace_ios()
 *
 * The checkpoint, segment manager, node manager and inode cache calls
 * appear in the same relative order as the error-unwind labels at the end
 * of init_f2fs_fs().
 *
 * NOTE(review): the full sequence presumably mirrors the setup order of
 * init_f2fs_fs() in reverse; the start of that function is not visible
 * from this part of the file, so confirm before reordering any call.
 */
static void __exit exit_f2fs_fs(void)
|
|
|
|
{
|
f2fs: refactor read path to allow multiple postprocessing steps
Currently f2fs's ->readpage() and ->readpages() assume that either the
data undergoes no postprocessing, or decryption only. But with
fs-verity, there will be an additional authenticity verification step,
and it may be needed either by itself, or combined with decryption.
To support this, store a 'struct bio_post_read_ctx' in ->bi_private
which contains a work struct, a bitmask of postprocessing steps that are
enabled, and an indicator of the current step. The bio completion
routine, if there was no I/O error, enqueues the first postprocessing
step. When that completes, it continues to the next step. Pages that
fail any postprocessing step have PageError set. Once all steps have
completed, pages without PageError set are set Uptodate, and all pages
are unlocked.
Also replace f2fs_encrypted_file() with a new function
f2fs_post_read_required() in places like direct I/O and garbage
collection that really should be testing whether the file needs special
I/O processing, not whether it is encrypted specifically.
This may also be useful for other future f2fs features such as
compression.
Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-04-19 01:09:48 +07:00
|
|
|
f2fs_destroy_post_read_processing();
|
2013-01-15 17:58:47 +07:00
|
|
|
f2fs_destroy_root_stats();
|
2012-11-02 15:07:47 +07:00
|
|
|
unregister_filesystem(&f2fs_fs_type);
|
2016-05-18 07:02:25 +07:00
|
|
|
unregister_shrinker(&f2fs_shrinker_info);
|
2017-07-27 01:24:13 +07:00
|
|
|
f2fs_exit_sysfs();
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
f2fs_destroy_extent_cache();
|
|
|
|
f2fs_destroy_checkpoint_caches();
|
|
|
|
f2fs_destroy_segment_manager_caches();
|
|
|
|
f2fs_destroy_node_manager_caches();
|
2012-11-02 15:07:47 +07:00
|
|
|
destroy_inodecache();
|
2015-01-08 05:09:48 +07:00
|
|
|
f2fs_destroy_trace_ios();
|
2012-11-02 15:07:47 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Module registration and metadata: init_f2fs_fs() is named as the module
 * init routine and exit_f2fs_fs() as the module exit routine, followed by
 * the author, description and license strings for the module.
 */
module_init(init_f2fs_fs)
|
|
|
|
module_exit(exit_f2fs_fs)
|
|
|
|
|
|
|
|
MODULE_AUTHOR("Samsung Electronics's Praesto Team");
|
|
|
|
MODULE_DESCRIPTION("Flash Friendly File System");
|
|
|
|
MODULE_LICENSE("GPL");
|
2016-11-05 04:59:15 +07:00
|
|
|
|