2018-09-12 08:16:07 +07:00
|
|
|
// SPDX-License-Identifier: GPL-2.0
|
2012-11-29 11:28:09 +07:00
|
|
|
/*
|
2012-11-02 15:09:44 +07:00
|
|
|
* fs/f2fs/file.c
|
|
|
|
*
|
|
|
|
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
|
|
|
|
* http://www.samsung.com/
|
|
|
|
*/
|
|
|
|
#include <linux/fs.h>
|
|
|
|
#include <linux/f2fs_fs.h>
|
|
|
|
#include <linux/stat.h>
|
|
|
|
#include <linux/buffer_head.h>
|
|
|
|
#include <linux/writeback.h>
|
2013-03-16 09:13:04 +07:00
|
|
|
#include <linux/blkdev.h>
|
2012-11-02 15:09:44 +07:00
|
|
|
#include <linux/falloc.h>
|
|
|
|
#include <linux/types.h>
|
2013-02-04 21:41:41 +07:00
|
|
|
#include <linux/compat.h>
|
2012-11-02 15:09:44 +07:00
|
|
|
#include <linux/uaccess.h>
|
|
|
|
#include <linux/mount.h>
|
2014-04-28 16:12:36 +07:00
|
|
|
#include <linux/pagevec.h>
|
2017-01-14 04:12:29 +07:00
|
|
|
#include <linux/uio.h>
|
2016-05-21 07:01:00 +07:00
|
|
|
#include <linux/uuid.h>
|
2016-07-09 05:16:47 +07:00
|
|
|
#include <linux/file.h>
|
2012-11-02 15:09:44 +07:00
|
|
|
|
|
|
|
#include "f2fs.h"
|
|
|
|
#include "node.h"
|
|
|
|
#include "segment.h"
|
|
|
|
#include "xattr.h"
|
|
|
|
#include "acl.h"
|
2015-07-10 17:08:10 +07:00
|
|
|
#include "gc.h"
|
2014-12-18 11:04:08 +07:00
|
|
|
#include "trace.h"
|
2013-04-19 23:28:40 +07:00
|
|
|
#include <trace/events/f2fs.h>
|
2012-11-02 15:09:44 +07:00
|
|
|
|
2018-04-15 03:10:02 +07:00
|
|
|
static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf)
|
2017-05-18 10:06:45 +07:00
|
|
|
{
|
|
|
|
struct inode *inode = file_inode(vmf->vma->vm_file);
|
2018-04-15 03:10:02 +07:00
|
|
|
vm_fault_t ret;
|
2017-05-18 10:06:45 +07:00
|
|
|
|
|
|
|
down_read(&F2FS_I(inode)->i_mmap_sem);
|
2018-04-15 03:10:02 +07:00
|
|
|
ret = filemap_fault(vmf);
|
2017-05-18 10:06:45 +07:00
|
|
|
up_read(&F2FS_I(inode)->i_mmap_sem);
|
|
|
|
|
2018-04-15 03:10:02 +07:00
|
|
|
return ret;
|
2017-05-18 10:06:45 +07:00
|
|
|
}
|
|
|
|
|
2018-04-15 03:10:02 +07:00
|
|
|
static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
|
2012-11-02 15:09:44 +07:00
|
|
|
{
|
|
|
|
struct page *page = vmf->page;
|
2017-02-25 05:56:41 +07:00
|
|
|
struct inode *inode = file_inode(vmf->vma->vm_file);
|
2014-09-03 05:31:18 +07:00
|
|
|
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
|
2018-09-27 17:33:18 +07:00
|
|
|
struct dnode_of_data dn = { .node_changed = false };
|
f2fs: use rw_sem instead of fs_lock(locks mutex)
The fs_locks is used to block other ops(ex, recovery) when doing checkpoint.
And each other operate routine(besides checkpoint) needs to acquire a fs_lock,
there is a terrible problem here, if these are too many concurrency threads acquiring
fs_lock, so that they will block each other and may lead to some performance problem,
but this is not the phenomenon we want to see.
Though there are some optimization patches introduced to enhance the usage of fs_lock,
but the thorough solution is using a *rw_sem* to replace the fs_lock.
Checkpoint routine takes write_sem, and other ops take read_sem, so that we can block
other ops(ex, recovery) when doing checkpoint, and other ops will not disturb each other,
this can avoid the problem described above completely.
Because of the weakness of rw_sem, the above change may introduce a potential problem
that the checkpoint thread might get starved if other threads are intensively locking
the read semaphore for I/O.(Pointed out by Xu Jin)
In order to avoid this, a wait_list is introduced, the appending read semaphore ops
will be dropped into the wait_list if checkpoint thread is waiting for write semaphore,
and will be waked up when checkpoint thread gives up write semaphore.
Thanks to Kim's previous review and test, and will be very glad to see other guys'
performance tests about this patch.
V2:
-fix the potential starvation problem.
-use more suitable func name suggested by Xu Jin.
Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
[Jaegeuk Kim: adjust minor coding standard]
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
2013-09-27 17:08:30 +07:00
|
|
|
int err;
|
2012-11-02 15:09:44 +07:00
|
|
|
|
2017-10-24 04:48:49 +07:00
|
|
|
if (unlikely(f2fs_cp_error(sbi))) {
|
|
|
|
err = -EIO;
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
2012-11-02 15:09:44 +07:00
|
|
|
sb_start_pagefault(inode->i_sb);
|
2014-10-24 09:48:09 +07:00
|
|
|
|
|
|
|
f2fs_bug_on(sbi, f2fs_has_inline_data(inode));
|
2014-08-08 06:32:25 +07:00
|
|
|
|
2017-02-25 05:56:41 +07:00
|
|
|
file_update_time(vmf->vma->vm_file);
|
2017-05-18 10:06:45 +07:00
|
|
|
down_read(&F2FS_I(inode)->i_mmap_sem);
|
2012-11-02 15:09:44 +07:00
|
|
|
lock_page(page);
|
2013-12-06 13:00:58 +07:00
|
|
|
if (unlikely(page->mapping != inode->i_mapping ||
|
2013-04-28 07:04:18 +07:00
|
|
|
page_offset(page) > i_size_read(inode) ||
|
2013-12-06 13:00:58 +07:00
|
|
|
!PageUptodate(page))) {
|
2012-11-02 15:09:44 +07:00
|
|
|
unlock_page(page);
|
|
|
|
err = -EFAULT;
|
2017-05-18 10:06:45 +07:00
|
|
|
goto out_sem;
|
2012-11-02 15:09:44 +07:00
|
|
|
}
|
|
|
|
|
2018-09-27 17:33:18 +07:00
|
|
|
/* block allocation */
|
|
|
|
__do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true);
|
|
|
|
set_new_dnode(&dn, inode, NULL, NULL, 0);
|
|
|
|
err = f2fs_get_block(&dn, page->index);
|
|
|
|
f2fs_put_dnode(&dn);
|
|
|
|
__do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false);
|
|
|
|
if (err) {
|
|
|
|
unlock_page(page);
|
|
|
|
goto out_sem;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* fill the page */
|
2018-12-25 16:43:42 +07:00
|
|
|
f2fs_wait_on_page_writeback(page, DATA, false, true);
|
2018-09-27 17:33:18 +07:00
|
|
|
|
|
|
|
/* wait for GCed page writeback via META_MAPPING */
|
|
|
|
f2fs_wait_on_block_writeback(inode, dn.data_blkaddr);
|
|
|
|
|
2012-11-02 15:09:44 +07:00
|
|
|
/*
|
|
|
|
* check to see if the page is mapped already (no holes)
|
|
|
|
*/
|
|
|
|
if (PageMappedToDisk(page))
|
2018-09-27 17:33:18 +07:00
|
|
|
goto out_sem;
|
2012-11-02 15:09:44 +07:00
|
|
|
|
|
|
|
/* page is wholly or partially inside EOF */
|
mm, fs: get rid of PAGE_CACHE_* and page_cache_{get,release} macros
PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} macros were introduced *long* time
ago with promise that one day it will be possible to implement page
cache with bigger chunks than PAGE_SIZE.
This promise never materialized. And unlikely will.
We have many places where PAGE_CACHE_SIZE assumed to be equal to
PAGE_SIZE. And it's constant source of confusion on whether
PAGE_CACHE_* or PAGE_* constant should be used in a particular case,
especially on the border between fs and mm.
Global switching to PAGE_CACHE_SIZE != PAGE_SIZE would cause to much
breakage to be doable.
Let's stop pretending that pages in page cache are special. They are
not.
The changes are pretty straight-forward:
- <foo> << (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- <foo> >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} -> PAGE_{SIZE,SHIFT,MASK,ALIGN};
- page_cache_get() -> get_page();
- page_cache_release() -> put_page();
This patch contains automated changes generated with coccinelle using
script below. For some reason, coccinelle doesn't patch header files.
I've called spatch for them manually.
The only adjustment after coccinelle is revert of changes to
PAGE_CAHCE_ALIGN definition: we are going to drop it later.
There are few places in the code where coccinelle didn't reach. I'll
fix them manually in a separate patch. Comments and documentation also
will be addressed with the separate patch.
virtual patch
@@
expression E;
@@
- E << (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
expression E;
@@
- E >> (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
@@
- PAGE_CACHE_SHIFT
+ PAGE_SHIFT
@@
@@
- PAGE_CACHE_SIZE
+ PAGE_SIZE
@@
@@
- PAGE_CACHE_MASK
+ PAGE_MASK
@@
expression E;
@@
- PAGE_CACHE_ALIGN(E)
+ PAGE_ALIGN(E)
@@
expression E;
@@
- page_cache_get(E)
+ get_page(E)
@@
expression E;
@@
- page_cache_release(E)
+ put_page(E)
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-04-01 19:29:47 +07:00
|
|
|
if (((loff_t)(page->index + 1) << PAGE_SHIFT) >
|
2015-09-11 13:43:52 +07:00
|
|
|
i_size_read(inode)) {
|
2018-05-30 02:21:14 +07:00
|
|
|
loff_t offset;
|
2018-05-30 02:33:07 +07:00
|
|
|
|
mm, fs: get rid of PAGE_CACHE_* and page_cache_{get,release} macros
PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} macros were introduced *long* time
ago with promise that one day it will be possible to implement page
cache with bigger chunks than PAGE_SIZE.
This promise never materialized. And unlikely will.
We have many places where PAGE_CACHE_SIZE assumed to be equal to
PAGE_SIZE. And it's constant source of confusion on whether
PAGE_CACHE_* or PAGE_* constant should be used in a particular case,
especially on the border between fs and mm.
Global switching to PAGE_CACHE_SIZE != PAGE_SIZE would cause to much
breakage to be doable.
Let's stop pretending that pages in page cache are special. They are
not.
The changes are pretty straight-forward:
- <foo> << (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- <foo> >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} -> PAGE_{SIZE,SHIFT,MASK,ALIGN};
- page_cache_get() -> get_page();
- page_cache_release() -> put_page();
This patch contains automated changes generated with coccinelle using
script below. For some reason, coccinelle doesn't patch header files.
I've called spatch for them manually.
The only adjustment after coccinelle is revert of changes to
PAGE_CAHCE_ALIGN definition: we are going to drop it later.
There are few places in the code where coccinelle didn't reach. I'll
fix them manually in a separate patch. Comments and documentation also
will be addressed with the separate patch.
virtual patch
@@
expression E;
@@
- E << (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
expression E;
@@
- E >> (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
@@
- PAGE_CACHE_SHIFT
+ PAGE_SHIFT
@@
@@
- PAGE_CACHE_SIZE
+ PAGE_SIZE
@@
@@
- PAGE_CACHE_MASK
+ PAGE_MASK
@@
expression E;
@@
- PAGE_CACHE_ALIGN(E)
+ PAGE_ALIGN(E)
@@
expression E;
@@
- page_cache_get(E)
+ get_page(E)
@@
expression E;
@@
- page_cache_release(E)
+ put_page(E)
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-04-01 19:29:47 +07:00
|
|
|
offset = i_size_read(inode) & ~PAGE_MASK;
|
|
|
|
zero_user_segment(page, offset, PAGE_SIZE);
|
2012-11-02 15:09:44 +07:00
|
|
|
}
|
|
|
|
set_page_dirty(page);
|
2016-07-01 08:49:15 +07:00
|
|
|
if (!PageUptodate(page))
|
|
|
|
SetPageUptodate(page);
|
2012-11-02 15:09:44 +07:00
|
|
|
|
2017-08-02 22:21:48 +07:00
|
|
|
f2fs_update_iostat(sbi, APP_MAPPED_IO, F2FS_BLKSIZE);
|
2018-10-05 12:17:39 +07:00
|
|
|
f2fs_update_time(sbi, REQ_TIME);
|
2017-08-02 22:21:48 +07:00
|
|
|
|
2013-10-25 12:26:31 +07:00
|
|
|
trace_f2fs_vm_page_mkwrite(page, DATA);
|
2017-05-18 10:06:45 +07:00
|
|
|
out_sem:
|
|
|
|
up_read(&F2FS_I(inode)->i_mmap_sem);
|
2018-09-27 17:33:18 +07:00
|
|
|
|
|
|
|
f2fs_balance_fs(sbi, dn.node_changed);
|
|
|
|
|
2012-11-02 15:09:44 +07:00
|
|
|
sb_end_pagefault(inode->i_sb);
|
2017-10-24 04:48:49 +07:00
|
|
|
err:
|
2012-11-02 15:09:44 +07:00
|
|
|
return block_page_mkwrite_return(err);
|
|
|
|
}
|
|
|
|
|
|
|
|
static const struct vm_operations_struct f2fs_file_vm_ops = {
|
2017-05-18 10:06:45 +07:00
|
|
|
.fault = f2fs_filemap_fault,
|
2014-04-08 05:37:19 +07:00
|
|
|
.map_pages = filemap_map_pages,
|
2013-01-17 16:37:41 +07:00
|
|
|
.page_mkwrite = f2fs_vm_page_mkwrite,
|
2012-11-02 15:09:44 +07:00
|
|
|
};
|
|
|
|
|
2013-06-14 06:52:35 +07:00
|
|
|
/*
 * Look up the inode number of @inode's parent directory through any
 * dentry alias of the inode.
 *
 * Returns 1 and stores the parent inode number in *@pino when an alias
 * is found; returns 0 and leaves *@pino untouched otherwise.
 */
static int get_parent_ino(struct inode *inode, nid_t *pino)
{
	struct dentry *alias;

	/* hold an extra inode reference across the alias lookup */
	inode = igrab(inode);
	alias = d_find_any_alias(inode);
	iput(inode);

	if (alias) {
		*pino = parent_ino(alias);
		dput(alias);
		return 1;
	}

	return 0;
}
|
|
|
|
|
2017-11-06 21:51:45 +07:00
|
|
|
static inline enum cp_reason_type need_do_checkpoint(struct inode *inode)
|
2014-08-20 17:37:35 +07:00
|
|
|
{
|
2014-09-03 05:31:18 +07:00
|
|
|
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
|
2017-11-06 21:51:45 +07:00
|
|
|
enum cp_reason_type cp_reason = CP_NO_NEEDED;
|
2014-08-20 17:37:35 +07:00
|
|
|
|
2017-11-06 21:51:45 +07:00
|
|
|
if (!S_ISREG(inode->i_mode))
|
|
|
|
cp_reason = CP_NON_REGULAR;
|
|
|
|
else if (inode->i_nlink != 1)
|
|
|
|
cp_reason = CP_HARDLINK;
|
2016-08-30 08:23:45 +07:00
|
|
|
else if (is_sbi_flag_set(sbi, SBI_NEED_CP))
|
2017-11-06 21:51:45 +07:00
|
|
|
cp_reason = CP_SB_NEED_CP;
|
2014-08-20 17:37:35 +07:00
|
|
|
else if (file_wrong_pino(inode))
|
2017-11-06 21:51:45 +07:00
|
|
|
cp_reason = CP_WRONG_PINO;
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
else if (!f2fs_space_for_roll_forward(sbi))
|
2017-11-06 21:51:45 +07:00
|
|
|
cp_reason = CP_NO_SPC_ROLL;
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
else if (!f2fs_is_checkpointed_node(sbi, F2FS_I(inode)->i_pino))
|
2017-11-06 21:51:45 +07:00
|
|
|
cp_reason = CP_NODE_NEED_CP;
|
2014-10-31 12:47:03 +07:00
|
|
|
else if (test_opt(sbi, FASTBOOT))
|
2017-11-06 21:51:45 +07:00
|
|
|
cp_reason = CP_FASTBOOT_MODE;
|
2018-03-08 13:22:56 +07:00
|
|
|
else if (F2FS_OPTION(sbi).active_logs == 2)
|
2017-11-06 21:51:45 +07:00
|
|
|
cp_reason = CP_SPEC_LOG_NUM;
|
2018-03-08 13:22:56 +07:00
|
|
|
else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT &&
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
f2fs_need_dentry_mark(sbi, inode->i_ino) &&
|
|
|
|
f2fs_exist_written_data(sbi, F2FS_I(inode)->i_pino,
|
|
|
|
TRANS_DIR_INO))
|
2017-12-28 23:09:44 +07:00
|
|
|
cp_reason = CP_RECOVER_DIR;
|
2014-08-20 17:37:35 +07:00
|
|
|
|
2017-11-06 21:51:45 +07:00
|
|
|
return cp_reason;
|
2014-08-20 17:37:35 +07:00
|
|
|
}
|
|
|
|
|
2014-12-08 13:29:40 +07:00
|
|
|
/*
 * Tell whether the inode block of @ino still has updates to write out:
 * true when its node page is present in the node mapping and dirty, or
 * when f2fs_need_inode_block_update() says so.
 */
static bool need_inode_page_update(struct f2fs_sb_info *sbi, nid_t ino)
{
	struct page *ipage = find_get_page(NODE_MAPPING(sbi), ino);
	bool need_update;

	/* But we need to avoid that there are some inode updates */
	need_update = (ipage && PageDirty(ipage)) ||
			f2fs_need_inode_block_update(sbi, ino);

	/* drop the reference taken by find_get_page(); ipage may be NULL */
	f2fs_put_page(ipage, 0);

	return need_update;
}
|
|
|
|
|
2014-12-08 13:29:41 +07:00
|
|
|
static void try_to_fix_pino(struct inode *inode)
|
|
|
|
{
|
|
|
|
struct f2fs_inode_info *fi = F2FS_I(inode);
|
|
|
|
nid_t pino;
|
|
|
|
|
|
|
|
down_write(&fi->i_sem);
|
|
|
|
if (file_wrong_pino(inode) && inode->i_nlink == 1 &&
|
|
|
|
get_parent_ino(inode, &pino)) {
|
2016-05-20 23:52:20 +07:00
|
|
|
f2fs_i_pino_write(inode, pino);
|
2014-12-08 13:29:41 +07:00
|
|
|
file_got_pino(inode);
|
|
|
|
}
|
2016-05-21 06:32:49 +07:00
|
|
|
up_write(&fi->i_sem);
|
2014-12-08 13:29:41 +07:00
|
|
|
}
|
|
|
|
|
2016-04-15 23:43:17 +07:00
|
|
|
static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
|
|
|
|
int datasync, bool atomic)
|
2012-11-02 15:09:44 +07:00
|
|
|
{
|
|
|
|
struct inode *inode = file->f_mapping->host;
|
2014-09-03 05:31:18 +07:00
|
|
|
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
|
2014-09-11 04:58:18 +07:00
|
|
|
nid_t ino = inode->i_ino;
|
2012-11-02 15:09:44 +07:00
|
|
|
int ret = 0;
|
2017-11-06 21:51:45 +07:00
|
|
|
enum cp_reason_type cp_reason = 0;
|
2012-11-02 15:09:44 +07:00
|
|
|
struct writeback_control wbc = {
|
2014-03-03 09:28:40 +07:00
|
|
|
.sync_mode = WB_SYNC_ALL,
|
2012-11-02 15:09:44 +07:00
|
|
|
.nr_to_write = LONG_MAX,
|
|
|
|
.for_reclaim = 0,
|
|
|
|
};
|
f2fs: fix to avoid broken of dnode block list
f2fs recovery flow is relying on dnode block link list, it means fsynced
file recovery depends on previous dnode's persistence in the list, so
during fsync() we should wait on all regular inode's dnode writebacked
before issuing flush.
By this way, we can avoid dnode block list being broken by out-of-order
IO submission due to IO scheduler or driver.
Sheng Yong helps to do the test with this patch:
Target:/data (f2fs, -)
64MB / 32768KB / 4KB / 8
1 / PERSIST / Index
Base:
SEQ-RD(MB/s) SEQ-WR(MB/s) RND-RD(IOPS) RND-WR(IOPS) Insert(TPS) Update(TPS) Delete(TPS)
1 867.82 204.15 41440.03 41370.54 680.8 1025.94 1031.08
2 871.87 205.87 41370.3 40275.2 791.14 1065.84 1101.7
3 866.52 205.69 41795.67 40596.16 694.69 1037.16 1031.48
Avg 868.7366667 205.2366667 41535.33333 40747.3 722.21 1042.98 1054.753333
After:
SEQ-RD(MB/s) SEQ-WR(MB/s) RND-RD(IOPS) RND-WR(IOPS) Insert(TPS) Update(TPS) Delete(TPS)
1 798.81 202.5 41143 40613.87 602.71 838.08 913.83
2 805.79 206.47 40297.2 41291.46 604.44 840.75 924.27
3 814.83 206.17 41209.57 40453.62 602.85 834.66 927.91
Avg 806.4766667 205.0466667 40883.25667 40786.31667 603.3333333 837.83 922.0033333
Patched/Original:
0.928332713 0.999074239 0.984300676 1.000957528 0.835398753 0.803303994 0.874141189
It looks like atomic write will suffer performance regression.
I suspect that the criminal is that we forcing to wait all dnode being in
storage cache before we issue PREFLUSH+FUA.
BTW, will commit ("f2fs: don't need to wait for node writes for atomic write")
cause the problem: we will lose data of last transaction after SPO, even if
atomic write return no error:
- atomic_open();
- write() P1, P2, P3;
- atomic_commit();
- writeback data: P1, P2, P3;
- writeback node: N1, N2, N3; <--- If N1, N2 is not writebacked, N3 with fsync_mark is
writebacked, In SPOR, we won't find N3 since node chain is broken, turns out that losing
last transaction.
- preflush + fua;
- power-cut
If we don't wait dnode writeback for atomic_write:
SEQ-RD(MB/s) SEQ-WR(MB/s) RND-RD(IOPS) RND-WR(IOPS) Insert(TPS) Update(TPS) Delete(TPS)
1 779.91 206.03 41621.5 40333.16 716.9 1038.21 1034.85
2 848.51 204.35 40082.44 39486.17 791.83 1119.96 1083.77
3 772.12 206.27 41335.25 41599.65 723.29 1055.07 971.92
Avg 800.18 205.55 41013.06333 40472.99333 744.0066667 1071.08 1030.18
Patched/Original:
0.92108464 1.001526693 0.987425886 0.993268102 1.030180511 1.026942031 0.976702294
SQLite's performance recovers.
Jaegeuk:
"Practically, I don't see db corruption because of this. We can excuse to lose
the last transaction."
Finally, we decide to keep original implementation of atomic write interface
semantics that we don't wait all dnode writeback before preflush+fua submission.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-08-02 22:03:19 +07:00
|
|
|
unsigned int seq_id = 0;
|
2012-11-02 15:09:44 +07:00
|
|
|
|
2018-08-21 09:21:43 +07:00
|
|
|
if (unlikely(f2fs_readonly(inode->i_sb) ||
|
|
|
|
is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
|
2012-12-01 08:56:01 +07:00
|
|
|
return 0;
|
|
|
|
|
2013-04-19 23:28:40 +07:00
|
|
|
trace_f2fs_sync_file_enter(inode);
|
2014-07-25 09:11:43 +07:00
|
|
|
|
2018-11-06 09:25:29 +07:00
|
|
|
if (S_ISDIR(inode->i_mode))
|
|
|
|
goto go_write;
|
|
|
|
|
2014-07-25 09:11:43 +07:00
|
|
|
/* if fdatasync is triggered, let's do in-place-update */
|
2016-01-01 04:49:17 +07:00
|
|
|
if (datasync || get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks)
|
2016-05-21 00:13:22 +07:00
|
|
|
set_inode_flag(inode, FI_NEED_IPU);
|
2017-07-08 02:20:52 +07:00
|
|
|
ret = file_write_and_wait_range(file, start, end);
|
2016-05-21 00:13:22 +07:00
|
|
|
clear_inode_flag(inode, FI_NEED_IPU);
|
2014-09-11 06:53:02 +07:00
|
|
|
|
2013-04-19 23:28:40 +07:00
|
|
|
if (ret) {
|
2017-11-06 21:51:45 +07:00
|
|
|
trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret);
|
2012-11-02 15:09:44 +07:00
|
|
|
return ret;
|
2013-04-19 23:28:40 +07:00
|
|
|
}
|
2012-11-02 15:09:44 +07:00
|
|
|
|
2014-12-08 13:29:40 +07:00
|
|
|
/* if the inode is dirty, let's recover all the time */
|
2016-11-17 19:53:31 +07:00
|
|
|
if (!f2fs_skip_inode_update(inode, datasync)) {
|
2015-08-16 11:51:05 +07:00
|
|
|
f2fs_write_inode(inode, NULL);
|
2014-12-08 13:29:40 +07:00
|
|
|
goto go_write;
|
|
|
|
}
|
|
|
|
|
2014-07-25 09:08:02 +07:00
|
|
|
/*
|
|
|
|
* if there is no written data, don't waste time to write recovery info.
|
|
|
|
*/
|
2016-05-21 00:13:22 +07:00
|
|
|
if (!is_inode_flag_set(inode, FI_APPEND_WRITE) &&
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using generically named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
!f2fs_exist_written_data(sbi, ino, APPEND_INO)) {
|
2014-09-11 05:04:03 +07:00
|
|
|
|
2014-12-08 13:29:40 +07:00
|
|
|
/* it may call write_inode just prior to fsync */
|
|
|
|
if (need_inode_page_update(sbi, ino))
|
2014-09-11 05:04:03 +07:00
|
|
|
goto go_write;
|
|
|
|
|
2016-05-21 00:13:22 +07:00
|
|
|
if (is_inode_flag_set(inode, FI_UPDATE_WRITE) ||
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
f2fs_exist_written_data(sbi, ino, UPDATE_INO))
|
2014-07-25 09:08:02 +07:00
|
|
|
goto flush_out;
|
|
|
|
goto out;
|
|
|
|
}
|
2014-09-11 05:04:03 +07:00
|
|
|
go_write:
|
2013-07-03 08:55:52 +07:00
|
|
|
/*
|
|
|
|
* Both of fdatasync() and fsync() are able to be recovered from
|
|
|
|
* sudden-power-off.
|
|
|
|
*/
|
2016-05-21 00:13:22 +07:00
|
|
|
down_read(&F2FS_I(inode)->i_sem);
|
2017-11-06 21:51:45 +07:00
|
|
|
cp_reason = need_do_checkpoint(inode);
|
2016-05-21 00:13:22 +07:00
|
|
|
up_read(&F2FS_I(inode)->i_sem);
|
2014-03-20 17:10:08 +07:00
|
|
|
|
2017-11-06 21:51:45 +07:00
|
|
|
if (cp_reason) {
|
2012-11-02 15:09:44 +07:00
|
|
|
/* all the dirty node pages should be flushed for POR */
|
|
|
|
ret = f2fs_sync_fs(inode->i_sb, 1);
|
2014-03-20 17:10:08 +07:00
|
|
|
|
2014-12-08 13:29:41 +07:00
|
|
|
/*
|
|
|
|
* We've secured consistency through sync_fs. Following pino
|
|
|
|
* will be used only for fsynced inodes after checkpoint.
|
|
|
|
*/
|
|
|
|
try_to_fix_pino(inode);
|
2016-05-21 00:13:22 +07:00
|
|
|
clear_inode_flag(inode, FI_APPEND_WRITE);
|
|
|
|
clear_inode_flag(inode, FI_UPDATE_WRITE);
|
2014-12-08 13:29:41 +07:00
|
|
|
goto out;
|
|
|
|
}
|
f2fs: fix conditions to remain recovery information in f2fs_sync_file
This patch revisited whole the recovery information during the f2fs_sync_file.
In this patch, there are three information to make a decision.
a) IS_CHECKPOINTED, /* is it checkpointed before? */
b) HAS_FSYNCED_INODE, /* is the inode fsynced before? */
c) HAS_LAST_FSYNC, /* has the latest node fsync mark? */
And, the scenarios for our rule are based on:
[Term] F: fsync_mark, D: dentry_mark
1. inode(x) | CP | inode(x) | dnode(F)
2. inode(x) | CP | inode(F) | dnode(F)
3. inode(x) | CP | dnode(F) | inode(x) | inode(F)
4. inode(x) | CP | dnode(F) | inode(F)
5. CP | inode(x) | dnode(F) | inode(DF)
6. CP | inode(DF) | dnode(F)
7. CP | dnode(F) | inode(DF)
8. CP | dnode(F) | inode(x) | inode(DF)
For example, #3, the three conditions should be changed as follows.
inode(x) | CP | dnode(F) | inode(x) | inode(F)
a) x o o o o
b) x x x x o
c) x o o x o
If f2fs_sync_file stops ------^,
it should write inode(F) --------------^
So, the need_inode_block_update should return true, since
c) get_nat_flag(e, HAS_LAST_FSYNC), is false.
For example, #8,
CP | alloc | dnode(F) | inode(x) | inode(DF)
a) o x x x x
b) x x x o
c) o o x o
If f2fs_sync_file stops -------^,
it should write inode(DF) --------------^
Note that, the roll-forward policy should follow this rule, which means,
if there are any missing blocks, we don't need to recover that inode.
Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2014-09-16 04:50:48 +07:00
|
|
|
sync_nodes:
|
2018-06-04 22:20:36 +07:00
|
|
|
atomic_inc(&sbi->wb_sync_req[NODE]);
|
f2fs: fix to avoid broken of dnode block list
f2fs recovery flow is relying on dnode block link list, it means fsynced
file recovery depends on previous dnode's persistence in the list, so
during fsync() we should wait on all regular inode's dnode writebacked
before issuing flush.
By this way, we can avoid dnode block list being broken by out-of-order
IO submission due to IO scheduler or driver.
Sheng Yong helps to do the test with this patch:
Target:/data (f2fs, -)
64MB / 32768KB / 4KB / 8
1 / PERSIST / Index
Base:
SEQ-RD(MB/s) SEQ-WR(MB/s) RND-RD(IOPS) RND-WR(IOPS) Insert(TPS) Update(TPS) Delete(TPS)
1 867.82 204.15 41440.03 41370.54 680.8 1025.94 1031.08
2 871.87 205.87 41370.3 40275.2 791.14 1065.84 1101.7
3 866.52 205.69 41795.67 40596.16 694.69 1037.16 1031.48
Avg 868.7366667 205.2366667 41535.33333 40747.3 722.21 1042.98 1054.753333
After:
SEQ-RD(MB/s) SEQ-WR(MB/s) RND-RD(IOPS) RND-WR(IOPS) Insert(TPS) Update(TPS) Delete(TPS)
1 798.81 202.5 41143 40613.87 602.71 838.08 913.83
2 805.79 206.47 40297.2 41291.46 604.44 840.75 924.27
3 814.83 206.17 41209.57 40453.62 602.85 834.66 927.91
Avg 806.4766667 205.0466667 40883.25667 40786.31667 603.3333333 837.83 922.0033333
Patched/Original:
0.928332713 0.999074239 0.984300676 1.000957528 0.835398753 0.803303994 0.874141189
It looks like atomic write will suffer a performance regression.
I suspect the culprit is that we are forcing a wait for all dnodes to be in
the storage cache before we issue PREFLUSH+FUA.
BTW, will commit ("f2fs: don't need to wait for node writes for atomic write")
cause the problem: we will lose data of last transaction after SPO, even if
atomic write return no error:
- atomic_open();
- write() P1, P2, P3;
- atomic_commit();
- writeback data: P1, P2, P3;
- writeback node: N1, N2, N3; <--- If N1, N2 is not writebacked, N3 with fsync_mark is
writebacked, In SPOR, we won't find N3 since node chain is broken, turns out that losing
last transaction.
- preflush + fua;
- power-cut
If we don't wait dnode writeback for atomic_write:
SEQ-RD(MB/s) SEQ-WR(MB/s) RND-RD(IOPS) RND-WR(IOPS) Insert(TPS) Update(TPS) Delete(TPS)
1 779.91 206.03 41621.5 40333.16 716.9 1038.21 1034.85
2 848.51 204.35 40082.44 39486.17 791.83 1119.96 1083.77
3 772.12 206.27 41335.25 41599.65 723.29 1055.07 971.92
Avg 800.18 205.55 41013.06333 40472.99333 744.0066667 1071.08 1030.18
Patched/Original:
0.92108464 1.001526693 0.987425886 0.993268102 1.030180511 1.026942031 0.976702294
SQLite's performance recovers.
Jaegeuk:
"Practically, I don't see db corruption because of this. We can excuse to lose
the last transaction."
Finally, we decided to keep the original implementation of the atomic write interface
semantics, in which we don't wait for all dnode writeback before preflush+fua submission.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-08-02 22:03:19 +07:00
|
|
|
ret = f2fs_fsync_node_pages(sbi, inode, &wbc, atomic, &seq_id);
|
2018-06-04 22:20:36 +07:00
|
|
|
atomic_dec(&sbi->wb_sync_req[NODE]);
|
2016-04-15 23:25:04 +07:00
|
|
|
if (ret)
|
|
|
|
goto out;
|
2014-12-08 13:29:41 +07:00
|
|
|
|
2015-01-10 07:27:17 +07:00
|
|
|
/* if cp_error was enabled, we should avoid infinite loop */
|
2015-12-24 17:04:56 +07:00
|
|
|
if (unlikely(f2fs_cp_error(sbi))) {
|
|
|
|
ret = -EIO;
|
2015-01-10 07:27:17 +07:00
|
|
|
goto out;
|
2015-12-24 17:04:56 +07:00
|
|
|
}
|
2015-01-10 07:27:17 +07:00
|
|
|
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking at f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using generically named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
if (f2fs_need_inode_block_update(sbi, ino)) {
|
2016-10-15 01:51:23 +07:00
|
|
|
f2fs_mark_inode_dirty_sync(inode, true);
|
2014-12-08 13:29:41 +07:00
|
|
|
f2fs_write_inode(inode, NULL);
|
|
|
|
goto sync_nodes;
|
2012-11-02 15:09:44 +07:00
|
|
|
}
|
2014-12-08 13:29:41 +07:00
|
|
|
|
2017-07-28 16:29:12 +07:00
|
|
|
/*
|
|
|
|
* If it's atomic_write, it's just fine to keep write ordering. So
|
|
|
|
* here we don't need to wait for node write completion, since we use
|
|
|
|
* node chain which serializes node blocks. If one of node writes are
|
|
|
|
* reordered, we can see simply broken chain, resulting in stopping
|
|
|
|
* roll-forward recovery. It means we'll recover all or none node blocks
|
|
|
|
* given fsync mark.
|
|
|
|
*/
|
|
|
|
if (!atomic) {
|
f2fs: fix to avoid broken of dnode block list
f2fs recovery flow is relying on dnode block link list, it means fsynced
file recovery depends on previous dnode's persistence in the list, so
during fsync() we should wait on all regular inode's dnode writebacked
before issuing flush.
By this way, we can avoid dnode block list being broken by out-of-order
IO submission due to IO scheduler or driver.
Sheng Yong helps to do the test with this patch:
Target:/data (f2fs, -)
64MB / 32768KB / 4KB / 8
1 / PERSIST / Index
Base:
SEQ-RD(MB/s) SEQ-WR(MB/s) RND-RD(IOPS) RND-WR(IOPS) Insert(TPS) Update(TPS) Delete(TPS)
1 867.82 204.15 41440.03 41370.54 680.8 1025.94 1031.08
2 871.87 205.87 41370.3 40275.2 791.14 1065.84 1101.7
3 866.52 205.69 41795.67 40596.16 694.69 1037.16 1031.48
Avg 868.7366667 205.2366667 41535.33333 40747.3 722.21 1042.98 1054.753333
After:
SEQ-RD(MB/s) SEQ-WR(MB/s) RND-RD(IOPS) RND-WR(IOPS) Insert(TPS) Update(TPS) Delete(TPS)
1 798.81 202.5 41143 40613.87 602.71 838.08 913.83
2 805.79 206.47 40297.2 41291.46 604.44 840.75 924.27
3 814.83 206.17 41209.57 40453.62 602.85 834.66 927.91
Avg 806.4766667 205.0466667 40883.25667 40786.31667 603.3333333 837.83 922.0033333
Patched/Original:
0.928332713 0.999074239 0.984300676 1.000957528 0.835398753 0.803303994 0.874141189
It looks like atomic write will suffer a performance regression.
I suspect the culprit is that we are forcing a wait for all dnodes to be in
the storage cache before we issue PREFLUSH+FUA.
BTW, will commit ("f2fs: don't need to wait for node writes for atomic write")
cause the problem: we will lose data of last transaction after SPO, even if
atomic write return no error:
- atomic_open();
- write() P1, P2, P3;
- atomic_commit();
- writeback data: P1, P2, P3;
- writeback node: N1, N2, N3; <--- If N1, N2 is not writebacked, N3 with fsync_mark is
writebacked, In SPOR, we won't find N3 since node chain is broken, turns out that losing
last transaction.
- preflush + fua;
- power-cut
If we don't wait dnode writeback for atomic_write:
SEQ-RD(MB/s) SEQ-WR(MB/s) RND-RD(IOPS) RND-WR(IOPS) Insert(TPS) Update(TPS) Delete(TPS)
1 779.91 206.03 41621.5 40333.16 716.9 1038.21 1034.85
2 848.51 204.35 40082.44 39486.17 791.83 1119.96 1083.77
3 772.12 206.27 41335.25 41599.65 723.29 1055.07 971.92
Avg 800.18 205.55 41013.06333 40472.99333 744.0066667 1071.08 1030.18
Patched/Original:
0.92108464 1.001526693 0.987425886 0.993268102 1.030180511 1.026942031 0.976702294
SQLite's performance recovers.
Jaegeuk:
"Practically, I don't see db corruption because of this. We can excuse to lose
the last transaction."
Finally, we decided to keep the original implementation of the atomic write interface
semantics, in which we don't wait for all dnode writeback before preflush+fua submission.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-08-02 22:03:19 +07:00
|
|
|
ret = f2fs_wait_on_node_pages_writeback(sbi, seq_id);
|
2017-07-28 16:29:12 +07:00
|
|
|
if (ret)
|
|
|
|
goto out;
|
|
|
|
}
|
2014-12-08 13:29:41 +07:00
|
|
|
|
|
|
|
/* once recovery info is written, don't need to tack this */
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking at f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using generically named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
f2fs_remove_ino_entry(sbi, ino, APPEND_INO);
|
2016-05-21 00:13:22 +07:00
|
|
|
clear_inode_flag(inode, FI_APPEND_WRITE);
|
2014-12-08 13:29:41 +07:00
|
|
|
flush_out:
|
2018-05-26 08:02:58 +07:00
|
|
|
if (!atomic && F2FS_OPTION(sbi).fsync_mode != FSYNC_MODE_NOBARRIER)
|
2017-09-29 12:59:38 +07:00
|
|
|
ret = f2fs_issue_flush(sbi, inode->i_ino);
|
2017-09-29 12:59:36 +07:00
|
|
|
if (!ret) {
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking at f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using generically named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
f2fs_remove_ino_entry(sbi, ino, UPDATE_INO);
|
2017-09-29 12:59:36 +07:00
|
|
|
clear_inode_flag(inode, FI_UPDATE_WRITE);
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking at f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using generically named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
f2fs_remove_ino_entry(sbi, ino, FLUSH_INO);
|
2017-09-29 12:59:36 +07:00
|
|
|
}
|
2016-01-09 07:57:48 +07:00
|
|
|
f2fs_update_time(sbi, REQ_TIME);
|
2012-11-02 15:09:44 +07:00
|
|
|
out:
|
2017-11-06 21:51:45 +07:00
|
|
|
trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret);
|
2015-04-24 04:38:15 +07:00
|
|
|
f2fs_trace_ios(NULL, 1);
|
2012-11-02 15:09:44 +07:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2016-04-15 23:43:17 +07:00
|
|
|
/*
 * Sync entry point taking a file, start/end offsets and a datasync flag.
 *
 * Returns -EIO straight away when f2fs_cp_error() reports an error for the
 * superblock info obtained from the file's inode. Otherwise every argument
 * is forwarded unchanged to f2fs_do_sync_file() with false as the last
 * argument, and that call's return value is returned.
 *
 * NOTE(review): the last argument is presumably the "atomic" flag -- the
 * f2fs_do_sync_file() signature is not visible from here; confirm.
 */
int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
|
|
|
|
{
|
2017-10-24 04:48:49 +07:00
|
|
|
if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(file)))))
|
|
|
|
return -EIO;
|
2016-04-15 23:43:17 +07:00
|
|
|
return f2fs_do_sync_file(file, start, end, datasync, false);
|
|
|
|
}
|
|
|
|
|
2014-04-28 16:12:36 +07:00
|
|
|
/*
 * Look up the index of a dirty page in @mapping for a SEEK_DATA request.
 *
 * @mapping: address space handed to find_get_pages_tag()
 * @pgofs:   page index passed (by address) as the lookup position
 * @whence:  seek mode; only SEEK_DATA triggers a lookup
 *
 * Returns 0 when @whence is not SEEK_DATA, ULONG_MAX when the lookup finds
 * no page tagged PAGECACHE_TAG_DIRTY, and otherwise the index of the single
 * page that was returned by the lookup.
 */
static pgoff_t __get_first_dirty_index(struct address_space *mapping,
|
|
|
|
pgoff_t pgofs, int whence)
|
|
|
|
{
|
2017-11-16 08:34:55 +07:00
|
|
|
struct page *page;
|
2014-04-28 16:12:36 +07:00
|
|
|
int nr_pages;
|
|
|
|
|
|
|
|
|
|
if (whence != SEEK_DATA)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
|
|
/* find first dirty page index */
|
2017-11-16 08:34:55 +07:00
|
|
|
nr_pages = find_get_pages_tag(mapping, &pgofs, PAGECACHE_TAG_DIRTY,
|
|
|
|
1, &page);
|
|
|
|
if (!nr_pages)
|
|
|
|
return ULONG_MAX;
|
|
|
|
pgofs = page->index;
|
|
|
|
/* index is saved; release the page (presumably the reference taken by the lookup -- confirm) */
put_page(page);
|
2014-04-28 16:12:36 +07:00
|
|
|
return pgofs;
|
|
|
|
}
|
|
|
|
|
2018-06-05 16:44:11 +07:00
|
|
|
/*
 * Decide whether a block address satisfies a SEEK_DATA / SEEK_HOLE request.
 *
 * @sbi:     superblock info passed to is_valid_data_blkaddr()
 * @blkaddr: block address being examined
 * @dirty:   page index compared against @pgofs when @blkaddr is NEW_ADDR
 * @pgofs:   page index the block address belongs to
 * @whence:  SEEK_DATA or SEEK_HOLE
 *
 * SEEK_DATA: true when @blkaddr is NEW_ADDR and @dirty equals @pgofs, or
 *            when is_valid_data_blkaddr() accepts @blkaddr.
 * SEEK_HOLE: true when @blkaddr is NULL_ADDR.
 * Any other combination (including other @whence values) returns false.
 */
static bool __found_offset(struct f2fs_sb_info *sbi, block_t blkaddr,
|
|
|
|
pgoff_t dirty, pgoff_t pgofs, int whence)
|
2014-04-28 16:12:36 +07:00
|
|
|
{
|
|
|
|
switch (whence) {
|
|
|
|
case SEEK_DATA:
|
|
|
|
if ((blkaddr == NEW_ADDR && dirty == pgofs) ||
|
2018-06-05 16:44:11 +07:00
|
|
|
is_valid_data_blkaddr(sbi, blkaddr))
|
2014-04-28 16:12:36 +07:00
|
|
|
return true;
|
|
|
|
break;
|
|
|
|
case SEEK_HOLE:
|
|
|
|
if (blkaddr == NULL_ADDR)
|
|
|
|
return true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2014-04-23 13:10:24 +07:00
|
|
|
static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
|
|
|
|
{
|
|
|
|
struct inode *inode = file->f_mapping->host;
|
|
|
|
loff_t maxbytes = inode->i_sb->s_maxbytes;
|
|
|
|
struct dnode_of_data dn;
|
2014-04-28 16:12:36 +07:00
|
|
|
pgoff_t pgofs, end_offset, dirty;
|
|
|
|
loff_t data_ofs = offset;
|
|
|
|
loff_t isize;
|
2014-04-23 13:10:24 +07:00
|
|
|
int err = 0;
|
|
|
|
|
2016-01-23 03:40:57 +07:00
|
|
|
inode_lock(inode);
|
2014-04-23 13:10:24 +07:00
|
|
|
|
|
|
|
isize = i_size_read(inode);
|
|
|
|
if (offset >= isize)
|
|
|
|
goto fail;
|
|
|
|
|
|
|
|
/* handle inline data case */
|
2014-09-24 17:19:10 +07:00
|
|
|
if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) {
|
2014-04-23 13:10:24 +07:00
|
|
|
if (whence == SEEK_HOLE)
|
|
|
|
data_ofs = isize;
|
|
|
|
goto found;
|
|
|
|
}
|
|
|
|
|
mm, fs: get rid of PAGE_CACHE_* and page_cache_{get,release} macros
PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} macros were introduced *long* time
ago with promise that one day it will be possible to implement page
cache with bigger chunks than PAGE_SIZE.
This promise never materialized. And unlikely will.
We have many places where PAGE_CACHE_SIZE assumed to be equal to
PAGE_SIZE. And it's constant source of confusion on whether
PAGE_CACHE_* or PAGE_* constant should be used in a particular case,
especially on the border between fs and mm.
Global switching to PAGE_CACHE_SIZE != PAGE_SIZE would cause too much
breakage to be doable.
Let's stop pretending that pages in page cache are special. They are
not.
The changes are pretty straight-forward:
- <foo> << (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- <foo> >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} -> PAGE_{SIZE,SHIFT,MASK,ALIGN};
- page_cache_get() -> get_page();
- page_cache_release() -> put_page();
This patch contains automated changes generated with coccinelle using
script below. For some reason, coccinelle doesn't patch header files.
I've called spatch for them manually.
The only adjustment after coccinelle is a revert of the changes to the
PAGE_CACHE_ALIGN definition: we are going to drop it later.
There are few places in the code where coccinelle didn't reach. I'll
fix them manually in a separate patch. Comments and documentation also
will be addressed with the separate patch.
virtual patch
@@
expression E;
@@
- E << (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
expression E;
@@
- E >> (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
@@
- PAGE_CACHE_SHIFT
+ PAGE_SHIFT
@@
@@
- PAGE_CACHE_SIZE
+ PAGE_SIZE
@@
@@
- PAGE_CACHE_MASK
+ PAGE_MASK
@@
expression E;
@@
- PAGE_CACHE_ALIGN(E)
+ PAGE_ALIGN(E)
@@
expression E;
@@
- page_cache_get(E)
+ get_page(E)
@@
expression E;
@@
- page_cache_release(E)
+ put_page(E)
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-04-01 19:29:47 +07:00
|
|
|
pgofs = (pgoff_t)(offset >> PAGE_SHIFT);
|
2014-04-23 13:10:24 +07:00
|
|
|
|
2014-04-28 16:12:36 +07:00
|
|
|
dirty = __get_first_dirty_index(inode->i_mapping, pgofs, whence);
|
|
|
|
|
mm, fs: get rid of PAGE_CACHE_* and page_cache_{get,release} macros
PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} macros were introduced *long* time
ago with promise that one day it will be possible to implement page
cache with bigger chunks than PAGE_SIZE.
This promise never materialized. And unlikely will.
We have many places where PAGE_CACHE_SIZE assumed to be equal to
PAGE_SIZE. And it's constant source of confusion on whether
PAGE_CACHE_* or PAGE_* constant should be used in a particular case,
especially on the border between fs and mm.
Global switching to PAGE_CACHE_SIZE != PAGE_SIZE would cause too much
breakage to be doable.
Let's stop pretending that pages in page cache are special. They are
not.
The changes are pretty straight-forward:
- <foo> << (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- <foo> >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} -> PAGE_{SIZE,SHIFT,MASK,ALIGN};
- page_cache_get() -> get_page();
- page_cache_release() -> put_page();
This patch contains automated changes generated with coccinelle using
script below. For some reason, coccinelle doesn't patch header files.
I've called spatch for them manually.
The only adjustment after coccinelle is a revert of the changes to the
PAGE_CACHE_ALIGN definition: we are going to drop it later.
There are few places in the code where coccinelle didn't reach. I'll
fix them manually in a separate patch. Comments and documentation also
will be addressed with the separate patch.
virtual patch
@@
expression E;
@@
- E << (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
expression E;
@@
- E >> (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
@@
- PAGE_CACHE_SHIFT
+ PAGE_SHIFT
@@
@@
- PAGE_CACHE_SIZE
+ PAGE_SIZE
@@
@@
- PAGE_CACHE_MASK
+ PAGE_MASK
@@
expression E;
@@
- PAGE_CACHE_ALIGN(E)
+ PAGE_ALIGN(E)
@@
expression E;
@@
- page_cache_get(E)
+ get_page(E)
@@
expression E;
@@
- page_cache_release(E)
+ put_page(E)
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-04-01 19:29:47 +07:00
|
|
|
for (; data_ofs < isize; data_ofs = (loff_t)pgofs << PAGE_SHIFT) {
|
2014-04-23 13:10:24 +07:00
|
|
|
set_new_dnode(&dn, inode, NULL, NULL, 0);
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking at f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using generically named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
err = f2fs_get_dnode_of_data(&dn, pgofs, LOOKUP_NODE);
|
2014-04-23 13:10:24 +07:00
|
|
|
if (err && err != -ENOENT) {
|
|
|
|
goto fail;
|
|
|
|
} else if (err == -ENOENT) {
|
2014-08-06 21:22:50 +07:00
|
|
|
/* direct node does not exists */
|
2014-04-23 13:10:24 +07:00
|
|
|
if (whence == SEEK_DATA) {
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking at f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using generically named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
pgofs = f2fs_get_next_page_offset(&dn, pgofs);
|
2014-04-23 13:10:24 +07:00
|
|
|
continue;
|
|
|
|
} else {
|
|
|
|
goto found;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-01-26 14:39:35 +07:00
|
|
|
end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
|
2014-04-23 13:10:24 +07:00
|
|
|
|
|
|
|
/* find data/hole in dnode block */
|
|
|
|
for (; dn.ofs_in_node < end_offset;
|
|
|
|
dn.ofs_in_node++, pgofs++,
|
mm, fs: get rid of PAGE_CACHE_* and page_cache_{get,release} macros
PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} macros were introduced *long* time
ago with promise that one day it will be possible to implement page
cache with bigger chunks than PAGE_SIZE.
This promise never materialized. And unlikely will.
We have many places where PAGE_CACHE_SIZE assumed to be equal to
PAGE_SIZE. And it's constant source of confusion on whether
PAGE_CACHE_* or PAGE_* constant should be used in a particular case,
especially on the border between fs and mm.
Global switching to PAGE_CACHE_SIZE != PAGE_SIZE would cause too much
breakage to be doable.
Let's stop pretending that pages in page cache are special. They are
not.
The changes are pretty straight-forward:
- <foo> << (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- <foo> >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} -> PAGE_{SIZE,SHIFT,MASK,ALIGN};
- page_cache_get() -> get_page();
- page_cache_release() -> put_page();
This patch contains automated changes generated with coccinelle using
script below. For some reason, coccinelle doesn't patch header files.
I've called spatch for them manually.
The only adjustment after coccinelle is a revert of the changes to the
PAGE_CACHE_ALIGN definition: we are going to drop it later.
There are few places in the code where coccinelle didn't reach. I'll
fix them manually in a separate patch. Comments and documentation also
will be addressed with the separate patch.
virtual patch
@@
expression E;
@@
- E << (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
expression E;
@@
- E >> (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
@@
- PAGE_CACHE_SHIFT
+ PAGE_SHIFT
@@
@@
- PAGE_CACHE_SIZE
+ PAGE_SIZE
@@
@@
- PAGE_CACHE_MASK
+ PAGE_MASK
@@
expression E;
@@
- PAGE_CACHE_ALIGN(E)
+ PAGE_ALIGN(E)
@@
expression E;
@@
- page_cache_get(E)
+ get_page(E)
@@
expression E;
@@
- page_cache_release(E)
+ put_page(E)
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-04-01 19:29:47 +07:00
|
|
|
data_ofs = (loff_t)pgofs << PAGE_SHIFT) {
|
2014-04-23 13:10:24 +07:00
|
|
|
block_t blkaddr;
|
2018-05-30 02:33:07 +07:00
|
|
|
|
2017-07-18 23:19:06 +07:00
|
|
|
blkaddr = datablock_addr(dn.inode,
|
|
|
|
dn.node_page, dn.ofs_in_node);
|
2014-04-23 13:10:24 +07:00
|
|
|
|
f2fs: fix to do sanity check with block address in main area
This patch add to do sanity check with below field:
- cp_pack_total_block_count
- blkaddr of data/node
- extent info
- Overview
BUG() in verify_block_addr() when writing to a corrupted f2fs image
- Reproduce (4.18 upstream kernel)
- POC (poc.c)
static void activity(char *mpoint) {
char *foo_bar_baz;
int err;
static int buf[8192];
memset(buf, 0, sizeof(buf));
err = asprintf(&foo_bar_baz, "%s/foo/bar/baz", mpoint);
int fd = open(foo_bar_baz, O_RDWR | O_TRUNC, 0777);
if (fd >= 0) {
write(fd, (char *)buf, sizeof(buf));
fdatasync(fd);
close(fd);
}
}
int main(int argc, char *argv[]) {
activity(argv[1]);
return 0;
}
- Kernel message
[ 689.349473] F2FS-fs (loop0): Mounted with checkpoint version = 3
[ 699.728662] WARNING: CPU: 0 PID: 1309 at fs/f2fs/segment.c:2860 f2fs_inplace_write_data+0x232/0x240
[ 699.728670] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_timer snd mac_hid i2c_piix4 soundcore ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx raid1 raid0 multipath linear 8139too crct10dif_pclmul crc32_pclmul qxl drm_kms_helper syscopyarea aesni_intel sysfillrect sysimgblt fb_sys_fops ttm drm aes_x86_64 crypto_simd cryptd 8139cp glue_helper mii pata_acpi floppy
[ 699.729056] CPU: 0 PID: 1309 Comm: a.out Not tainted 4.18.0-rc1+ #4
[ 699.729064] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
[ 699.729074] RIP: 0010:f2fs_inplace_write_data+0x232/0x240
[ 699.729076] Code: ff e9 cf fe ff ff 49 8d 7d 10 e8 39 45 ad ff 4d 8b 7d 10 be 04 00 00 00 49 8d 7f 48 e8 07 49 ad ff 45 8b 7f 48 e9 fb fe ff ff <0f> 0b f0 41 80 4d 48 04 e9 65 fe ff ff 90 66 66 66 66 90 55 48 8d
[ 699.729130] RSP: 0018:ffff8801f43af568 EFLAGS: 00010202
[ 699.729139] RAX: 000000000000003f RBX: ffff8801f43af7b8 RCX: ffffffffb88c9113
[ 699.729142] RDX: 0000000000000003 RSI: dffffc0000000000 RDI: ffff8802024e5540
[ 699.729144] RBP: ffff8801f43af590 R08: 0000000000000009 R09: ffffffffffffffe8
[ 699.729147] R10: 0000000000000001 R11: ffffed0039b0596a R12: ffff8802024e5540
[ 699.729149] R13: ffff8801f0335500 R14: ffff8801e3e7a700 R15: ffff8801e1ee4450
[ 699.729154] FS: 00007f9bf97f5700(0000) GS:ffff8801f6e00000(0000) knlGS:0000000000000000
[ 699.729156] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 699.729159] CR2: 00007f9bf925d170 CR3: 00000001f0c34000 CR4: 00000000000006f0
[ 699.729171] Call Trace:
[ 699.729192] f2fs_do_write_data_page+0x2e2/0xe00
[ 699.729203] ? f2fs_should_update_outplace+0xd0/0xd0
[ 699.729238] ? memcg_drain_all_list_lrus+0x280/0x280
[ 699.729269] ? __radix_tree_replace+0xa3/0x120
[ 699.729276] __write_data_page+0x5c7/0xe30
[ 699.729291] ? kasan_check_read+0x11/0x20
[ 699.729310] ? page_mapped+0x8a/0x110
[ 699.729321] ? page_mkclean+0xe9/0x160
[ 699.729327] ? f2fs_do_write_data_page+0xe00/0xe00
[ 699.729331] ? invalid_page_referenced_vma+0x130/0x130
[ 699.729345] ? clear_page_dirty_for_io+0x332/0x450
[ 699.729351] f2fs_write_cache_pages+0x4ca/0x860
[ 699.729358] ? __write_data_page+0xe30/0xe30
[ 699.729374] ? percpu_counter_add_batch+0x22/0xa0
[ 699.729380] ? kasan_check_write+0x14/0x20
[ 699.729391] ? _raw_spin_lock+0x17/0x40
[ 699.729403] ? f2fs_mark_inode_dirty_sync.part.18+0x16/0x30
[ 699.729413] ? iov_iter_advance+0x113/0x640
[ 699.729418] ? f2fs_write_end+0x133/0x2e0
[ 699.729423] ? balance_dirty_pages_ratelimited+0x239/0x640
[ 699.729428] f2fs_write_data_pages+0x329/0x520
[ 699.729433] ? generic_perform_write+0x250/0x320
[ 699.729438] ? f2fs_write_cache_pages+0x860/0x860
[ 699.729454] ? current_time+0x110/0x110
[ 699.729459] ? f2fs_preallocate_blocks+0x1ef/0x370
[ 699.729464] do_writepages+0x37/0xb0
[ 699.729468] ? f2fs_write_cache_pages+0x860/0x860
[ 699.729472] ? do_writepages+0x37/0xb0
[ 699.729478] __filemap_fdatawrite_range+0x19a/0x1f0
[ 699.729483] ? delete_from_page_cache_batch+0x4e0/0x4e0
[ 699.729496] ? __vfs_write+0x2b2/0x410
[ 699.729501] file_write_and_wait_range+0x66/0xb0
[ 699.729506] f2fs_do_sync_file+0x1f9/0xd90
[ 699.729511] ? truncate_partial_data_page+0x290/0x290
[ 699.729521] ? __sb_end_write+0x30/0x50
[ 699.729526] ? vfs_write+0x20f/0x260
[ 699.729530] f2fs_sync_file+0x9a/0xb0
[ 699.729534] ? f2fs_do_sync_file+0xd90/0xd90
[ 699.729548] vfs_fsync_range+0x68/0x100
[ 699.729554] ? __fget_light+0xc9/0xe0
[ 699.729558] do_fsync+0x3d/0x70
[ 699.729562] __x64_sys_fdatasync+0x24/0x30
[ 699.729585] do_syscall_64+0x78/0x170
[ 699.729595] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 699.729613] RIP: 0033:0x7f9bf930d800
[ 699.729615] Code: 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 83 3d 49 bf 2c 00 00 75 10 b8 4b 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 be 78 01 00 48 89 04 24
[ 699.729668] RSP: 002b:00007ffee3606c68 EFLAGS: 00000246 ORIG_RAX: 000000000000004b
[ 699.729673] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f9bf930d800
[ 699.729675] RDX: 0000000000008000 RSI: 00000000006010a0 RDI: 0000000000000003
[ 699.729678] RBP: 00007ffee3606ca0 R08: 0000000001503010 R09: 0000000000000000
[ 699.729680] R10: 00000000000002e8 R11: 0000000000000246 R12: 0000000000400610
[ 699.729683] R13: 00007ffee3606da0 R14: 0000000000000000 R15: 0000000000000000
[ 699.729687] ---[ end trace 4ce02f25ff7d3df5 ]---
[ 699.729782] ------------[ cut here ]------------
[ 699.729785] kernel BUG at fs/f2fs/segment.h:654!
[ 699.731055] invalid opcode: 0000 [#1] SMP KASAN PTI
[ 699.732104] CPU: 0 PID: 1309 Comm: a.out Tainted: G W 4.18.0-rc1+ #4
[ 699.733684] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
[ 699.735611] RIP: 0010:f2fs_submit_page_bio+0x29b/0x730
[ 699.736649] Code: 54 49 8d bd 18 04 00 00 e8 b2 59 af ff 41 8b 8d 18 04 00 00 8b 45 b8 41 d3 e6 44 01 f0 4c 8d 73 14 41 39 c7 0f 82 37 fe ff ff <0f> 0b 65 8b 05 2c 04 77 47 89 c0 48 0f a3 05 52 c1 d5 01 0f 92 c0
[ 699.740524] RSP: 0018:ffff8801f43af508 EFLAGS: 00010283
[ 699.741573] RAX: 0000000000000000 RBX: ffff8801f43af7b8 RCX: ffffffffb88a7cef
[ 699.743006] RDX: 0000000000000007 RSI: dffffc0000000000 RDI: ffff8801e3e7a64c
[ 699.744426] RBP: ffff8801f43af558 R08: ffffed003e066b55 R09: ffffed003e066b55
[ 699.745833] R10: 0000000000000001 R11: ffffed003e066b54 R12: ffffea0007876940
[ 699.747256] R13: ffff8801f0335500 R14: ffff8801e3e7a600 R15: 0000000000000001
[ 699.748683] FS: 00007f9bf97f5700(0000) GS:ffff8801f6e00000(0000) knlGS:0000000000000000
[ 699.750293] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 699.751462] CR2: 00007f9bf925d170 CR3: 00000001f0c34000 CR4: 00000000000006f0
[ 699.752874] Call Trace:
[ 699.753386] ? f2fs_inplace_write_data+0x93/0x240
[ 699.754341] f2fs_inplace_write_data+0xd2/0x240
[ 699.755271] f2fs_do_write_data_page+0x2e2/0xe00
[ 699.756214] ? f2fs_should_update_outplace+0xd0/0xd0
[ 699.757215] ? memcg_drain_all_list_lrus+0x280/0x280
[ 699.758209] ? __radix_tree_replace+0xa3/0x120
[ 699.759164] __write_data_page+0x5c7/0xe30
[ 699.760002] ? kasan_check_read+0x11/0x20
[ 699.760823] ? page_mapped+0x8a/0x110
[ 699.761573] ? page_mkclean+0xe9/0x160
[ 699.762345] ? f2fs_do_write_data_page+0xe00/0xe00
[ 699.763332] ? invalid_page_referenced_vma+0x130/0x130
[ 699.764374] ? clear_page_dirty_for_io+0x332/0x450
[ 699.765347] f2fs_write_cache_pages+0x4ca/0x860
[ 699.766276] ? __write_data_page+0xe30/0xe30
[ 699.767161] ? percpu_counter_add_batch+0x22/0xa0
[ 699.768112] ? kasan_check_write+0x14/0x20
[ 699.768951] ? _raw_spin_lock+0x17/0x40
[ 699.769739] ? f2fs_mark_inode_dirty_sync.part.18+0x16/0x30
[ 699.770885] ? iov_iter_advance+0x113/0x640
[ 699.771743] ? f2fs_write_end+0x133/0x2e0
[ 699.772569] ? balance_dirty_pages_ratelimited+0x239/0x640
[ 699.773680] f2fs_write_data_pages+0x329/0x520
[ 699.774603] ? generic_perform_write+0x250/0x320
[ 699.775544] ? f2fs_write_cache_pages+0x860/0x860
[ 699.776510] ? current_time+0x110/0x110
[ 699.777299] ? f2fs_preallocate_blocks+0x1ef/0x370
[ 699.778279] do_writepages+0x37/0xb0
[ 699.779026] ? f2fs_write_cache_pages+0x860/0x860
[ 699.779978] ? do_writepages+0x37/0xb0
[ 699.780755] __filemap_fdatawrite_range+0x19a/0x1f0
[ 699.781746] ? delete_from_page_cache_batch+0x4e0/0x4e0
[ 699.782820] ? __vfs_write+0x2b2/0x410
[ 699.783597] file_write_and_wait_range+0x66/0xb0
[ 699.784540] f2fs_do_sync_file+0x1f9/0xd90
[ 699.785381] ? truncate_partial_data_page+0x290/0x290
[ 699.786415] ? __sb_end_write+0x30/0x50
[ 699.787204] ? vfs_write+0x20f/0x260
[ 699.787941] f2fs_sync_file+0x9a/0xb0
[ 699.788694] ? f2fs_do_sync_file+0xd90/0xd90
[ 699.789572] vfs_fsync_range+0x68/0x100
[ 699.790360] ? __fget_light+0xc9/0xe0
[ 699.791128] do_fsync+0x3d/0x70
[ 699.791779] __x64_sys_fdatasync+0x24/0x30
[ 699.792614] do_syscall_64+0x78/0x170
[ 699.793371] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 699.794406] RIP: 0033:0x7f9bf930d800
[ 699.795134] Code: 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 83 3d 49 bf 2c 00 00 75 10 b8 4b 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 be 78 01 00 48 89 04 24
[ 699.798960] RSP: 002b:00007ffee3606c68 EFLAGS: 00000246 ORIG_RAX: 000000000000004b
[ 699.800483] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f9bf930d800
[ 699.801923] RDX: 0000000000008000 RSI: 00000000006010a0 RDI: 0000000000000003
[ 699.803373] RBP: 00007ffee3606ca0 R08: 0000000001503010 R09: 0000000000000000
[ 699.804798] R10: 00000000000002e8 R11: 0000000000000246 R12: 0000000000400610
[ 699.806233] R13: 00007ffee3606da0 R14: 0000000000000000 R15: 0000000000000000
[ 699.807667] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_timer snd mac_hid i2c_piix4 soundcore ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx raid1 raid0 multipath linear 8139too crct10dif_pclmul crc32_pclmul qxl drm_kms_helper syscopyarea aesni_intel sysfillrect sysimgblt fb_sys_fops ttm drm aes_x86_64 crypto_simd cryptd 8139cp glue_helper mii pata_acpi floppy
[ 699.817079] ---[ end trace 4ce02f25ff7d3df6 ]---
[ 699.818068] RIP: 0010:f2fs_submit_page_bio+0x29b/0x730
[ 699.819114] Code: 54 49 8d bd 18 04 00 00 e8 b2 59 af ff 41 8b 8d 18 04 00 00 8b 45 b8 41 d3 e6 44 01 f0 4c 8d 73 14 41 39 c7 0f 82 37 fe ff ff <0f> 0b 65 8b 05 2c 04 77 47 89 c0 48 0f a3 05 52 c1 d5 01 0f 92 c0
[ 699.822919] RSP: 0018:ffff8801f43af508 EFLAGS: 00010283
[ 699.823977] RAX: 0000000000000000 RBX: ffff8801f43af7b8 RCX: ffffffffb88a7cef
[ 699.825436] RDX: 0000000000000007 RSI: dffffc0000000000 RDI: ffff8801e3e7a64c
[ 699.826881] RBP: ffff8801f43af558 R08: ffffed003e066b55 R09: ffffed003e066b55
[ 699.828292] R10: 0000000000000001 R11: ffffed003e066b54 R12: ffffea0007876940
[ 699.829750] R13: ffff8801f0335500 R14: ffff8801e3e7a600 R15: 0000000000000001
[ 699.831192] FS: 00007f9bf97f5700(0000) GS:ffff8801f6e00000(0000) knlGS:0000000000000000
[ 699.832793] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 699.833981] CR2: 00007f9bf925d170 CR3: 00000001f0c34000 CR4: 00000000000006f0
[ 699.835556] ==================================================================
[ 699.837029] BUG: KASAN: stack-out-of-bounds in update_stack_state+0x38c/0x3e0
[ 699.838462] Read of size 8 at addr ffff8801f43af970 by task a.out/1309
[ 699.840086] CPU: 0 PID: 1309 Comm: a.out Tainted: G D W 4.18.0-rc1+ #4
[ 699.841603] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
[ 699.843475] Call Trace:
[ 699.843982] dump_stack+0x7b/0xb5
[ 699.844661] print_address_description+0x70/0x290
[ 699.845607] kasan_report+0x291/0x390
[ 699.846351] ? update_stack_state+0x38c/0x3e0
[ 699.853831] __asan_load8+0x54/0x90
[ 699.854569] update_stack_state+0x38c/0x3e0
[ 699.855428] ? __read_once_size_nocheck.constprop.7+0x20/0x20
[ 699.856601] ? __save_stack_trace+0x5e/0x100
[ 699.857476] unwind_next_frame.part.5+0x18e/0x490
[ 699.858448] ? unwind_dump+0x290/0x290
[ 699.859217] ? clear_page_dirty_for_io+0x332/0x450
[ 699.860185] __unwind_start+0x106/0x190
[ 699.860974] __save_stack_trace+0x5e/0x100
[ 699.861808] ? __save_stack_trace+0x5e/0x100
[ 699.862691] ? unlink_anon_vmas+0xba/0x2c0
[ 699.863525] save_stack_trace+0x1f/0x30
[ 699.864312] save_stack+0x46/0xd0
[ 699.864993] ? __alloc_pages_slowpath+0x1420/0x1420
[ 699.865990] ? flush_tlb_mm_range+0x15e/0x220
[ 699.866889] ? kasan_check_write+0x14/0x20
[ 699.867724] ? __dec_node_state+0x92/0xb0
[ 699.868543] ? lock_page_memcg+0x85/0xf0
[ 699.869350] ? unlock_page_memcg+0x16/0x80
[ 699.870185] ? page_remove_rmap+0x198/0x520
[ 699.871048] ? mark_page_accessed+0x133/0x200
[ 699.871930] ? _cond_resched+0x1a/0x50
[ 699.872700] ? unmap_page_range+0xcd4/0xe50
[ 699.873551] ? rb_next+0x58/0x80
[ 699.874217] ? rb_next+0x58/0x80
[ 699.874895] __kasan_slab_free+0x13c/0x1a0
[ 699.875734] ? unlink_anon_vmas+0xba/0x2c0
[ 699.876563] kasan_slab_free+0xe/0x10
[ 699.877315] kmem_cache_free+0x89/0x1e0
[ 699.878095] unlink_anon_vmas+0xba/0x2c0
[ 699.878913] free_pgtables+0x101/0x1b0
[ 699.879677] exit_mmap+0x146/0x2a0
[ 699.880378] ? __ia32_sys_munmap+0x50/0x50
[ 699.881214] ? kasan_check_read+0x11/0x20
[ 699.882052] ? mm_update_next_owner+0x322/0x380
[ 699.882985] mmput+0x8b/0x1d0
[ 699.883602] do_exit+0x43a/0x1390
[ 699.884288] ? mm_update_next_owner+0x380/0x380
[ 699.885212] ? f2fs_sync_file+0x9a/0xb0
[ 699.885995] ? f2fs_do_sync_file+0xd90/0xd90
[ 699.886877] ? vfs_fsync_range+0x68/0x100
[ 699.887694] ? __fget_light+0xc9/0xe0
[ 699.888442] ? do_fsync+0x3d/0x70
[ 699.889118] ? __x64_sys_fdatasync+0x24/0x30
[ 699.889996] rewind_stack_do_exit+0x17/0x20
[ 699.890860] RIP: 0033:0x7f9bf930d800
[ 699.891585] Code: Bad RIP value.
[ 699.892268] RSP: 002b:00007ffee3606c68 EFLAGS: 00000246 ORIG_RAX: 000000000000004b
[ 699.893781] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f9bf930d800
[ 699.895220] RDX: 0000000000008000 RSI: 00000000006010a0 RDI: 0000000000000003
[ 699.896643] RBP: 00007ffee3606ca0 R08: 0000000001503010 R09: 0000000000000000
[ 699.898069] R10: 00000000000002e8 R11: 0000000000000246 R12: 0000000000400610
[ 699.899505] R13: 00007ffee3606da0 R14: 0000000000000000 R15: 0000000000000000
[ 699.901241] The buggy address belongs to the page:
[ 699.902215] page:ffffea0007d0ebc0 count:0 mapcount:0 mapping:0000000000000000 index:0x0
[ 699.903811] flags: 0x2ffff0000000000()
[ 699.904585] raw: 02ffff0000000000 0000000000000000 ffffffff07d00101 0000000000000000
[ 699.906125] raw: 0000000000000000 0000000000240000 00000000ffffffff 0000000000000000
[ 699.907673] page dumped because: kasan: bad access detected
[ 699.909108] Memory state around the buggy address:
[ 699.910077] ffff8801f43af800: 00 f1 f1 f1 f1 00 f4 f4 f4 f3 f3 f3 f3 00 00 00
[ 699.911528] ffff8801f43af880: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[ 699.912953] >ffff8801f43af900: 00 00 00 00 00 00 00 00 f1 01 f4 f4 f4 f2 f2 f2
[ 699.914392] ^
[ 699.915758] ffff8801f43af980: f2 00 f4 f4 00 00 00 00 f2 00 00 00 00 00 00 00
[ 699.917193] ffff8801f43afa00: 00 00 00 00 00 00 00 00 00 f3 f3 f3 00 00 00 00
[ 699.918634] ==================================================================
- Location
https://elixir.bootlin.com/linux/v4.18-rc1/source/fs/f2fs/segment.h#L644
Reported-by: Wen Xu <wen.xu@gatech.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-08-01 18:13:44 +07:00
|
|
|
if (__is_valid_data_blkaddr(blkaddr) &&
|
|
|
|
!f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
|
|
|
|
blkaddr, DATA_GENERIC)) {
|
|
|
|
f2fs_put_dnode(&dn);
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
2018-06-05 16:44:11 +07:00
|
|
|
if (__found_offset(F2FS_I_SB(inode), blkaddr, dirty,
|
|
|
|
pgofs, whence)) {
|
2014-04-23 13:10:24 +07:00
|
|
|
f2fs_put_dnode(&dn);
|
|
|
|
goto found;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
f2fs_put_dnode(&dn);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (whence == SEEK_DATA)
|
|
|
|
goto fail;
|
|
|
|
found:
|
2014-04-28 15:02:48 +07:00
|
|
|
if (whence == SEEK_HOLE && data_ofs > isize)
|
|
|
|
data_ofs = isize;
|
2016-01-23 03:40:57 +07:00
|
|
|
inode_unlock(inode);
|
2014-04-23 13:10:24 +07:00
|
|
|
return vfs_setpos(file, data_ofs, maxbytes);
|
|
|
|
fail:
|
2016-01-23 03:40:57 +07:00
|
|
|
inode_unlock(inode);
|
2014-04-23 13:10:24 +07:00
|
|
|
return -ENXIO;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * llseek entry point for f2fs regular files.
 *
 * SEEK_SET, SEEK_CUR and SEEK_END are handed to generic_file_llseek_size()
 * together with the superblock's s_maxbytes and the current i_size.
 * SEEK_DATA and SEEK_HOLE reject a negative offset with -ENXIO and are
 * otherwise resolved by f2fs_seek_block().
 * Any other whence value yields -EINVAL.
 */
static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence)
{
	struct inode *inode = file->f_mapping->host;
	loff_t limit = inode->i_sb->s_maxbytes;

	if (whence == SEEK_SET || whence == SEEK_CUR || whence == SEEK_END)
		return generic_file_llseek_size(file, offset, whence,
						limit, i_size_read(inode));

	if (whence != SEEK_DATA && whence != SEEK_HOLE)
		return -EINVAL;

	if (offset < 0)
		return -ENXIO;

	return f2fs_seek_block(file, offset, whence);
}
|
|
|
|
|
2012-11-02 15:09:44 +07:00
|
|
|
static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)
|
|
|
|
{
|
2014-10-24 09:48:09 +07:00
|
|
|
struct inode *inode = file_inode(file);
|
2015-12-23 02:09:35 +07:00
|
|
|
int err;
|
2014-10-24 09:48:09 +07:00
|
|
|
|
2017-10-24 04:48:49 +07:00
|
|
|
if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
|
|
|
|
return -EIO;
|
|
|
|
|
2014-10-24 09:48:09 +07:00
|
|
|
/* we don't need to use inline_data strictly */
|
2015-12-23 02:09:35 +07:00
|
|
|
err = f2fs_convert_inline_inode(inode);
|
|
|
|
if (err)
|
|
|
|
return err;
|
2014-10-24 09:48:09 +07:00
|
|
|
|
2012-11-02 15:09:44 +07:00
|
|
|
file_accessed(file);
|
|
|
|
vma->vm_ops = &f2fs_file_vm_ops;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2015-04-22 10:39:58 +07:00
|
|
|
static int f2fs_file_open(struct inode *inode, struct file *filp)
|
|
|
|
{
|
2017-11-30 03:35:28 +07:00
|
|
|
int err = fscrypt_file_open(inode, filp);
|
2015-04-22 10:39:58 +07:00
|
|
|
|
2017-11-30 03:35:28 +07:00
|
|
|
if (err)
|
|
|
|
return err;
|
2018-03-08 17:34:38 +07:00
|
|
|
|
|
|
|
filp->f_mode |= FMODE_NOWAIT;
|
|
|
|
|
2017-07-08 23:13:07 +07:00
|
|
|
return dquot_file_open(inode, filp);
|
2015-04-22 10:39:58 +07:00
|
|
|
}
|
|
|
|
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking at the f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using generically named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count)
|
2012-11-02 15:09:44 +07:00
|
|
|
{
|
2014-09-03 05:31:18 +07:00
|
|
|
struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
|
2012-11-02 15:09:44 +07:00
|
|
|
struct f2fs_node *raw_node;
|
f2fs: update extent tree in batches
This patch introduce a new helper f2fs_update_extent_tree_range which can
do extent mapping update at a specified range.
The main idea is:
1) punch all mapping info in extent node(s) which are at a specified range;
2) try to merge new extent mapping with adjacent node, or failing that,
insert the mapping into extent tree as a new node.
In order to see the benefit, I added a function that reads the time stamp
counter, as below:
uint64_t rdtsc(void)
{
uint32_t lo, hi;
__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
return (uint64_t)hi << 32 | lo;
}
My test environment is: ubuntu, intel i7-3770, 16G memory, 256g micron ssd.
truncation path: update extent cache from truncate_data_blocks_range
non-truncation path: update extent cache from other paths
total: all update paths
a) Removing 128MB file which has one extent node mapping whole range of
file:
1. dd if=/dev/zero of=/mnt/f2fs/128M bs=1M count=128
2. sync
3. rm /mnt/f2fs/128M
Before:
total count average
truncation: 7651022 32768 233.49
Patched:
total count average
truncation: 3321 33 100.64
b) fsstress:
fsstress -d /mnt/f2fs -l 5 -n 100 -p 20
Test times: 5 times.
Before:
total count average
truncation: 5812480.6 20911.6 277.95
non-truncation: 7783845.6 13440.8 579.12
total: 13596326.2 34352.4 395.79
Patched:
total count average
truncation: 1281283.0 3041.6 421.25
non-truncation: 7355844.4 13662.8 538.38
total: 8637127.4 16704.4 517.06
1) For the updates in truncation path:
- we can see that updating in batches clearly reduces both the total tsc
and the update count;
- besides, a single batched update punches multiple extent nodes in a
loop and therefore executes more operations, so the average tsc
increases noticeably.
2) For the updates in non-truncation path:
- there is a little improvement, that is because for the scenario that we
just need to update in the head or tail of extent node, new interface
optimize to update info in extent node directly, rather than removing
original extent node for updating and then inserting that updated one
into cache as new node.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-08-26 19:34:48 +07:00
|
|
|
int nr_free = 0, ofs = dn->ofs_in_node, len = count;
|
2012-11-02 15:09:44 +07:00
|
|
|
__le32 *addr;
|
2017-07-18 23:19:06 +07:00
|
|
|
int base = 0;
|
|
|
|
|
|
|
|
if (IS_INODE(dn->node_page) && f2fs_has_extra_attr(dn->inode))
|
|
|
|
base = get_extra_isize(dn->inode);
|
2012-11-02 15:09:44 +07:00
|
|
|
|
2013-07-15 16:57:38 +07:00
|
|
|
raw_node = F2FS_NODE(dn->node_page);
|
2017-07-18 23:19:06 +07:00
|
|
|
addr = blkaddr_in_node(raw_node) + base + ofs;
|
2012-11-02 15:09:44 +07:00
|
|
|
|
2014-01-18 03:44:39 +07:00
|
|
|
for (; count > 0; count--, addr++, dn->ofs_in_node++) {
|
2012-11-02 15:09:44 +07:00
|
|
|
block_t blkaddr = le32_to_cpu(*addr);
|
2018-05-30 02:33:07 +07:00
|
|
|
|
2012-11-02 15:09:44 +07:00
|
|
|
if (blkaddr == NULL_ADDR)
|
|
|
|
continue;
|
|
|
|
|
2014-12-31 13:57:55 +07:00
|
|
|
dn->data_blkaddr = NULL_ADDR;
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking at the f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using generically named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
f2fs_set_data_blkaddr(dn);
|
f2fs: fix to do sanity check with block address in main area
This patch adds sanity checks for the fields below:
- cp_pack_total_block_count
- blkaddr of data/node
- extent info
- Overview
BUG() in verify_block_addr() when writing to a corrupted f2fs image
- Reproduce (4.18 upstream kernel)
- POC (poc.c)
static void activity(char *mpoint) {
char *foo_bar_baz;
int err;
static int buf[8192];
memset(buf, 0, sizeof(buf));
err = asprintf(&foo_bar_baz, "%s/foo/bar/baz", mpoint);
int fd = open(foo_bar_baz, O_RDWR | O_TRUNC, 0777);
if (fd >= 0) {
write(fd, (char *)buf, sizeof(buf));
fdatasync(fd);
close(fd);
}
}
int main(int argc, char *argv[]) {
activity(argv[1]);
return 0;
}
- Kernel message
[ 689.349473] F2FS-fs (loop0): Mounted with checkpoint version = 3
[ 699.728662] WARNING: CPU: 0 PID: 1309 at fs/f2fs/segment.c:2860 f2fs_inplace_write_data+0x232/0x240
[ 699.728670] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_timer snd mac_hid i2c_piix4 soundcore ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx raid1 raid0 multipath linear 8139too crct10dif_pclmul crc32_pclmul qxl drm_kms_helper syscopyarea aesni_intel sysfillrect sysimgblt fb_sys_fops ttm drm aes_x86_64 crypto_simd cryptd 8139cp glue_helper mii pata_acpi floppy
[ 699.729056] CPU: 0 PID: 1309 Comm: a.out Not tainted 4.18.0-rc1+ #4
[ 699.729064] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
[ 699.729074] RIP: 0010:f2fs_inplace_write_data+0x232/0x240
[ 699.729076] Code: ff e9 cf fe ff ff 49 8d 7d 10 e8 39 45 ad ff 4d 8b 7d 10 be 04 00 00 00 49 8d 7f 48 e8 07 49 ad ff 45 8b 7f 48 e9 fb fe ff ff <0f> 0b f0 41 80 4d 48 04 e9 65 fe ff ff 90 66 66 66 66 90 55 48 8d
[ 699.729130] RSP: 0018:ffff8801f43af568 EFLAGS: 00010202
[ 699.729139] RAX: 000000000000003f RBX: ffff8801f43af7b8 RCX: ffffffffb88c9113
[ 699.729142] RDX: 0000000000000003 RSI: dffffc0000000000 RDI: ffff8802024e5540
[ 699.729144] RBP: ffff8801f43af590 R08: 0000000000000009 R09: ffffffffffffffe8
[ 699.729147] R10: 0000000000000001 R11: ffffed0039b0596a R12: ffff8802024e5540
[ 699.729149] R13: ffff8801f0335500 R14: ffff8801e3e7a700 R15: ffff8801e1ee4450
[ 699.729154] FS: 00007f9bf97f5700(0000) GS:ffff8801f6e00000(0000) knlGS:0000000000000000
[ 699.729156] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 699.729159] CR2: 00007f9bf925d170 CR3: 00000001f0c34000 CR4: 00000000000006f0
[ 699.729171] Call Trace:
[ 699.729192] f2fs_do_write_data_page+0x2e2/0xe00
[ 699.729203] ? f2fs_should_update_outplace+0xd0/0xd0
[ 699.729238] ? memcg_drain_all_list_lrus+0x280/0x280
[ 699.729269] ? __radix_tree_replace+0xa3/0x120
[ 699.729276] __write_data_page+0x5c7/0xe30
[ 699.729291] ? kasan_check_read+0x11/0x20
[ 699.729310] ? page_mapped+0x8a/0x110
[ 699.729321] ? page_mkclean+0xe9/0x160
[ 699.729327] ? f2fs_do_write_data_page+0xe00/0xe00
[ 699.729331] ? invalid_page_referenced_vma+0x130/0x130
[ 699.729345] ? clear_page_dirty_for_io+0x332/0x450
[ 699.729351] f2fs_write_cache_pages+0x4ca/0x860
[ 699.729358] ? __write_data_page+0xe30/0xe30
[ 699.729374] ? percpu_counter_add_batch+0x22/0xa0
[ 699.729380] ? kasan_check_write+0x14/0x20
[ 699.729391] ? _raw_spin_lock+0x17/0x40
[ 699.729403] ? f2fs_mark_inode_dirty_sync.part.18+0x16/0x30
[ 699.729413] ? iov_iter_advance+0x113/0x640
[ 699.729418] ? f2fs_write_end+0x133/0x2e0
[ 699.729423] ? balance_dirty_pages_ratelimited+0x239/0x640
[ 699.729428] f2fs_write_data_pages+0x329/0x520
[ 699.729433] ? generic_perform_write+0x250/0x320
[ 699.729438] ? f2fs_write_cache_pages+0x860/0x860
[ 699.729454] ? current_time+0x110/0x110
[ 699.729459] ? f2fs_preallocate_blocks+0x1ef/0x370
[ 699.729464] do_writepages+0x37/0xb0
[ 699.729468] ? f2fs_write_cache_pages+0x860/0x860
[ 699.729472] ? do_writepages+0x37/0xb0
[ 699.729478] __filemap_fdatawrite_range+0x19a/0x1f0
[ 699.729483] ? delete_from_page_cache_batch+0x4e0/0x4e0
[ 699.729496] ? __vfs_write+0x2b2/0x410
[ 699.729501] file_write_and_wait_range+0x66/0xb0
[ 699.729506] f2fs_do_sync_file+0x1f9/0xd90
[ 699.729511] ? truncate_partial_data_page+0x290/0x290
[ 699.729521] ? __sb_end_write+0x30/0x50
[ 699.729526] ? vfs_write+0x20f/0x260
[ 699.729530] f2fs_sync_file+0x9a/0xb0
[ 699.729534] ? f2fs_do_sync_file+0xd90/0xd90
[ 699.729548] vfs_fsync_range+0x68/0x100
[ 699.729554] ? __fget_light+0xc9/0xe0
[ 699.729558] do_fsync+0x3d/0x70
[ 699.729562] __x64_sys_fdatasync+0x24/0x30
[ 699.729585] do_syscall_64+0x78/0x170
[ 699.729595] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 699.729613] RIP: 0033:0x7f9bf930d800
[ 699.729615] Code: 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 83 3d 49 bf 2c 00 00 75 10 b8 4b 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 be 78 01 00 48 89 04 24
[ 699.729668] RSP: 002b:00007ffee3606c68 EFLAGS: 00000246 ORIG_RAX: 000000000000004b
[ 699.729673] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f9bf930d800
[ 699.729675] RDX: 0000000000008000 RSI: 00000000006010a0 RDI: 0000000000000003
[ 699.729678] RBP: 00007ffee3606ca0 R08: 0000000001503010 R09: 0000000000000000
[ 699.729680] R10: 00000000000002e8 R11: 0000000000000246 R12: 0000000000400610
[ 699.729683] R13: 00007ffee3606da0 R14: 0000000000000000 R15: 0000000000000000
[ 699.729687] ---[ end trace 4ce02f25ff7d3df5 ]---
[ 699.729782] ------------[ cut here ]------------
[ 699.729785] kernel BUG at fs/f2fs/segment.h:654!
[ 699.731055] invalid opcode: 0000 [#1] SMP KASAN PTI
[ 699.732104] CPU: 0 PID: 1309 Comm: a.out Tainted: G W 4.18.0-rc1+ #4
[ 699.733684] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
[ 699.735611] RIP: 0010:f2fs_submit_page_bio+0x29b/0x730
[ 699.736649] Code: 54 49 8d bd 18 04 00 00 e8 b2 59 af ff 41 8b 8d 18 04 00 00 8b 45 b8 41 d3 e6 44 01 f0 4c 8d 73 14 41 39 c7 0f 82 37 fe ff ff <0f> 0b 65 8b 05 2c 04 77 47 89 c0 48 0f a3 05 52 c1 d5 01 0f 92 c0
[ 699.740524] RSP: 0018:ffff8801f43af508 EFLAGS: 00010283
[ 699.741573] RAX: 0000000000000000 RBX: ffff8801f43af7b8 RCX: ffffffffb88a7cef
[ 699.743006] RDX: 0000000000000007 RSI: dffffc0000000000 RDI: ffff8801e3e7a64c
[ 699.744426] RBP: ffff8801f43af558 R08: ffffed003e066b55 R09: ffffed003e066b55
[ 699.745833] R10: 0000000000000001 R11: ffffed003e066b54 R12: ffffea0007876940
[ 699.747256] R13: ffff8801f0335500 R14: ffff8801e3e7a600 R15: 0000000000000001
[ 699.748683] FS: 00007f9bf97f5700(0000) GS:ffff8801f6e00000(0000) knlGS:0000000000000000
[ 699.750293] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 699.751462] CR2: 00007f9bf925d170 CR3: 00000001f0c34000 CR4: 00000000000006f0
[ 699.752874] Call Trace:
[ 699.753386] ? f2fs_inplace_write_data+0x93/0x240
[ 699.754341] f2fs_inplace_write_data+0xd2/0x240
[ 699.755271] f2fs_do_write_data_page+0x2e2/0xe00
[ 699.756214] ? f2fs_should_update_outplace+0xd0/0xd0
[ 699.757215] ? memcg_drain_all_list_lrus+0x280/0x280
[ 699.758209] ? __radix_tree_replace+0xa3/0x120
[ 699.759164] __write_data_page+0x5c7/0xe30
[ 699.760002] ? kasan_check_read+0x11/0x20
[ 699.760823] ? page_mapped+0x8a/0x110
[ 699.761573] ? page_mkclean+0xe9/0x160
[ 699.762345] ? f2fs_do_write_data_page+0xe00/0xe00
[ 699.763332] ? invalid_page_referenced_vma+0x130/0x130
[ 699.764374] ? clear_page_dirty_for_io+0x332/0x450
[ 699.765347] f2fs_write_cache_pages+0x4ca/0x860
[ 699.766276] ? __write_data_page+0xe30/0xe30
[ 699.767161] ? percpu_counter_add_batch+0x22/0xa0
[ 699.768112] ? kasan_check_write+0x14/0x20
[ 699.768951] ? _raw_spin_lock+0x17/0x40
[ 699.769739] ? f2fs_mark_inode_dirty_sync.part.18+0x16/0x30
[ 699.770885] ? iov_iter_advance+0x113/0x640
[ 699.771743] ? f2fs_write_end+0x133/0x2e0
[ 699.772569] ? balance_dirty_pages_ratelimited+0x239/0x640
[ 699.773680] f2fs_write_data_pages+0x329/0x520
[ 699.774603] ? generic_perform_write+0x250/0x320
[ 699.775544] ? f2fs_write_cache_pages+0x860/0x860
[ 699.776510] ? current_time+0x110/0x110
[ 699.777299] ? f2fs_preallocate_blocks+0x1ef/0x370
[ 699.778279] do_writepages+0x37/0xb0
[ 699.779026] ? f2fs_write_cache_pages+0x860/0x860
[ 699.779978] ? do_writepages+0x37/0xb0
[ 699.780755] __filemap_fdatawrite_range+0x19a/0x1f0
[ 699.781746] ? delete_from_page_cache_batch+0x4e0/0x4e0
[ 699.782820] ? __vfs_write+0x2b2/0x410
[ 699.783597] file_write_and_wait_range+0x66/0xb0
[ 699.784540] f2fs_do_sync_file+0x1f9/0xd90
[ 699.785381] ? truncate_partial_data_page+0x290/0x290
[ 699.786415] ? __sb_end_write+0x30/0x50
[ 699.787204] ? vfs_write+0x20f/0x260
[ 699.787941] f2fs_sync_file+0x9a/0xb0
[ 699.788694] ? f2fs_do_sync_file+0xd90/0xd90
[ 699.789572] vfs_fsync_range+0x68/0x100
[ 699.790360] ? __fget_light+0xc9/0xe0
[ 699.791128] do_fsync+0x3d/0x70
[ 699.791779] __x64_sys_fdatasync+0x24/0x30
[ 699.792614] do_syscall_64+0x78/0x170
[ 699.793371] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 699.794406] RIP: 0033:0x7f9bf930d800
[ 699.795134] Code: 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 83 3d 49 bf 2c 00 00 75 10 b8 4b 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 be 78 01 00 48 89 04 24
[ 699.798960] RSP: 002b:00007ffee3606c68 EFLAGS: 00000246 ORIG_RAX: 000000000000004b
[ 699.800483] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f9bf930d800
[ 699.801923] RDX: 0000000000008000 RSI: 00000000006010a0 RDI: 0000000000000003
[ 699.803373] RBP: 00007ffee3606ca0 R08: 0000000001503010 R09: 0000000000000000
[ 699.804798] R10: 00000000000002e8 R11: 0000000000000246 R12: 0000000000400610
[ 699.806233] R13: 00007ffee3606da0 R14: 0000000000000000 R15: 0000000000000000
[ 699.807667] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_timer snd mac_hid i2c_piix4 soundcore ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx raid1 raid0 multipath linear 8139too crct10dif_pclmul crc32_pclmul qxl drm_kms_helper syscopyarea aesni_intel sysfillrect sysimgblt fb_sys_fops ttm drm aes_x86_64 crypto_simd cryptd 8139cp glue_helper mii pata_acpi floppy
[ 699.817079] ---[ end trace 4ce02f25ff7d3df6 ]---
[ 699.818068] RIP: 0010:f2fs_submit_page_bio+0x29b/0x730
[ 699.819114] Code: 54 49 8d bd 18 04 00 00 e8 b2 59 af ff 41 8b 8d 18 04 00 00 8b 45 b8 41 d3 e6 44 01 f0 4c 8d 73 14 41 39 c7 0f 82 37 fe ff ff <0f> 0b 65 8b 05 2c 04 77 47 89 c0 48 0f a3 05 52 c1 d5 01 0f 92 c0
[ 699.822919] RSP: 0018:ffff8801f43af508 EFLAGS: 00010283
[ 699.823977] RAX: 0000000000000000 RBX: ffff8801f43af7b8 RCX: ffffffffb88a7cef
[ 699.825436] RDX: 0000000000000007 RSI: dffffc0000000000 RDI: ffff8801e3e7a64c
[ 699.826881] RBP: ffff8801f43af558 R08: ffffed003e066b55 R09: ffffed003e066b55
[ 699.828292] R10: 0000000000000001 R11: ffffed003e066b54 R12: ffffea0007876940
[ 699.829750] R13: ffff8801f0335500 R14: ffff8801e3e7a600 R15: 0000000000000001
[ 699.831192] FS: 00007f9bf97f5700(0000) GS:ffff8801f6e00000(0000) knlGS:0000000000000000
[ 699.832793] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 699.833981] CR2: 00007f9bf925d170 CR3: 00000001f0c34000 CR4: 00000000000006f0
[ 699.835556] ==================================================================
[ 699.837029] BUG: KASAN: stack-out-of-bounds in update_stack_state+0x38c/0x3e0
[ 699.838462] Read of size 8 at addr ffff8801f43af970 by task a.out/1309
[ 699.840086] CPU: 0 PID: 1309 Comm: a.out Tainted: G D W 4.18.0-rc1+ #4
[ 699.841603] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
[ 699.843475] Call Trace:
[ 699.843982] dump_stack+0x7b/0xb5
[ 699.844661] print_address_description+0x70/0x290
[ 699.845607] kasan_report+0x291/0x390
[ 699.846351] ? update_stack_state+0x38c/0x3e0
[ 699.853831] __asan_load8+0x54/0x90
[ 699.854569] update_stack_state+0x38c/0x3e0
[ 699.855428] ? __read_once_size_nocheck.constprop.7+0x20/0x20
[ 699.856601] ? __save_stack_trace+0x5e/0x100
[ 699.857476] unwind_next_frame.part.5+0x18e/0x490
[ 699.858448] ? unwind_dump+0x290/0x290
[ 699.859217] ? clear_page_dirty_for_io+0x332/0x450
[ 699.860185] __unwind_start+0x106/0x190
[ 699.860974] __save_stack_trace+0x5e/0x100
[ 699.861808] ? __save_stack_trace+0x5e/0x100
[ 699.862691] ? unlink_anon_vmas+0xba/0x2c0
[ 699.863525] save_stack_trace+0x1f/0x30
[ 699.864312] save_stack+0x46/0xd0
[ 699.864993] ? __alloc_pages_slowpath+0x1420/0x1420
[ 699.865990] ? flush_tlb_mm_range+0x15e/0x220
[ 699.866889] ? kasan_check_write+0x14/0x20
[ 699.867724] ? __dec_node_state+0x92/0xb0
[ 699.868543] ? lock_page_memcg+0x85/0xf0
[ 699.869350] ? unlock_page_memcg+0x16/0x80
[ 699.870185] ? page_remove_rmap+0x198/0x520
[ 699.871048] ? mark_page_accessed+0x133/0x200
[ 699.871930] ? _cond_resched+0x1a/0x50
[ 699.872700] ? unmap_page_range+0xcd4/0xe50
[ 699.873551] ? rb_next+0x58/0x80
[ 699.874217] ? rb_next+0x58/0x80
[ 699.874895] __kasan_slab_free+0x13c/0x1a0
[ 699.875734] ? unlink_anon_vmas+0xba/0x2c0
[ 699.876563] kasan_slab_free+0xe/0x10
[ 699.877315] kmem_cache_free+0x89/0x1e0
[ 699.878095] unlink_anon_vmas+0xba/0x2c0
[ 699.878913] free_pgtables+0x101/0x1b0
[ 699.879677] exit_mmap+0x146/0x2a0
[ 699.880378] ? __ia32_sys_munmap+0x50/0x50
[ 699.881214] ? kasan_check_read+0x11/0x20
[ 699.882052] ? mm_update_next_owner+0x322/0x380
[ 699.882985] mmput+0x8b/0x1d0
[ 699.883602] do_exit+0x43a/0x1390
[ 699.884288] ? mm_update_next_owner+0x380/0x380
[ 699.885212] ? f2fs_sync_file+0x9a/0xb0
[ 699.885995] ? f2fs_do_sync_file+0xd90/0xd90
[ 699.886877] ? vfs_fsync_range+0x68/0x100
[ 699.887694] ? __fget_light+0xc9/0xe0
[ 699.888442] ? do_fsync+0x3d/0x70
[ 699.889118] ? __x64_sys_fdatasync+0x24/0x30
[ 699.889996] rewind_stack_do_exit+0x17/0x20
[ 699.890860] RIP: 0033:0x7f9bf930d800
[ 699.891585] Code: Bad RIP value.
[ 699.892268] RSP: 002b:00007ffee3606c68 EFLAGS: 00000246 ORIG_RAX: 000000000000004b
[ 699.893781] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f9bf930d800
[ 699.895220] RDX: 0000000000008000 RSI: 00000000006010a0 RDI: 0000000000000003
[ 699.896643] RBP: 00007ffee3606ca0 R08: 0000000001503010 R09: 0000000000000000
[ 699.898069] R10: 00000000000002e8 R11: 0000000000000246 R12: 0000000000400610
[ 699.899505] R13: 00007ffee3606da0 R14: 0000000000000000 R15: 0000000000000000
[ 699.901241] The buggy address belongs to the page:
[ 699.902215] page:ffffea0007d0ebc0 count:0 mapcount:0 mapping:0000000000000000 index:0x0
[ 699.903811] flags: 0x2ffff0000000000()
[ 699.904585] raw: 02ffff0000000000 0000000000000000 ffffffff07d00101 0000000000000000
[ 699.906125] raw: 0000000000000000 0000000000240000 00000000ffffffff 0000000000000000
[ 699.907673] page dumped because: kasan: bad access detected
[ 699.909108] Memory state around the buggy address:
[ 699.910077] ffff8801f43af800: 00 f1 f1 f1 f1 00 f4 f4 f4 f3 f3 f3 f3 00 00 00
[ 699.911528] ffff8801f43af880: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[ 699.912953] >ffff8801f43af900: 00 00 00 00 00 00 00 00 f1 01 f4 f4 f4 f2 f2 f2
[ 699.914392] ^
[ 699.915758] ffff8801f43af980: f2 00 f4 f4 00 00 00 00 f2 00 00 00 00 00 00 00
[ 699.917193] ffff8801f43afa00: 00 00 00 00 00 00 00 00 00 f3 f3 f3 00 00 00 00
[ 699.918634] ==================================================================
- Location
https://elixir.bootlin.com/linux/v4.18-rc1/source/fs/f2fs/segment.h#L644
Reported-by: Wen Xu <wen.xu@gatech.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-08-01 18:13:44 +07:00
|
|
|
|
|
|
|
if (__is_valid_data_blkaddr(blkaddr) &&
|
|
|
|
!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC))
|
|
|
|
continue;
|
|
|
|
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking at f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or an f2fs-specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using generically named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
f2fs_invalidate_blocks(sbi, blkaddr);
|
2015-03-18 07:16:35 +07:00
|
|
|
if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page))
|
2016-05-21 00:13:22 +07:00
|
|
|
clear_inode_flag(dn->inode, FI_FIRST_BLOCK_WRITTEN);
|
2012-11-02 15:09:44 +07:00
|
|
|
nr_free++;
|
|
|
|
}
|
f2fs: update extent tree in batches
This patch introduces a new helper, f2fs_update_extent_tree_range, which can
perform an extent mapping update over a specified range.
The main idea is:
1) punch all mapping info in extent node(s) which are at a specified range;
2) try to merge new extent mapping with adjacent node, or failing that,
insert the mapping into extent tree as a new node.
In order to see the benefit, I added a function that reads the time stamp
counter, as below:
uint64_t rdtsc(void)
{
uint32_t lo, hi;
__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
return (uint64_t)hi << 32 | lo;
}
My test environment is: ubuntu, intel i7-3770, 16G memory, 256g micron ssd.
truncation path: update extent cache from truncate_data_blocks_range
non-truncation path: update extent cache from other paths
total: all update paths
a) Removing 128MB file which has one extent node mapping whole range of
file:
1. dd if=/dev/zero of=/mnt/f2fs/128M bs=1M count=128
2. sync
3. rm /mnt/f2fs/128M
Before:
total count average
truncation: 7651022 32768 233.49
Patched:
total count average
truncation: 3321 33 100.64
b) fsstress:
fsstress -d /mnt/f2fs -l 5 -n 100 -p 20
Test times: 5 times.
Before:
total count average
truncation: 5812480.6 20911.6 277.95
non-truncation: 7783845.6 13440.8 579.12
total: 13596326.2 34352.4 395.79
Patched:
total count average
truncation: 1281283.0 3041.6 421.25
non-truncation: 7355844.4 13662.8 538.38
total: 8637127.4 16704.4 517.06
1) For the updates in truncation path:
- we can see updating in batches leads total tsc and update count reducing
explicitly;
- besides, for a single batched updating, punching multiple extent nodes
in a loop, result in executing more operations, so our average tsc
increase intensively.
2) For the updates in non-truncation path:
- there is a little improvement, that is because for the scenario that we
just need to update in the head or tail of extent node, new interface
optimize to update info in extent node directly, rather than removing
original extent node for updating and then inserting that updated one
into cache as new node.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-08-26 19:34:48 +07:00
|
|
|
|
2012-11-02 15:09:44 +07:00
|
|
|
if (nr_free) {
|
f2fs: update extent tree in batches
This patch introduces a new helper, f2fs_update_extent_tree_range, which can
perform an extent mapping update over a specified range.
The main idea is:
1) punch all mapping info in extent node(s) which are at a specified range;
2) try to merge new extent mapping with adjacent node, or failing that,
insert the mapping into extent tree as a new node.
In order to see the benefit, I added a function that reads the time stamp
counter, as below:
uint64_t rdtsc(void)
{
uint32_t lo, hi;
__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
return (uint64_t)hi << 32 | lo;
}
My test environment is: ubuntu, intel i7-3770, 16G memory, 256g micron ssd.
truncation path: update extent cache from truncate_data_blocks_range
non-truncation path: update extent cache from other paths
total: all update paths
a) Removing 128MB file which has one extent node mapping whole range of
file:
1. dd if=/dev/zero of=/mnt/f2fs/128M bs=1M count=128
2. sync
3. rm /mnt/f2fs/128M
Before:
total count average
truncation: 7651022 32768 233.49
Patched:
total count average
truncation: 3321 33 100.64
b) fsstress:
fsstress -d /mnt/f2fs -l 5 -n 100 -p 20
Test times: 5 times.
Before:
total count average
truncation: 5812480.6 20911.6 277.95
non-truncation: 7783845.6 13440.8 579.12
total: 13596326.2 34352.4 395.79
Patched:
total count average
truncation: 1281283.0 3041.6 421.25
non-truncation: 7355844.4 13662.8 538.38
total: 8637127.4 16704.4 517.06
1) For the updates in truncation path:
- we can see updating in batches leads total tsc and update count reducing
explicitly;
- besides, for a single batched updating, punching multiple extent nodes
in a loop, result in executing more operations, so our average tsc
increase intensively.
2) For the updates in non-truncation path:
- there is a little improvement, that is because for the scenario that we
just need to update in the head or tail of extent node, new interface
optimize to update info in extent node directly, rather than removing
original extent node for updating and then inserting that updated one
into cache as new node.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-08-26 19:34:48 +07:00
|
|
|
pgoff_t fofs;
|
|
|
|
/*
|
|
|
|
* once we invalidate valid blkaddr in range [ofs, ofs + count],
|
|
|
|
* we will invalidate all blkaddr in the whole range.
|
|
|
|
*/
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking at f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or an f2fs-specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using generically named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page),
|
2016-01-26 14:39:35 +07:00
|
|
|
dn->inode) + ofs;
|
f2fs: update extent tree in batches
This patch introduces a new helper, f2fs_update_extent_tree_range, which can
perform an extent mapping update over a specified range.
The main idea is:
1) punch all mapping info in extent node(s) which are at a specified range;
2) try to merge new extent mapping with adjacent node, or failing that,
insert the mapping into extent tree as a new node.
In order to see the benefit, I added a function that reads the time stamp
counter, as below:
uint64_t rdtsc(void)
{
uint32_t lo, hi;
__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
return (uint64_t)hi << 32 | lo;
}
My test environment is: ubuntu, intel i7-3770, 16G memory, 256g micron ssd.
truncation path: update extent cache from truncate_data_blocks_range
non-truncation path: update extent cache from other paths
total: all update paths
a) Removing 128MB file which has one extent node mapping whole range of
file:
1. dd if=/dev/zero of=/mnt/f2fs/128M bs=1M count=128
2. sync
3. rm /mnt/f2fs/128M
Before:
total count average
truncation: 7651022 32768 233.49
Patched:
total count average
truncation: 3321 33 100.64
b) fsstress:
fsstress -d /mnt/f2fs -l 5 -n 100 -p 20
Test times: 5 times.
Before:
total count average
truncation: 5812480.6 20911.6 277.95
non-truncation: 7783845.6 13440.8 579.12
total: 13596326.2 34352.4 395.79
Patched:
total count average
truncation: 1281283.0 3041.6 421.25
non-truncation: 7355844.4 13662.8 538.38
total: 8637127.4 16704.4 517.06
1) For the updates in truncation path:
- we can see updating in batches leads total tsc and update count reducing
explicitly;
- besides, for a single batched updating, punching multiple extent nodes
in a loop, result in executing more operations, so our average tsc
increase intensively.
2) For the updates in non-truncation path:
- there is a little improvement, that is because for the scenario that we
just need to update in the head or tail of extent node, new interface
optimize to update info in extent node directly, rather than removing
original extent node for updating and then inserting that updated one
into cache as new node.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-08-26 19:34:48 +07:00
|
|
|
f2fs_update_extent_cache_range(dn, fofs, 0, len);
|
2013-06-08 19:25:40 +07:00
|
|
|
dec_valid_block_count(sbi, dn->inode, nr_free);
|
2012-11-02 15:09:44 +07:00
|
|
|
}
|
|
|
|
dn->ofs_in_node = ofs;
|
2013-04-19 23:28:52 +07:00
|
|
|
|
2016-01-09 07:57:48 +07:00
|
|
|
f2fs_update_time(sbi, REQ_TIME);
|
2013-04-19 23:28:52 +07:00
|
|
|
trace_f2fs_truncate_data_blocks_range(dn->inode, dn->nid,
|
|
|
|
dn->ofs_in_node, nr_free);
|
2012-11-02 15:09:44 +07:00
|
|
|
}
|
|
|
|
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking at f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or an f2fs-specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using generically named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
/*
 * f2fs_truncate_data_blocks - truncate using the maximum per-block count.
 * @dn: dnode descriptor passed through unchanged to the range helper.
 *
 * Thin wrapper: forwards @dn to f2fs_truncate_data_blocks_range() with a
 * count of ADDRS_PER_BLOCK. Returns nothing.
 * NOTE(review): ADDRS_PER_BLOCK is presumably the number of block-address
 * slots held by one node block; confirm against its definition.
 */
void f2fs_truncate_data_blocks(struct dnode_of_data *dn)
|
2012-11-02 15:09:44 +07:00
|
|
|
{
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
f2fs_truncate_data_blocks_range(dn, ADDRS_PER_BLOCK);
|
2012-11-02 15:09:44 +07:00
|
|
|
}
|
|
|
|
|
2015-03-10 12:16:25 +07:00
|
|
|
static int truncate_partial_data_page(struct inode *inode, u64 from,
|
2015-05-01 07:00:33 +07:00
|
|
|
bool cache_only)
|
2012-11-02 15:09:44 +07:00
|
|
|
{
|
2018-05-30 02:21:14 +07:00
|
|
|
loff_t offset = from & (PAGE_SIZE - 1);
|
mm, fs: get rid of PAGE_CACHE_* and page_cache_{get,release} macros
PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} macros were introduced *long* time
ago with promise that one day it will be possible to implement page
cache with bigger chunks than PAGE_SIZE.
This promise never materialized. And unlikely will.
We have many places where PAGE_CACHE_SIZE assumed to be equal to
PAGE_SIZE. And it's constant source of confusion on whether
PAGE_CACHE_* or PAGE_* constant should be used in a particular case,
especially on the border between fs and mm.
Global switching to PAGE_CACHE_SIZE != PAGE_SIZE would cause to much
breakage to be doable.
Let's stop pretending that pages in page cache are special. They are
not.
The changes are pretty straight-forward:
- <foo> << (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- <foo> >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} -> PAGE_{SIZE,SHIFT,MASK,ALIGN};
- page_cache_get() -> get_page();
- page_cache_release() -> put_page();
This patch contains automated changes generated with coccinelle using
script below. For some reason, coccinelle doesn't patch header files.
I've called spatch for them manually.
The only adjustment after coccinelle is revert of changes to
PAGE_CAHCE_ALIGN definition: we are going to drop it later.
There are few places in the code where coccinelle didn't reach. I'll
fix them manually in a separate patch. Comments and documentation also
will be addressed with the separate patch.
virtual patch
@@
expression E;
@@
- E << (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
expression E;
@@
- E >> (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
@@
- PAGE_CACHE_SHIFT
+ PAGE_SHIFT
@@
@@
- PAGE_CACHE_SIZE
+ PAGE_SIZE
@@
@@
- PAGE_CACHE_MASK
+ PAGE_MASK
@@
expression E;
@@
- PAGE_CACHE_ALIGN(E)
+ PAGE_ALIGN(E)
@@
expression E;
@@
- page_cache_get(E)
+ get_page(E)
@@
expression E;
@@
- page_cache_release(E)
+ put_page(E)
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-04-01 19:29:47 +07:00
|
|
|
pgoff_t index = from >> PAGE_SHIFT;
|
2015-05-01 07:00:33 +07:00
|
|
|
struct address_space *mapping = inode->i_mapping;
|
2012-11-02 15:09:44 +07:00
|
|
|
struct page *page;
|
|
|
|
|
2015-05-01 07:00:33 +07:00
|
|
|
if (!offset && !cache_only)
|
2014-10-24 09:48:09 +07:00
|
|
|
return 0;
|
2012-11-02 15:09:44 +07:00
|
|
|
|
2015-05-01 07:00:33 +07:00
|
|
|
if (cache_only) {
|
2016-09-07 05:55:54 +07:00
|
|
|
page = find_lock_page(mapping, index);
|
2015-05-01 07:00:33 +07:00
|
|
|
if (page && PageUptodate(page))
|
|
|
|
goto truncate_out;
|
|
|
|
f2fs_put_page(page, 1);
|
2014-10-24 09:48:09 +07:00
|
|
|
return 0;
|
2015-05-01 07:00:33 +07:00
|
|
|
}
|
2012-11-02 15:09:44 +07:00
|
|
|
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
page = f2fs_get_lock_data_page(inode, index, true);
|
2015-05-01 07:00:33 +07:00
|
|
|
if (IS_ERR(page))
|
2017-02-28 19:32:41 +07:00
|
|
|
return PTR_ERR(page) == -ENOENT ? 0 : PTR_ERR(page);
|
2015-05-01 07:00:33 +07:00
|
|
|
truncate_out:
|
2018-12-25 16:43:42 +07:00
|
|
|
f2fs_wait_on_page_writeback(page, DATA, true, true);
|
mm, fs: get rid of PAGE_CACHE_* and page_cache_{get,release} macros
PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} macros were introduced *long* time
ago with promise that one day it will be possible to implement page
cache with bigger chunks than PAGE_SIZE.
This promise never materialized. And unlikely will.
We have many places where PAGE_CACHE_SIZE assumed to be equal to
PAGE_SIZE. And it's constant source of confusion on whether
PAGE_CACHE_* or PAGE_* constant should be used in a particular case,
especially on the border between fs and mm.
Global switching to PAGE_CACHE_SIZE != PAGE_SIZE would cause too much
breakage to be doable.
Let's stop pretending that pages in page cache are special. They are
not.
The changes are pretty straight-forward:
- <foo> << (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- <foo> >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} -> PAGE_{SIZE,SHIFT,MASK,ALIGN};
- page_cache_get() -> get_page();
- page_cache_release() -> put_page();
This patch contains automated changes generated with coccinelle using
script below. For some reason, coccinelle doesn't patch header files.
I've called spatch for them manually.
The only adjustment after coccinelle is revert of changes to
PAGE_CACHE_ALIGN definition: we are going to drop it later.
There are few places in the code where coccinelle didn't reach. I'll
fix them manually in a separate patch. Comments and documentation also
will be addressed with the separate patch.
virtual patch
@@
expression E;
@@
- E << (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
expression E;
@@
- E >> (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
@@
- PAGE_CACHE_SHIFT
+ PAGE_SHIFT
@@
@@
- PAGE_CACHE_SIZE
+ PAGE_SIZE
@@
@@
- PAGE_CACHE_MASK
+ PAGE_MASK
@@
expression E;
@@
- PAGE_CACHE_ALIGN(E)
+ PAGE_ALIGN(E)
@@
expression E;
@@
- page_cache_get(E)
+ get_page(E)
@@
expression E;
@@
- page_cache_release(E)
+ put_page(E)
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-04-01 19:29:47 +07:00
|
|
|
zero_user(page, offset, PAGE_SIZE - offset);
|
2017-06-14 22:05:32 +07:00
|
|
|
|
|
|
|
/* An encrypted inode should have a key and truncate the last page. */
|
|
|
|
f2fs_bug_on(F2FS_I_SB(inode), cache_only && f2fs_encrypted_inode(inode));
|
|
|
|
if (!cache_only)
|
2015-03-10 12:16:25 +07:00
|
|
|
set_page_dirty(page);
|
2012-11-02 15:09:44 +07:00
|
|
|
f2fs_put_page(page, 1);
|
2014-10-24 09:48:09 +07:00
|
|
|
return 0;
|
2012-11-02 15:09:44 +07:00
|
|
|
}
|
|
|
|
|
f2fs: guarantee journalled quota data by checkpoint
For journalled quota mode, let checkpoint to flush dquot dirty data
and quota file data to guarantee persistence of all quota sysfile in
last checkpoint, by this way, we can avoid corrupting quota sysfile
when encountering SPO.
The implementation is as below:
1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there is
cached dquot metadata changes in quota subsystem, and later checkpoint
should:
a) flush dquot metadata into quota file.
b) flush quota file to storage to keep file usage be consistent.
2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that quota
operation failed due to -EIO or -ENOSPC, so later,
a) checkpoint will skip syncing dquot metadata.
b) CP_QUOTA_NEED_FSCK_FLAG will be set in last cp pack to give a
hint for fsck repairing.
3. add a global state SBI_QUOTA_SKIP_FLUSH, in checkpoint, if quota
data updating is very heavy, it may cause hungtask in block_operation().
To avoid this, if our retry time exceed threshold, let's just skip
flushing and retry in next checkpoint().
Signed-off-by: Weichao Guo <guoweichao@huawei.com>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: avoid warnings and set fsck flag]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-09-20 19:05:00 +07:00
|
|
|
/*
 * Drop the blocks of @inode from the block index computed out of @from
 * (via F2FS_BLK_ALIGN) onwards, then zero the tail of the page that
 * contains @from.
 *
 * @lock:      when true, __do_map_lock() is taken before and released after
 *             the node/block truncation.
 * @buf_write: chooses the flag handed to __do_map_lock():
 *             F2FS_GET_BLOCK_PRE_AIO when true, F2FS_GET_BLOCK_PRE_DIO
 *             otherwise.
 *
 * Returns 0 on success, or the error reported by the failing helper.
 */
int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock,
							bool buf_write)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct dnode_of_data dn;
	struct page *inode_page;
	pgoff_t free_from;
	bool cache_only = false;
	int nr_addrs = 0;
	int err = 0;
	int flag;

	if (buf_write)
		flag = F2FS_GET_BLOCK_PRE_AIO;
	else
		flag = F2FS_GET_BLOCK_PRE_DIO;

	trace_f2fs_truncate_blocks_enter(inode, from);

	free_from = (pgoff_t)F2FS_BLK_ALIGN(from);

	/* Nothing to free past the per-file limit; only the partial page remains. */
	if (free_from >= sbi->max_file_blocks)
		goto free_partial;

	if (lock)
		__do_map_lock(sbi, flag, true);

	inode_page = f2fs_get_node_page(sbi, inode->i_ino);
	if (IS_ERR(inode_page)) {
		err = PTR_ERR(inode_page);
		goto out;
	}

	/*
	 * Inline data is truncated inside the inode page itself; afterwards
	 * the partial-page helper is asked to work on the page cache only.
	 */
	if (f2fs_has_inline_data(inode)) {
		f2fs_truncate_inline_inode(inode, inode_page, from);
		f2fs_put_page(inode_page, 1);
		cache_only = true;
		goto out;
	}

	set_new_dnode(&dn, inode, inode_page, NULL, 0);
	err = f2fs_get_dnode_of_data(&dn, free_from, LOOKUP_NODE_RA);
	/* -ENOENT is not fatal here: continue with the inode-block truncation. */
	if (err == -ENOENT)
		goto free_next;
	if (err)
		goto out;

	/* Address slots left in this node page, counted from dn.ofs_in_node. */
	nr_addrs = ADDRS_PER_PAGE(dn.node_page, inode) - dn.ofs_in_node;
	f2fs_bug_on(sbi, nr_addrs < 0);

	if (dn.ofs_in_node || IS_INODE(dn.node_page)) {
		f2fs_truncate_data_blocks_range(&dn, nr_addrs);
		free_from += nr_addrs;
	}

	f2fs_put_dnode(&dn);
free_next:
	err = f2fs_truncate_inode_blocks(inode, free_from);
out:
	if (lock)
		__do_map_lock(sbi, flag, false);
free_partial:
	/* Last step: zero the remainder of the page holding @from. */
	if (!err)
		err = truncate_partial_data_page(inode, from, cache_only);

	trace_f2fs_truncate_blocks_exit(inode, err);
	return err;
}
|
|
|
|
|
2016-06-03 03:49:38 +07:00
|
|
|
int f2fs_truncate(struct inode *inode)
|
2012-11-02 15:09:44 +07:00
|
|
|
{
|
2015-08-24 16:39:42 +07:00
|
|
|
int err;
|
|
|
|
|
2017-10-24 04:48:49 +07:00
|
|
|
if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
|
|
|
|
return -EIO;
|
|
|
|
|
2012-11-02 15:09:44 +07:00
|
|
|
if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
|
|
|
|
S_ISLNK(inode->i_mode)))
|
2015-08-24 16:39:42 +07:00
|
|
|
return 0;
|
2012-11-02 15:09:44 +07:00
|
|
|
|
2013-04-19 23:28:52 +07:00
|
|
|
trace_f2fs_truncate(inode);
|
|
|
|
|
2017-03-10 06:24:24 +07:00
|
|
|
if (time_to_inject(F2FS_I_SB(inode), FAULT_TRUNCATE)) {
|
|
|
|
f2fs_show_injection_info(FAULT_TRUNCATE);
|
|
|
|
return -EIO;
|
|
|
|
}
|
2018-08-14 04:38:06 +07:00
|
|
|
|
2014-11-12 05:10:01 +07:00
|
|
|
/* we should check inline_data size */
|
2015-12-23 02:09:35 +07:00
|
|
|
if (!f2fs_may_inline_data(inode)) {
|
2015-08-24 16:39:42 +07:00
|
|
|
err = f2fs_convert_inline_inode(inode);
|
|
|
|
if (err)
|
|
|
|
return err;
|
2014-11-12 05:10:01 +07:00
|
|
|
}
|
|
|
|
|
f2fs: guarantee journalled quota data by checkpoint
For journalled quota mode, let checkpoint to flush dquot dirty data
and quota file data to guarntee persistence of all quota sysfile in
last checkpoint, by this way, we can avoid corrupting quota sysfile
when encountering SPO.
The implementation is as below:
1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there is
cached dquot metadata changes in quota subsystem, and later checkpoint
should:
a) flush dquot metadata into quota file.
b) flush quota file to storage to keep file usage be consistent.
2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that quota
operation failed due to -EIO or -ENOSPC, so later,
a) checkpoint will skip syncing dquot metadata.
b) CP_QUOTA_NEED_FSCK_FLAG will be set in last cp pack to give a
hint for fsck repairing.
3. add a global state SBI_QUOTA_SKIP_FLUSH, in checkpoint, if quota
data updating is very heavy, it may cause hungtask in block_operation().
To avoid this, if our retry time exceed threshold, let's just skip
flushing and retry in next checkpoint().
Signed-off-by: Weichao Guo <guoweichao@huawei.com>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: avoid warnings and set fsck flag]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-09-20 19:05:00 +07:00
|
|
|
err = f2fs_truncate_blocks(inode, i_size_read(inode), true, false);
|
2015-08-24 16:39:42 +07:00
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
2016-09-14 21:48:04 +07:00
|
|
|
inode->i_mtime = inode->i_ctime = current_time(inode);
|
2016-10-15 01:51:23 +07:00
|
|
|
f2fs_mark_inode_dirty_sync(inode, false);
|
2015-08-24 16:39:42 +07:00
|
|
|
return 0;
|
2012-11-02 15:09:44 +07:00
|
|
|
}
|
|
|
|
|
statx: Add a system call to make enhanced file info available
Add a system call to make extended file information available, including
file creation and some attribute flags where available through the
underlying filesystem.
The getattr inode operation is altered to take two additional arguments: a
u32 request_mask and an unsigned int flags that indicate the
synchronisation mode. This change is propagated to the vfs_getattr*()
function.
Functions like vfs_stat() are now inline wrappers around new functions
vfs_statx() and vfs_statx_fd() to reduce stack usage.
========
OVERVIEW
========
The idea was initially proposed as a set of xattrs that could be retrieved
with getxattr(), but the general preference proved to be for a new syscall
with an extended stat structure.
A number of requests were gathered for features to be included. The
following have been included:
(1) Make the fields a consistent size on all arches and make them large.
(2) Spare space, request flags and information flags are provided for
future expansion.
(3) Better support for the y2038 problem [Arnd Bergmann] (tv_sec is an
__s64).
(4) Creation time: The SMB protocol carries the creation time, which could
be exported by Samba, which will in turn help CIFS make use of
FS-Cache as that can be used for coherency data (stx_btime).
This is also specified in NFSv4 as a recommended attribute and could
be exported by NFSD [Steve French].
(5) Lightweight stat: Ask for just those details of interest, and allow a
netfs (such as NFS) to approximate anything not of interest, possibly
without going to the server [Trond Myklebust, Ulrich Drepper, Andreas
Dilger] (AT_STATX_DONT_SYNC).
(6) Heavyweight stat: Force a netfs to go to the server, even if it thinks
its cached attributes are up to date [Trond Myklebust]
(AT_STATX_FORCE_SYNC).
And the following have been left out for future extension:
(7) Data version number: Could be used by userspace NFS servers [Aneesh
Kumar].
Can also be used to modify fill_post_wcc() in NFSD which retrieves
i_version directly, but has just called vfs_getattr(). It could get
it from the kstat struct if it used vfs_xgetattr() instead.
(There's disagreement on the exact semantics of a single field, since
not all filesystems do this the same way).
(8) BSD stat compatibility: Including more fields from the BSD stat such
as creation time (st_btime) and inode generation number (st_gen)
[Jeremy Allison, Bernd Schubert].
(9) Inode generation number: Useful for FUSE and userspace NFS servers
[Bernd Schubert].
(This was asked for but later deemed unnecessary with the
open-by-handle capability available and caused disagreement as to
whether it's a security hole or not).
(10) Extra coherency data may be useful in making backups [Andreas Dilger].
(No particular data were offered, but things like last backup
timestamp, the data version number and the DOS archive bit would come
into this category).
(11) Allow the filesystem to indicate what it can/cannot provide: A
filesystem can now say it doesn't support a standard stat feature if
that isn't available, so if, for instance, inode numbers or UIDs don't
exist or are fabricated locally...
(This requires a separate system call - I have an fsinfo() call idea
for this).
(12) Store a 16-byte volume ID in the superblock that can be returned in
struct xstat [Steve French].
(Deferred to fsinfo).
(13) Include granularity fields in the time data to indicate the
granularity of each of the times (NFSv4 time_delta) [Steve French].
(Deferred to fsinfo).
(14) FS_IOC_GETFLAGS value. These could be translated to BSD's st_flags.
Note that the Linux IOC flags are a mess and filesystems such as Ext4
define flags that aren't in linux/fs.h, so translation in the kernel
may be a necessity (or, possibly, we provide the filesystem type too).
(Some attributes are made available in stx_attributes, but the general
feeling was that the IOC flags were too ext[234]-specific and shouldn't
be exposed through statx this way).
(15) Mask of features available on file (eg: ACLs, seclabel) [Brad Boyer,
Michael Kerrisk].
(Deferred, probably to fsinfo. Finding out if there's an ACL or
seclabel might require extra filesystem operations).
(16) Femtosecond-resolution timestamps [Dave Chinner].
(A __reserved field has been left in the statx_timestamp struct for
this - if there proves to be a need).
(17) A set multiple attributes syscall to go with this.
===============
NEW SYSTEM CALL
===============
The new system call is:
int ret = statx(int dfd,
const char *filename,
unsigned int flags,
unsigned int mask,
struct statx *buffer);
The dfd, filename and flags parameters indicate the file to query, in a
similar way to fstatat(). There is no equivalent of lstat() as that can be
emulated with statx() by passing AT_SYMLINK_NOFOLLOW in flags. There is
also no equivalent of fstat() as that can be emulated by passing a NULL
filename to statx() with the fd of interest in dfd.
Whether or not statx() synchronises the attributes with the backing store
can be controlled by OR'ing a value into the flags argument (this typically
only affects network filesystems):
(1) AT_STATX_SYNC_AS_STAT tells statx() to behave as stat() does in this
respect.
(2) AT_STATX_FORCE_SYNC will require a network filesystem to synchronise
its attributes with the server - which might require data writeback to
occur to get the timestamps correct.
(3) AT_STATX_DONT_SYNC will suppress synchronisation with the server in a
network filesystem. The resulting values should be considered
approximate.
mask is a bitmask indicating the fields in struct statx that are of
interest to the caller. The user should set this to STATX_BASIC_STATS to
get the basic set returned by stat(). It should be noted that asking for
more information may entail extra I/O operations.
buffer points to the destination for the data. This must be 256 bytes in
size.
======================
MAIN ATTRIBUTES RECORD
======================
The following structures are defined in which to return the main attribute
set:
struct statx_timestamp {
__s64 tv_sec;
__s32 tv_nsec;
__s32 __reserved;
};
struct statx {
__u32 stx_mask;
__u32 stx_blksize;
__u64 stx_attributes;
__u32 stx_nlink;
__u32 stx_uid;
__u32 stx_gid;
__u16 stx_mode;
__u16 __spare0[1];
__u64 stx_ino;
__u64 stx_size;
__u64 stx_blocks;
__u64 __spare1[1];
struct statx_timestamp stx_atime;
struct statx_timestamp stx_btime;
struct statx_timestamp stx_ctime;
struct statx_timestamp stx_mtime;
__u32 stx_rdev_major;
__u32 stx_rdev_minor;
__u32 stx_dev_major;
__u32 stx_dev_minor;
__u64 __spare2[14];
};
The defined bits in request_mask and stx_mask are:
STATX_TYPE Want/got stx_mode & S_IFMT
STATX_MODE Want/got stx_mode & ~S_IFMT
STATX_NLINK Want/got stx_nlink
STATX_UID Want/got stx_uid
STATX_GID Want/got stx_gid
STATX_ATIME Want/got stx_atime{,_ns}
STATX_MTIME Want/got stx_mtime{,_ns}
STATX_CTIME Want/got stx_ctime{,_ns}
STATX_INO Want/got stx_ino
STATX_SIZE Want/got stx_size
STATX_BLOCKS Want/got stx_blocks
STATX_BASIC_STATS [The stuff in the normal stat struct]
STATX_BTIME Want/got stx_btime{,_ns}
STATX_ALL [All currently available stuff]
stx_btime is the file creation time, stx_mask is a bitmask indicating the
data provided and __spare*[] are where as-yet undefined fields can be
placed.
Time fields are structures with separate seconds and nanoseconds fields
plus a reserved field in case we want to add even finer resolution. Note
that times will be negative if before 1970; in such a case, the nanosecond
fields will also be negative if not zero.
The bits defined in the stx_attributes field convey information about a
file, how it is accessed, where it is and what it does. The following
attributes map to FS_*_FL flags and are the same numerical value:
STATX_ATTR_COMPRESSED File is compressed by the fs
STATX_ATTR_IMMUTABLE File is marked immutable
STATX_ATTR_APPEND File is append-only
STATX_ATTR_NODUMP File is not to be dumped
STATX_ATTR_ENCRYPTED File requires key to decrypt in fs
Within the kernel, the supported flags are listed by:
KSTAT_ATTR_FS_IOC_FLAGS
[Are any other IOC flags of sufficient general interest to be exposed
through this interface?]
New flags include:
STATX_ATTR_AUTOMOUNT Object is an automount trigger
These are for the use of GUI tools that might want to mark files specially,
depending on what they are.
Fields in struct statx come in a number of classes:
(0) stx_dev_*, stx_blksize.
These are local system information and are always available.
(1) stx_mode, stx_nlink, stx_uid, stx_gid, stx_[amc]time, stx_ino,
stx_size, stx_blocks.
These will be returned whether the caller asks for them or not. The
corresponding bits in stx_mask will be set to indicate whether they
actually have valid values.
If the caller didn't ask for them, then they may be approximated. For
example, NFS won't waste any time updating them from the server,
unless as a byproduct of updating something requested.
If the values don't actually exist for the underlying object (such as
UID or GID on a DOS file), then the bit won't be set in the stx_mask,
even if the caller asked for the value. In such a case, the returned
value will be a fabrication.
Note that there are instances where the type might not be valid, for
instance Windows reparse points.
(2) stx_rdev_*.
This will be set only if stx_mode indicates we're looking at a
blockdev or a chardev, otherwise will be 0.
(3) stx_btime.
Similar to (1), except this will be set to 0 if it doesn't exist.
=======
TESTING
=======
The following test program can be used to test the statx system call:
samples/statx/test-statx.c
Just compile and run, passing it paths to the files you want to examine.
The file is built automatically if CONFIG_SAMPLES is enabled.
Here's some example output. Firstly, an NFS directory that crosses to
another FSID. Note that the AUTOMOUNT attribute is set because transiting
this directory will cause d_automount to be invoked by the VFS.
[root@andromeda ~]# /tmp/test-statx -A /warthog/data
statx(/warthog/data) = 0
results=7ff
Size: 4096 Blocks: 8 IO Block: 1048576 directory
Device: 00:26 Inode: 1703937 Links: 125
Access: (3777/drwxrwxrwx) Uid: 0 Gid: 4041
Access: 2016-11-24 09:02:12.219699527+0000
Modify: 2016-11-17 10:44:36.225653653+0000
Change: 2016-11-17 10:44:36.225653653+0000
Attributes: 0000000000001000 (-------- -------- -------- -------- -------- -------- ---m---- --------)
Secondly, the result of automounting on that directory.
[root@andromeda ~]# /tmp/test-statx /warthog/data
statx(/warthog/data) = 0
results=7ff
Size: 4096 Blocks: 8 IO Block: 1048576 directory
Device: 00:27 Inode: 2 Links: 125
Access: (3777/drwxrwxrwx) Uid: 0 Gid: 4041
Access: 2016-11-24 09:02:12.219699527+0000
Modify: 2016-11-17 10:44:36.225653653+0000
Change: 2016-11-17 10:44:36.225653653+0000
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2017-01-31 23:46:22 +07:00
|
|
|
int f2fs_getattr(const struct path *path, struct kstat *stat,
|
2017-05-03 22:59:12 +07:00
|
|
|
u32 request_mask, unsigned int query_flags)
|
2012-11-02 15:09:44 +07:00
|
|
|
{
|
statx: Add a system call to make enhanced file info available
Add a system call to make extended file information available, including
file creation and some attribute flags where available through the
underlying filesystem.
The getattr inode operation is altered to take two additional arguments: a
u32 request_mask and an unsigned int flags that indicate the
synchronisation mode. This change is propagated to the vfs_getattr*()
function.
Functions like vfs_stat() are now inline wrappers around new functions
vfs_statx() and vfs_statx_fd() to reduce stack usage.
========
OVERVIEW
========
The idea was initially proposed as a set of xattrs that could be retrieved
with getxattr(), but the general preference proved to be for a new syscall
with an extended stat structure.
A number of requests were gathered for features to be included. The
following have been included:
(1) Make the fields a consistent size on all arches and make them large.
(2) Spare space, request flags and information flags are provided for
future expansion.
(3) Better support for the y2038 problem [Arnd Bergmann] (tv_sec is an
__s64).
(4) Creation time: The SMB protocol carries the creation time, which could
be exported by Samba, which will in turn help CIFS make use of
FS-Cache as that can be used for coherency data (stx_btime).
This is also specified in NFSv4 as a recommended attribute and could
be exported by NFSD [Steve French].
(5) Lightweight stat: Ask for just those details of interest, and allow a
netfs (such as NFS) to approximate anything not of interest, possibly
without going to the server [Trond Myklebust, Ulrich Drepper, Andreas
Dilger] (AT_STATX_DONT_SYNC).
(6) Heavyweight stat: Force a netfs to go to the server, even if it thinks
its cached attributes are up to date [Trond Myklebust]
(AT_STATX_FORCE_SYNC).
And the following have been left out for future extension:
(7) Data version number: Could be used by userspace NFS servers [Aneesh
Kumar].
Can also be used to modify fill_post_wcc() in NFSD which retrieves
i_version directly, but has just called vfs_getattr(). It could get
it from the kstat struct if it used vfs_xgetattr() instead.
(There's disagreement on the exact semantics of a single field, since
not all filesystems do this the same way).
(8) BSD stat compatibility: Including more fields from the BSD stat such
as creation time (st_btime) and inode generation number (st_gen)
[Jeremy Allison, Bernd Schubert].
(9) Inode generation number: Useful for FUSE and userspace NFS servers
[Bernd Schubert].
(This was asked for but later deemed unnecessary with the
open-by-handle capability available and caused disagreement as to
whether it's a security hole or not).
(10) Extra coherency data may be useful in making backups [Andreas Dilger].
(No particular data were offered, but things like last backup
timestamp, the data version number and the DOS archive bit would come
into this category).
(11) Allow the filesystem to indicate what it can/cannot provide: A
filesystem can now say it doesn't support a standard stat feature if
that isn't available, so if, for instance, inode numbers or UIDs don't
exist or are fabricated locally...
(This requires a separate system call - I have an fsinfo() call idea
for this).
(12) Store a 16-byte volume ID in the superblock that can be returned in
struct xstat [Steve French].
(Deferred to fsinfo).
(13) Include granularity fields in the time data to indicate the
granularity of each of the times (NFSv4 time_delta) [Steve French].
(Deferred to fsinfo).
(14) FS_IOC_GETFLAGS value. These could be translated to BSD's st_flags.
Note that the Linux IOC flags are a mess and filesystems such as Ext4
define flags that aren't in linux/fs.h, so translation in the kernel
may be a necessity (or, possibly, we provide the filesystem type too).
(Some attributes are made available in stx_attributes, but the general
feeling was that the IOC flags were too ext[234]-specific and shouldn't
be exposed through statx this way).
(15) Mask of features available on file (eg: ACLs, seclabel) [Brad Boyer,
Michael Kerrisk].
(Deferred, probably to fsinfo. Finding out if there's an ACL or
seclabel might require extra filesystem operations).
(16) Femtosecond-resolution timestamps [Dave Chinner].
(A __reserved field has been left in the statx_timestamp struct for
this - if there proves to be a need).
(17) A set multiple attributes syscall to go with this.
===============
NEW SYSTEM CALL
===============
The new system call is:
int ret = statx(int dfd,
const char *filename,
unsigned int flags,
unsigned int mask,
struct statx *buffer);
The dfd, filename and flags parameters indicate the file to query, in a
similar way to fstatat(). There is no equivalent of lstat() as that can be
emulated with statx() by passing AT_SYMLINK_NOFOLLOW in flags. There is
also no equivalent of fstat() as that can be emulated by passing a NULL
filename to statx() with the fd of interest in dfd.
Whether or not statx() synchronises the attributes with the backing store
can be controlled by OR'ing a value into the flags argument (this typically
only affects network filesystems):
(1) AT_STATX_SYNC_AS_STAT tells statx() to behave as stat() does in this
respect.
(2) AT_STATX_FORCE_SYNC will require a network filesystem to synchronise
its attributes with the server - which might require data writeback to
occur to get the timestamps correct.
(3) AT_STATX_DONT_SYNC will suppress synchronisation with the server in a
network filesystem. The resulting values should be considered
approximate.
mask is a bitmask indicating the fields in struct statx that are of
interest to the caller. The user should set this to STATX_BASIC_STATS to
get the basic set returned by stat(). It should be noted that asking for
more information may entail extra I/O operations.
buffer points to the destination for the data. This must be 256 bytes in
size.
======================
MAIN ATTRIBUTES RECORD
======================
The following structures are defined in which to return the main attribute
set:
struct statx_timestamp {
__s64 tv_sec;
__s32 tv_nsec;
__s32 __reserved;
};
struct statx {
__u32 stx_mask;
__u32 stx_blksize;
__u64 stx_attributes;
__u32 stx_nlink;
__u32 stx_uid;
__u32 stx_gid;
__u16 stx_mode;
__u16 __spare0[1];
__u64 stx_ino;
__u64 stx_size;
__u64 stx_blocks;
__u64 __spare1[1];
struct statx_timestamp stx_atime;
struct statx_timestamp stx_btime;
struct statx_timestamp stx_ctime;
struct statx_timestamp stx_mtime;
__u32 stx_rdev_major;
__u32 stx_rdev_minor;
__u32 stx_dev_major;
__u32 stx_dev_minor;
__u64 __spare2[14];
};
The defined bits in request_mask and stx_mask are:
STATX_TYPE Want/got stx_mode & S_IFMT
STATX_MODE Want/got stx_mode & ~S_IFMT
STATX_NLINK Want/got stx_nlink
STATX_UID Want/got stx_uid
STATX_GID Want/got stx_gid
STATX_ATIME Want/got stx_atime{,_ns}
STATX_MTIME Want/got stx_mtime{,_ns}
STATX_CTIME Want/got stx_ctime{,_ns}
STATX_INO Want/got stx_ino
STATX_SIZE Want/got stx_size
STATX_BLOCKS Want/got stx_blocks
STATX_BASIC_STATS [The stuff in the normal stat struct]
STATX_BTIME Want/got stx_btime{,_ns}
STATX_ALL [All currently available stuff]
stx_btime is the file creation time, stx_mask is a bitmask indicating the
data provided and __spare*[] are where as-yet undefined fields can be
placed.
Time fields are structures with separate seconds and nanoseconds fields
plus a reserved field in case we want to add even finer resolution. Note
that times will be negative if before 1970; in such a case, the nanosecond
fields will also be negative if not zero.
The bits defined in the stx_attributes field convey information about a
file, how it is accessed, where it is and what it does. The following
attributes map to FS_*_FL flags and are the same numerical value:
STATX_ATTR_COMPRESSED File is compressed by the fs
STATX_ATTR_IMMUTABLE File is marked immutable
STATX_ATTR_APPEND File is append-only
STATX_ATTR_NODUMP File is not to be dumped
STATX_ATTR_ENCRYPTED File requires key to decrypt in fs
Within the kernel, the supported flags are listed by:
KSTAT_ATTR_FS_IOC_FLAGS
[Are any other IOC flags of sufficient general interest to be exposed
through this interface?]
New flags include:
STATX_ATTR_AUTOMOUNT Object is an automount trigger
These are for the use of GUI tools that might want to mark files specially,
depending on what they are.
Fields in struct statx come in a number of classes:
(0) stx_dev_*, stx_blksize.
These are local system information and are always available.
(1) stx_mode, stx_nlink, stx_uid, stx_gid, stx_[amc]time, stx_ino,
stx_size, stx_blocks.
These will be returned whether the caller asks for them or not. The
corresponding bits in stx_mask will be set to indicate whether they
actually have valid values.
If the caller didn't ask for them, then they may be approximated. For
example, NFS won't waste any time updating them from the server,
unless as a byproduct of updating something requested.
If the values don't actually exist for the underlying object (such as
UID or GID on a DOS file), then the bit won't be set in the stx_mask,
even if the caller asked for the value. In such a case, the returned
value will be a fabrication.
Note that there are instances where the type might not be valid, for
instance Windows reparse points.
(2) stx_rdev_*.
This will be set only if stx_mode indicates we're looking at a
blockdev or a chardev, otherwise will be 0.
(3) stx_btime.
Similar to (1), except this will be set to 0 if it doesn't exist.
=======
TESTING
=======
The following test program can be used to test the statx system call:
samples/statx/test-statx.c
Just compile and run, passing it paths to the files you want to examine.
The file is built automatically if CONFIG_SAMPLES is enabled.
Here's some example output. Firstly, an NFS directory that crosses to
another FSID. Note that the AUTOMOUNT attribute is set because transiting
this directory will cause d_automount to be invoked by the VFS.
[root@andromeda ~]# /tmp/test-statx -A /warthog/data
statx(/warthog/data) = 0
results=7ff
Size: 4096 Blocks: 8 IO Block: 1048576 directory
Device: 00:26 Inode: 1703937 Links: 125
Access: (3777/drwxrwxrwx) Uid: 0 Gid: 4041
Access: 2016-11-24 09:02:12.219699527+0000
Modify: 2016-11-17 10:44:36.225653653+0000
Change: 2016-11-17 10:44:36.225653653+0000
Attributes: 0000000000001000 (-------- -------- -------- -------- -------- -------- ---m---- --------)
Secondly, the result of automounting on that directory.
[root@andromeda ~]# /tmp/test-statx /warthog/data
statx(/warthog/data) = 0
results=7ff
Size: 4096 Blocks: 8 IO Block: 1048576 directory
Device: 00:27 Inode: 2 Links: 125
Access: (3777/drwxrwxrwx) Uid: 0 Gid: 4041
Access: 2016-11-24 09:02:12.219699527+0000
Modify: 2016-11-17 10:44:36.225653653+0000
Change: 2016-11-17 10:44:36.225653653+0000
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2017-01-31 23:46:22 +07:00
|
|
|
struct inode *inode = d_inode(path->dentry);
|
2017-05-03 22:59:12 +07:00
|
|
|
struct f2fs_inode_info *fi = F2FS_I(inode);
|
2018-01-25 13:54:42 +07:00
|
|
|
struct f2fs_inode *ri;
|
2017-05-03 22:59:12 +07:00
|
|
|
unsigned int flags;
|
|
|
|
|
2018-01-25 13:54:42 +07:00
|
|
|
if (f2fs_has_extra_attr(inode) &&
|
2018-10-24 17:34:26 +07:00
|
|
|
f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)) &&
|
2018-01-25 13:54:42 +07:00
|
|
|
F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_crtime)) {
|
|
|
|
stat->result_mask |= STATX_BTIME;
|
|
|
|
stat->btime.tv_sec = fi->i_crtime.tv_sec;
|
|
|
|
stat->btime.tv_nsec = fi->i_crtime.tv_nsec;
|
|
|
|
}
|
|
|
|
|
2018-04-08 10:25:53 +07:00
|
|
|
flags = fi->i_flags & F2FS_FL_USER_VISIBLE;
|
2018-04-03 14:08:17 +07:00
|
|
|
if (flags & F2FS_APPEND_FL)
|
2017-05-03 22:59:12 +07:00
|
|
|
stat->attributes |= STATX_ATTR_APPEND;
|
2018-04-03 14:08:17 +07:00
|
|
|
if (flags & F2FS_COMPR_FL)
|
2017-05-03 22:59:12 +07:00
|
|
|
stat->attributes |= STATX_ATTR_COMPRESSED;
|
|
|
|
if (f2fs_encrypted_inode(inode))
|
|
|
|
stat->attributes |= STATX_ATTR_ENCRYPTED;
|
2018-04-03 14:08:17 +07:00
|
|
|
if (flags & F2FS_IMMUTABLE_FL)
|
2017-05-03 22:59:12 +07:00
|
|
|
stat->attributes |= STATX_ATTR_IMMUTABLE;
|
2018-04-03 14:08:17 +07:00
|
|
|
if (flags & F2FS_NODUMP_FL)
|
2017-05-03 22:59:12 +07:00
|
|
|
stat->attributes |= STATX_ATTR_NODUMP;
|
|
|
|
|
|
|
|
stat->attributes_mask |= (STATX_ATTR_APPEND |
|
|
|
|
STATX_ATTR_COMPRESSED |
|
|
|
|
STATX_ATTR_ENCRYPTED |
|
|
|
|
STATX_ATTR_IMMUTABLE |
|
|
|
|
STATX_ATTR_NODUMP);
|
|
|
|
|
2012-11-02 15:09:44 +07:00
|
|
|
generic_fillattr(inode, stat);
|
2017-10-14 00:27:45 +07:00
|
|
|
|
|
|
|
/* we need to show initial sectors used for inline_data/dentries */
|
|
|
|
if ((S_ISREG(inode->i_mode) && f2fs_has_inline_data(inode)) ||
|
|
|
|
f2fs_has_inline_dentry(inode))
|
|
|
|
stat->blocks += (stat->size + 511) >> 9;
|
|
|
|
|
2012-11-02 15:09:44 +07:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef CONFIG_F2FS_FS_POSIX_ACL
|
|
|
|
static void __setattr_copy(struct inode *inode, const struct iattr *attr)
|
|
|
|
{
|
|
|
|
unsigned int ia_valid = attr->ia_valid;
|
|
|
|
|
|
|
|
if (ia_valid & ATTR_UID)
|
|
|
|
inode->i_uid = attr->ia_uid;
|
|
|
|
if (ia_valid & ATTR_GID)
|
|
|
|
inode->i_gid = attr->ia_gid;
|
|
|
|
if (ia_valid & ATTR_ATIME)
|
vfs: change inode times to use struct timespec64
struct timespec is not y2038 safe. Transition vfs to use
y2038 safe struct timespec64 instead.
The change was made with the help of the following coccinelle
script. This catches about 80% of the changes.
All the header file and logic changes are included in the
first 5 rules. The rest are trivial substitutions.
I avoid changing any of the function signatures or any other
filesystem specific data structures to keep the patch simple
for review.
The script can be a little shorter by combining different cases.
But, this version was sufficient for my usecase.
virtual patch
@ depends on patch @
identifier now;
@@
- struct timespec
+ struct timespec64
current_time ( ... )
{
- struct timespec now = current_kernel_time();
+ struct timespec64 now = current_kernel_time64();
...
- return timespec_trunc(
+ return timespec64_trunc(
... );
}
@ depends on patch @
identifier xtime;
@@
struct \( iattr \| inode \| kstat \) {
...
- struct timespec xtime;
+ struct timespec64 xtime;
...
}
@ depends on patch @
identifier t;
@@
struct inode_operations {
...
int (*update_time) (...,
- struct timespec t,
+ struct timespec64 t,
...);
...
}
@ depends on patch @
identifier t;
identifier fn_update_time =~ "update_time$";
@@
fn_update_time (...,
- struct timespec *t,
+ struct timespec64 *t,
...) { ... }
@ depends on patch @
identifier t;
@@
lease_get_mtime( ... ,
- struct timespec *t
+ struct timespec64 *t
) { ... }
@te depends on patch forall@
identifier ts;
local idexpression struct inode *inode_node;
identifier i_xtime =~ "^i_[acm]time$";
identifier ia_xtime =~ "^ia_[acm]time$";
identifier fn_update_time =~ "update_time$";
identifier fn;
expression e, E3;
local idexpression struct inode *node1;
local idexpression struct inode *node2;
local idexpression struct iattr *attr1;
local idexpression struct iattr *attr2;
local idexpression struct iattr attr;
identifier i_xtime1 =~ "^i_[acm]time$";
identifier i_xtime2 =~ "^i_[acm]time$";
identifier ia_xtime1 =~ "^ia_[acm]time$";
identifier ia_xtime2 =~ "^ia_[acm]time$";
@@
(
(
- struct timespec ts;
+ struct timespec64 ts;
|
- struct timespec ts = current_time(inode_node);
+ struct timespec64 ts = current_time(inode_node);
)
<+... when != ts
(
- timespec_equal(&inode_node->i_xtime, &ts)
+ timespec64_equal(&inode_node->i_xtime, &ts)
|
- timespec_equal(&ts, &inode_node->i_xtime)
+ timespec64_equal(&ts, &inode_node->i_xtime)
|
- timespec_compare(&inode_node->i_xtime, &ts)
+ timespec64_compare(&inode_node->i_xtime, &ts)
|
- timespec_compare(&ts, &inode_node->i_xtime)
+ timespec64_compare(&ts, &inode_node->i_xtime)
|
ts = current_time(e)
|
fn_update_time(..., &ts,...)
|
inode_node->i_xtime = ts
|
node1->i_xtime = ts
|
ts = inode_node->i_xtime
|
<+... attr1->ia_xtime ...+> = ts
|
ts = attr1->ia_xtime
|
ts.tv_sec
|
ts.tv_nsec
|
btrfs_set_stack_timespec_sec(..., ts.tv_sec)
|
btrfs_set_stack_timespec_nsec(..., ts.tv_nsec)
|
- ts = timespec64_to_timespec(
+ ts =
...
-)
|
- ts = ktime_to_timespec(
+ ts = ktime_to_timespec64(
...)
|
- ts = E3
+ ts = timespec_to_timespec64(E3)
|
- ktime_get_real_ts(&ts)
+ ktime_get_real_ts64(&ts)
|
fn(...,
- ts
+ timespec64_to_timespec(ts)
,...)
)
...+>
(
<... when != ts
- return ts;
+ return timespec64_to_timespec(ts);
...>
)
|
- timespec_equal(&node1->i_xtime1, &node2->i_xtime2)
+ timespec64_equal(&node1->i_xtime2, &node2->i_xtime2)
|
- timespec_equal(&node1->i_xtime1, &attr2->ia_xtime2)
+ timespec64_equal(&node1->i_xtime2, &attr2->ia_xtime2)
|
- timespec_compare(&node1->i_xtime1, &node2->i_xtime2)
+ timespec64_compare(&node1->i_xtime1, &node2->i_xtime2)
|
node1->i_xtime1 =
- timespec_trunc(attr1->ia_xtime1,
+ timespec64_trunc(attr1->ia_xtime1,
...)
|
- attr1->ia_xtime1 = timespec_trunc(attr2->ia_xtime2,
+ attr1->ia_xtime1 = timespec64_trunc(attr2->ia_xtime2,
...)
|
- ktime_get_real_ts(&attr1->ia_xtime1)
+ ktime_get_real_ts64(&attr1->ia_xtime1)
|
- ktime_get_real_ts(&attr.ia_xtime1)
+ ktime_get_real_ts64(&attr.ia_xtime1)
)
@ depends on patch @
struct inode *node;
struct iattr *attr;
identifier fn;
identifier i_xtime =~ "^i_[acm]time$";
identifier ia_xtime =~ "^ia_[acm]time$";
expression e;
@@
(
- fn(node->i_xtime);
+ fn(timespec64_to_timespec(node->i_xtime));
|
fn(...,
- node->i_xtime);
+ timespec64_to_timespec(node->i_xtime));
|
- e = fn(attr->ia_xtime);
+ e = fn(timespec64_to_timespec(attr->ia_xtime));
)
@ depends on patch forall @
struct inode *node;
struct iattr *attr;
identifier i_xtime =~ "^i_[acm]time$";
identifier ia_xtime =~ "^ia_[acm]time$";
identifier fn;
@@
{
+ struct timespec ts;
<+...
(
+ ts = timespec64_to_timespec(node->i_xtime);
fn (...,
- &node->i_xtime,
+ &ts,
...);
|
+ ts = timespec64_to_timespec(attr->ia_xtime);
fn (...,
- &attr->ia_xtime,
+ &ts,
...);
)
...+>
}
@ depends on patch forall @
struct inode *node;
struct iattr *attr;
struct kstat *stat;
identifier ia_xtime =~ "^ia_[acm]time$";
identifier i_xtime =~ "^i_[acm]time$";
identifier xtime =~ "^[acm]time$";
identifier fn, ret;
@@
{
+ struct timespec ts;
<+...
(
+ ts = timespec64_to_timespec(node->i_xtime);
ret = fn (...,
- &node->i_xtime,
+ &ts,
...);
|
+ ts = timespec64_to_timespec(node->i_xtime);
ret = fn (...,
- &node->i_xtime);
+ &ts);
|
+ ts = timespec64_to_timespec(attr->ia_xtime);
ret = fn (...,
- &attr->ia_xtime,
+ &ts,
...);
|
+ ts = timespec64_to_timespec(attr->ia_xtime);
ret = fn (...,
- &attr->ia_xtime);
+ &ts);
|
+ ts = timespec64_to_timespec(stat->xtime);
ret = fn (...,
- &stat->xtime);
+ &ts);
)
...+>
}
@ depends on patch @
struct inode *node;
struct inode *node2;
identifier i_xtime1 =~ "^i_[acm]time$";
identifier i_xtime2 =~ "^i_[acm]time$";
identifier i_xtime3 =~ "^i_[acm]time$";
struct iattr *attrp;
struct iattr *attrp2;
struct iattr attr ;
identifier ia_xtime1 =~ "^ia_[acm]time$";
identifier ia_xtime2 =~ "^ia_[acm]time$";
struct kstat *stat;
struct kstat stat1;
struct timespec64 ts;
identifier xtime =~ "^[acmb]time$";
expression e;
@@
(
( node->i_xtime2 \| attrp->ia_xtime2 \| attr.ia_xtime2 \) = node->i_xtime1 ;
|
node->i_xtime2 = \( node2->i_xtime1 \| timespec64_trunc(...) \);
|
node->i_xtime2 = node->i_xtime1 = node->i_xtime3 = \(ts \| current_time(...) \);
|
node->i_xtime1 = node->i_xtime3 = \(ts \| current_time(...) \);
|
stat->xtime = node2->i_xtime1;
|
stat1.xtime = node2->i_xtime1;
|
( node->i_xtime2 \| attrp->ia_xtime2 \) = attrp->ia_xtime1 ;
|
( attrp->ia_xtime1 \| attr.ia_xtime1 \) = attrp2->ia_xtime2;
|
- e = node->i_xtime1;
+ e = timespec64_to_timespec( node->i_xtime1 );
|
- e = attrp->ia_xtime1;
+ e = timespec64_to_timespec( attrp->ia_xtime1 );
|
node->i_xtime1 = current_time(...);
|
node->i_xtime2 = node->i_xtime1 = node->i_xtime3 =
- e;
+ timespec_to_timespec64(e);
|
node->i_xtime1 = node->i_xtime3 =
- e;
+ timespec_to_timespec64(e);
|
- node->i_xtime1 = e;
+ node->i_xtime1 = timespec_to_timespec64(e);
)
Signed-off-by: Deepa Dinamani <deepa.kernel@gmail.com>
Cc: <anton@tuxera.com>
Cc: <balbi@kernel.org>
Cc: <bfields@fieldses.org>
Cc: <darrick.wong@oracle.com>
Cc: <dhowells@redhat.com>
Cc: <dsterba@suse.com>
Cc: <dwmw2@infradead.org>
Cc: <hch@lst.de>
Cc: <hirofumi@mail.parknet.co.jp>
Cc: <hubcap@omnibond.com>
Cc: <jack@suse.com>
Cc: <jaegeuk@kernel.org>
Cc: <jaharkes@cs.cmu.edu>
Cc: <jslaby@suse.com>
Cc: <keescook@chromium.org>
Cc: <mark@fasheh.com>
Cc: <miklos@szeredi.hu>
Cc: <nico@linaro.org>
Cc: <reiserfs-devel@vger.kernel.org>
Cc: <richard@nod.at>
Cc: <sage@redhat.com>
Cc: <sfrench@samba.org>
Cc: <swhiteho@redhat.com>
Cc: <tj@kernel.org>
Cc: <trond.myklebust@primarydata.com>
Cc: <tytso@mit.edu>
Cc: <viro@zeniv.linux.org.uk>
2018-05-09 09:36:02 +07:00
|
|
|
inode->i_atime = timespec64_trunc(attr->ia_atime,
|
|
|
|
inode->i_sb->s_time_gran);
|
2012-11-02 15:09:44 +07:00
|
|
|
if (ia_valid & ATTR_MTIME)
|
vfs: change inode times to use struct timespec64
struct timespec is not y2038 safe. Transition vfs to use
y2038 safe struct timespec64 instead.
The change was made with the help of the following coccinelle
script. This catches about 80% of the changes.
All the header file and logic changes are included in the
first 5 rules. The rest are trivial substitutions.
I avoid changing any of the function signatures or any other
filesystem specific data structures to keep the patch simple
for review.
The script can be a little shorter by combining different cases.
But, this version was sufficient for my usecase.
virtual patch
@ depends on patch @
identifier now;
@@
- struct timespec
+ struct timespec64
current_time ( ... )
{
- struct timespec now = current_kernel_time();
+ struct timespec64 now = current_kernel_time64();
...
- return timespec_trunc(
+ return timespec64_trunc(
... );
}
@ depends on patch @
identifier xtime;
@@
struct \( iattr \| inode \| kstat \) {
...
- struct timespec xtime;
+ struct timespec64 xtime;
...
}
@ depends on patch @
identifier t;
@@
struct inode_operations {
...
int (*update_time) (...,
- struct timespec t,
+ struct timespec64 t,
...);
...
}
@ depends on patch @
identifier t;
identifier fn_update_time =~ "update_time$";
@@
fn_update_time (...,
- struct timespec *t,
+ struct timespec64 *t,
...) { ... }
@ depends on patch @
identifier t;
@@
lease_get_mtime( ... ,
- struct timespec *t
+ struct timespec64 *t
) { ... }
@te depends on patch forall@
identifier ts;
local idexpression struct inode *inode_node;
identifier i_xtime =~ "^i_[acm]time$";
identifier ia_xtime =~ "^ia_[acm]time$";
identifier fn_update_time =~ "update_time$";
identifier fn;
expression e, E3;
local idexpression struct inode *node1;
local idexpression struct inode *node2;
local idexpression struct iattr *attr1;
local idexpression struct iattr *attr2;
local idexpression struct iattr attr;
identifier i_xtime1 =~ "^i_[acm]time$";
identifier i_xtime2 =~ "^i_[acm]time$";
identifier ia_xtime1 =~ "^ia_[acm]time$";
identifier ia_xtime2 =~ "^ia_[acm]time$";
@@
(
(
- struct timespec ts;
+ struct timespec64 ts;
|
- struct timespec ts = current_time(inode_node);
+ struct timespec64 ts = current_time(inode_node);
)
<+... when != ts
(
- timespec_equal(&inode_node->i_xtime, &ts)
+ timespec64_equal(&inode_node->i_xtime, &ts)
|
- timespec_equal(&ts, &inode_node->i_xtime)
+ timespec64_equal(&ts, &inode_node->i_xtime)
|
- timespec_compare(&inode_node->i_xtime, &ts)
+ timespec64_compare(&inode_node->i_xtime, &ts)
|
- timespec_compare(&ts, &inode_node->i_xtime)
+ timespec64_compare(&ts, &inode_node->i_xtime)
|
ts = current_time(e)
|
fn_update_time(..., &ts,...)
|
inode_node->i_xtime = ts
|
node1->i_xtime = ts
|
ts = inode_node->i_xtime
|
<+... attr1->ia_xtime ...+> = ts
|
ts = attr1->ia_xtime
|
ts.tv_sec
|
ts.tv_nsec
|
btrfs_set_stack_timespec_sec(..., ts.tv_sec)
|
btrfs_set_stack_timespec_nsec(..., ts.tv_nsec)
|
- ts = timespec64_to_timespec(
+ ts =
...
-)
|
- ts = ktime_to_timespec(
+ ts = ktime_to_timespec64(
...)
|
- ts = E3
+ ts = timespec_to_timespec64(E3)
|
- ktime_get_real_ts(&ts)
+ ktime_get_real_ts64(&ts)
|
fn(...,
- ts
+ timespec64_to_timespec(ts)
,...)
)
...+>
(
<... when != ts
- return ts;
+ return timespec64_to_timespec(ts);
...>
)
|
- timespec_equal(&node1->i_xtime1, &node2->i_xtime2)
+ timespec64_equal(&node1->i_xtime2, &node2->i_xtime2)
|
- timespec_equal(&node1->i_xtime1, &attr2->ia_xtime2)
+ timespec64_equal(&node1->i_xtime2, &attr2->ia_xtime2)
|
- timespec_compare(&node1->i_xtime1, &node2->i_xtime2)
+ timespec64_compare(&node1->i_xtime1, &node2->i_xtime2)
|
node1->i_xtime1 =
- timespec_trunc(attr1->ia_xtime1,
+ timespec64_trunc(attr1->ia_xtime1,
...)
|
- attr1->ia_xtime1 = timespec_trunc(attr2->ia_xtime2,
+ attr1->ia_xtime1 = timespec64_trunc(attr2->ia_xtime2,
...)
|
- ktime_get_real_ts(&attr1->ia_xtime1)
+ ktime_get_real_ts64(&attr1->ia_xtime1)
|
- ktime_get_real_ts(&attr.ia_xtime1)
+ ktime_get_real_ts64(&attr.ia_xtime1)
)
@ depends on patch @
struct inode *node;
struct iattr *attr;
identifier fn;
identifier i_xtime =~ "^i_[acm]time$";
identifier ia_xtime =~ "^ia_[acm]time$";
expression e;
@@
(
- fn(node->i_xtime);
+ fn(timespec64_to_timespec(node->i_xtime));
|
fn(...,
- node->i_xtime);
+ timespec64_to_timespec(node->i_xtime));
|
- e = fn(attr->ia_xtime);
+ e = fn(timespec64_to_timespec(attr->ia_xtime));
)
@ depends on patch forall @
struct inode *node;
struct iattr *attr;
identifier i_xtime =~ "^i_[acm]time$";
identifier ia_xtime =~ "^ia_[acm]time$";
identifier fn;
@@
{
+ struct timespec ts;
<+...
(
+ ts = timespec64_to_timespec(node->i_xtime);
fn (...,
- &node->i_xtime,
+ &ts,
...);
|
+ ts = timespec64_to_timespec(attr->ia_xtime);
fn (...,
- &attr->ia_xtime,
+ &ts,
...);
)
...+>
}
@ depends on patch forall @
struct inode *node;
struct iattr *attr;
struct kstat *stat;
identifier ia_xtime =~ "^ia_[acm]time$";
identifier i_xtime =~ "^i_[acm]time$";
identifier xtime =~ "^[acm]time$";
identifier fn, ret;
@@
{
+ struct timespec ts;
<+...
(
+ ts = timespec64_to_timespec(node->i_xtime);
ret = fn (...,
- &node->i_xtime,
+ &ts,
...);
|
+ ts = timespec64_to_timespec(node->i_xtime);
ret = fn (...,
- &node->i_xtime);
+ &ts);
|
+ ts = timespec64_to_timespec(attr->ia_xtime);
ret = fn (...,
- &attr->ia_xtime,
+ &ts,
...);
|
+ ts = timespec64_to_timespec(attr->ia_xtime);
ret = fn (...,
- &attr->ia_xtime);
+ &ts);
|
+ ts = timespec64_to_timespec(stat->xtime);
ret = fn (...,
- &stat->xtime);
+ &ts);
)
...+>
}
@ depends on patch @
struct inode *node;
struct inode *node2;
identifier i_xtime1 =~ "^i_[acm]time$";
identifier i_xtime2 =~ "^i_[acm]time$";
identifier i_xtime3 =~ "^i_[acm]time$";
struct iattr *attrp;
struct iattr *attrp2;
struct iattr attr ;
identifier ia_xtime1 =~ "^ia_[acm]time$";
identifier ia_xtime2 =~ "^ia_[acm]time$";
struct kstat *stat;
struct kstat stat1;
struct timespec64 ts;
identifier xtime =~ "^[acmb]time$";
expression e;
@@
(
( node->i_xtime2 \| attrp->ia_xtime2 \| attr.ia_xtime2 \) = node->i_xtime1 ;
|
node->i_xtime2 = \( node2->i_xtime1 \| timespec64_trunc(...) \);
|
node->i_xtime2 = node->i_xtime1 = node->i_xtime3 = \(ts \| current_time(...) \);
|
node->i_xtime1 = node->i_xtime3 = \(ts \| current_time(...) \);
|
stat->xtime = node2->i_xtime1;
|
stat1.xtime = node2->i_xtime1;
|
( node->i_xtime2 \| attrp->ia_xtime2 \) = attrp->ia_xtime1 ;
|
( attrp->ia_xtime1 \| attr.ia_xtime1 \) = attrp2->ia_xtime2;
|
- e = node->i_xtime1;
+ e = timespec64_to_timespec( node->i_xtime1 );
|
- e = attrp->ia_xtime1;
+ e = timespec64_to_timespec( attrp->ia_xtime1 );
|
node->i_xtime1 = current_time(...);
|
node->i_xtime2 = node->i_xtime1 = node->i_xtime3 =
- e;
+ timespec_to_timespec64(e);
|
node->i_xtime1 = node->i_xtime3 =
- e;
+ timespec_to_timespec64(e);
|
- node->i_xtime1 = e;
+ node->i_xtime1 = timespec_to_timespec64(e);
)
Signed-off-by: Deepa Dinamani <deepa.kernel@gmail.com>
Cc: <anton@tuxera.com>
Cc: <balbi@kernel.org>
Cc: <bfields@fieldses.org>
Cc: <darrick.wong@oracle.com>
Cc: <dhowells@redhat.com>
Cc: <dsterba@suse.com>
Cc: <dwmw2@infradead.org>
Cc: <hch@lst.de>
Cc: <hirofumi@mail.parknet.co.jp>
Cc: <hubcap@omnibond.com>
Cc: <jack@suse.com>
Cc: <jaegeuk@kernel.org>
Cc: <jaharkes@cs.cmu.edu>
Cc: <jslaby@suse.com>
Cc: <keescook@chromium.org>
Cc: <mark@fasheh.com>
Cc: <miklos@szeredi.hu>
Cc: <nico@linaro.org>
Cc: <reiserfs-devel@vger.kernel.org>
Cc: <richard@nod.at>
Cc: <sage@redhat.com>
Cc: <sfrench@samba.org>
Cc: <swhiteho@redhat.com>
Cc: <tj@kernel.org>
Cc: <trond.myklebust@primarydata.com>
Cc: <tytso@mit.edu>
Cc: <viro@zeniv.linux.org.uk>
2018-05-09 09:36:02 +07:00
|
|
|
inode->i_mtime = timespec64_trunc(attr->ia_mtime,
|
|
|
|
inode->i_sb->s_time_gran);
|
2012-11-02 15:09:44 +07:00
|
|
|
if (ia_valid & ATTR_CTIME)
|
vfs: change inode times to use struct timespec64
struct timespec is not y2038 safe. Transition vfs to use
y2038 safe struct timespec64 instead.
The change was made with the help of the following Coccinelle
script. This catches about 80% of the changes.
All the header file and logic changes are included in the
first 5 rules. The rest are trivial substitutions.
I avoid changing any of the function signatures or any other
filesystem specific data structures to keep the patch simple
for review.
The script could be made a little shorter by combining different cases,
but this version was sufficient for my use case.
virtual patch
@ depends on patch @
identifier now;
@@
- struct timespec
+ struct timespec64
current_time ( ... )
{
- struct timespec now = current_kernel_time();
+ struct timespec64 now = current_kernel_time64();
...
- return timespec_trunc(
+ return timespec64_trunc(
... );
}
@ depends on patch @
identifier xtime;
@@
struct \( iattr \| inode \| kstat \) {
...
- struct timespec xtime;
+ struct timespec64 xtime;
...
}
@ depends on patch @
identifier t;
@@
struct inode_operations {
...
int (*update_time) (...,
- struct timespec t,
+ struct timespec64 t,
...);
...
}
@ depends on patch @
identifier t;
identifier fn_update_time =~ "update_time$";
@@
fn_update_time (...,
- struct timespec *t,
+ struct timespec64 *t,
...) { ... }
@ depends on patch @
identifier t;
@@
lease_get_mtime( ... ,
- struct timespec *t
+ struct timespec64 *t
) { ... }
@te depends on patch forall@
identifier ts;
local idexpression struct inode *inode_node;
identifier i_xtime =~ "^i_[acm]time$";
identifier ia_xtime =~ "^ia_[acm]time$";
identifier fn_update_time =~ "update_time$";
identifier fn;
expression e, E3;
local idexpression struct inode *node1;
local idexpression struct inode *node2;
local idexpression struct iattr *attr1;
local idexpression struct iattr *attr2;
local idexpression struct iattr attr;
identifier i_xtime1 =~ "^i_[acm]time$";
identifier i_xtime2 =~ "^i_[acm]time$";
identifier ia_xtime1 =~ "^ia_[acm]time$";
identifier ia_xtime2 =~ "^ia_[acm]time$";
@@
(
(
- struct timespec ts;
+ struct timespec64 ts;
|
- struct timespec ts = current_time(inode_node);
+ struct timespec64 ts = current_time(inode_node);
)
<+... when != ts
(
- timespec_equal(&inode_node->i_xtime, &ts)
+ timespec64_equal(&inode_node->i_xtime, &ts)
|
- timespec_equal(&ts, &inode_node->i_xtime)
+ timespec64_equal(&ts, &inode_node->i_xtime)
|
- timespec_compare(&inode_node->i_xtime, &ts)
+ timespec64_compare(&inode_node->i_xtime, &ts)
|
- timespec_compare(&ts, &inode_node->i_xtime)
+ timespec64_compare(&ts, &inode_node->i_xtime)
|
ts = current_time(e)
|
fn_update_time(..., &ts,...)
|
inode_node->i_xtime = ts
|
node1->i_xtime = ts
|
ts = inode_node->i_xtime
|
<+... attr1->ia_xtime ...+> = ts
|
ts = attr1->ia_xtime
|
ts.tv_sec
|
ts.tv_nsec
|
btrfs_set_stack_timespec_sec(..., ts.tv_sec)
|
btrfs_set_stack_timespec_nsec(..., ts.tv_nsec)
|
- ts = timespec64_to_timespec(
+ ts =
...
-)
|
- ts = ktime_to_timespec(
+ ts = ktime_to_timespec64(
...)
|
- ts = E3
+ ts = timespec_to_timespec64(E3)
|
- ktime_get_real_ts(&ts)
+ ktime_get_real_ts64(&ts)
|
fn(...,
- ts
+ timespec64_to_timespec(ts)
,...)
)
...+>
(
<... when != ts
- return ts;
+ return timespec64_to_timespec(ts);
...>
)
|
- timespec_equal(&node1->i_xtime1, &node2->i_xtime2)
+ timespec64_equal(&node1->i_xtime2, &node2->i_xtime2)
|
- timespec_equal(&node1->i_xtime1, &attr2->ia_xtime2)
+ timespec64_equal(&node1->i_xtime2, &attr2->ia_xtime2)
|
- timespec_compare(&node1->i_xtime1, &node2->i_xtime2)
+ timespec64_compare(&node1->i_xtime1, &node2->i_xtime2)
|
node1->i_xtime1 =
- timespec_trunc(attr1->ia_xtime1,
+ timespec64_trunc(attr1->ia_xtime1,
...)
|
- attr1->ia_xtime1 = timespec_trunc(attr2->ia_xtime2,
+ attr1->ia_xtime1 = timespec64_trunc(attr2->ia_xtime2,
...)
|
- ktime_get_real_ts(&attr1->ia_xtime1)
+ ktime_get_real_ts64(&attr1->ia_xtime1)
|
- ktime_get_real_ts(&attr.ia_xtime1)
+ ktime_get_real_ts64(&attr.ia_xtime1)
)
@ depends on patch @
struct inode *node;
struct iattr *attr;
identifier fn;
identifier i_xtime =~ "^i_[acm]time$";
identifier ia_xtime =~ "^ia_[acm]time$";
expression e;
@@
(
- fn(node->i_xtime);
+ fn(timespec64_to_timespec(node->i_xtime));
|
fn(...,
- node->i_xtime);
+ timespec64_to_timespec(node->i_xtime));
|
- e = fn(attr->ia_xtime);
+ e = fn(timespec64_to_timespec(attr->ia_xtime));
)
@ depends on patch forall @
struct inode *node;
struct iattr *attr;
identifier i_xtime =~ "^i_[acm]time$";
identifier ia_xtime =~ "^ia_[acm]time$";
identifier fn;
@@
{
+ struct timespec ts;
<+...
(
+ ts = timespec64_to_timespec(node->i_xtime);
fn (...,
- &node->i_xtime,
+ &ts,
...);
|
+ ts = timespec64_to_timespec(attr->ia_xtime);
fn (...,
- &attr->ia_xtime,
+ &ts,
...);
)
...+>
}
@ depends on patch forall @
struct inode *node;
struct iattr *attr;
struct kstat *stat;
identifier ia_xtime =~ "^ia_[acm]time$";
identifier i_xtime =~ "^i_[acm]time$";
identifier xtime =~ "^[acm]time$";
identifier fn, ret;
@@
{
+ struct timespec ts;
<+...
(
+ ts = timespec64_to_timespec(node->i_xtime);
ret = fn (...,
- &node->i_xtime,
+ &ts,
...);
|
+ ts = timespec64_to_timespec(node->i_xtime);
ret = fn (...,
- &node->i_xtime);
+ &ts);
|
+ ts = timespec64_to_timespec(attr->ia_xtime);
ret = fn (...,
- &attr->ia_xtime,
+ &ts,
...);
|
+ ts = timespec64_to_timespec(attr->ia_xtime);
ret = fn (...,
- &attr->ia_xtime);
+ &ts);
|
+ ts = timespec64_to_timespec(stat->xtime);
ret = fn (...,
- &stat->xtime);
+ &ts);
)
...+>
}
@ depends on patch @
struct inode *node;
struct inode *node2;
identifier i_xtime1 =~ "^i_[acm]time$";
identifier i_xtime2 =~ "^i_[acm]time$";
identifier i_xtime3 =~ "^i_[acm]time$";
struct iattr *attrp;
struct iattr *attrp2;
struct iattr attr ;
identifier ia_xtime1 =~ "^ia_[acm]time$";
identifier ia_xtime2 =~ "^ia_[acm]time$";
struct kstat *stat;
struct kstat stat1;
struct timespec64 ts;
identifier xtime =~ "^[acmb]time$";
expression e;
@@
(
( node->i_xtime2 \| attrp->ia_xtime2 \| attr.ia_xtime2 \) = node->i_xtime1 ;
|
node->i_xtime2 = \( node2->i_xtime1 \| timespec64_trunc(...) \);
|
node->i_xtime2 = node->i_xtime1 = node->i_xtime3 = \(ts \| current_time(...) \);
|
node->i_xtime1 = node->i_xtime3 = \(ts \| current_time(...) \);
|
stat->xtime = node2->i_xtime1;
|
stat1.xtime = node2->i_xtime1;
|
( node->i_xtime2 \| attrp->ia_xtime2 \) = attrp->ia_xtime1 ;
|
( attrp->ia_xtime1 \| attr.ia_xtime1 \) = attrp2->ia_xtime2;
|
- e = node->i_xtime1;
+ e = timespec64_to_timespec( node->i_xtime1 );
|
- e = attrp->ia_xtime1;
+ e = timespec64_to_timespec( attrp->ia_xtime1 );
|
node->i_xtime1 = current_time(...);
|
node->i_xtime2 = node->i_xtime1 = node->i_xtime3 =
- e;
+ timespec_to_timespec64(e);
|
node->i_xtime1 = node->i_xtime3 =
- e;
+ timespec_to_timespec64(e);
|
- node->i_xtime1 = e;
+ node->i_xtime1 = timespec_to_timespec64(e);
)
Signed-off-by: Deepa Dinamani <deepa.kernel@gmail.com>
Cc: <anton@tuxera.com>
Cc: <balbi@kernel.org>
Cc: <bfields@fieldses.org>
Cc: <darrick.wong@oracle.com>
Cc: <dhowells@redhat.com>
Cc: <dsterba@suse.com>
Cc: <dwmw2@infradead.org>
Cc: <hch@lst.de>
Cc: <hirofumi@mail.parknet.co.jp>
Cc: <hubcap@omnibond.com>
Cc: <jack@suse.com>
Cc: <jaegeuk@kernel.org>
Cc: <jaharkes@cs.cmu.edu>
Cc: <jslaby@suse.com>
Cc: <keescook@chromium.org>
Cc: <mark@fasheh.com>
Cc: <miklos@szeredi.hu>
Cc: <nico@linaro.org>
Cc: <reiserfs-devel@vger.kernel.org>
Cc: <richard@nod.at>
Cc: <sage@redhat.com>
Cc: <sfrench@samba.org>
Cc: <swhiteho@redhat.com>
Cc: <tj@kernel.org>
Cc: <trond.myklebust@primarydata.com>
Cc: <tytso@mit.edu>
Cc: <viro@zeniv.linux.org.uk>
2018-05-09 09:36:02 +07:00
|
|
|
inode->i_ctime = timespec64_trunc(attr->ia_ctime,
|
|
|
|
inode->i_sb->s_time_gran);
|
2012-11-02 15:09:44 +07:00
|
|
|
if (ia_valid & ATTR_MODE) {
|
|
|
|
umode_t mode = attr->ia_mode;
|
|
|
|
|
|
|
|
if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
|
|
|
|
mode &= ~S_ISGID;
|
2016-05-21 00:13:22 +07:00
|
|
|
set_acl_inode(inode, mode);
|
2012-11-02 15:09:44 +07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
#else
|
|
|
|
#define __setattr_copy setattr_copy
|
|
|
|
#endif
|
|
|
|
|
|
|
|
int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
|
|
|
|
{
|
2015-03-18 05:25:59 +07:00
|
|
|
struct inode *inode = d_inode(dentry);
|
2012-11-02 15:09:44 +07:00
|
|
|
int err;
|
2016-12-11 14:35:15 +07:00
|
|
|
bool size_changed = false;
|
2012-11-02 15:09:44 +07:00
|
|
|
|
2017-10-24 04:48:49 +07:00
|
|
|
if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
|
|
|
|
return -EIO;
|
|
|
|
|
2016-05-26 21:55:18 +07:00
|
|
|
err = setattr_prepare(dentry, attr);
|
2012-11-02 15:09:44 +07:00
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
2017-11-30 03:35:32 +07:00
|
|
|
err = fscrypt_prepare_setattr(dentry, attr);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
2017-07-08 23:13:07 +07:00
|
|
|
if (is_quota_modification(inode, attr)) {
|
|
|
|
err = dquot_initialize(inode);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
if ((attr->ia_valid & ATTR_UID &&
|
|
|
|
!uid_eq(attr->ia_uid, inode->i_uid)) ||
|
|
|
|
(attr->ia_valid & ATTR_GID &&
|
|
|
|
!gid_eq(attr->ia_gid, inode->i_gid))) {
|
f2fs: guarantee journalled quota data by checkpoint
For journalled quota mode, let checkpoint flush dquot dirty data
and quota file data to guarantee persistence of all quota sysfiles in
the last checkpoint; this way, we can avoid corrupting quota sysfiles
when encountering SPO (sudden power-off).
The implementation is as below:
1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there are
cached dquot metadata changes in the quota subsystem, and a later
checkpoint should:
a) flush dquot metadata into the quota file.
b) flush the quota file to storage to keep file usage consistent.
2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that a quota
operation failed due to -EIO or -ENOSPC, so later,
a) checkpoint will skip syncing dquot metadata.
b) CP_QUOTA_NEED_FSCK_FLAG will be set in the last cp pack to give a
hint for fsck repairing.
3. add a global state SBI_QUOTA_SKIP_FLUSH: in checkpoint, if quota
data updating is very heavy, it may cause a hung task in block_operation().
To avoid this, if our retry count exceeds the threshold, let's just skip
flushing and retry in the next checkpoint().
Signed-off-by: Weichao Guo <guoweichao@huawei.com>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: avoid warnings and set fsck flag]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-09-20 19:05:00 +07:00
|
|
|
f2fs_lock_op(F2FS_I_SB(inode));
|
2017-07-08 23:13:07 +07:00
|
|
|
err = dquot_transfer(inode, attr);
|
f2fs: guarantee journalled quota data by checkpoint
For journalled quota mode, let checkpoint flush dquot dirty data
and quota file data to guarantee persistence of all quota sysfiles in
the last checkpoint; this way, we can avoid corrupting quota sysfiles
when encountering SPO (sudden power-off).
The implementation is as below:
1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there are
cached dquot metadata changes in the quota subsystem, and a later
checkpoint should:
a) flush dquot metadata into the quota file.
b) flush the quota file to storage to keep file usage consistent.
2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that a quota
operation failed due to -EIO or -ENOSPC, so later,
a) checkpoint will skip syncing dquot metadata.
b) CP_QUOTA_NEED_FSCK_FLAG will be set in the last cp pack to give a
hint for fsck repairing.
3. add a global state SBI_QUOTA_SKIP_FLUSH: in checkpoint, if quota
data updating is very heavy, it may cause a hung task in block_operation().
To avoid this, if our retry count exceeds the threshold, let's just skip
flushing and retry in the next checkpoint().
Signed-off-by: Weichao Guo <guoweichao@huawei.com>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: avoid warnings and set fsck flag]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-09-20 19:05:00 +07:00
|
|
|
if (err) {
|
|
|
|
set_sbi_flag(F2FS_I_SB(inode),
|
|
|
|
SBI_QUOTA_NEED_REPAIR);
|
|
|
|
f2fs_unlock_op(F2FS_I_SB(inode));
|
2017-07-08 23:13:07 +07:00
|
|
|
return err;
|
f2fs: guarantee journalled quota data by checkpoint
For journalled quota mode, let checkpoint flush dquot dirty data
and quota file data to guarantee persistence of all quota sysfiles in
the last checkpoint; this way, we can avoid corrupting quota sysfiles
when encountering SPO (sudden power-off).
The implementation is as below:
1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there are
cached dquot metadata changes in the quota subsystem, and a later
checkpoint should:
a) flush dquot metadata into the quota file.
b) flush the quota file to storage to keep file usage consistent.
2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that a quota
operation failed due to -EIO or -ENOSPC, so later,
a) checkpoint will skip syncing dquot metadata.
b) CP_QUOTA_NEED_FSCK_FLAG will be set in the last cp pack to give a
hint for fsck repairing.
3. add a global state SBI_QUOTA_SKIP_FLUSH: in checkpoint, if quota
data updating is very heavy, it may cause a hung task in block_operation().
To avoid this, if our retry count exceeds the threshold, let's just skip
flushing and retry in the next checkpoint().
Signed-off-by: Weichao Guo <guoweichao@huawei.com>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: avoid warnings and set fsck flag]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-09-20 19:05:00 +07:00
|
|
|
}
|
|
|
|
/*
|
|
|
|
* update uid/gid under lock_op(), so that dquot and inode can
|
|
|
|
* be updated atomically.
|
|
|
|
*/
|
|
|
|
if (attr->ia_valid & ATTR_UID)
|
|
|
|
inode->i_uid = attr->ia_uid;
|
|
|
|
if (attr->ia_valid & ATTR_GID)
|
|
|
|
inode->i_gid = attr->ia_gid;
|
|
|
|
f2fs_mark_inode_dirty_sync(inode, true);
|
|
|
|
f2fs_unlock_op(F2FS_I_SB(inode));
|
2017-07-08 23:13:07 +07:00
|
|
|
}
|
|
|
|
|
2014-09-15 17:02:09 +07:00
|
|
|
if (attr->ia_valid & ATTR_SIZE) {
|
2018-08-05 22:04:25 +07:00
|
|
|
bool to_smaller = (attr->ia_size <= i_size_read(inode));
|
|
|
|
|
|
|
|
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
|
2018-07-25 10:11:56 +07:00
|
|
|
down_write(&F2FS_I(inode)->i_mmap_sem);
|
2018-08-05 22:04:25 +07:00
|
|
|
|
|
|
|
truncate_setsize(inode, attr->ia_size);
|
|
|
|
|
|
|
|
if (to_smaller)
|
2016-06-03 03:49:38 +07:00
|
|
|
err = f2fs_truncate(inode);
|
2018-08-05 22:04:25 +07:00
|
|
|
/*
|
|
|
|
* do not trim all blocks after i_size if target size is
|
|
|
|
* larger than i_size.
|
|
|
|
*/
|
|
|
|
up_write(&F2FS_I(inode)->i_mmap_sem);
|
2018-07-25 10:11:56 +07:00
|
|
|
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
|
2018-08-05 22:04:25 +07:00
|
|
|
|
|
|
|
if (err)
|
|
|
|
return err;
|
2015-12-01 10:36:16 +07:00
|
|
|
|
2018-08-05 22:04:25 +07:00
|
|
|
if (!to_smaller) {
|
2015-12-01 10:36:16 +07:00
|
|
|
/* should convert inline inode here */
|
2015-12-23 02:09:35 +07:00
|
|
|
if (!f2fs_may_inline_data(inode)) {
|
2015-12-01 10:36:16 +07:00
|
|
|
err = f2fs_convert_inline_inode(inode);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
}
|
2016-09-14 21:48:04 +07:00
|
|
|
inode->i_mtime = inode->i_ctime = current_time(inode);
|
2014-09-15 17:02:09 +07:00
|
|
|
}
|
2016-12-11 14:35:15 +07:00
|
|
|
|
2017-10-09 16:55:19 +07:00
|
|
|
down_write(&F2FS_I(inode)->i_sem);
|
|
|
|
F2FS_I(inode)->last_disk_size = i_size_read(inode);
|
|
|
|
up_write(&F2FS_I(inode)->i_sem);
|
|
|
|
|
2016-12-11 14:35:15 +07:00
|
|
|
size_changed = true;
|
2012-11-02 15:09:44 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
__setattr_copy(inode, attr);
|
|
|
|
|
|
|
|
if (attr->ia_valid & ATTR_MODE) {
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
err = posix_acl_chmod(inode, f2fs_get_inode_mode(inode));
|
2016-05-21 00:13:22 +07:00
|
|
|
if (err || is_inode_flag_set(inode, FI_ACL_MODE)) {
|
|
|
|
inode->i_mode = F2FS_I(inode)->i_acl_mode;
|
|
|
|
clear_inode_flag(inode, FI_ACL_MODE);
|
2012-11-02 15:09:44 +07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-12-11 14:35:15 +07:00
|
|
|
/* file size may have changed here */
|
|
|
|
f2fs_mark_inode_dirty_sync(inode, size_changed);
|
2016-10-15 03:30:31 +07:00
|
|
|
|
|
|
|
/* inode change will produce dirty node pages flushed by checkpoint */
|
|
|
|
f2fs_balance_fs(F2FS_I_SB(inode), true);
|
|
|
|
|
2012-11-02 15:09:44 +07:00
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
const struct inode_operations f2fs_file_inode_operations = {
|
|
|
|
.getattr = f2fs_getattr,
|
|
|
|
.setattr = f2fs_setattr,
|
|
|
|
.get_acl = f2fs_get_acl,
|
2013-12-20 20:16:45 +07:00
|
|
|
.set_acl = f2fs_set_acl,
|
2012-11-02 15:09:44 +07:00
|
|
|
#ifdef CONFIG_F2FS_FS_XATTR
|
|
|
|
.listxattr = f2fs_listxattr,
|
|
|
|
#endif
|
2014-06-08 02:30:14 +07:00
|
|
|
.fiemap = f2fs_fiemap,
|
2012-11-02 15:09:44 +07:00
|
|
|
};
|
|
|
|
|
2015-08-07 17:36:06 +07:00
|
|
|
static int fill_zero(struct inode *inode, pgoff_t index,
|
2012-11-02 15:09:44 +07:00
|
|
|
loff_t start, loff_t len)
|
|
|
|
{
|
2014-09-03 05:31:18 +07:00
|
|
|
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
|
2012-11-02 15:09:44 +07:00
|
|
|
struct page *page;
|
|
|
|
|
|
|
|
if (!len)
|
2015-08-07 17:36:06 +07:00
|
|
|
return 0;
|
2012-11-02 15:09:44 +07:00
|
|
|
|
2016-01-08 05:15:04 +07:00
|
|
|
f2fs_balance_fs(sbi, true);
|
2013-01-25 16:33:41 +07:00
|
|
|
|
f2fs: use rw_sem instead of fs_lock(locks mutex)
The fs_locks are used to block other ops (e.g. recovery) while doing a checkpoint.
Every other operation routine (besides checkpoint) needs to acquire an fs_lock, and
there is a serious problem here: if there are too many concurrent threads acquiring
fs_lock, they will block each other, which may lead to performance problems;
this is not the behavior we want to see.
Although some optimization patches were introduced to improve the usage of fs_lock,
the thorough solution is to use a *rw_sem* to replace the fs_lock.
The checkpoint routine takes write_sem and other ops take read_sem, so that we can block
other ops (e.g. recovery) while doing a checkpoint, and other ops will not disturb each other;
this avoids the problem described above completely.
Because of the weakness of rw_sem, the above change may introduce a potential problem:
the checkpoint thread might get starved if other threads are intensively locking
the read semaphore for I/O. (Pointed out by Xu Jin)
In order to avoid this, a wait_list is introduced: subsequent read semaphore ops
will be put on the wait_list if the checkpoint thread is waiting for the write semaphore,
and will be woken up when the checkpoint thread gives up the write semaphore.
Thanks to Kim for the previous review and testing; I would be very glad to see others'
performance tests of this patch.
V2:
-fix the potential starvation problem.
-use more suitable func name suggested by Xu Jin.
Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
[Jaegeuk Kim: adjust minor coding standard]
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
2013-09-27 17:08:30 +07:00
|
|
|
f2fs_lock_op(sbi);
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
page = f2fs_get_new_data_page(inode, NULL, index, false);
|
f2fs: use rw_sem instead of fs_lock(locks mutex)
The fs_locks are used to block other ops (e.g. recovery) while doing a checkpoint.
Every other operation routine (besides checkpoint) needs to acquire an fs_lock, and
there is a serious problem here: if there are too many concurrent threads acquiring
fs_lock, they will block each other, which may lead to performance problems;
this is not the behavior we want to see.
Although some optimization patches were introduced to improve the usage of fs_lock,
the thorough solution is to use a *rw_sem* to replace the fs_lock.
The checkpoint routine takes write_sem and other ops take read_sem, so that we can block
other ops (e.g. recovery) while doing a checkpoint, and other ops will not disturb each other;
this avoids the problem described above completely.
Because of the weakness of rw_sem, the above change may introduce a potential problem:
the checkpoint thread might get starved if other threads are intensively locking
the read semaphore for I/O. (Pointed out by Xu Jin)
In order to avoid this, a wait_list is introduced: subsequent read semaphore ops
will be put on the wait_list if the checkpoint thread is waiting for the write semaphore,
and will be woken up when the checkpoint thread gives up the write semaphore.
Thanks to Kim for the previous review and testing; I would be very glad to see others'
performance tests of this patch.
V2:
-fix the potential starvation problem.
-use more suitable func name suggested by Xu Jin.
Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
[Jaegeuk Kim: adjust minor coding standard]
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
2013-09-27 17:08:30 +07:00
|
|
|
f2fs_unlock_op(sbi);
|
2012-11-02 15:09:44 +07:00
|
|
|
|
2015-08-07 17:36:06 +07:00
|
|
|
if (IS_ERR(page))
|
|
|
|
return PTR_ERR(page);
|
|
|
|
|
2018-12-25 16:43:42 +07:00
|
|
|
f2fs_wait_on_page_writeback(page, DATA, true, true);
|
2015-08-07 17:36:06 +07:00
|
|
|
zero_user(page, start, len);
|
|
|
|
set_page_dirty(page);
|
|
|
|
f2fs_put_page(page, 1);
|
|
|
|
return 0;
|
2012-11-02 15:09:44 +07:00
|
|
|
}
|
|
|
|
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
int f2fs_truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)
|
2012-11-02 15:09:44 +07:00
|
|
|
{
|
|
|
|
int err;
|
|
|
|
|
2015-09-17 19:22:44 +07:00
|
|
|
while (pg_start < pg_end) {
|
2012-11-02 15:09:44 +07:00
|
|
|
struct dnode_of_data dn;
|
2015-09-17 19:22:44 +07:00
|
|
|
pgoff_t end_offset, count;
|
2013-01-11 12:09:38 +07:00
|
|
|
|
2012-11-02 15:09:44 +07:00
|
|
|
set_new_dnode(&dn, inode, NULL, NULL, 0);
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
err = f2fs_get_dnode_of_data(&dn, pg_start, LOOKUP_NODE);
|
2012-11-02 15:09:44 +07:00
|
|
|
if (err) {
|
2015-09-17 19:22:44 +07:00
|
|
|
if (err == -ENOENT) {
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
pg_start = f2fs_get_next_page_offset(&dn,
|
|
|
|
pg_start);
|
2012-11-02 15:09:44 +07:00
|
|
|
continue;
|
2015-09-17 19:22:44 +07:00
|
|
|
}
|
2012-11-02 15:09:44 +07:00
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2016-01-26 14:39:35 +07:00
|
|
|
end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
|
2015-09-17 19:22:44 +07:00
|
|
|
count = min(end_offset - dn.ofs_in_node, pg_end - pg_start);
|
|
|
|
|
|
|
|
f2fs_bug_on(F2FS_I_SB(inode), count == 0 || count > end_offset);
|
|
|
|
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
f2fs_truncate_data_blocks_range(&dn, count);
|
2012-11-02 15:09:44 +07:00
|
|
|
f2fs_put_dnode(&dn);
|
2015-09-17 19:22:44 +07:00
|
|
|
|
|
|
|
pg_start += count;
|
2012-11-02 15:09:44 +07:00
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2013-11-22 15:52:50 +07:00
|
|
|
/*
 * punch_hole - deallocate the byte range [offset, offset + len) of @inode
 * @inode:  file to punch the hole in
 * @offset: byte offset at which the hole starts
 * @len:    length of the hole in bytes
 *
 * Partially covered pages at either edge of the range are handed to
 * fill_zero(); the run of fully covered pages in between is dropped from
 * the page cache and passed to f2fs_truncate_hole().
 *
 * Returns 0 on success, or the first error reported by
 * f2fs_convert_inline_inode(), fill_zero() or f2fs_truncate_hole().
 */
static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
{
	struct f2fs_sb_info *sbi;
	pgoff_t first_pg, last_pg;
	loff_t head_off, tail_off;
	loff_t range_start, range_end;
	int err;

	err = f2fs_convert_inline_inode(inode);
	if (err)
		return err;

	/* page indices and in-page offsets of both ends of the hole */
	first_pg = ((unsigned long long) offset) >> PAGE_SHIFT;
	last_pg = ((unsigned long long) offset + len) >> PAGE_SHIFT;
	head_off = offset & (PAGE_SIZE - 1);
	tail_off = (offset + len) & (PAGE_SIZE - 1);

	/* the hole starts and ends inside the same page */
	if (first_pg == last_pg)
		return fill_zero(inode, first_pg, head_off,
						tail_off - head_off);

	/* partial leading page: handle its tail, then step past it */
	if (head_off) {
		err = fill_zero(inode, first_pg, head_off,
						PAGE_SIZE - head_off);
		first_pg++;
		if (err)
			return err;
	}

	/* partial trailing page: handle its head */
	if (tail_off) {
		err = fill_zero(inode, last_pg, 0, tail_off);
		if (err)
			return err;
	}

	/* no fully covered page remains between the two edges */
	if (first_pg >= last_pg)
		return 0;

	sbi = F2FS_I_SB(inode);
	f2fs_balance_fs(sbi, true);

	range_start = (loff_t)first_pg << PAGE_SHIFT;
	range_end = (loff_t)last_pg << PAGE_SHIFT;

	/*
	 * Lock order: i_gc_rwsem[WRITE], then i_mmap_sem, then the fs op
	 * lock; released in the reverse order below.
	 */
	down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	down_write(&F2FS_I(inode)->i_mmap_sem);

	/* the end argument is inclusive, hence the "- 1" */
	truncate_inode_pages_range(inode->i_mapping, range_start,
					range_end - 1);

	f2fs_lock_op(sbi);
	err = f2fs_truncate_hole(inode, first_pg, last_pg);
	f2fs_unlock_op(sbi);

	up_write(&F2FS_I(inode)->i_mmap_sem);
	up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);

	return err;
}
|
|
|
|
|
2016-07-09 07:42:21 +07:00
|
|
|
/*
 * __read_out_blkaddrs - collect the block addresses of a page range
 * @inode:      file whose block addresses are read
 * @blkaddr:    output array; entry i receives the address found for page
 *              @off + i
 * @do_replace: output array parallel to @blkaddr; entry i is set to 1 when
 *              the address for page @off + i was detached from its node
 *              (see below)
 * @off:        first page index of the range
 * @len:        number of pages in the range
 *
 * The range is walked one dnode at a time.  When the dnode lookup fails
 * with -ENOENT, the pages it would have covered are skipped and the
 * matching @blkaddr/@do_replace entries are left untouched.
 * NOTE(review): callers presumably pre-zero both arrays - confirm.
 *
 * For every address for which f2fs_is_checkpointed_data() returns false,
 * the slot in the node is overwritten with NULL_ADDR and the matching
 * @do_replace entry is set.
 *
 * Returns 0 on success, -ENOENT when the lookup fails with -ENOENT and
 * dn.max_level is 0, -EOPNOTSUPP when the LFS mount option is set and such
 * an address is met, or any other error from f2fs_get_dnode_of_data().
 */
static int __read_out_blkaddrs(struct inode *inode, block_t *blkaddr,
				int *do_replace, pgoff_t off, pgoff_t len)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct dnode_of_data dn;
	int ret, done, i;

next_dnode:
	set_new_dnode(&dn, inode, NULL, NULL, 0);
	ret = f2fs_get_dnode_of_data(&dn, off, LOOKUP_NODE_RA);
	if (ret && ret != -ENOENT) {
		return ret;
	} else if (ret == -ENOENT) {
		if (dn.max_level == 0)
			return -ENOENT;
		/* no dnode here: skip the slots it would have covered */
		done = min((pgoff_t)ADDRS_PER_BLOCK - dn.ofs_in_node, len);
		blkaddr += done;
		do_replace += done;
		goto next;
	}

	/* never read past the last address slot of this node page */
	done = min((pgoff_t)ADDRS_PER_PAGE(dn.node_page, inode) -
							dn.ofs_in_node, len);
	for (i = 0; i < done; i++, blkaddr++, do_replace++, dn.ofs_in_node++) {
		*blkaddr = datablock_addr(dn.inode,
					dn.node_page, dn.ofs_in_node);
		if (!f2fs_is_checkpointed_data(sbi, *blkaddr)) {

			if (test_opt(sbi, LFS)) {
				f2fs_put_dnode(&dn);
				/*
				 * ENOTSUPP is a kernel-internal code that
				 * userspace cannot decode; report the
				 * standard EOPNOTSUPP instead.
				 */
				return -EOPNOTSUPP;
			}

			/* do not invalidate this block address */
			f2fs_update_data_blkaddr(&dn, NULL_ADDR);
			*do_replace = 1;
		}
	}
	f2fs_put_dnode(&dn);
next:
	len -= done;
	off += done;
	if (len)
		goto next_dnode;
	return 0;
}
|
2015-05-06 12:09:46 +07:00
|
|
|
|
2016-07-09 07:42:21 +07:00
|
|
|
/*
 * __roll_back_blkaddrs - write saved block addresses back into their nodes
 * @inode:      file the addresses belong to
 * @blkaddr:    saved addresses, one per page starting at @off
 * @do_replace: flags parallel to @blkaddr; only entries with a non-zero
 *              flag are processed
 * @off:        page index that entry 0 corresponds to
 * @len:        number of entries in both arrays
 *
 * For each flagged entry the dnode for page @off + i is looked up and the
 * saved address is stored back with f2fs_update_data_blkaddr().  If the
 * lookup fails, the address is not restored: dec_valid_block_count() is
 * called for one block and the address is passed to
 * f2fs_invalidate_blocks().
 *
 * Lookup failures are not reported to the caller; this always returns 0.
 */
static int __roll_back_blkaddrs(struct inode *inode, block_t *blkaddr,
				int *do_replace, pgoff_t off, int len)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	int idx;

	for (idx = 0; idx < len; idx++) {
		struct dnode_of_data dn;

		if (!do_replace[idx])
			continue;

		set_new_dnode(&dn, inode, NULL, NULL, 0);
		if (f2fs_get_dnode_of_data(&dn, off + idx, LOOKUP_NODE_RA)) {
			/* no slot to restore into: drop the block instead */
			dec_valid_block_count(sbi, inode, 1);
			f2fs_invalidate_blocks(sbi, blkaddr[idx]);
		} else {
			f2fs_update_data_blkaddr(&dn, blkaddr[idx]);
		}
		f2fs_put_dnode(&dn);
	}

	return 0;
}
|
|
|
|
|
|
|
|
static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode,
|
|
|
|
block_t *blkaddr, int *do_replace,
|
|
|
|
pgoff_t src, pgoff_t dst, pgoff_t len, bool full)
|
|
|
|
{
|
|
|
|
struct f2fs_sb_info *sbi = F2FS_I_SB(src_inode);
|
|
|
|
pgoff_t i = 0;
|
|
|
|
int ret;
|
2016-06-04 09:29:38 +07:00
|
|
|
|
2016-07-09 07:42:21 +07:00
|
|
|
while (i < len) {
|
|
|
|
if (blkaddr[i] == NULL_ADDR && !full) {
|
|
|
|
i++;
|
|
|
|
continue;
|
2015-10-08 02:28:41 +07:00
|
|
|
}
|
2015-05-06 12:09:46 +07:00
|
|
|
|
2016-07-09 07:42:21 +07:00
|
|
|
if (do_replace[i] || blkaddr[i] == NULL_ADDR) {
|
|
|
|
struct dnode_of_data dn;
|
|
|
|
struct node_info ni;
|
|
|
|
size_t new_size;
|
|
|
|
pgoff_t ilen;
|
2015-05-06 12:09:46 +07:00
|
|
|
|
2016-07-09 07:42:21 +07:00
|
|
|
set_new_dnode(&dn, dst_inode, NULL, NULL, 0);
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
ret = f2fs_get_dnode_of_data(&dn, dst + i, ALLOC_NODE);
|
2016-07-09 07:42:21 +07:00
|
|
|
if (ret)
|
|
|
|
return ret;
|
2015-05-06 12:09:46 +07:00
|
|
|
|
2018-07-16 23:02:17 +07:00
|
|
|
ret = f2fs_get_node_info(sbi, dn.nid, &ni);
|
|
|
|
if (ret) {
|
|
|
|
f2fs_put_dnode(&dn);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2016-07-09 07:42:21 +07:00
|
|
|
ilen = min((pgoff_t)
|
|
|
|
ADDRS_PER_PAGE(dn.node_page, dst_inode) -
|
|
|
|
dn.ofs_in_node, len - i);
|
|
|
|
do {
|
2017-07-18 23:19:06 +07:00
|
|
|
dn.data_blkaddr = datablock_addr(dn.inode,
|
|
|
|
dn.node_page, dn.ofs_in_node);
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
f2fs_truncate_data_blocks_range(&dn, 1);
|
2016-07-09 07:42:21 +07:00
|
|
|
|
|
|
|
if (do_replace[i]) {
|
|
|
|
f2fs_i_blocks_write(src_inode,
|
2017-07-08 23:13:07 +07:00
|
|
|
1, false, false);
|
2016-07-09 07:42:21 +07:00
|
|
|
f2fs_i_blocks_write(dst_inode,
|
2017-07-08 23:13:07 +07:00
|
|
|
1, true, false);
|
2016-07-09 07:42:21 +07:00
|
|
|
f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
|
|
|
|
blkaddr[i], ni.version, true, false);
|
|
|
|
|
|
|
|
do_replace[i] = 0;
|
|
|
|
}
|
|
|
|
dn.ofs_in_node++;
|
|
|
|
i++;
|
|
|
|
new_size = (dst + i) << PAGE_SHIFT;
|
|
|
|
if (dst_inode->i_size < new_size)
|
|
|
|
f2fs_i_size_write(dst_inode, new_size);
|
2016-11-24 01:51:17 +07:00
|
|
|
} while (--ilen && (do_replace[i] || blkaddr[i] == NULL_ADDR));
|
2015-10-08 02:28:41 +07:00
|
|
|
|
2016-07-09 07:42:21 +07:00
|
|
|
f2fs_put_dnode(&dn);
|
|
|
|
} else {
|
|
|
|
struct page *psrc, *pdst;
|
|
|
|
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
psrc = f2fs_get_lock_data_page(src_inode,
|
|
|
|
src + i, true);
|
2016-07-09 07:42:21 +07:00
|
|
|
if (IS_ERR(psrc))
|
|
|
|
return PTR_ERR(psrc);
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
pdst = f2fs_get_new_data_page(dst_inode, NULL, dst + i,
|
2016-07-09 07:42:21 +07:00
|
|
|
true);
|
|
|
|
if (IS_ERR(pdst)) {
|
|
|
|
f2fs_put_page(psrc, 1);
|
|
|
|
return PTR_ERR(pdst);
|
|
|
|
}
|
|
|
|
f2fs_copy_page(psrc, pdst);
|
|
|
|
set_page_dirty(pdst);
|
|
|
|
f2fs_put_page(pdst, 1);
|
2015-10-08 02:28:41 +07:00
|
|
|
f2fs_put_page(psrc, 1);
|
2015-05-06 12:09:46 +07:00
|
|
|
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
ret = f2fs_truncate_hole(src_inode,
|
|
|
|
src + i, src + i + 1);
|
2016-07-09 07:42:21 +07:00
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
i++;
|
|
|
|
}
|
2015-10-08 02:28:41 +07:00
|
|
|
}
|
|
|
|
return 0;
|
2016-07-09 07:42:21 +07:00
|
|
|
}
|
2015-05-06 12:09:46 +07:00
|
|
|
|
2016-07-09 07:42:21 +07:00
|
|
|
/*
 * Move @len pages from @src_inode at page index @src to @dst_inode at page
 * index @dst, working in chunks of at most 4 * ADDRS_PER_BLOCK pages.
 *
 * Each chunk gets two zeroed scratch arrays (block addresses and per-block
 * replace flags) that are filled by __read_out_blkaddrs() and consumed by
 * __clone_blkaddrs(). If either step fails, __roll_back_blkaddrs() is called
 * for the current chunk and the error is returned; earlier chunks are left
 * as they are.
 *
 * Returns 0 on success, -ENOMEM if a scratch array cannot be allocated, or
 * the error from the failing helper.
 */
static int __exchange_data_block(struct inode *src_inode,
			struct inode *dst_inode, pgoff_t src, pgoff_t dst,
			pgoff_t len, bool full)
{
	while (len) {
		pgoff_t chunk = min((pgoff_t)4 * ADDRS_PER_BLOCK, len);
		block_t *addrs;
		int *replace;
		int err;

		addrs = f2fs_kvzalloc(F2FS_I_SB(src_inode),
					array_size(chunk, sizeof(block_t)),
					GFP_KERNEL);
		if (!addrs)
			return -ENOMEM;

		replace = f2fs_kvzalloc(F2FS_I_SB(src_inode),
					array_size(chunk, sizeof(int)),
					GFP_KERNEL);
		if (!replace) {
			kvfree(addrs);
			return -ENOMEM;
		}

		/* clone only runs when the read-out step succeeded */
		err = __read_out_blkaddrs(src_inode, addrs, replace,
						src, chunk);
		if (!err)
			err = __clone_blkaddrs(src_inode, dst_inode, addrs,
					replace, src, dst, chunk, full);

		if (err) {
			__roll_back_blkaddrs(src_inode, addrs, replace,
						src, chunk);
			kvfree(addrs);
			kvfree(replace);
			return err;
		}

		kvfree(addrs);
		kvfree(replace);

		len -= chunk;
		dst += chunk;
		src += chunk;
	}

	return 0;
}
|
2015-05-06 12:09:46 +07:00
|
|
|
|
2018-07-25 10:11:56 +07:00
|
|
|
/*
 * Shift the data that follows the collapsed range [@offset, @offset + @len)
 * down to @offset by exchanging blocks within @inode.
 *
 * The page count of the file is derived from i_size rounded up to a page;
 * every page from (@offset + @len) to the end of the file is moved to start
 * at @offset. The inode's i_gc_rwsem[WRITE] and i_mmap_sem are held for
 * writing, and the f2fs op lock is held, around the exchange.
 *
 * Returns the result of __exchange_data_block().
 */
static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	pgoff_t total_pages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE;
	pgoff_t dst_index = offset >> PAGE_SHIFT;
	pgoff_t src_index = (offset + len) >> PAGE_SHIFT;
	int err;

	f2fs_balance_fs(sbi, true);

	/* avoid gc operation during block exchange */
	down_write(&fi->i_gc_rwsem[WRITE]);
	down_write(&fi->i_mmap_sem);

	f2fs_lock_op(sbi);
	f2fs_drop_extent_tree(inode);
	truncate_pagecache(inode, offset);
	err = __exchange_data_block(inode, inode, src_index, dst_index,
					total_pages - src_index, true);
	f2fs_unlock_op(sbi);

	/* release in the reverse order of acquisition */
	up_write(&fi->i_mmap_sem);
	up_write(&fi->i_gc_rwsem[WRITE]);

	return err;
}
|
|
|
|
|
|
|
|
/*
 * Remove the byte range [@offset, @offset + @len) from @inode and shrink the
 * file by @len.
 *
 * Returns -EINVAL when the range reaches or passes the end of the file, or
 * when @offset or @len is not a multiple of F2FS_BLKSIZE. Otherwise returns
 * 0 on success or the first error from inline conversion, writeback, the
 * block collapse or the final block truncation. i_size is only updated when
 * the final truncation succeeds.
 */
static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
{
	loff_t shrunk_size;
	int err;

	if (i_size_read(inode) <= offset + len)
		return -EINVAL;

	/* collapse range should be aligned to block size of f2fs. */
	if ((offset | len) & (F2FS_BLKSIZE - 1))
		return -EINVAL;

	err = f2fs_convert_inline_inode(inode);

	/* write out all dirty pages from offset */
	if (!err)
		err = filemap_write_and_wait_range(inode->i_mapping,
						offset, LLONG_MAX);
	if (!err)
		err = f2fs_do_collapse(inode, offset, len);
	if (err)
		return err;

	/* write out all moved pages, if possible */
	down_write(&F2FS_I(inode)->i_mmap_sem);
	filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
	truncate_pagecache(inode, offset);

	/* i_size is re-read here, after the blocks have been shifted */
	shrunk_size = i_size_read(inode) - len;
	truncate_pagecache(inode, shrunk_size);

	err = f2fs_truncate_blocks(inode, shrunk_size, true, false);
	up_write(&F2FS_I(inode)->i_mmap_sem);
	if (err)
		return err;

	f2fs_i_size_write(inode, shrunk_size);
	return 0;
}
|
|
|
|
|
2016-05-09 18:56:31 +07:00
|
|
|
/*
 * Turn every block slot of @dn that backs page indexes [start, end) into
 * NEW_ADDR.
 *
 * The walk pairs page index @start with the value dn->ofs_in_node has on
 * entry and advances both together.
 *
 * Pass 1 counts the slots that read back as NULL_ADDR and asks
 * f2fs_reserve_new_blocks() for that many blocks.
 * Pass 2 revisits each slot:
 *   - still NULL_ADDR  -> stop with -ENOSPC
 *   - already NEW_ADDR -> nothing to do
 *   - anything else    -> f2fs_invalidate_blocks() on the old address, then
 *                         store NEW_ADDR through f2fs_set_data_blkaddr()
 *
 * Returns 0 on success, the error reported by f2fs_reserve_new_blocks(), or
 * -ENOSPC when pass 2 meets a slot that is still NULL_ADDR.
 */
static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start,
								pgoff_t end)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	unsigned int first_ofs = dn->ofs_in_node;
	unsigned int ofs;
	blkcnt_t nr_holes = 0;
	pgoff_t index;
	int ret;

	/* pass 1: how many slots in the span have no block at all? */
	for (index = start, ofs = first_ofs; index < end; index++, ofs++) {
		if (datablock_addr(dn->inode, dn->node_page, ofs) == NULL_ADDR)
			nr_holes++;
	}

	ret = f2fs_reserve_new_blocks(dn, nr_holes);
	if (ret)
		return ret;

	/* pass 2 starts over from the first slot of the span */
	dn->ofs_in_node = first_ofs;
	for (index = start; index < end; index++, dn->ofs_in_node++) {
		dn->data_blkaddr = datablock_addr(dn->inode,
					dn->node_page, dn->ofs_in_node);

		/*
		 * f2fs_reserve_new_blocks will not guarantee entire block
		 * allocation.
		 */
		if (dn->data_blkaddr == NULL_ADDR) {
			ret = -ENOSPC;
			break;
		}

		if (dn->data_blkaddr == NEW_ADDR)
			continue;

		/* drop the old on-disk block and mark the slot as reserved */
		f2fs_invalidate_blocks(sbi, dn->data_blkaddr);
		dn->data_blkaddr = NEW_ADDR;
		f2fs_set_data_blkaddr(dn);
	}

	/* hand the processed span (index - start slots from @start) over */
	f2fs_update_extent_cache_range(dn, start, 0, index - start);

	return ret;
}
|
|
|
|
|
2015-05-06 12:11:13 +07:00
|
|
|
/*
 * Zero-range handler for the byte span [offset, offset + len) of @inode.
 *
 * Partial pages at the head and tail of the span are given to fill_zero();
 * the whole pages in between are processed one dnode at a time through
 * f2fs_do_zero_range() while holding i_gc_rwsem[WRITE], i_mmap_sem and the
 * f2fs op lock.
 *
 * @mode is only inspected for FALLOC_FL_KEEP_SIZE: when the processed span
 * reaches past the current i_size, either the keep-isize flag is set on the
 * file or i_size is raised.  This bookkeeping also runs for the part that
 * was completed before a failure inside the whole-page loop or the tail
 * fill.
 *
 * Returns 0 on success or the first error reported by a helper.
 */
static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
								int mode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	loff_t range_end = offset + len;
	loff_t new_size = i_size_read(inode);
	loff_t head_off, tail_off;
	pgoff_t cur, pg_start, pg_end;
	int ret;

	ret = inode_newsize_ok(inode, range_end);
	if (ret)
		return ret;

	ret = f2fs_convert_inline_inode(inode);
	if (ret)
		return ret;

	ret = filemap_write_and_wait_range(inode->i_mapping, offset,
							range_end - 1);
	if (ret)
		return ret;

	pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
	pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;

	head_off = offset & (PAGE_SIZE - 1);
	tail_off = range_end & (PAGE_SIZE - 1);

	/* the whole span sits inside a single page */
	if (pg_start == pg_end) {
		ret = fill_zero(inode, pg_start, head_off,
						tail_off - head_off);
		if (ret)
			return ret;

		new_size = max_t(loff_t, new_size, range_end);
		goto out;
	}

	/* unaligned head: handle the partial first page, then step past it */
	if (head_off) {
		ret = fill_zero(inode, pg_start, head_off,
						PAGE_SIZE - head_off);
		if (ret)
			return ret;

		pg_start++;
		new_size = max_t(loff_t, new_size,
					(loff_t)pg_start << PAGE_SHIFT);
	}

	/* whole pages, at most one dnode's worth of slots per iteration */
	cur = pg_start;
	while (cur < pg_end) {
		struct dnode_of_data dn;
		unsigned int addrs;
		pgoff_t stop;

		down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
		down_write(&F2FS_I(inode)->i_mmap_sem);

		truncate_pagecache_range(inode,
			(loff_t)cur << PAGE_SHIFT,
			((loff_t)pg_end << PAGE_SHIFT) - 1);

		f2fs_lock_op(sbi);

		set_new_dnode(&dn, inode, NULL, NULL, 0);
		ret = f2fs_get_dnode_of_data(&dn, cur, ALLOC_NODE);
		if (ret) {
			/* release in reverse order of acquisition */
			f2fs_unlock_op(sbi);
			up_write(&F2FS_I(inode)->i_mmap_sem);
			up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
			goto out;
		}

		/* do not run past the last slot of this dnode */
		addrs = ADDRS_PER_PAGE(dn.node_page, inode);
		stop = min(pg_end, addrs - dn.ofs_in_node + cur);

		ret = f2fs_do_zero_range(&dn, cur, stop);
		f2fs_put_dnode(&dn);

		f2fs_unlock_op(sbi);
		up_write(&F2FS_I(inode)->i_mmap_sem);
		up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);

		f2fs_balance_fs(sbi, dn.node_changed);

		if (ret)
			goto out;

		cur = stop;
		new_size = max_t(loff_t, new_size,
				(loff_t)cur << PAGE_SHIFT);
	}

	/* unaligned tail: handle the partial last page */
	if (tail_off) {
		ret = fill_zero(inode, pg_end, 0, tail_off);
		if (ret)
			goto out;

		new_size = max_t(loff_t, new_size, range_end);
	}

out:
	if (new_size > i_size_read(inode)) {
		if (mode & FALLOC_FL_KEEP_SIZE)
			file_set_keep_isize(inode);
		else
			f2fs_i_size_write(inode, new_size);
	}
	return ret;
}
|
|
|
|
|
2015-05-28 18:16:57 +07:00
|
|
|
/*
 * Insert-range handler: open a gap of @len bytes at @offset in @inode by
 * moving the blocks from @offset onwards @len bytes towards the end.
 *
 * Rejected with -EINVAL when @offset is at or beyond i_size, or when
 * @offset or @len is not a multiple of F2FS_BLKSIZE.
 *
 * Blocks are moved from the tail of the file backwards, at most
 * (len >> PAGE_SHIFT) pages per __exchange_data_block() call, with
 * i_gc_rwsem[WRITE] and i_mmap_sem held for the whole move and the f2fs op
 * lock taken per call.  i_size is raised by @len only if every step
 * succeeded.
 *
 * Returns 0 on success or the first error reported by a helper.
 */
static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	pgoff_t pg_start, pg_end, shift, tail, chunk;
	loff_t new_size;
	int ret;

	new_size = i_size_read(inode) + len;
	ret = inode_newsize_ok(inode, new_size);
	if (ret)
		return ret;

	if (offset >= i_size_read(inode))
		return -EINVAL;

	/* insert range should be aligned to block size of f2fs. */
	if ((offset & (F2FS_BLKSIZE - 1)) || (len & (F2FS_BLKSIZE - 1)))
		return -EINVAL;

	ret = f2fs_convert_inline_inode(inode);
	if (ret)
		return ret;

	f2fs_balance_fs(sbi, true);

	down_write(&F2FS_I(inode)->i_mmap_sem);
	ret = f2fs_truncate_blocks(inode, i_size_read(inode), true, false);
	up_write(&F2FS_I(inode)->i_mmap_sem);
	if (ret)
		return ret;

	/* write out all dirty pages from offset */
	ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
	if (ret)
		return ret;

	pg_start = offset >> PAGE_SHIFT;
	pg_end = (offset + len) >> PAGE_SHIFT;
	shift = pg_end - pg_start;
	tail = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE;

	/* avoid gc operation during block exchange */
	down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	down_write(&F2FS_I(inode)->i_mmap_sem);
	truncate_pagecache(inode, offset);

	/* ret is 0 here; walk backwards from the tail until pg_start */
	while (tail > pg_start) {
		/* move at most 'shift' pages per step */
		chunk = (tail - pg_start > shift) ? shift : tail - pg_start;
		tail -= chunk;

		f2fs_lock_op(sbi);
		f2fs_drop_extent_tree(inode);

		ret = __exchange_data_block(inode, inode, tail,
					tail + shift, chunk, false);
		f2fs_unlock_op(sbi);

		if (ret)
			break;
	}
	up_write(&F2FS_I(inode)->i_mmap_sem);
	up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);

	/* write out all moved pages, if possible */
	down_write(&F2FS_I(inode)->i_mmap_sem);
	filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
	truncate_pagecache(inode, offset);
	up_write(&F2FS_I(inode)->i_mmap_sem);

	if (ret)
		return ret;

	f2fs_i_size_write(inode, new_size);
	return 0;
}
|
|
|
|
|
2012-11-02 15:09:44 +07:00
|
|
|
/*
 * Map blocks for the byte span [offset, offset + len) of @inode via
 * f2fs_map_blocks() in F2FS_GET_BLOCK_PRE_AIO mode.
 *
 * The request covers every page touched by the span, including a partial
 * last page.  If mapping fails but map.m_len is non-zero afterwards, the
 * size bookkeeping is still done up to the position derived from
 * map.m_lblk + map.m_len; if map.m_len is zero the error is returned right
 * away.
 *
 * @mode is only inspected for FALLOC_FL_KEEP_SIZE: when the resulting size
 * exceeds the current i_size, either the keep-isize flag is set on the file
 * or i_size is raised.
 *
 * Returns 0 on success or the error from inode_newsize_ok(),
 * f2fs_convert_inline_inode() or f2fs_map_blocks().
 */
static int expand_inode_data(struct inode *inode, loff_t offset,
					loff_t len, int mode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_map_blocks map = {
		.m_next_pgofs = NULL,
		.m_next_extent = NULL,
		.m_seg_type = NO_CHECK_TYPE,
		.m_may_create = true,
	};
	loff_t range_end = offset + len;
	loff_t new_size = i_size_read(inode);
	loff_t tail_off;
	pgoff_t pg_end;
	int err;

	err = inode_newsize_ok(inode, range_end);
	if (err)
		return err;

	err = f2fs_convert_inline_inode(inode);
	if (err)
		return err;

	f2fs_balance_fs(sbi, true);

	pg_end = ((unsigned long long)offset + len) >> PAGE_SHIFT;
	tail_off = range_end & (PAGE_SIZE - 1);

	/* whole pages first, plus one more for a partial last page */
	map.m_lblk = ((unsigned long long)offset) >> PAGE_SHIFT;
	map.m_len = pg_end - map.m_lblk;
	if (tail_off)
		map.m_len++;

	err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO);
	if (!err) {
		new_size = ((loff_t)pg_end << PAGE_SHIFT) + tail_off;
	} else {
		pgoff_t last_off;

		if (!map.m_len)
			return err;

		last_off = map.m_lblk + map.m_len - 1;

		/* update new size to the failed position */
		if (last_off == pg_end)
			new_size = range_end;
		else
			new_size = (loff_t)(last_off + 1) << PAGE_SHIFT;
	}

	if (new_size > i_size_read(inode)) {
		if (mode & FALLOC_FL_KEEP_SIZE)
			file_set_keep_isize(inode);
		else
			f2fs_i_size_write(inode, new_size);
	}

	return err;
}
|
|
|
|
|
|
|
|
/*
 * ->fallocate entry point.
 *
 * Validates the request, then, under the inode lock, dispatches to the
 * helper matching the first FALLOC_FL_* bit found in @mode (punch hole,
 * collapse range, zero range, insert range) or, when none of those is
 * set, to expand_inode_data().
 *
 * Returns 0 on success or a negative errno:
 *   -EIO        f2fs_cp_error() reports a checkpoint error
 *   -EINVAL     the inode is not a regular file
 *   -EOPNOTSUPP unsupported mode bits, or collapse/insert range requested
 *               on an encrypted inode
 * or whatever the selected helper returns.
 */
static long f2fs_fallocate(struct file *file, int mode,
				loff_t offset, loff_t len)
{
	const int supported_modes = FALLOC_FL_KEEP_SIZE |
			FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE |
			FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE;
	const int range_shift_modes = FALLOC_FL_COLLAPSE_RANGE |
			FALLOC_FL_INSERT_RANGE;
	struct inode *inode = file_inode(file);
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	long err = 0;

	if (unlikely(f2fs_cp_error(sbi)))
		return -EIO;

	/* ->fallocate is only implemented for regular files */
	if (!S_ISREG(inode->i_mode))
		return -EINVAL;

	/* collapse/insert range is refused on encrypted inodes */
	if ((mode & range_shift_modes) && f2fs_encrypted_inode(inode))
		return -EOPNOTSUPP;

	if (mode & ~supported_modes)
		return -EOPNOTSUPP;

	inode_lock(inode);

	if (mode & FALLOC_FL_PUNCH_HOLE) {
		/*
		 * A hole starting at or beyond i_size is a no-op: leave
		 * with success and without touching the timestamps.
		 */
		if (offset >= inode->i_size)
			goto unlock;

		err = punch_hole(inode, offset, len);
	} else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
		err = f2fs_collapse_range(inode, offset, len);
	} else if (mode & FALLOC_FL_ZERO_RANGE) {
		err = f2fs_zero_range(inode, offset, len, mode);
	} else if (mode & FALLOC_FL_INSERT_RANGE) {
		err = f2fs_insert_range(inode, offset, len);
	} else {
		err = expand_inode_data(inode, offset, len, mode);
	}

	if (err == 0) {
		/* the operation succeeded: refresh ctime/mtime and dirty the inode */
		inode->i_ctime = current_time(inode);
		inode->i_mtime = inode->i_ctime;
		f2fs_mark_inode_dirty_sync(inode, false);
		f2fs_update_time(sbi, REQ_TIME);
	}

unlock:
	inode_unlock(inode);

	trace_f2fs_fallocate(inode, mode, offset, len, err);
	return err;
}
|
|
|
|
|
2014-12-09 21:08:59 +07:00
|
|
|
static int f2fs_release_file(struct inode *inode, struct file *filp)
|
|
|
|
{
|
2016-04-12 01:51:51 +07:00
|
|
|
/*
|
|
|
|
* f2fs_relase_file is called at every close calls. So we should
|
|
|
|
* not drop any inmemory pages by close called by other process.
|
|
|
|
*/
|
|
|
|
if (!(filp->f_mode & FMODE_WRITE) ||
|
|
|
|
atomic_read(&inode->i_writecount) != 1)
|
|
|
|
return 0;
|
|
|
|
|
2014-12-09 21:08:59 +07:00
|
|
|
/* some remained atomic pages should discarded */
|
|
|
|
if (f2fs_is_atomic_file(inode))
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
f2fs_drop_inmem_pages(inode);
|
2014-12-09 21:08:59 +07:00
|
|
|
if (f2fs_is_volatile_file(inode)) {
|
2016-05-21 00:13:22 +07:00
|
|
|
set_inode_flag(inode, FI_DROP_CACHE);
|
2014-12-09 21:08:59 +07:00
|
|
|
filemap_fdatawrite(inode->i_mapping);
|
2016-05-21 00:13:22 +07:00
|
|
|
clear_inode_flag(inode, FI_DROP_CACHE);
|
2018-06-04 22:20:51 +07:00
|
|
|
clear_inode_flag(inode, FI_VOLATILE_FILE);
|
|
|
|
stat_dec_volatile_write(inode);
|
2014-12-09 21:08:59 +07:00
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-07-25 09:46:29 +07:00
|
|
|
/*
 * ->flush handler.
 *
 * If the process running a transaction has crashed, the transaction must
 * be rolled back; otherwise other readers/writers could see a corrupted
 * database until every writer has closed the file. The roll-back has to
 * happen before the file lock is dropped, which is why it lives in ->flush.
 *
 * Always returns 0.
 */
static int f2fs_file_flush(struct file *file, fl_owner_t id)
{
	struct inode *inode = file_inode(file);

	if (f2fs_is_atomic_file(inode)) {
		/* only the task recorded as inmem_task drops the pages */
		if (F2FS_I(inode)->inmem_task == current)
			f2fs_drop_inmem_pages(inode);
	}

	return 0;
}
|
|
|
|
|
2014-09-25 05:37:02 +07:00
|
|
|
static int f2fs_ioc_getflags(struct file *filp, unsigned long arg)
|
2012-11-02 15:09:44 +07:00
|
|
|
{
|
2013-02-28 04:59:05 +07:00
|
|
|
struct inode *inode = file_inode(filp);
|
2012-11-02 15:09:44 +07:00
|
|
|
struct f2fs_inode_info *fi = F2FS_I(inode);
|
2018-04-08 10:27:14 +07:00
|
|
|
unsigned int flags = fi->i_flags;
|
|
|
|
|
2018-07-17 19:41:47 +07:00
|
|
|
if (f2fs_encrypted_inode(inode))
|
2018-04-08 10:27:14 +07:00
|
|
|
flags |= F2FS_ENCRYPT_FL;
|
|
|
|
if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode))
|
|
|
|
flags |= F2FS_INLINE_DATA_FL;
|
|
|
|
|
|
|
|
flags &= F2FS_FL_USER_VISIBLE;
|
|
|
|
|
2014-09-25 05:37:02 +07:00
|
|
|
return put_user(flags, (int __user *)arg);
|
|
|
|
}
|
2012-11-02 15:09:44 +07:00
|
|
|
|
2017-07-28 23:32:52 +07:00
|
|
|
static int __f2fs_ioc_setflags(struct inode *inode, unsigned int flags)
|
|
|
|
{
|
|
|
|
struct f2fs_inode_info *fi = F2FS_I(inode);
|
|
|
|
unsigned int oldflags;
|
|
|
|
|
|
|
|
/* Is it quota file? Do not allow user to mess with it */
|
|
|
|
if (IS_NOQUOTA(inode))
|
|
|
|
return -EPERM;
|
|
|
|
|
|
|
|
flags = f2fs_mask_flags(inode->i_mode, flags);
|
|
|
|
|
|
|
|
oldflags = fi->i_flags;
|
|
|
|
|
2018-04-03 14:08:17 +07:00
|
|
|
if ((flags ^ oldflags) & (F2FS_APPEND_FL | F2FS_IMMUTABLE_FL))
|
2017-07-28 23:32:52 +07:00
|
|
|
if (!capable(CAP_LINUX_IMMUTABLE))
|
|
|
|
return -EPERM;
|
|
|
|
|
2018-04-08 10:25:53 +07:00
|
|
|
flags = flags & F2FS_FL_USER_MODIFIABLE;
|
|
|
|
flags |= oldflags & ~F2FS_FL_USER_MODIFIABLE;
|
2017-07-28 23:32:52 +07:00
|
|
|
fi->i_flags = flags;
|
|
|
|
|
2018-04-03 14:08:17 +07:00
|
|
|
if (fi->i_flags & F2FS_PROJINHERIT_FL)
|
2017-07-28 23:32:52 +07:00
|
|
|
set_inode_flag(inode, FI_PROJ_INHERIT);
|
|
|
|
else
|
|
|
|
clear_inode_flag(inode, FI_PROJ_INHERIT);
|
|
|
|
|
|
|
|
inode->i_ctime = current_time(inode);
|
|
|
|
f2fs_set_inode_flags(inode);
|
2018-12-18 18:20:17 +07:00
|
|
|
f2fs_mark_inode_dirty_sync(inode, true);
|
2017-07-28 23:32:52 +07:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-09-25 05:37:02 +07:00
|
|
|
static int f2fs_ioc_setflags(struct file *filp, unsigned long arg)
|
|
|
|
{
|
|
|
|
struct inode *inode = file_inode(filp);
|
2016-08-23 19:10:47 +07:00
|
|
|
unsigned int flags;
|
2014-09-25 05:37:02 +07:00
|
|
|
int ret;
|
2012-11-02 15:09:44 +07:00
|
|
|
|
2016-05-09 18:56:32 +07:00
|
|
|
if (!inode_owner_or_capable(inode))
|
|
|
|
return -EACCES;
|
|
|
|
|
|
|
|
if (get_user(flags, (int __user *)arg))
|
|
|
|
return -EFAULT;
|
|
|
|
|
2014-09-25 05:37:02 +07:00
|
|
|
ret = mnt_want_write_file(filp);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
2012-11-02 15:09:44 +07:00
|
|
|
|
2016-01-23 03:40:57 +07:00
|
|
|
inode_lock(inode);
|
2012-11-02 15:09:44 +07:00
|
|
|
|
2017-07-28 23:32:52 +07:00
|
|
|
ret = __f2fs_ioc_setflags(inode, flags);
|
2012-11-02 15:09:44 +07:00
|
|
|
|
2017-05-03 22:59:13 +07:00
|
|
|
inode_unlock(inode);
|
2014-09-25 05:37:02 +07:00
|
|
|
mnt_drop_write_file(filp);
|
|
|
|
return ret;
|
|
|
|
}
|
2014-09-21 12:06:39 +07:00
|
|
|
|
2015-01-23 19:36:04 +07:00
|
|
|
static int f2fs_ioc_getversion(struct file *filp, unsigned long arg)
|
|
|
|
{
|
|
|
|
struct inode *inode = file_inode(filp);
|
|
|
|
|
|
|
|
return put_user(inode->i_generation, (int __user *)arg);
|
|
|
|
}
|
|
|
|
|
2014-10-07 07:39:50 +07:00
|
|
|
static int f2fs_ioc_start_atomic_write(struct file *filp)
|
|
|
|
{
|
|
|
|
struct inode *inode = file_inode(filp);
|
2015-07-17 17:06:35 +07:00
|
|
|
int ret;
|
2014-10-07 07:39:50 +07:00
|
|
|
|
|
|
|
if (!inode_owner_or_capable(inode))
|
|
|
|
return -EACCES;
|
|
|
|
|
2017-03-17 09:04:15 +07:00
|
|
|
if (!S_ISREG(inode->i_mode))
|
|
|
|
return -EINVAL;
|
|
|
|
|
2016-05-09 18:56:32 +07:00
|
|
|
ret = mnt_want_write_file(filp);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
2016-05-09 18:56:33 +07:00
|
|
|
inode_lock(inode);
|
|
|
|
|
2018-07-27 16:15:11 +07:00
|
|
|
if (f2fs_is_atomic_file(inode)) {
|
|
|
|
if (is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST))
|
|
|
|
ret = -EINVAL;
|
2016-05-09 18:56:32 +07:00
|
|
|
goto out;
|
2018-07-27 16:15:11 +07:00
|
|
|
}
|
2014-10-07 07:39:50 +07:00
|
|
|
|
2015-07-17 17:06:35 +07:00
|
|
|
ret = f2fs_convert_inline_inode(inode);
|
|
|
|
if (ret)
|
2016-05-09 18:56:32 +07:00
|
|
|
goto out;
|
2014-10-07 07:39:50 +07:00
|
|
|
|
2018-07-25 10:11:56 +07:00
|
|
|
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
|
|
|
|
|
2016-04-13 04:36:11 +07:00
|
|
|
if (!get_dirty_pages(inode))
|
2018-04-18 10:06:39 +07:00
|
|
|
goto skip_flush;
|
2016-04-13 04:36:11 +07:00
|
|
|
|
|
|
|
f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING,
|
2016-12-03 06:11:32 +07:00
|
|
|
"Unexpected flush for atomic writes: ino=%lu, npages=%u",
|
2016-04-13 04:36:11 +07:00
|
|
|
inode->i_ino, get_dirty_pages(inode));
|
|
|
|
ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
|
2018-07-25 10:11:56 +07:00
|
|
|
if (ret) {
|
|
|
|
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
|
2017-03-18 08:20:55 +07:00
|
|
|
goto out;
|
2018-07-25 10:11:56 +07:00
|
|
|
}
|
2018-04-18 10:06:39 +07:00
|
|
|
skip_flush:
|
|
|
|
set_inode_flag(inode, FI_ATOMIC_FILE);
|
f2fs: avoid stucking GC due to atomic write
f2fs doesn't allow abuse on atomic write class interface, so except
limiting in-mem pages' total memory usage capacity, we need to limit
atomic-write usage as well when filesystem is seriously fragmented,
otherwise we may run into infinite loop during foreground GC because
target blocks in victim segment are belong to atomic opened file for
long time.
Now, we will detect failure due to atomic write in foreground GC, if
the count exceeds threshold, we will drop all atomic written data in
cache, by this, I expect it can keep our system running safely to
prevent Dos attack.
In addition, his patch adds to show GC skip information in debugfs,
now it just shows count of skipped caused by atomic write.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-07 19:28:54 +07:00
|
|
|
clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
|
2018-07-25 10:11:56 +07:00
|
|
|
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
|
2017-03-18 08:20:55 +07:00
|
|
|
|
2018-07-25 10:11:56 +07:00
|
|
|
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
|
2017-07-25 09:46:29 +07:00
|
|
|
F2FS_I(inode)->inmem_task = current;
|
2016-12-29 04:55:09 +07:00
|
|
|
stat_inc_atomic_write(inode);
|
|
|
|
stat_update_max_atomic_write(inode);
|
2017-03-18 08:20:55 +07:00
|
|
|
out:
|
2016-05-09 18:56:33 +07:00
|
|
|
inode_unlock(inode);
|
2016-05-09 18:56:32 +07:00
|
|
|
mnt_drop_write_file(filp);
|
2016-04-13 04:36:11 +07:00
|
|
|
return ret;
|
2014-10-07 07:39:50 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
static int f2fs_ioc_commit_atomic_write(struct file *filp)
|
|
|
|
{
|
|
|
|
struct inode *inode = file_inode(filp);
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (!inode_owner_or_capable(inode))
|
|
|
|
return -EACCES;
|
|
|
|
|
|
|
|
ret = mnt_want_write_file(filp);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
2018-07-25 10:11:56 +07:00
|
|
|
f2fs_balance_fs(F2FS_I_SB(inode), true);
|
2016-05-09 18:56:33 +07:00
|
|
|
|
2018-07-25 10:11:56 +07:00
|
|
|
inode_lock(inode);
|
2018-02-27 21:45:24 +07:00
|
|
|
|
2018-04-18 16:45:02 +07:00
|
|
|
if (f2fs_is_volatile_file(inode)) {
|
|
|
|
ret = -EINVAL;
|
2016-05-09 18:56:32 +07:00
|
|
|
goto err_out;
|
2018-04-18 16:45:02 +07:00
|
|
|
}
|
2016-05-09 18:56:32 +07:00
|
|
|
|
2015-07-25 14:29:17 +07:00
|
|
|
if (f2fs_is_atomic_file(inode)) {
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
ret = f2fs_commit_inmem_pages(inode);
|
2017-01-07 17:50:26 +07:00
|
|
|
if (ret)
|
2015-07-25 14:52:52 +07:00
|
|
|
goto err_out;
|
2017-01-07 17:50:26 +07:00
|
|
|
|
2016-12-29 04:55:09 +07:00
|
|
|
ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
|
2017-01-07 17:50:26 +07:00
|
|
|
if (!ret) {
|
|
|
|
clear_inode_flag(inode, FI_ATOMIC_FILE);
|
f2fs: avoid stucking GC due to atomic write
f2fs doesn't allow abuse on atomic write class interface, so except
limiting in-mem pages' total memory usage capacity, we need to limit
atomic-write usage as well when filesystem is seriously fragmented,
otherwise we may run into infinite loop during foreground GC because
target blocks in victim segment are belong to atomic opened file for
long time.
Now, we will detect failure due to atomic write in foreground GC, if
the count exceeds threshold, we will drop all atomic written data in
cache, by this, I expect it can keep our system running safely to
prevent Dos attack.
In addition, his patch adds to show GC skip information in debugfs,
now it just shows count of skipped caused by atomic write.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-07 19:28:54 +07:00
|
|
|
F2FS_I(inode)->i_gc_failures[GC_FAILURE_ATOMIC] = 0;
|
2017-01-07 17:50:26 +07:00
|
|
|
stat_dec_atomic_write(inode);
|
2016-01-10 08:08:38 +07:00
|
|
|
}
|
2016-12-29 04:55:09 +07:00
|
|
|
} else {
|
2017-08-23 17:23:25 +07:00
|
|
|
ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false);
|
2015-07-25 14:29:17 +07:00
|
|
|
}
|
2015-07-25 14:52:52 +07:00
|
|
|
err_out:
|
f2fs: avoid stucking GC due to atomic write
f2fs doesn't allow abuse on atomic write class interface, so except
limiting in-mem pages' total memory usage capacity, we need to limit
atomic-write usage as well when filesystem is seriously fragmented,
otherwise we may run into infinite loop during foreground GC because
target blocks in victim segment are belong to atomic opened file for
long time.
Now, we will detect failure due to atomic write in foreground GC, if
the count exceeds threshold, we will drop all atomic written data in
cache, by this, I expect it can keep our system running safely to
prevent Dos attack.
In addition, his patch adds to show GC skip information in debugfs,
now it just shows count of skipped caused by atomic write.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-07 19:28:54 +07:00
|
|
|
if (is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) {
|
|
|
|
clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
|
|
|
|
ret = -EINVAL;
|
|
|
|
}
|
2016-05-09 18:56:33 +07:00
|
|
|
inode_unlock(inode);
|
2014-10-07 07:39:50 +07:00
|
|
|
mnt_drop_write_file(filp);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2014-10-07 06:11:16 +07:00
|
|
|
static int f2fs_ioc_start_volatile_write(struct file *filp)
|
|
|
|
{
|
|
|
|
struct inode *inode = file_inode(filp);
|
2015-07-17 17:06:35 +07:00
|
|
|
int ret;
|
2014-10-07 06:11:16 +07:00
|
|
|
|
|
|
|
if (!inode_owner_or_capable(inode))
|
|
|
|
return -EACCES;
|
|
|
|
|
2017-03-17 14:43:57 +07:00
|
|
|
if (!S_ISREG(inode->i_mode))
|
|
|
|
return -EINVAL;
|
|
|
|
|
2016-05-09 18:56:32 +07:00
|
|
|
ret = mnt_want_write_file(filp);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
2016-05-09 18:56:33 +07:00
|
|
|
inode_lock(inode);
|
|
|
|
|
2014-12-09 21:08:59 +07:00
|
|
|
if (f2fs_is_volatile_file(inode))
|
2016-05-09 18:56:32 +07:00
|
|
|
goto out;
|
2014-12-09 21:08:59 +07:00
|
|
|
|
2015-07-17 17:06:35 +07:00
|
|
|
ret = f2fs_convert_inline_inode(inode);
|
|
|
|
if (ret)
|
2016-05-09 18:56:32 +07:00
|
|
|
goto out;
|
2014-10-24 09:48:09 +07:00
|
|
|
|
2017-03-22 16:23:45 +07:00
|
|
|
stat_inc_volatile_write(inode);
|
|
|
|
stat_update_max_volatile_write(inode);
|
|
|
|
|
2016-05-21 00:13:22 +07:00
|
|
|
set_inode_flag(inode, FI_VOLATILE_FILE);
|
2016-01-09 07:57:48 +07:00
|
|
|
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
|
2016-05-09 18:56:32 +07:00
|
|
|
out:
|
2016-05-09 18:56:33 +07:00
|
|
|
inode_unlock(inode);
|
2016-05-09 18:56:32 +07:00
|
|
|
mnt_drop_write_file(filp);
|
|
|
|
return ret;
|
2014-10-07 06:11:16 +07:00
|
|
|
}
|
|
|
|
|
2014-12-09 21:08:59 +07:00
|
|
|
static int f2fs_ioc_release_volatile_write(struct file *filp)
|
|
|
|
{
|
|
|
|
struct inode *inode = file_inode(filp);
|
2016-05-09 18:56:32 +07:00
|
|
|
int ret;
|
2014-12-09 21:08:59 +07:00
|
|
|
|
|
|
|
if (!inode_owner_or_capable(inode))
|
|
|
|
return -EACCES;
|
|
|
|
|
2016-05-09 18:56:32 +07:00
|
|
|
ret = mnt_want_write_file(filp);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
2016-05-09 18:56:33 +07:00
|
|
|
inode_lock(inode);
|
|
|
|
|
2014-12-09 21:08:59 +07:00
|
|
|
if (!f2fs_is_volatile_file(inode))
|
2016-05-09 18:56:32 +07:00
|
|
|
goto out;
|
2014-12-09 21:08:59 +07:00
|
|
|
|
2016-05-09 18:56:32 +07:00
|
|
|
if (!f2fs_is_first_block_written(inode)) {
|
|
|
|
ret = truncate_partial_data_page(inode, 0, true);
|
|
|
|
goto out;
|
|
|
|
}
|
2015-03-18 07:16:35 +07:00
|
|
|
|
2016-05-09 18:56:32 +07:00
|
|
|
ret = punch_hole(inode, 0, F2FS_BLKSIZE);
|
|
|
|
out:
|
2016-05-09 18:56:33 +07:00
|
|
|
inode_unlock(inode);
|
2016-05-09 18:56:32 +07:00
|
|
|
mnt_drop_write_file(filp);
|
|
|
|
return ret;
|
2014-12-09 21:08:59 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
static int f2fs_ioc_abort_volatile_write(struct file *filp)
|
|
|
|
{
|
|
|
|
struct inode *inode = file_inode(filp);
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (!inode_owner_or_capable(inode))
|
|
|
|
return -EACCES;
|
|
|
|
|
|
|
|
ret = mnt_want_write_file(filp);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
2016-05-09 18:56:33 +07:00
|
|
|
inode_lock(inode);
|
|
|
|
|
2016-04-12 03:15:10 +07:00
|
|
|
if (f2fs_is_atomic_file(inode))
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
f2fs_drop_inmem_pages(inode);
|
2015-12-30 06:46:33 +07:00
|
|
|
if (f2fs_is_volatile_file(inode)) {
|
2016-05-21 00:13:22 +07:00
|
|
|
clear_inode_flag(inode, FI_VOLATILE_FILE);
|
2017-03-22 16:23:45 +07:00
|
|
|
stat_dec_volatile_write(inode);
|
2016-04-15 23:43:17 +07:00
|
|
|
ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
|
2015-12-30 06:46:33 +07:00
|
|
|
}
|
2015-06-09 07:51:10 +07:00
|
|
|
|
2018-07-27 16:15:11 +07:00
|
|
|
clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
|
|
|
|
|
2016-05-09 18:56:33 +07:00
|
|
|
inode_unlock(inode);
|
|
|
|
|
2014-12-09 21:08:59 +07:00
|
|
|
mnt_drop_write_file(filp);
|
2016-01-09 07:57:48 +07:00
|
|
|
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
|
2014-12-09 21:08:59 +07:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2015-01-09 10:15:53 +07:00
|
|
|
static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
|
|
|
|
{
|
|
|
|
struct inode *inode = file_inode(filp);
|
|
|
|
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
|
|
|
|
struct super_block *sb = sbi->sb;
|
|
|
|
__u32 in;
|
2018-06-20 17:39:53 +07:00
|
|
|
int ret = 0;
|
2015-01-09 10:15:53 +07:00
|
|
|
|
|
|
|
if (!capable(CAP_SYS_ADMIN))
|
|
|
|
return -EPERM;
|
|
|
|
|
|
|
|
if (get_user(in, (__u32 __user *)arg))
|
|
|
|
return -EFAULT;
|
|
|
|
|
2018-05-18 13:21:52 +07:00
|
|
|
if (in != F2FS_GOING_DOWN_FULLSYNC) {
|
|
|
|
ret = mnt_want_write_file(filp);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
}
|
2016-05-09 18:56:32 +07:00
|
|
|
|
2015-01-09 10:15:53 +07:00
|
|
|
switch (in) {
|
|
|
|
case F2FS_GOING_DOWN_FULLSYNC:
|
|
|
|
sb = freeze_bdev(sb->s_bdev);
|
2018-01-17 21:28:52 +07:00
|
|
|
if (IS_ERR(sb)) {
|
|
|
|
ret = PTR_ERR(sb);
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
if (sb) {
|
2016-05-19 04:07:56 +07:00
|
|
|
f2fs_stop_checkpoint(sbi, false);
|
2018-06-22 03:46:23 +07:00
|
|
|
set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
|
2015-01-09 10:15:53 +07:00
|
|
|
thaw_bdev(sb->s_bdev, sb);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case F2FS_GOING_DOWN_METASYNC:
|
|
|
|
/* do checkpoint only */
|
2018-01-17 21:28:52 +07:00
|
|
|
ret = f2fs_sync_fs(sb, 1);
|
|
|
|
if (ret)
|
|
|
|
goto out;
|
2016-05-19 04:07:56 +07:00
|
|
|
f2fs_stop_checkpoint(sbi, false);
|
2018-06-22 03:46:23 +07:00
|
|
|
set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
|
2015-01-09 10:15:53 +07:00
|
|
|
break;
|
|
|
|
case F2FS_GOING_DOWN_NOSYNC:
|
2016-05-19 04:07:56 +07:00
|
|
|
f2fs_stop_checkpoint(sbi, false);
|
2018-06-22 03:46:23 +07:00
|
|
|
set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
|
2015-01-09 10:15:53 +07:00
|
|
|
break;
|
2015-10-07 23:46:37 +07:00
|
|
|
case F2FS_GOING_DOWN_METAFLUSH:
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using generically named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO);
|
2016-05-19 04:07:56 +07:00
|
|
|
f2fs_stop_checkpoint(sbi, false);
|
2018-06-22 03:46:23 +07:00
|
|
|
set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
|
2015-10-07 23:46:37 +07:00
|
|
|
break;
|
2018-11-29 04:26:03 +07:00
|
|
|
case F2FS_GOING_DOWN_NEED_FSCK:
|
|
|
|
set_sbi_flag(sbi, SBI_NEED_FSCK);
|
|
|
|
/* do checkpoint only */
|
|
|
|
ret = f2fs_sync_fs(sb, 1);
|
|
|
|
if (ret)
|
|
|
|
goto out;
|
|
|
|
break;
|
2015-01-09 10:15:53 +07:00
|
|
|
default:
|
2016-05-09 18:56:32 +07:00
|
|
|
ret = -EINVAL;
|
|
|
|
goto out;
|
2015-01-09 10:15:53 +07:00
|
|
|
}
|
2018-01-18 16:23:29 +07:00
|
|
|
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking at f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using generically named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds an "f2fs_" prefix to all non-static symbols in order to:
a) avoid conflicts with other generic kernel symbols;
b) indicate that the function is an f2fs-specific one instead of a generic
one.
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
f2fs_stop_gc_thread(sbi);
|
|
|
|
f2fs_stop_discard_thread(sbi);
|
2018-01-18 16:23:29 +07:00
|
|
|
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking at f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using generically named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds an "f2fs_" prefix to all non-static symbols in order to:
a) avoid conflicts with other generic kernel symbols;
b) indicate that the function is an f2fs-specific one instead of a generic
one.
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
f2fs_drop_discard_cmd(sbi);
|
2018-01-18 16:23:29 +07:00
|
|
|
clear_opt(sbi, DISCARD);
|
|
|
|
|
2016-01-09 07:57:48 +07:00
|
|
|
f2fs_update_time(sbi, REQ_TIME);
|
2016-05-09 18:56:32 +07:00
|
|
|
out:
|
2018-05-18 13:21:52 +07:00
|
|
|
if (in != F2FS_GOING_DOWN_FULLSYNC)
|
|
|
|
mnt_drop_write_file(filp);
|
2016-05-09 18:56:32 +07:00
|
|
|
return ret;
|
2015-01-09 10:15:53 +07:00
|
|
|
}
|
|
|
|
|
2014-09-25 05:37:02 +07:00
|
|
|
static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
|
|
|
|
{
|
|
|
|
struct inode *inode = file_inode(filp);
|
|
|
|
struct super_block *sb = inode->i_sb;
|
|
|
|
struct request_queue *q = bdev_get_queue(sb->s_bdev);
|
|
|
|
struct fstrim_range range;
|
|
|
|
int ret;
|
2014-09-21 12:06:39 +07:00
|
|
|
|
2014-09-25 05:37:02 +07:00
|
|
|
if (!capable(CAP_SYS_ADMIN))
|
|
|
|
return -EPERM;
|
2014-09-21 12:06:39 +07:00
|
|
|
|
f2fs: fix to avoid NULL pointer dereference on se->discard_map
https://bugzilla.kernel.org/show_bug.cgi?id=200951
These is a NULL pointer dereference issue reported in bugzilla:
Hi,
in the setup there is a SATA SSD connected to a SATA-to-USB bridge.
The disc is "Samsung SSD 850 PRO 256G" which supports TRIM.
There are four partitions:
sda1: FAT /boot
sda2: F2FS /
sda3: F2FS /home
sda4: F2FS
The bridge is ASMT1153e which uses the "uas" driver.
There is no TRIM pass-through, so, when mounting it reports:
mounting with "discard" option, but the device does not support discard
The USB host is USB3.0 and UASP capable. It is the one on RK3399.
Given this everything works fine, except there is no TRIM support.
In order to enable TRIM a new UDEV rule is added [1]:
/etc/udev/rules.d/10-sata-bridge-trim.rules:
ACTION=="add|change", ATTRS{idVendor}=="174c", ATTRS{idProduct}=="55aa", SUBSYSTEM=="scsi_disk", ATTR{provisioning_mode}="unmap"
After reboot any F2FS write hangs forever and dmesg reports:
Unable to handle kernel NULL pointer dereference
Also tested on a x86_64 system: works fine even with TRIM enabled.
same disc
same bridge
different usb host controller
different cpu architecture
not root filesystem
Regards,
Vicenç.
[1] Post #5 in https://bbs.archlinux.org/viewtopic.php?id=236280
Unable to handle kernel NULL pointer dereference at virtual address 000000000000003e
Mem abort info:
ESR = 0x96000004
Exception class = DABT (current EL), IL = 32 bits
SET = 0, FnV = 0
EA = 0, S1PTW = 0
Data abort info:
ISV = 0, ISS = 0x00000004
CM = 0, WnR = 0
user pgtable: 4k pages, 48-bit VAs, pgdp = 00000000626e3122
[000000000000003e] pgd=0000000000000000
Internal error: Oops: 96000004 [#1] SMP
Modules linked in: overlay snd_soc_hdmi_codec rc_cec dw_hdmi_i2s_audio dw_hdmi_cec snd_soc_simple_card snd_soc_simple_card_utils snd_soc_rockchip_i2s rockchip_rga snd_soc_rockchip_pcm rockchipdrm videobuf2_dma_sg v4l2_mem2mem rtc_rk808 videobuf2_memops analogix_dp videobuf2_v4l2 videobuf2_common dw_hdmi dw_wdt cec rc_core videodev drm_kms_helper media drm rockchip_thermal rockchip_saradc realtek drm_panel_orientation_quirks syscopyarea sysfillrect sysimgblt fb_sys_fops dwmac_rk stmmac_platform stmmac pwm_bl squashfs loop crypto_user gpio_keys hid_kensington
CPU: 5 PID: 957 Comm: nvim Not tainted 4.19.0-rc1-1-ARCH #1
Hardware name: Sapphire-RK3399 Board (DT)
pstate: 00000005 (nzcv daif -PAN -UAO)
pc : update_sit_entry+0x304/0x4b0
lr : update_sit_entry+0x108/0x4b0
sp : ffff00000ca13bd0
x29: ffff00000ca13bd0 x28: 000000000000003e
x27: 0000000000000020 x26: 0000000000080000
x25: 0000000000000048 x24: ffff8000ebb85cf8
x23: 0000000000000253 x22: 00000000ffffffff
x21: 00000000000535f2 x20: 00000000ffffffdf
x19: ffff8000eb9e6800 x18: ffff8000eb9e6be8
x17: 0000000007ce6926 x16: 000000001c83ffa8
x15: 0000000000000000 x14: ffff8000f602df90
x13: 0000000000000006 x12: 0000000000000040
x11: 0000000000000228 x10: 0000000000000000
x9 : 0000000000000000 x8 : 0000000000000000
x7 : 00000000000535f2 x6 : ffff8000ebff3440
x5 : ffff8000ebff3440 x4 : ffff8000ebe3a6c8
x3 : 00000000ffffffff x2 : 0000000000000020
x1 : 0000000000000000 x0 : ffff8000eb9e5800
Process nvim (pid: 957, stack limit = 0x0000000063a78320)
Call trace:
update_sit_entry+0x304/0x4b0
f2fs_invalidate_blocks+0x98/0x140
truncate_node+0x90/0x400
f2fs_remove_inode_page+0xe8/0x340
f2fs_evict_inode+0x2b0/0x408
evict+0xe0/0x1e0
iput+0x160/0x260
do_unlinkat+0x214/0x298
__arm64_sys_unlinkat+0x3c/0x68
el0_svc_handler+0x94/0x118
el0_svc+0x8/0xc
Code: f9400800 b9488400 36080140 f9400f01 (387c4820)
---[ end trace a0f21a307118c477 ]---
The reason is it is possible to enable discard flag on block queue via
UDEV, but during mount, f2fs will initialize se->discard_map only if
this flag is set, once the flag is set after mount, f2fs may dereference
NULL pointer on se->discard_map.
So this patch does below changes to fix this issue:
- initialize and update se->discard_map all the time.
- don't clear DISCARD option if device has no QUEUE_FLAG_DISCARD flag
during mount.
- don't issue small discard on zoned block device.
- introduce some functions to enhance the readability.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Tested-by: Vicente Bergas <vicencb@gmail.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-09-04 02:52:17 +07:00
|
|
|
if (!f2fs_hw_support_discard(F2FS_SB(sb)))
|
2014-09-25 05:37:02 +07:00
|
|
|
return -EOPNOTSUPP;
|
2014-09-21 12:06:39 +07:00
|
|
|
|
2014-09-25 05:37:02 +07:00
|
|
|
if (copy_from_user(&range, (struct fstrim_range __user *)arg,
|
|
|
|
sizeof(range)))
|
|
|
|
return -EFAULT;
|
2014-09-21 12:06:39 +07:00
|
|
|
|
2016-05-09 18:56:32 +07:00
|
|
|
ret = mnt_want_write_file(filp);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
2014-09-25 05:37:02 +07:00
|
|
|
range.minlen = max((unsigned int)range.minlen,
|
|
|
|
q->limits.discard_granularity);
|
|
|
|
ret = f2fs_trim_fs(F2FS_SB(sb), &range);
|
2016-05-09 18:56:32 +07:00
|
|
|
mnt_drop_write_file(filp);
|
2014-09-25 05:37:02 +07:00
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
2014-09-21 12:06:39 +07:00
|
|
|
|
2014-09-25 05:37:02 +07:00
|
|
|
if (copy_to_user((struct fstrim_range __user *)arg, &range,
|
|
|
|
sizeof(range)))
|
|
|
|
return -EFAULT;
|
2016-01-09 07:57:48 +07:00
|
|
|
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
|
2014-09-25 05:37:02 +07:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2015-04-21 05:19:06 +07:00
|
|
|
/*
 * Report whether any of the 16 bytes in @u is non-zero.
 *
 * Returns true as soon as a non-zero byte is found, false if all 16 bytes
 * are zero.
 */
static bool uuid_is_nonzero(__u8 u[16])
{
	const __u8 *cur = u;
	const __u8 *const stop = u + 16;

	while (cur < stop) {
		if (*cur++ != 0)
			return true;
	}

	return false;
}
|
|
|
|
|
|
|
|
static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg)
|
|
|
|
{
|
|
|
|
struct inode *inode = file_inode(filp);
|
|
|
|
|
2018-10-24 17:34:26 +07:00
|
|
|
if (!f2fs_sb_has_encrypt(F2FS_I_SB(inode)))
|
2017-11-14 18:28:42 +07:00
|
|
|
return -EOPNOTSUPP;
|
|
|
|
|
2016-01-09 07:57:48 +07:00
|
|
|
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
|
2016-05-09 18:56:32 +07:00
|
|
|
|
2016-11-27 07:07:49 +07:00
|
|
|
return fscrypt_ioctl_set_policy(filp, (const void __user *)arg);
|
2015-04-21 05:19:06 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg)
|
|
|
|
{
|
2018-10-24 17:34:26 +07:00
|
|
|
if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
|
2017-11-14 18:28:42 +07:00
|
|
|
return -EOPNOTSUPP;
|
2016-11-27 07:07:49 +07:00
|
|
|
return fscrypt_ioctl_get_policy(filp, (void __user *)arg);
|
2015-04-21 05:19:06 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg)
|
|
|
|
{
|
|
|
|
struct inode *inode = file_inode(filp);
|
|
|
|
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
|
|
|
|
int err;
|
|
|
|
|
2018-10-24 17:34:26 +07:00
|
|
|
if (!f2fs_sb_has_encrypt(sbi))
|
2015-04-21 05:19:06 +07:00
|
|
|
return -EOPNOTSUPP;
|
|
|
|
|
|
|
|
err = mnt_want_write_file(filp);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
2018-02-26 21:04:13 +07:00
|
|
|
down_write(&sbi->sb_lock);
|
2018-02-11 21:53:20 +07:00
|
|
|
|
|
|
|
if (uuid_is_nonzero(sbi->raw_super->encrypt_pw_salt))
|
|
|
|
goto got_it;
|
|
|
|
|
2015-04-21 05:19:06 +07:00
|
|
|
/* update superblock with uuid */
|
|
|
|
generate_random_uuid(sbi->raw_super->encrypt_pw_salt);
|
|
|
|
|
2015-06-08 12:28:03 +07:00
|
|
|
err = f2fs_commit_super(sbi, false);
|
2015-04-21 05:19:06 +07:00
|
|
|
if (err) {
|
|
|
|
/* undo new data */
|
|
|
|
memset(sbi->raw_super->encrypt_pw_salt, 0, 16);
|
2018-02-11 21:53:20 +07:00
|
|
|
goto out_err;
|
2015-04-21 05:19:06 +07:00
|
|
|
}
|
|
|
|
got_it:
|
|
|
|
if (copy_to_user((__u8 __user *)arg, sbi->raw_super->encrypt_pw_salt,
|
|
|
|
16))
|
2018-02-11 21:53:20 +07:00
|
|
|
err = -EFAULT;
|
|
|
|
out_err:
|
2018-02-26 21:04:13 +07:00
|
|
|
up_write(&sbi->sb_lock);
|
2018-02-11 21:53:20 +07:00
|
|
|
mnt_drop_write_file(filp);
|
|
|
|
return err;
|
2015-04-21 05:19:06 +07:00
|
|
|
}
|
|
|
|
|
2015-07-10 17:08:10 +07:00
|
|
|
static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
|
|
|
|
{
|
|
|
|
struct inode *inode = file_inode(filp);
|
|
|
|
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
|
2015-10-05 21:22:44 +07:00
|
|
|
__u32 sync;
|
2016-05-09 18:56:32 +07:00
|
|
|
int ret;
|
2015-07-10 17:08:10 +07:00
|
|
|
|
|
|
|
if (!capable(CAP_SYS_ADMIN))
|
|
|
|
return -EPERM;
|
|
|
|
|
2015-10-05 21:22:44 +07:00
|
|
|
if (get_user(sync, (__u32 __user *)arg))
|
2015-07-10 17:08:10 +07:00
|
|
|
return -EFAULT;
|
|
|
|
|
2015-10-05 21:22:44 +07:00
|
|
|
if (f2fs_readonly(sbi->sb))
|
|
|
|
return -EROFS;
|
2015-07-10 17:08:10 +07:00
|
|
|
|
2016-05-09 18:56:32 +07:00
|
|
|
ret = mnt_want_write_file(filp);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
2015-10-05 21:22:44 +07:00
|
|
|
if (!sync) {
|
2016-05-09 18:56:32 +07:00
|
|
|
if (!mutex_trylock(&sbi->gc_mutex)) {
|
|
|
|
ret = -EBUSY;
|
|
|
|
goto out;
|
|
|
|
}
|
2015-10-05 21:22:44 +07:00
|
|
|
} else {
|
|
|
|
mutex_lock(&sbi->gc_mutex);
|
2015-07-10 17:08:10 +07:00
|
|
|
}
|
|
|
|
|
2017-04-14 05:17:00 +07:00
|
|
|
ret = f2fs_gc(sbi, sync, true, NULL_SEGNO);
|
2016-05-09 18:56:32 +07:00
|
|
|
out:
|
|
|
|
mnt_drop_write_file(filp);
|
|
|
|
return ret;
|
2015-07-10 17:08:10 +07:00
|
|
|
}
|
|
|
|
|
2017-06-16 06:44:42 +07:00
|
|
|
static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg)
|
|
|
|
{
|
|
|
|
struct inode *inode = file_inode(filp);
|
|
|
|
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
|
|
|
|
struct f2fs_gc_range range;
|
|
|
|
u64 end;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (!capable(CAP_SYS_ADMIN))
|
|
|
|
return -EPERM;
|
|
|
|
|
|
|
|
if (copy_from_user(&range, (struct f2fs_gc_range __user *)arg,
|
|
|
|
sizeof(range)))
|
|
|
|
return -EFAULT;
|
|
|
|
|
|
|
|
if (f2fs_readonly(sbi->sb))
|
|
|
|
return -EROFS;
|
|
|
|
|
2018-04-24 10:40:30 +07:00
|
|
|
end = range.start + range.len;
|
|
|
|
if (range.start < MAIN_BLKADDR(sbi) || end >= MAX_BLKADDR(sbi)) {
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2017-06-16 06:44:42 +07:00
|
|
|
ret = mnt_want_write_file(filp);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
do_more:
|
|
|
|
if (!range.sync) {
|
|
|
|
if (!mutex_trylock(&sbi->gc_mutex)) {
|
|
|
|
ret = -EBUSY;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
mutex_lock(&sbi->gc_mutex);
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = f2fs_gc(sbi, range.sync, true, GET_SEGNO(sbi, range.start));
|
2018-10-30 19:37:55 +07:00
|
|
|
range.start += BLKS_PER_SEC(sbi);
|
2017-06-16 06:44:42 +07:00
|
|
|
if (range.start <= end)
|
|
|
|
goto do_more;
|
|
|
|
out:
|
|
|
|
mnt_drop_write_file(filp);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2018-07-17 19:41:49 +07:00
|
|
|
static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg)
|
2015-10-05 21:24:19 +07:00
|
|
|
{
|
|
|
|
struct inode *inode = file_inode(filp);
|
|
|
|
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
|
2016-05-09 18:56:32 +07:00
|
|
|
int ret;
|
2015-10-05 21:24:19 +07:00
|
|
|
|
|
|
|
if (!capable(CAP_SYS_ADMIN))
|
|
|
|
return -EPERM;
|
|
|
|
|
|
|
|
if (f2fs_readonly(sbi->sb))
|
|
|
|
return -EROFS;
|
|
|
|
|
2018-08-21 09:21:43 +07:00
|
|
|
if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
|
|
|
|
f2fs_msg(sbi->sb, KERN_INFO,
|
|
|
|
"Skipping Checkpoint. Checkpoints currently disabled.");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2016-05-09 18:56:32 +07:00
|
|
|
ret = mnt_want_write_file(filp);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
ret = f2fs_sync_fs(sbi->sb, 1);
|
|
|
|
|
|
|
|
mnt_drop_write_file(filp);
|
|
|
|
return ret;
|
2015-10-05 21:24:19 +07:00
|
|
|
}
|
|
|
|
|
2015-10-27 08:53:45 +07:00
|
|
|
static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
|
|
|
|
struct file *filp,
|
|
|
|
struct f2fs_defragment *range)
|
|
|
|
{
|
|
|
|
struct inode *inode = file_inode(filp);
|
2018-01-10 17:18:52 +07:00
|
|
|
struct f2fs_map_blocks map = { .m_next_extent = NULL,
|
2018-11-20 03:29:35 +07:00
|
|
|
.m_seg_type = NO_CHECK_TYPE ,
|
|
|
|
.m_may_create = false };
|
2018-05-30 02:34:58 +07:00
|
|
|
struct extent_info ei = {0, 0, 0};
|
2018-01-10 17:18:52 +07:00
|
|
|
pgoff_t pg_start, pg_end, next_pgofs;
|
2015-12-01 10:56:52 +07:00
|
|
|
unsigned int blk_per_seg = sbi->blocks_per_seg;
|
2015-10-27 08:53:45 +07:00
|
|
|
unsigned int total = 0, sec_num;
|
|
|
|
block_t blk_end = 0;
|
|
|
|
bool fragmented = false;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
/* if in-place-update policy is enabled, don't waste time here */
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
if (f2fs_should_update_inplace(inode, NULL))
|
2015-10-27 08:53:45 +07:00
|
|
|
return -EINVAL;
|
|
|
|
|
mm, fs: get rid of PAGE_CACHE_* and page_cache_{get,release} macros
PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} macros were introduced *long* time
ago with promise that one day it will be possible to implement page
cache with bigger chunks than PAGE_SIZE.
This promise never materialized. And unlikely will.
We have many places where PAGE_CACHE_SIZE assumed to be equal to
PAGE_SIZE. And it's constant source of confusion on whether
PAGE_CACHE_* or PAGE_* constant should be used in a particular case,
especially on the border between fs and mm.
Global switching to PAGE_CACHE_SIZE != PAGE_SIZE would cause to much
breakage to be doable.
Let's stop pretending that pages in page cache are special. They are
not.
The changes are pretty straight-forward:
- <foo> << (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- <foo> >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} -> PAGE_{SIZE,SHIFT,MASK,ALIGN};
- page_cache_get() -> get_page();
- page_cache_release() -> put_page();
This patch contains automated changes generated with coccinelle using
script below. For some reason, coccinelle doesn't patch header files.
I've called spatch for them manually.
The only adjustment after coccinelle is revert of changes to
PAGE_CAHCE_ALIGN definition: we are going to drop it later.
There are few places in the code where coccinelle didn't reach. I'll
fix them manually in a separate patch. Comments and documentation also
will be addressed with the separate patch.
virtual patch
@@
expression E;
@@
- E << (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
expression E;
@@
- E >> (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
@@
- PAGE_CACHE_SHIFT
+ PAGE_SHIFT
@@
@@
- PAGE_CACHE_SIZE
+ PAGE_SIZE
@@
@@
- PAGE_CACHE_MASK
+ PAGE_MASK
@@
expression E;
@@
- PAGE_CACHE_ALIGN(E)
+ PAGE_ALIGN(E)
@@
expression E;
@@
- page_cache_get(E)
+ get_page(E)
@@
expression E;
@@
- page_cache_release(E)
+ put_page(E)
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-04-01 19:29:47 +07:00
|
|
|
pg_start = range->start >> PAGE_SHIFT;
|
|
|
|
pg_end = (range->start + range->len) >> PAGE_SHIFT;
|
2015-10-27 08:53:45 +07:00
|
|
|
|
2016-01-08 05:15:04 +07:00
|
|
|
f2fs_balance_fs(sbi, true);
|
2015-10-27 08:53:45 +07:00
|
|
|
|
2016-01-23 03:40:57 +07:00
|
|
|
inode_lock(inode);
|
2015-10-27 08:53:45 +07:00
|
|
|
|
|
|
|
/* writeback all dirty pages in the range */
|
|
|
|
err = filemap_write_and_wait_range(inode->i_mapping, range->start,
|
2015-12-14 12:34:00 +07:00
|
|
|
range->start + range->len - 1);
|
2015-10-27 08:53:45 +07:00
|
|
|
if (err)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* lookup mapping info in extent cache, skip defragmenting if physical
|
|
|
|
* block addresses are continuous.
|
|
|
|
*/
|
|
|
|
if (f2fs_lookup_extent_cache(inode, pg_start, &ei)) {
|
|
|
|
if (ei.fofs + ei.len >= pg_end)
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
map.m_lblk = pg_start;
|
2018-01-10 17:18:52 +07:00
|
|
|
map.m_next_pgofs = &next_pgofs;
|
2015-10-27 08:53:45 +07:00
|
|
|
|
|
|
|
/*
|
|
|
|
* lookup mapping info in dnode page cache, skip defragmenting if all
|
|
|
|
* physical block addresses are continuous even if there are hole(s)
|
|
|
|
* in logical blocks.
|
|
|
|
*/
|
|
|
|
while (map.m_lblk < pg_end) {
|
2015-12-15 16:02:41 +07:00
|
|
|
map.m_len = pg_end - map.m_lblk;
|
2017-08-09 16:27:30 +07:00
|
|
|
err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT);
|
2015-10-27 08:53:45 +07:00
|
|
|
if (err)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
if (!(map.m_flags & F2FS_MAP_FLAGS)) {
|
2018-01-10 17:18:52 +07:00
|
|
|
map.m_lblk = next_pgofs;
|
2015-10-27 08:53:45 +07:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2018-01-10 17:18:51 +07:00
|
|
|
if (blk_end && blk_end != map.m_pblk)
|
2015-10-27 08:53:45 +07:00
|
|
|
fragmented = true;
|
2018-01-10 17:18:51 +07:00
|
|
|
|
|
|
|
/* record total count of block that we're going to move */
|
|
|
|
total += map.m_len;
|
|
|
|
|
2015-10-27 08:53:45 +07:00
|
|
|
blk_end = map.m_pblk + map.m_len;
|
|
|
|
|
|
|
|
map.m_lblk += map.m_len;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!fragmented)
|
|
|
|
goto out;
|
|
|
|
|
2018-01-10 17:18:51 +07:00
|
|
|
sec_num = (total + BLKS_PER_SEC(sbi) - 1) / BLKS_PER_SEC(sbi);
|
2015-10-27 08:53:45 +07:00
|
|
|
|
|
|
|
/*
|
|
|
|
* make sure there are enough free section for LFS allocation, this can
|
|
|
|
* avoid defragment running in SSR mode when free section are allocated
|
|
|
|
* intensively
|
|
|
|
*/
|
2016-09-02 02:02:51 +07:00
|
|
|
if (has_not_enough_free_secs(sbi, 0, sec_num)) {
|
2015-10-27 08:53:45 +07:00
|
|
|
err = -EAGAIN;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2018-01-10 17:18:51 +07:00
|
|
|
map.m_lblk = pg_start;
|
|
|
|
map.m_len = pg_end - pg_start;
|
|
|
|
total = 0;
|
|
|
|
|
2015-10-27 08:53:45 +07:00
|
|
|
while (map.m_lblk < pg_end) {
|
|
|
|
pgoff_t idx;
|
|
|
|
int cnt = 0;
|
|
|
|
|
|
|
|
do_map:
|
2015-12-15 16:02:41 +07:00
|
|
|
map.m_len = pg_end - map.m_lblk;
|
2017-08-09 16:27:30 +07:00
|
|
|
err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT);
|
2015-10-27 08:53:45 +07:00
|
|
|
if (err)
|
|
|
|
goto clear_out;
|
|
|
|
|
|
|
|
if (!(map.m_flags & F2FS_MAP_FLAGS)) {
|
2018-01-10 17:18:52 +07:00
|
|
|
map.m_lblk = next_pgofs;
|
2015-10-27 08:53:45 +07:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2016-05-21 00:13:22 +07:00
|
|
|
set_inode_flag(inode, FI_DO_DEFRAG);
|
2015-10-27 08:53:45 +07:00
|
|
|
|
|
|
|
idx = map.m_lblk;
|
|
|
|
while (idx < map.m_lblk + map.m_len && cnt < blk_per_seg) {
|
|
|
|
struct page *page;
|
|
|
|
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
page = f2fs_get_lock_data_page(inode, idx, true);
|
2015-10-27 08:53:45 +07:00
|
|
|
if (IS_ERR(page)) {
|
|
|
|
err = PTR_ERR(page);
|
|
|
|
goto clear_out;
|
|
|
|
}
|
|
|
|
|
|
|
|
set_page_dirty(page);
|
|
|
|
f2fs_put_page(page, 1);
|
|
|
|
|
|
|
|
idx++;
|
|
|
|
cnt++;
|
|
|
|
total++;
|
|
|
|
}
|
|
|
|
|
|
|
|
map.m_lblk = idx;
|
|
|
|
|
|
|
|
if (idx < pg_end && cnt < blk_per_seg)
|
|
|
|
goto do_map;
|
|
|
|
|
2016-05-21 00:13:22 +07:00
|
|
|
clear_inode_flag(inode, FI_DO_DEFRAG);
|
2015-10-27 08:53:45 +07:00
|
|
|
|
|
|
|
err = filemap_fdatawrite(inode->i_mapping);
|
|
|
|
if (err)
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
clear_out:
|
2016-05-21 00:13:22 +07:00
|
|
|
clear_inode_flag(inode, FI_DO_DEFRAG);
|
2015-10-27 08:53:45 +07:00
|
|
|
out:
|
2016-01-23 03:40:57 +07:00
|
|
|
inode_unlock(inode);
|
2015-10-27 08:53:45 +07:00
|
|
|
if (!err)
|
mm, fs: get rid of PAGE_CACHE_* and page_cache_{get,release} macros
PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} macros were introduced *long* time
ago with promise that one day it will be possible to implement page
cache with bigger chunks than PAGE_SIZE.
This promise never materialized. And unlikely will.
We have many places where PAGE_CACHE_SIZE assumed to be equal to
PAGE_SIZE. And it's constant source of confusion on whether
PAGE_CACHE_* or PAGE_* constant should be used in a particular case,
especially on the border between fs and mm.
Global switching to PAGE_CACHE_SIZE != PAGE_SIZE would cause to much
breakage to be doable.
Let's stop pretending that pages in page cache are special. They are
not.
The changes are pretty straight-forward:
- <foo> << (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- <foo> >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} -> PAGE_{SIZE,SHIFT,MASK,ALIGN};
- page_cache_get() -> get_page();
- page_cache_release() -> put_page();
This patch contains automated changes generated with coccinelle using
script below. For some reason, coccinelle doesn't patch header files.
I've called spatch for them manually.
The only adjustment after coccinelle is revert of changes to
PAGE_CAHCE_ALIGN definition: we are going to drop it later.
There are few places in the code where coccinelle didn't reach. I'll
fix them manually in a separate patch. Comments and documentation also
will be addressed with the separate patch.
virtual patch
@@
expression E;
@@
- E << (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
expression E;
@@
- E >> (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
@@
- PAGE_CACHE_SHIFT
+ PAGE_SHIFT
@@
@@
- PAGE_CACHE_SIZE
+ PAGE_SIZE
@@
@@
- PAGE_CACHE_MASK
+ PAGE_MASK
@@
expression E;
@@
- PAGE_CACHE_ALIGN(E)
+ PAGE_ALIGN(E)
@@
expression E;
@@
- page_cache_get(E)
+ get_page(E)
@@
expression E;
@@
- page_cache_release(E)
+ put_page(E)
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-04-01 19:29:47 +07:00
|
|
|
range->len = (u64)total << PAGE_SHIFT;
|
2015-10-27 08:53:45 +07:00
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int f2fs_ioc_defragment(struct file *filp, unsigned long arg)
|
|
|
|
{
|
|
|
|
struct inode *inode = file_inode(filp);
|
|
|
|
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
|
|
|
|
struct f2fs_defragment range;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
if (!capable(CAP_SYS_ADMIN))
|
|
|
|
return -EPERM;
|
|
|
|
|
2017-04-25 19:45:12 +07:00
|
|
|
if (!S_ISREG(inode->i_mode) || f2fs_is_atomic_file(inode))
|
2015-10-27 08:53:45 +07:00
|
|
|
return -EINVAL;
|
|
|
|
|
2017-03-10 16:55:07 +07:00
|
|
|
if (f2fs_readonly(sbi->sb))
|
|
|
|
return -EROFS;
|
2015-10-27 08:53:45 +07:00
|
|
|
|
|
|
|
if (copy_from_user(&range, (struct f2fs_defragment __user *)arg,
|
2017-03-10 16:55:07 +07:00
|
|
|
sizeof(range)))
|
|
|
|
return -EFAULT;
|
2015-10-27 08:53:45 +07:00
|
|
|
|
|
|
|
/* verify alignment of offset & size */
|
2017-03-10 16:55:07 +07:00
|
|
|
if (range.start & (F2FS_BLKSIZE - 1) || range.len & (F2FS_BLKSIZE - 1))
|
|
|
|
return -EINVAL;
|
2015-10-27 08:53:45 +07:00
|
|
|
|
2017-03-08 09:47:12 +07:00
|
|
|
if (unlikely((range.start + range.len) >> PAGE_SHIFT >
|
2017-03-10 16:55:07 +07:00
|
|
|
sbi->max_file_blocks))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
err = mnt_want_write_file(filp);
|
|
|
|
if (err)
|
|
|
|
return err;
|
2017-03-08 09:47:12 +07:00
|
|
|
|
2015-10-27 08:53:45 +07:00
|
|
|
err = f2fs_defragment_range(sbi, filp, &range);
|
2017-03-10 16:55:07 +07:00
|
|
|
mnt_drop_write_file(filp);
|
|
|
|
|
2016-01-09 07:57:48 +07:00
|
|
|
f2fs_update_time(sbi, REQ_TIME);
|
2015-10-27 08:53:45 +07:00
|
|
|
if (err < 0)
|
2017-03-10 16:55:07 +07:00
|
|
|
return err;
|
2015-10-27 08:53:45 +07:00
|
|
|
|
|
|
|
if (copy_to_user((struct f2fs_defragment __user *)arg, &range,
|
|
|
|
sizeof(range)))
|
2017-03-10 16:55:07 +07:00
|
|
|
return -EFAULT;
|
|
|
|
|
|
|
|
return 0;
|
2015-10-27 08:53:45 +07:00
|
|
|
}
|
|
|
|
|
2016-07-09 05:16:47 +07:00
|
|
|
/*
 * Move the blocks backing [pos_in, pos_in + len) of @file_in to offset
 * @pos_out of @file_out by exchanging block addresses through
 * __exchange_data_block().
 *
 * Both files must be regular, unencrypted, and share the same mount and
 * superblock.  @pos_in, @pos_out and the end of the source range must be
 * F2FS_BLKSIZE aligned; a source range that ends exactly at EOF is rounded
 * up to the next block boundary.  @len == 0 means "from @pos_in to EOF".
 *
 * Returns 0 on success; -EXDEV, -EROFS, -EINVAL, -EOPNOTSUPP or -EBUSY
 * (the destination locks are only try-locked) on the checks below, or the
 * error reported by one of the helpers.
 */
static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
			struct file *file_out, loff_t pos_out, size_t len)
{
	struct inode *src = file_inode(file_in);
	struct inode *dst = file_inode(file_out);
	struct f2fs_sb_info *sbi = F2FS_I_SB(src);
	size_t olen = len;
	/*
	 * File sizes are loff_t; keeping them in size_t would truncate them
	 * on 32-bit builds and could write a bogus i_size back to @dst.
	 */
	loff_t dst_max_i_size = 0;
	loff_t dst_osize;
	int ret;

	if (file_in->f_path.mnt != file_out->f_path.mnt ||
				src->i_sb != dst->i_sb)
		return -EXDEV;

	if (unlikely(f2fs_readonly(src->i_sb)))
		return -EROFS;

	if (!S_ISREG(src->i_mode) || !S_ISREG(dst->i_mode))
		return -EINVAL;

	if (f2fs_encrypted_inode(src) || f2fs_encrypted_inode(dst))
		return -EOPNOTSUPP;

	/* offsets come from user space; never turn a negative one into a block index */
	if (pos_in < 0 || pos_out < 0)
		return -EINVAL;

	if (src == dst) {
		/* moving a range onto itself is a no-op */
		if (pos_in == pos_out)
			return 0;
		/* destination starts inside the source range */
		if (pos_out > pos_in && pos_out < pos_in + len)
			return -EINVAL;
	}

	inode_lock(src);
	if (src != dst) {
		/* only try-lock the second inode; report -EBUSY on contention */
		ret = -EBUSY;
		if (!inode_trylock(dst))
			goto out;
	}

	ret = -EINVAL;
	/* source range must lie inside the file and must not wrap */
	if (pos_in + len > src->i_size || pos_in + len < pos_in)
		goto out_unlock;
	if (len == 0)
		olen = len = src->i_size - pos_in;
	/* a range ending at EOF is extended to cover the whole last block */
	if (pos_in + len == src->i_size)
		len = ALIGN(src->i_size, F2FS_BLKSIZE) - pos_in;
	if (len == 0) {
		ret = 0;
		goto out_unlock;
	}

	/* remember the old size and whether the move grows the destination */
	dst_osize = dst->i_size;
	if (pos_out + olen > dst->i_size)
		dst_max_i_size = pos_out + olen;

	/* verify the end result is block aligned */
	if (!IS_ALIGNED(pos_in, F2FS_BLKSIZE) ||
			!IS_ALIGNED(pos_in + len, F2FS_BLKSIZE) ||
			!IS_ALIGNED(pos_out, F2FS_BLKSIZE))
		goto out_unlock;

	ret = f2fs_convert_inline_inode(src);
	if (ret)
		goto out_unlock;

	ret = f2fs_convert_inline_inode(dst);
	if (ret)
		goto out_unlock;

	/* write out all dirty pages from offset */
	ret = filemap_write_and_wait_range(src->i_mapping,
					pos_in, pos_in + len);
	if (ret)
		goto out_unlock;

	ret = filemap_write_and_wait_range(dst->i_mapping,
					pos_out, pos_out + len);
	if (ret)
		goto out_unlock;

	f2fs_balance_fs(sbi, true);

	down_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
	if (src != dst) {
		ret = -EBUSY;
		if (!down_write_trylock(&F2FS_I(dst)->i_gc_rwsem[WRITE]))
			goto out_src;
	}

	f2fs_lock_op(sbi);
	ret = __exchange_data_block(src, dst, pos_in >> F2FS_BLKSIZE_BITS,
				pos_out >> F2FS_BLKSIZE_BITS,
				len >> F2FS_BLKSIZE_BITS, false);

	if (!ret) {
		/* grow the destination, or restore its size if the exchange changed it */
		if (dst_max_i_size)
			f2fs_i_size_write(dst, dst_max_i_size);
		else if (dst_osize != dst->i_size)
			f2fs_i_size_write(dst, dst_osize);
	}
	f2fs_unlock_op(sbi);

	if (src != dst)
		up_write(&F2FS_I(dst)->i_gc_rwsem[WRITE]);
out_src:
	up_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
out_unlock:
	if (src != dst)
		inode_unlock(dst);
out:
	inode_unlock(src);
	return ret;
}
|
|
|
|
|
|
|
|
static int f2fs_ioc_move_range(struct file *filp, unsigned long arg)
|
|
|
|
{
|
|
|
|
struct f2fs_move_range range;
|
|
|
|
struct fd dst;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
if (!(filp->f_mode & FMODE_READ) ||
|
|
|
|
!(filp->f_mode & FMODE_WRITE))
|
|
|
|
return -EBADF;
|
|
|
|
|
|
|
|
if (copy_from_user(&range, (struct f2fs_move_range __user *)arg,
|
|
|
|
sizeof(range)))
|
|
|
|
return -EFAULT;
|
|
|
|
|
|
|
|
dst = fdget(range.dst_fd);
|
|
|
|
if (!dst.file)
|
|
|
|
return -EBADF;
|
|
|
|
|
|
|
|
if (!(dst.file->f_mode & FMODE_WRITE)) {
|
|
|
|
err = -EBADF;
|
|
|
|
goto err_out;
|
|
|
|
}
|
|
|
|
|
|
|
|
err = mnt_want_write_file(filp);
|
|
|
|
if (err)
|
|
|
|
goto err_out;
|
|
|
|
|
|
|
|
err = f2fs_move_file_range(filp, range.pos_in, dst.file,
|
|
|
|
range.pos_out, range.len);
|
|
|
|
|
|
|
|
mnt_drop_write_file(filp);
|
2017-03-10 16:54:26 +07:00
|
|
|
if (err)
|
|
|
|
goto err_out;
|
2016-07-09 05:16:47 +07:00
|
|
|
|
|
|
|
if (copy_to_user((struct f2fs_move_range __user *)arg,
|
|
|
|
&range, sizeof(range)))
|
|
|
|
err = -EFAULT;
|
|
|
|
err_out:
|
|
|
|
fdput(dst);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2017-04-14 05:17:00 +07:00
|
|
|
static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
|
|
|
|
{
|
|
|
|
struct inode *inode = file_inode(filp);
|
|
|
|
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
|
|
|
|
struct sit_info *sm = SIT_I(sbi);
|
|
|
|
unsigned int start_segno = 0, end_segno = 0;
|
|
|
|
unsigned int dev_start_segno = 0, dev_end_segno = 0;
|
|
|
|
struct f2fs_flush_device range;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (!capable(CAP_SYS_ADMIN))
|
|
|
|
return -EPERM;
|
|
|
|
|
|
|
|
if (f2fs_readonly(sbi->sb))
|
|
|
|
return -EROFS;
|
|
|
|
|
2018-08-21 09:21:43 +07:00
|
|
|
if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
|
|
|
|
return -EINVAL;
|
|
|
|
|
2017-04-14 05:17:00 +07:00
|
|
|
if (copy_from_user(&range, (struct f2fs_flush_device __user *)arg,
|
|
|
|
sizeof(range)))
|
|
|
|
return -EFAULT;
|
|
|
|
|
|
|
|
if (sbi->s_ndevs <= 1 || sbi->s_ndevs - 1 <= range.dev_num ||
|
2018-10-24 17:37:26 +07:00
|
|
|
__is_large_section(sbi)) {
|
2017-04-14 05:17:00 +07:00
|
|
|
f2fs_msg(sbi->sb, KERN_WARNING,
|
|
|
|
"Can't flush %u in %d for segs_per_sec %u != 1\n",
|
|
|
|
range.dev_num, sbi->s_ndevs,
|
|
|
|
sbi->segs_per_sec);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = mnt_want_write_file(filp);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
if (range.dev_num != 0)
|
|
|
|
dev_start_segno = GET_SEGNO(sbi, FDEV(range.dev_num).start_blk);
|
|
|
|
dev_end_segno = GET_SEGNO(sbi, FDEV(range.dev_num).end_blk);
|
|
|
|
|
|
|
|
start_segno = sm->last_victim[FLUSH_DEVICE];
|
|
|
|
if (start_segno < dev_start_segno || start_segno >= dev_end_segno)
|
|
|
|
start_segno = dev_start_segno;
|
|
|
|
end_segno = min(start_segno + range.segments, dev_end_segno);
|
|
|
|
|
|
|
|
while (start_segno < end_segno) {
|
|
|
|
if (!mutex_trylock(&sbi->gc_mutex)) {
|
|
|
|
ret = -EBUSY;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
sm->last_victim[GC_CB] = end_segno + 1;
|
|
|
|
sm->last_victim[GC_GREEDY] = end_segno + 1;
|
|
|
|
sm->last_victim[ALLOC_NEXT] = end_segno + 1;
|
|
|
|
ret = f2fs_gc(sbi, true, true, start_segno);
|
|
|
|
if (ret == -EAGAIN)
|
|
|
|
ret = 0;
|
|
|
|
else if (ret < 0)
|
|
|
|
break;
|
|
|
|
start_segno++;
|
|
|
|
}
|
|
|
|
out:
|
|
|
|
mnt_drop_write_file(filp);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2017-07-22 02:58:59 +07:00
|
|
|
static int f2fs_ioc_get_features(struct file *filp, unsigned long arg)
|
|
|
|
{
|
|
|
|
struct inode *inode = file_inode(filp);
|
|
|
|
u32 sb_feature = le32_to_cpu(F2FS_I_SB(inode)->raw_super->feature);
|
|
|
|
|
|
|
|
/* Must validate to set it with SQLite behavior in Android. */
|
|
|
|
sb_feature |= F2FS_FEATURE_ATOMIC_WRITE;
|
|
|
|
|
|
|
|
return put_user(sb_feature, (u32 __user *)arg);
|
|
|
|
}
|
2017-04-14 05:17:00 +07:00
|
|
|
|
2017-07-28 23:32:52 +07:00
|
|
|
#ifdef CONFIG_QUOTA
|
2018-09-25 14:36:02 +07:00
|
|
|
/*
 * Transfer @inode's project quota accounting to project @kprojid.
 *
 * If dqget() fails for the target project, nothing is transferred and 0 is
 * returned.  If __dquot_transfer() fails, SBI_QUOTA_NEED_REPAIR is set and
 * its error is returned.
 */
int f2fs_transfer_project_quota(struct inode *inode, kprojid_t kprojid)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct dquot *transfer_to[MAXQUOTAS] = {};
	int err;

	transfer_to[PRJQUOTA] = dqget(sbi->sb, make_kqid_projid(kprojid));
	if (IS_ERR(transfer_to[PRJQUOTA]))
		return 0;

	err = __dquot_transfer(inode, transfer_to);
	if (err)
		set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);

	/*
	 * Put whatever is in the slot now rather than a cached pointer.
	 * NOTE(review): __dquot_transfer() may replace the entry -- confirm.
	 */
	dqput(transfer_to[PRJQUOTA]);
	return err;
}
|
|
|
|
|
2017-07-28 23:32:52 +07:00
|
|
|
/*
 * Change the project ID of the inode behind @filp to @projid.
 *
 * Without the project-quota feature only F2FS_DEF_PROJID is accepted.
 * Returns 0 when nothing needs to change or the update succeeded,
 * -EOPNOTSUPP when the inode has no extra attribute area, -EPERM for
 * IS_NOQUOTA() inodes, -EOVERFLOW when i_projid does not fit in the
 * on-disk inode, or an error from the node-page / quota helpers.
 */
static int f2fs_ioc_setproject(struct file *filp, __u32 projid)
{
	struct inode *inode = file_inode(filp);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct page *ipage;
	kprojid_t kprojid;
	bool fits;
	int err;

	if (!f2fs_sb_has_project_quota(sbi))
		return projid != F2FS_DEF_PROJID ? -EOPNOTSUPP : 0;

	if (!f2fs_has_extra_attr(inode))
		return -EOPNOTSUPP;

	kprojid = make_kprojid(&init_user_ns, (projid_t)projid);

	/* already in the requested project */
	if (projid_eq(kprojid, fi->i_projid))
		return 0;

	/* Is it quota file? Do not allow user to mess with it */
	if (IS_NOQUOTA(inode))
		return -EPERM;

	ipage = f2fs_get_node_page(sbi, inode->i_ino);
	if (IS_ERR(ipage))
		return PTR_ERR(ipage);

	/* the page is only needed for the size check; release it right away */
	fits = F2FS_FITS_IN_INODE(F2FS_INODE(ipage), fi->i_extra_isize,
								i_projid);
	f2fs_put_page(ipage, 1);
	if (!fits)
		return -EOVERFLOW;

	err = dquot_initialize(inode);
	if (err)
		return err;

	f2fs_lock_op(sbi);
	err = f2fs_transfer_project_quota(inode, kprojid);
	if (!err) {
		fi->i_projid = kprojid;
		inode->i_ctime = current_time(inode);
		f2fs_mark_inode_dirty_sync(inode, true);
	}
	f2fs_unlock_op(sbi);

	return err;
}
|
|
|
|
#else
|
2018-09-25 14:36:02 +07:00
|
|
|
/* Stub for builds without CONFIG_QUOTA: nothing to transfer, report success. */
int f2fs_transfer_project_quota(struct inode *inode, kprojid_t kprojid)
{
	int err = 0;

	return err;
}
|
|
|
|
|
2017-07-28 23:32:52 +07:00
|
|
|
/*
 * Stub for builds without CONFIG_QUOTA: only the default project ID is
 * accepted; any other value yields -EOPNOTSUPP.
 */
static int f2fs_ioc_setproject(struct file *filp, __u32 projid)
{
	return projid != F2FS_DEF_PROJID ? -EOPNOTSUPP : 0;
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* Transfer internal flags to xflags */
|
|
|
|
static inline __u32 f2fs_iflags_to_xflags(unsigned long iflags)
|
|
|
|
{
|
|
|
|
__u32 xflags = 0;
|
|
|
|
|
2018-04-03 14:08:17 +07:00
|
|
|
if (iflags & F2FS_SYNC_FL)
|
2017-07-28 23:32:52 +07:00
|
|
|
xflags |= FS_XFLAG_SYNC;
|
2018-04-03 14:08:17 +07:00
|
|
|
if (iflags & F2FS_IMMUTABLE_FL)
|
2017-07-28 23:32:52 +07:00
|
|
|
xflags |= FS_XFLAG_IMMUTABLE;
|
2018-04-03 14:08:17 +07:00
|
|
|
if (iflags & F2FS_APPEND_FL)
|
2017-07-28 23:32:52 +07:00
|
|
|
xflags |= FS_XFLAG_APPEND;
|
2018-04-03 14:08:17 +07:00
|
|
|
if (iflags & F2FS_NODUMP_FL)
|
2017-07-28 23:32:52 +07:00
|
|
|
xflags |= FS_XFLAG_NODUMP;
|
2018-04-03 14:08:17 +07:00
|
|
|
if (iflags & F2FS_NOATIME_FL)
|
2017-07-28 23:32:52 +07:00
|
|
|
xflags |= FS_XFLAG_NOATIME;
|
2018-04-03 14:08:17 +07:00
|
|
|
if (iflags & F2FS_PROJINHERIT_FL)
|
2017-07-28 23:32:52 +07:00
|
|
|
xflags |= FS_XFLAG_PROJINHERIT;
|
|
|
|
return xflags;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Every FS_XFLAG_* bit that the xflags <-> iflags converters understand. */
#define F2FS_SUPPORTED_FS_XFLAGS	(FS_XFLAG_SYNC | \
					 FS_XFLAG_IMMUTABLE | \
					 FS_XFLAG_APPEND | \
					 FS_XFLAG_NODUMP | \
					 FS_XFLAG_NOATIME | \
					 FS_XFLAG_PROJINHERIT)
|
|
|
|
|
|
|
|
/* Transfer xflags flags to internal */
|
|
|
|
static inline unsigned long f2fs_xflags_to_iflags(__u32 xflags)
|
|
|
|
{
|
|
|
|
unsigned long iflags = 0;
|
|
|
|
|
|
|
|
if (xflags & FS_XFLAG_SYNC)
|
2018-04-03 14:08:17 +07:00
|
|
|
iflags |= F2FS_SYNC_FL;
|
2017-07-28 23:32:52 +07:00
|
|
|
if (xflags & FS_XFLAG_IMMUTABLE)
|
2018-04-03 14:08:17 +07:00
|
|
|
iflags |= F2FS_IMMUTABLE_FL;
|
2017-07-28 23:32:52 +07:00
|
|
|
if (xflags & FS_XFLAG_APPEND)
|
2018-04-03 14:08:17 +07:00
|
|
|
iflags |= F2FS_APPEND_FL;
|
2017-07-28 23:32:52 +07:00
|
|
|
if (xflags & FS_XFLAG_NODUMP)
|
2018-04-03 14:08:17 +07:00
|
|
|
iflags |= F2FS_NODUMP_FL;
|
2017-07-28 23:32:52 +07:00
|
|
|
if (xflags & FS_XFLAG_NOATIME)
|
2018-04-03 14:08:17 +07:00
|
|
|
iflags |= F2FS_NOATIME_FL;
|
2017-07-28 23:32:52 +07:00
|
|
|
if (xflags & FS_XFLAG_PROJINHERIT)
|
2018-04-03 14:08:17 +07:00
|
|
|
iflags |= F2FS_PROJINHERIT_FL;
|
2017-07-28 23:32:52 +07:00
|
|
|
|
|
|
|
return iflags;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int f2fs_ioc_fsgetxattr(struct file *filp, unsigned long arg)
|
|
|
|
{
|
|
|
|
struct inode *inode = file_inode(filp);
|
|
|
|
struct f2fs_inode_info *fi = F2FS_I(inode);
|
|
|
|
struct fsxattr fa;
|
|
|
|
|
|
|
|
memset(&fa, 0, sizeof(struct fsxattr));
|
|
|
|
fa.fsx_xflags = f2fs_iflags_to_xflags(fi->i_flags &
|
2018-04-08 10:25:53 +07:00
|
|
|
F2FS_FL_USER_VISIBLE);
|
2017-07-28 23:32:52 +07:00
|
|
|
|
2018-10-24 17:34:26 +07:00
|
|
|
if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)))
|
2017-07-28 23:32:52 +07:00
|
|
|
fa.fsx_projid = (__u32)from_kprojid(&init_user_ns,
|
|
|
|
fi->i_projid);
|
|
|
|
|
|
|
|
if (copy_to_user((struct fsxattr __user *)arg, &fa, sizeof(fa)))
|
|
|
|
return -EFAULT;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-09-11 06:54:21 +07:00
|
|
|
static int f2fs_ioctl_check_project(struct inode *inode, struct fsxattr *fa)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* Project Quota ID state is only allowed to change from within the init
|
|
|
|
* namespace. Enforce that restriction only if we are trying to change
|
|
|
|
* the quota ID state. Everything else is allowed in user namespaces.
|
|
|
|
*/
|
|
|
|
if (current_user_ns() == &init_user_ns)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (__kprojid_val(F2FS_I(inode)->i_projid) != fa->fsx_projid)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
if (F2FS_I(inode)->i_flags & F2FS_PROJINHERIT_FL) {
|
|
|
|
if (!(fa->fsx_xflags & FS_XFLAG_PROJINHERIT))
|
|
|
|
return -EINVAL;
|
|
|
|
} else {
|
|
|
|
if (fa->fsx_xflags & FS_XFLAG_PROJINHERIT)
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-07-28 23:32:52 +07:00
|
|
|
static int f2fs_ioc_fssetxattr(struct file *filp, unsigned long arg)
|
|
|
|
{
|
|
|
|
struct inode *inode = file_inode(filp);
|
|
|
|
struct f2fs_inode_info *fi = F2FS_I(inode);
|
|
|
|
struct fsxattr fa;
|
|
|
|
unsigned int flags;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
if (copy_from_user(&fa, (struct fsxattr __user *)arg, sizeof(fa)))
|
|
|
|
return -EFAULT;
|
|
|
|
|
|
|
|
/* Make sure caller has proper permission */
|
|
|
|
if (!inode_owner_or_capable(inode))
|
|
|
|
return -EACCES;
|
|
|
|
|
|
|
|
if (fa.fsx_xflags & ~F2FS_SUPPORTED_FS_XFLAGS)
|
|
|
|
return -EOPNOTSUPP;
|
|
|
|
|
|
|
|
flags = f2fs_xflags_to_iflags(fa.fsx_xflags);
|
|
|
|
if (f2fs_mask_flags(inode->i_mode, flags) != flags)
|
|
|
|
return -EOPNOTSUPP;
|
|
|
|
|
|
|
|
err = mnt_want_write_file(filp);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
inode_lock(inode);
|
2018-09-11 06:54:21 +07:00
|
|
|
err = f2fs_ioctl_check_project(inode, &fa);
|
|
|
|
if (err)
|
|
|
|
goto out;
|
2017-07-28 23:32:52 +07:00
|
|
|
flags = (fi->i_flags & ~F2FS_FL_XFLAG_VISIBLE) |
|
|
|
|
(flags & F2FS_FL_XFLAG_VISIBLE);
|
|
|
|
err = __f2fs_ioc_setflags(inode, flags);
|
|
|
|
if (err)
|
2018-09-11 06:54:21 +07:00
|
|
|
goto out;
|
2017-07-28 23:32:52 +07:00
|
|
|
|
|
|
|
err = f2fs_ioc_setproject(filp, fa.fsx_projid);
|
2018-09-11 06:54:21 +07:00
|
|
|
out:
|
|
|
|
inode_unlock(inode);
|
|
|
|
mnt_drop_write_file(filp);
|
|
|
|
return err;
|
2017-07-28 23:32:52 +07:00
|
|
|
}
|
2017-04-14 05:17:00 +07:00
|
|
|
|
2017-12-08 07:25:39 +07:00
|
|
|
/*
 * Track the GC_FAILURE_PIN counter of @inode.
 *
 * When @inc is true the counter is incremented first.  If the counter then
 * exceeds sbi->gc_pin_file_threshold, a warning is logged, FI_PIN_FILE is
 * cleared and -EAGAIN is returned; otherwise 0 is returned.
 */
int f2fs_pin_file_control(struct inode *inode, bool inc)
{
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

	/* Use i_gc_failures for normal file as a risk signal. */
	if (inc)
		f2fs_i_gc_failures_write(inode,
				fi->i_gc_failures[GC_FAILURE_PIN] + 1);

	/* still within the allowed number of failures */
	if (fi->i_gc_failures[GC_FAILURE_PIN] <= sbi->gc_pin_file_threshold)
		return 0;

	f2fs_msg(sbi->sb, KERN_WARNING,
		"%s: Enable GC = ino %lx after %x GC trials\n",
		__func__, inode->i_ino,
		fi->i_gc_failures[GC_FAILURE_PIN]);
	clear_inode_flag(inode, FI_PIN_FILE);
	return -EAGAIN;
}
|
|
|
|
|
|
|
|
static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
|
|
|
|
{
|
|
|
|
struct inode *inode = file_inode(filp);
|
|
|
|
__u32 pin;
|
|
|
|
int ret = 0;
|
|
|
|
|
|
|
|
if (!inode_owner_or_capable(inode))
|
|
|
|
return -EACCES;
|
|
|
|
|
|
|
|
if (get_user(pin, (__u32 __user *)arg))
|
|
|
|
return -EFAULT;
|
|
|
|
|
|
|
|
if (!S_ISREG(inode->i_mode))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
if (f2fs_readonly(F2FS_I_SB(inode)->sb))
|
|
|
|
return -EROFS;
|
|
|
|
|
|
|
|
ret = mnt_want_write_file(filp);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
inode_lock(inode);
|
|
|
|
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-29 23:20:41 +07:00
|
|
|
if (f2fs_should_update_outplace(inode, NULL)) {
|
2018-01-17 15:31:38 +07:00
|
|
|
ret = -EINVAL;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2017-12-08 07:25:39 +07:00
|
|
|
if (!pin) {
|
|
|
|
clear_inode_flag(inode, FI_PIN_FILE);
|
2018-07-28 17:37:58 +07:00
|
|
|
f2fs_i_gc_failures_write(inode, 0);
|
2017-12-08 07:25:39 +07:00
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (f2fs_pin_file_control(inode, false)) {
|
|
|
|
ret = -EAGAIN;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
ret = f2fs_convert_inline_inode(inode);
|
|
|
|
if (ret)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
set_inode_flag(inode, FI_PIN_FILE);
|
f2fs: avoid stucking GC due to atomic write
f2fs doesn't allow abuse on atomic write class interface, so except
limiting in-mem pages' total memory usage capacity, we need to limit
atomic-write usage as well when filesystem is seriously fragmented,
otherwise we may run into infinite loop during foreground GC because
target blocks in victim segment are belong to atomic opened file for
long time.
Now, we will detect failure due to atomic write in foreground GC, if
the count exceeds threshold, we will drop all atomic written data in
cache, by this, I expect it can keep our system running safely to
prevent Dos attack.
In addition, his patch adds to show GC skip information in debugfs,
now it just shows count of skipped caused by atomic write.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-07 19:28:54 +07:00
|
|
|
ret = F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN];
|
2017-12-08 07:25:39 +07:00
|
|
|
done:
|
|
|
|
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
|
|
|
|
out:
|
|
|
|
inode_unlock(inode);
|
|
|
|
mnt_drop_write_file(filp);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int f2fs_ioc_get_pin_file(struct file *filp, unsigned long arg)
|
|
|
|
{
|
|
|
|
struct inode *inode = file_inode(filp);
|
|
|
|
__u32 pin = 0;
|
|
|
|
|
|
|
|
if (is_inode_flag_set(inode, FI_PIN_FILE))
|
f2fs: avoid stucking GC due to atomic write
f2fs doesn't allow abuse on atomic write class interface, so except
limiting in-mem pages' total memory usage capacity, we need to limit
atomic-write usage as well when filesystem is seriously fragmented,
otherwise we may run into infinite loop during foreground GC because
target blocks in victim segment are belong to atomic opened file for
long time.
Now, we will detect failure due to atomic write in foreground GC, if
the count exceeds threshold, we will drop all atomic written data in
cache, by this, I expect it can keep our system running safely to
prevent Dos attack.
In addition, his patch adds to show GC skip information in debugfs,
now it just shows count of skipped caused by atomic write.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-07 19:28:54 +07:00
|
|
|
pin = F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN];
|
2017-12-08 07:25:39 +07:00
|
|
|
return put_user(pin, (u32 __user *)arg);
|
|
|
|
}
|
|
|
|
|
2018-01-11 13:42:30 +07:00
|
|
|
int f2fs_precache_extents(struct inode *inode)
|
|
|
|
{
|
|
|
|
struct f2fs_inode_info *fi = F2FS_I(inode);
|
|
|
|
struct f2fs_map_blocks map;
|
|
|
|
pgoff_t m_next_extent;
|
|
|
|
loff_t end;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
if (is_inode_flag_set(inode, FI_NO_EXTENT))
|
|
|
|
return -EOPNOTSUPP;
|
|
|
|
|
|
|
|
map.m_lblk = 0;
|
|
|
|
map.m_next_pgofs = NULL;
|
|
|
|
map.m_next_extent = &m_next_extent;
|
|
|
|
map.m_seg_type = NO_CHECK_TYPE;
|
2018-11-20 03:29:35 +07:00
|
|
|
map.m_may_create = false;
|
2018-01-11 13:42:30 +07:00
|
|
|
end = F2FS_I_SB(inode)->max_file_blocks;
|
|
|
|
|
|
|
|
while (map.m_lblk < end) {
|
|
|
|
map.m_len = end - map.m_lblk;
|
|
|
|
|
2018-04-24 09:55:28 +07:00
|
|
|
down_write(&fi->i_gc_rwsem[WRITE]);
|
2018-01-11 13:42:30 +07:00
|
|
|
err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_PRECACHE);
|
2018-04-24 09:55:28 +07:00
|
|
|
up_write(&fi->i_gc_rwsem[WRITE]);
|
2018-01-11 13:42:30 +07:00
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
map.m_lblk = m_next_extent;
|
|
|
|
}
|
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * F2FS_IOC_PRECACHE_EXTENTS handler: run f2fs_precache_extents() on the
 * inode behind @filp.  @arg is unused.
 */
static int f2fs_ioc_precache_extents(struct file *filp, unsigned long arg)
{
	struct inode *inode = file_inode(filp);

	return f2fs_precache_extents(inode);
}
|
|
|
|
|
2014-09-25 05:37:02 +07:00
|
|
|
long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
|
|
|
|
{
|
2017-10-24 04:48:49 +07:00
|
|
|
if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp)))))
|
|
|
|
return -EIO;
|
|
|
|
|
2014-09-25 05:37:02 +07:00
|
|
|
switch (cmd) {
|
|
|
|
case F2FS_IOC_GETFLAGS:
|
|
|
|
return f2fs_ioc_getflags(filp, arg);
|
|
|
|
case F2FS_IOC_SETFLAGS:
|
|
|
|
return f2fs_ioc_setflags(filp, arg);
|
2015-01-23 19:36:04 +07:00
|
|
|
case F2FS_IOC_GETVERSION:
|
|
|
|
return f2fs_ioc_getversion(filp, arg);
|
2014-10-07 07:39:50 +07:00
|
|
|
case F2FS_IOC_START_ATOMIC_WRITE:
|
|
|
|
return f2fs_ioc_start_atomic_write(filp);
|
|
|
|
case F2FS_IOC_COMMIT_ATOMIC_WRITE:
|
|
|
|
return f2fs_ioc_commit_atomic_write(filp);
|
2014-10-07 06:11:16 +07:00
|
|
|
case F2FS_IOC_START_VOLATILE_WRITE:
|
|
|
|
return f2fs_ioc_start_volatile_write(filp);
|
2014-12-09 21:08:59 +07:00
|
|
|
case F2FS_IOC_RELEASE_VOLATILE_WRITE:
|
|
|
|
return f2fs_ioc_release_volatile_write(filp);
|
|
|
|
case F2FS_IOC_ABORT_VOLATILE_WRITE:
|
|
|
|
return f2fs_ioc_abort_volatile_write(filp);
|
2015-01-09 10:15:53 +07:00
|
|
|
case F2FS_IOC_SHUTDOWN:
|
|
|
|
return f2fs_ioc_shutdown(filp, arg);
|
2014-09-25 05:37:02 +07:00
|
|
|
case FITRIM:
|
|
|
|
return f2fs_ioc_fitrim(filp, arg);
|
2015-04-21 05:19:06 +07:00
|
|
|
case F2FS_IOC_SET_ENCRYPTION_POLICY:
|
|
|
|
return f2fs_ioc_set_encryption_policy(filp, arg);
|
|
|
|
case F2FS_IOC_GET_ENCRYPTION_POLICY:
|
|
|
|
return f2fs_ioc_get_encryption_policy(filp, arg);
|
|
|
|
case F2FS_IOC_GET_ENCRYPTION_PWSALT:
|
|
|
|
return f2fs_ioc_get_encryption_pwsalt(filp, arg);
|
2015-07-10 17:08:10 +07:00
|
|
|
case F2FS_IOC_GARBAGE_COLLECT:
|
|
|
|
return f2fs_ioc_gc(filp, arg);
|
2017-06-16 06:44:42 +07:00
|
|
|
case F2FS_IOC_GARBAGE_COLLECT_RANGE:
|
|
|
|
return f2fs_ioc_gc_range(filp, arg);
|
2015-10-05 21:24:19 +07:00
|
|
|
case F2FS_IOC_WRITE_CHECKPOINT:
|
2018-07-17 19:41:49 +07:00
|
|
|
return f2fs_ioc_write_checkpoint(filp, arg);
|
2015-10-27 08:53:45 +07:00
|
|
|
case F2FS_IOC_DEFRAGMENT:
|
|
|
|
return f2fs_ioc_defragment(filp, arg);
|
2016-07-09 05:16:47 +07:00
|
|
|
case F2FS_IOC_MOVE_RANGE:
|
|
|
|
return f2fs_ioc_move_range(filp, arg);
|
2017-04-14 05:17:00 +07:00
|
|
|
case F2FS_IOC_FLUSH_DEVICE:
|
|
|
|
return f2fs_ioc_flush_device(filp, arg);
|
2017-07-22 02:58:59 +07:00
|
|
|
case F2FS_IOC_GET_FEATURES:
|
|
|
|
return f2fs_ioc_get_features(filp, arg);
|
2017-07-28 23:32:52 +07:00
|
|
|
case F2FS_IOC_FSGETXATTR:
|
|
|
|
return f2fs_ioc_fsgetxattr(filp, arg);
|
|
|
|
case F2FS_IOC_FSSETXATTR:
|
|
|
|
return f2fs_ioc_fssetxattr(filp, arg);
|
2017-12-08 07:25:39 +07:00
|
|
|
case F2FS_IOC_GET_PIN_FILE:
|
|
|
|
return f2fs_ioc_get_pin_file(filp, arg);
|
|
|
|
case F2FS_IOC_SET_PIN_FILE:
|
|
|
|
return f2fs_ioc_set_pin_file(filp, arg);
|
2018-01-11 13:42:30 +07:00
|
|
|
case F2FS_IOC_PRECACHE_EXTENTS:
|
|
|
|
return f2fs_ioc_precache_extents(filp, arg);
|
2012-11-02 15:09:44 +07:00
|
|
|
default:
|
|
|
|
return -ENOTTY;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-04-22 10:39:58 +07:00
|
|
|
static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
|
|
|
|
{
|
2016-02-04 04:09:09 +07:00
|
|
|
struct file *file = iocb->ki_filp;
|
|
|
|
struct inode *inode = file_inode(file);
|
|
|
|
ssize_t ret;
|
2015-04-22 10:39:58 +07:00
|
|
|
|
2017-10-24 04:48:49 +07:00
|
|
|
if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
|
|
|
|
return -EIO;
|
|
|
|
|
2018-03-08 17:34:38 +07:00
|
|
|
if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
if (!inode_trylock(inode)) {
|
|
|
|
if (iocb->ki_flags & IOCB_NOWAIT)
|
|
|
|
return -EAGAIN;
|
|
|
|
inode_lock(inode);
|
|
|
|
}
|
|
|
|
|
2016-02-04 04:09:09 +07:00
|
|
|
ret = generic_write_checks(iocb, from);
|
|
|
|
if (ret > 0) {
|
2018-03-31 07:58:13 +07:00
|
|
|
bool preallocated = false;
|
|
|
|
size_t target_size = 0;
|
2017-01-14 04:12:29 +07:00
|
|
|
int err;
|
2016-11-12 07:31:56 +07:00
|
|
|
|
2017-01-14 04:12:29 +07:00
|
|
|
if (iov_iter_fault_in_readable(from, iov_iter_count(from)))
|
|
|
|
set_inode_flag(inode, FI_NO_PREALLOC);
|
2016-11-12 07:31:56 +07:00
|
|
|
|
2018-03-08 17:34:38 +07:00
|
|
|
if ((iocb->ki_flags & IOCB_NOWAIT) &&
|
|
|
|
(iocb->ki_flags & IOCB_DIRECT)) {
|
|
|
|
if (!f2fs_overwrite_io(inode, iocb->ki_pos,
|
|
|
|
iov_iter_count(from)) ||
|
|
|
|
f2fs_has_inline_data(inode) ||
|
2018-09-27 17:34:52 +07:00
|
|
|
f2fs_force_buffered_io(inode,
|
|
|
|
iocb, from)) {
|
2018-04-24 10:40:19 +07:00
|
|
|
clear_inode_flag(inode,
|
|
|
|
FI_NO_PREALLOC);
|
2018-03-08 17:34:38 +07:00
|
|
|
inode_unlock(inode);
|
|
|
|
return -EAGAIN;
|
|
|
|
}
|
|
|
|
|
|
|
|
} else {
|
2018-03-31 07:58:13 +07:00
|
|
|
preallocated = true;
|
|
|
|
target_size = iocb->ki_pos + iov_iter_count(from);
|
|
|
|
|
2018-03-08 17:34:38 +07:00
|
|
|
err = f2fs_preallocate_blocks(iocb, from);
|
|
|
|
if (err) {
|
|
|
|
clear_inode_flag(inode, FI_NO_PREALLOC);
|
|
|
|
inode_unlock(inode);
|
|
|
|
return err;
|
|
|
|
}
|
2016-07-14 09:33:19 +07:00
|
|
|
}
|
2016-11-12 07:31:56 +07:00
|
|
|
ret = __generic_file_write_iter(iocb, from);
|
2017-01-14 04:12:29 +07:00
|
|
|
clear_inode_flag(inode, FI_NO_PREALLOC);
|
2017-08-02 22:21:48 +07:00
|
|
|
|
2018-03-31 07:58:13 +07:00
|
|
|
/* if we couldn't write data, we should deallocate blocks. */
|
|
|
|
if (preallocated && i_size_read(inode) < target_size)
|
|
|
|
f2fs_truncate(inode);
|
|
|
|
|
2017-08-02 22:21:48 +07:00
|
|
|
if (ret > 0)
|
|
|
|
f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret);
|
2016-02-04 04:09:09 +07:00
|
|
|
}
|
|
|
|
inode_unlock(inode);
|
|
|
|
|
2016-04-07 22:52:01 +07:00
|
|
|
if (ret > 0)
|
|
|
|
ret = generic_write_sync(iocb, ret);
|
2016-02-04 04:09:09 +07:00
|
|
|
return ret;
|
2015-04-22 10:39:58 +07:00
|
|
|
}
|
|
|
|
|
2013-02-04 21:41:41 +07:00
|
|
|
#ifdef CONFIG_COMPAT
|
|
|
|
long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
|
|
|
|
{
|
|
|
|
switch (cmd) {
|
|
|
|
case F2FS_IOC32_GETFLAGS:
|
|
|
|
cmd = F2FS_IOC_GETFLAGS;
|
|
|
|
break;
|
|
|
|
case F2FS_IOC32_SETFLAGS:
|
|
|
|
cmd = F2FS_IOC_SETFLAGS;
|
|
|
|
break;
|
2015-11-10 17:44:20 +07:00
|
|
|
case F2FS_IOC32_GETVERSION:
|
|
|
|
cmd = F2FS_IOC_GETVERSION;
|
|
|
|
break;
|
|
|
|
case F2FS_IOC_START_ATOMIC_WRITE:
|
|
|
|
case F2FS_IOC_COMMIT_ATOMIC_WRITE:
|
|
|
|
case F2FS_IOC_START_VOLATILE_WRITE:
|
|
|
|
case F2FS_IOC_RELEASE_VOLATILE_WRITE:
|
|
|
|
case F2FS_IOC_ABORT_VOLATILE_WRITE:
|
|
|
|
case F2FS_IOC_SHUTDOWN:
|
|
|
|
case F2FS_IOC_SET_ENCRYPTION_POLICY:
|
|
|
|
case F2FS_IOC_GET_ENCRYPTION_PWSALT:
|
|
|
|
case F2FS_IOC_GET_ENCRYPTION_POLICY:
|
|
|
|
case F2FS_IOC_GARBAGE_COLLECT:
|
2017-06-16 06:44:42 +07:00
|
|
|
case F2FS_IOC_GARBAGE_COLLECT_RANGE:
|
2015-11-10 17:44:20 +07:00
|
|
|
case F2FS_IOC_WRITE_CHECKPOINT:
|
|
|
|
case F2FS_IOC_DEFRAGMENT:
|
2016-07-09 05:16:47 +07:00
|
|
|
case F2FS_IOC_MOVE_RANGE:
|
2017-04-14 05:17:00 +07:00
|
|
|
case F2FS_IOC_FLUSH_DEVICE:
|
2017-07-22 02:58:59 +07:00
|
|
|
case F2FS_IOC_GET_FEATURES:
|
2017-07-28 23:32:52 +07:00
|
|
|
case F2FS_IOC_FSGETXATTR:
|
|
|
|
case F2FS_IOC_FSSETXATTR:
|
2017-12-08 07:25:39 +07:00
|
|
|
case F2FS_IOC_GET_PIN_FILE:
|
|
|
|
case F2FS_IOC_SET_PIN_FILE:
|
2018-01-11 13:42:30 +07:00
|
|
|
case F2FS_IOC_PRECACHE_EXTENTS:
|
2016-07-09 05:16:47 +07:00
|
|
|
break;
|
2013-02-04 21:41:41 +07:00
|
|
|
default:
|
|
|
|
return -ENOIOCTLCMD;
|
|
|
|
}
|
|
|
|
return f2fs_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2012-11-02 15:09:44 +07:00
|
|
|
const struct file_operations f2fs_file_operations = {
|
2014-04-23 13:10:24 +07:00
|
|
|
.llseek = f2fs_llseek,
|
2014-04-03 01:33:16 +07:00
|
|
|
.read_iter = generic_file_read_iter,
|
2015-04-22 10:39:58 +07:00
|
|
|
.write_iter = f2fs_file_write_iter,
|
|
|
|
.open = f2fs_file_open,
|
2014-12-06 05:37:37 +07:00
|
|
|
.release = f2fs_release_file,
|
2012-11-02 15:09:44 +07:00
|
|
|
.mmap = f2fs_file_mmap,
|
2017-07-25 09:46:29 +07:00
|
|
|
.flush = f2fs_file_flush,
|
2012-11-02 15:09:44 +07:00
|
|
|
.fsync = f2fs_sync_file,
|
|
|
|
.fallocate = f2fs_fallocate,
|
|
|
|
.unlocked_ioctl = f2fs_ioctl,
|
2013-02-04 21:41:41 +07:00
|
|
|
#ifdef CONFIG_COMPAT
|
|
|
|
.compat_ioctl = f2fs_compat_ioctl,
|
|
|
|
#endif
|
2012-11-02 15:09:44 +07:00
|
|
|
.splice_read = generic_file_splice_read,
|
2014-04-05 15:27:08 +07:00
|
|
|
.splice_write = iter_file_splice_write,
|
2012-11-02 15:09:44 +07:00
|
|
|
};
|