linux_dsm_epyc7002/fs/fat/inode.c

1557 lines
40 KiB
C
Raw Normal View History

/*
* linux/fs/fat/inode.c
*
* Written 1992,1993 by Werner Almesberger
* VFAT extensions by Gordon Chaffee, merged with msdos fs by Henrik Storner
* Rewritten for the constant inumbers support by Al Viro
*
* Fixes:
*
* Max Cohan: Fixed invalid FSINFO offset when info_sector is 0
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/time.h>
#include <linux/slab.h>
#include <linux/seq_file.h>
#include <linux/pagemap.h>
#include <linux/mpage.h>
#include <linux/buffer_head.h>
#include <linux/exportfs.h>
#include <linux/mount.h>
#include <linux/vfs.h>
#include <linux/parser.h>
#include <linux/uio.h>
#include <linux/writeback.h>
#include <linux/log2.h>
#include <linux/hash.h>
#include <linux/blkdev.h>
#include <asm/unaligned.h>
#include "fat.h"
#ifndef CONFIG_FAT_DEFAULT_IOCHARSET
/* if user don't select VFAT, this is undefined. */
#define CONFIG_FAT_DEFAULT_IOCHARSET ""
#endif
static int fat_default_codepage = CONFIG_FAT_DEFAULT_CODEPAGE;
static char fat_default_iocharset[] = CONFIG_FAT_DEFAULT_IOCHARSET;
static int fat_add_cluster(struct inode *inode)
{
int err, cluster;
err = fat_alloc_clusters(inode, &cluster, 1);
if (err)
return err;
/* FIXME: this cluster should be added after data of this
* cluster is writed */
err = fat_chain_add(inode, cluster, 1);
if (err)
fat_free_clusters(inode, cluster);
return err;
}
static inline int __fat_get_block(struct inode *inode, sector_t iblock,
unsigned long *max_blocks,
struct buffer_head *bh_result, int create)
{
struct super_block *sb = inode->i_sb;
struct msdos_sb_info *sbi = MSDOS_SB(sb);
unsigned long mapped_blocks;
sector_t phys;
int err, offset;
err = fat_bmap(inode, iblock, &phys, &mapped_blocks, create);
if (err)
return err;
if (phys) {
map_bh(bh_result, sb, phys);
*max_blocks = min(mapped_blocks, *max_blocks);
return 0;
}
if (!create)
return 0;
if (iblock != MSDOS_I(inode)->mmu_private >> sb->s_blocksize_bits) {
fat_fs_error(sb, "corrupted file size (i_pos %lld, %lld)",
MSDOS_I(inode)->i_pos, MSDOS_I(inode)->mmu_private);
return -EIO;
}
offset = (unsigned long)iblock & (sbi->sec_per_clus - 1);
if (!offset) {
/* TODO: multiple cluster allocation would be desirable. */
err = fat_add_cluster(inode);
if (err)
return err;
}
/* available blocks on this cluster */
mapped_blocks = sbi->sec_per_clus - offset;
*max_blocks = min(mapped_blocks, *max_blocks);
MSDOS_I(inode)->mmu_private += *max_blocks << sb->s_blocksize_bits;
err = fat_bmap(inode, iblock, &phys, &mapped_blocks, create);
if (err)
return err;
BUG_ON(!phys);
BUG_ON(*max_blocks != mapped_blocks);
set_buffer_new(bh_result);
map_bh(bh_result, sb, phys);
return 0;
}
static int fat_get_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create)
{
struct super_block *sb = inode->i_sb;
unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits;
int err;
err = __fat_get_block(inode, iblock, &max_blocks, bh_result, create);
if (err)
return err;
bh_result->b_size = max_blocks << sb->s_blocksize_bits;
return 0;
}
static int fat_writepage(struct page *page, struct writeback_control *wbc)
{
return block_write_full_page(page, fat_get_block, wbc);
}
static int fat_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
return mpage_writepages(mapping, wbc, fat_get_block);
}
static int fat_readpage(struct file *file, struct page *page)
{
return mpage_readpage(page, fat_get_block);
}
static int fat_readpages(struct file *file, struct address_space *mapping,
struct list_head *pages, unsigned nr_pages)
{
return mpage_readpages(mapping, pages, nr_pages, fat_get_block);
}
static void fat_write_failed(struct address_space *mapping, loff_t to)
{
struct inode *inode = mapping->host;
if (to > inode->i_size) {
truncate_pagecache(inode, to, inode->i_size);
fat_truncate_blocks(inode, inode->i_size);
}
}
static int fat_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
{
int err;
*pagep = NULL;
err = cont_write_begin(file, mapping, pos, len, flags,
pagep, fsdata, fat_get_block,
&MSDOS_I(mapping->host)->mmu_private);
if (err < 0)
fat_write_failed(mapping, pos + len);
return err;
}
static int fat_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *pagep, void *fsdata)
{
struct inode *inode = mapping->host;
int err;
err = generic_write_end(file, mapping, pos, len, copied, pagep, fsdata);
if (err < len)
fat_write_failed(mapping, pos + len);
if (!(err < 0) && !(MSDOS_I(inode)->i_attrs & ATTR_ARCH)) {
inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
MSDOS_I(inode)->i_attrs |= ATTR_ARCH;
mark_inode_dirty(inode);
}
return err;
}
static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
const struct iovec *iov,
loff_t offset, unsigned long nr_segs)
{
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
ssize_t ret;
if (rw == WRITE) {
/*
* FIXME: blockdev_direct_IO() doesn't use ->write_begin(),
* so we need to update the ->mmu_private to block boundary.
*
* But we must fill the remaining area or hole by nul for
* updating ->mmu_private.
*
* Return 0, and fallback to normal buffered write.
*/
loff_t size = offset + iov_length(iov, nr_segs);
if (MSDOS_I(inode)->mmu_private < size)
return 0;
}
/*
* FAT need to use the DIO_LOCKING for avoiding the race
* condition of fat_get_block() and ->truncate().
*/
ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
fat_get_block);
if (ret < 0 && (rw & WRITE))
fat_write_failed(mapping, offset + iov_length(iov, nr_segs));
return ret;
}
static sector_t _fat_bmap(struct address_space *mapping, sector_t block)
{
sector_t blocknr;
/* fat_get_cluster() assumes the requested blocknr isn't truncated. */
down_read(&MSDOS_I(mapping->host)->truncate_lock);
blocknr = generic_block_bmap(mapping, block, fat_get_block);
up_read(&MSDOS_I(mapping->host)->truncate_lock);
return blocknr;
}
static const struct address_space_operations fat_aops = {
.readpage = fat_readpage,
.readpages = fat_readpages,
.writepage = fat_writepage,
.writepages = fat_writepages,
.write_begin = fat_write_begin,
.write_end = fat_write_end,
.direct_IO = fat_direct_IO,
.bmap = _fat_bmap
};
/*
* New FAT inode stuff. We do the following:
* a) i_ino is constant and has nothing with on-disk location.
* b) FAT manages its own cache of directory entries.
* c) *This* cache is indexed by on-disk location.
* d) inode has an associated directory entry, all right, but
* it may be unhashed.
* e) currently entries are stored within struct inode. That should
* change.
* f) we deal with races in the following way:
* 1. readdir() and lookup() do FAT-dir-cache lookup.
* 2. rename() unhashes the F-d-c entry and rehashes it in
* a new place.
* 3. unlink() and rmdir() unhash F-d-c entry.
* 4. fat_write_inode() checks whether the thing is unhashed.
* If it is we silently return. If it isn't we do bread(),
* check if the location is still valid and retry if it
* isn't. Otherwise we do changes.
* 5. Spinlock is used to protect hash/unhash/location check/lookup
* 6. fat_evict_inode() unhashes the F-d-c entry.
* 7. lookup() and readdir() do igrab() if they find a F-d-c entry
* and consider negative result as cache miss.
*/
static void fat_hash_init(struct super_block *sb)
{
struct msdos_sb_info *sbi = MSDOS_SB(sb);
int i;
spin_lock_init(&sbi->inode_hash_lock);
for (i = 0; i < FAT_HASH_SIZE; i++)
INIT_HLIST_HEAD(&sbi->inode_hashtable[i]);
}
static inline unsigned long fat_hash(loff_t i_pos)
{
return hash_32(i_pos, FAT_HASH_BITS);
}
static void dir_hash_init(struct super_block *sb)
{
struct msdos_sb_info *sbi = MSDOS_SB(sb);
int i;
spin_lock_init(&sbi->dir_hash_lock);
for (i = 0; i < FAT_HASH_SIZE; i++)
INIT_HLIST_HEAD(&sbi->dir_hashtable[i]);
}
void fat_attach(struct inode *inode, loff_t i_pos)
{
struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
if (inode->i_ino != MSDOS_ROOT_INO) {
struct hlist_head *head = sbi->inode_hashtable
+ fat_hash(i_pos);
spin_lock(&sbi->inode_hash_lock);
MSDOS_I(inode)->i_pos = i_pos;
hlist_add_head(&MSDOS_I(inode)->i_fat_hash, head);
spin_unlock(&sbi->inode_hash_lock);
}
/* If NFS support is enabled, cache the mapping of start cluster
* to directory inode. This is used during reconnection of
* dentries to the filesystem root.
*/
if (S_ISDIR(inode->i_mode) && sbi->options.nfs) {
struct hlist_head *d_head = sbi->dir_hashtable;
d_head += fat_dir_hash(MSDOS_I(inode)->i_logstart);
spin_lock(&sbi->dir_hash_lock);
hlist_add_head(&MSDOS_I(inode)->i_dir_hash, d_head);
spin_unlock(&sbi->dir_hash_lock);
}
}
EXPORT_SYMBOL_GPL(fat_attach);
void fat_detach(struct inode *inode)
{
struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
spin_lock(&sbi->inode_hash_lock);
MSDOS_I(inode)->i_pos = 0;
hlist_del_init(&MSDOS_I(inode)->i_fat_hash);
spin_unlock(&sbi->inode_hash_lock);
if (S_ISDIR(inode->i_mode) && sbi->options.nfs) {
spin_lock(&sbi->dir_hash_lock);
hlist_del_init(&MSDOS_I(inode)->i_dir_hash);
spin_unlock(&sbi->dir_hash_lock);
}
}
EXPORT_SYMBOL_GPL(fat_detach);
struct inode *fat_iget(struct super_block *sb, loff_t i_pos)
{
struct msdos_sb_info *sbi = MSDOS_SB(sb);
struct hlist_head *head = sbi->inode_hashtable + fat_hash(i_pos);
struct hlist_node *_p;
struct msdos_inode_info *i;
struct inode *inode = NULL;
spin_lock(&sbi->inode_hash_lock);
hlist_for_each_entry(i, _p, head, i_fat_hash) {
BUG_ON(i->vfs_inode.i_sb != sb);
if (i->i_pos != i_pos)
continue;
inode = igrab(&i->vfs_inode);
if (inode)
break;
}
spin_unlock(&sbi->inode_hash_lock);
return inode;
}
static int is_exec(unsigned char *extension)
{
unsigned char *exe_extensions = "EXECOMBAT", *walk;
for (walk = exe_extensions; *walk; walk += 3)
if (!strncmp(extension, walk, 3))
return 1;
return 0;
}
static int fat_calc_dir_size(struct inode *inode)
{
struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
int ret, fclus, dclus;
inode->i_size = 0;
if (MSDOS_I(inode)->i_start == 0)
return 0;
ret = fat_get_cluster(inode, FAT_ENT_EOF, &fclus, &dclus);
if (ret < 0)
return ret;
inode->i_size = (fclus + 1) << sbi->cluster_bits;
return 0;
}
/* doesn't deal with root inode */
static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
{
struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
int error;
MSDOS_I(inode)->i_pos = 0;
inode->i_uid = sbi->options.fs_uid;
inode->i_gid = sbi->options.fs_gid;
inode->i_version++;
inode->i_generation = get_seconds();
if ((de->attr & ATTR_DIR) && !IS_FREE(de->name)) {
inode->i_generation &= ~1;
inode->i_mode = fat_make_mode(sbi, de->attr, S_IRWXUGO);
inode->i_op = sbi->dir_ops;
inode->i_fop = &fat_dir_operations;
MSDOS_I(inode)->i_start = fat_get_start(sbi, de);
MSDOS_I(inode)->i_logstart = MSDOS_I(inode)->i_start;
error = fat_calc_dir_size(inode);
if (error < 0)
return error;
MSDOS_I(inode)->mmu_private = inode->i_size;
set_nlink(inode, fat_subdirs(inode));
} else { /* not a directory */
inode->i_generation |= 1;
inode->i_mode = fat_make_mode(sbi, de->attr,
((sbi->options.showexec && !is_exec(de->name + 8))
? S_IRUGO|S_IWUGO : S_IRWXUGO));
MSDOS_I(inode)->i_start = fat_get_start(sbi, de);
MSDOS_I(inode)->i_logstart = MSDOS_I(inode)->i_start;
inode->i_size = le32_to_cpu(de->size);
inode->i_op = &fat_file_inode_operations;
inode->i_fop = &fat_file_operations;
inode->i_mapping->a_ops = &fat_aops;
MSDOS_I(inode)->mmu_private = inode->i_size;
}
if (de->attr & ATTR_SYS) {
if (sbi->options.sys_immutable)
inode->i_flags |= S_IMMUTABLE;
}
fat_save_attrs(inode, de->attr);
inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1))
& ~((loff_t)sbi->cluster_size - 1)) >> 9;
fat_time_fat2unix(sbi, &inode->i_mtime, de->time, de->date, 0);
if (sbi->options.isvfat) {
fat_time_fat2unix(sbi, &inode->i_ctime, de->ctime,
de->cdate, de->ctime_cs);
fat_time_fat2unix(sbi, &inode->i_atime, 0, de->adate, 0);
} else
inode->i_ctime = inode->i_atime = inode->i_mtime;
return 0;
}
struct inode *fat_build_inode(struct super_block *sb,
struct msdos_dir_entry *de, loff_t i_pos)
{
struct inode *inode;
int err;
inode = fat_iget(sb, i_pos);
if (inode)
goto out;
inode = new_inode(sb);
if (!inode) {
inode = ERR_PTR(-ENOMEM);
goto out;
}
inode->i_ino = iunique(sb, MSDOS_ROOT_INO);
inode->i_version = 1;
err = fat_fill_inode(inode, de);
if (err) {
iput(inode);
inode = ERR_PTR(err);
goto out;
}
fat_attach(inode, i_pos);
insert_inode_hash(inode);
out:
return inode;
}
EXPORT_SYMBOL_GPL(fat_build_inode);
static void fat_evict_inode(struct inode *inode)
{
truncate_inode_pages(&inode->i_data, 0);
if (!inode->i_nlink) {
inode->i_size = 0;
fat_truncate_blocks(inode, 0);
}
invalidate_inode_buffers(inode);
clear_inode(inode);
fat_cache_inval_inode(inode);
fat_detach(inode);
}
static void fat_put_super(struct super_block *sb)
{
struct msdos_sb_info *sbi = MSDOS_SB(sb);
fat: introduce special inode for managing the FSINFO block This is patchset makes fatfs stop using the VFS '->write_super()' method for writing out the FSINFO block. The final goal is to get rid of the 'sync_supers()' kernel thread. This kernel thread wakes up every 5 seconds (by default) and calls '->write_super()' for all mounted file-systems. And the bad thing is that this is done even if all the superblocks are clean. Moreover, some file-systems do not even need this end they do not register the '->write_super()' method at all (e.g., btrfs). So 'sync_supers()' most often just generates useless wake-ups and wastes power. I am trying to make all file-systems independent of '->write_super()' and plan to remove 'sync_supers()' and '->write_super' completely once there are no more users. The '->write_supers()' method is mostly used by baroque file-systems like hfs, udf, etc. Modern file-systems like btrfs and xfs do not use it. This justifies removing this stuff from VFS completely and make every FS self-manage own superblock. Tested with xfstests. This patch: Preparation for further changes. It introduces a special inode ('fsinfo_inode') in FAT file-system which we'll later use for managing the FSINFO block. Note, this there is already one special inode ('fat_inode') which is used for managing the FAT tables. Introduce new 'MSDOS_FSINFO_INO' constant for this special inode. It is safe to do because FAT file-system does not store inode numbers on the media but generates them run-time. I've also cleaned up the comment to existing 'MSDOS_ROOT_INO' constant, while on it. Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com> Cc: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> Cc: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2012-06-01 06:26:12 +07:00
iput(sbi->fsinfo_inode);
iput(sbi->fat_inode);
unload_nls(sbi->nls_disk);
unload_nls(sbi->nls_io);
if (sbi->options.iocharset != fat_default_iocharset)
kfree(sbi->options.iocharset);
sb->s_fs_info = NULL;
kfree(sbi);
}
static struct kmem_cache *fat_inode_cachep;
static struct inode *fat_alloc_inode(struct super_block *sb)
{
struct msdos_inode_info *ei;
ei = kmem_cache_alloc(fat_inode_cachep, GFP_NOFS);
if (!ei)
return NULL;
init_rwsem(&ei->truncate_lock);
return &ei->vfs_inode;
}
2011-01-07 13:49:49 +07:00
static void fat_i_callback(struct rcu_head *head)
{
2011-01-07 13:49:49 +07:00
struct inode *inode = container_of(head, struct inode, i_rcu);
kmem_cache_free(fat_inode_cachep, MSDOS_I(inode));
}
2011-01-07 13:49:49 +07:00
static void fat_destroy_inode(struct inode *inode)
{
call_rcu(&inode->i_rcu, fat_i_callback);
}
static void init_once(void *foo)
{
struct msdos_inode_info *ei = (struct msdos_inode_info *)foo;
spin_lock_init(&ei->cache_lru_lock);
ei->nr_caches = 0;
ei->cache_valid_id = FAT_CACHE_VALID + 1;
INIT_LIST_HEAD(&ei->cache_lru);
INIT_HLIST_NODE(&ei->i_fat_hash);
INIT_HLIST_NODE(&ei->i_dir_hash);
inode_init_once(&ei->vfs_inode);
}
static int __init fat_init_inodecache(void)
{
fat_inode_cachep = kmem_cache_create("fat_inode_cache",
sizeof(struct msdos_inode_info),
0, (SLAB_RECLAIM_ACCOUNT|
SLAB_MEM_SPREAD),
init_once);
if (fat_inode_cachep == NULL)
return -ENOMEM;
return 0;
}
static void __exit fat_destroy_inodecache(void)
{
/*
* Make sure all delayed rcu free inodes are flushed before we
* destroy cache.
*/
rcu_barrier();
kmem_cache_destroy(fat_inode_cachep);
}
static int fat_remount(struct super_block *sb, int *flags, char *data)
{
struct msdos_sb_info *sbi = MSDOS_SB(sb);
*flags |= MS_NODIRATIME | (sbi->options.isvfat ? 0 : MS_NOATIME);
return 0;
}
static int fat_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct super_block *sb = dentry->d_sb;
struct msdos_sb_info *sbi = MSDOS_SB(sb);
u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
/* If the count of free cluster is still unknown, counts it here. */
if (sbi->free_clusters == -1 || !sbi->free_clus_valid) {
int err = fat_count_free_clusters(dentry->d_sb);
if (err)
return err;
}
buf->f_type = dentry->d_sb->s_magic;
buf->f_bsize = sbi->cluster_size;
buf->f_blocks = sbi->max_cluster - FAT_START_ENT;
buf->f_bfree = sbi->free_clusters;
buf->f_bavail = sbi->free_clusters;
buf->f_fsid.val[0] = (u32)id;
buf->f_fsid.val[1] = (u32)(id >> 32);
buf->f_namelen =
(sbi->options.isvfat ? FAT_LFN_LEN : 12) * NLS_MAX_CHARSET_SIZE;
return 0;
}
static inline loff_t fat_i_pos_read(struct msdos_sb_info *sbi,
struct inode *inode)
{
loff_t i_pos;
#if BITS_PER_LONG == 32
spin_lock(&sbi->inode_hash_lock);
#endif
i_pos = MSDOS_I(inode)->i_pos;
#if BITS_PER_LONG == 32
spin_unlock(&sbi->inode_hash_lock);
#endif
return i_pos;
}
static int __fat_write_inode(struct inode *inode, int wait)
{
struct super_block *sb = inode->i_sb;
struct msdos_sb_info *sbi = MSDOS_SB(sb);
struct buffer_head *bh;
struct msdos_dir_entry *raw_entry;
loff_t i_pos;
vfat: fix 'sync' mount deadlock due to BKL->lock_super conversion There was another FAT BKL conversion deadlock reported by Bart Trojanowski due to the BKL being used as a recursive lock by FAT, which was missed because it only triggers with 'sync' (or 'dirsync') mounts. The recursion worked for the BKL, but after the conversion to lock_super (which uses a mutex), it just deadlocks. Thanks to Bart for debugging this and testing the fix. The lock debugging information from the original report: ============================================= [ INFO: possible recursive locking detected ] 2.6.27-rc3-bisect-00448-ga7f5aaf #16 --------------------------------------------- mv/4020 is trying to acquire lock: (&type->s_lock_key#9){--..}, at: [<c01a90fe>] lock_super+0x1e/0x20 but task is already holding lock: (&type->s_lock_key#9){--..}, at: [<c01a90fe>] lock_super+0x1e/0x20 other info that might help us debug this: 3 locks held by mv/4020: #0: (&sb->s_type->i_mutex_key#9/1){--..}, at: [<c01b2336>] do_unlinkat+0x66/0x140 #1: (&sb->s_type->i_mutex_key#9){--..}, at: [<c01b0954>] vfs_unlink+0x84/0x110 #2: (&type->s_lock_key#9){--..}, at: [<c01a90fe>] lock_super+0x1e/0x20 stack backtrace: Pid: 4020, comm: mv Not tainted 2.6.27-rc3-bisect-00448-ga7f5aaf #16 [<c014e694>] validate_chain+0x984/0xea0 [<c0108d70>] ? native_sched_clock+0x0/0xf0 [<c014ee9c>] __lock_acquire+0x2ec/0x9b0 [<c014f5cf>] lock_acquire+0x6f/0x90 [<c01a90fe>] ? lock_super+0x1e/0x20 [<c044e5fd>] mutex_lock_nested+0xad/0x300 [<c01a90fe>] ? lock_super+0x1e/0x20 [<c01a90fe>] ? lock_super+0x1e/0x20 [<c01a90fe>] lock_super+0x1e/0x20 [<f8b3a700>] fat_write_inode+0x60/0x2b0 [fat] [<c0450878>] ? _spin_unlock_irqrestore+0x48/0x80 [<f8b3a953>] ? fat_sync_inode+0x3/0x20 [fat] [<f8b3a962>] fat_sync_inode+0x12/0x20 [fat] [<f8b37c7e>] fat_remove_entries+0xbe/0x120 [fat] [<f8b422ef>] vfat_unlink+0x5f/0x90 [vfat] [<f8b42290>] ? vfat_unlink+0x0/0x90 [vfat] [<c01b0968>] vfs_unlink+0x98/0x110 [<c01b2400>] do_unlinkat+0x130/0x140 [<c016a8f5>] ? audit_syscall_entry+0x105/0x150 [<c01b253b>] sys_unlinkat+0x3b/0x40 [<c01040d3>] sysenter_do_call+0x12/0x3f ======================= where the deadlock is due to the nesting of lock_super from vfat_unlink to fat_write_inode: - do_unlinkat - vfs_unlink - vfat_unlink * lock_super - fat_remove_entries - fat_sync_inode - fat_write_inode * lock_super and the fix is to simply remove the use of lock_super() in fat_write_inode. The lock_super() there had been just an automatic conversion of the kernel lock to the superblock lock, but no locking was actually needed there, since the code in fat_write_inode already protected all relevant accesses with a spinlock (sbi->inode_hash_lock to be exact). The only code inside the BKL (and thus the superblock lock) was accesses tp local variables or calls to functions that have long been SMP-safe (i.e. sb_bread, mark_buffe_dirty and brlese). Bart reports: "Looks good. I ran 10 parallel processes creating 1M files truncating them, writing to them again and then deleting them. This patch fixes the issue I ran into. Signed-off-by: Bart Trojanowski <bart@jukie.net>" Reported-and-tested-by: Bart Trojanowski <bart@jukie.net> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-08-20 22:31:19 +07:00
int err;
if (inode->i_ino == MSDOS_ROOT_INO)
return 0;
retry:
i_pos = fat_i_pos_read(sbi, inode);
if (!i_pos)
return 0;
bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits);
if (!bh) {
fat_msg(sb, KERN_ERR, "unable to read inode block "
"for updating (i_pos %lld)", i_pos);
vfat: fix 'sync' mount deadlock due to BKL->lock_super conversion There was another FAT BKL conversion deadlock reported by Bart Trojanowski due to the BKL being used as a recursive lock by FAT, which was missed because it only triggers with 'sync' (or 'dirsync') mounts. The recursion worked for the BKL, but after the conversion to lock_super (which uses a mutex), it just deadlocks. Thanks to Bart for debugging this and testing the fix. The lock debugging information from the original report: ============================================= [ INFO: possible recursive locking detected ] 2.6.27-rc3-bisect-00448-ga7f5aaf #16 --------------------------------------------- mv/4020 is trying to acquire lock: (&type->s_lock_key#9){--..}, at: [<c01a90fe>] lock_super+0x1e/0x20 but task is already holding lock: (&type->s_lock_key#9){--..}, at: [<c01a90fe>] lock_super+0x1e/0x20 other info that might help us debug this: 3 locks held by mv/4020: #0: (&sb->s_type->i_mutex_key#9/1){--..}, at: [<c01b2336>] do_unlinkat+0x66/0x140 #1: (&sb->s_type->i_mutex_key#9){--..}, at: [<c01b0954>] vfs_unlink+0x84/0x110 #2: (&type->s_lock_key#9){--..}, at: [<c01a90fe>] lock_super+0x1e/0x20 stack backtrace: Pid: 4020, comm: mv Not tainted 2.6.27-rc3-bisect-00448-ga7f5aaf #16 [<c014e694>] validate_chain+0x984/0xea0 [<c0108d70>] ? native_sched_clock+0x0/0xf0 [<c014ee9c>] __lock_acquire+0x2ec/0x9b0 [<c014f5cf>] lock_acquire+0x6f/0x90 [<c01a90fe>] ? lock_super+0x1e/0x20 [<c044e5fd>] mutex_lock_nested+0xad/0x300 [<c01a90fe>] ? lock_super+0x1e/0x20 [<c01a90fe>] ? lock_super+0x1e/0x20 [<c01a90fe>] lock_super+0x1e/0x20 [<f8b3a700>] fat_write_inode+0x60/0x2b0 [fat] [<c0450878>] ? _spin_unlock_irqrestore+0x48/0x80 [<f8b3a953>] ? fat_sync_inode+0x3/0x20 [fat] [<f8b3a962>] fat_sync_inode+0x12/0x20 [fat] [<f8b37c7e>] fat_remove_entries+0xbe/0x120 [fat] [<f8b422ef>] vfat_unlink+0x5f/0x90 [vfat] [<f8b42290>] ? vfat_unlink+0x0/0x90 [vfat] [<c01b0968>] vfs_unlink+0x98/0x110 [<c01b2400>] do_unlinkat+0x130/0x140 [<c016a8f5>] ? audit_syscall_entry+0x105/0x150 [<c01b253b>] sys_unlinkat+0x3b/0x40 [<c01040d3>] sysenter_do_call+0x12/0x3f ======================= where the deadlock is due to the nesting of lock_super from vfat_unlink to fat_write_inode: - do_unlinkat - vfs_unlink - vfat_unlink * lock_super - fat_remove_entries - fat_sync_inode - fat_write_inode * lock_super and the fix is to simply remove the use of lock_super() in fat_write_inode. The lock_super() there had been just an automatic conversion of the kernel lock to the superblock lock, but no locking was actually needed there, since the code in fat_write_inode already protected all relevant accesses with a spinlock (sbi->inode_hash_lock to be exact). The only code inside the BKL (and thus the superblock lock) was accesses tp local variables or calls to functions that have long been SMP-safe (i.e. sb_bread, mark_buffe_dirty and brlese). Bart reports: "Looks good. I ran 10 parallel processes creating 1M files truncating them, writing to them again and then deleting them. This patch fixes the issue I ran into. Signed-off-by: Bart Trojanowski <bart@jukie.net>" Reported-and-tested-by: Bart Trojanowski <bart@jukie.net> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-08-20 22:31:19 +07:00
return -EIO;
}
spin_lock(&sbi->inode_hash_lock);
if (i_pos != MSDOS_I(inode)->i_pos) {
spin_unlock(&sbi->inode_hash_lock);
brelse(bh);
goto retry;
}
raw_entry = &((struct msdos_dir_entry *) (bh->b_data))
[i_pos & (sbi->dir_per_block - 1)];
if (S_ISDIR(inode->i_mode))
raw_entry->size = 0;
else
raw_entry->size = cpu_to_le32(inode->i_size);
raw_entry->attr = fat_make_attrs(inode);
fat_set_start(raw_entry, MSDOS_I(inode)->i_logstart);
fat_time_unix2fat(sbi, &inode->i_mtime, &raw_entry->time,
&raw_entry->date, NULL);
if (sbi->options.isvfat) {
__le16 atime;
fat_time_unix2fat(sbi, &inode->i_ctime, &raw_entry->ctime,
&raw_entry->cdate, &raw_entry->ctime_cs);
fat_time_unix2fat(sbi, &inode->i_atime, &atime,
&raw_entry->adate, NULL);
}
spin_unlock(&sbi->inode_hash_lock);
mark_buffer_dirty(bh);
vfat: fix 'sync' mount deadlock due to BKL->lock_super conversion There was another FAT BKL conversion deadlock reported by Bart Trojanowski due to the BKL being used as a recursive lock by FAT, which was missed because it only triggers with 'sync' (or 'dirsync') mounts. The recursion worked for the BKL, but after the conversion to lock_super (which uses a mutex), it just deadlocks. Thanks to Bart for debugging this and testing the fix. The lock debugging information from the original report: ============================================= [ INFO: possible recursive locking detected ] 2.6.27-rc3-bisect-00448-ga7f5aaf #16 --------------------------------------------- mv/4020 is trying to acquire lock: (&type->s_lock_key#9){--..}, at: [<c01a90fe>] lock_super+0x1e/0x20 but task is already holding lock: (&type->s_lock_key#9){--..}, at: [<c01a90fe>] lock_super+0x1e/0x20 other info that might help us debug this: 3 locks held by mv/4020: #0: (&sb->s_type->i_mutex_key#9/1){--..}, at: [<c01b2336>] do_unlinkat+0x66/0x140 #1: (&sb->s_type->i_mutex_key#9){--..}, at: [<c01b0954>] vfs_unlink+0x84/0x110 #2: (&type->s_lock_key#9){--..}, at: [<c01a90fe>] lock_super+0x1e/0x20 stack backtrace: Pid: 4020, comm: mv Not tainted 2.6.27-rc3-bisect-00448-ga7f5aaf #16 [<c014e694>] validate_chain+0x984/0xea0 [<c0108d70>] ? native_sched_clock+0x0/0xf0 [<c014ee9c>] __lock_acquire+0x2ec/0x9b0 [<c014f5cf>] lock_acquire+0x6f/0x90 [<c01a90fe>] ? lock_super+0x1e/0x20 [<c044e5fd>] mutex_lock_nested+0xad/0x300 [<c01a90fe>] ? lock_super+0x1e/0x20 [<c01a90fe>] ? lock_super+0x1e/0x20 [<c01a90fe>] lock_super+0x1e/0x20 [<f8b3a700>] fat_write_inode+0x60/0x2b0 [fat] [<c0450878>] ? _spin_unlock_irqrestore+0x48/0x80 [<f8b3a953>] ? fat_sync_inode+0x3/0x20 [fat] [<f8b3a962>] fat_sync_inode+0x12/0x20 [fat] [<f8b37c7e>] fat_remove_entries+0xbe/0x120 [fat] [<f8b422ef>] vfat_unlink+0x5f/0x90 [vfat] [<f8b42290>] ? vfat_unlink+0x0/0x90 [vfat] [<c01b0968>] vfs_unlink+0x98/0x110 [<c01b2400>] do_unlinkat+0x130/0x140 [<c016a8f5>] ? audit_syscall_entry+0x105/0x150 [<c01b253b>] sys_unlinkat+0x3b/0x40 [<c01040d3>] sysenter_do_call+0x12/0x3f ======================= where the deadlock is due to the nesting of lock_super from vfat_unlink to fat_write_inode: - do_unlinkat - vfs_unlink - vfat_unlink * lock_super - fat_remove_entries - fat_sync_inode - fat_write_inode * lock_super and the fix is to simply remove the use of lock_super() in fat_write_inode. The lock_super() there had been just an automatic conversion of the kernel lock to the superblock lock, but no locking was actually needed there, since the code in fat_write_inode already protected all relevant accesses with a spinlock (sbi->inode_hash_lock to be exact). The only code inside the BKL (and thus the superblock lock) was accesses tp local variables or calls to functions that have long been SMP-safe (i.e. sb_bread, mark_buffe_dirty and brlese). Bart reports: "Looks good. I ran 10 parallel processes creating 1M files truncating them, writing to them again and then deleting them. This patch fixes the issue I ran into. Signed-off-by: Bart Trojanowski <bart@jukie.net>" Reported-and-tested-by: Bart Trojanowski <bart@jukie.net> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-08-20 22:31:19 +07:00
err = 0;
if (wait)
err = sync_dirty_buffer(bh);
brelse(bh);
return err;
}
static int fat_write_inode(struct inode *inode, struct writeback_control *wbc)
{
fat: switch to fsinfo_inode Currently FAT file-system maps the VFS "superblock" abstraction to the FSINFO block. The FSINFO block contains non-essential data about the amount of free clusters and the next free cluster. FAT file-system can always find out this information by scanning the FAT table, but having it in the FSINFO block may speed things up sometimes. So FAT file-system relies on the VFS superblock write-out services to make sure the FSINFO block is written out to the media from time to time. The whole "superblock write-out" VFS infrastructure is served by the 'sync_supers()' kernel thread, which wakes up every 5 (by default) seconds and writes out all dirty superblock using the '->write_super()' call-back. But the problem with this thread is that it wastes power by waking up the system every 5 seconds no matter what. So we want to kill it completely and thus, we need to make file-systems to stop using the '->write_super' VFS service, and then remove it together with the kernel thread. This patch switches the FAT FSINFO block management from '->write_super()'/'->s_dirt' to 'fsinfo_inode'/'->write_inode'. Now, instead of setting the 's_dirt' flag, we just mark the special 'fsinfo_inode' inode as dirty and let VFS invoke the '->write_inode' call-back when needed, where we write-out the FSINFO block. This patch also makes sure we do not mark the 'fsinfo_inode' inode as dirty if we are not FAT32 (FAT16 and FAT12 do not have the FSINFO block) or if we are in R/O mode. As a bonus, we can also remove the '->sync_fs()' and '->write_super()' FAT call-back function because they become unneeded. Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com> Cc: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> Cc: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2012-06-01 06:26:13 +07:00
int err;
if (inode->i_ino == MSDOS_FSINFO_INO) {
struct super_block *sb = inode->i_sb;
mutex_lock(&MSDOS_SB(sb)->s_lock);
fat: switch to fsinfo_inode Currently FAT file-system maps the VFS "superblock" abstraction to the FSINFO block. The FSINFO block contains non-essential data about the amount of free clusters and the next free cluster. FAT file-system can always find out this information by scanning the FAT table, but having it in the FSINFO block may speed things up sometimes. So FAT file-system relies on the VFS superblock write-out services to make sure the FSINFO block is written out to the media from time to time. The whole "superblock write-out" VFS infrastructure is served by the 'sync_supers()' kernel thread, which wakes up every 5 (by default) seconds and writes out all dirty superblock using the '->write_super()' call-back. But the problem with this thread is that it wastes power by waking up the system every 5 seconds no matter what. So we want to kill it completely and thus, we need to make file-systems to stop using the '->write_super' VFS service, and then remove it together with the kernel thread. This patch switches the FAT FSINFO block management from '->write_super()'/'->s_dirt' to 'fsinfo_inode'/'->write_inode'. Now, instead of setting the 's_dirt' flag, we just mark the special 'fsinfo_inode' inode as dirty and let VFS invoke the '->write_inode' call-back when needed, where we write-out the FSINFO block. This patch also makes sure we do not mark the 'fsinfo_inode' inode as dirty if we are not FAT32 (FAT16 and FAT12 do not have the FSINFO block) or if we are in R/O mode. As a bonus, we can also remove the '->sync_fs()' and '->write_super()' FAT call-back function because they become unneeded. Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com> Cc: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> Cc: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2012-06-01 06:26:13 +07:00
err = fat_clusters_flush(sb);
mutex_unlock(&MSDOS_SB(sb)->s_lock);
fat: switch to fsinfo_inode Currently FAT file-system maps the VFS "superblock" abstraction to the FSINFO block. The FSINFO block contains non-essential data about the amount of free clusters and the next free cluster. FAT file-system can always find out this information by scanning the FAT table, but having it in the FSINFO block may speed things up sometimes. So FAT file-system relies on the VFS superblock write-out services to make sure the FSINFO block is written out to the media from time to time. The whole "superblock write-out" VFS infrastructure is served by the 'sync_supers()' kernel thread, which wakes up every 5 (by default) seconds and writes out all dirty superblock using the '->write_super()' call-back. But the problem with this thread is that it wastes power by waking up the system every 5 seconds no matter what. So we want to kill it completely and thus, we need to make file-systems to stop using the '->write_super' VFS service, and then remove it together with the kernel thread. This patch switches the FAT FSINFO block management from '->write_super()'/'->s_dirt' to 'fsinfo_inode'/'->write_inode'. Now, instead of setting the 's_dirt' flag, we just mark the special 'fsinfo_inode' inode as dirty and let VFS invoke the '->write_inode' call-back when needed, where we write-out the FSINFO block. This patch also makes sure we do not mark the 'fsinfo_inode' inode as dirty if we are not FAT32 (FAT16 and FAT12 do not have the FSINFO block) or if we are in R/O mode. As a bonus, we can also remove the '->sync_fs()' and '->write_super()' FAT call-back function because they become unneeded. Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com> Cc: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> Cc: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2012-06-01 06:26:13 +07:00
} else
err = __fat_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
return err;
}
int fat_sync_inode(struct inode *inode)
{
return __fat_write_inode(inode, 1);
}
EXPORT_SYMBOL_GPL(fat_sync_inode);
static int fat_show_options(struct seq_file *m, struct dentry *root);
static const struct super_operations fat_sops = {
.alloc_inode = fat_alloc_inode,
.destroy_inode = fat_destroy_inode,
.write_inode = fat_write_inode,
.evict_inode = fat_evict_inode,
.put_super = fat_put_super,
.statfs = fat_statfs,
.remount_fs = fat_remount,
.show_options = fat_show_options,
};
static const struct export_operations fat_export_ops = {
.fh_to_dentry = fat_fh_to_dentry,
.fh_to_parent = fat_fh_to_parent,
.get_parent = fat_get_parent,
};
static int fat_show_options(struct seq_file *m, struct dentry *root)
{
struct msdos_sb_info *sbi = MSDOS_SB(root->d_sb);
struct fat_mount_options *opts = &sbi->options;
int isvfat = opts->isvfat;
if (!uid_eq(opts->fs_uid, GLOBAL_ROOT_UID))
seq_printf(m, ",uid=%u",
from_kuid_munged(&init_user_ns, opts->fs_uid));
if (!gid_eq(opts->fs_gid, GLOBAL_ROOT_GID))
seq_printf(m, ",gid=%u",
from_kgid_munged(&init_user_ns, opts->fs_gid));
seq_printf(m, ",fmask=%04o", opts->fs_fmask);
seq_printf(m, ",dmask=%04o", opts->fs_dmask);
if (opts->allow_utime)
seq_printf(m, ",allow_utime=%04o", opts->allow_utime);
if (sbi->nls_disk)
/* strip "cp" prefix from displayed option */
seq_printf(m, ",codepage=%s", &sbi->nls_disk->charset[2]);
if (isvfat) {
if (sbi->nls_io)
seq_printf(m, ",iocharset=%s", sbi->nls_io->charset);
switch (opts->shortname) {
case VFAT_SFN_DISPLAY_WIN95 | VFAT_SFN_CREATE_WIN95:
seq_puts(m, ",shortname=win95");
break;
case VFAT_SFN_DISPLAY_WINNT | VFAT_SFN_CREATE_WINNT:
seq_puts(m, ",shortname=winnt");
break;
case VFAT_SFN_DISPLAY_WINNT | VFAT_SFN_CREATE_WIN95:
seq_puts(m, ",shortname=mixed");
break;
case VFAT_SFN_DISPLAY_LOWER | VFAT_SFN_CREATE_WIN95:
seq_puts(m, ",shortname=lower");
break;
default:
seq_puts(m, ",shortname=unknown");
break;
}
}
if (opts->name_check != 'n')
seq_printf(m, ",check=%c", opts->name_check);
if (opts->usefree)
seq_puts(m, ",usefree");
if (opts->quiet)
seq_puts(m, ",quiet");
if (opts->nfs)
seq_puts(m, ",nfs");
if (opts->showexec)
seq_puts(m, ",showexec");
if (opts->sys_immutable)
seq_puts(m, ",sys_immutable");
if (!isvfat) {
if (opts->dotsOK)
seq_puts(m, ",dotsOK=yes");
if (opts->nocase)
seq_puts(m, ",nocase");
} else {
if (opts->utf8)
seq_puts(m, ",utf8");
if (opts->unicode_xlate)
seq_puts(m, ",uni_xlate");
if (!opts->numtail)
seq_puts(m, ",nonumtail");
if (opts->rodir)
seq_puts(m, ",rodir");
}
if (opts->flush)
seq_puts(m, ",flush");
if (opts->tz_set) {
if (opts->time_offset)
seq_printf(m, ",time_offset=%d", opts->time_offset);
else
seq_puts(m, ",tz=UTC");
}
if (opts->errors == FAT_ERRORS_CONT)
seq_puts(m, ",errors=continue");
else if (opts->errors == FAT_ERRORS_PANIC)
seq_puts(m, ",errors=panic");
else
seq_puts(m, ",errors=remount-ro");
if (opts->discard)
seq_puts(m, ",discard");
return 0;
}
enum {
Opt_check_n, Opt_check_r, Opt_check_s, Opt_uid, Opt_gid,
Opt_umask, Opt_dmask, Opt_fmask, Opt_allow_utime, Opt_codepage,
Opt_usefree, Opt_nocase, Opt_quiet, Opt_showexec, Opt_debug,
Opt_immutable, Opt_dots, Opt_nodots,
Opt_charset, Opt_shortname_lower, Opt_shortname_win95,
Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes,
Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes,
Opt_obsolete, Opt_flush, Opt_tz_utc, Opt_rodir, Opt_err_cont,
Opt_err_panic, Opt_err_ro, Opt_discard, Opt_nfs, Opt_time_offset,
Opt_err,
};
static const match_table_t fat_tokens = {
{Opt_check_r, "check=relaxed"},
{Opt_check_s, "check=strict"},
{Opt_check_n, "check=normal"},
{Opt_check_r, "check=r"},
{Opt_check_s, "check=s"},
{Opt_check_n, "check=n"},
{Opt_uid, "uid=%u"},
{Opt_gid, "gid=%u"},
{Opt_umask, "umask=%o"},
{Opt_dmask, "dmask=%o"},
{Opt_fmask, "fmask=%o"},
{Opt_allow_utime, "allow_utime=%o"},
{Opt_codepage, "codepage=%u"},
{Opt_usefree, "usefree"},
{Opt_nocase, "nocase"},
{Opt_quiet, "quiet"},
{Opt_showexec, "showexec"},
{Opt_debug, "debug"},
{Opt_immutable, "sys_immutable"},
{Opt_flush, "flush"},
{Opt_tz_utc, "tz=UTC"},
{Opt_time_offset, "time_offset=%d"},
{Opt_err_cont, "errors=continue"},
{Opt_err_panic, "errors=panic"},
{Opt_err_ro, "errors=remount-ro"},
{Opt_discard, "discard"},
{Opt_nfs, "nfs"},
{Opt_obsolete, "conv=binary"},
{Opt_obsolete, "conv=text"},
{Opt_obsolete, "conv=auto"},
{Opt_obsolete, "conv=b"},
{Opt_obsolete, "conv=t"},
{Opt_obsolete, "conv=a"},
{Opt_obsolete, "fat=%u"},
{Opt_obsolete, "blocksize=%u"},
{Opt_obsolete, "cvf_format=%20s"},
{Opt_obsolete, "cvf_options=%100s"},
{Opt_obsolete, "posix"},
{Opt_err, NULL},
};
static const match_table_t msdos_tokens = {
{Opt_nodots, "nodots"},
{Opt_nodots, "dotsOK=no"},
{Opt_dots, "dots"},
{Opt_dots, "dotsOK=yes"},
{Opt_err, NULL}
};
static const match_table_t vfat_tokens = {
{Opt_charset, "iocharset=%s"},
{Opt_shortname_lower, "shortname=lower"},
{Opt_shortname_win95, "shortname=win95"},
{Opt_shortname_winnt, "shortname=winnt"},
{Opt_shortname_mixed, "shortname=mixed"},
{Opt_utf8_no, "utf8=0"}, /* 0 or no or false */
{Opt_utf8_no, "utf8=no"},
{Opt_utf8_no, "utf8=false"},
{Opt_utf8_yes, "utf8=1"}, /* empty or 1 or yes or true */
{Opt_utf8_yes, "utf8=yes"},
{Opt_utf8_yes, "utf8=true"},
{Opt_utf8_yes, "utf8"},
{Opt_uni_xl_no, "uni_xlate=0"}, /* 0 or no or false */
{Opt_uni_xl_no, "uni_xlate=no"},
{Opt_uni_xl_no, "uni_xlate=false"},
{Opt_uni_xl_yes, "uni_xlate=1"}, /* empty or 1 or yes or true */
{Opt_uni_xl_yes, "uni_xlate=yes"},
{Opt_uni_xl_yes, "uni_xlate=true"},
{Opt_uni_xl_yes, "uni_xlate"},
{Opt_nonumtail_no, "nonumtail=0"}, /* 0 or no or false */
{Opt_nonumtail_no, "nonumtail=no"},
{Opt_nonumtail_no, "nonumtail=false"},
{Opt_nonumtail_yes, "nonumtail=1"}, /* empty or 1 or yes or true */
{Opt_nonumtail_yes, "nonumtail=yes"},
{Opt_nonumtail_yes, "nonumtail=true"},
{Opt_nonumtail_yes, "nonumtail"},
{Opt_rodir, "rodir"},
{Opt_err, NULL}
};
static int parse_options(struct super_block *sb, char *options, int is_vfat,
int silent, int *debug, struct fat_mount_options *opts)
{
char *p;
substring_t args[MAX_OPT_ARGS];
int option;
char *iocharset;
opts->isvfat = is_vfat;
opts->fs_uid = current_uid();
opts->fs_gid = current_gid();
opts->fs_fmask = opts->fs_dmask = current_umask();
opts->allow_utime = -1;
opts->codepage = fat_default_codepage;
opts->iocharset = fat_default_iocharset;
if (is_vfat) {
opts->shortname = VFAT_SFN_DISPLAY_WINNT|VFAT_SFN_CREATE_WIN95;
opts->rodir = 0;
} else {
opts->shortname = 0;
opts->rodir = 1;
}
opts->name_check = 'n';
opts->quiet = opts->showexec = opts->sys_immutable = opts->dotsOK = 0;
opts->utf8 = opts->unicode_xlate = 0;
opts->numtail = 1;
opts->usefree = opts->nocase = 0;
opts->tz_set = 0;
opts->nfs = 0;
opts->errors = FAT_ERRORS_RO;
*debug = 0;
if (!options)
goto out;
while ((p = strsep(&options, ",")) != NULL) {
int token;
if (!*p)
continue;
token = match_token(p, fat_tokens, args);
if (token == Opt_err) {
if (is_vfat)
token = match_token(p, vfat_tokens, args);
else
token = match_token(p, msdos_tokens, args);
}
switch (token) {
case Opt_check_s:
opts->name_check = 's';
break;
case Opt_check_r:
opts->name_check = 'r';
break;
case Opt_check_n:
opts->name_check = 'n';
break;
case Opt_usefree:
opts->usefree = 1;
break;
case Opt_nocase:
if (!is_vfat)
opts->nocase = 1;
else {
/* for backward compatibility */
opts->shortname = VFAT_SFN_DISPLAY_WIN95
| VFAT_SFN_CREATE_WIN95;
}
break;
case Opt_quiet:
opts->quiet = 1;
break;
case Opt_showexec:
opts->showexec = 1;
break;
case Opt_debug:
*debug = 1;
break;
case Opt_immutable:
opts->sys_immutable = 1;
break;
case Opt_uid:
if (match_int(&args[0], &option))
return -EINVAL;
opts->fs_uid = make_kuid(current_user_ns(), option);
if (!uid_valid(opts->fs_uid))
return -EINVAL;
break;
case Opt_gid:
if (match_int(&args[0], &option))
return -EINVAL;
opts->fs_gid = make_kgid(current_user_ns(), option);
if (!gid_valid(opts->fs_gid))
return -EINVAL;
break;
case Opt_umask:
if (match_octal(&args[0], &option))
return -EINVAL;
opts->fs_fmask = opts->fs_dmask = option;
break;
case Opt_dmask:
if (match_octal(&args[0], &option))
return -EINVAL;
opts->fs_dmask = option;
break;
case Opt_fmask:
if (match_octal(&args[0], &option))
return -EINVAL;
opts->fs_fmask = option;
break;
case Opt_allow_utime:
if (match_octal(&args[0], &option))
return -EINVAL;
opts->allow_utime = option & (S_IWGRP | S_IWOTH);
break;
case Opt_codepage:
if (match_int(&args[0], &option))
return -EINVAL;
opts->codepage = option;
break;
case Opt_flush:
opts->flush = 1;
break;
case Opt_time_offset:
if (match_int(&args[0], &option))
return -EINVAL;
if (option < -12 * 60 || option > 12 * 60)
return -EINVAL;
opts->tz_set = 1;
opts->time_offset = option;
break;
fatfs: add UTC timestamp option Provide a new mount option ("tz=UTC") for DOS (vfat/msdos) filesystems, allowing timestamps to be in coordinated universal time (UTC) rather than local time in applications where doing this is advantageous. In particular, portable devices that use fat/vfat (such as digital cameras) can benefit from using UTC in their internal clocks, thus avoiding daylight saving time errors and general time ambiguity issues. The user of the device does not have to worry about changing the time when moving from place or when daylight saving changes. The new mount option, when set, disables the counter-adjustment that Linux currently makes to FAT timestamp info in anticipation of the normal userspace time zone correction. When used in this new mode, all daylight saving time and time zone handling is done in userspace as is normal for many other filesystems (like ext3). The default mode, which remains unchanged, is still appropriate when mounting volumes written in Windows (because of its use of local time). I originally based this patch on one submitted last year by Paul Collins, but I updated it to work with current source and changed variable/option naming. Ogawa Hirofumi (who maintains these filesystems) and I discussed this patch at length on lkml, and he suggested using the option name in the attached version of the patch. Barry Bouwsma pointed out a good addition to the patch as well. Signed-off-by: Joe Peterson <joe@skyrush.com> Signed-off-by: Paul Collins <paul@ondioline.org> Acked-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> Cc: Barry Bouwsma <free_beer_for_all@yahoo.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-07-25 15:46:47 +07:00
case Opt_tz_utc:
opts->tz_set = 1;
opts->time_offset = 0;
fatfs: add UTC timestamp option Provide a new mount option ("tz=UTC") for DOS (vfat/msdos) filesystems, allowing timestamps to be in coordinated universal time (UTC) rather than local time in applications where doing this is advantageous. In particular, portable devices that use fat/vfat (such as digital cameras) can benefit from using UTC in their internal clocks, thus avoiding daylight saving time errors and general time ambiguity issues. The user of the device does not have to worry about changing the time when moving from place or when daylight saving changes. The new mount option, when set, disables the counter-adjustment that Linux currently makes to FAT timestamp info in anticipation of the normal userspace time zone correction. When used in this new mode, all daylight saving time and time zone handling is done in userspace as is normal for many other filesystems (like ext3). The default mode, which remains unchanged, is still appropriate when mounting volumes written in Windows (because of its use of local time). I originally based this patch on one submitted last year by Paul Collins, but I updated it to work with current source and changed variable/option naming. Ogawa Hirofumi (who maintains these filesystems) and I discussed this patch at length on lkml, and he suggested using the option name in the attached version of the patch. Barry Bouwsma pointed out a good addition to the patch as well. Signed-off-by: Joe Peterson <joe@skyrush.com> Signed-off-by: Paul Collins <paul@ondioline.org> Acked-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> Cc: Barry Bouwsma <free_beer_for_all@yahoo.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-07-25 15:46:47 +07:00
break;
case Opt_err_cont:
opts->errors = FAT_ERRORS_CONT;
break;
case Opt_err_panic:
opts->errors = FAT_ERRORS_PANIC;
break;
case Opt_err_ro:
opts->errors = FAT_ERRORS_RO;
break;
/* msdos specific */
case Opt_dots:
opts->dotsOK = 1;
break;
case Opt_nodots:
opts->dotsOK = 0;
break;
/* vfat specific */
case Opt_charset:
if (opts->iocharset != fat_default_iocharset)
kfree(opts->iocharset);
iocharset = match_strdup(&args[0]);
if (!iocharset)
return -ENOMEM;
opts->iocharset = iocharset;
break;
case Opt_shortname_lower:
opts->shortname = VFAT_SFN_DISPLAY_LOWER
| VFAT_SFN_CREATE_WIN95;
break;
case Opt_shortname_win95:
opts->shortname = VFAT_SFN_DISPLAY_WIN95
| VFAT_SFN_CREATE_WIN95;
break;
case Opt_shortname_winnt:
opts->shortname = VFAT_SFN_DISPLAY_WINNT
| VFAT_SFN_CREATE_WINNT;
break;
case Opt_shortname_mixed:
opts->shortname = VFAT_SFN_DISPLAY_WINNT
| VFAT_SFN_CREATE_WIN95;
break;
case Opt_utf8_no: /* 0 or no or false */
opts->utf8 = 0;
break;
case Opt_utf8_yes: /* empty or 1 or yes or true */
opts->utf8 = 1;
break;
case Opt_uni_xl_no: /* 0 or no or false */
opts->unicode_xlate = 0;
break;
case Opt_uni_xl_yes: /* empty or 1 or yes or true */
opts->unicode_xlate = 1;
break;
case Opt_nonumtail_no: /* 0 or no or false */
opts->numtail = 1; /* negated option */
break;
case Opt_nonumtail_yes: /* empty or 1 or yes or true */
opts->numtail = 0; /* negated option */
break;
case Opt_rodir:
opts->rodir = 1;
break;
case Opt_discard:
opts->discard = 1;
break;
case Opt_nfs:
opts->nfs = 1;
break;
/* obsolete mount options */
case Opt_obsolete:
fat_msg(sb, KERN_INFO, "\"%s\" option is obsolete, "
"not supported now", p);
break;
/* unknown option */
default:
if (!silent) {
fat_msg(sb, KERN_ERR,
"Unrecognized mount option \"%s\" "
"or missing value", p);
}
return -EINVAL;
}
}
out:
/* UTF-8 doesn't provide FAT semantics */
if (!strcmp(opts->iocharset, "utf8")) {
fat_msg(sb, KERN_WARNING, "utf8 is not a recommended IO charset"
" for FAT filesystems, filesystem will be "
"case sensitive!");
}
/* If user doesn't specify allow_utime, it's initialized from dmask. */
if (opts->allow_utime == (unsigned short)-1)
opts->allow_utime = ~opts->fs_dmask & (S_IWGRP | S_IWOTH);
if (opts->unicode_xlate)
opts->utf8 = 0;
return 0;
}
static int fat_read_root(struct inode *inode)
{
struct super_block *sb = inode->i_sb;
struct msdos_sb_info *sbi = MSDOS_SB(sb);
int error;
MSDOS_I(inode)->i_pos = 0;
inode->i_uid = sbi->options.fs_uid;
inode->i_gid = sbi->options.fs_gid;
inode->i_version++;
inode->i_generation = 0;
inode->i_mode = fat_make_mode(sbi, ATTR_DIR, S_IRWXUGO);
inode->i_op = sbi->dir_ops;
inode->i_fop = &fat_dir_operations;
if (sbi->fat_bits == 32) {
MSDOS_I(inode)->i_start = sbi->root_cluster;
error = fat_calc_dir_size(inode);
if (error < 0)
return error;
} else {
MSDOS_I(inode)->i_start = 0;
inode->i_size = sbi->dir_entries * sizeof(struct msdos_dir_entry);
}
inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1))
& ~((loff_t)sbi->cluster_size - 1)) >> 9;
MSDOS_I(inode)->i_logstart = 0;
MSDOS_I(inode)->mmu_private = inode->i_size;
fat_save_attrs(inode, ATTR_DIR);
inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec = 0;
inode->i_mtime.tv_nsec = inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = 0;
set_nlink(inode, fat_subdirs(inode)+2);
return 0;
}
/*
* Read the super block of an MS-DOS FS.
*/
int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
void (*setup)(struct super_block *))
{
struct inode *root_inode = NULL, *fat_inode = NULL;
fat: introduce special inode for managing the FSINFO block This is patchset makes fatfs stop using the VFS '->write_super()' method for writing out the FSINFO block. The final goal is to get rid of the 'sync_supers()' kernel thread. This kernel thread wakes up every 5 seconds (by default) and calls '->write_super()' for all mounted file-systems. And the bad thing is that this is done even if all the superblocks are clean. Moreover, some file-systems do not even need this end they do not register the '->write_super()' method at all (e.g., btrfs). So 'sync_supers()' most often just generates useless wake-ups and wastes power. I am trying to make all file-systems independent of '->write_super()' and plan to remove 'sync_supers()' and '->write_super' completely once there are no more users. The '->write_supers()' method is mostly used by baroque file-systems like hfs, udf, etc. Modern file-systems like btrfs and xfs do not use it. This justifies removing this stuff from VFS completely and make every FS self-manage own superblock. Tested with xfstests. This patch: Preparation for further changes. It introduces a special inode ('fsinfo_inode') in FAT file-system which we'll later use for managing the FSINFO block. Note, this there is already one special inode ('fat_inode') which is used for managing the FAT tables. Introduce new 'MSDOS_FSINFO_INO' constant for this special inode. It is safe to do because FAT file-system does not store inode numbers on the media but generates them run-time. I've also cleaned up the comment to existing 'MSDOS_ROOT_INO' constant, while on it. Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com> Cc: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> Cc: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2012-06-01 06:26:12 +07:00
struct inode *fsinfo_inode = NULL;
struct buffer_head *bh;
struct fat_boot_sector *b;
struct msdos_sb_info *sbi;
u16 logical_sector_size;
u32 total_sectors, total_clusters, fat_clusters, rootdir_sectors;
int debug;
unsigned int media;
long error;
char buf[50];
/*
* GFP_KERNEL is ok here, because while we do hold the
* supeblock lock, memory pressure can't call back into
* the filesystem, since we're only just about to mount
* it and have no inodes etc active!
*/
sbi = kzalloc(sizeof(struct msdos_sb_info), GFP_KERNEL);
if (!sbi)
return -ENOMEM;
sb->s_fs_info = sbi;
sb->s_flags |= MS_NODIRATIME;
sb->s_magic = MSDOS_SUPER_MAGIC;
sb->s_op = &fat_sops;
sb->s_export_op = &fat_export_ops;
ratelimit_state_init(&sbi->ratelimit, DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
error = parse_options(sb, data, isvfat, silent, &debug, &sbi->options);
if (error)
goto out_fail;
setup(sb); /* flavour-specific stuff that needs options */
error = -EIO;
sb_min_blocksize(sb, 512);
bh = sb_bread(sb, 0);
if (bh == NULL) {
fat_msg(sb, KERN_ERR, "unable to read boot sector");
goto out_fail;
}
b = (struct fat_boot_sector *) bh->b_data;
if (!b->reserved) {
if (!silent)
fat_msg(sb, KERN_ERR, "bogus number of reserved sectors");
brelse(bh);
goto out_invalid;
}
if (!b->fats) {
if (!silent)
fat_msg(sb, KERN_ERR, "bogus number of FAT structure");
brelse(bh);
goto out_invalid;
}
/*
* Earlier we checked here that b->secs_track and b->head are nonzero,
* but it turns out valid FAT filesystems can have zero there.
*/
media = b->media;
if (!fat_valid_media(media)) {
if (!silent)
fat_msg(sb, KERN_ERR, "invalid media value (0x%02x)",
media);
brelse(bh);
goto out_invalid;
}
logical_sector_size = get_unaligned_le16(&b->sector_size);
if (!is_power_of_2(logical_sector_size)
|| (logical_sector_size < 512)
|| (logical_sector_size > 4096)) {
if (!silent)
fat_msg(sb, KERN_ERR, "bogus logical sector size %u",
logical_sector_size);
brelse(bh);
goto out_invalid;
}
sbi->sec_per_clus = b->sec_per_clus;
if (!is_power_of_2(sbi->sec_per_clus)) {
if (!silent)
fat_msg(sb, KERN_ERR, "bogus sectors per cluster %u",
sbi->sec_per_clus);
brelse(bh);
goto out_invalid;
}
if (logical_sector_size < sb->s_blocksize) {
fat_msg(sb, KERN_ERR, "logical sector size too small for device"
" (logical sector size = %u)", logical_sector_size);
brelse(bh);
goto out_fail;
}
if (logical_sector_size > sb->s_blocksize) {
brelse(bh);
if (!sb_set_blocksize(sb, logical_sector_size)) {
fat_msg(sb, KERN_ERR, "unable to set blocksize %u",
logical_sector_size);
goto out_fail;
}
bh = sb_bread(sb, 0);
if (bh == NULL) {
fat_msg(sb, KERN_ERR, "unable to read boot sector"
" (logical sector size = %lu)",
sb->s_blocksize);
goto out_fail;
}
b = (struct fat_boot_sector *) bh->b_data;
}
mutex_init(&sbi->s_lock);
sbi->cluster_size = sb->s_blocksize * sbi->sec_per_clus;
sbi->cluster_bits = ffs(sbi->cluster_size) - 1;
sbi->fats = b->fats;
sbi->fat_bits = 0; /* Don't know yet */
sbi->fat_start = le16_to_cpu(b->reserved);
sbi->fat_length = le16_to_cpu(b->fat_length);
sbi->root_cluster = 0;
sbi->free_clusters = -1; /* Don't know yet */
sbi->free_clus_valid = 0;
sbi->prev_free = FAT_START_ENT;
sb->s_maxbytes = 0xffffffff;
if (!sbi->fat_length && b->fat32_length) {
struct fat_boot_fsinfo *fsinfo;
struct buffer_head *fsinfo_bh;
/* Must be FAT32 */
sbi->fat_bits = 32;
sbi->fat_length = le32_to_cpu(b->fat32_length);
sbi->root_cluster = le32_to_cpu(b->root_cluster);
/* MC - if info_sector is 0, don't multiply by 0 */
sbi->fsinfo_sector = le16_to_cpu(b->info_sector);
if (sbi->fsinfo_sector == 0)
sbi->fsinfo_sector = 1;
fsinfo_bh = sb_bread(sb, sbi->fsinfo_sector);
if (fsinfo_bh == NULL) {
fat_msg(sb, KERN_ERR, "bread failed, FSINFO block"
" (sector = %lu)", sbi->fsinfo_sector);
brelse(bh);
goto out_fail;
}
fsinfo = (struct fat_boot_fsinfo *)fsinfo_bh->b_data;
if (!IS_FSINFO(fsinfo)) {
fat_msg(sb, KERN_WARNING, "Invalid FSINFO signature: "
"0x%08x, 0x%08x (sector = %lu)",
le32_to_cpu(fsinfo->signature1),
le32_to_cpu(fsinfo->signature2),
sbi->fsinfo_sector);
} else {
if (sbi->options.usefree)
sbi->free_clus_valid = 1;
sbi->free_clusters = le32_to_cpu(fsinfo->free_clusters);
sbi->prev_free = le32_to_cpu(fsinfo->next_cluster);
}
brelse(fsinfo_bh);
}
sbi->dir_per_block = sb->s_blocksize / sizeof(struct msdos_dir_entry);
sbi->dir_per_block_bits = ffs(sbi->dir_per_block) - 1;
sbi->dir_start = sbi->fat_start + sbi->fats * sbi->fat_length;
sbi->dir_entries = get_unaligned_le16(&b->dir_entries);
if (sbi->dir_entries & (sbi->dir_per_block - 1)) {
if (!silent)
fat_msg(sb, KERN_ERR, "bogus directory-entries per block"
" (%u)", sbi->dir_entries);
brelse(bh);
goto out_invalid;
}
rootdir_sectors = sbi->dir_entries
* sizeof(struct msdos_dir_entry) / sb->s_blocksize;
sbi->data_start = sbi->dir_start + rootdir_sectors;
total_sectors = get_unaligned_le16(&b->sectors);
if (total_sectors == 0)
total_sectors = le32_to_cpu(b->total_sect);
total_clusters = (total_sectors - sbi->data_start) / sbi->sec_per_clus;
if (sbi->fat_bits != 32)
sbi->fat_bits = (total_clusters > MAX_FAT12) ? 16 : 12;
/* check that FAT table does not overflow */
fat_clusters = sbi->fat_length * sb->s_blocksize * 8 / sbi->fat_bits;
total_clusters = min(total_clusters, fat_clusters - FAT_START_ENT);
if (total_clusters > MAX_FAT(sb)) {
if (!silent)
fat_msg(sb, KERN_ERR, "count of clusters too big (%u)",
total_clusters);
brelse(bh);
goto out_invalid;
}
sbi->max_cluster = total_clusters + FAT_START_ENT;
/* check the free_clusters, it's not necessarily correct */
if (sbi->free_clusters != -1 && sbi->free_clusters > total_clusters)
sbi->free_clusters = -1;
/* check the prev_free, it's not necessarily correct */
sbi->prev_free %= sbi->max_cluster;
if (sbi->prev_free < FAT_START_ENT)
sbi->prev_free = FAT_START_ENT;
brelse(bh);
/* set up enough so that it can read an inode */
fat_hash_init(sb);
dir_hash_init(sb);
fat_ent_access_init(sb);
/*
* The low byte of FAT's first entry must have same value with
* media-field. But in real world, too many devices is
* writing wrong value. So, removed that validity check.
*
* if (FAT_FIRST_ENT(sb, media) != first)
*/
error = -EINVAL;
sprintf(buf, "cp%d", sbi->options.codepage);
sbi->nls_disk = load_nls(buf);
if (!sbi->nls_disk) {
fat_msg(sb, KERN_ERR, "codepage %s not found", buf);
goto out_fail;
}
/* FIXME: utf8 is using iocharset for upper/lower conversion */
if (sbi->options.isvfat) {
sbi->nls_io = load_nls(sbi->options.iocharset);
if (!sbi->nls_io) {
fat_msg(sb, KERN_ERR, "IO charset %s not found",
sbi->options.iocharset);
goto out_fail;
}
}
error = -ENOMEM;
fat_inode = new_inode(sb);
if (!fat_inode)
goto out_fail;
MSDOS_I(fat_inode)->i_pos = 0;
sbi->fat_inode = fat_inode;
fat: introduce special inode for managing the FSINFO block This is patchset makes fatfs stop using the VFS '->write_super()' method for writing out the FSINFO block. The final goal is to get rid of the 'sync_supers()' kernel thread. This kernel thread wakes up every 5 seconds (by default) and calls '->write_super()' for all mounted file-systems. And the bad thing is that this is done even if all the superblocks are clean. Moreover, some file-systems do not even need this end they do not register the '->write_super()' method at all (e.g., btrfs). So 'sync_supers()' most often just generates useless wake-ups and wastes power. I am trying to make all file-systems independent of '->write_super()' and plan to remove 'sync_supers()' and '->write_super' completely once there are no more users. The '->write_supers()' method is mostly used by baroque file-systems like hfs, udf, etc. Modern file-systems like btrfs and xfs do not use it. This justifies removing this stuff from VFS completely and make every FS self-manage own superblock. Tested with xfstests. This patch: Preparation for further changes. It introduces a special inode ('fsinfo_inode') in FAT file-system which we'll later use for managing the FSINFO block. Note, this there is already one special inode ('fat_inode') which is used for managing the FAT tables. Introduce new 'MSDOS_FSINFO_INO' constant for this special inode. It is safe to do because FAT file-system does not store inode numbers on the media but generates them run-time. I've also cleaned up the comment to existing 'MSDOS_ROOT_INO' constant, while on it. Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com> Cc: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> Cc: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2012-06-01 06:26:12 +07:00
fsinfo_inode = new_inode(sb);
if (!fsinfo_inode)
goto out_fail;
fsinfo_inode->i_ino = MSDOS_FSINFO_INO;
sbi->fsinfo_inode = fsinfo_inode;
insert_inode_hash(fsinfo_inode);
root_inode = new_inode(sb);
if (!root_inode)
goto out_fail;
root_inode->i_ino = MSDOS_ROOT_INO;
root_inode->i_version = 1;
error = fat_read_root(root_inode);
if (error < 0) {
iput(root_inode);
goto out_fail;
}
error = -ENOMEM;
insert_inode_hash(root_inode);
fat_attach(root_inode, 0);
sb->s_root = d_make_root(root_inode);
if (!sb->s_root) {
fat_msg(sb, KERN_ERR, "get root inode failed");
goto out_fail;
}
if (sbi->options.discard) {
struct request_queue *q = bdev_get_queue(sb->s_bdev);
if (!blk_queue_discard(q))
fat_msg(sb, KERN_WARNING,
"mounting with \"discard\" option, but "
"the device does not support discard");
}
return 0;
out_invalid:
error = -EINVAL;
if (!silent)
fat_msg(sb, KERN_INFO, "Can't find a valid FAT filesystem");
out_fail:
fat: introduce special inode for managing the FSINFO block This is patchset makes fatfs stop using the VFS '->write_super()' method for writing out the FSINFO block. The final goal is to get rid of the 'sync_supers()' kernel thread. This kernel thread wakes up every 5 seconds (by default) and calls '->write_super()' for all mounted file-systems. And the bad thing is that this is done even if all the superblocks are clean. Moreover, some file-systems do not even need this end they do not register the '->write_super()' method at all (e.g., btrfs). So 'sync_supers()' most often just generates useless wake-ups and wastes power. I am trying to make all file-systems independent of '->write_super()' and plan to remove 'sync_supers()' and '->write_super' completely once there are no more users. The '->write_supers()' method is mostly used by baroque file-systems like hfs, udf, etc. Modern file-systems like btrfs and xfs do not use it. This justifies removing this stuff from VFS completely and make every FS self-manage own superblock. Tested with xfstests. This patch: Preparation for further changes. It introduces a special inode ('fsinfo_inode') in FAT file-system which we'll later use for managing the FSINFO block. Note, this there is already one special inode ('fat_inode') which is used for managing the FAT tables. Introduce new 'MSDOS_FSINFO_INO' constant for this special inode. It is safe to do because FAT file-system does not store inode numbers on the media but generates them run-time. I've also cleaned up the comment to existing 'MSDOS_ROOT_INO' constant, while on it. Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com> Cc: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> Cc: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2012-06-01 06:26:12 +07:00
if (fsinfo_inode)
iput(fsinfo_inode);
if (fat_inode)
iput(fat_inode);
unload_nls(sbi->nls_io);
unload_nls(sbi->nls_disk);
if (sbi->options.iocharset != fat_default_iocharset)
kfree(sbi->options.iocharset);
sb->s_fs_info = NULL;
kfree(sbi);
return error;
}
EXPORT_SYMBOL_GPL(fat_fill_super);
/*
* helper function for fat_flush_inodes. This writes both the inode
* and the file data blocks, waiting for in flight data blocks before
* the start of the call. It does not wait for any io started
* during the call
*/
static int writeback_inode(struct inode *inode)
{
int ret;
/* if we used wait=1, sync_inode_metadata waits for the io for the
* inode to finish. So wait=0 is sent down to sync_inode_metadata
* and filemap_fdatawrite is used for the data blocks
*/
ret = sync_inode_metadata(inode, 0);
if (!ret)
ret = filemap_fdatawrite(inode->i_mapping);
return ret;
}
/*
* write data and metadata corresponding to i1 and i2. The io is
* started but we do not wait for any of it to finish.
*
* filemap_flush is used for the block device, so if there is a dirty
* page for a block already in flight, we will not wait and start the
* io over again
*/
int fat_flush_inodes(struct super_block *sb, struct inode *i1, struct inode *i2)
{
int ret = 0;
if (!MSDOS_SB(sb)->options.flush)
return 0;
if (i1)
ret = writeback_inode(i1);
if (!ret && i2)
ret = writeback_inode(i2);
if (!ret) {
struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping;
ret = filemap_flush(mapping);
}
return ret;
}
EXPORT_SYMBOL_GPL(fat_flush_inodes);
static int __init init_fat_fs(void)
{
int err;
err = fat_cache_init();
if (err)
return err;
err = fat_init_inodecache();
if (err)
goto failed;
return 0;
failed:
fat_cache_destroy();
return err;
}
static void __exit exit_fat_fs(void)
{
fat_cache_destroy();
fat_destroy_inodecache();
}
module_init(init_fat_fs)
module_exit(exit_fat_fs)
MODULE_LICENSE("GPL");