linux_dsm_epyc7002/fs/nilfs2/the_nilfs.h
Ryusuke Konishi a924586036 nilfs2: fix oopses with doubly mounted snapshots
will fix kernel oopses like the following:

 # mount -t nilfs2 -r -o cp=20 /dev/sdb1 /test1
 # mount -t nilfs2 -r -o cp=20 /dev/sdb1 /test2
 # umount /test1
 # umount /test2

BUG: sleeping function called from invalid context at arch/x86/mm/fault.c:1069
in_atomic(): 0, irqs_disabled(): 1, pid: 3886, name: umount.nilfs2
1 lock held by umount.nilfs2/3886:
 #0:  (&type->s_umount_key#31){+.+...}, at: [<c10b398a>] deactivate_super+0x52/0x6c
irq event stamp: 1219
hardirqs last  enabled at (1219): [<c135c774>] __mutex_unlock_slowpath+0xf8/0x119
hardirqs last disabled at (1218): [<c135c6d5>] __mutex_unlock_slowpath+0x59/0x119
softirqs last  enabled at (1214): [<c1033316>] __do_softirq+0x1a5/0x1ad
softirqs last disabled at (1205): [<c1033354>] do_softirq+0x36/0x5a
Pid: 3886, comm: umount.nilfs2 Not tainted 2.6.31-rc6 #55
Call Trace:
 [<c1023549>] __might_sleep+0x107/0x10e
 [<c13603c0>] do_page_fault+0x246/0x397
 [<c136017a>] ? do_page_fault+0x0/0x397
 [<c135e753>] error_code+0x6b/0x70
 [<c136017a>] ? do_page_fault+0x0/0x397
 [<c104f805>] ? __lock_acquire+0x91/0x12fd
 [<c1050a62>] ? __lock_acquire+0x12ee/0x12fd
 [<c1050a62>] ? __lock_acquire+0x12ee/0x12fd
 [<c1050b2b>] lock_acquire+0xba/0xdd
 [<d0d17d3f>] ? nilfs_detach_segment_constructor+0x2f/0x2fa [nilfs2]
 [<c135d4fe>] down_write+0x2a/0x46
 [<d0d17d3f>] ? nilfs_detach_segment_constructor+0x2f/0x2fa [nilfs2]
 [<d0d17d3f>] nilfs_detach_segment_constructor+0x2f/0x2fa [nilfs2]
 [<c104ea2c>] ? mark_held_locks+0x43/0x5b
 [<c104ecb1>] ? trace_hardirqs_on_caller+0x10b/0x133
 [<c104ece4>] ? trace_hardirqs_on+0xb/0xd
 [<d0d09ac1>] nilfs_put_super+0x2f/0xca [nilfs2]
 [<c10b3352>] generic_shutdown_super+0x49/0xb8
 [<c10b33de>] kill_block_super+0x1d/0x31
 [<c10e6599>] ? vfs_quota_off+0x0/0x12
 [<c10b398f>] deactivate_super+0x57/0x6c
 [<c10c4bc3>] mntput_no_expire+0x8c/0xb4
 [<c10c5094>] sys_umount+0x27f/0x2a4
 [<c10c50c6>] sys_oldumount+0xd/0xf
 [<c10031a4>] sysenter_do_call+0x12/0x38
 ...

This turns out to be a bug brought by an -rc1 patch ("nilfs2: simplify
remaining sget() use").

In the patch, a new "put resource" function, nilfs_put_sbinfo()
was introduced to delay freeing nilfs_sb_info struct.

But the nilfs_put_sbinfo() mistakenly used atomic_dec_and_test()
function to check the reference count, and it caused the nilfs_sb_info
was freed when user mounted a snapshot twice.

This bug also suggests there was unseen memory leak in usual mount
/umount operations for nilfs.

Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
2009-08-19 02:10:13 +09:00

318 lines
10 KiB
C

/*
* the_nilfs.h - the_nilfs shared structure.
*
* Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Written by Ryusuke Konishi <ryusuke@osrg.net>
*
*/
#ifndef _THE_NILFS_H
#define _THE_NILFS_H
#include <linux/types.h>
#include <linux/buffer_head.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include "sb.h"
/* the_nilfs struct */
enum {
THE_NILFS_INIT = 0, /* Information from super_block is set */
THE_NILFS_LOADED, /* Roll-back/roll-forward has done and
the latest checkpoint was loaded */
THE_NILFS_DISCONTINUED, /* 'next' pointer chain has broken */
};
/**
* struct the_nilfs - struct to supervise multiple nilfs mount points
* @ns_flags: flags
* @ns_count: reference count
* @ns_list: list head for nilfs_list
* @ns_bdev: block device
* @ns_bdi: backing dev info
* @ns_writer: back pointer to writable nilfs_sb_info
* @ns_sem: semaphore for shared states
* @ns_super_sem: semaphore for global operations across super block instances
* @ns_mount_mutex: mutex protecting mount process of nilfs
* @ns_writer_mutex: mutex protecting ns_writer attach/detach
* @ns_writer_refcount: number of referrers on ns_writer
* @ns_current: back pointer to current mount
* @ns_sbh: buffer heads of on-disk super blocks
* @ns_sbp: pointers to super block data
* @ns_sbwtime: previous write time of super blocks
* @ns_sbsize: size of valid data in super block
* @ns_supers: list of nilfs super block structs
* @ns_seg_seq: segment sequence counter
* @ns_segnum: index number of the latest full segment.
* @ns_nextnum: index number of the full segment index to be used next
* @ns_pseg_offset: offset of next partial segment in the current full segment
* @ns_cno: next checkpoint number
* @ns_ctime: write time of the last segment
* @ns_nongc_ctime: write time of the last segment not for cleaner operation
* @ns_ndirtyblks: Number of dirty data blocks
* @ns_last_segment_lock: lock protecting fields for the latest segment
* @ns_last_pseg: start block number of the latest segment
* @ns_last_seq: sequence value of the latest segment
* @ns_last_cno: checkpoint number of the latest segment
* @ns_prot_seq: least sequence number of segments which must not be reclaimed
* @ns_free_segments_count: counter of free segments
* @ns_segctor_sem: segment constructor semaphore
* @ns_dat: DAT file inode
* @ns_cpfile: checkpoint file inode
* @ns_sufile: segusage file inode
* @ns_gc_dat: shadow inode of the DAT file inode for GC
* @ns_gc_inodes: dummy inodes to keep live blocks
* @ns_gc_inodes_h: hash list to keep dummy inode holding live blocks
* @ns_blocksize_bits: bit length of block size
* @ns_nsegments: number of segments in filesystem
* @ns_blocks_per_segment: number of blocks per segment
* @ns_r_segments_percentage: reserved segments percentage
* @ns_nrsvsegs: number of reserved segments
* @ns_first_data_block: block number of first data block
* @ns_inode_size: size of on-disk inode
* @ns_first_ino: first not-special inode number
* @ns_crc_seed: seed value of CRC32 calculation
*/
struct the_nilfs {
unsigned long ns_flags;
atomic_t ns_count;
struct list_head ns_list;
struct block_device *ns_bdev;
struct backing_dev_info *ns_bdi;
struct nilfs_sb_info *ns_writer;
struct rw_semaphore ns_sem;
struct rw_semaphore ns_super_sem;
struct mutex ns_mount_mutex;
struct mutex ns_writer_mutex;
atomic_t ns_writer_refcount;
/*
* components protected by ns_super_sem
*/
struct nilfs_sb_info *ns_current;
struct list_head ns_supers;
/*
* used for
* - loading the latest checkpoint exclusively.
* - allocating a new full segment.
* - protecting s_dirt in the super_block struct
* (see nilfs_write_super) and the following fields.
*/
struct buffer_head *ns_sbh[2];
struct nilfs_super_block *ns_sbp[2];
time_t ns_sbwtime[2];
unsigned ns_sbsize;
unsigned ns_mount_state;
/*
* Following fields are dedicated to a writable FS-instance.
* Except for the period seeking checkpoint, code outside the segment
* constructor must lock a segment semaphore while accessing these
* fields.
* The writable FS-instance is sole during a lifetime of the_nilfs.
*/
u64 ns_seg_seq;
__u64 ns_segnum;
__u64 ns_nextnum;
unsigned long ns_pseg_offset;
__u64 ns_cno;
time_t ns_ctime;
time_t ns_nongc_ctime;
atomic_t ns_ndirtyblks;
/*
* The following fields hold information on the latest partial segment
* written to disk with a super root. These fields are protected by
* ns_last_segment_lock.
*/
spinlock_t ns_last_segment_lock;
sector_t ns_last_pseg;
u64 ns_last_seq;
__u64 ns_last_cno;
u64 ns_prot_seq;
unsigned long ns_free_segments_count;
struct rw_semaphore ns_segctor_sem;
/*
* Following fields are lock free except for the period before
* the_nilfs is initialized.
*/
struct inode *ns_dat;
struct inode *ns_cpfile;
struct inode *ns_sufile;
struct inode *ns_gc_dat;
/* GC inode list and hash table head */
struct list_head ns_gc_inodes;
struct hlist_head *ns_gc_inodes_h;
/* Disk layout information (static) */
unsigned int ns_blocksize_bits;
unsigned long ns_nsegments;
unsigned long ns_blocks_per_segment;
unsigned long ns_r_segments_percentage;
unsigned long ns_nrsvsegs;
unsigned long ns_first_data_block;
int ns_inode_size;
int ns_first_ino;
u32 ns_crc_seed;
};
#define NILFS_GCINODE_HASH_BITS 8
#define NILFS_GCINODE_HASH_SIZE (1<<NILFS_GCINODE_HASH_BITS)
#define THE_NILFS_FNS(bit, name) \
static inline void set_nilfs_##name(struct the_nilfs *nilfs) \
{ \
set_bit(THE_NILFS_##bit, &(nilfs)->ns_flags); \
} \
static inline void clear_nilfs_##name(struct the_nilfs *nilfs) \
{ \
clear_bit(THE_NILFS_##bit, &(nilfs)->ns_flags); \
} \
static inline int nilfs_##name(struct the_nilfs *nilfs) \
{ \
return test_bit(THE_NILFS_##bit, &(nilfs)->ns_flags); \
}
THE_NILFS_FNS(INIT, init)
THE_NILFS_FNS(LOADED, loaded)
THE_NILFS_FNS(DISCONTINUED, discontinued)
/* Minimum interval of periodical update of superblocks (in seconds) */
#define NILFS_SB_FREQ 10
#define NILFS_ALTSB_FREQ 60 /* spare superblock */
void nilfs_set_last_segment(struct the_nilfs *, sector_t, u64, __u64);
struct the_nilfs *find_or_create_nilfs(struct block_device *);
void put_nilfs(struct the_nilfs *);
int init_nilfs(struct the_nilfs *, struct nilfs_sb_info *, char *);
int load_nilfs(struct the_nilfs *, struct nilfs_sb_info *);
int nilfs_count_free_blocks(struct the_nilfs *, sector_t *);
struct nilfs_sb_info *nilfs_find_sbinfo(struct the_nilfs *, int, __u64);
int nilfs_checkpoint_is_mounted(struct the_nilfs *, __u64, int);
int nilfs_near_disk_full(struct the_nilfs *);
void nilfs_fall_back_super_block(struct the_nilfs *);
void nilfs_swap_super_block(struct the_nilfs *);
static inline void get_nilfs(struct the_nilfs *nilfs)
{
/* Caller must have at least one reference of the_nilfs. */
atomic_inc(&nilfs->ns_count);
}
static inline struct nilfs_sb_info *nilfs_get_writer(struct the_nilfs *nilfs)
{
if (atomic_inc_and_test(&nilfs->ns_writer_refcount))
mutex_lock(&nilfs->ns_writer_mutex);
return nilfs->ns_writer;
}
static inline void nilfs_put_writer(struct the_nilfs *nilfs)
{
if (atomic_add_negative(-1, &nilfs->ns_writer_refcount))
mutex_unlock(&nilfs->ns_writer_mutex);
}
static inline void
nilfs_attach_writer(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
{
mutex_lock(&nilfs->ns_writer_mutex);
nilfs->ns_writer = sbi;
mutex_unlock(&nilfs->ns_writer_mutex);
}
static inline void
nilfs_detach_writer(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
{
mutex_lock(&nilfs->ns_writer_mutex);
if (sbi == nilfs->ns_writer)
nilfs->ns_writer = NULL;
mutex_unlock(&nilfs->ns_writer_mutex);
}
static inline void nilfs_put_sbinfo(struct nilfs_sb_info *sbi)
{
if (atomic_dec_and_test(&sbi->s_count))
kfree(sbi);
}
static inline void
nilfs_get_segment_range(struct the_nilfs *nilfs, __u64 segnum,
sector_t *seg_start, sector_t *seg_end)
{
*seg_start = (sector_t)nilfs->ns_blocks_per_segment * segnum;
*seg_end = *seg_start + nilfs->ns_blocks_per_segment - 1;
if (segnum == 0)
*seg_start = nilfs->ns_first_data_block;
}
static inline sector_t
nilfs_get_segment_start_blocknr(struct the_nilfs *nilfs, __u64 segnum)
{
return (segnum == 0) ? nilfs->ns_first_data_block :
(sector_t)nilfs->ns_blocks_per_segment * segnum;
}
static inline __u64
nilfs_get_segnum_of_block(struct the_nilfs *nilfs, sector_t blocknr)
{
sector_t segnum = blocknr;
sector_div(segnum, nilfs->ns_blocks_per_segment);
return segnum;
}
static inline void
nilfs_terminate_segment(struct the_nilfs *nilfs, sector_t seg_start,
sector_t seg_end)
{
/* terminate the current full segment (used in case of I/O-error) */
nilfs->ns_pseg_offset = seg_end - seg_start + 1;
}
static inline void nilfs_shift_to_next_segment(struct the_nilfs *nilfs)
{
/* move forward with a full segment */
nilfs->ns_segnum = nilfs->ns_nextnum;
nilfs->ns_pseg_offset = 0;
nilfs->ns_seg_seq++;
}
static inline __u64 nilfs_last_cno(struct the_nilfs *nilfs)
{
__u64 cno;
spin_lock(&nilfs->ns_last_segment_lock);
cno = nilfs->ns_last_cno;
spin_unlock(&nilfs->ns_last_segment_lock);
return cno;
}
static inline int nilfs_segment_is_active(struct the_nilfs *nilfs, __u64 n)
{
return n == nilfs->ns_segnum || n == nilfs->ns_nextnum;
}
#endif /* _THE_NILFS_H */