Merge branch 'for-linus' of git://oss.sgi.com/xfs/xfs

* 'for-linus' of git://oss.sgi.com/xfs/xfs:
  xfs: stop calling filemap_fdatawait inside ->fsync
  fix readahead calculations in xfs_dir2_leaf_getdents()
  xfs: make sure xfs_sync_fsdata covers the log
  xfs: mark inodes dirty before issuing I/O
  xfs: cleanup ->sync_fs
  xfs: fix xfs_quiesce_data
  xfs: implement ->dirty_inode to fix timestamp handling
This commit is contained in:
Linus Torvalds 2009-10-09 13:29:42 -07:00
commit a372bf8b6a
13 changed files with 146 additions and 102 deletions

View File

@ -185,6 +185,24 @@ xfs_destroy_ioend(
mempool_free(ioend, xfs_ioend_pool);
}
/*
* If the end of the current ioend is beyond the current EOF,
* return the new EOF value, otherwise zero.
*/
STATIC xfs_fsize_t
xfs_ioend_new_eof(
xfs_ioend_t *ioend)
{
xfs_inode_t *ip = XFS_I(ioend->io_inode);
xfs_fsize_t isize;
xfs_fsize_t bsize;
bsize = ioend->io_offset + ioend->io_size;
isize = MAX(ip->i_size, ip->i_new_size);
isize = MIN(isize, bsize);
return isize > ip->i_d.di_size ? isize : 0;
}
/*
* Update on-disk file size now that data has been written to disk.
* The current in-memory file size is i_size. If a write is beyond
@ -192,13 +210,13 @@ xfs_destroy_ioend(
* updated. If this write does not extend all the way to the valid
* file size then restrict this update to the end of the write.
*/
STATIC void
xfs_setfilesize(
xfs_ioend_t *ioend)
{
xfs_inode_t *ip = XFS_I(ioend->io_inode);
xfs_fsize_t isize;
xfs_fsize_t bsize;
ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG);
ASSERT(ioend->io_type != IOMAP_READ);
@ -206,16 +224,10 @@ xfs_setfilesize(
if (unlikely(ioend->io_error))
return;
bsize = ioend->io_offset + ioend->io_size;
xfs_ilock(ip, XFS_ILOCK_EXCL);
isize = MAX(ip->i_size, ip->i_new_size);
isize = MIN(isize, bsize);
if (ip->i_d.di_size < isize) {
isize = xfs_ioend_new_eof(ioend);
if (isize) {
ip->i_d.di_size = isize;
ip->i_update_core = 1;
xfs_mark_inode_dirty_sync(ip);
}
@ -404,10 +416,16 @@ xfs_submit_ioend_bio(
struct bio *bio)
{
atomic_inc(&ioend->io_remaining);
bio->bi_private = ioend;
bio->bi_end_io = xfs_end_bio;
/*
* If the I/O is beyond EOF we mark the inode dirty immediately
* but don't update the inode size until I/O completion.
*/
if (xfs_ioend_new_eof(ioend))
xfs_mark_inode_dirty_sync(XFS_I(ioend->io_inode));
submit_bio(WRITE, bio);
ASSERT(!bio_flagged(bio, BIO_EOPNOTSUPP));
bio_put(bio);

View File

@ -176,14 +176,7 @@ xfs_file_fsync(
struct dentry *dentry,
int datasync)
{
struct inode *inode = dentry->d_inode;
struct xfs_inode *ip = XFS_I(inode);
int error;
/* capture size updates in I/O completion before writing the inode. */
error = filemap_fdatawait(inode->i_mapping);
if (error)
return error;
struct xfs_inode *ip = XFS_I(dentry->d_inode);
xfs_iflags_clear(ip, XFS_ITRUNCATED);
return -xfs_fsync(ip);

View File

@ -57,19 +57,22 @@
#include <linux/fiemap.h>
/*
* Bring the atime in the XFS inode uptodate.
* Used before logging the inode to disk or when the Linux inode goes away.
* Bring the timestamps in the XFS inode uptodate.
*
* Used before writing the inode to disk.
*/
void
xfs_synchronize_atime(
xfs_synchronize_times(
xfs_inode_t *ip)
{
struct inode *inode = VFS_I(ip);
if (!(inode->i_state & I_CLEAR)) {
ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec;
ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec;
}
ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec;
ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec;
ip->i_d.di_ctime.t_sec = (__int32_t)inode->i_ctime.tv_sec;
ip->i_d.di_ctime.t_nsec = (__int32_t)inode->i_ctime.tv_nsec;
ip->i_d.di_mtime.t_sec = (__int32_t)inode->i_mtime.tv_sec;
ip->i_d.di_mtime.t_nsec = (__int32_t)inode->i_mtime.tv_nsec;
}
/*
@ -106,32 +109,20 @@ xfs_ichgtime(
if ((flags & XFS_ICHGTIME_MOD) &&
!timespec_equal(&inode->i_mtime, &tv)) {
inode->i_mtime = tv;
ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec;
ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec;
sync_it = 1;
}
if ((flags & XFS_ICHGTIME_CHG) &&
!timespec_equal(&inode->i_ctime, &tv)) {
inode->i_ctime = tv;
ip->i_d.di_ctime.t_sec = (__int32_t)tv.tv_sec;
ip->i_d.di_ctime.t_nsec = (__int32_t)tv.tv_nsec;
sync_it = 1;
}
/*
* We update the i_update_core field _after_ changing
* the timestamps in order to coordinate properly with
* xfs_iflush() so that we don't lose timestamp updates.
* This keeps us from having to hold the inode lock
* while doing this. We use the SYNCHRONIZE macro to
* ensure that the compiler does not reorder the update
* of i_update_core above the timestamp updates above.
* Update complete - now make sure everyone knows that the inode
* is dirty.
*/
if (sync_it) {
SYNCHRONIZE();
ip->i_update_core = 1;
if (sync_it)
xfs_mark_inode_dirty_sync(ip);
}
}
/*
@ -506,10 +497,8 @@ xfs_vn_getattr(
stat->gid = ip->i_d.di_gid;
stat->ino = ip->i_ino;
stat->atime = inode->i_atime;
stat->mtime.tv_sec = ip->i_d.di_mtime.t_sec;
stat->mtime.tv_nsec = ip->i_d.di_mtime.t_nsec;
stat->ctime.tv_sec = ip->i_d.di_ctime.t_sec;
stat->ctime.tv_nsec = ip->i_d.di_ctime.t_nsec;
stat->mtime = inode->i_mtime;
stat->ctime = inode->i_ctime;
stat->blocks =
XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks);

View File

@ -667,7 +667,7 @@ xfs_write(
xip->i_new_size = new_size;
if (likely(!(ioflags & IO_INVIS)))
xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
file_update_time(file);
/*
* If the offset is beyond the size of the file, we have a couple

View File

@ -976,6 +976,28 @@ xfs_fs_inode_init_once(
mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
}
/*
* Dirty the XFS inode when mark_inode_dirty_sync() is called so that
* we catch unlogged VFS level updates to the inode. Care must be taken
* here - the transaction code calls mark_inode_dirty_sync() to mark the
* VFS inode dirty in a transaction and clears the i_update_core field;
* it must clear the field after calling mark_inode_dirty_sync() to
* correctly indicate that the dirty state has been propagated into the
* inode log item.
*
* We need the barrier() to maintain correct ordering between unlogged
* updates and the transaction commit code that clears the i_update_core
* field. This requires all updates to be completed before marking the
* inode dirty.
*/
STATIC void
xfs_fs_dirty_inode(
struct inode *inode)
{
barrier();
XFS_I(inode)->i_update_core = 1;
}
/*
* Attempt to flush the inode, this will actually fail
* if the inode is pinned, but we dirty the inode again
@ -1126,7 +1148,7 @@ xfs_fs_put_super(
}
STATIC int
xfs_fs_sync_super(
xfs_fs_sync_fs(
struct super_block *sb,
int wait)
{
@ -1134,23 +1156,23 @@ xfs_fs_sync_super(
int error;
/*
* Treat a sync operation like a freeze. This is to work
* around a race in sync_inodes() which works in two phases
* - an asynchronous flush, which can write out an inode
* without waiting for file size updates to complete, and a
* synchronous flush, which wont do anything because the
* async flush removed the inode's dirty flag. Also
* sync_inodes() will not see any files that just have
* outstanding transactions to be flushed because we don't
* dirty the Linux inode until after the transaction I/O
* completes.
* Not much we can do for the first async pass. Writing out the
* superblock would be counter-productive as we are going to redirty
* when writing out other data and metadata (and writing out a single
* block is quite fast anyway).
*
* Try to asynchronously kick off quota syncing at least.
*/
if (wait || unlikely(sb->s_frozen == SB_FREEZE_WRITE))
error = xfs_quiesce_data(mp);
else
error = xfs_sync_fsdata(mp, 0);
if (!wait) {
xfs_qm_sync(mp, SYNC_TRYLOCK);
return 0;
}
if (unlikely(laptop_mode)) {
error = xfs_quiesce_data(mp);
if (error)
return -error;
if (laptop_mode) {
int prev_sync_seq = mp->m_sync_seq;
/*
@ -1169,7 +1191,7 @@ xfs_fs_sync_super(
mp->m_sync_seq != prev_sync_seq);
}
return -error;
return 0;
}
STATIC int
@ -1539,10 +1561,11 @@ xfs_fs_get_sb(
static const struct super_operations xfs_super_operations = {
.alloc_inode = xfs_fs_alloc_inode,
.destroy_inode = xfs_fs_destroy_inode,
.dirty_inode = xfs_fs_dirty_inode,
.write_inode = xfs_fs_write_inode,
.clear_inode = xfs_fs_clear_inode,
.put_super = xfs_fs_put_super,
.sync_fs = xfs_fs_sync_super,
.sync_fs = xfs_fs_sync_fs,
.freeze_fs = xfs_fs_freeze,
.statfs = xfs_fs_statfs,
.remount_fs = xfs_fs_remount,

View File

@ -309,11 +309,15 @@ xfs_sync_attr(
STATIC int
xfs_commit_dummy_trans(
struct xfs_mount *mp,
uint log_flags)
uint flags)
{
struct xfs_inode *ip = mp->m_rootip;
struct xfs_trans *tp;
int error;
int log_flags = XFS_LOG_FORCE;
if (flags & SYNC_WAIT)
log_flags |= XFS_LOG_SYNC;
/*
* Put a dummy transaction in the log to tell recovery
@ -331,13 +335,12 @@ xfs_commit_dummy_trans(
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
xfs_trans_ihold(tp, ip);
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
/* XXX(hch): ignoring the error here.. */
error = xfs_trans_commit(tp, 0);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
/* the log force ensures this transaction is pushed to disk */
xfs_log_force(mp, 0, log_flags);
return 0;
return error;
}
int
@ -385,7 +388,20 @@ xfs_sync_fsdata(
else
XFS_BUF_ASYNC(bp);
return xfs_bwrite(mp, bp);
error = xfs_bwrite(mp, bp);
if (error)
return error;
/*
* If this is a data integrity sync make sure all pending buffers
* are flushed out for the log coverage check below.
*/
if (flags & SYNC_WAIT)
xfs_flush_buftarg(mp->m_ddev_targp, 1);
if (xfs_log_need_covered(mp))
error = xfs_commit_dummy_trans(mp, flags);
return error;
out_brelse:
xfs_buf_relse(bp);
@ -419,14 +435,16 @@ xfs_quiesce_data(
/* push non-blocking */
xfs_sync_data(mp, 0);
xfs_qm_sync(mp, SYNC_TRYLOCK);
xfs_filestream_flush(mp);
/* push and block */
/* push and block till complete */
xfs_sync_data(mp, SYNC_WAIT);
xfs_qm_sync(mp, SYNC_WAIT);
/* drop inode references pinned by filestreams */
xfs_filestream_flush(mp);
/* write superblock and hoover up shutdown errors */
error = xfs_sync_fsdata(mp, 0);
error = xfs_sync_fsdata(mp, SYNC_WAIT);
/* flush data-only devices */
if (mp->m_rtdev_targp)
@ -570,8 +588,6 @@ xfs_sync_worker(
/* dgc: errors ignored here */
error = xfs_qm_sync(mp, SYNC_TRYLOCK);
error = xfs_sync_fsdata(mp, SYNC_TRYLOCK);
if (xfs_log_need_covered(mp))
error = xfs_commit_dummy_trans(mp, XFS_LOG_FORCE);
}
mp->m_sync_seq++;
wake_up(&mp->m_wait_single_sync_task);

View File

@ -206,10 +206,10 @@ xfs_swap_extents(
* process that the file was not changed out from
* under it.
*/
if ((sbp->bs_ctime.tv_sec != ip->i_d.di_ctime.t_sec) ||
(sbp->bs_ctime.tv_nsec != ip->i_d.di_ctime.t_nsec) ||
(sbp->bs_mtime.tv_sec != ip->i_d.di_mtime.t_sec) ||
(sbp->bs_mtime.tv_nsec != ip->i_d.di_mtime.t_nsec)) {
if ((sbp->bs_ctime.tv_sec != VFS_I(ip)->i_ctime.tv_sec) ||
(sbp->bs_ctime.tv_nsec != VFS_I(ip)->i_ctime.tv_nsec) ||
(sbp->bs_mtime.tv_sec != VFS_I(ip)->i_mtime.tv_sec) ||
(sbp->bs_mtime.tv_nsec != VFS_I(ip)->i_mtime.tv_nsec)) {
error = XFS_ERROR(EBUSY);
goto out_unlock;
}

View File

@ -854,6 +854,7 @@ xfs_dir2_leaf_getdents(
*/
ra_want = howmany(bufsize + mp->m_dirblksize,
mp->m_sb.sb_blocksize) - 1;
ASSERT(ra_want >= 0);
/*
* If we don't have as many as we want, and we haven't
@ -1088,7 +1089,8 @@ xfs_dir2_leaf_getdents(
*/
ptr += length;
curoff += length;
bufsize -= length;
/* bufsize may have just been a guess; don't go negative */
bufsize = bufsize > length ? bufsize - length : 0;
}
/*

View File

@ -3068,9 +3068,9 @@ xfs_iflush_int(
SYNCHRONIZE();
/*
* Make sure to get the latest atime from the Linux inode.
* Make sure to get the latest timestamps from the Linux inode.
*/
xfs_synchronize_atime(ip);
xfs_synchronize_times(ip);
if (XFS_TEST_ERROR(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC,
mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) {

View File

@ -504,7 +504,7 @@ void xfs_ichgtime(xfs_inode_t *, int);
void xfs_lock_inodes(xfs_inode_t **, int, uint);
void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);
void xfs_synchronize_atime(xfs_inode_t *);
void xfs_synchronize_times(xfs_inode_t *);
void xfs_mark_inode_dirty_sync(xfs_inode_t *);
#if defined(XFS_INODE_TRACE)

View File

@ -231,6 +231,15 @@ xfs_inode_item_format(
vecp++;
nvecs = 1;
/*
* Make sure the linux inode is dirty. We do this before
* clearing i_update_core as the VFS will call back into
* XFS here and set i_update_core, so we need to dirty the
* inode first so that the ordering of i_update_core and
* unlogged modifications still works as described below.
*/
xfs_mark_inode_dirty_sync(ip);
/*
* Clear i_update_core if the timestamps (or any other
* non-transactional modification) need flushing/logging
@ -263,14 +272,9 @@ xfs_inode_item_format(
}
/*
* Make sure to get the latest atime from the Linux inode.
* Make sure to get the latest timestamps from the Linux inode.
*/
xfs_synchronize_atime(ip);
/*
* make sure the linux inode is dirty
*/
xfs_mark_inode_dirty_sync(ip);
xfs_synchronize_times(ip);
vecp->i_addr = (xfs_caddr_t)&ip->i_d;
vecp->i_len = sizeof(struct xfs_icdinode);

View File

@ -59,6 +59,7 @@ xfs_bulkstat_one_iget(
{
xfs_icdinode_t *dic; /* dinode core info pointer */
xfs_inode_t *ip; /* incore inode pointer */
struct inode *inode;
int error;
error = xfs_iget(mp, NULL, ino,
@ -72,6 +73,7 @@ xfs_bulkstat_one_iget(
ASSERT(ip->i_imap.im_blkno != 0);
dic = &ip->i_d;
inode = VFS_I(ip);
/* xfs_iget returns the following without needing
* further change.
@ -83,16 +85,19 @@ xfs_bulkstat_one_iget(
buf->bs_uid = dic->di_uid;
buf->bs_gid = dic->di_gid;
buf->bs_size = dic->di_size;
/*
* We are reading the atime from the Linux inode because the
* dinode might not be uptodate.
* We need to read the timestamps from the Linux inode because
* the VFS keeps writing directly into the inode structure instead
* of telling us about the updates.
*/
buf->bs_atime.tv_sec = VFS_I(ip)->i_atime.tv_sec;
buf->bs_atime.tv_nsec = VFS_I(ip)->i_atime.tv_nsec;
buf->bs_mtime.tv_sec = dic->di_mtime.t_sec;
buf->bs_mtime.tv_nsec = dic->di_mtime.t_nsec;
buf->bs_ctime.tv_sec = dic->di_ctime.t_sec;
buf->bs_ctime.tv_nsec = dic->di_ctime.t_nsec;
buf->bs_atime.tv_sec = inode->i_atime.tv_sec;
buf->bs_atime.tv_nsec = inode->i_atime.tv_nsec;
buf->bs_mtime.tv_sec = inode->i_mtime.tv_sec;
buf->bs_mtime.tv_nsec = inode->i_mtime.tv_nsec;
buf->bs_ctime.tv_sec = inode->i_ctime.tv_sec;
buf->bs_ctime.tv_nsec = inode->i_ctime.tv_nsec;
buf->bs_xflags = xfs_ip2xflags(ip);
buf->bs_extsize = dic->di_extsize << mp->m_sb.sb_blocklog;
buf->bs_extents = dic->di_nextents;

View File

@ -2475,12 +2475,6 @@ xfs_reclaim(
ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
/*
* Make sure the atime in the XFS inode is correct before freeing the
* Linux inode.
*/
xfs_synchronize_atime(ip);
/*
* If we have nothing to flush with this inode then complete the
* teardown now, otherwise break the link between the xfs inode and the