gfs2: Try harder to delete inodes locally

When an inode's link count drops to zero and the inode is cached on
other nodes, the current behavior of gfs2 is to immediately give up and
to rely on the other node(s) to delete the inode if there is iopen glock
contention.  This leads to resource group glock bouncing and the loss of
caching.  With the previous patches in place, we can fix that by not
giving up immediately.

When the inode is still open on other nodes, those nodes won't be able
to evict the inode and give up the iopen glock.  In that case, our lock
conversion request will time out.  The unlink system call will block for
the duration of the iopen lock conversion request.  We're also holding
the inode glock in EX mode for an extended duration, so other nodes
won't be able to make progress on the inode, either.
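
As the patch below shows, the conversion request is bounded by waiting on
sd_async_glock_wait with wait_event_interruptible_timeout().  For
reference, here is a minimal, self-contained sketch of that kernel
pattern; the wait queue, flag word, and function names are made up for
illustration and are not gfs2 code:

#include <linux/wait.h>
#include <linux/bitops.h>
#include <linux/jiffies.h>

static DECLARE_WAIT_QUEUE_HEAD(my_wq);	/* stands in for sd_async_glock_wait */
static unsigned long my_flags;		/* stands in for gh_iflags */
#define MY_WAIT 0			/* stands in for HIF_WAIT */

/*
 * Sleep until another context clears MY_WAIT and wakes us, or until five
 * seconds have passed.  Returns the remaining jiffies on success, 0 on
 * timeout, or -ERESTARTSYS if the sleep was interrupted by a signal.
 */
static long wait_for_grant(void)
{
	return wait_event_interruptible_timeout(my_wq,
			!test_bit(MY_WAIT, &my_flags), 5 * HZ);
}

/*
 * Grant side: clear the flag, then wake the waiters.  The barrier orders
 * the clear_bit() before the waiters re-check the condition.
 */
static void grant(void)
{
	clear_bit(MY_WAIT, &my_flags);
	smp_mb__after_atomic();
	wake_up(&my_wq);
}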

This is worse than what we had before, but we can prevent other nodes
from getting stuck by aborting our iopen locking request if there is
contention on the inode glock.  This will be the subject of a future
patch.
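
For illustration, one plausible shape of that abort is to extend the wait
condition so that a pending demote request on the inode glock also ends
the wait.  This is only a sketch of the idea, not the future patch itself;
it assumes the existing GLF_DEMOTE bit in gl_flags, which is set when
another node asks for the glock:

/*
 * Hedged sketch only: end the iopen wait early when another node has
 * requested our inode glock (GLF_DEMOTE set on ip->i_gl), so that we can
 * back off instead of stalling that node.
 */
static bool iopen_wait_done(struct gfs2_inode *ip, struct gfs2_holder *gh)
{
	return !test_bit(HIF_WAIT, &gh->gh_iflags) ||
	       test_bit(GLF_DEMOTE, &ip->i_gl->gl_flags);
}

Such a helper could replace the !test_bit(HIF_WAIT, ...) condition passed
to wait_event_interruptible_timeout(); when the wait ends without
HIF_HOLDER set, the request would still be dropped with gfs2_glock_dq().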

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
commit 9e73330f29
parent 8c7b9262a8
Author: Andreas Gruenbacher <agruenba@redhat.com>
Date:   2020-01-14 14:59:08 +01:00

@@ -1258,6 +1258,50 @@ static void gfs2_glock_put_eventually(struct gfs2_glock *gl)
 	gfs2_glock_put(gl);
 }
 
+static bool gfs2_upgrade_iopen_glock(struct inode *inode)
+{
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_sbd *sdp = GFS2_SB(inode);
+	struct gfs2_holder *gh = &ip->i_iopen_gh;
+	long timeout = 5 * HZ;
+	int error;
+
+	gh->gh_flags |= GL_NOCACHE;
+	gfs2_glock_dq_wait(gh);
+
+	/*
+	 * If there are no other lock holders, we'll get the lock immediately.
+	 * Otherwise, the other nodes holding the lock will be notified about
+	 * our locking request.  If they don't have the inode open, they'll
+	 * evict the cached inode and release the lock.  As a last resort,
+	 * we'll eventually time out.
+	 *
+	 * Note that we're passing the LM_FLAG_TRY_1CB flag to the first
+	 * locking request as an optimization to notify lock holders as soon as
+	 * possible.  Without that flag, they'd be notified implicitly by the
+	 * second locking request.
+	 */
+
+	gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, gh);
+	error = gfs2_glock_nq(gh);
+	if (error != GLR_TRYFAILED)
+		return !error;
+
+	gfs2_holder_reinit(LM_ST_EXCLUSIVE, GL_ASYNC | GL_NOCACHE, gh);
+	error = gfs2_glock_nq(gh);
+	if (error)
+		return false;
+
+	timeout = wait_event_interruptible_timeout(sdp->sd_async_glock_wait,
+		!test_bit(HIF_WAIT, &gh->gh_iflags),
+		timeout);
+	if (!test_bit(HIF_HOLDER, &gh->gh_iflags)) {
+		gfs2_glock_dq(gh);
+		return false;
+	}
+	return true;
+}
+
 /**
  * gfs2_evict_inode - Remove an inode from cache
  * @inode: The inode to evict
@@ -1339,14 +1383,11 @@ static void gfs2_evict_inode(struct inode *inode)
 out_delete:
 	if (gfs2_holder_initialized(&ip->i_iopen_gh) &&
 	    test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) {
-		ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
-		gfs2_glock_dq_wait(&ip->i_iopen_gh);
-		gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE,
-				   &ip->i_iopen_gh);
-		error = gfs2_glock_nq(&ip->i_iopen_gh);
-		if (error)
+		if (!gfs2_upgrade_iopen_glock(inode)) {
+			gfs2_holder_uninit(&ip->i_iopen_gh);
 			goto out_truncate;
+		}
 	}
 
 	if (S_ISDIR(inode->i_mode) &&
 	    (ip->i_diskflags & GFS2_DIF_EXHASH)) {