xfs: refactor ratelimited buffer error messages into helper

XFS has some inconsistent log message rate limiting with respect to
buffer alerts. The metadata I/O error notification uses the generic
ratelimited alert, the buffer push code uses a custom rate limit and
the similar quiesce time failure checks are not rate limited at all
(when they should be).

The custom rate limit defined in the buf item code is specifically
crafted for buffer alerts. It is more aggressive than generic rate
limiting code because it must accommodate a high frequency of I/O
error events in a relative short timeframe.

Factor out the custom rate limit state from the buf item code into a
per-buftarg rate limit so various alerts are limited based on the
target. Define a buffer alert helper function and use it for the
buffer alerts that are already ratelimited.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Allison Collins <allison.henderson@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
This commit is contained in:
Brian Foster 2020-05-06 13:25:21 -07:00 committed by Darrick J. Wong
parent b6983e80b0
commit f9bccfcc3b
5 changed files with 41 additions and 17 deletions

View File

@ -1244,10 +1244,10 @@ xfs_buf_ioerror_alert(
struct xfs_buf *bp,
xfs_failaddr_t func)
{
xfs_alert_ratelimited(bp->b_mount,
"metadata I/O error in \"%pS\" at daddr 0x%llx len %d error %d",
func, (uint64_t)XFS_BUF_ADDR(bp), bp->b_length,
-bp->b_error);
xfs_buf_alert_ratelimited(bp, "XFS: metadata IO error",
"metadata I/O error in \"%pS\" at daddr 0x%llx len %d error %d",
func, (uint64_t)XFS_BUF_ADDR(bp),
bp->b_length, -bp->b_error);
}
/*
@ -1828,6 +1828,13 @@ xfs_alloc_buftarg(
btp->bt_bdev = bdev;
btp->bt_daxdev = dax_dev;
/*
* Buffer IO error rate limiting. Limit it to no more than 10 messages
* per 30 seconds so as to not spam logs too much on repeated errors.
*/
ratelimit_state_init(&btp->bt_ioerror_rl, 30 * HZ,
DEFAULT_RATELIMIT_BURST);
if (xfs_setsize_buftarg_early(btp, bdev))
goto error_free;

View File

@ -91,6 +91,7 @@ typedef struct xfs_buftarg {
struct list_lru bt_lru;
struct percpu_counter bt_io_count;
struct ratelimit_state bt_ioerror_rl;
} xfs_buftarg_t;
struct xfs_buf;

View File

@ -481,14 +481,6 @@ xfs_buf_item_unpin(
}
}
/*
* Buffer IO error rate limiting. Limit it to no more than 10 messages per 30
* seconds so as to not spam logs too much on repeated detection of the same
* buffer being bad..
*/
static DEFINE_RATELIMIT_STATE(xfs_buf_write_fail_rl_state, 30 * HZ, 10);
STATIC uint
xfs_buf_item_push(
struct xfs_log_item *lip,
@ -518,11 +510,10 @@ xfs_buf_item_push(
trace_xfs_buf_item_push(bip);
/* has a previous flush failed due to IO errors? */
if ((bp->b_flags & XBF_WRITE_FAIL) &&
___ratelimit(&xfs_buf_write_fail_rl_state, "XFS: Failing async write")) {
xfs_warn(bp->b_mount,
"Failing async write on buffer block 0x%llx. Retrying async write.",
(long long)bp->b_bn);
if (bp->b_flags & XBF_WRITE_FAIL) {
xfs_buf_alert_ratelimited(bp, "XFS: Failing async write",
"Failing async write on buffer block 0x%llx. Retrying async write.",
(long long)bp->b_bn);
}
if (!xfs_buf_delwri_queue(bp, buffer_list))

View File

@ -117,3 +117,25 @@ xfs_hex_dump(const void *p, int length)
{
print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_OFFSET, 16, 1, p, length, 1);
}
void
xfs_buf_alert_ratelimited(
struct xfs_buf *bp,
const char *rlmsg,
const char *fmt,
...)
{
struct xfs_mount *mp = bp->b_mount;
struct va_format vaf;
va_list args;
/* use the more aggressive per-target rate limit for buffers */
if (!___ratelimit(&bp->b_target->bt_ioerror_rl, rlmsg))
return;
va_start(args, fmt);
vaf.fmt = fmt;
vaf.va = &args;
__xfs_printk(KERN_ALERT, mp, &vaf);
va_end(args);
}

View File

@ -79,4 +79,7 @@ void asswarn(struct xfs_mount *mp, char *expr, char *f, int l);
extern void xfs_hex_dump(const void *p, int length);
void xfs_buf_alert_ratelimited(struct xfs_buf *bp, const char *rlmsg,
const char *fmt, ...);
#endif /* __XFS_MESSAGE_H */