linux_dsm_epyc7002/fs/xfs/xfs_trans_extfree.c
Darrick J. Wong 3fd129b63f xfs: set up per-AG free space reservations
One unfortunate quirk of the reference count and reverse mapping
btrees -- they can expand in size when blocks are written to *other*
allocation groups if, say, one large extent becomes a lot of tiny
extents.  Since we don't want to start throwing errors in the middle
of CoWing, we need to reserve some blocks to handle future expansion.
The transaction block reservation counters aren't sufficient here
because we have to have a reserve of blocks in every AG, not just
somewhere in the filesystem.

Therefore, create two per-AG block reservation pools.  One feeds the
AGFL so that rmapbt expansion always succeeds, and the other feeds all
other metadata so that refcountbt expansion never fails.

Use the count of how many reserved blocks we need to have on hand to
create a virtual reservation in the AG.  Through selective clamping of
the maximum length of allocation requests and of the length of the
longest free extent, we can make it look like there's less free space
in the AG unless the reservation owner is asking for blocks.

In other words, play some accounting tricks in-core to make sure that
we always have blocks available.  On the plus side, there's nothing to
clean up if we crash, which is in contrast to the strategy that the rough
draft used (actually removing extents from the freespace btrees).

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-09-19 10:30:52 +10:00

240 lines
6.1 KiB
C

/*
* Copyright (c) 2000,2005 Silicon Graphics, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_extfree_item.h"
#include "xfs_alloc.h"
#include "xfs_bmap.h"
#include "xfs_trace.h"
/*
 * Build an "extent free done" (EFD) log item via xfs_efd_init(), passing it
 * the EFI and the extent count, and attach the result to the transaction.
 *
 * The EFD is set up for exactly nextents extents and the caller is expected
 * to fill in every one of them; there is no flexibility about using fewer.
 */
struct xfs_efd_log_item *
xfs_trans_get_efd(struct xfs_trans		*tp,
		  struct xfs_efi_log_item	*efip,
		  uint				nextents)
{
	struct xfs_efd_log_item		*done;

	ASSERT(tp != NULL);
	ASSERT(nextents > 0);

	done = xfs_efd_init(tp->t_mountp, efip, nextents);
	ASSERT(done != NULL);

	/* Get a log_item_desc in the transaction to point at the new EFD. */
	xfs_trans_add_item(tp, &done->efd_item);

	return done;
}
/*
 * Free the extent described by start_block/ext_len and record it in the next
 * unused slot of the EFD.
 *
 * The transaction and the EFD are flagged dirty whether the free succeeded or
 * not, in support of the EFI/EFD lifecycle rules, and the EFD slot is filled
 * in either way.
 *
 * Returns whatever xfs_free_extent() returned.
 */
int
xfs_trans_free_extent(
	struct xfs_trans	*tp,
	struct xfs_efd_log_item	*efdp,
	xfs_fsblock_t		start_block,
	xfs_extlen_t		ext_len,
	struct xfs_owner_info	*oinfo)
{
	struct xfs_mount	*mp = tp->t_mountp;
	xfs_agnumber_t		agno = XFS_FSB_TO_AGNO(mp, start_block);
	xfs_agblock_t		agbno = XFS_FSB_TO_AGBNO(mp, start_block);
	struct xfs_extent	*slot;
	uint			idx;
	int			error;

	trace_xfs_bmap_free_deferred(mp, agno, 0, agbno, ext_len);

	error = xfs_free_extent(tp, start_block, ext_len, oinfo,
				XFS_AG_RESV_NONE);

	/*
	 * Dirty the transaction even if the free failed.  That guarantees
	 * the transaction gets aborted, which in turn:
	 *
	 * 1.) releases the EFI and frees the EFD
	 * 2.) shuts down the filesystem
	 */
	tp->t_flags |= XFS_TRANS_DIRTY;
	efdp->efd_item.li_desc->lid_flags |= XFS_LID_DIRTY;

	/* Log the extent in the next open EFD slot, then advance the cursor. */
	idx = efdp->efd_next_extent;
	ASSERT(idx < efdp->efd_format.efd_nextents);
	slot = &efdp->efd_format.efd_extents[idx];
	slot->ext_start = start_block;
	slot->ext_len = ext_len;
	efdp->efd_next_extent++;

	return error;
}
/*
 * Comparison callback used to order extent free items by the allocation
 * group (AG) number of their start block.
 *
 * priv is the xfs_mount used to decode AG numbers from filesystem block
 * numbers.  The return value is a's AG number minus b's AG number.
 */
static int
xfs_extent_free_diff_items(
	void				*priv,
	struct list_head		*a,
	struct list_head		*b)
{
	struct xfs_mount		*mp = priv;
	struct xfs_extent_free_item	*lhs =
		container_of(a, struct xfs_extent_free_item, xefi_list);
	struct xfs_extent_free_item	*rhs =
		container_of(b, struct xfs_extent_free_item, xefi_list);

	return XFS_FSB_TO_AGNO(mp, lhs->xefi_startblock) -
	       XFS_FSB_TO_AGNO(mp, rhs->xefi_startblock);
}
/*
 * Deferred-ops ->create_intent callback: obtain an EFI log item from
 * xfs_efi_init() for the given extent count and attach it to the
 * transaction.  Returns the new EFI.
 */
STATIC void *
xfs_extent_free_create_intent(
	struct xfs_trans		*tp,
	unsigned int			count)
{
	struct xfs_efi_log_item		*intent;

	ASSERT(tp != NULL);
	ASSERT(count > 0);

	intent = xfs_efi_init(tp->t_mountp, count);
	ASSERT(intent != NULL);

	/* Get a log_item_desc in the transaction to point at the new EFI. */
	xfs_trans_add_item(tp, &intent->efi_item);

	return intent;
}
/*
 * Deferred-ops ->log_item callback: copy one pending extent free into the
 * next slot of the EFI, and flag both the transaction and the EFI dirty.
 */
STATIC void
xfs_extent_free_log_item(
	struct xfs_trans		*tp,
	void				*intent,
	struct list_head		*item)
{
	struct xfs_efi_log_item		*efip = intent;
	struct xfs_extent_free_item	*xefi =
		container_of(item, struct xfs_extent_free_item, xefi_list);
	struct xfs_extent		*slot;
	uint				idx;

	tp->t_flags |= XFS_TRANS_DIRTY;
	efip->efi_item.li_desc->lid_flags |= XFS_LID_DIRTY;

	/*
	 * Claim a slot.  atomic_inc_return() yields the counter value after
	 * the increment, so the array index we want is one less than that.
	 */
	idx = atomic_inc_return(&efip->efi_next_extent) - 1;
	ASSERT(idx < efip->efi_format.efi_nextents);

	slot = &efip->efi_format.efi_extents[idx];
	slot->ext_start = xefi->xefi_startblock;
	slot->ext_len = xefi->xefi_blockcount;
}
/*
 * Deferred-ops ->create_done callback: get an EFD for the given intent (an
 * EFI) with count extents, so that the free extents can be processed.
 * Thin wrapper around xfs_trans_get_efd().
 */
STATIC void *
xfs_extent_free_create_done(
	struct xfs_trans		*tp,
	void				*intent,
	unsigned int			count)
{
	struct xfs_efi_log_item		*efip = intent;

	return xfs_trans_get_efd(tp, efip, count);
}
/* Process a free extent. */
STATIC int
xfs_extent_free_finish_item(
struct xfs_trans *tp,
struct xfs_defer_ops *dop,
struct list_head *item,
void *done_item,
void **state)
{
struct xfs_extent_free_item *free;
int error;
free = container_of(item, struct xfs_extent_free_item, xefi_list);
error = xfs_trans_free_extent(tp, done_item,
free->xefi_startblock,
free->xefi_blockcount,
&free->xefi_oinfo);
kmem_free(free);
return error;
}
/*
 * Deferred-ops ->abort_intent callback: release the given EFI via
 * xfs_efi_release().
 */
STATIC void
xfs_extent_free_abort_intent(
	void				*intent)
{
	struct xfs_efi_log_item		*efip = intent;

	xfs_efi_release(efip);
}
/*
 * Deferred-ops ->cancel_item callback: discard a pending extent free work
 * item by freeing its memory.  No extent is freed.
 */
STATIC void
xfs_extent_free_cancel_item(
	struct list_head		*item)
{
	struct xfs_extent_free_item	*xefi =
		container_of(item, struct xfs_extent_free_item, xefi_list);

	kmem_free(xefi);
}
/*
 * Deferred-operation descriptor for extent frees: ties the
 * XFS_DEFER_OPS_TYPE_FREE type to the callbacks defined in this file.
 */
static const struct xfs_defer_op_type xfs_extent_free_defer_type = {
	.type		= XFS_DEFER_OPS_TYPE_FREE,
	.max_items	= XFS_EFI_MAX_FAST_EXTENTS,

	/* Intent (EFI) handling. */
	.create_intent	= xfs_extent_free_create_intent,
	.log_item	= xfs_extent_free_log_item,
	.abort_intent	= xfs_extent_free_abort_intent,

	/* Done (EFD) handling. */
	.create_done	= xfs_extent_free_create_done,
	.finish_item	= xfs_extent_free_finish_item,

	/* Work item ordering and disposal. */
	.diff_items	= xfs_extent_free_diff_items,
	.cancel_item	= xfs_extent_free_cancel_item,
};
/*
 * Register the extent free deferred operation type by handing its
 * descriptor to xfs_defer_init_op_type().
 */
void
xfs_extent_free_init_defer_op(void)
{
	xfs_defer_init_op_type(&xfs_extent_free_defer_type);
}