linux_dsm_epyc7002/fs/btrfs/snapshot-size-query.c

#ifndef MY_ABC_HERE
#define MY_ABC_HERE
#endif
/*
 * Copyright (C) 2021 Synology Inc. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
#include <linux/sched.h>
#include <linux/rbtree.h>
#include "ctree.h"
#include "disk-io.h"
#include "backref.h"
#include "btrfs_inode.h"
/* helpers implemented in send.c */
extern int write_buf(struct file *filp, const void *buf, u32 len,
		     loff_t *off);
extern int tree_advance_with_mode(struct btrfs_path *path, int *level,
				  int root_level, int mode,
				  struct btrfs_key *key);

static int snap_entry_cmp(struct btrfs_snapshot_size_entry *e1,
			  struct btrfs_snapshot_size_entry *e2);
static int snap_entry_insert(struct btrfs_snapshot_size_ctx *ctx,
			     struct btrfs_snapshot_size_entry **insert,
			     int replace);
static inline int
snap_entry_check_node_shared(struct btrfs_fs_info *fs_info,
			     struct btrfs_snapshot_size_ctx *ctx,
			     struct btrfs_snapshot_size_entry *entry);
static inline bool
snap_entry_check_file_extent(struct btrfs_snapshot_size_entry *entry);
static inline int
snap_entry_find_extent_owner(struct btrfs_fs_info *fs_info,
			     struct btrfs_snapshot_size_ctx *ctx,
			     struct btrfs_snapshot_size_entry *entry,
			     u64 *owner_id);
static inline void
snap_entry_update_usage(struct btrfs_snapshot_size_ctx *ctx,
			struct btrfs_snapshot_size_entry *entry,
			u64 owner_id,
			struct btrfs_ioctl_snapshot_size_query_args *args);
static inline void free_snapshot_size_ctx(struct btrfs_snapshot_size_ctx *ctx,
					  u64 snap_count);
static inline struct btrfs_snapshot_size_ctx *
prepare_snapshot_size_ctx(struct btrfs_fs_info *fs_info,
			  struct btrfs_ioctl_snapshot_size_query_args *args);
static int
show_calculate_progress(struct btrfs_snapshot_size_ctx *ctx,
			struct btrfs_ioctl_snapshot_size_query_args *args,
			u64 skip_bytes);
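
/*
 * Entry point for the snapshot size query ioctl.  The caller is expected
 * to fill args->snap_count and args->id_maps[i].snap_id with the ids of
 * read-only snapshots, and args->fd with a file descriptor that receives
 * the optional progress output selected by args->flags
 * (BTRFS_SNAP_SIZE_SHOW_*).  On success the per-snapshot results are
 * written back to args->id_maps[i].marginal_size and the accumulated
 * total to args->calc_size.  Returns 0 on success, -EINTR if the user
 * interrupted the query, or another negative errno on failure.  (The
 * exact uapi wiring of these args lives in the ioctl dispatch code, not
 * here.)
 */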
int
btrfs_snapshot_size_query(struct file *file,
			  struct btrfs_ioctl_snapshot_size_query_args *args)
{
	int i;
	int ret = 0;
	u64 snap_count = args->snap_count;
	struct btrfs_fs_info *fs_info;
	struct btrfs_snapshot_size_ctx *ctx;
	struct rb_node *node;
	struct btrfs_snapshot_size_entry *entry;

	fs_info = BTRFS_I(file_inode(file))->root->fs_info;
	ctx = prepare_snapshot_size_ctx(fs_info, args);
	if (IS_ERR(ctx)) {
		ret = PTR_ERR(ctx);
		ctx = NULL;
		goto out;
	}
	/*
	 * Each iteration of the loop is composed of three parts:
	 * 1> Check whether the given node is shared or not
	 * 2> Check shared EXTENT_ITEMs and account their usage
	 * 3> Travel to a deeper node
	 */
	while (!RB_EMPTY_ROOT(&ctx->root)) {
		int nritems;
		int advance = ADVANCE;
		struct extent_buffer *eb;
		struct btrfs_snapshot_size_entry *next_entry;
		struct rb_node *next_node;

		/* DSM#108202: avoid soft lockup */
		/* DSM#111569: check signal and exit if user cancels */
		if (signal_pending(current)) {
			ret = -EINTR;
			break;
		}
		cond_resched();
		/*
		 * 1st part:
		 * Check whether the given node is shared, that is, whether
		 * it is pointed to by subvolumes (snapshots) that were not
		 * given to the ioctl().  The node is shared if the callback
		 * function returns a positive value (>0); in that case all
		 * of its descendants are shared as well, so there is no
		 * need to travel deeper into the tree.
		 */
		ret = 0;
		node = rb_first(&ctx->root);
		entry = rb_entry(node, struct btrfs_snapshot_size_entry,
				 node);
		if (entry->level != entry->root_level &&
		    entry->path->slots[entry->level] == 0) {
			ret = snap_entry_check_node_shared(fs_info, ctx,
							   entry);
			args->processed_size +=
				entry->root->fs_info->nodesize;
			if (ret < 0) {
				goto out;
			} else if (ret > 0) {
				advance = ADVANCE_ONLY_UPNEXT;
				goto advance;
			}
		}
		/*
		 * 2nd part:
		 * All slots in the leaf node are processed in one go.  If a
		 * slot is not a hole (bytenr == 0) and its EXTENT_ITEM is
		 * only referred to by the given subvolumes, its usage is
		 * accumulated into the subvolume with the largest ID that
		 * points to it.
		 */
		eb = entry->path->nodes[entry->level];
		nritems = btrfs_header_nritems(eb);
		while (entry->level == 0 &&
		       entry->path->slots[entry->level] < nritems) {
			u64 owner_id;

			/* DSM#108202: avoid soft lockup */
			/* DSM#111569: check signal and exit if user cancels */
			if (signal_pending(current)) {
				ret = -EINTR;
				break;
			}
			cond_resched();
			btrfs_item_key_to_cpu(entry->path->nodes[0],
					      &entry->key,
					      entry->path->slots[0]);
			if (!snap_entry_check_file_extent(entry))
				goto next;
			ret = snap_entry_find_extent_owner(fs_info, ctx,
							   entry, &owner_id);
			if (ret < 0)
				goto out;
			if (ret == 0) {
				snap_entry_update_usage(ctx, entry, owner_id,
							args);
			}
next:
			entry->path->slots[0]++;
		}
		/* do verbose display */
		if (show_calculate_progress(ctx, args, 100 * 1024 * 1024)) {
			ret = -EBADF;
			goto out;
		}
advance:
		/*
		 * 3rd part:
		 * Now that we are done processing this node, advance the
		 * tree position.  There is a subtlety here: after the tree
		 * advance, another subvolume's next node to be processed
		 * may be the same as this one.  Only one subvolume is kept
		 * to handle that node; the other keeps advancing until its
		 * next node no longer overlaps with an existing entry.
		 */
		next_node = rb_next(node);
		if (tree_advance_with_mode(entry->path, &entry->level,
					   entry->root_level, advance,
					   &entry->key) < 0) {
			rb_erase(&entry->node, &ctx->root);
			continue;
		}
		if (!next_node)
			continue;
		next_entry = rb_entry(next_node,
				      struct btrfs_snapshot_size_entry,
				      node);
		/*
		 * If this entry is still the lowest key in the tree after
		 * the advance, don't move it out and insert it again; that
		 * is just a waste of time.
		 */
		if (snap_entry_cmp(entry, next_entry) < 0)
			continue;
		rb_erase(&entry->node, &ctx->root);
		RB_CLEAR_NODE(&entry->node);
		while (snap_entry_insert(ctx, &entry, 1)) {
			if (tree_advance_with_mode(entry->path, &entry->level,
						   entry->root_level,
						   ADVANCE_ONLY_NEXT,
						   &entry->key) < 0) {
				/*
				 * This node is not in the tree anymore, so
				 * don't call rb_erase on it.
				 */
				break;
			}
		}
	}
	/* store the result */
	i = 0;
	node = rb_first(&ctx->snap_roots->root);
	while (node) {
		struct ulist_node *ulist_node = rb_entry(node,
							 struct ulist_node,
							 rb_node);

		entry = (struct btrfs_snapshot_size_entry *)ulist_node->aux;
		args->id_maps[i].snap_id = ulist_node->val;
		args->id_maps[i++].marginal_size = entry->snap_exclusive_size;
		node = rb_next(node);
	}
out:
	free_snapshot_size_ctx(ctx, snap_count);
	return ret;
}
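
/*
 * Ordering function for the rbtree of in-progress traversal positions.
 * A return value of 0 means both entries currently point at the very same
 * tree block, which lets the caller detect overlapping traversals and
 * keep only one of them.
 */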
static int snap_entry_cmp(struct btrfs_snapshot_size_entry *e1,
			  struct btrfs_snapshot_size_entry *e2)
{
	int cmp;
	u64 e1_blockptr;
	u64 e2_blockptr;
	u64 e1_gen;
	u64 e2_gen;

	/*
	 * We process the entries in this order:
	 * 1. lower key first
	 * 2. "higher" level first
	 * 3. lower block bytenr first
	 */
	cmp = btrfs_comp_cpu_keys(&e1->key, &e2->key);
	if (cmp != 0)
		return cmp;
	if (e1->level > e2->level)
		return -1;
	if (e1->level < e2->level)
		return 1;
	if (e1->level != 0) {
		e1_blockptr = btrfs_node_blockptr(e1->path->nodes[e1->level],
						  e1->path->slots[e1->level]);
		e2_blockptr = btrfs_node_blockptr(e2->path->nodes[e2->level],
						  e2->path->slots[e2->level]);
		e1_gen = btrfs_node_ptr_generation(e1->path->nodes[e1->level],
						   e1->path->slots[e1->level]);
		e2_gen = btrfs_node_ptr_generation(e2->path->nodes[e2->level],
						   e2->path->slots[e2->level]);
		if (e1_blockptr == e2_blockptr && e1_gen == e2_gen)
			return 0;
		if (e1_blockptr < e2_blockptr)
			return -1;
		if (e1_blockptr > e2_blockptr)
			return 1;
		WARN_ON(1);
	} else {
		e1_blockptr = e1->path->nodes[e1->level]->start;
		e2_blockptr = e2->path->nodes[e2->level]->start;
		if (e1_blockptr < e2_blockptr)
			return -1;
		if (e1_blockptr > e2_blockptr)
			return 1;
	}
	return 0;
}
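
/*
 * Insert an entry into the position rbtree.  Returns 0 if the entry was
 * linked in, or 1 if another entry already occupies the same position; in
 * the latter case, with 'replace' set, the entry with the larger root id
 * stays in the tree and *insert is pointed at the loser, which the caller
 * must keep advancing.
 */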
static int snap_entry_insert(struct btrfs_snapshot_size_ctx *ctx,
			     struct btrfs_snapshot_size_entry **insert,
			     int replace)
{
	struct rb_node **p = &ctx->root.rb_node;
	struct rb_node *parent_node = NULL;
	struct btrfs_snapshot_size_entry *entry;
	int cmp = 0;

	while (*p) {
		parent_node = *p;
		entry = rb_entry(parent_node,
				 struct btrfs_snapshot_size_entry, node);
		cmp = snap_entry_cmp(*insert, entry);
		if (cmp < 0) {
			p = &(*p)->rb_left;
		} else if (cmp > 0) {
			p = &(*p)->rb_right;
		} else {
			/*
			 * If the newly added entry shares the same key with
			 * an existing node in the rbtree and has a larger
			 * subvolume id, we keep the new entry and advance
			 * the existing node instead.  If this behavior ever
			 * changes, make sure to change all the
			 * highest_root_id handling under
			 * btrfs_find_shared_root in backref.c as well.
			 */
			if (replace && (*insert)->root_id > entry->root_id) {
				rb_replace_node(parent_node, &(*insert)->node,
						&ctx->root);
				RB_CLEAR_NODE(parent_node);
				*insert = entry;
			}
			return 1;
		}
	}
	rb_link_node(&(*insert)->node, parent_node, p);
	rb_insert_color(&(*insert)->node, &ctx->root);
	return 0;
}
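
/*
 * Return true when the item at the current leaf slot is a regular or
 * preallocated file extent that actually occupies disk space (i.e. not an
 * inline extent and not a hole with disk_bytenr == 0).
 */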
static inline bool
snap_entry_check_file_extent(struct btrfs_snapshot_size_entry *entry)
{
	bool ret = false;
	struct btrfs_file_extent_item *ei;
	u8 type;
	u64 bytenr;

	if (entry->key.type != BTRFS_EXTENT_DATA_KEY)
		goto out;
	ei = btrfs_item_ptr(entry->path->nodes[0], entry->path->slots[0],
			    struct btrfs_file_extent_item);
	type = btrfs_file_extent_type(entry->path->nodes[0], ei);
	if (type != BTRFS_FILE_EXTENT_PREALLOC &&
	    type != BTRFS_FILE_EXTENT_REG)
		goto out;
	bytenr = btrfs_file_extent_disk_bytenr(entry->path->nodes[0], ei);
	if (bytenr == 0)
		goto out;
	ret = true;
out:
	return ret;
}
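
/*
 * Ask the backref walker whether the tree block the entry is positioned
 * on is referenced by any root outside ctx->snap_roots.  Returns >0 if it
 * is shared outside the queried set, 0 if not, and <0 on error, mirroring
 * how the main loop interprets the result.
 */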
static inline int
snap_entry_check_node_shared(struct btrfs_fs_info *fs_info,
			     struct btrfs_snapshot_size_ctx *ctx,
			     struct btrfs_snapshot_size_entry *entry)
{
	u64 bytenr = entry->path->nodes[entry->level]->start;
	u64 parent_bytenr = entry->path->nodes[entry->level + 1]->start;
	u64 root_objectid = entry->root_id;
	u64 inum = entry->key.objectid;
	u64 file_offset = entry->key.offset;
	u64 datao = 0;

	return btrfs_check_shared_inlist(fs_info, root_objectid, inum,
					 file_offset, datao, bytenr,
					 ctx->snap_roots, parent_bytenr,
					 NULL);
}
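
/*
 * Decide which queried snapshot gets charged for the data extent at the
 * current slot.  Returns 0 with *owner_id set (per the highest_root_id
 * logic referenced in snap_entry_insert(), presumably the largest root id
 * in the set that references the extent), >0 if the extent is shared
 * outside the set and must not be counted, or <0 on error.
 */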
static inline int
snap_entry_find_extent_owner(struct btrfs_fs_info *fs_info,
			     struct btrfs_snapshot_size_ctx *ctx,
			     struct btrfs_snapshot_size_entry *entry,
			     u64 *owner_id)
{
	struct btrfs_file_extent_item *ei =
		btrfs_item_ptr(entry->path->nodes[0],
			       entry->path->slots[0],
			       struct btrfs_file_extent_item);
	u64 bytenr = btrfs_file_extent_disk_bytenr(entry->path->nodes[0], ei);
	u64 parent_bytenr = entry->path->nodes[0]->start;
	u64 root_objectid = entry->root_id;
	u64 inum = entry->key.objectid;
	u64 file_offset = entry->key.offset;
	u64 datao = btrfs_file_extent_offset(entry->path->nodes[0], ei);

	*owner_id = entry->root_id;
	return btrfs_check_shared_inlist(fs_info, root_objectid, inum,
					 file_offset, datao, bytenr,
					 ctx->snap_roots, parent_bytenr,
					 owner_id);
}
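
/*
 * Account the on-disk size of the current file extent: add it to the
 * owning snapshot's exclusive size and to the running total in
 * args->calc_size.
 */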
static inline void
snap_entry_update_usage(struct btrfs_snapshot_size_ctx *ctx,
			struct btrfs_snapshot_size_entry *entry,
			u64 owner_id,
			struct btrfs_ioctl_snapshot_size_query_args *args)
{
	struct btrfs_file_extent_item *ei =
		btrfs_item_ptr(entry->path->nodes[0],
			       entry->path->slots[0],
			       struct btrfs_file_extent_item);
	u64 bytes = btrfs_file_extent_disk_num_bytes(entry->path->nodes[0],
						     ei);

	if (owner_id == entry->root_id) {
		entry->snap_exclusive_size += bytes;
	} else {
		struct ulist_node *node;
		struct btrfs_snapshot_size_entry *counted_entry;

		node = ulist_search(ctx->snap_roots, owner_id);
		counted_entry =
			(struct btrfs_snapshot_size_entry *)node->aux;
		counted_entry->snap_exclusive_size += bytes;
	}
	args->calc_size += bytes;
}
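
/*
 * Tear down everything prepare_snapshot_size_ctx() set up.  Safe to call
 * with a NULL or partially initialized ctx; per-snapshot state is only
 * released for entries whose fields were actually populated.
 */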
static inline void free_snapshot_size_ctx(struct btrfs_snapshot_size_ctx *ctx,
					  u64 snap_count)
{
	int i;

	if (!ctx)
		return;
	for (i = 0; i < snap_count; i++) {
		if (ctx->snaps[i].path)
			btrfs_free_path(ctx->snaps[i].path);
		if (ctx->snaps[i].root) {
			/*
			 * Drop the send_in_progress marker before putting
			 * the root; dereferencing the root after
			 * btrfs_put_root() risks a use-after-free.
			 */
			spin_lock(&ctx->snaps[i].root->root_item_lock);
			ctx->snaps[i].root->send_in_progress--;
			spin_unlock(&ctx->snaps[i].root->root_item_lock);
			btrfs_put_root(ctx->snaps[i].root);
		}
	}
	if (ctx->out_filp)
		fput(ctx->out_filp);
	if (ctx->snap_roots)
		ulist_free(ctx->snap_roots);
	kvfree(ctx);
}
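
/*
 * Build the query context: take a reference on each queried snapshot
 * root, mark it busy the same way send does (send_in_progress), clone its
 * commit root so the walk runs against a stable tree, and seed the
 * position rbtree with every snapshot's starting position.
 */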
static inline struct btrfs_snapshot_size_ctx *
prepare_snapshot_size_ctx(struct btrfs_fs_info *fs_info,
			  struct btrfs_ioctl_snapshot_size_query_args *args)
{
	int i;
	int ret = 0;
	u64 snap_count = args->snap_count;
	struct btrfs_snapshot_size_ctx *ctx;
	struct btrfs_root *snap_root;

	ctx = kvzalloc(sizeof(*ctx) +
		       sizeof(struct btrfs_snapshot_size_entry) * snap_count,
		       GFP_KERNEL);
	if (!ctx) {
		ret = -ENOMEM;
		goto out;
	}
	ctx->snap_roots = ulist_alloc(GFP_NOFS);
	if (!ctx->snap_roots) {
		ret = -ENOMEM;
		goto out;
	}
	ctx->root = RB_ROOT;
	ctx->flags = args->flags;
	ctx->out_filp = fget(args->fd);
	if (!ctx->out_filp) {
		ret = -EBADF;
		goto out;
	}

	for (i = 0; i < snap_count; ++i) {
		int level;
		struct btrfs_snapshot_size_entry *entry;
		struct extent_buffer *eb;

		ret = ulist_add(ctx->snap_roots,
				args->id_maps[i].snap_id,
				(u64)(&ctx->snaps[i]), GFP_KERNEL);
		if (ret <= 0) {
			/* ret == 0 means a duplicate snapshot id was given */
			if (ret == 0)
				ret = -EINVAL;
			goto out;
		}
		ret = 0;
		entry = &ctx->snaps[i];
		entry->root_id = args->id_maps[i].snap_id;
		entry->path = btrfs_alloc_path();
		if (!entry->path) {
			ret = -ENOMEM;
			goto out;
		}
		snap_root = btrfs_get_fs_root(fs_info, entry->root_id, true);
		if (IS_ERR(snap_root)) {
			ret = PTR_ERR(snap_root);
			goto out;
		}
		spin_lock(&snap_root->root_item_lock);
		if (btrfs_root_dead(snap_root) ||
		    !btrfs_root_readonly(snap_root)) {
			spin_unlock(&snap_root->root_item_lock);
			btrfs_put_root(snap_root);
			ret = -EPERM;
			goto out;
		}
		if (snap_root->dedupe_in_progress) {
			spin_unlock(&snap_root->root_item_lock);
			btrfs_put_root(snap_root);
			ret = -EAGAIN;
			goto out;
		}
#ifdef MY_ABC_HERE
		if (snap_root->syno_orphan_cleanup.cleanup_in_progress) {
			spin_unlock(&snap_root->root_item_lock);
			btrfs_put_root(snap_root);
			ret = -EAGAIN;
			goto out;
		}
#endif /* MY_ABC_HERE */
		snap_root->send_in_progress++;
		spin_unlock(&snap_root->root_item_lock);
		/*
		 * From here on free_snapshot_size_ctx() drops both the root
		 * reference and the send_in_progress marker through
		 * entry->root, including on the error paths below.
		 */
		entry->root = snap_root;
		down_read(&snap_root->fs_info->commit_root_sem);
		eb = btrfs_clone_extent_buffer(snap_root->commit_root);
		up_read(&snap_root->fs_info->commit_root_sem);
		if (!eb) {
			ret = -ENOMEM;
			goto out;
		}
		level = btrfs_header_level(eb);
		entry->path->search_commit_root = 1;
		entry->path->skip_locking = 1;
		entry->root_level = entry->level = level;
		entry->path->nodes[level] = eb;
		if (level == 0)
			btrfs_item_key_to_cpu(entry->path->nodes[level],
					      &entry->key,
					      entry->path->slots[level]);
		else
			btrfs_node_key_to_cpu(entry->path->nodes[level],
					      &entry->key,
					      entry->path->slots[level]);
		/*
		 * On a position collision keep advancing this entry until
		 * it lands on a block no other queried snapshot covers.
		 */
		while (snap_entry_insert(ctx, &entry, 0)) {
			if (tree_advance_with_mode(entry->path, &entry->level,
						   entry->root_level,
						   ADVANCE_ONLY_NEXT,
						   &entry->key) < 0)
				break;
		}
	}
out:
	if (ret) {
		free_snapshot_size_ctx(ctx, snap_count);
		return ERR_PTR(ret);
	}
	return ctx;
}
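
/*
 * Write throttled progress output to ctx->out_filp: reports are emitted
 * at most once every two seconds and only after at least skip_bytes of
 * new data have been accounted since the previous report.  Returns
 * nonzero if writing to the output file fails.
 */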
static int
show_calculate_progress(struct btrfs_snapshot_size_ctx *ctx,
			struct btrfs_ioctl_snapshot_size_query_args *args,
			u64 skip_bytes)
{
	int ret = 0;
	int len;
	char buf[256];
	u64 flags = ctx->flags;

	if (args->calc_size - ctx->last_calc_size < skip_bytes)
		goto out;
	if (get_seconds() - ctx->last_show < 2)
		goto out;
	if (!(flags & (BTRFS_SNAP_SIZE_SHOW_MARGINAL_SIZE |
		       BTRFS_SNAP_SIZE_SHOW_EXCL_SIZE |
		       BTRFS_SNAP_SIZE_SHOW_PROCESSED_SIZE)))
		goto out;
	if (flags & BTRFS_SNAP_SIZE_SHOW_MARGINAL_SIZE) {
		struct rb_node *node = rb_first(&ctx->snap_roots->root);

		while (node) {
			struct ulist_node *lnode = rb_entry(node,
							    struct ulist_node,
							    rb_node);
			struct btrfs_snapshot_size_entry *entry =
				(struct btrfs_snapshot_size_entry *)lnode->aux;
			u64 snap_id = lnode->val;
			u64 marginal_size = entry->snap_exclusive_size;

			len = snprintf(buf, sizeof(buf),
				       "subvol(%llu) %llu bytes\n", snap_id,
				       marginal_size);
			ret = write_buf(ctx->out_filp, buf, len, &ctx->off);
			if (ret)
				goto out;
			node = rb_next(node);
		}
	}
	if (flags & BTRFS_SNAP_SIZE_SHOW_EXCL_SIZE) {
		/* show the exclusive size accumulated so far */
		len = snprintf(buf, sizeof(buf), "exclusive %llu bytes\n",
			       args->calc_size);
		ret = write_buf(ctx->out_filp, buf, len, &ctx->off);
		if (ret)
			goto out;
		ctx->last_calc_size = args->calc_size;
	}
	if (flags & BTRFS_SNAP_SIZE_SHOW_PROCESSED_SIZE) {
		len = snprintf(buf, sizeof(buf), "processed %llu bytes\n",
			       args->processed_size);
		ret = write_buf(ctx->out_filp, buf, len, &ctx->off);
		if (ret)
			goto out;
	}
	ret = write_buf(ctx->out_filp, "\n", 1, &ctx->off);
	if (ret)
		goto out;
	ctx->last_show = get_seconds();
out:
	return ret;
}