linux_dsm_epyc7002/fs/hfs/bnode.c
Desmond Cheong Zhi Xi 06b3d9923f hfs: fix high memory mapping in hfs_bnode_read
[ Upstream commit 54a5ead6f5e2b47131a7385d0c0af18e7b89cb02 ]

Pages that we read in hfs_bnode_read need to be kmapped into kernel
address space.  However, currently only the 0th page is kmapped.  If the
given offset + length exceeds this 0th page, then we have an invalid
memory access.

To fix this, we kmap relevant pages one by one and copy their relevant
portions of data.

An example of invalid memory access occurring without this fix can be seen
in the following crash report:

  ==================================================================
  BUG: KASAN: use-after-free in memcpy include/linux/fortify-string.h:191 [inline]
  BUG: KASAN: use-after-free in hfs_bnode_read+0xc4/0xe0 fs/hfs/bnode.c:26
  Read of size 2 at addr ffff888125fdcffe by task syz-executor5/4634

  CPU: 0 PID: 4634 Comm: syz-executor5 Not tainted 5.13.0-syzkaller #0
  Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
  Call Trace:
   __dump_stack lib/dump_stack.c:79 [inline]
   dump_stack+0x195/0x1f8 lib/dump_stack.c:120
   print_address_description.constprop.0+0x1d/0x110 mm/kasan/report.c:233
   __kasan_report mm/kasan/report.c:419 [inline]
   kasan_report.cold+0x7b/0xd4 mm/kasan/report.c:436
   check_region_inline mm/kasan/generic.c:180 [inline]
   kasan_check_range+0x154/0x1b0 mm/kasan/generic.c:186
   memcpy+0x24/0x60 mm/kasan/shadow.c:65
   memcpy include/linux/fortify-string.h:191 [inline]
   hfs_bnode_read+0xc4/0xe0 fs/hfs/bnode.c:26
   hfs_bnode_read_u16 fs/hfs/bnode.c:34 [inline]
   hfs_bnode_find+0x880/0xcc0 fs/hfs/bnode.c:365
   hfs_brec_find+0x2d8/0x540 fs/hfs/bfind.c:126
   hfs_brec_read+0x27/0x120 fs/hfs/bfind.c:165
   hfs_cat_find_brec+0x19a/0x3b0 fs/hfs/catalog.c:194
   hfs_fill_super+0xc13/0x1460 fs/hfs/super.c:419
   mount_bdev+0x331/0x3f0 fs/super.c:1368
   hfs_mount+0x35/0x40 fs/hfs/super.c:457
   legacy_get_tree+0x10c/0x220 fs/fs_context.c:592
   vfs_get_tree+0x93/0x300 fs/super.c:1498
   do_new_mount fs/namespace.c:2905 [inline]
   path_mount+0x13f5/0x20e0 fs/namespace.c:3235
   do_mount fs/namespace.c:3248 [inline]
   __do_sys_mount fs/namespace.c:3456 [inline]
   __se_sys_mount fs/namespace.c:3433 [inline]
   __x64_sys_mount+0x2b8/0x340 fs/namespace.c:3433
   do_syscall_64+0x37/0xc0 arch/x86/entry/common.c:47
   entry_SYSCALL_64_after_hwframe+0x44/0xae
  RIP: 0033:0x45e63a
  Code: 48 c7 c2 bc ff ff ff f7 d8 64 89 02 b8 ff ff ff ff eb d2 e8 88 04 00 00 0f 1f 84 00 00 00 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48
  RSP: 002b:00007f9404d410d8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a5
  RAX: ffffffffffffffda RBX: 0000000020000248 RCX: 000000000045e63a
  RDX: 0000000020000000 RSI: 0000000020000100 RDI: 00007f9404d41120
  RBP: 00007f9404d41120 R08: 00000000200002c0 R09: 0000000020000000
  R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000003
  R13: 0000000000000003 R14: 00000000004ad5d8 R15: 0000000000000000

  The buggy address belongs to the page:
  page:00000000dadbcf3e refcount:0 mapcount:0 mapping:0000000000000000 index:0x1 pfn:0x125fdc
  flags: 0x2fffc0000000000(node=0|zone=2|lastcpupid=0x3fff)
  raw: 02fffc0000000000 ffffea000497f748 ffffea000497f6c8 0000000000000000
  raw: 0000000000000001 0000000000000000 00000000ffffffff 0000000000000000
  page dumped because: kasan: bad access detected

  Memory state around the buggy address:
   ffff888125fdce80: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
   ffff888125fdcf00: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
  >ffff888125fdcf80: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
                                                                  ^
   ffff888125fdd000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
   ffff888125fdd080: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
  ==================================================================

Link: https://lkml.kernel.org/r/20210701030756.58760-3-desmondcheongzx@gmail.com
Signed-off-by: Desmond Cheong Zhi Xi <desmondcheongzx@gmail.com>
Reviewed-by: Viacheslav Dubeyko <slava@dubeyko.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Gustavo A. R. Silva <gustavoars@kernel.org>
Cc: Shuah Khan <skhan@linuxfoundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
2021-07-31 08:16:12 +02:00

497 lines
12 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* linux/fs/hfs/bnode.c
*
* Copyright (C) 2001
* Brad Boyer (flar@allandria.com)
* (C) 2003 Ardis Technologies <roman@ardistech.com>
*
* Handle basic btree node operations
*/
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include "btree.h"
/*
 * Copy @len bytes starting at node-relative offset @off into @buf.
 *
 * The request may straddle several of the node's pages, so each page is
 * mapped on its own and only the part of the request that lies in that
 * page is copied.  Copying stops early (leaving the tail of @buf
 * untouched) once the page index reaches tree->pages_per_bnode.
 */
void hfs_bnode_read(struct hfs_bnode *node, void *buf, int off, int len)
{
	int copied = 0;
	int idx;

	off += node->page_offset;
	idx = off >> PAGE_SHIFT;
	off &= ~PAGE_MASK;	/* offset inside the first page touched */

	while (copied < len && idx < node->tree->pages_per_bnode) {
		int chunk = min_t(int, len - copied, PAGE_SIZE - off);
		void *kaddr = kmap_atomic(node->page[idx]);

		memcpy(buf + copied, kaddr + off, chunk);
		kunmap_atomic(kaddr);

		copied += chunk;
		idx++;
		off = 0;	/* every following page is read from its start */
	}
}
/*
 * Read the big-endian 16-bit value stored at offset @off in @node and
 * return it in CPU byte order.
 */
u16 hfs_bnode_read_u16(struct hfs_bnode *node, int off)
{
	__be16 raw;

	/* goes through the generic byte copier; could be optimized later */
	hfs_bnode_read(node, &raw, off, sizeof(raw));
	return be16_to_cpu(raw);
}
/* Read the single byte stored at offset @off in @node. */
u8 hfs_bnode_read_u8(struct hfs_bnode *node, int off)
{
	u8 byte;

	/* goes through the generic byte copier; could be optimized later */
	hfs_bnode_read(node, &byte, off, sizeof(byte));
	return byte;
}
/*
 * Copy the key that starts at offset @off in @node into @key.
 *
 * Leaf nodes, and index nodes of trees flagged HFS_TREE_VARIDXKEYS, take
 * the key length from the byte at @off; otherwise tree->max_key_len is
 * used.  In both cases one extra byte is copied for the length byte itself.
 *
 * NOTE(review): the computed length is not checked against the size of
 * the buffer behind @key here - confirm that callers guarantee it fits.
 */
void hfs_bnode_read_key(struct hfs_bnode *node, void *key, int off)
{
	struct hfs_btree *btree = node->tree;
	int nbytes;

	if (node->type != HFS_NODE_LEAF &&
	    !(btree->attributes & HFS_TREE_VARIDXKEYS))
		nbytes = btree->max_key_len + 1;
	else
		nbytes = hfs_bnode_read_u8(node, off) + 1;

	hfs_bnode_read(node, key, off, nbytes);
}
/*
 * Copy @len bytes from @buf into @node at node-relative offset @off and
 * mark the page dirty.  Only the node's first page is mapped and written.
 */
void hfs_bnode_write(struct hfs_bnode *node, void *buf, int off, int len)
{
	struct page *first = node->page[0];
	void *kaddr;

	off += node->page_offset;

	kaddr = kmap(first);
	memcpy(kaddr + off, buf, len);
	kunmap(first);

	set_page_dirty(first);
}
/* Store @data as a big-endian 16-bit value at offset @off in @node. */
void hfs_bnode_write_u16(struct hfs_bnode *node, int off, u16 data)
{
	__be16 raw = cpu_to_be16(data);

	/* goes through the generic byte writer; could be optimized later */
	hfs_bnode_write(node, &raw, off, sizeof(raw));
}
/* Store the single byte @data at offset @off in @node. */
void hfs_bnode_write_u8(struct hfs_bnode *node, int off, u8 data)
{
	/* goes through the generic byte writer; could be optimized later */
	hfs_bnode_write(node, &data, off, sizeof(data));
}
/*
 * Zero @len bytes of @node starting at node-relative offset @off and mark
 * the page dirty.  Only the node's first page is mapped and written.
 */
void hfs_bnode_clear(struct hfs_bnode *node, int off, int len)
{
	struct page *first = node->page[0];
	void *kaddr;

	off += node->page_offset;

	kaddr = kmap(first);
	memset(kaddr + off, 0, len);
	kunmap(first);

	set_page_dirty(first);
}
/*
 * Copy @len bytes from offset @src in @src_node to offset @dst in
 * @dst_node and mark the destination page dirty.  Only the first page of
 * each node is mapped.  A zero @len is a no-op (apart from the debug
 * message).
 */
void hfs_bnode_copy(struct hfs_bnode *dst_node, int dst,
		    struct hfs_bnode *src_node, int src, int len)
{
	struct page *from, *to;
	void *from_addr, *to_addr;

	hfs_dbg(BNODE_MOD, "copybytes: %u,%u,%u\n", dst, src, len);
	if (len == 0)
		return;

	dst += dst_node->page_offset;
	src += src_node->page_offset;
	to = dst_node->page[0];
	from = src_node->page[0];

	to_addr = kmap(to);
	from_addr = kmap(from);
	memcpy(to_addr + dst, from_addr + src, len);
	kunmap(from);
	kunmap(to);

	set_page_dirty(to);
}
/*
 * Move @len bytes inside @node from offset @src to offset @dst; the two
 * ranges may overlap.  Only the node's first page is mapped; it is marked
 * dirty afterwards.  A zero @len is a no-op (apart from the debug message).
 */
void hfs_bnode_move(struct hfs_bnode *node, int dst, int src, int len)
{
	struct page *first;
	void *base;

	hfs_dbg(BNODE_MOD, "movebytes: %u,%u,%u\n", dst, src, len);
	if (len == 0)
		return;

	dst += node->page_offset;
	src += node->page_offset;
	first = node->page[0];

	base = kmap(first);
	memmove(base + dst, base + src, len);
	kunmap(first);

	set_page_dirty(first);
}
/*
 * Emit a debug dump (BNODE_MOD debug class) of @node: the node number,
 * the fields of its on-disk descriptor, and then every entry of the
 * record-offset table together with a short summary of the key found at
 * each offset.
 */
void hfs_bnode_dump(struct hfs_bnode *node)
{
struct hfs_bnode_desc desc;
__be32 cnid;
int i, off, key_off;
hfs_dbg(BNODE_MOD, "bnode: %d\n", node->this);
/* the descriptor is read from offset 0 of the node */
hfs_bnode_read(node, &desc, 0, sizeof(desc));
hfs_dbg(BNODE_MOD, "%d, %d, %d, %d, %d\n",
be32_to_cpu(desc.next), be32_to_cpu(desc.prev),
desc.type, desc.height, be16_to_cpu(desc.num_recs));
/*
 * The offset table is made of 16-bit entries; the walk starts at the
 * last two bytes of the node and moves toward lower addresses.  It
 * visits num_recs + 1 entries; the final one (i == 0) is only printed
 * as an offset, without a key summary.
 */
off = node->tree->node_size - 2;
for (i = be16_to_cpu(desc.num_recs); i >= 0; off -= 2, i--) {
key_off = hfs_bnode_read_u16(node, off);
hfs_dbg_cont(BNODE_MOD, " %d", key_off);
if (i && node->type == HFS_NODE_INDEX) {
int tmp;
/*
 * tmp is the distance from the start of the key to the 4-byte
 * value printed below: the key-length byte forced odd, plus one,
 * for variable-length index keys, otherwise max_key_len + 1.
 */
if (node->tree->attributes & HFS_TREE_VARIDXKEYS)
tmp = (hfs_bnode_read_u8(node, key_off) | 1) + 1;
else
tmp = node->tree->max_key_len + 1;
hfs_dbg_cont(BNODE_MOD, " (%d,%d",
tmp, hfs_bnode_read_u8(node, key_off));
hfs_bnode_read(node, &cnid, key_off + tmp, 4);
hfs_dbg_cont(BNODE_MOD, ",%d)", be32_to_cpu(cnid));
} else if (i && node->type == HFS_NODE_LEAF) {
int tmp;
/* for leaf records only the key-length byte is printed */
tmp = hfs_bnode_read_u8(node, key_off);
hfs_dbg_cont(BNODE_MOD, " (%d)", tmp);
}
}
hfs_dbg_cont(BNODE_MOD, "\n");
}
/*
 * Take @node out of its sibling chain and flag it HFS_BNODE_DELETED.
 *
 * The previous sibling's "next" link and the next sibling's "prev" link
 * are updated both in memory and in the on-disk descriptor.  When @node
 * is a leaf sitting at either end of the chain, the tree's leaf_head /
 * leaf_tail is adjusted instead.  If a sibling cannot be loaded the
 * function returns at that point, without setting the deleted flag.
 */
void hfs_bnode_unlink(struct hfs_bnode *node)
{
	struct hfs_btree *tree = node->tree;
	struct hfs_bnode *sibling;
	__be32 link;

	if (node->prev) {
		sibling = hfs_bnode_find(tree, node->prev);
		if (IS_ERR(sibling))
			return;
		sibling->next = node->next;
		link = cpu_to_be32(sibling->next);
		hfs_bnode_write(sibling, &link,
				offsetof(struct hfs_bnode_desc, next), 4);
		hfs_bnode_put(sibling);
	} else if (node->type == HFS_NODE_LEAF) {
		tree->leaf_head = node->next;
	}

	if (node->next) {
		sibling = hfs_bnode_find(tree, node->next);
		if (IS_ERR(sibling))
			return;
		sibling->prev = node->prev;
		link = cpu_to_be32(sibling->prev);
		hfs_bnode_write(sibling, &link,
				offsetof(struct hfs_bnode_desc, prev), 4);
		hfs_bnode_put(sibling);
	} else if (node->type == HFS_NODE_LEAF) {
		tree->leaf_tail = node->prev;
	}

	/* move down? */
	if (!node->prev && !node->next)
		printk(KERN_DEBUG "hfs_btree_del_level\n");

	/* a node without a parent was the root: the tree is now empty */
	if (!node->parent) {
		tree->root = 0;
		tree->depth = 0;
	}

	set_bit(HFS_BNODE_DELETED, &node->flags);
}
/*
 * Fold node number @num into a bucket index for tree->node_hash.  The
 * result is masked with NODE_HASH_SIZE - 1.
 */
static inline int hfs_bnode_hash(u32 num)
{
	u32 folded = num + (num >> 16);

	folded += folded >> 8;
	return folded & (NODE_HASH_SIZE - 1);
}
/*
 * Look up node number @cnid in @tree's in-memory node hash.
 *
 * Returns the hashed node, or NULL when no such node is hashed or when
 * @cnid is not below tree->node_count (an error is logged in that case).
 * No reference is taken on the returned node.  The callers in this file
 * invoke this with tree->hash_lock held.
 */
struct hfs_bnode *hfs_bnode_findhash(struct hfs_btree *tree, u32 cnid)
{
	struct hfs_bnode *node;

	if (cnid >= tree->node_count) {
		/* cnid is unsigned: %u keeps large bogus values from printing negative */
		pr_err("request for non-existent node %u in B*Tree\n", cnid);
		return NULL;
	}

	for (node = tree->node_hash[hfs_bnode_hash(cnid)];
	     node; node = node->next_hash) {
		if (node->this == cnid)
			return node;
	}
	return NULL;
}
/*
 * Allocate the in-memory node for node number @cnid, insert it into the
 * tree's hash and read the pages that back it.
 *
 * Returns:
 *  - NULL if @cnid is out of range or the allocation fails;
 *  - the freshly allocated node, flagged HFS_BNODE_NEW and holding one
 *    reference for the caller; HFS_BNODE_ERROR is additionally set if a
 *    backing page could not be read;
 *  - if another thread hashed the same node first, that existing node,
 *    after waiting for its HFS_BNODE_NEW flag to clear.  A reference is
 *    taken on it for the caller, so that every non-NULL return value is
 *    owned by the caller and balanced by a later hfs_bnode_put().
 */
static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid)
{
	struct hfs_bnode *node, *node2;
	struct address_space *mapping;
	struct page *page;
	int size, block, i, hash;
	loff_t off;

	if (cnid >= tree->node_count) {
		/* cnid is unsigned: %u keeps large bogus values from printing negative */
		pr_err("request for non-existent node %u in B*Tree\n", cnid);
		return NULL;
	}

	/* the page pointer array is stored right behind the node structure */
	size = sizeof(struct hfs_bnode) + tree->pages_per_bnode *
		sizeof(struct page *);
	node = kzalloc(size, GFP_KERNEL);
	if (!node)
		return NULL;
	node->tree = tree;
	node->this = cnid;
	set_bit(HFS_BNODE_NEW, &node->flags);
	atomic_set(&node->refcnt, 1);
	hfs_dbg(BNODE_REFS, "new_node(%d:%d): 1\n",
		node->tree->cnid, node->this);
	init_waitqueue_head(&node->lock_wq);

	spin_lock(&tree->hash_lock);
	node2 = hfs_bnode_findhash(tree, cnid);
	if (!node2) {
		hash = hfs_bnode_hash(cnid);
		node->next_hash = tree->node_hash[hash];
		tree->node_hash[hash] = node;
		tree->node_hash_cnt++;
	} else {
		/*
		 * Lost the race against another creator.  Take the caller's
		 * reference while the hash lock still pins node2; returning
		 * it unreferenced would unbalance the caller's later
		 * hfs_bnode_put().
		 */
		hfs_bnode_get(node2);
		spin_unlock(&tree->hash_lock);
		kfree(node);
		wait_event(node2->lock_wq,
			   !test_bit(HFS_BNODE_NEW, &node2->flags));
		return node2;
	}
	spin_unlock(&tree->hash_lock);

	mapping = tree->inode->i_mapping;
	off = (loff_t)cnid * tree->node_size;
	block = off >> PAGE_SHIFT;
	node->page_offset = off & ~PAGE_MASK;
	for (i = 0; i < tree->pages_per_bnode; i++) {
		page = read_mapping_page(mapping, block++, NULL);
		if (IS_ERR(page))
			goto fail;
		if (PageError(page)) {
			put_page(page);
			goto fail;
		}
		node->page[i] = page;
	}

	return node;
fail:
	/* the node stays hashed; the caller sees the error flag */
	set_bit(HFS_BNODE_ERROR, &node->flags);
	return node;
}
/*
 * Remove @node from its tree's hash chain and decrement the count of
 * hashed nodes.  BUGs if @node is not found in its bucket.
 */
void hfs_bnode_unhash(struct hfs_bnode *node)
{
	struct hfs_bnode **link;

	hfs_dbg(BNODE_REFS, "remove_node(%d:%d): %d\n",
		node->tree->cnid, node->this, atomic_read(&node->refcnt));

	/* walk the bucket until the link pointing at @node is found */
	link = &node->tree->node_hash[hfs_bnode_hash(node->this)];
	while (*link && *link != node)
		link = &(*link)->next_hash;

	BUG_ON(!*link);
	*link = node->next_hash;
	node->tree->node_hash_cnt--;
}
/*
 * Load node number @num of @tree.
 *
 * If the node is already hashed, a reference is taken and the function
 * waits until its HFS_BNODE_NEW flag is cleared.  Otherwise the node is
 * created via __hfs_bnode_create(), its descriptor is copied into the
 * in-memory node, and the descriptor and record-offset table are
 * sanity-checked before HFS_BNODE_NEW is cleared and waiters are woken.
 *
 * Returns the node on success, ERR_PTR(-ENOMEM) when
 * __hfs_bnode_create() returns NULL, or ERR_PTR(-EIO) when the node
 * carries HFS_BNODE_ERROR or fails validation.
 */
struct hfs_bnode *hfs_bnode_find(struct hfs_btree *tree, u32 num)
{
struct hfs_bnode *node;
struct hfs_bnode_desc *desc;
int i, rec_off, off, next_off;
int entry_size, key_size;
/* fast path: the node is already in the hash */
spin_lock(&tree->hash_lock);
node = hfs_bnode_findhash(tree, num);
if (node) {
hfs_bnode_get(node);
spin_unlock(&tree->hash_lock);
/* wait until the HFS_BNODE_NEW flag has been cleared */
wait_event(node->lock_wq, !test_bit(HFS_BNODE_NEW, &node->flags));
if (test_bit(HFS_BNODE_ERROR, &node->flags))
goto node_error;
return node;
}
spin_unlock(&tree->hash_lock);
node = __hfs_bnode_create(tree, num);
if (!node)
return ERR_PTR(-ENOMEM);
if (test_bit(HFS_BNODE_ERROR, &node->flags))
goto node_error;
/* NEW already clear: the node needs no initialization here */
if (!test_bit(HFS_BNODE_NEW, &node->flags))
return node;
/* copy the on-disk descriptor fields into the in-memory node */
desc = (struct hfs_bnode_desc *)(kmap(node->page[0]) + node->page_offset);
node->prev = be32_to_cpu(desc->prev);
node->next = be32_to_cpu(desc->next);
node->num_recs = be16_to_cpu(desc->num_recs);
node->type = desc->type;
node->height = desc->height;
kunmap(node->page[0]);
/* the height must be consistent with the node type */
switch (node->type) {
case HFS_NODE_HEADER:
case HFS_NODE_MAP:
if (node->height != 0)
goto node_error;
break;
case HFS_NODE_LEAF:
if (node->height != 1)
goto node_error;
break;
case HFS_NODE_INDEX:
if (node->height <= 1 || node->height > tree->depth)
goto node_error;
break;
default:
goto node_error;
}
/*
 * Validate the record-offset table: 16-bit entries, read starting at
 * the last two bytes of the node and moving toward lower addresses.
 * The first entry must point right behind the descriptor.
 */
rec_off = tree->node_size - 2;
off = hfs_bnode_read_u16(node, rec_off);
if (off != sizeof(struct hfs_bnode_desc))
goto node_error;
for (i = 1; i <= node->num_recs; off = next_off, i++) {
rec_off -= 2;
next_off = hfs_bnode_read_u16(node, rec_off);
/* offsets must strictly increase, stay inside the node and be even */
if (next_off <= off ||
next_off > tree->node_size ||
next_off & 1)
goto node_error;
entry_size = next_off - off;
/* keys are only checked in index and leaf nodes */
if (node->type != HFS_NODE_INDEX &&
node->type != HFS_NODE_LEAF)
continue;
/* the key (length byte included) must be smaller than its record */
key_size = hfs_bnode_read_u8(node, off) + 1;
if (key_size >= entry_size /*|| key_size & 1*/)
goto node_error;
}
/* node is valid: publish it and release anyone waiting on NEW */
clear_bit(HFS_BNODE_NEW, &node->flags);
wake_up(&node->lock_wq);
return node;
node_error:
/* flag the node as bad, wake waiters and drop this caller's reference */
set_bit(HFS_BNODE_ERROR, &node->flags);
clear_bit(HFS_BNODE_NEW, &node->flags);
wake_up(&node->lock_wq);
hfs_bnode_put(node);
return ERR_PTR(-EIO);
}
/*
 * Drop the page reference of every page slot of @node that is populated,
 * then free the node structure itself.
 */
void hfs_bnode_free(struct hfs_bnode *node)
{
	int i;

	for (i = 0; i < node->tree->pages_per_bnode; i++) {
		struct page *page = node->page[i];

		if (!page)
			continue;
		put_page(page);
	}

	kfree(node);
}
/*
 * Create a fresh, zero-filled node number @num in @tree.
 *
 * Returns the node with a reference held for the caller,
 * ERR_PTR(-ENOMEM) when __hfs_bnode_create() returns NULL, or
 * ERR_PTR(-EIO) when the node's pages could not be read.
 *
 * If the node is unexpectedly already hashed, a warning is emitted and
 * the existing node is returned.  A reference is taken on it first, so
 * that this path hands out an owned node like every other successful
 * return and the caller's later hfs_bnode_put() stays balanced.
 */
struct hfs_bnode *hfs_bnode_create(struct hfs_btree *tree, u32 num)
{
	struct hfs_bnode *node;
	struct page **pagep;
	int i;

	spin_lock(&tree->hash_lock);
	node = hfs_bnode_findhash(tree, num);
	if (node)
		hfs_bnode_get(node);	/* pin it while the hash lock is held */
	spin_unlock(&tree->hash_lock);
	if (node) {
		pr_crit("new node %u already hashed?\n", num);
		WARN_ON(1);
		return node;
	}

	node = __hfs_bnode_create(tree, num);
	if (!node)
		return ERR_PTR(-ENOMEM);
	if (test_bit(HFS_BNODE_ERROR, &node->flags)) {
		hfs_bnode_put(node);
		return ERR_PTR(-EIO);
	}

	/* first page: clear from the node's offset, at most one page worth */
	pagep = node->page;
	memset(kmap(*pagep) + node->page_offset, 0,
	       min((int)PAGE_SIZE, (int)tree->node_size));
	set_page_dirty(*pagep);
	kunmap(*pagep);

	/* any further pages are cleared completely */
	for (i = 1; i < tree->pages_per_bnode; i++) {
		memset(kmap(*++pagep), 0, PAGE_SIZE);
		set_page_dirty(*pagep);
		kunmap(*pagep);
	}

	clear_bit(HFS_BNODE_NEW, &node->flags);
	wake_up(&node->lock_wq);

	return node;
}
/* Take an additional reference on @node; a NULL @node is ignored. */
void hfs_bnode_get(struct hfs_bnode *node)
{
	if (!node)
		return;

	atomic_inc(&node->refcnt);
	hfs_dbg(BNODE_REFS, "get_node(%d:%d): %d\n",
		node->tree->cnid, node->this,
		atomic_read(&node->refcnt));
}
/*
 * Drop one reference on @node; a NULL @node is ignored.
 *
 * When the last reference goes away the node's pages are marked as
 * accessed and, if the node is flagged HFS_BNODE_DELETED, it is removed
 * from the hash, handed to hfs_bmap_free() and freed.  A node that is
 * not deleted stays hashed.
 */
void hfs_bnode_put(struct hfs_bnode *node)
{
	struct hfs_btree *tree;
	int i;

	if (!node)
		return;

	tree = node->tree;
	hfs_dbg(BNODE_REFS, "put_node(%d:%d): %d\n",
		node->tree->cnid, node->this,
		atomic_read(&node->refcnt));
	BUG_ON(!atomic_read(&node->refcnt));

	/* takes hash_lock only when the count actually drops to zero */
	if (!atomic_dec_and_lock(&node->refcnt, &tree->hash_lock))
		return;

	for (i = 0; i < tree->pages_per_bnode; i++) {
		if (node->page[i])
			mark_page_accessed(node->page[i]);
	}

	if (!test_bit(HFS_BNODE_DELETED, &node->flags)) {
		spin_unlock(&tree->hash_lock);
		return;
	}

	hfs_bnode_unhash(node);
	spin_unlock(&tree->hash_lock);
	hfs_bmap_free(node);
	hfs_bnode_free(node);
}