linux_dsm_epyc7002/fs/readdir.c
Linus Torvalds 9f79b78ef7 Convert filldir[64]() from __put_user() to unsafe_put_user()
We really should avoid the "__{get,put}_user()" functions entirely,
because they can easily be mis-used and the original intent of being
used for simple direct user accesses no longer holds in a post-SMAP/PAN
world.

Manually optimizing away the user access range check makes no sense any
more, when the range check is generally much cheaper than the "enable
user accesses" code that the __{get,put}_user() functions still need.

So instead of __put_user(), use the unsafe_put_user() interface with
user_access_{begin,end}() that really does generate better code these
days, and which is generally a nicer interface.  Under some loads, the
multiple user writes that filldir() does are actually quite noticeable.

This also makes the dirent name copy use unsafe_put_user() with a couple
of macros.  We do not want to make function calls with SMAP/PAN
disabled, and the code this generates is quite good when the
architecture uses "asm goto" for unsafe_put_user() like x86 does.

Note that this doesn't bother with the legacy cases.  Nobody should use
them anyway, so performance doesn't really matter there.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2019-10-05 12:00:36 -07:00

557 lines
14 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* linux/fs/readdir.c
*
* Copyright (C) 1995 Linus Torvalds
*/
#include <linux/stddef.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/errno.h>
#include <linux/stat.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/fsnotify.h>
#include <linux/dirent.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/unistd.h>
#include <linux/compat.h>
#include <linux/uaccess.h>
#include <asm/unaligned.h>
/*
* Note the "unsafe_put_user() semantics: we goto a
* label for errors.
*
* Also note how we use a "while()" loop here, even though
* only the biggest size needs to loop. The compiler (well,
* at least gcc) is smart enough to turn the smaller sizes
* into just if-statements, and this way we don't need to
* care whether 'u64' or 'u32' is the biggest size.
*/
#define unsafe_copy_loop(dst, src, len, type, label) \
while (len >= sizeof(type)) { \
unsafe_put_user(get_unaligned((type *)src), \
(type __user *)dst, label); \
dst += sizeof(type); \
src += sizeof(type); \
len -= sizeof(type); \
}
/*
* We avoid doing 64-bit copies on 32-bit architectures. They
* might be better, but the component names are mostly small,
* and the 64-bit cases can end up being much more complex and
* put much more register pressure on the code, so it's likely
* not worth the pain of unaligned accesses etc.
*
* So limit the copies to "unsigned long" size. I did verify
* that at least the x86-32 case is ok without this limiting,
* but I worry about random other legacy 32-bit cases that
* might not do as well.
*/
#define unsafe_copy_type(dst, src, len, type, label) do { \
if (sizeof(type) <= sizeof(unsigned long)) \
unsafe_copy_loop(dst, src, len, type, label); \
} while (0)
/*
* Copy the dirent name to user space, and NUL-terminate
* it. This should not be a function call, since we're doing
* the copy inside a "user_access_begin/end()" section.
*/
#define unsafe_copy_dirent_name(_dst, _src, _len, label) do { \
char __user *dst = (_dst); \
const char *src = (_src); \
size_t len = (_len); \
unsafe_copy_type(dst, src, len, u64, label); \
unsafe_copy_type(dst, src, len, u32, label); \
unsafe_copy_type(dst, src, len, u16, label); \
unsafe_copy_type(dst, src, len, u8, label); \
unsafe_put_user(0, dst, label); \
} while (0)
int iterate_dir(struct file *file, struct dir_context *ctx)
{
struct inode *inode = file_inode(file);
bool shared = false;
int res = -ENOTDIR;
if (file->f_op->iterate_shared)
shared = true;
else if (!file->f_op->iterate)
goto out;
res = security_file_permission(file, MAY_READ);
if (res)
goto out;
if (shared)
res = down_read_killable(&inode->i_rwsem);
else
res = down_write_killable(&inode->i_rwsem);
if (res)
goto out;
res = -ENOENT;
if (!IS_DEADDIR(inode)) {
ctx->pos = file->f_pos;
if (shared)
res = file->f_op->iterate_shared(file, ctx);
else
res = file->f_op->iterate(file, ctx);
file->f_pos = ctx->pos;
fsnotify_access(file);
file_accessed(file);
}
if (shared)
inode_unlock_shared(inode);
else
inode_unlock(inode);
out:
return res;
}
EXPORT_SYMBOL(iterate_dir);
/*
* Traditional linux readdir() handling..
*
* "count=1" is a special case, meaning that the buffer is one
* dirent-structure in size and that the code can't handle more
* anyway. Thus the special "fillonedir()" function for that
* case (the low-level handlers don't need to care about this).
*/
#ifdef __ARCH_WANT_OLD_READDIR
struct old_linux_dirent {
unsigned long d_ino;
unsigned long d_offset;
unsigned short d_namlen;
char d_name[1];
};
struct readdir_callback {
struct dir_context ctx;
struct old_linux_dirent __user * dirent;
int result;
};
static int fillonedir(struct dir_context *ctx, const char *name, int namlen,
loff_t offset, u64 ino, unsigned int d_type)
{
struct readdir_callback *buf =
container_of(ctx, struct readdir_callback, ctx);
struct old_linux_dirent __user * dirent;
unsigned long d_ino;
if (buf->result)
return -EINVAL;
d_ino = ino;
if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
buf->result = -EOVERFLOW;
return -EOVERFLOW;
}
buf->result++;
dirent = buf->dirent;
if (!access_ok(dirent,
(unsigned long)(dirent->d_name + namlen + 1) -
(unsigned long)dirent))
goto efault;
if ( __put_user(d_ino, &dirent->d_ino) ||
__put_user(offset, &dirent->d_offset) ||
__put_user(namlen, &dirent->d_namlen) ||
__copy_to_user(dirent->d_name, name, namlen) ||
__put_user(0, dirent->d_name + namlen))
goto efault;
return 0;
efault:
buf->result = -EFAULT;
return -EFAULT;
}
SYSCALL_DEFINE3(old_readdir, unsigned int, fd,
struct old_linux_dirent __user *, dirent, unsigned int, count)
{
int error;
struct fd f = fdget_pos(fd);
struct readdir_callback buf = {
.ctx.actor = fillonedir,
.dirent = dirent
};
if (!f.file)
return -EBADF;
error = iterate_dir(f.file, &buf.ctx);
if (buf.result)
error = buf.result;
fdput_pos(f);
return error;
}
#endif /* __ARCH_WANT_OLD_READDIR */
/*
* New, all-improved, singing, dancing, iBCS2-compliant getdents()
* interface.
*/
struct linux_dirent {
unsigned long d_ino;
unsigned long d_off;
unsigned short d_reclen;
char d_name[1];
};
struct getdents_callback {
struct dir_context ctx;
struct linux_dirent __user * current_dir;
struct linux_dirent __user * previous;
int count;
int error;
};
static int filldir(struct dir_context *ctx, const char *name, int namlen,
loff_t offset, u64 ino, unsigned int d_type)
{
struct linux_dirent __user * dirent;
struct getdents_callback *buf =
container_of(ctx, struct getdents_callback, ctx);
unsigned long d_ino;
int reclen = ALIGN(offsetof(struct linux_dirent, d_name) + namlen + 2,
sizeof(long));
buf->error = -EINVAL; /* only used if we fail.. */
if (reclen > buf->count)
return -EINVAL;
d_ino = ino;
if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
buf->error = -EOVERFLOW;
return -EOVERFLOW;
}
dirent = buf->previous;
if (dirent && signal_pending(current))
return -EINTR;
/*
* Note! This range-checks 'previous' (which may be NULL).
* The real range was checked in getdents
*/
if (!user_access_begin(dirent, sizeof(*dirent)))
goto efault;
if (dirent)
unsafe_put_user(offset, &dirent->d_off, efault_end);
dirent = buf->current_dir;
unsafe_put_user(d_ino, &dirent->d_ino, efault_end);
unsafe_put_user(reclen, &dirent->d_reclen, efault_end);
unsafe_put_user(d_type, (char __user *) dirent + reclen - 1, efault_end);
unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
user_access_end();
buf->previous = dirent;
dirent = (void __user *)dirent + reclen;
buf->current_dir = dirent;
buf->count -= reclen;
return 0;
efault_end:
user_access_end();
efault:
buf->error = -EFAULT;
return -EFAULT;
}
SYSCALL_DEFINE3(getdents, unsigned int, fd,
struct linux_dirent __user *, dirent, unsigned int, count)
{
struct fd f;
struct linux_dirent __user * lastdirent;
struct getdents_callback buf = {
.ctx.actor = filldir,
.count = count,
.current_dir = dirent
};
int error;
if (!access_ok(dirent, count))
return -EFAULT;
f = fdget_pos(fd);
if (!f.file)
return -EBADF;
error = iterate_dir(f.file, &buf.ctx);
if (error >= 0)
error = buf.error;
lastdirent = buf.previous;
if (lastdirent) {
if (put_user(buf.ctx.pos, &lastdirent->d_off))
error = -EFAULT;
else
error = count - buf.count;
}
fdput_pos(f);
return error;
}
struct getdents_callback64 {
struct dir_context ctx;
struct linux_dirent64 __user * current_dir;
struct linux_dirent64 __user * previous;
int count;
int error;
};
static int filldir64(struct dir_context *ctx, const char *name, int namlen,
loff_t offset, u64 ino, unsigned int d_type)
{
struct linux_dirent64 __user *dirent;
struct getdents_callback64 *buf =
container_of(ctx, struct getdents_callback64, ctx);
int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1,
sizeof(u64));
buf->error = -EINVAL; /* only used if we fail.. */
if (reclen > buf->count)
return -EINVAL;
dirent = buf->previous;
if (dirent && signal_pending(current))
return -EINTR;
/*
* Note! This range-checks 'previous' (which may be NULL).
* The real range was checked in getdents
*/
if (!user_access_begin(dirent, sizeof(*dirent)))
goto efault;
if (dirent)
unsafe_put_user(offset, &dirent->d_off, efault_end);
dirent = buf->current_dir;
unsafe_put_user(ino, &dirent->d_ino, efault_end);
unsafe_put_user(reclen, &dirent->d_reclen, efault_end);
unsafe_put_user(d_type, &dirent->d_type, efault_end);
unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
user_access_end();
buf->previous = dirent;
dirent = (void __user *)dirent + reclen;
buf->current_dir = dirent;
buf->count -= reclen;
return 0;
efault_end:
user_access_end();
efault:
buf->error = -EFAULT;
return -EFAULT;
}
int ksys_getdents64(unsigned int fd, struct linux_dirent64 __user *dirent,
unsigned int count)
{
struct fd f;
struct linux_dirent64 __user * lastdirent;
struct getdents_callback64 buf = {
.ctx.actor = filldir64,
.count = count,
.current_dir = dirent
};
int error;
if (!access_ok(dirent, count))
return -EFAULT;
f = fdget_pos(fd);
if (!f.file)
return -EBADF;
error = iterate_dir(f.file, &buf.ctx);
if (error >= 0)
error = buf.error;
lastdirent = buf.previous;
if (lastdirent) {
typeof(lastdirent->d_off) d_off = buf.ctx.pos;
if (__put_user(d_off, &lastdirent->d_off))
error = -EFAULT;
else
error = count - buf.count;
}
fdput_pos(f);
return error;
}
SYSCALL_DEFINE3(getdents64, unsigned int, fd,
struct linux_dirent64 __user *, dirent, unsigned int, count)
{
return ksys_getdents64(fd, dirent, count);
}
#ifdef CONFIG_COMPAT
struct compat_old_linux_dirent {
compat_ulong_t d_ino;
compat_ulong_t d_offset;
unsigned short d_namlen;
char d_name[1];
};
struct compat_readdir_callback {
struct dir_context ctx;
struct compat_old_linux_dirent __user *dirent;
int result;
};
static int compat_fillonedir(struct dir_context *ctx, const char *name,
int namlen, loff_t offset, u64 ino,
unsigned int d_type)
{
struct compat_readdir_callback *buf =
container_of(ctx, struct compat_readdir_callback, ctx);
struct compat_old_linux_dirent __user *dirent;
compat_ulong_t d_ino;
if (buf->result)
return -EINVAL;
d_ino = ino;
if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
buf->result = -EOVERFLOW;
return -EOVERFLOW;
}
buf->result++;
dirent = buf->dirent;
if (!access_ok(dirent,
(unsigned long)(dirent->d_name + namlen + 1) -
(unsigned long)dirent))
goto efault;
if ( __put_user(d_ino, &dirent->d_ino) ||
__put_user(offset, &dirent->d_offset) ||
__put_user(namlen, &dirent->d_namlen) ||
__copy_to_user(dirent->d_name, name, namlen) ||
__put_user(0, dirent->d_name + namlen))
goto efault;
return 0;
efault:
buf->result = -EFAULT;
return -EFAULT;
}
COMPAT_SYSCALL_DEFINE3(old_readdir, unsigned int, fd,
struct compat_old_linux_dirent __user *, dirent, unsigned int, count)
{
int error;
struct fd f = fdget_pos(fd);
struct compat_readdir_callback buf = {
.ctx.actor = compat_fillonedir,
.dirent = dirent
};
if (!f.file)
return -EBADF;
error = iterate_dir(f.file, &buf.ctx);
if (buf.result)
error = buf.result;
fdput_pos(f);
return error;
}
struct compat_linux_dirent {
compat_ulong_t d_ino;
compat_ulong_t d_off;
unsigned short d_reclen;
char d_name[1];
};
struct compat_getdents_callback {
struct dir_context ctx;
struct compat_linux_dirent __user *current_dir;
struct compat_linux_dirent __user *previous;
int count;
int error;
};
static int compat_filldir(struct dir_context *ctx, const char *name, int namlen,
loff_t offset, u64 ino, unsigned int d_type)
{
struct compat_linux_dirent __user * dirent;
struct compat_getdents_callback *buf =
container_of(ctx, struct compat_getdents_callback, ctx);
compat_ulong_t d_ino;
int reclen = ALIGN(offsetof(struct compat_linux_dirent, d_name) +
namlen + 2, sizeof(compat_long_t));
buf->error = -EINVAL; /* only used if we fail.. */
if (reclen > buf->count)
return -EINVAL;
d_ino = ino;
if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
buf->error = -EOVERFLOW;
return -EOVERFLOW;
}
dirent = buf->previous;
if (dirent) {
if (signal_pending(current))
return -EINTR;
if (__put_user(offset, &dirent->d_off))
goto efault;
}
dirent = buf->current_dir;
if (__put_user(d_ino, &dirent->d_ino))
goto efault;
if (__put_user(reclen, &dirent->d_reclen))
goto efault;
if (copy_to_user(dirent->d_name, name, namlen))
goto efault;
if (__put_user(0, dirent->d_name + namlen))
goto efault;
if (__put_user(d_type, (char __user *) dirent + reclen - 1))
goto efault;
buf->previous = dirent;
dirent = (void __user *)dirent + reclen;
buf->current_dir = dirent;
buf->count -= reclen;
return 0;
efault:
buf->error = -EFAULT;
return -EFAULT;
}
COMPAT_SYSCALL_DEFINE3(getdents, unsigned int, fd,
struct compat_linux_dirent __user *, dirent, unsigned int, count)
{
struct fd f;
struct compat_linux_dirent __user * lastdirent;
struct compat_getdents_callback buf = {
.ctx.actor = compat_filldir,
.current_dir = dirent,
.count = count
};
int error;
if (!access_ok(dirent, count))
return -EFAULT;
f = fdget_pos(fd);
if (!f.file)
return -EBADF;
error = iterate_dir(f.file, &buf.ctx);
if (error >= 0)
error = buf.error;
lastdirent = buf.previous;
if (lastdirent) {
if (put_user(buf.ctx.pos, &lastdirent->d_off))
error = -EFAULT;
else
error = count - buf.count;
}
fdput_pos(f);
return error;
}
#endif