2019-05-27 13:55:01 +07:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
2005-04-17 05:20:36 +07:00
|
|
|
/*
|
|
|
|
* Berkeley style UIO structures - Alan Cox 1994.
|
|
|
|
*/
|
2012-10-13 16:46:48 +07:00
|
|
|
#ifndef __LINUX_UIO_H
|
|
|
|
#define __LINUX_UIO_H
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2013-11-28 07:29:46 +07:00
|
|
|
#include <linux/kernel.h>
|
2017-06-30 08:45:10 +07:00
|
|
|
#include <linux/thread_info.h>
|
2012-10-13 16:46:48 +07:00
|
|
|
#include <uapi/linux/uio.h>
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2013-11-28 07:29:46 +07:00
|
|
|
struct page;
|
2016-09-23 03:33:12 +07:00
|
|
|
struct pipe_inode_info;
|
2009-07-30 05:04:19 +07:00
|
|
|
|
|
|
|
/*
 * A single (base, length) memory segment whose base address must be a
 * kernel pointer.
 */
struct kvec {
	void *iov_base;	/* start of the segment; must *never* be a userland pointer */
	size_t iov_len;	/* length of the segment */
};
|
|
|
|
|
2018-10-22 19:07:28 +07:00
|
|
|
/*
 * Iterator flavours. Every value is a distinct single bit starting at
 * bit 2, which leaves the two lowest bits of the type word clear.
 */
enum iter_type {
	ITER_IOVEC	= 1 << 2,	/* 4 */
	ITER_KVEC	= 1 << 3,	/* 8 */
	ITER_BVEC	= 1 << 4,	/* 16 */
	ITER_PIPE	= 1 << 5,	/* 32 */
	ITER_DISCARD	= 1 << 6,	/* 64 */
};
|
|
|
|
|
2013-11-28 07:29:46 +07:00
|
|
|
/*
 * Iteration state over one of several kinds of segment storage.
 */
struct iov_iter {
	/*
	 * Type word: bit 0 is the read/write bit (set when writing); the
	 * remaining bits carry the iterator flavour.
	 *
	 * The historical comment here also said: "Bit 1 is the
	 * BVEC_FLAG_NO_REF bit, set if type is a bvec and the caller isn't
	 * expecting to drop a page reference when done."
	 * NOTE(review): iov_iter_type() masks off only READ | WRITE, so a
	 * set bit 1 would make every flavour comparison fail - confirm
	 * whether that flag still exists before relying on it.
	 */
	unsigned int type;

	/* Offset into the current segment (see iov_iter_iovec()). */
	size_t iov_offset;

	/* Upper bound on the length handed out by iov_iter_iovec(). */
	size_t count;

	/*
	 * Backing storage; presumably the member that matches the flavour
	 * held in @type is the valid one - TODO confirm.
	 */
	union {
		const struct iovec *iov;
		const struct kvec *kvec;
		const struct bio_vec *bvec;
		struct pipe_inode_info *pipe;
	};

	/*
	 * Either a segment count or a pair of head indices; the two
	 * representations share storage.
	 * NOTE(review): head/start_head presumably belong to the pipe
	 * flavour - verify against the implementation.
	 */
	union {
		unsigned long nr_segs;
		struct {
			unsigned int head;
			unsigned int start_head;
		};
	};
};
|
|
|
|
|
2018-10-22 19:07:28 +07:00
|
|
|
static inline enum iter_type iov_iter_type(const struct iov_iter *i)
|
|
|
|
{
|
2019-06-26 20:49:28 +07:00
|
|
|
return i->type & ~(READ | WRITE);
|
2018-10-22 19:07:28 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline bool iter_is_iovec(const struct iov_iter *i)
|
|
|
|
{
|
|
|
|
return iov_iter_type(i) == ITER_IOVEC;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline bool iov_iter_is_kvec(const struct iov_iter *i)
|
|
|
|
{
|
|
|
|
return iov_iter_type(i) == ITER_KVEC;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline bool iov_iter_is_bvec(const struct iov_iter *i)
|
|
|
|
{
|
|
|
|
return iov_iter_type(i) == ITER_BVEC;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline bool iov_iter_is_pipe(const struct iov_iter *i)
|
|
|
|
{
|
|
|
|
return iov_iter_type(i) == ITER_PIPE;
|
|
|
|
}
|
|
|
|
|
2018-10-20 06:57:56 +07:00
|
|
|
static inline bool iov_iter_is_discard(const struct iov_iter *i)
|
|
|
|
{
|
|
|
|
return iov_iter_type(i) == ITER_DISCARD;
|
|
|
|
}
|
|
|
|
|
2018-10-22 19:07:28 +07:00
|
|
|
static inline unsigned char iov_iter_rw(const struct iov_iter *i)
|
|
|
|
{
|
|
|
|
return i->type & (READ | WRITE);
|
|
|
|
}
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/*
 * Total number of bytes covered by an iovec array: the sum of iov_len
 * over the first @nr_segs entries of @iov.
 *
 * NOTE: the addition is unchecked. It is not safe to use this function
 * until all of the iovec's segment lengths have been validated, because
 * the individual lengths can overflow a size_t when added together.
 */
static inline size_t iov_length(const struct iovec *iov, unsigned long nr_segs)
{
	size_t total = 0;

	/* Walk the array front to back; @iov is not touched when @nr_segs is 0. */
	while (nr_segs--)
		total += (iov++)->iov_len;

	return total;
}
|
|
|
|
|
2013-11-28 07:29:46 +07:00
|
|
|
static inline struct iovec iov_iter_iovec(const struct iov_iter *iter)
|
|
|
|
{
|
|
|
|
return (struct iovec) {
|
|
|
|
.iov_base = iter->iov->iov_base + iter->iov_offset,
|
|
|
|
.iov_len = min(iter->count,
|
|
|
|
iter->iov->iov_len - iter->iov_offset),
|
|
|
|
};
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Out-of-line iov_iter primitives; only the prototypes live in this header.
 */

/* Iterator maintenance and inspection. */
size_t iov_iter_copy_from_user_atomic(struct page *page, struct iov_iter *i,
				      unsigned long offset, size_t bytes);
void iov_iter_advance(struct iov_iter *i, size_t bytes);
void iov_iter_revert(struct iov_iter *i, size_t bytes);
int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes);
size_t iov_iter_single_seg_count(const struct iov_iter *i);

/* Copies where the kernel side is described as page + offset + size. */
size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i);
size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
			   struct iov_iter *i);

/*
 * Copies where the kernel side is described as pointer + size. These are
 * the backends of the inline copy_*_iter*() wrappers in this header, which
 * perform the object size check before calling in.
 */
size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i);
size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i);
bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i);
size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i);
bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i);
|
|
|
|
|
|
|
|
static __always_inline __must_check
|
|
|
|
size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
|
|
|
|
{
|
|
|
|
if (unlikely(!check_copy_size(addr, bytes, true)))
|
2017-07-10 18:40:49 +07:00
|
|
|
return 0;
|
2017-06-30 08:45:10 +07:00
|
|
|
else
|
|
|
|
return _copy_to_iter(addr, bytes, i);
|
|
|
|
}
|
|
|
|
|
|
|
|
static __always_inline __must_check
|
|
|
|
size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
|
|
|
|
{
|
|
|
|
if (unlikely(!check_copy_size(addr, bytes, false)))
|
2017-07-10 18:40:49 +07:00
|
|
|
return 0;
|
2017-06-30 08:45:10 +07:00
|
|
|
else
|
|
|
|
return _copy_from_iter(addr, bytes, i);
|
|
|
|
}
|
|
|
|
|
|
|
|
static __always_inline __must_check
|
|
|
|
bool copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
|
|
|
|
{
|
|
|
|
if (unlikely(!check_copy_size(addr, bytes, false)))
|
|
|
|
return false;
|
|
|
|
else
|
|
|
|
return _copy_from_iter_full(addr, bytes, i);
|
|
|
|
}
|
|
|
|
|
|
|
|
static __always_inline __must_check
|
|
|
|
size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
|
|
|
|
{
|
|
|
|
if (unlikely(!check_copy_size(addr, bytes, false)))
|
2017-07-10 18:40:49 +07:00
|
|
|
return 0;
|
2017-06-30 08:45:10 +07:00
|
|
|
else
|
|
|
|
return _copy_from_iter_nocache(addr, bytes, i);
|
|
|
|
}
|
|
|
|
|
|
|
|
static __always_inline __must_check
|
|
|
|
bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
|
|
|
|
{
|
|
|
|
if (unlikely(!check_copy_size(addr, bytes, false)))
|
|
|
|
return false;
|
|
|
|
else
|
|
|
|
return _copy_from_iter_full_nocache(addr, bytes, i);
|
|
|
|
}
|
|
|
|
|
2017-05-30 02:22:50 +07:00
|
|
|
#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
/*
 * Users such as pmem that rely on copy_from_iter_flushcache() having
 * stricter semantics than copy_from_iter_nocache() must check
 * IS_ENABLED(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) before assuming that the
 * destination has been flushed from the cache on return.
 */
size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i);
#else
/* Without the config option the flushcache name aliases the nocache copy. */
#define _copy_from_iter_flushcache _copy_from_iter_nocache
#endif
|
|
|
|
|
x86, powerpc: Rename memcpy_mcsafe() to copy_mc_to_{user, kernel}()
In reaction to a proposal to introduce a memcpy_mcsafe_fast()
implementation Linus points out that memcpy_mcsafe() is poorly named
relative to communicating the scope of the interface. Specifically what
addresses are valid to pass as source, destination, and what faults /
exceptions are handled.
Of particular concern is that even though x86 might be able to handle
the semantics of copy_mc_to_user() with its common copy_user_generic()
implementation other archs likely need / want an explicit path for this
case:
On Fri, May 1, 2020 at 11:28 AM Linus Torvalds <torvalds@linux-foundation.org> wrote:
>
> On Thu, Apr 30, 2020 at 6:21 PM Dan Williams <dan.j.williams@intel.com> wrote:
> >
> > However now I see that copy_user_generic() works for the wrong reason.
> > It works because the exception on the source address due to poison
> > looks no different than a write fault on the user address to the
> > caller, it's still just a short copy. So it makes copy_to_user() work
> > for the wrong reason relative to the name.
>
> Right.
>
> And it won't work that way on other architectures. On x86, we have a
> generic function that can take faults on either side, and we use it
> for both cases (and for the "in_user" case too), but that's an
> artifact of the architecture oddity.
>
> In fact, it's probably wrong even on x86 - because it can hide bugs -
> but writing those things is painful enough that everybody prefers
> having just one function.
Replace a single top-level memcpy_mcsafe() with either
copy_mc_to_user(), or copy_mc_to_kernel().
Introduce an x86 copy_mc_fragile() name as the rename for the
low-level x86 implementation formerly named memcpy_mcsafe(). It is used
as the slow / careful backend that is supplanted by a fast
copy_mc_generic() in a follow-on patch.
One side-effect of this reorganization is that separating copy_mc_64.S
to its own file means that perf no longer needs to track dependencies
for its memcpy_64.S benchmarks.
[ bp: Massage a bit. ]
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Cc: <stable@vger.kernel.org>
Link: http://lore.kernel.org/r/CAHk-=wjSqtXAqfUJxFtWNwmguFASTgB0dz1dT3V-78Quiezqbg@mail.gmail.com
Link: https://lkml.kernel.org/r/160195561680.2163339.11574962055305783722.stgit@dwillia2-desk3.amr.corp.intel.com
2020-10-06 10:40:16 +07:00
|
|
|
#ifdef CONFIG_ARCH_HAS_COPY_MC
/* Dedicated out-of-line implementation, declared only with CONFIG_ARCH_HAS_COPY_MC. */
size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i);
#else
/* Without the config option the name aliases the ordinary _copy_to_iter(). */
#define _copy_mc_to_iter _copy_to_iter
#endif
|
|
|
|
|
Merge branch 'uaccess-work.iov_iter' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull iov_iter hardening from Al Viro:
"This is the iov_iter/uaccess/hardening pile.
For one thing, it trims the inline part of copy_to_user/copy_from_user
to the minimum that *does* need to be inlined - object size checks,
basically. For another, it sanitizes the checks for iov_iter
primitives. There are 4 groups of checks: access_ok(), might_fault(),
object size and KASAN.
- access_ok() had been verified by whoever had set the iov_iter up.
However, that has happened in a function far away, so proving that
there's no path to actual copying bypassing those checks is hard
and proving that iov_iter has not been buggered in the meanwhile is
also not pleasant. So we want those redone in actual
copyin/copyout.
- might_fault() is better off consolidated - we know whether it needs
to be checked as soon as we enter iov_iter primitive and observe
the iov_iter flavour. No need to wait until the copyin/copyout. The
call chains are short enough to make sure we won't miss anything -
in fact, it's more robust that way, since there are cases where we
do e.g. forced fault-in before getting to copyin/copyout. It's not
quite what we need to check (in particular, combination of
iovec-backed and set_fs(KERNEL_DS) is almost certainly a bug, not a
cause to skip checks), but that's for later series. For now let's
keep might_fault().
- KASAN checks belong in copyin/copyout - at the same level where
other iov_iter flavours would've hit them in memcpy().
- object size checks should apply to *all* iov_iter flavours, not
just iovec-backed ones.
There are two groups of primitives - one gets the kernel object
described as pointer + size (copy_to_iter(), etc.) while another gets
it as page + offset + size (copy_page_to_iter(), etc.)
For the first group the checks are best done where we actually have a
chance to find the object size. In other words, those belong in inline
wrappers in uio.h, before calling into iov_iter.c. Same kind as we
have for inlined part of copy_to_user().
For the second group there is no object to look at - offset in page is
just a number, it bears no type information. So we do them in the
common helper called by iov_iter.c primitives of that kind. All it
currently does is checking that we are not trying to access outside of
the compound page; eventually we might want to add some sanity checks
on the page involved.
So the things we need in copyin/copyout part of iov_iter.c do not
quite match anything in uaccess.h (we want no zeroing, we *do* want
access_ok() and KASAN and we want no might_fault() or object size
checks done on that level). OTOH, these needs are simple enough to
provide a couple of helpers (static in iov_iter.c) doing just what we
need..."
* 'uaccess-work.iov_iter' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
iov_iter: saner checks on copyin/copyout
iov_iter: sanity checks for copy to/from page primitives
iov_iter/hardening: move object size checks to inlined part
copy_{to,from}_user(): consolidate object size checks
copy_{from,to}_user(): move kasan checks and might_fault() out-of-line
2017-07-08 10:39:20 +07:00
|
|
|
static __always_inline __must_check
|
|
|
|
size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
|
2017-05-30 02:22:50 +07:00
|
|
|
{
|
Merge branch 'uaccess-work.iov_iter' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull iov_iter hardening from Al Viro:
"This is the iov_iter/uaccess/hardening pile.
For one thing, it trims the inline part of copy_to_user/copy_from_user
to the minimum that *does* need to be inlined - object size checks,
basically. For another, it sanitizes the checks for iov_iter
primitives. There are 4 groups of checks: access_ok(), might_fault(),
object size and KASAN.
- access_ok() had been verified by whoever had set the iov_iter up.
However, that has happened in a function far away, so proving that
there's no path to actual copying bypassing those checks is hard
and proving that iov_iter has not been buggered in the meanwhile is
also not pleasant. So we want those redone in actual
copyin/copyout.
- might_fault() is better off consolidated - we know whether it needs
to be checked as soon as we enter iov_iter primitive and observe
the iov_iter flavour. No need to wait until the copyin/copyout. The
call chains are short enough to make sure we won't miss anything -
in fact, it's more robust that way, since there are cases where we
do e.g. forced fault-in before getting to copyin/copyout. It's not
quite what we need to check (in particular, combination of
iovec-backed and set_fs(KERNEL_DS) is almost certainly a bug, not a
cause to skip checks), but that's for later series. For now let's
keep might_fault().
- KASAN checks belong in copyin/copyout - at the same level where
other iov_iter flavours would've hit them in memcpy().
- object size checks should apply to *all* iov_iter flavours, not
just iovec-backed ones.
There are two groups of primitives - one gets the kernel object
described as pointer + size (copy_to_iter(), etc.) while another gets
it as page + offset + size (copy_page_to_iter(), etc.)
For the first group the checks are best done where we actually have a
chance to find the object size. In other words, those belong in inline
wrappers in uio.h, before calling into iov_iter.c. Same kind as we
have for inlined part of copy_to_user().
For the second group there is no object to look at - offset in page is
just a number, it bears no type information. So we do them in the
common helper called by iov_iter.c primitives of that kind. All it
currently does is checking that we are not trying to access outside of
the compound page; eventually we might want to add some sanity checks
on the page involved.
So the things we need in copyin/copyout part of iov_iter.c do not
quite match anything in uaccess.h (we want no zeroing, we *do* want
access_ok() and KASAN and we want no might_fault() or object size
checks done on that level). OTOH, these needs are simple enough to
provide a couple of helpers (static in iov_iter.c) doing just what we
need..."
* 'uaccess-work.iov_iter' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
iov_iter: saner checks on copyin/copyout
iov_iter: sanity checks for copy to/from page primitives
iov_iter/hardening: move object size checks to inlined part
copy_{to,from}_user(): consolidate object size checks
copy_{from,to}_user(): move kasan checks and might_fault() out-of-line
2017-07-08 10:39:20 +07:00
|
|
|
if (unlikely(!check_copy_size(addr, bytes, false)))
|
2017-07-10 18:40:49 +07:00
|
|
|
return 0;
|
Merge branch 'uaccess-work.iov_iter' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull iov_iter hardening from Al Viro:
"This is the iov_iter/uaccess/hardening pile.
For one thing, it trims the inline part of copy_to_user/copy_from_user
to the minimum that *does* need to be inlined - object size checks,
basically. For another, it sanitizes the checks for iov_iter
primitives. There are 4 groups of checks: access_ok(), might_fault(),
object size and KASAN.
- access_ok() had been verified by whoever had set the iov_iter up.
However, that has happened in a function far away, so proving that
there's no path to actual copying bypassing those checks is hard
and proving that iov_iter has not been buggered in the meanwhile is
also not pleasant. So we want those redone in actual
copyin/copyout.
- might_fault() is better off consolidated - we know whether it needs
to be checked as soon as we enter iov_iter primitive and observe
the iov_iter flavour. No need to wait until the copyin/copyout. The
call chains are short enough to make sure we won't miss anything -
in fact, it's more robust that way, since there are cases where we
do e.g. forced fault-in before getting to copyin/copyout. It's not
quite what we need to check (in particular, combination of
iovec-backed and set_fs(KERNEL_DS) is almost certainly a bug, not a
cause to skip checks), but that's for later series. For now let's
keep might_fault().
- KASAN checks belong in copyin/copyout - at the same level where
other iov_iter flavours would've hit them in memcpy().
- object size checks should apply to *all* iov_iter flavours, not
just iovec-backed ones.
There are two groups of primitives - one gets the kernel object
described as pointer + size (copy_to_iter(), etc.) while another gets
it as page + offset + size (copy_page_to_iter(), etc.)
For the first group the checks are best done where we actually have a
chance to find the object size. In other words, those belong in inline
wrappers in uio.h, before calling into iov_iter.c. Same kind as we
have for inlined part of copy_to_user().
For the second group there is no object to look at - offset in page is
just a number, it bears no type information. So we do them in the
common helper called by iov_iter.c primitives of that kind. All it
currently does is checking that we are not trying to access outside of
the compound page; eventually we might want to add some sanity checks
on the page involved.
So the things we need in copyin/copyout part of iov_iter.c do not
quite match anything in uaccess.h (we want no zeroing, we *do* want
access_ok() and KASAN and we want no might_fault() or object size
checks done on that level). OTOH, these needs are simple enough to
provide a couple of helpers (static in iov_iter.c) doing just what we
need..."
* 'uaccess-work.iov_iter' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
iov_iter: saner checks on copyin/copyout
iov_iter: sanity checks for copy to/from page primitives
iov_iter/hardening: move object size checks to inlined part
copy_{to,from}_user(): consolidate object size checks
copy_{from,to}_user(): move kasan checks and might_fault() out-of-line
2017-07-08 10:39:20 +07:00
|
|
|
else
|
|
|
|
return _copy_from_iter_flushcache(addr, bytes, i);
|
2017-05-30 02:22:50 +07:00
|
|
|
}
|
Merge branch 'uaccess-work.iov_iter' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull iov_iter hardening from Al Viro:
"This is the iov_iter/uaccess/hardening pile.
For one thing, it trims the inline part of copy_to_user/copy_from_user
to the minimum that *does* need to be inlined - object size checks,
basically. For another, it sanitizes the checks for iov_iter
primitives. There are 4 groups of checks: access_ok(), might_fault(),
object size and KASAN.
- access_ok() had been verified by whoever had set the iov_iter up.
However, that has happened in a function far away, so proving that
there's no path to actual copying bypassing those checks is hard
and proving that iov_iter has not been buggered in the meanwhile is
also not pleasant. So we want those redone in actual
copyin/copyout.
- might_fault() is better off consolidated - we know whether it needs
to be checked as soon as we enter iov_iter primitive and observe
the iov_iter flavour. No need to wait until the copyin/copyout. The
call chains are short enough to make sure we won't miss anything -
in fact, it's more robust that way, since there are cases where we
do e.g. forced fault-in before getting to copyin/copyout. It's not
quite what we need to check (in particular, combination of
iovec-backed and set_fs(KERNEL_DS) is almost certainly a bug, not a
cause to skip checks), but that's for later series. For now let's
keep might_fault().
- KASAN checks belong in copyin/copyout - at the same level where
other iov_iter flavours would've hit them in memcpy().
- object size checks should apply to *all* iov_iter flavours, not
just iovec-backed ones.
There are two groups of primitives - one gets the kernel object
described as pointer + size (copy_to_iter(), etc.) while another gets
it as page + offset + size (copy_page_to_iter(), etc.)
For the first group the checks are best done where we actually have a
chance to find the object size. In other words, those belong in inline
wrappers in uio.h, before calling into iov_iter.c. Same kind as we
have for inlined part of copy_to_user().
For the second group there is no object to look at - offset in page is
just a number, it bears no type information. So we do them in the
common helper called by iov_iter.c primitives of that kind. All it
currently does is checking that we are not trying to access outside of
the compound page; eventually we might want to add some sanity checks
on the page involved.
So the things we need in copyin/copyout part of iov_iter.c do not
quite match anything in uaccess.h (we want no zeroing, we *do* want
access_ok() and KASAN and we want no might_fault() or object size
checks done on that level). OTOH, these needs are simple enough to
provide a couple of helpers (static in iov_iter.c) doing just what we
need..."
* 'uaccess-work.iov_iter' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
iov_iter: saner checks on copyin/copyout
iov_iter: sanity checks for copy to/from page primitives
iov_iter/hardening: move object size checks to inlined part
copy_{to,from}_user(): consolidate object size checks
copy_{from,to}_user(): move kasan checks and might_fault() out-of-line
2017-07-08 10:39:20 +07:00
|
|
|
|
2018-05-04 07:06:31 +07:00
|
|
|
/*
 * copy_mc_to_iter - inline wrapper that validates the copy size before
 * handing the request off to _copy_mc_to_iter().
 *
 * check_copy_size(addr, bytes, true) is evaluated first; when it fails the
 * function returns 0 and _copy_mc_to_iter() is never called.  Otherwise the
 * result of _copy_mc_to_iter(addr, bytes, i) is returned unchanged.
 *
 * NOTE(review): the meaning of the 'true' argument to check_copy_size() and
 * the return semantics of _copy_mc_to_iter() are defined outside this
 * chunk - confirm against their definitions.
 */
static __always_inline __must_check
|
x86, powerpc: Rename memcpy_mcsafe() to copy_mc_to_{user, kernel}()
In reaction to a proposal to introduce a memcpy_mcsafe_fast()
implementation Linus points out that memcpy_mcsafe() is poorly named
relative to communicating the scope of the interface. Specifically what
addresses are valid to pass as source, destination, and what faults /
exceptions are handled.
Of particular concern is that even though x86 might be able to handle
the semantics of copy_mc_to_user() with its common copy_user_generic()
implementation other archs likely need / want an explicit path for this
case:
On Fri, May 1, 2020 at 11:28 AM Linus Torvalds <torvalds@linux-foundation.org> wrote:
>
> On Thu, Apr 30, 2020 at 6:21 PM Dan Williams <dan.j.williams@intel.com> wrote:
> >
> > However now I see that copy_user_generic() works for the wrong reason.
> > It works because the exception on the source address due to poison
> > looks no different than a write fault on the user address to the
> > caller, it's still just a short copy. So it makes copy_to_user() work
> > for the wrong reason relative to the name.
>
> Right.
>
> And it won't work that way on other architectures. On x86, we have a
> generic function that can take faults on either side, and we use it
> for both cases (and for the "in_user" case too), but that's an
> artifact of the architecture oddity.
>
> In fact, it's probably wrong even on x86 - because it can hide bugs -
> but writing those things is painful enough that everybody prefers
> having just one function.
Replace a single top-level memcpy_mcsafe() with either
copy_mc_to_user(), or copy_mc_to_kernel().
Introduce an x86 copy_mc_fragile() name as the rename for the
low-level x86 implementation formerly named memcpy_mcsafe(). It is used
as the slow / careful backend that is supplanted by a fast
copy_mc_generic() in a follow-on patch.
One side-effect of this reorganization is that separating copy_mc_64.S
to its own file means that perf no longer needs to track dependencies
for its memcpy_64.S benchmarks.
[ bp: Massage a bit. ]
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Cc: <stable@vger.kernel.org>
Link: http://lore.kernel.org/r/CAHk-=wjSqtXAqfUJxFtWNwmguFASTgB0dz1dT3V-78Quiezqbg@mail.gmail.com
Link: https://lkml.kernel.org/r/160195561680.2163339.11574962055305783722.stgit@dwillia2-desk3.amr.corp.intel.com
2020-10-06 10:40:16 +07:00
|
|
|
size_t copy_mc_to_iter(void *addr, size_t bytes, struct iov_iter *i)
|
2018-05-04 07:06:31 +07:00
|
|
|
{
|
2018-09-06 03:31:40 +07:00
|
|
|
if (unlikely(!check_copy_size(addr, bytes, true)))
|
2018-05-04 07:06:31 +07:00
|
|
|
return 0;
|
|
|
|
else
|
x86, powerpc: Rename memcpy_mcsafe() to copy_mc_to_{user, kernel}()
In reaction to a proposal to introduce a memcpy_mcsafe_fast()
implementation Linus points out that memcpy_mcsafe() is poorly named
relative to communicating the scope of the interface. Specifically what
addresses are valid to pass as source, destination, and what faults /
exceptions are handled.
Of particular concern is that even though x86 might be able to handle
the semantics of copy_mc_to_user() with its common copy_user_generic()
implementation other archs likely need / want an explicit path for this
case:
On Fri, May 1, 2020 at 11:28 AM Linus Torvalds <torvalds@linux-foundation.org> wrote:
>
> On Thu, Apr 30, 2020 at 6:21 PM Dan Williams <dan.j.williams@intel.com> wrote:
> >
> > However now I see that copy_user_generic() works for the wrong reason.
> > It works because the exception on the source address due to poison
> > looks no different than a write fault on the user address to the
> > caller, it's still just a short copy. So it makes copy_to_user() work
> > for the wrong reason relative to the name.
>
> Right.
>
> And it won't work that way on other architectures. On x86, we have a
> generic function that can take faults on either side, and we use it
> for both cases (and for the "in_user" case too), but that's an
> artifact of the architecture oddity.
>
> In fact, it's probably wrong even on x86 - because it can hide bugs -
> but writing those things is painful enough that everybody prefers
> having just one function.
Replace a single top-level memcpy_mcsafe() with either
copy_mc_to_user(), or copy_mc_to_kernel().
Introduce an x86 copy_mc_fragile() name as the rename for the
low-level x86 implementation formerly named memcpy_mcsafe(). It is used
as the slow / careful backend that is supplanted by a fast
copy_mc_generic() in a follow-on patch.
One side-effect of this reorganization is that separating copy_mc_64.S
to its own file means that perf no longer needs to track dependencies
for its memcpy_64.S benchmarks.
[ bp: Massage a bit. ]
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Cc: <stable@vger.kernel.org>
Link: http://lore.kernel.org/r/CAHk-=wjSqtXAqfUJxFtWNwmguFASTgB0dz1dT3V-78Quiezqbg@mail.gmail.com
Link: https://lkml.kernel.org/r/160195561680.2163339.11574962055305783722.stgit@dwillia2-desk3.amr.corp.intel.com
2020-10-06 10:40:16 +07:00
|
|
|
return _copy_mc_to_iter(addr, bytes, i);
|
2018-05-04 07:06:31 +07:00
|
|
|
}
|
|
|
|
|
2014-08-01 20:27:22 +07:00
|
|
|
size_t iov_iter_zero(size_t bytes, struct iov_iter *);
|
2014-03-06 01:50:45 +07:00
|
|
|
unsigned long iov_iter_alignment(const struct iov_iter *i);
|
2016-04-09 06:05:19 +07:00
|
|
|
unsigned long iov_iter_gap_alignment(const struct iov_iter *i);
|
2018-10-20 06:57:56 +07:00
|
|
|
void iov_iter_init(struct iov_iter *i, unsigned int direction, const struct iovec *iov,
|
2014-03-06 07:28:09 +07:00
|
|
|
unsigned long nr_segs, size_t count);
|
2018-10-20 06:57:56 +07:00
|
|
|
void iov_iter_kvec(struct iov_iter *i, unsigned int direction, const struct kvec *kvec,
|
2015-01-23 13:08:07 +07:00
|
|
|
unsigned long nr_segs, size_t count);
|
2018-10-20 06:57:56 +07:00
|
|
|
void iov_iter_bvec(struct iov_iter *i, unsigned int direction, const struct bio_vec *bvec,
|
2014-11-25 02:46:11 +07:00
|
|
|
unsigned long nr_segs, size_t count);
|
2018-10-20 06:57:56 +07:00
|
|
|
void iov_iter_pipe(struct iov_iter *i, unsigned int direction, struct pipe_inode_info *pipe,
|
2016-09-23 03:33:12 +07:00
|
|
|
size_t count);
|
2018-10-20 06:57:56 +07:00
|
|
|
void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count);
|
2014-03-15 15:05:57 +07:00
|
|
|
ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages,
|
2014-09-24 22:09:11 +07:00
|
|
|
size_t maxsize, unsigned maxpages, size_t *start);
|
2014-03-21 15:58:33 +07:00
|
|
|
ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages,
|
|
|
|
size_t maxsize, size_t *start);
|
2014-03-19 12:16:16 +07:00
|
|
|
int iov_iter_npages(const struct iov_iter *i, int maxpages);
|
2013-11-28 07:29:46 +07:00
|
|
|
|
2015-02-01 08:08:47 +07:00
|
|
|
const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags);
|
|
|
|
|
2016-10-11 00:57:37 +07:00
|
|
|
static inline size_t iov_iter_count(const struct iov_iter *i)
|
2013-11-28 07:29:46 +07:00
|
|
|
{
|
|
|
|
return i->count;
|
|
|
|
}
|
|
|
|
|
2014-06-23 14:44:40 +07:00
|
|
|
/*
|
|
|
|
* Cap the iov_iter by given limit; note that the second argument is
|
|
|
|
* *not* the new size - it's upper limit for such. Passing it a value
|
|
|
|
* greater than the amount of data in iov_iter is fine - it'll just do
|
|
|
|
* nothing in that case.
|
|
|
|
*/
|
|
|
|
static inline void iov_iter_truncate(struct iov_iter *i, u64 count)
|
2014-03-22 17:51:37 +07:00
|
|
|
{
|
2014-06-23 14:44:40 +07:00
|
|
|
/*
|
|
|
|
* count doesn't have to fit in size_t - comparison extends both
|
|
|
|
* operands to u64 here and any value that would be truncated by
|
|
|
|
* conversion in assignement is by definition greater than all
|
|
|
|
* values of size_t, including old i->count.
|
|
|
|
*/
|
2014-03-22 17:51:37 +07:00
|
|
|
if (i->count > count)
|
|
|
|
i->count = count;
|
|
|
|
}
|
|
|
|
|
2014-04-04 23:15:19 +07:00
|
|
|
/*
|
|
|
|
* reexpand a previously truncated iterator; count must be no more than how much
|
|
|
|
* we had shrunk it.
|
|
|
|
*/
|
|
|
|
static inline void iov_iter_reexpand(struct iov_iter *i, size_t count)
|
|
|
|
{
|
|
|
|
i->count = count;
|
|
|
|
}
|
2018-12-04 08:52:07 +07:00
|
|
|
size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csump, struct iov_iter *i);
|
2014-11-24 13:08:00 +07:00
|
|
|
size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i);
|
2016-11-02 09:09:04 +07:00
|
|
|
bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i);
|
2018-12-04 08:52:09 +07:00
|
|
|
size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
|
|
|
|
struct iov_iter *i);
|
2014-04-04 23:15:19 +07:00
|
|
|
|
2020-09-25 11:51:40 +07:00
|
|
|
struct iovec *iovec_from_user(const struct iovec __user *uvector,
|
|
|
|
unsigned long nr_segs, unsigned long fast_segs,
|
|
|
|
struct iovec *fast_iov, bool compat);
|
|
|
|
ssize_t import_iovec(int type, const struct iovec __user *uvec,
|
|
|
|
unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
|
|
|
|
struct iov_iter *i);
|
|
|
|
ssize_t __import_iovec(int type, const struct iovec __user *uvec,
|
|
|
|
unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
|
|
|
|
struct iov_iter *i, bool compat);
|
saner iov_iter initialization primitives
iovec-backed iov_iter instances are assumed to satisfy several properties:
* no more than UIO_MAXIOV elements in iovec array
* total size of all ranges is no more than MAX_RW_COUNT
* all ranges pass access_ok().
The problem is, invariants of data structures should be established in the
primitives creating those data structures, not in the code using those
primitives. And iov_iter_init() violates that principle. For a while we
managed to get away with that, but once the use of iov_iter started to
spread, it didn't take long for shit to hit the fan - missed check in
sys_sendto() had introduced a roothole.
We _do_ have primitives for importing and validating iovecs (both native and
compat ones) and those primitives are almost always followed by shoving the
resulting iovec into iov_iter. Life would be considerably simpler (and safer)
if we combined those primitives with initializing iov_iter.
That gives us two new primitives - import_iovec() and compat_import_iovec().
Calling conventions:
iovec = iov_array;
err = import_iovec(direction, uvec, nr_segs,
ARRAY_SIZE(iov_array), &iovec,
&iter);
imports user vector into kernel space (into iov_array if it fits, allocated
if it doesn't fit or if iovec was NULL), validates it and sets iter up to
refer to it. On success 0 is returned and allocated kernel copy (or NULL
if the array had fit into caller-supplied one) is returned via iovec.
On failure all allocations are undone and -E... is returned. If the total
size of ranges exceeds MAX_RW_COUNT, the excess is silently truncated.
compat_import_iovec() expects uvec to be a pointer to user array of compat_iovec;
otherwise it's identical to import_iovec().
Finally, import_single_range() sets iov_iter backed by single-element iovec
covering a user-supplied range -
err = import_single_range(direction, address, size, iovec, &iter);
does validation and sets iter up. Again, size in excess of MAX_RW_COUNT gets
silently truncated.
Next commits will be switching the things up to use of those and reducing
the amount of iov_iter_init() instances.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2015-03-22 04:45:43 +07:00
|
|
|
int import_single_range(int type, void __user *buf, size_t len,
|
|
|
|
struct iovec *iov, struct iov_iter *i);
|
|
|
|
|
2017-02-18 13:44:03 +07:00
|
|
|
int iov_iter_for_each_range(struct iov_iter *i, size_t bytes,
|
|
|
|
int (*f)(struct kvec *vec, void *context),
|
|
|
|
void *context);
|
|
|
|
|
2009-07-30 05:04:19 +07:00
|
|
|
#endif
|