2008-07-02 20:53:13 +07:00
|
|
|
/*
|
|
|
|
* Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
|
|
|
|
* Copyright 2002 Andi Kleen, SuSE Labs.
|
2005-04-17 05:20:36 +07:00
|
|
|
* Subject to the GNU Public License v2.
|
2008-07-02 20:53:13 +07:00
|
|
|
*
|
|
|
|
* Functions to copy from and to user space.
|
|
|
|
*/
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2006-09-26 15:52:32 +07:00
|
|
|
#include <linux/linkage.h>
|
|
|
|
#include <asm/dwarf2.h>
|
|
|
|
|
2006-02-04 03:51:02 +07:00
|
|
|
#define FIX_ALIGNMENT 1
|
|
|
|
|
2006-09-26 15:52:39 +07:00
|
|
|
#include <asm/current.h>
|
|
|
|
#include <asm/asm-offsets.h>
|
|
|
|
#include <asm/thread_info.h>
|
|
|
|
#include <asm/cpufeature.h>
|
2011-05-18 05:29:15 +07:00
|
|
|
#include <asm/alternative-asm.h>
|
2006-09-26 15:52:39 +07:00
|
|
|
|
2011-05-18 05:29:15 +07:00
|
|
|
/*
 * ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2
 *
 * Emit a 5-byte "jmp rel32" to \orig and register two candidate
 * replacement jumps for that site.  The feature2 record is placed after
 * the feature1 record in .altinstructions, which logically implements:
 *
 *	if CPU has feature2		jmp \alt2
 *	else if CPU has feature1	jmp \alt1
 *	else				jmp \orig
 */
	.macro ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2
140:						/* start of the patchable jump */
	.byte 0xe9				/* jmp with 32bit displacement */
	.long \orig-141f			/* default target: orig */
141:						/* first byte after the jump */

	/*
	 * Replacement jumps.  Both displacements are measured from 141b, the
	 * end of the ORIGINAL jump, rather than from their own position in
	 * .altinstr_replacement.
	 * NOTE(review): presumably because the replacement bytes are copied
	 * over the original site before they execute -- confirm against the
	 * alternatives patching code.
	 */
	.section .altinstr_replacement,"ax"
142:	.byte 0xe9				/* jmp with 32bit displacement */
	.long \alt1-141b			/* feature1 target: alt1 */
143:	.byte 0xe9				/* jmp with 32bit displacement */
	.long \alt2-141b			/* feature2 target: alt2 */
	.previous

	/*
	 * One record per feature, both describing the same site (140b).
	 * NOTE(review): the trailing 5,5 look like the original and the
	 * replacement instruction lengths (1 opcode byte + 4 displacement
	 * bytes) -- confirm against altinstruction_entry.
	 */
	.section .altinstructions,"a"
	altinstruction_entry 140b,142b,\feature1,5,5
	altinstruction_entry 140b,143b,\feature2,5,5
	.previous
	.endm
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2008-07-02 20:53:13 +07:00
|
|
|
/*
 * ALIGN_DESTINATION
 *
 * Copy single bytes from (%rsi) to (%rdi) until %rdi is 8-byte aligned.
 * Expands to nothing unless FIX_ALIGNMENT is defined.
 *
 * In/out:
 * rdi destination, advanced past the bytes copied here
 * rsi source, advanced by the same amount
 * edx count, reduced by the number of bytes copied here
 *
 * Clobbers: al, ecx, flags.
 *
 * The macro does not check that edx covers the head bytes; the expansion
 * sites are expected to have ruled out counts below 8 beforehand.
 */
	.macro ALIGN_DESTINATION
#ifdef FIX_ALIGNMENT
	movl %edi,%ecx
	andl $7,%ecx			/* ecx = destination & 7 */
	jz 102f				/* nothing to do, already aligned */

	/* ecx = 8 - (destination & 7): bytes up to the next 8-byte boundary */
	subl $8,%ecx
	negl %ecx
	subl %ecx,%edx			/* take the head bytes out of the count */

	/* byte-wise head copy; both memory accesses may fault */
100:	movb (%rsi),%al
101:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 100b
102:

	/*
	 * Fault in the head copy: ecx still holds the head bytes that were
	 * not copied, so edx + ecx is the total left over.  ecx is zerorest
	 * also.
	 */
	.section .fixup,"ax"
103:	addl %ecx,%edx
	jmp copy_user_handle_tail
	.previous

	/* exception table: faulting load (100) or store (101) -> fixup 103 */
	.section __ex_table,"a"
	.align 8
	.quad 100b,103b, 101b,103b
	.previous
#endif
	.endm
|
|
|
|
|
|
|
|
/*
 * _copy_to_user - standard copy_to_user with segment limit checking.
 *
 * Input:
 * rdi destination (the address range that is checked here)
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(_copy_to_user)
	CFI_STARTPROC
	/* NOTE(review): presumably leaves the thread_info pointer in rax */
	GET_THREAD_INFO(%rax)
	movq %rdi,%rcx
	addq %rdx,%rcx			/* rcx = destination + count */
	jc bad_to_user			/* the end address wrapped around */
	/*
	 * Reject only an end address strictly above the limit.  The copy
	 * touches destination .. destination+count-1, so an end address equal
	 * to the limit never touches the limit address itself; rejecting it
	 * made the last allowed page unusable as a buffer.
	 */
	cmpq TI_addr_limit(%rax),%rcx
	ja bad_to_user
	/* range accepted: jump to the copy routine selected for this CPU */
	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS,		\
			 copy_user_generic_unrolled,			\
			 copy_user_generic_string,			\
			 copy_user_enhanced_fast_string
	CFI_ENDPROC
ENDPROC(_copy_to_user)
|
2006-02-04 03:51:02 +07:00
|
|
|
|
2008-07-02 20:53:13 +07:00
|
|
|
/*
 * _copy_from_user - standard copy_from_user with segment limit checking.
 *
 * Input:
 * rdi destination
 * rsi source (the address range that is checked here)
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(_copy_from_user)
	CFI_STARTPROC
	/* NOTE(review): presumably leaves the thread_info pointer in rax */
	GET_THREAD_INFO(%rax)
	movq %rsi,%rcx
	addq %rdx,%rcx			/* rcx = source + count */
	jc bad_from_user		/* the end address wrapped around */
	/*
	 * Reject only an end address strictly above the limit.  The copy
	 * touches source .. source+count-1, so an end address equal to the
	 * limit never touches the limit address itself; rejecting it made the
	 * last allowed page unusable as a buffer.
	 */
	cmpq TI_addr_limit(%rax),%rcx
	ja bad_from_user
	/* range accepted: jump to the copy routine selected for this CPU */
	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS,		\
			 copy_user_generic_unrolled,			\
			 copy_user_generic_string,			\
			 copy_user_enhanced_fast_string
	CFI_ENDPROC
ENDPROC(_copy_from_user)
|
2006-09-26 15:52:39 +07:00
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/*
 * bad_from_user / bad_to_user - failure exits of the limit checks in
 * _copy_from_user and _copy_to_user.  Nothing has been copied when either
 * label is reached.
 *
 * Input:
 * rdi destination (only used by bad_from_user)
 * edx count
 *
 * Output:
 * eax count, i.e. every requested byte is reported as uncopied.
 *
 * bad_from_user must zero dest first, then falls through into bad_to_user.
 * ENTRY() already defines the bad_from_user symbol, so no separate label
 * is placed after it.
 */
	.section .fixup,"ax"
ENTRY(bad_from_user)
	CFI_STARTPROC
	movl %edx,%ecx			/* ecx = number of bytes to clear */
	xorl %eax,%eax			/* al = 0, the fill byte */
	rep stosb			/* zero ecx bytes starting at (%rdi) */
bad_to_user:
	movl %edx,%eax			/* return the whole count as uncopied */
	ret
	CFI_ENDPROC
ENDPROC(bad_from_user)
	.previous
|
2008-07-02 20:53:13 +07:00
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like P4 that don't have efficient micro
 * code for rep movsq.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count (only the low 32 bits, edx, are used)
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 *
 * Clobbers: rcx, rdx, rsi, rdi, r8-r11, flags.
 *
 * Three phases after the optional destination alignment: 64-byte blocks,
 * 8-byte words, single bytes.  Every memory access carries a numeric label
 * so that the exception table below can route a fault to the fixup code
 * of its phase.
 */
ENTRY(copy_user_generic_unrolled)
	CFI_STARTPROC
	cmpl $8,%edx
	jb 20f			/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION

	/* phase 1: ecx = number of 64-byte blocks, edx = count % 64 */
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz 17f				/* no complete 64-byte block */
1:	movq (%rsi),%r8
2:	movq 8(%rsi),%r9
3:	movq 16(%rsi),%r10
4:	movq 24(%rsi),%r11
5:	movq %r8,(%rdi)
6:	movq %r9,8(%rdi)
7:	movq %r10,16(%rdi)
8:	movq %r11,24(%rdi)
9:	movq 32(%rsi),%r8
10:	movq 40(%rsi),%r9
11:	movq 48(%rsi),%r10
12:	movq 56(%rsi),%r11
13:	movq %r8,32(%rdi)
14:	movq %r9,40(%rdi)
15:	movq %r10,48(%rdi)
16:	movq %r11,56(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz 1b

	/* phase 2: ecx = number of 8-byte words, edx = count % 8 */
17:	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz 20f				/* no complete 8-byte word */
18:	movq (%rsi),%r8
19:	movq %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz 18b

	/* phase 3: the remaining edx bytes, one at a time */
20:	andl %edx,%edx
	jz 23f				/* nothing left */
	movl %edx,%ecx
21:	movb (%rsi),%al
22:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 21b

23:	xorl %eax,%eax			/* success: 0 bytes uncopied */
	ret

	/*
	 * Fixups: rebuild in edx the number of bytes still to be handled
	 * from the counters of the faulting phase, then hand over to
	 * copy_user_handle_tail.
	 *	30: phase 1 -> edx = 64 * blocks left (ecx) + tail (edx)
	 *	40: phase 2 -> edx =  8 * words left (rcx)  + tail (rdx)
	 *	50: phase 3 -> edx = bytes left (ecx)
	 */
	.section .fixup,"ax"
30:	shll $6,%ecx
	addl %ecx,%edx
	jmp 60f
40:	leaq (%rdx,%rcx,8),%rdx
	jmp 60f
50:	movl %ecx,%edx
60:	jmp copy_user_handle_tail	/* ecx is zerorest also */
	.previous

	/* exception table: (faulting instruction, fixup) pairs */
	.section __ex_table,"a"
	.align 8
	/* phase 1 loads and stores */
	.quad 1b,30b, 2b,30b, 3b,30b, 4b,30b
	.quad 5b,30b, 6b,30b, 7b,30b, 8b,30b
	.quad 9b,30b, 10b,30b, 11b,30b, 12b,30b
	.quad 13b,30b, 14b,30b, 15b,30b, 16b,30b
	/* phase 2 load and store */
	.quad 18b,40b, 19b,40b
	/* phase 3 load and store */
	.quad 21b,50b, 22b,50b
	.previous
	CFI_ENDPROC
ENDPROC(copy_user_generic_unrolled)
|
2006-09-26 15:52:32 +07:00
|
|
|
|
2008-07-02 20:53:13 +07:00
|
|
|
/*
 * copy_user_generic_string - memory copy with exception handling, built
 * on the string copy instructions.  Some CPUs run faster with them, and
 * the code is also a lot simpler, so they are used when possible.
 *
 * Only 4GB of copy is supported.  This shouldn't be a problem because the
 * kernel normally only writes from/to page sized chunks even if user
 * space passed a longer buffer.  More would also be dangerous because
 * both Intel and AMD have errata with rep movsq > 4GB.  If someone feels
 * the need to fix this, please take that into account.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count (only the low 32 bits, edx, are used)
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 *
 * Clobbers: rcx, rdx, rsi, rdi, flags.
 */
ENTRY(copy_user_generic_string)
	CFI_STARTPROC
	andl %edx,%edx
	jz 4f				/* zero length: nothing to copy */
	cmpl $8,%edx
	jb 2f			/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION

	/* ecx = number of 8-byte words, edx = count % 8 */
	movl %edx,%ecx
	shrl $3,%ecx
	andl $7,%edx
1:	rep movsq
2:	movl %edx,%ecx			/* ecx = remaining bytes */
3:	rep movsb
4:	xorl %eax,%eax			/* success: 0 bytes uncopied */
	ret

	/*
	 * Fixups: put the number of bytes still to be handled into edx and
	 * hand over to copy_user_handle_tail.
	 *	11: fault in rep movsq -> 8 * words left (rcx) + tail (rdx)
	 *	12: fault in rep movsb -> bytes left (ecx)
	 */
	.section .fixup,"ax"
11:	leaq (%rdx,%rcx,8),%rcx
12:	movl %ecx,%edx			/* ecx is zerorest also */
	jmp copy_user_handle_tail
	.previous

	/* exception table: (faulting instruction, fixup) pairs */
	.section __ex_table,"a"
	.align 8
	.quad 1b,11b, 3b,12b
	.previous
	CFI_ENDPROC
ENDPROC(copy_user_generic_string)
|
2011-05-18 05:29:15 +07:00
|
|
|
|
|
|
|
/*
 * copy_user_enhanced_fast_string - memory copy with exception handling,
 * done as one rep movsb.
 *
 * Some CPUs are adding enhanced REP MOVSB/STOSB instructions.  It's
 * recommended to use enhanced REP MOVSB/STOSB if it's enabled.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count (only the low 32 bits, edx, are used)
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 *
 * Clobbers: rcx, rdx, rsi, rdi, flags.
 */
ENTRY(copy_user_enhanced_fast_string)
	CFI_STARTPROC
	andl %edx,%edx
	jz 2f				/* zero length: nothing to copy */
	movl %edx,%ecx			/* ecx = byte count for rep */
1:	rep movsb
2:	xorl %eax,%eax			/* success: 0 bytes uncopied */
	ret

	/* fault in rep movsb: ecx holds the bytes that were not copied */
	.section .fixup,"ax"
12:	movl %ecx,%edx			/* ecx is zerorest also */
	jmp copy_user_handle_tail
	.previous

	/* exception table: faulting rep movsb (1) -> fixup 12 */
	.section __ex_table,"a"
	.align 8
	.quad 1b,12b
	.previous
	CFI_ENDPROC
ENDPROC(copy_user_enhanced_fast_string)
|