2005-04-17 05:20:36 +07:00
|
|
|
/*
|
|
|
|
* S390 low-level entry points.
|
|
|
|
*
|
2012-07-20 16:15:04 +07:00
|
|
|
* Copyright IBM Corp. 1999, 2012
|
2005-04-17 05:20:36 +07:00
|
|
|
* Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
|
2006-09-28 21:56:37 +07:00
|
|
|
* Hartmut Penner (hp@de.ibm.com),
|
|
|
|
* Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com),
|
2005-06-26 04:55:30 +07:00
|
|
|
* Heiko Carstens <heiko.carstens@de.ibm.com>
|
2005-04-17 05:20:36 +07:00
|
|
|
*/
|
|
|
|
|
2008-02-05 22:50:40 +07:00
|
|
|
#include <linux/init.h>
|
2011-07-24 15:48:19 +07:00
|
|
|
#include <linux/linkage.h>
|
2005-04-17 05:20:36 +07:00
|
|
|
#include <asm/cache.h>
|
|
|
|
#include <asm/errno.h>
|
|
|
|
#include <asm/ptrace.h>
|
|
|
|
#include <asm/thread_info.h>
|
2005-09-10 01:57:26 +07:00
|
|
|
#include <asm/asm-offsets.h>
|
2005-04-17 05:20:36 +07:00
|
|
|
#include <asm/unistd.h>
|
|
|
|
#include <asm/page.h>
|
2012-06-04 20:05:43 +07:00
|
|
|
#include <asm/sigp.h>
|
2013-06-27 14:01:09 +07:00
|
|
|
#include <asm/irq.h>
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2011-12-27 17:27:15 +07:00
|
|
|
/*
 * Offsets of the general purpose register save slots inside pt_regs,
 * expressed relative to __PT_GPRS (the start of the gprs array).
 */
__PT_R0 = __PT_GPRS
|
|
|
|
__PT_R1 = __PT_GPRS + 4
|
|
|
|
__PT_R2 = __PT_GPRS + 8
|
|
|
|
__PT_R3 = __PT_GPRS + 12
|
|
|
|
__PT_R4 = __PT_GPRS + 16
|
|
|
|
__PT_R5 = __PT_GPRS + 20
|
|
|
|
__PT_R6 = __PT_GPRS + 24
|
|
|
|
__PT_R7 = __PT_GPRS + 28
|
|
|
|
__PT_R8 = __PT_GPRS + 32
|
|
|
|
__PT_R9 = __PT_GPRS + 36
|
|
|
|
__PT_R10 = __PT_GPRS + 40
|
|
|
|
__PT_R11 = __PT_GPRS + 44
|
|
|
|
__PT_R12 = __PT_GPRS + 48
|
|
|
|
/* 13 * 4: same 4-byte stride as the neighbouring __PT_Rn offsets */
__PT_R13 = __PT_GPRS + 52
|
|
|
|
__PT_R14 = __PT_GPRS + 56
|
|
|
|
__PT_R15 = __PT_GPRS + 60
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2008-10-11 02:33:20 +07:00
|
|
|
/*
 * thread_info flag bits that divert the system call exit path (sysc_tif)
 * to sysc_work before returning to user space.
 */
_TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
|
s390/uaccess: rework uaccess code - fix locking issues
The current uaccess code uses a page table walk in some circumstances,
e.g. in case of the in atomic futex operations or if running on old
hardware which doesn't support the mvcos instruction.
However it turned out that the page table walk code does not correctly
lock page tables when accessing page table entries.
In other words: a different cpu may invalidate a page table entry while
the current cpu inspects the pte. This may lead to random data corruption.
Adding correct locking however isn't trivial for all uaccess operations.
Especially copy_in_user() is problematic since that requires to hold at
least two locks, but must be protected against ABBA deadlock when a
different cpu also performs a copy_in_user() operation.
So the solution is a different approach where we change address spaces:
User space runs in primary address mode, or access register mode within
vdso code, like it currently already does.
The kernel usually also runs in home space mode, however when accessing
user space the kernel switches to primary or secondary address mode if
the mvcos instruction is not available or if a compare-and-swap (futex)
instruction on a user space address is performed.
KVM however is special, since that requires the kernel to run in home
address space while implicitly accessing user space with the sie
instruction.
So we end up with:
User space:
- runs in primary or access register mode
- cr1 contains the user asce
- cr7 contains the user asce
- cr13 contains the kernel asce
Kernel space:
- runs in home space mode
- cr1 contains the user or kernel asce
-> the kernel asce is loaded when a uaccess requires primary or
secondary address mode
- cr7 contains the user or kernel asce, (changed with set_fs())
- cr13 contains the kernel asce
In case of uaccess the kernel changes to:
- primary space mode in case of a uaccess (copy_to_user) and uses
e.g. the mvcp instruction to access user space. However the kernel
will stay in home space mode if the mvcos instruction is available
- secondary space mode in case of futex atomic operations, so that the
instructions come from primary address space and data from secondary
space
In case of kvm the kernel runs in home space mode, but cr1 gets switched
to contain the gmap asce before the sie instruction gets executed. When
the sie instruction is finished cr1 will be switched back to contain the
user asce.
A context switch between two processes will always load the kernel asce
for the next process in cr1. So the first exit to user space is a bit
more expensive (one extra load control register instruction) than before,
however keeps the code rather simple.
In sum this means there is no need to perform any error prone page table
walks anymore when accessing user space.
The patch seems to be rather large, however it mainly removes the
page table walk code and restores the previously deleted "standard"
uaccess code, with a couple of changes.
The uaccess without mvcos mode can be enforced with the "uaccess_primary"
kernel parameter.
Reported-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
2014-03-21 16:42:25 +07:00
|
|
|
_TIF_MCCK_PENDING | _TIF_PER_TRAP | _TIF_ASCE)
|
2008-10-11 02:33:20 +07:00
|
|
|
/*
 * thread_info flag bits that divert the interrupt exit path (io_tif)
 * to io_work.
 */
_TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
|
s390/uaccess: rework uaccess code - fix locking issues
The current uaccess code uses a page table walk in some circumstances,
e.g. in case of the in atomic futex operations or if running on old
hardware which doesn't support the mvcos instruction.
However it turned out that the page table walk code does not correctly
lock page tables when accessing page table entries.
In other words: a different cpu may invalidate a page table entry while
the current cpu inspects the pte. This may lead to random data corruption.
Adding correct locking however isn't trivial for all uaccess operations.
Especially copy_in_user() is problematic since that requires to hold at
least two locks, but must be protected against ABBA deadlock when a
different cpu also performs a copy_in_user() operation.
So the solution is a different approach where we change address spaces:
User space runs in primary address mode, or access register mode within
vdso code, like it currently already does.
The kernel usually also runs in home space mode, however when accessing
user space the kernel switches to primary or secondary address mode if
the mvcos instruction is not available or if a compare-and-swap (futex)
instruction on a user space address is performed.
KVM however is special, since that requires the kernel to run in home
address space while implicitly accessing user space with the sie
instruction.
So we end up with:
User space:
- runs in primary or access register mode
- cr1 contains the user asce
- cr7 contains the user asce
- cr13 contains the kernel asce
Kernel space:
- runs in home space mode
- cr1 contains the user or kernel asce
-> the kernel asce is loaded when a uaccess requires primary or
secondary address mode
- cr7 contains the user or kernel asce, (changed with set_fs())
- cr13 contains the kernel asce
In case of uaccess the kernel changes to:
- primary space mode in case of a uaccess (copy_to_user) and uses
e.g. the mvcp instruction to access user space. However the kernel
will stay in home space mode if the mvcos instruction is available
- secondary space mode in case of futex atomic operations, so that the
instructions come from primary address space and data from secondary
space
In case of kvm the kernel runs in home space mode, but cr1 gets switched
to contain the gmap asce before the sie instruction gets executed. When
the sie instruction is finished cr1 will be switched back to contain the
user asce.
A context switch between two processes will always load the kernel asce
for the next process in cr1. So the first exit to user space is a bit
more expensive (one extra load control register instruction) than before,
however keeps the code rather simple.
In sum this means there is no need to perform any error prone page table
walks anymore when accessing user space.
The patch seems to be rather large, however it mainly removes the
page table walk code and restores the previously deleted "standard"
uaccess code, with a couple of changes.
The uaccess without mvcos mode can be enforced with the "uaccess_primary"
kernel parameter.
Reported-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
2014-03-21 16:42:25 +07:00
|
|
|
_TIF_MCCK_PENDING | _TIF_ASCE)
|
2011-10-30 21:16:49 +07:00
|
|
|
/*
 * thread_info flag bits that route a system call through sysc_tracesys.
 * The entry code tests them in byte __TI_flags+2 as (_TIF_TRACE >> 8).
 */
_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
|
|
|
|
_TIF_SYSCALL_TRACEPOINT)
|
2012-09-10 18:00:09 +07:00
|
|
|
/*
 * thread_info flag bits that __switch_to moves from the previous task's
 * flags word to the next task's flags word.
 */
_TIF_TRANSFER = (_TIF_MCCK_PENDING | _TIF_TLB_WAIT)
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/*
 * Kernel stack geometry.  A stack covers 2^STACK_SHIFT bytes (STACK_SIZE).
 * STACK_INIT is STACK_SIZE minus one stack frame and one pt_regs;
 * __switch_to adds it to the thread_info address of the next task to
 * obtain the value stored in __LC_KERNEL_STACK.
 */
STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER
|
|
|
|
STACK_SIZE = 1 << STACK_SHIFT
|
2013-04-24 15:20:43 +07:00
|
|
|
STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/*
 * BASED(name): base+displacement operand for "name", written as its offset
 * from system_call with %r13 as the base register.  The entry points in
 * this file load %r13 from __LC_SVC_NEW_PSW+4 before using it (presumably
 * that word holds the address of system_call -- confirm).
 */
#define BASED(name) name-system_call(%r13)
|
|
|
|
|
2006-07-03 14:24:46 +07:00
|
|
|
/*
 * TRACE_IRQS_ON - when CONFIG_TRACE_IRQFLAGS is set, call the routine whose
 * address is stored at .Lhardirqs_on (trace_hardirqs_on_caller); expands
 * to nothing otherwise.  "basr %r2,%r0" does not branch: it only places
 * the address of the following instruction in %r2, which becomes the
 * first argument of the callee.  The macro itself overwrites %r1, %r2 and
 * %r14 and needs %r13 set up for BASED().
 */
.macro TRACE_IRQS_ON
|
2011-12-27 17:27:15 +07:00
|
|
|
#ifdef CONFIG_TRACE_IRQFLAGS
|
2008-11-15 00:18:05 +07:00
|
|
|
basr %r2,%r0
|
2011-12-27 17:27:15 +07:00
|
|
|
l %r1,BASED(.Lhardirqs_on)
|
|
|
|
basr %r14,%r1 # call trace_hardirqs_on_caller
|
|
|
|
#endif
|
2006-07-03 14:24:46 +07:00
|
|
|
.endm
|
|
|
|
|
|
|
|
/*
 * TRACE_IRQS_OFF - when CONFIG_TRACE_IRQFLAGS is set, call the routine
 * whose address is stored at .Lhardirqs_off (trace_hardirqs_off_caller),
 * passing the address following the "basr %r2,%r0" in %r2; expands to
 * nothing otherwise.  The macro itself overwrites %r1, %r2 and %r14 and
 * needs %r13 set up for BASED().
 */
.macro TRACE_IRQS_OFF
|
2011-12-27 17:27:15 +07:00
|
|
|
#ifdef CONFIG_TRACE_IRQFLAGS
|
2008-11-15 00:18:05 +07:00
|
|
|
basr %r2,%r0
|
2011-12-27 17:27:15 +07:00
|
|
|
l %r1,BASED(.Lhardirqs_off)
|
|
|
|
basr %r14,%r1 # call trace_hardirqs_off_caller
|
2007-11-20 17:13:32 +07:00
|
|
|
#endif
|
2011-12-27 17:27:15 +07:00
|
|
|
.endm
|
2007-11-20 17:13:32 +07:00
|
|
|
|
|
|
|
/*
 * LOCKDEP_SYS_EXIT - when CONFIG_LOCKDEP is set, call lockdep_sys_exit if
 * the PSW saved in pt_regs (%r11) has bit 0x01 of byte __PT_PSW+1 set,
 * i.e. when returning to user space.  "jz .+10" branches 10 bytes ahead:
 * past the jz itself (4 bytes), the "l" (4 bytes) and the "basr"
 * (2 bytes).  Overwrites %r1 and %r14; needs %r13 set up for BASED().
 */
.macro LOCKDEP_SYS_EXIT
|
2011-12-27 17:27:15 +07:00
|
|
|
#ifdef CONFIG_LOCKDEP
|
|
|
|
tm __PT_PSW+1(%r11),0x01 # returning to user ?
|
|
|
|
jz .+10
|
2007-11-20 17:13:32 +07:00
|
|
|
l %r1,BASED(.Llockdep_sys_exit)
|
2011-12-27 17:27:15 +07:00
|
|
|
basr %r14,%r1 # call lockdep_sys_exit
|
2006-07-03 14:24:46 +07:00
|
|
|
#endif
|
2007-07-10 16:24:18 +07:00
|
|
|
.endm
|
|
|
|
|
2011-12-27 17:27:15 +07:00
|
|
|
/*
 * CHECK_STACK stacksize,savearea - when CONFIG_CHECK_STACK is set, test the
 * bits of %r15 selected by the mask (stacksize - CONFIG_STACK_GUARD); if
 * all of them are zero, branch to stack_overflow with %r14 holding the
 * address of savearea.  "la" does not alter the condition code, which is
 * why it can sit between the test and the branch.
 */
.macro CHECK_STACK stacksize,savearea
|
2006-06-29 19:58:05 +07:00
|
|
|
#ifdef CONFIG_CHECK_STACK
|
2011-12-27 17:27:15 +07:00
|
|
|
tml %r15,\stacksize - CONFIG_STACK_GUARD
|
|
|
|
la %r14,\savearea
|
|
|
|
jz stack_overflow
|
2006-06-29 19:58:05 +07:00
|
|
|
#endif
|
|
|
|
.endm
|
|
|
|
|
2011-12-27 17:27:15 +07:00
|
|
|
/*
 * SWITCH_ASYNC savearea,stack,shift - choose the stack for an asynchronous
 * interrupt and point %r11 at the pt_regs area on it.
 *
 * Expects the old PSW in %r8 (mask word) and %r9 (address word).
 *  - Interrupted in problem state (user): load %r15 from \stack.
 *  - Otherwise, if the interrupted address minus .Lcritical_start is below
 *    .Lcritical_length, call cleanup_critical with %r11 = \savearea and
 *    re-test the problem state bit afterwards (presumably because the
 *    cleanup can change %r8 -- confirm against cleanup_critical).
 *  - Still in kernel: when (\stack - %r15) >> \shift is zero the code is
 *    already on the target stack; run CHECK_STACK and reserve a stack
 *    frame plus pt_regs below the current %r15.  Otherwise load %r15 from
 *    \stack.
 * On exit %r11 = %r15 + STACK_FRAME_OVERHEAD.  Overwrites %r14.
 */
.macro SWITCH_ASYNC savearea,stack,shift
|
|
|
|
tmh %r8,0x0001 # interrupting from user ?
|
|
|
|
jnz 1f
|
|
|
|
lr %r14,%r9
|
|
|
|
sl %r14,BASED(.Lcritical_start)
|
|
|
|
cl %r14,BASED(.Lcritical_length)
|
|
|
|
jhe 0f
|
|
|
|
la %r11,\savearea # inside critical section, do cleanup
|
|
|
|
bras %r14,cleanup_critical
|
|
|
|
tmh %r8,0x0001 # retest problem state after cleanup
|
|
|
|
jnz 1f
|
|
|
|
0: l %r14,\stack # are we already on the target stack?
|
2005-04-17 05:20:36 +07:00
|
|
|
slr %r14,%r15
|
2011-12-27 17:27:15 +07:00
|
|
|
sra %r14,\shift
|
|
|
|
jnz 1f
|
|
|
|
CHECK_STACK 1<<\shift,\savearea
|
2013-04-24 15:20:43 +07:00
|
|
|
ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
|
2011-12-27 17:27:15 +07:00
|
|
|
j 2f
|
|
|
|
1: l %r15,\stack # load target stack
|
2013-04-24 15:20:43 +07:00
|
|
|
2: la %r11,STACK_FRAME_OVERHEAD(%r15)
|
2005-06-26 04:55:30 +07:00
|
|
|
.endm
|
|
|
|
|
2011-12-27 17:27:15 +07:00
|
|
|
/*
 * ADD64 high,low,timer - add the 64-bit value stored at \timer (high word
 * first, low word at \timer+4) to the register pair \high:\low using two
 * 32-bit logical adds.  "brc 12,.+8" skips the "ahi \high,1" when the
 * low-word add produced no carry.
 */
.macro ADD64 high,low,timer
|
|
|
|
al \high,\timer
|
2012-03-11 22:59:27 +07:00
|
|
|
al \low,4+\timer
|
2011-12-27 17:27:15 +07:00
|
|
|
brc 12,.+8
|
|
|
|
ahi \high,1
|
2005-04-17 05:20:36 +07:00
|
|
|
.endm
|
|
|
|
|
2011-12-27 17:27:15 +07:00
|
|
|
/*
 * SUB64 high,low,timer - subtract the 64-bit value stored at \timer (high
 * word first, low word at \timer+4) from the register pair \high:\low
 * using two 32-bit logical subtracts.  "brc 3,.+8" skips the
 * "ahi \high,-1" when the low-word subtract produced no borrow.
 */
.macro SUB64 high,low,timer
|
|
|
|
sl \high,\timer
|
2012-03-11 22:59:27 +07:00
|
|
|
sl \low,4+\timer
|
2011-12-27 17:27:15 +07:00
|
|
|
brc 3,.+8
|
|
|
|
ahi \high,-1
|
|
|
|
.endm
|
|
|
|
|
|
|
|
/*
 * UPDATE_VTIME high,low,enter_timer - update the __LC_* timer fields:
 *   __LC_USER_TIMER       += __LC_EXIT_TIMER - \enter_timer
 *   __LC_SYSTEM_TIMER     += __LC_LAST_UPDATE_TIMER - __LC_EXIT_TIMER
 *   __LC_LAST_UPDATE_TIMER = \enter_timer
 * \high and \low are scratch registers loaded and stored with lm/stm, so
 * callers pass two consecutive registers (%r8,%r9 or %r14,%r15 in this
 * file).
 */
.macro UPDATE_VTIME high,low,enter_timer
|
|
|
|
lm \high,\low,__LC_EXIT_TIMER
|
|
|
|
SUB64 \high,\low,\enter_timer
|
|
|
|
ADD64 \high,\low,__LC_USER_TIMER
|
|
|
|
stm \high,\low,__LC_USER_TIMER
|
|
|
|
lm \high,\low,__LC_LAST_UPDATE_TIMER
|
|
|
|
SUB64 \high,\low,__LC_EXIT_TIMER
|
|
|
|
ADD64 \high,\low,__LC_SYSTEM_TIMER
|
|
|
|
stm \high,\low,__LC_SYSTEM_TIMER
|
|
|
|
mvc __LC_LAST_UPDATE_TIMER(8),\enter_timer
|
2005-04-17 05:20:36 +07:00
|
|
|
.endm
|
|
|
|
|
2010-10-25 21:10:37 +07:00
|
|
|
/*
 * REENABLE_IRQS - set the system mask from the first byte of the old PSW
 * mask held in %r8, with bit 0x40 cleared (the PER bit, compare the
 * "tmh %r8,0x4000" test in pgm_check_handler).  __LC_RETURN_PSW is used as
 * scratch storage for the ni/ssm pair.
 */
.macro REENABLE_IRQS
|
2011-12-27 17:27:15 +07:00
|
|
|
st %r8,__LC_RETURN_PSW
|
|
|
|
ni __LC_RETURN_PSW,0xbf
|
|
|
|
ssm __LC_RETURN_PSW
|
2010-10-25 21:10:37 +07:00
|
|
|
.endm
|
|
|
|
|
2011-01-05 18:47:25 +07:00
|
|
|
.section .kprobes.text, "ax"
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/*
|
|
|
|
* Scheduler resume function, called by switch_to
|
|
|
|
* gpr2 = (task_struct *) prev
|
|
|
|
* gpr3 = (task_struct *) next
|
|
|
|
* Returns:
|
|
|
|
* gpr2 = prev
|
|
|
|
*/
|
2011-07-24 15:48:19 +07:00
|
|
|
/*
 * Implementation notes: %r6-%r15 of prev are saved in its stack frame and
 * the same registers of next are reloaded from next's frame, so the final
 * "br %r14" returns on behalf of next.  Along the way __LC_CURRENT,
 * __LC_THREAD_INFO, __LC_KERNEL_STACK and __LC_CURRENT_PID are updated,
 * control register 4 is loaded from next's pid field, and any
 * _TIF_TRANSFER bits set in prev's thread_info flags are moved to next's.
 */
ENTRY(__switch_to)
|
2012-05-15 14:20:06 +07:00
|
|
|
stm %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task
|
|
|
|
st %r15,__THREAD_ksp(%r2) # store kernel stack of prev
|
2011-12-27 17:27:15 +07:00
|
|
|
l %r4,__THREAD_info(%r2) # get thread_info of prev
|
2011-01-05 18:48:10 +07:00
|
|
|
l %r5,__THREAD_info(%r3) # get thread_info of next
|
2012-05-15 14:20:06 +07:00
|
|
|
lr %r15,%r5
|
2013-04-24 15:20:43 +07:00
|
|
|
ahi %r15,STACK_INIT # end of kernel stack of next
|
2012-05-15 14:20:06 +07:00
|
|
|
st %r3,__LC_CURRENT # store task struct of next
|
|
|
|
st %r5,__LC_THREAD_INFO # store thread info of next
|
|
|
|
st %r15,__LC_KERNEL_STACK # store end of kernel stack
|
|
|
|
lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4
|
|
|
|
mvc __LC_CURRENT_PID(4,%r0),__TASK_pid(%r3) # store pid of next
|
|
|
|
l %r15,__THREAD_ksp(%r3) # load kernel stack of next
|
2012-09-10 18:00:09 +07:00
|
|
|
lhi %r6,_TIF_TRANSFER # transfer TIF bits
|
|
|
|
n %r6,__TI_flags(%r4) # isolate TIF bits
|
2011-12-27 17:27:15 +07:00
|
|
|
jz 0f
|
2012-09-10 18:00:09 +07:00
|
|
|
o %r6,__TI_flags(%r5) # set TIF bits of next
|
|
|
|
st %r6,__TI_flags(%r5)
|
|
|
|
ni __TI_flags+3(%r4),255-_TIF_TRANSFER # clear TIF bits of prev
|
2012-05-15 14:20:06 +07:00
|
|
|
0: lm %r6,%r15,__SF_GPRS(%r15) # load gprs of next task
|
2005-04-17 05:20:36 +07:00
|
|
|
br %r14
|
|
|
|
|
|
|
|
__critical_start:
|
|
|
|
/*
|
|
|
|
* SVC interrupt handler routine. System calls are synchronous events and
|
|
|
|
* are executed with interrupts enabled.
|
|
|
|
*/
|
|
|
|
|
2011-07-24 15:48:19 +07:00
|
|
|
/*
 * Register usage on the system call path:
 *   %r8  - system call number * 4 (byte index into the table)
 *   %r9  - address of the system call function
 *   %r10 - system call table taken from thread_info
 *   %r11 - pointer to pt_regs on the kernel stack
 *   %r12 - thread_info
 *   %r13 - base register for BASED()
 * A zero number in the interruption code means "svc 0": the number is then
 * taken from %r1, but only when it is below .Lnr_syscalls; otherwise table
 * entry 0 is called.
 */
ENTRY(system_call)
|
2008-12-25 19:39:25 +07:00
|
|
|
stpt __LC_SYNC_ENTER_TIMER
|
2011-12-27 17:27:15 +07:00
|
|
|
sysc_stm:
|
|
|
|
stm %r8,%r15,__LC_SAVE_AREA_SYNC
|
|
|
|
l %r12,__LC_THREAD_INFO
|
|
|
|
l %r13,__LC_SVC_NEW_PSW+4
|
|
|
|
sysc_per:
|
|
|
|
l %r15,__LC_KERNEL_STACK
|
|
|
|
la %r11,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs
|
2005-04-17 05:20:36 +07:00
|
|
|
sysc_vtime:
|
2011-12-27 17:27:15 +07:00
|
|
|
UPDATE_VTIME %r8,%r9,__LC_SYNC_ENTER_TIMER
|
|
|
|
stm %r0,%r7,__PT_R0(%r11)
|
|
|
|
mvc __PT_R8(32,%r11),__LC_SAVE_AREA_SYNC
|
|
|
|
mvc __PT_PSW(8,%r11),__LC_SVC_OLD_PSW
|
2011-12-27 17:27:18 +07:00
|
|
|
mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC
|
2005-04-17 05:20:36 +07:00
|
|
|
sysc_do_svc:
|
2011-12-27 17:27:15 +07:00
|
|
|
oi __TI_flags+3(%r12),_TIF_SYSCALL
|
2013-04-24 17:58:39 +07:00
|
|
|
l %r10,__TI_sysc_table(%r12) # 31 bit system call table
|
2011-12-27 17:27:18 +07:00
|
|
|
lh %r8,__PT_INT_CODE+2(%r11)
|
2011-12-27 17:27:15 +07:00
|
|
|
sla %r8,2 # shift and test for svc0
|
|
|
|
jnz sysc_nr_ok
|
2005-04-17 05:20:36 +07:00
|
|
|
# svc 0: system call number in %r1
|
|
|
|
cl %r1,BASED(.Lnr_syscalls)
|
2011-12-27 17:27:15 +07:00
|
|
|
jnl sysc_nr_ok
|
2011-12-27 17:27:18 +07:00
|
|
|
sth %r1,__PT_INT_CODE+2(%r11)
|
2011-12-27 17:27:15 +07:00
|
|
|
lr %r8,%r1
|
|
|
|
sla %r8,2
|
2005-04-17 05:20:36 +07:00
|
|
|
sysc_nr_ok:
|
2011-12-27 17:27:15 +07:00
|
|
|
xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15)
|
|
|
|
st %r2,__PT_ORIG_GPR2(%r11)
|
|
|
|
st %r7,STACK_FRAME_OVERHEAD(%r15)
|
|
|
|
l %r9,0(%r8,%r10) # get system call addr.
|
2011-10-30 21:16:49 +07:00
|
|
|
tm __TI_flags+2(%r12),_TIF_TRACE >> 8
|
2011-12-27 17:27:15 +07:00
|
|
|
jnz sysc_tracesys
|
|
|
|
basr %r14,%r9 # call sys_xxxx
|
|
|
|
st %r2,__PT_R2(%r11) # store return value
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
sysc_return:
|
2010-05-17 15:00:02 +07:00
|
|
|
LOCKDEP_SYS_EXIT
|
|
|
|
sysc_tif:
|
2011-12-27 17:27:15 +07:00
|
|
|
tm __PT_PSW+1(%r11),0x01 # returning to user ?
|
|
|
|
jno sysc_restore
|
2011-01-05 18:47:57 +07:00
|
|
|
tm __TI_flags+3(%r12),_TIF_WORK_SVC
|
2011-12-27 17:27:15 +07:00
|
|
|
jnz sysc_work # check for work
|
2011-10-30 21:16:49 +07:00
|
|
|
ni __TI_flags+3(%r12),255-_TIF_SYSCALL
|
2007-11-20 17:13:32 +07:00
|
|
|
sysc_restore:
|
2011-12-27 17:27:15 +07:00
|
|
|
mvc __LC_RETURN_PSW(8),__PT_PSW(%r11)
|
|
|
|
stpt __LC_EXIT_TIMER
|
|
|
|
lm %r0,%r15,__PT_R0(%r11)
|
|
|
|
lpsw __LC_RETURN_PSW
|
2007-11-20 17:13:32 +07:00
|
|
|
sysc_done:
|
|
|
|
|
2010-05-17 15:00:01 +07:00
|
|
|
#
|
|
|
|
# One of the work bits is on. Find out which one.
|
|
|
|
#
|
2011-10-30 21:16:49 +07:00
|
|
|
/*
 * The bits are tested in this order: _TIF_MCCK_PENDING, _TIF_NEED_RESCHED,
 * _TIF_PER_TRAP, _TIF_SIGPENDING, _TIF_NOTIFY_RESUME, _TIF_ASCE.  Only the
 * first one found is handled; every handler comes back through
 * sysc_return (sysc_sigpending may restart the system call instead), so
 * the flags are evaluated again afterwards.
 */
sysc_work:
|
2011-01-05 18:47:57 +07:00
|
|
|
tm __TI_flags+3(%r12),_TIF_MCCK_PENDING
|
2011-12-27 17:27:15 +07:00
|
|
|
jo sysc_mcck_pending
|
2011-01-05 18:47:57 +07:00
|
|
|
tm __TI_flags+3(%r12),_TIF_NEED_RESCHED
|
2011-12-27 17:27:15 +07:00
|
|
|
jo sysc_reschedule
|
2012-11-21 22:36:27 +07:00
|
|
|
tm __TI_flags+3(%r12),_TIF_PER_TRAP
|
|
|
|
jo sysc_singlestep
|
2011-01-05 18:47:57 +07:00
|
|
|
tm __TI_flags+3(%r12),_TIF_SIGPENDING
|
2011-12-27 17:27:15 +07:00
|
|
|
jo sysc_sigpending
|
2011-01-05 18:47:57 +07:00
|
|
|
tm __TI_flags+3(%r12),_TIF_NOTIFY_RESUME
|
2011-12-27 17:27:15 +07:00
|
|
|
jo sysc_notify_resume
|
s390/uaccess: rework uaccess code - fix locking issues
The current uaccess code uses a page table walk in some circumstances,
e.g. in case of the in atomic futex operations or if running on old
hardware which doesn't support the mvcos instruction.
However it turned out that the page table walk code does not correctly
lock page tables when accessing page table entries.
In other words: a different cpu may invalidate a page table entry while
the current cpu inspects the pte. This may lead to random data corruption.
Adding correct locking however isn't trivial for all uaccess operations.
Especially copy_in_user() is problematic since that requires to hold at
least two locks, but must be protected against ABBA deadlock when a
different cpu also performs a copy_in_user() operation.
So the solution is a different approach where we change address spaces:
User space runs in primary address mode, or access register mode within
vdso code, like it currently already does.
The kernel usually also runs in home space mode, however when accessing
user space the kernel switches to primary or secondary address mode if
the mvcos instruction is not available or if a compare-and-swap (futex)
instruction on a user space address is performed.
KVM however is special, since that requires the kernel to run in home
address space while implicitly accessing user space with the sie
instruction.
So we end up with:
User space:
- runs in primary or access register mode
- cr1 contains the user asce
- cr7 contains the user asce
- cr13 contains the kernel asce
Kernel space:
- runs in home space mode
- cr1 contains the user or kernel asce
-> the kernel asce is loaded when a uaccess requires primary or
secondary address mode
- cr7 contains the user or kernel asce, (changed with set_fs())
- cr13 contains the kernel asce
In case of uaccess the kernel changes to:
- primary space mode in case of a uaccess (copy_to_user) and uses
e.g. the mvcp instruction to access user space. However the kernel
will stay in home space mode if the mvcos instruction is available
- secondary space mode in case of futex atomic operations, so that the
instructions come from primary address space and data from secondary
space
In case of kvm the kernel runs in home space mode, but cr1 gets switched
to contain the gmap asce before the sie instruction gets executed. When
the sie instruction is finished cr1 will be switched back to contain the
user asce.
A context switch between two processes will always load the kernel asce
for the next process in cr1. So the first exit to user space is a bit
more expensive (one extra load control register instruction) than before,
however keeps the code rather simple.
In sum this means there is no need to perform any error prone page table
walks anymore when accessing user space.
The patch seems to be rather large, however it mainly removes the
page table walk code and restores the previously deleted "standard"
uaccess code, with a couple of changes.
The uaccess without mvcos mode can be enforced with the "uaccess_primary"
kernel parameter.
Reported-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
2014-03-21 16:42:25 +07:00
|
|
|
tm __TI_flags+3(%r12),_TIF_ASCE
|
|
|
|
jo sysc_uaccess
|
2011-12-27 17:27:15 +07:00
|
|
|
j sysc_return # beware of critical section cleanup
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
#
|
|
|
|
# _TIF_NEED_RESCHED is set, call schedule
|
2006-09-28 21:56:37 +07:00
|
|
|
#
|
|
|
|
/*
 * Tail call: %r14 is preloaded with the address of sysc_return, so the
 * routine at .Lschedule returns directly into the exit path.
 */
sysc_reschedule:
|
|
|
|
l %r1,BASED(.Lschedule)
|
2010-05-17 15:00:02 +07:00
|
|
|
la %r14,BASED(sysc_return)
|
2011-12-27 17:27:15 +07:00
|
|
|
br %r1 # call schedule
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2005-06-26 04:55:30 +07:00
|
|
|
#
|
|
|
|
# _TIF_MCCK_PENDING is set, call handler
|
|
|
|
#
|
|
|
|
/*
 * Tail call to the routine at .Lhandle_mcck with sysc_return as return
 * address; the flag is not cleared here.
 */
sysc_mcck_pending:
|
2011-12-27 17:27:15 +07:00
|
|
|
l %r1,BASED(.Lhandle_mcck)
|
2010-05-17 15:00:02 +07:00
|
|
|
la %r14,BASED(sysc_return)
|
2005-06-26 04:55:30 +07:00
|
|
|
br %r1 # TIF bit will be cleared by handler
|
|
|
|
|
s390/uaccess: rework uaccess code - fix locking issues
The current uaccess code uses a page table walk in some circumstances,
e.g. in case of the in atomic futex operations or if running on old
hardware which doesn't support the mvcos instruction.
However it turned out that the page table walk code does not correctly
lock page tables when accessing page table entries.
In other words: a different cpu may invalidate a page table entry while
the current cpu inspects the pte. This may lead to random data corruption.
Adding correct locking however isn't trivial for all uaccess operations.
Especially copy_in_user() is problematic since that requires to hold at
least two locks, but must be protected against ABBA deadlock when a
different cpu also performs a copy_in_user() operation.
So the solution is a different approach where we change address spaces:
User space runs in primary address mode, or access register mode within
vdso code, like it currently already does.
The kernel usually also runs in home space mode, however when accessing
user space the kernel switches to primary or secondary address mode if
the mvcos instruction is not available or if a compare-and-swap (futex)
instruction on a user space address is performed.
KVM however is special, since that requires the kernel to run in home
address space while implicitly accessing user space with the sie
instruction.
So we end up with:
User space:
- runs in primary or access register mode
- cr1 contains the user asce
- cr7 contains the user asce
- cr13 contains the kernel asce
Kernel space:
- runs in home space mode
- cr1 contains the user or kernel asce
-> the kernel asce is loaded when a uaccess requires primary or
secondary address mode
- cr7 contains the user or kernel asce, (changed with set_fs())
- cr13 contains the kernel asce
In case of uaccess the kernel changes to:
- primary space mode in case of a uaccess (copy_to_user) and uses
e.g. the mvcp instruction to access user space. However the kernel
will stay in home space mode if the mvcos instruction is available
- secondary space mode in case of futex atomic operations, so that the
instructions come from primary address space and data from secondary
space
In case of kvm the kernel runs in home space mode, but cr1 gets switched
to contain the gmap asce before the sie instruction gets executed. When
the sie instruction is finished cr1 will be switched back to contain the
user asce.
A context switch between two processes will always load the kernel asce
for the next process in cr1. So the first exit to user space is a bit
more expensive (one extra load control register instruction) than before,
however keeps the code rather simple.
In sum this means there is no need to perform any error prone page table
walks anymore when accessing user space.
The patch seems to be rather large, however it mainly removes the
page table walk code and restores the previously deleted "standard"
uaccess code, with a couple of changes.
The uaccess without mvcos mode can be enforced with the "uaccess_primary"
kernel parameter.
Reported-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
2014-03-21 16:42:25 +07:00
|
|
|
#
|
|
|
|
# _TIF_ASCE is set, load user space asce
|
|
|
|
#
|
|
|
|
/*
 * Clears _TIF_ASCE, loads control register 1 from __LC_USER_ASCE and
 * re-enters sysc_return so the remaining work bits are checked again.
 */
sysc_uaccess:
|
|
|
|
ni __TI_flags+3(%r12),255-_TIF_ASCE
|
|
|
|
lctl %c1,%c1,__LC_USER_ASCE # load primary asce
|
|
|
|
j sysc_return
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
#
|
2008-04-30 14:53:08 +07:00
|
|
|
# _TIF_SIGPENDING is set, call do_signal
|
2005-04-17 05:20:36 +07:00
|
|
|
#
|
2006-09-28 21:56:37 +07:00
|
|
|
/*
 * Calls do_signal with %r2 = pt_regs.  If _TIF_SYSCALL is still set
 * afterwards the system call is restarted: the arguments %r2-%r7 and the
 * table pointer %r10 are reloaded, %r8 is rebuilt from the number stored
 * at __PT_INT_CODE+2 (or left 0 when that number is not below
 * .Lnr_syscalls) and control re-enters the normal path at sysc_nr_ok.
 */
sysc_sigpending:
|
2011-12-27 17:27:15 +07:00
|
|
|
lr %r2,%r11 # pass pointer to pt_regs
|
2006-09-28 21:56:37 +07:00
|
|
|
l %r1,BASED(.Ldo_signal)
|
|
|
|
basr %r14,%r1 # call do_signal
|
2011-10-30 21:16:49 +07:00
|
|
|
tm __TI_flags+3(%r12),_TIF_SYSCALL
|
2011-12-27 17:27:15 +07:00
|
|
|
jno sysc_return
|
|
|
|
lm %r2,%r7,__PT_R2(%r11) # load svc arguments
|
2013-09-27 20:24:38 +07:00
|
|
|
l %r10,__TI_sysc_table(%r12) # 31 bit system call table
|
2011-12-27 17:27:15 +07:00
|
|
|
xr %r8,%r8 # svc 0 returns -ENOSYS
|
2011-12-27 17:27:18 +07:00
|
|
|
clc __PT_INT_CODE+2(2,%r11),BASED(.Lnr_syscalls+2)
|
2011-12-27 17:27:15 +07:00
|
|
|
jnl sysc_nr_ok # invalid svc number -> do svc 0
|
2011-12-27 17:27:18 +07:00
|
|
|
lh %r8,__PT_INT_CODE+2(%r11) # load new svc number
|
2011-12-27 17:27:15 +07:00
|
|
|
sla %r8,2
|
|
|
|
j sysc_nr_ok # restart svc
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2008-10-11 02:33:20 +07:00
|
|
|
#
|
|
|
|
# _TIF_NOTIFY_RESUME is set, call do_notify_resume
|
|
|
|
#
|
|
|
|
/*
 * Tail call to do_notify_resume with %r2 = pt_regs and sysc_return as
 * return address.
 */
sysc_notify_resume:
|
2011-12-27 17:27:15 +07:00
|
|
|
lr %r2,%r11 # pass pointer to pt_regs
|
2008-10-11 02:33:20 +07:00
|
|
|
l %r1,BASED(.Ldo_notify_resume)
|
2010-05-17 15:00:02 +07:00
|
|
|
la %r14,BASED(sysc_return)
|
2008-10-11 02:33:20 +07:00
|
|
|
br %r1 # call do_notify_resume
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
#
|
2011-01-05 18:48:10 +07:00
|
|
|
# _TIF_PER_TRAP is set, call do_per_trap
|
2005-04-17 05:20:36 +07:00
|
|
|
#
|
|
|
|
/*
 * Clears _TIF_PER_TRAP first, then tail-calls do_per_trap with
 * %r2 = pt_regs and sysc_return as return address.
 */
sysc_singlestep:
|
2012-11-21 22:36:27 +07:00
|
|
|
ni __TI_flags+3(%r12),255-_TIF_PER_TRAP
|
2011-12-27 17:27:15 +07:00
|
|
|
lr %r2,%r11 # pass pointer to pt_regs
|
|
|
|
l %r1,BASED(.Ldo_per_trap)
|
|
|
|
la %r14,BASED(sysc_return)
|
|
|
|
br %r1 # call do_per_trap
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
#
|
2008-10-11 02:33:20 +07:00
|
|
|
# call tracehook_report_syscall_entry/tracehook_report_syscall_exit before
|
|
|
|
# and after the system call
|
2005-04-17 05:20:36 +07:00
|
|
|
#
|
|
|
|
/*
 * The system call number from the interruption code is stored in the R2
 * slot of pt_regs, then do_syscall_trace_enter is called with
 * %r2 = pt_regs and %r3 = 0.  Its return value is used as the system call
 * number: a value not below .Lnr_syscalls skips the call (sysc_tracenogo),
 * otherwise %r9 is reloaded from the table and the arguments are restored
 * from pt_regs (%r2 from __PT_ORIG_GPR2) before the call.  If a _TIF_TRACE
 * bit is still set afterwards, do_syscall_trace_exit is tail-called with
 * sysc_return as return address.
 */
sysc_tracesys:
|
2011-12-27 17:27:15 +07:00
|
|
|
l %r1,BASED(.Ltrace_enter)
|
|
|
|
lr %r2,%r11 # pass pointer to pt_regs
|
2005-04-17 05:20:36 +07:00
|
|
|
la %r3,0
|
2011-01-05 18:47:57 +07:00
|
|
|
xr %r0,%r0
|
2011-12-27 17:27:18 +07:00
|
|
|
icm %r0,3,__PT_INT_CODE+2(%r11)
|
2011-12-27 17:27:15 +07:00
|
|
|
st %r0,__PT_R2(%r11)
|
|
|
|
basr %r14,%r1 # call do_syscall_trace_enter
|
2008-10-11 02:33:20 +07:00
|
|
|
cl %r2,BASED(.Lnr_syscalls)
|
2011-12-27 17:27:15 +07:00
|
|
|
jnl sysc_tracenogo
|
|
|
|
lr %r8,%r2
|
|
|
|
sll %r8,2
|
|
|
|
l %r9,0(%r8,%r10)
|
2005-04-17 05:20:36 +07:00
|
|
|
sysc_tracego:
|
2011-12-27 17:27:15 +07:00
|
|
|
lm %r3,%r7,__PT_R3(%r11)
|
|
|
|
st %r7,STACK_FRAME_OVERHEAD(%r15)
|
|
|
|
l %r2,__PT_ORIG_GPR2(%r11)
|
|
|
|
basr %r14,%r9 # call sys_xxx
|
|
|
|
st %r2,__PT_R2(%r11) # store return value
|
2005-04-17 05:20:36 +07:00
|
|
|
sysc_tracenogo:
|
2011-10-30 21:16:49 +07:00
|
|
|
tm __TI_flags+2(%r12),_TIF_TRACE >> 8
|
2011-12-27 17:27:15 +07:00
|
|
|
jz sysc_return
|
2008-10-11 02:33:20 +07:00
|
|
|
l %r1,BASED(.Ltrace_exit)
|
2011-12-27 17:27:15 +07:00
|
|
|
lr %r2,%r11 # pass pointer to pt_regs
|
2005-04-17 05:20:36 +07:00
|
|
|
la %r14,BASED(sysc_return)
|
2011-12-27 17:27:15 +07:00
|
|
|
br %r1 # call do_syscall_trace_exit
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
#
|
|
|
|
# a new process exits the kernel with ret_from_fork
|
|
|
|
#
|
2011-07-24 15:48:19 +07:00
|
|
|
/*
 * Sets up %r11 (pt_regs), %r12 (thread_info) and %r13 (BASED() base), calls
 * schedule_tail and re-enables interrupts.  If the problem state bit is
 * set in the PSW saved in pt_regs the new task is a user process and
 * leaves through sysc_tracenogo; otherwise %r9/%r10 are loaded from
 * pt_regs and execution falls through into kernel_thread_starter.
 */
ENTRY(ret_from_fork)
|
2011-12-27 17:27:15 +07:00
|
|
|
la %r11,STACK_FRAME_OVERHEAD(%r15)
|
|
|
|
l %r12,__LC_THREAD_INFO
|
2005-04-17 05:20:36 +07:00
|
|
|
l %r13,__LC_SVC_NEW_PSW+4
|
2012-09-11 05:03:41 +07:00
|
|
|
l %r1,BASED(.Lschedule_tail)
|
|
|
|
basr %r14,%r1 # call schedule_tail
|
|
|
|
TRACE_IRQS_ON
|
|
|
|
ssm __LC_SVC_NEW_PSW # reenable interrupts
|
2012-10-12 02:30:14 +07:00
|
|
|
tm __PT_PSW+1(%r11),0x01 # forking a kernel thread ?
|
|
|
|
jne sysc_tracenogo
|
|
|
|
# it's a kernel thread
|
|
|
|
lm %r9,%r10,__PT_R9(%r11) # load gprs
|
2012-09-11 05:03:41 +07:00
|
|
|
/*
 * Calls the function whose address is in %r9 with the address formed from
 * %r10 as first argument (%r2); when that function returns, continues at
 * sysc_tracenogo.
 */
ENTRY(kernel_thread_starter)
|
|
|
|
la %r2,0(%r10)
|
|
|
|
basr %r14,%r9
|
2012-10-12 02:30:14 +07:00
|
|
|
j sysc_tracenogo
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Program check handler routine
|
|
|
|
*/
|
|
|
|
|
2011-07-24 15:48:19 +07:00
|
|
|
/*
 * Flow:
 *  - Fault in user space (problem state bit set in the old PSW): update
 *    the virtual timers and switch to __LC_KERNEL_STACK.
 *  - Fault in kernel with a PER event although the PER bit is clear in the
 *    old PSW: handled as a single stepped svc, see pgm_svcper.
 *  - Other kernel faults: stay on the current stack (after CHECK_STACK)
 *    and reserve room for a stack frame plus pt_regs.
 * pt_regs is then filled in.  For a PER event from user space
 * _TIF_PER_TRAP is set and the PER address/cause/paid are copied to the
 * task via the __THREAD_per_* offsets; a PER event from kernel goes to
 * pgm_kprobe.  Finally interrupts are re-enabled and the handler selected
 * by the low 7 bits of the word at __PT_INT_CODE is called through
 * .Ljump_table; a zero index goes straight to sysc_return.
 */
ENTRY(pgm_check_handler)
|
2008-12-25 19:39:25 +07:00
|
|
|
stpt __LC_SYNC_ENTER_TIMER
|
2011-12-27 17:27:15 +07:00
|
|
|
stm %r8,%r15,__LC_SAVE_AREA_SYNC
|
|
|
|
l %r12,__LC_THREAD_INFO
|
|
|
|
l %r13,__LC_SVC_NEW_PSW+4
|
|
|
|
lm %r8,%r9,__LC_PGM_OLD_PSW
|
|
|
|
tmh %r8,0x0001 # test problem state bit
|
|
|
|
jnz 1f # -> fault in user space
|
|
|
|
tmh %r8,0x4000 # PER bit set in old PSW ?
|
|
|
|
jnz 0f # -> enabled, can't be a double fault
|
|
|
|
tm __LC_PGM_ILC+3,0x80 # check for per exception
|
|
|
|
jnz pgm_svcper # -> single stepped svc
|
|
|
|
0: CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC
|
2013-04-24 15:20:43 +07:00
|
|
|
ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
|
2011-12-27 17:27:15 +07:00
|
|
|
j 2f
|
|
|
|
1: UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER
|
|
|
|
l %r15,__LC_KERNEL_STACK
|
2013-04-24 15:20:43 +07:00
|
|
|
2: la %r11,STACK_FRAME_OVERHEAD(%r15)
|
2011-12-27 17:27:15 +07:00
|
|
|
stm %r0,%r7,__PT_R0(%r11)
|
|
|
|
mvc __PT_R8(32,%r11),__LC_SAVE_AREA_SYNC
|
|
|
|
stm %r8,%r9,__PT_PSW(%r11)
|
2011-12-27 17:27:18 +07:00
|
|
|
mvc __PT_INT_CODE(4,%r11),__LC_PGM_ILC
|
|
|
|
mvc __PT_INT_PARM_LONG(4,%r11),__LC_TRANS_EXC_CODE
|
2011-12-27 17:27:15 +07:00
|
|
|
tm __LC_PGM_ILC+3,0x80 # check for per exception
|
|
|
|
jz 0f
|
2011-01-05 18:47:57 +07:00
|
|
|
l %r1,__TI_task(%r12)
|
2011-12-27 17:27:15 +07:00
|
|
|
tmh %r8,0x0001 # kernel per event ?
|
|
|
|
jz pgm_kprobe
|
|
|
|
oi __TI_flags+3(%r12),_TIF_PER_TRAP
|
2011-01-05 18:48:10 +07:00
|
|
|
mvc __THREAD_per_address(4,%r1),__LC_PER_ADDRESS
|
2011-12-27 17:27:15 +07:00
|
|
|
mvc __THREAD_per_cause(2,%r1),__LC_PER_CAUSE
|
2011-01-05 18:48:10 +07:00
|
|
|
mvc __THREAD_per_paid(1,%r1),__LC_PER_PAID
|
2011-12-27 17:27:18 +07:00
|
|
|
0: REENABLE_IRQS
|
2011-12-27 17:27:15 +07:00
|
|
|
xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15)
|
2011-01-05 18:47:57 +07:00
|
|
|
l %r1,BASED(.Ljump_table)
|
2011-12-27 17:27:15 +07:00
|
|
|
la %r10,0x7f
|
2011-12-27 17:27:18 +07:00
|
|
|
n %r10,__PT_INT_CODE(%r11)
|
2011-12-27 17:27:15 +07:00
|
|
|
je sysc_return
|
|
|
|
sll %r10,2
|
|
|
|
l %r1,0(%r10,%r1) # load address of handler routine
|
|
|
|
lr %r2,%r11 # pass pointer to pt_regs
|
2011-01-05 18:47:57 +07:00
|
|
|
basr %r14,%r1 # branch to interrupt-handler
|
2011-12-27 17:27:15 +07:00
|
|
|
j sysc_return
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
#
|
2011-12-27 17:27:15 +07:00
|
|
|
# PER event in supervisor state, must be kprobes
|
2005-04-17 05:20:36 +07:00
|
|
|
#
|
2011-12-27 17:27:15 +07:00
|
|
|
/*
 * Re-enables interrupts, clears the backchain of the current stack frame
 * and calls do_per_trap with %r2 = pt_regs, then leaves via sysc_return.
 */
pgm_kprobe:
|
|
|
|
REENABLE_IRQS
|
|
|
|
xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15)
|
|
|
|
l %r1,BASED(.Ldo_per_trap)
|
|
|
|
lr %r2,%r11 # pass pointer to pt_regs
|
|
|
|
basr %r14,%r1 # call do_per_trap
|
|
|
|
j sysc_return
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2006-09-20 20:58:39 +07:00
|
|
|
#
|
2011-12-27 17:27:15 +07:00
|
|
|
# single stepped system call
|
2006-09-20 20:58:39 +07:00
|
|
|
#
|
2011-12-27 17:27:15 +07:00
|
|
|
/*
 * Sets _TIF_PER_TRAP, then builds a PSW in __LC_RETURN_PSW from the mask
 * word of __LC_SVC_NEW_PSW and the address stored at .Lsysc_per and loads
 * it, so execution continues at sysc_per.
 */
pgm_svcper:
|
|
|
|
oi __TI_flags+3(%r12),_TIF_PER_TRAP
|
|
|
|
mvc __LC_RETURN_PSW(4),__LC_SVC_NEW_PSW
|
|
|
|
mvc __LC_RETURN_PSW+4(4),BASED(.Lsysc_per)
|
|
|
|
lpsw __LC_RETURN_PSW # branch to sysc_per and enable irqs
|
2006-09-20 20:58:39 +07:00
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/*
|
|
|
|
* IO interrupt handler routine
|
|
|
|
*/
|
|
|
|
|
2011-07-24 15:48:19 +07:00
|
|
|
/*
 * Flow: save %r8-%r15 in __LC_SAVE_AREA_ASYNC, pick up the old PSW in
 * %r8/%r9, update the virtual timers when interrupted from user space,
 * then let SWITCH_ASYNC choose the stack (__LC_ASYNC_STACK) and set up
 * %r11.  io_loop calls do_IRQ with %r2 = pt_regs and %r3 = IO_INTERRUPT,
 * or THIN_INTERRUPT when bit 0x80 of the byte at __PT_INT_CODE+8 is set.
 * When the MACHINE_FLAG_LPAR bit is set, "tpi 0" is issued after the call;
 * if it reports a pending interrupt the interruption parameters are copied
 * again and the loop repeats instead of returning.
 */
ENTRY(io_int_handler)
|
2005-04-17 05:20:36 +07:00
|
|
|
stck __LC_INT_CLOCK
|
2008-12-31 21:11:41 +07:00
|
|
|
stpt __LC_ASYNC_ENTER_TIMER
|
2011-12-27 17:27:15 +07:00
|
|
|
stm %r8,%r15,__LC_SAVE_AREA_ASYNC
|
|
|
|
l %r12,__LC_THREAD_INFO
|
|
|
|
l %r13,__LC_SVC_NEW_PSW+4
|
|
|
|
lm %r8,%r9,__LC_IO_OLD_PSW
|
|
|
|
tmh %r8,0x0001 # interrupting from user ?
|
|
|
|
jz io_skip
|
|
|
|
UPDATE_VTIME %r14,%r15,__LC_ASYNC_ENTER_TIMER
|
|
|
|
io_skip:
|
|
|
|
SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT
|
|
|
|
stm %r0,%r7,__PT_R0(%r11)
|
|
|
|
mvc __PT_R8(32,%r11),__LC_SAVE_AREA_ASYNC
|
|
|
|
stm %r8,%r9,__PT_PSW(%r11)
|
2013-06-17 19:54:02 +07:00
|
|
|
mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID
|
2006-07-03 14:24:46 +07:00
|
|
|
TRACE_IRQS_OFF
|
2011-12-27 17:27:15 +07:00
|
|
|
xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15)
|
2013-06-17 19:54:02 +07:00
|
|
|
io_loop:
|
2011-12-27 17:27:15 +07:00
|
|
|
l %r1,BASED(.Ldo_IRQ)
|
|
|
|
lr %r2,%r11 # pass pointer to pt_regs
|
2013-06-27 14:01:09 +07:00
|
|
|
lhi %r3,IO_INTERRUPT
|
|
|
|
tm __PT_INT_CODE+8(%r11),0x80 # adapter interrupt ?
|
|
|
|
jz io_call
|
|
|
|
lhi %r3,THIN_INTERRUPT
|
|
|
|
io_call:
|
2011-12-27 17:27:15 +07:00
|
|
|
basr %r14,%r1 # call do_IRQ
|
2013-06-17 19:54:02 +07:00
|
|
|
tm __LC_MACHINE_FLAGS+2,0x10 # MACHINE_FLAG_LPAR
|
|
|
|
jz io_return
|
|
|
|
tpi 0
|
|
|
|
jz io_return
|
|
|
|
mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID
|
|
|
|
j io_loop
|
2005-04-17 05:20:36 +07:00
|
|
|
io_return:
|
2010-05-17 15:00:02 +07:00
|
|
|
LOCKDEP_SYS_EXIT
|
|
|
|
TRACE_IRQS_ON
|
|
|
|
io_tif:
|
2011-01-05 18:47:57 +07:00
|
|
|
tm __TI_flags+3(%r12),_TIF_WORK_INT
|
2011-12-27 17:27:15 +07:00
|
|
|
jnz io_work # there is work to do (signals etc.)
|
2007-11-20 17:13:32 +07:00
|
|
|
io_restore:
|
2011-12-27 17:27:15 +07:00
|
|
|
mvc __LC_RETURN_PSW(8),__PT_PSW(%r11)
|
|
|
|
stpt __LC_EXIT_TIMER
|
|
|
|
lm %r0,%r15,__PT_R0(%r11)
|
|
|
|
lpsw __LC_RETURN_PSW
|
2005-09-04 05:57:56 +07:00
|
|
|
io_done:
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2008-05-07 14:22:52 +07:00
|
|
|
#
|
2010-05-17 15:00:01 +07:00
|
|
|
# There is work to do, find out in which context we have been interrupted:
|
|
|
|
# 1) if we return to user space we can do all _TIF_WORK_INT work
|
|
|
|
# 2) if we return to kernel code and preemptive scheduling is enabled check
|
|
|
|
# the preemption counter and if it is zero call preempt_schedule_irq
|
|
|
|
# Before any work can be done, a switch to the kernel stack is required.
|
2008-05-07 14:22:52 +07:00
|
|
|
#
|
|
|
|
/*
 * Kernel case in detail (CONFIG_PREEMPT only): when the preempt count is
 * zero and _TIF_NEED_RESCHED is set, pt_regs is copied to a new frame just
 * below the interrupted context's %r15 (taken from __PT_R15), %r11/%r15
 * are switched to it and preempt_schedule_irq is called before going back
 * to io_return.  Without CONFIG_PREEMPT the kernel case always leaves via
 * io_restore.
 */
io_work:
|
2011-12-27 17:27:15 +07:00
|
|
|
tm __PT_PSW+1(%r11),0x01 # returning to user ?
|
|
|
|
jo io_work_user # yes -> do resched & signal
|
2010-05-17 15:00:01 +07:00
|
|
|
#ifdef CONFIG_PREEMPT
|
2008-05-07 14:22:52 +07:00
|
|
|
# check for preemptive scheduling
|
2011-01-05 18:47:57 +07:00
|
|
|
icm %r0,15,__TI_precount(%r12)
|
2011-12-27 17:27:15 +07:00
|
|
|
jnz io_restore # preemption disabled
|
2011-01-05 18:47:57 +07:00
|
|
|
tm __TI_flags+3(%r12),_TIF_NEED_RESCHED
|
2011-12-27 17:27:15 +07:00
|
|
|
jno io_restore
|
2010-05-17 15:00:01 +07:00
|
|
|
# switch to kernel stack
|
2011-12-27 17:27:15 +07:00
|
|
|
l %r1,__PT_R15(%r11)
|
|
|
|
ahi %r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
|
|
|
|
mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11)
|
|
|
|
xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1)
|
|
|
|
la %r11,STACK_FRAME_OVERHEAD(%r1)
|
2005-04-17 05:20:36 +07:00
|
|
|
lr %r15,%r1
|
2010-05-17 15:00:02 +07:00
|
|
|
# TRACE_IRQS_ON already done at io_return, call
|
|
|
|
# TRACE_IRQS_OFF to keep things symmetrical
|
|
|
|
TRACE_IRQS_OFF
|
2011-12-27 17:27:15 +07:00
|
|
|
l %r1,BASED(.Lpreempt_irq)
|
2010-05-17 15:00:02 +07:00
|
|
|
basr %r14,%r1 # call preempt_schedule_irq
|
2011-12-27 17:27:15 +07:00
|
|
|
j io_return
|
2010-05-17 15:00:02 +07:00
|
|
|
#else
|
2011-12-27 17:27:15 +07:00
|
|
|
j io_restore
|
2010-05-17 15:00:02 +07:00
|
|
|
#endif
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2010-05-17 15:00:01 +07:00
|
|
|
#
|
|
|
|
# Need to do work before returning to userspace, switch to kernel stack
|
|
|
|
#
|
2008-05-07 14:22:52 +07:00
|
|
|
io_work_user:
	/*
	 * Move pt_regs to the kernel stack taken from the lowcore, clear the
	 * backchain of the new frame, then let %r11 address the copied
	 * pt_regs and %r15 the new stack. Falls through into io_work_tif.
	 */
	l	%r1,__LC_KERNEL_STACK
	mvc	STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11)
	xc	__SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1)
	la	%r11,STACK_FRAME_OVERHEAD(%r1)	# %r11 = pt_regs copy
	lr	%r15,%r1			# %r15 = kernel stack
|
2010-05-17 15:00:02 +07:00
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
#
|
|
|
|
# One of the work bits is on. Find out which one.
|
2010-05-17 15:00:01 +07:00
|
|
|
# Checked are: _TIF_MCCK_PENDING, _TIF_NEED_RESCHED, _TIF_SIGPENDING,
# _TIF_NOTIFY_RESUME and _TIF_ASCE
|
2005-04-17 05:20:36 +07:00
|
|
|
#
|
2010-05-17 15:00:02 +07:00
|
|
|
io_work_tif:
	/*
	 * Dispatch on the thread-info work bits, one bit per test, in this
	 * fixed order. Each handler jumps back to io_return, which re-tests
	 * the flags at io_tif.
	 */
	tm	__TI_flags+3(%r12),_TIF_MCCK_PENDING
	jo	io_mcck_pending			# machine check pending
	tm	__TI_flags+3(%r12),_TIF_NEED_RESCHED
	jo	io_reschedule			# reschedule requested
	tm	__TI_flags+3(%r12),_TIF_SIGPENDING
	jo	io_sigpending			# signal pending
	tm	__TI_flags+3(%r12),_TIF_NOTIFY_RESUME
	jo	io_notify_resume		# notify-resume work pending
	tm	__TI_flags+3(%r12),_TIF_ASCE
	jo	io_uaccess			# user asce must be reloaded
	j	io_return		# beware of critical section cleanup
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2005-06-26 04:55:30 +07:00
|
|
|
#
|
|
|
|
# _TIF_MCCK_PENDING is set, call handler
|
|
|
|
#
|
|
|
|
io_mcck_pending:
	/*
	 * Call s390_handle_mcck through the literal pool. No TRACE_IRQS_ON
	 * is needed before the call because io_return already issued it.
	 */
	l	%r1,BASED(.Lhandle_mcck)
	basr	%r14,%r1		# handler clears the TIF bit
	TRACE_IRQS_OFF
	j	io_return
|
2005-06-26 04:55:30 +07:00
|
|
|
|
s390/uaccess: rework uaccess code - fix locking issues
The current uaccess code uses a page table walk in some circumstances,
e.g. in case of the in atomic futex operations or if running on old
hardware which doesn't support the mvcos instruction.
However it turned out that the page table walk code does not correctly
lock page tables when accessing page table entries.
In other words: a different cpu may invalidate a page table entry while
the current cpu inspects the pte. This may lead to random data corruption.
Adding correct locking however isn't trivial for all uaccess operations.
Especially copy_in_user() is problematic since that requires to hold at
least two locks, but must be protected against ABBA deadlock when a
different cpu also performs a copy_in_user() operation.
So the solution is a different approach where we change address spaces:
User space runs in primary address mode, or access register mode within
vdso code, like it currently already does.
The kernel usually also runs in home space mode, however when accessing
user space the kernel switches to primary or secondary address mode if
the mvcos instruction is not available or if a compare-and-swap (futex)
instruction on a user space address is performed.
KVM however is special, since that requires the kernel to run in home
address space while implicitly accessing user space with the sie
instruction.
So we end up with:
User space:
- runs in primary or access register mode
- cr1 contains the user asce
- cr7 contains the user asce
- cr13 contains the kernel asce
Kernel space:
- runs in home space mode
- cr1 contains the user or kernel asce
-> the kernel asce is loaded when a uaccess requires primary or
secondary address mode
- cr7 contains the user or kernel asce, (changed with set_fs())
- cr13 contains the kernel asce
In case of uaccess the kernel changes to:
- primary space mode in case of a uaccess (copy_to_user) and uses
e.g. the mvcp instruction to access user space. However the kernel
will stay in home space mode if the mvcos instruction is available
- secondary space mode in case of futex atomic operations, so that the
instructions come from primary address space and data from secondary
space
In case of kvm the kernel runs in home space mode, but cr1 gets switched
to contain the gmap asce before the sie instruction gets executed. When
the sie instruction is finished cr1 will be switched back to contain the
user asce.
A context switch between two processes will always load the kernel asce
for the next process in cr1. So the first exit to user space is a bit
more expensive (one extra load control register instruction) than before,
however keeps the code rather simple.
In sum this means there is no need to perform any error prone page table
walks anymore when accessing user space.
The patch seems to be rather large, however it mainly removes the
the page table walk code and restores the previously deleted "standard"
uaccess code, with a couple of changes.
The uaccess without mvcos mode can be enforced with the "uaccess_primary"
kernel parameter.
Reported-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
2014-03-21 16:42:25 +07:00
|
|
|
#
|
|
|
|
# _TIF_ASCE is set, load user space asce
|
|
|
|
#
|
|
|
|
io_uaccess:
	/* Clear _TIF_ASCE, then reload control register 1 from the lowcore. */
	ni	__TI_flags+3(%r12),255-_TIF_ASCE
	lctl	%c1,%c1,__LC_USER_ASCE		# load primary asce
	j	io_return
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
#
|
|
|
|
# _TIF_NEED_RESCHED is set, call schedule
|
2006-09-28 21:56:37 +07:00
|
|
|
#
|
|
|
|
io_reschedule:
	/*
	 * Call schedule() with interrupts enabled, then mask them again
	 * before going back to io_return. TRACE_IRQS_ON was already issued
	 * at io_return, so only the closing TRACE_IRQS_OFF appears here.
	 */
	l	%r1,BASED(.Lschedule)
	ssm	__LC_SVC_NEW_PSW	# reenable interrupts
	basr	%r14,%r1		# call scheduler
	ssm	__LC_PGM_NEW_PSW	# disable I/O and ext. interrupts
	TRACE_IRQS_OFF
	j	io_return
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
#
|
2008-04-30 14:53:08 +07:00
|
|
|
# _TIF_SIGPENDING is set, call do_signal
|
2005-04-17 05:20:36 +07:00
|
|
|
#
|
2006-09-28 21:56:37 +07:00
|
|
|
io_sigpending:
	/*
	 * Call do_signal(pt_regs) with interrupts enabled, then mask them
	 * again before going back to io_return. TRACE_IRQS_ON was already
	 * issued at io_return.
	 */
	l	%r1,BASED(.Ldo_signal)
	ssm	__LC_SVC_NEW_PSW	# reenable interrupts
	lr	%r2,%r11		# first argument: pointer to pt_regs
	basr	%r14,%r1		# call do_signal
	ssm	__LC_PGM_NEW_PSW	# disable I/O and ext. interrupts
	TRACE_IRQS_OFF
	j	io_return
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2008-10-11 02:33:20 +07:00
|
|
|
#
|
|
|
|
# _TIF_NOTIFY_RESUME is set, call do_notify_resume
|
|
|
|
#
|
|
|
|
io_notify_resume:
	/*
	 * Call do_notify_resume(pt_regs) with interrupts enabled, then mask
	 * them again before going back to io_return. TRACE_IRQS_ON was
	 * already issued at io_return.
	 */
	l	%r1,BASED(.Ldo_notify_resume)
	ssm	__LC_SVC_NEW_PSW	# reenable interrupts
	lr	%r2,%r11		# first argument: pointer to pt_regs
	basr	%r14,%r1		# call do_notify_resume
	ssm	__LC_PGM_NEW_PSW	# disable I/O and ext. interrupts
	TRACE_IRQS_OFF
	j	io_return
|
2008-10-11 02:33:20 +07:00
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/*
|
|
|
|
* External interrupt handler routine
|
|
|
|
*/
|
|
|
|
|
2011-07-24 15:48:19 +07:00
|
|
|
ENTRY(ext_int_handler)
	/*
	 * Entry: record clock and cpu timer, park %r8-%r15 in the async save
	 * area and load the working registers:
	 *   %r8/%r9 = external old PSW
	 *   %r12    = thread-info pointer
	 *   %r13    = word loaded from __LC_SVC_NEW_PSW+4 (presumably the
	 *             base used by BASED(); the macro is not visible here)
	 */
	stck	__LC_INT_CLOCK
	stpt	__LC_ASYNC_ENTER_TIMER
	stm	%r8,%r15,__LC_SAVE_AREA_ASYNC
	l	%r12,__LC_THREAD_INFO
	l	%r13,__LC_SVC_NEW_PSW+4
	lm	%r8,%r9,__LC_EXT_OLD_PSW
	tmh	%r8,0x0001		# interrupting from user ?
	jz	ext_skip		# no -> skip the vtime update
	UPDATE_VTIME %r14,%r15,__LC_ASYNC_ENTER_TIMER
ext_skip:
	/* NOTE(review): SWITCH_ASYNC is defined elsewhere; the code below
	 * relies on it leaving %r11 -> pt_regs and %r15 -> stack. */
	SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT
	/* Fill pt_regs: gprs 0-7, saved gprs 8-15, old PSW, interrupt data. */
	stm	%r0,%r7,__PT_R0(%r11)
	mvc	__PT_R8(32,%r11),__LC_SAVE_AREA_ASYNC
	stm	%r8,%r9,__PT_PSW(%r11)
	mvc	__PT_INT_CODE(4,%r11),__LC_EXT_CPU_ADDR
	mvc	__PT_INT_PARM(4,%r11),__LC_EXT_PARAMS
	TRACE_IRQS_OFF
	/* do_IRQ(pt_regs, EXT_INTERRUPT), then share the I/O return path. */
	l	%r1,BASED(.Ldo_IRQ)
	lr	%r2,%r11		# first argument: pointer to pt_regs
	lhi	%r3,EXT_INTERRUPT	# second argument: interrupt class
	basr	%r14,%r1		# call do_IRQ
	j	io_return
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2012-03-11 22:59:27 +07:00
|
|
|
/*
|
|
|
|
* Load idle PSW. The second "half" of this function is in cleanup_idle.
|
|
|
|
*/
|
|
|
|
ENTRY(psw_idle)
	/*
	 * In:  %r2 = base for the __CLOCK_IDLE_ENTER / __TIMER_IDLE_ENTER fields
	 *      %r3 = first word of the PSW to load
	 *      %r14 = return address
	 * Builds an 8-byte PSW at __SF_EMPTY(%r15): word 0 is %r3, word 1 is
	 * the address of the "br %r14" after the lpsw with bit 0x80 set in
	 * its top byte. Nothing may be inserted between the basr and the la:
	 * the "-." term assumes %r1 holds the address of the la itself.
	 */
	st	%r3,__SF_EMPTY(%r15)
	basr	%r1,0
	la	%r1,psw_idle_lpsw+4-.(%r1)	# address following the lpsw
	st	%r1,__SF_EMPTY+4(%r15)
	oi	__SF_EMPTY+4(%r15),0x80		# set top bit of the address word
	stck	__CLOCK_IDLE_ENTER(%r2)		# clock when going idle
	stpt	__TIMER_IDLE_ENTER(%r2)		# cpu timer when going idle
psw_idle_lpsw:
	lpsw	__SF_EMPTY(%r15)		# load the PSW built above
	br	%r14
psw_idle_end:
|
|
|
|
|
2005-09-04 05:57:56 +07:00
|
|
|
__critical_end:
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/*
|
|
|
|
* Machine check handler routines
|
|
|
|
*/
|
|
|
|
|
2011-07-24 15:48:19 +07:00
|
|
|
ENTRY(mcck_int_handler)
	/*
	 * Machine check entry. Revalidate the cpu timer and the gprs from
	 * their lowcore save areas, then load the working registers:
	 *   %r8/%r9 = machine check old PSW
	 *   %r12    = thread-info pointer
	 *   %r13    = word loaded from __LC_SVC_NEW_PSW+4 (presumably the
	 *             base used by BASED(); the macro is not visible here)
	 */
	stck	__LC_MCCK_CLOCK
	spt	__LC_CPU_TIMER_SAVE_AREA	# revalidate cpu timer
	lm	%r0,%r15,__LC_GPREGS_SAVE_AREA	# revalidate gprs
	l	%r12,__LC_THREAD_INFO
	l	%r13,__LC_SVC_NEW_PSW+4
	lm	%r8,%r9,__LC_MCK_OLD_PSW
	tm	__LC_MCCK_CODE,0x80	# system damage?
	jo	mcck_panic		# yes -> rest of mcck code invalid
	la	%r14,__LC_CPU_TIMER_SAVE_AREA
	mvc	__LC_MCCK_ENTER_TIMER(8),0(%r14)
	tm	__LC_MCCK_CODE+5,0x02	# stored cpu timer value valid?
	jo	3f
	/*
	 * Stored cpu timer is not valid. Walk the sync-enter, async-enter,
	 * exit and last-update timers, keeping in %r14 the address of the
	 * lowest value seen (clc/jl keeps the current candidate when it is
	 * lower), then use that value as cpu timer and mcck enter timer.
	 */
	la	%r14,__LC_SYNC_ENTER_TIMER
	clc	0(8,%r14),__LC_ASYNC_ENTER_TIMER
	jl	0f
	la	%r14,__LC_ASYNC_ENTER_TIMER
0:	clc	0(8,%r14),__LC_EXIT_TIMER
	jl	1f
	la	%r14,__LC_EXIT_TIMER
1:	clc	0(8,%r14),__LC_LAST_UPDATE_TIMER
	jl	2f
	la	%r14,__LC_LAST_UPDATE_TIMER
2:	spt	0(%r14)
	mvc	__LC_MCCK_ENTER_TIMER(8),0(%r14)
3:	tm	__LC_MCCK_CODE+2,0x09	# mwp + ia of old psw valid?
	jno	mcck_panic		# no -> skip cleanup critical
	/*
	 * The old PSW mask lives in register %r8, so it has to be tested
	 * with tmh (register operand), the same way ext_int_handler does.
	 * The storage-operand form "tm" cannot test a register.
	 */
	tmh	%r8,0x0001		# interrupting from user ?
	jz	mcck_skip
	UPDATE_VTIME %r14,%r15,__LC_MCCK_ENTER_TIMER
mcck_skip:
	/* NOTE(review): SWITCH_ASYNC is defined elsewhere; the code below
	 * relies on it leaving %r11 -> pt_regs and %r15 -> stack. */
	SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+32,__LC_PANIC_STACK,PAGE_SHIFT
	/* Fill pt_regs: gprs 0-7, saved gprs 8-15 and the old PSW. */
	stm	%r0,%r7,__PT_R0(%r11)
	mvc	__PT_R8(32,%r11),__LC_GPREGS_SAVE_AREA+32
	stm	%r8,%r9,__PT_PSW(%r11)
	xc	__SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15)
	l	%r1,BASED(.Ldo_machine_check)
	lr	%r2,%r11		# pass pointer to pt_regs
	basr	%r14,%r1		# call s390_do_machine_check
	tm	__PT_PSW+1(%r11),0x01	# returning to user ?
	jno	mcck_return
	/*
	 * Returning to user space: move pt_regs to the kernel stack. %r11
	 * must address the copy on the new stack, so it is derived from
	 * %r1 (new stack), exactly as io_work_user does; %r15 still holds
	 * the old stack at this point.
	 */
	l	%r1,__LC_KERNEL_STACK	# switch to kernel stack
	mvc	STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11)
	xc	__SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1)
	la	%r11,STACK_FRAME_OVERHEAD(%r1)
	lr	%r15,%r1
	ssm	__LC_PGM_NEW_PSW	# turn dat on, keep irqs off
	tm	__TI_flags+3(%r12),_TIF_MCCK_PENDING
	jno	mcck_return
	TRACE_IRQS_OFF
	l	%r1,BASED(.Lhandle_mcck)
	basr	%r14,%r1		# call s390_handle_mcck
	TRACE_IRQS_ON
mcck_return:
	/*
	 * Restore all gprs from pt_regs and resume with the saved PSW. The
	 * exit timer is stored only on the return-to-user path.
	 */
	mvc	__LC_RETURN_MCCK_PSW(8),__PT_PSW(%r11) # move return PSW
	tm	__LC_RETURN_MCCK_PSW+1,0x01 # returning to user ?
	jno	0f
	lm	%r0,%r15,__PT_R0(%r11)
	stpt	__LC_EXIT_TIMER
	lpsw	__LC_RETURN_MCCK_PSW
0:	lm	%r0,%r15,__PT_R0(%r11)
	lpsw	__LC_RETURN_MCCK_PSW

mcck_panic:
	/*
	 * Pick the stack for the panic path: if %r15 is not within the page
	 * below __LC_PANIC_STACK, start at the top of the panic stack;
	 * otherwise keep %r15 and reserve a new frame plus pt_regs below it.
	 */
	l	%r14,__LC_PANIC_STACK
	slr	%r14,%r15
	sra	%r14,PAGE_SHIFT
	jz	0f
	l	%r15,__LC_PANIC_STACK
	j	mcck_skip
0:	ahi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
	j	mcck_skip
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2011-08-03 21:44:19 +07:00
|
|
|
#
|
|
|
|
# PSW restart interrupt handler
|
|
|
|
#
|
2012-03-11 22:59:26 +07:00
|
|
|
ENTRY(restart_int_handler)
	/*
	 * Build a zeroed pt_regs and a zeroed stack frame on the restart
	 * stack, saving %r0-%r15 and the restart old PSW into pt_regs.
	 */
	st	%r15,__LC_SAVE_AREA_RESTART
	l	%r15,__LC_RESTART_STACK
	ahi	%r15,-__PT_SIZE			# create pt_regs on stack
	xc	0(__PT_SIZE,%r15),0(%r15)
	stm	%r0,%r14,__PT_R0(%r15)
	mvc	__PT_R15(4,%r15),__LC_SAVE_AREA_RESTART
	mvc	__PT_PSW(8,%r15),__LC_RST_OLD_PSW # store restart old psw
	ahi	%r15,-STACK_FRAME_OVERHEAD	# create stack frame on stack
	xc	0(STACK_FRAME_OVERHEAD,%r15),0(%r15)
	/* %r1 = function, %r2 = its argument, %r3 = source cpu address. */
	l	%r1,__LC_RESTART_FN		# load fn, parm & source cpu
	l	%r2,__LC_RESTART_DATA
	l	%r3,__LC_RESTART_SOURCE
	ltr	%r3,%r3				# test source cpu address
	jm	1f				# negative -> skip source stop
0:	sigp	%r4,%r3,SIGP_SENSE		# sigp sense to source cpu
	brc	10,0b				# wait for status stored
1:	basr	%r14,%r1			# call function
	/* If the function returns: stop this cpu, retrying the sigp. */
	stap	__SF_EMPTY(%r15)		# store cpu address
	lh	%r3,__SF_EMPTY(%r15)
2:	sigp	%r4,%r3,SIGP_STOP		# sigp stop to current cpu
	brc	2,2b
3:	j	3b				# spin forever
|
2011-08-03 21:44:19 +07:00
|
|
|
|
2011-01-05 18:47:25 +07:00
|
|
|
.section .kprobes.text, "ax"
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
#ifdef CONFIG_CHECK_STACK
/*
 * Reached when the synchronous or the asynchronous stack has overflowed.
 * The system is going to panic, so registers are not saved carefully;
 * a pt_regs is built on the panic stack only so that show_trace can print
 * a useful call trace.
 *
 * Uses: %r8/%r9 as the PSW to record, %r14 as pointer to the 32-byte
 * save area holding gprs 8-15.
 */
stack_overflow:
	l	%r15,__LC_PANIC_STACK	# change to panic stack
	la	%r11,STACK_FRAME_OVERHEAD(%r15)
	stm	%r0,%r7,__PT_R0(%r11)
	stm	%r8,%r9,__PT_PSW(%r11)
	mvc	__PT_R8(32,%r11),0(%r14)
	l	%r1,BASED(1f)
	xc	__SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15)
	lr	%r2,%r11		# first argument: pointer to pt_regs
	br	%r1			# branch to kernel_stack_overflow
1:	.long	kernel_stack_overflow
#endif
|
|
|
|
|
2011-12-27 17:27:15 +07:00
|
|
|
/*
 * Boundary addresses of the critical ranges checked by cleanup_critical.
 * Entries are indexed by byte offset (cleanup_table+0 .. +36), so their
 * order must not change. Each address carries the 0x80000000 bit.
 */
cleanup_table:
	.long	system_call + 0x80000000	# +0
	.long	sysc_do_svc + 0x80000000	# +4
	.long	sysc_tif + 0x80000000		# +8
	.long	sysc_restore + 0x80000000	# +12
	.long	sysc_done + 0x80000000		# +16
	.long	io_tif + 0x80000000		# +20
	.long	io_restore + 0x80000000		# +24
	.long	io_done + 0x80000000		# +28
	.long	psw_idle + 0x80000000		# +32
	.long	psw_idle_end + 0x80000000	# +36
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/*
 * Classify the address in %r9 against the ranges of cleanup_table and
 * branch to the matching cleanup routine. Addresses outside every range
 * return through %r14 without any change.
 *
 *   [system_call, sysc_do_svc)   -> cleanup_system_call
 *   [sysc_tif, sysc_restore)     -> cleanup_sysc_tif
 *   [sysc_restore, sysc_done)    -> cleanup_sysc_restore
 *   [io_tif, io_restore)         -> cleanup_io_tif
 *   [io_restore, io_done)        -> cleanup_io_restore
 *   [psw_idle, psw_idle_end)     -> cleanup_idle
 */
cleanup_critical:
	cl	%r9,BASED(cleanup_table)	# below system_call ?
	jl	0f
	cl	%r9,BASED(cleanup_table+4)	# below sysc_do_svc ?
	jl	cleanup_system_call
	cl	%r9,BASED(cleanup_table+8)	# below sysc_tif ?
	jl	0f
	cl	%r9,BASED(cleanup_table+12)	# below sysc_restore ?
	jl	cleanup_sysc_tif
	cl	%r9,BASED(cleanup_table+16)	# below sysc_done ?
	jl	cleanup_sysc_restore
	cl	%r9,BASED(cleanup_table+20)	# below io_tif ?
	jl	0f
	cl	%r9,BASED(cleanup_table+24)	# below io_restore ?
	jl	cleanup_io_tif
	cl	%r9,BASED(cleanup_table+28)	# below io_done ?
	jl	cleanup_io_restore
	cl	%r9,BASED(cleanup_table+32)	# below psw_idle ?
	jl	0f
	cl	%r9,BASED(cleanup_table+36)	# below psw_idle_end ?
	jl	cleanup_idle
0:	br	%r14				# not in a critical range
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/*
 * The interrupted address in %r9 lies in [system_call, sysc_do_svc).
 * Complete whichever system call entry steps had not yet run, compared
 * against the checkpoints in cleanup_system_call_insn, then continue at
 * sysc_do_svc. %r11 addresses the save area holding gprs 8-15 (offset 12
 * = %r11, 16 = %r12, 20 = %r13, 28 = %r15).
 */
cleanup_system_call:
	/* Step 1: was the stpt already executed? */
	cl	%r9,BASED(cleanup_system_call_insn)
	jh	0f
	mvc	__LC_SYNC_ENTER_TIMER(8),__LC_ASYNC_ENTER_TIMER
	chi	%r11,__LC_SAVE_AREA_ASYNC
	je	0f
	mvc	__LC_SYNC_ENTER_TIMER(8),__LC_MCCK_ENTER_TIMER
0:	/* Step 2: was the stm already executed? */
	cl	%r9,BASED(cleanup_system_call_insn+4)
	jh	0f
	mvc	__LC_SAVE_AREA_SYNC(32),0(%r11)
0:	/* Set up the saved copies of %r12 and %r13. */
	st	%r12,16(%r11)		# r12 thread-info pointer
	st	%r13,20(%r11)		# r13 literal-pool pointer
	/* Step 3: was the user time calculation already done? */
	cl	%r9,BASED(cleanup_system_call_insn+8)
	jh	0f
	l	%r10,__LC_EXIT_TIMER
	l	%r15,__LC_EXIT_TIMER+4
	SUB64	%r10,%r15,__LC_SYNC_ENTER_TIMER
	ADD64	%r10,%r15,__LC_USER_TIMER
	st	%r10,__LC_USER_TIMER
	st	%r15,__LC_USER_TIMER+4
0:	/* Step 4: was the system time calculation already done? */
	cl	%r9,BASED(cleanup_system_call_insn+12)
	jh	0f
	l	%r10,__LC_LAST_UPDATE_TIMER
	l	%r15,__LC_LAST_UPDATE_TIMER+4
	SUB64	%r10,%r15,__LC_EXIT_TIMER
	ADD64	%r10,%r15,__LC_SYSTEM_TIMER
	st	%r10,__LC_SYSTEM_TIMER
	st	%r15,__LC_SYSTEM_TIMER+4
0:	/* Update the accounting time stamp. */
	mvc	__LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER
	/* Saved %r11 becomes the pt_regs pointer on the kernel stack. */
	l	%r15,__LC_KERNEL_STACK
	la	%r9,STACK_FRAME_OVERHEAD(%r15)
	st	%r9,12(%r11)		# r11 pt_regs pointer
	/* Fill pt_regs: gprs 8-15, gprs 0-7, svc old PSW, svc int code. */
	mvc	__PT_R8(32,%r9),__LC_SAVE_AREA_SYNC
	stm	%r0,%r7,__PT_R0(%r9)
	mvc	__PT_PSW(8,%r9),__LC_SVC_OLD_PSW
	mvc	__PT_INT_CODE(4,%r9),__LC_SVC_ILC
	/* Saved %r15 becomes the kernel stack pointer. */
	st	%r15,28(%r11)		# r15 stack pointer
	/* New PSW address: resume at sysc_do_svc. */
	l	%r9,BASED(cleanup_table+4)	# sysc_do_svc + 0x80000000
	br	%r14
/* Checkpoint addresses used by steps 1-4 above, in that order. */
cleanup_system_call_insn:
	.long	system_call + 0x80000000
	.long	sysc_stm + 0x80000000
	.long	sysc_vtime + 0x80000000 + 36
	.long	sysc_vtime + 0x80000000 + 76
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2010-05-17 15:00:02 +07:00
|
|
|
/* Interrupted in [sysc_tif, sysc_restore): restart at sysc_tif. */
cleanup_sysc_tif:
	l	%r9,BASED(cleanup_table+8)	# %r9 = sysc_tif + 0x80000000
	br	%r14
|
|
|
|
|
2010-05-17 15:00:02 +07:00
|
|
|
/*
 * Interrupted in [sysc_restore, sysc_done). Unless the address in %r9 has
 * already reached sysc_done - 4, redo the restore by hand: copy the return
 * PSW and gprs 8-15 out of pt_regs and reload gprs 0-7. In both cases
 * %r8/%r9 are then loaded with __LC_RETURN_PSW.
 */
cleanup_sysc_restore:
	cl	%r9,BASED(cleanup_sysc_restore_insn)
	jhe	0f
	l	%r9,12(%r11)		# saved %r11 = pointer to pt_regs
	mvc	__LC_RETURN_PSW(8),__PT_PSW(%r9)
	mvc	0(32,%r11),__PT_R8(%r9)	# gprs 8-15 -> save area
	lm	%r0,%r7,__PT_R0(%r9)
0:	lm	%r8,%r9,__LC_RETURN_PSW
	br	%r14
cleanup_sysc_restore_insn:
	.long	sysc_done - 4 + 0x80000000
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2010-05-17 15:00:02 +07:00
|
|
|
/* Interrupted in [io_tif, io_restore): restart at io_tif. */
cleanup_io_tif:
	l	%r9,BASED(cleanup_table+20)	# %r9 = io_tif + 0x80000000
	br	%r14
|
|
|
|
|
2010-05-17 15:00:02 +07:00
|
|
|
/*
 * Interrupted in [io_restore, io_done). Unless the address in %r9 has
 * already reached io_done - 4, redo the restore by hand: copy the return
 * PSW and gprs 8-15 out of pt_regs and reload gprs 0-7. In both cases
 * %r8/%r9 are then loaded with __LC_RETURN_PSW.
 */
cleanup_io_restore:
	cl	%r9,BASED(cleanup_io_restore_insn)
	jhe	0f
	l	%r9,12(%r11)		# saved %r11 = pointer to pt_regs
	mvc	__LC_RETURN_PSW(8),__PT_PSW(%r9)
	mvc	0(32,%r11),__PT_R8(%r9)	# gprs 8-15 -> save area
	lm	%r0,%r7,__PT_R0(%r9)
0:	lm	%r8,%r9,__LC_RETURN_PSW
	br	%r14
cleanup_io_restore_insn:
	.long	io_done - 4 + 0x80000000
|
2005-09-04 05:57:56 +07:00
|
|
|
|
2012-03-11 22:59:27 +07:00
|
|
|
/*
 * Second half of psw_idle: the interrupted address in %r9 lies in
 * [psw_idle, psw_idle_end).
 *
 *   %r2     = base of the idle data (same base psw_idle uses for the
 *             __CLOCK_IDLE_ENTER / __TIMER_IDLE_ENTER fields)
 *   %r8/%r9 = interrupted PSW; updated to return from psw_idle
 *   %r11    = save area holding gprs 8-15 (offset 24 = saved %r14)
 *
 * Every idle-data access must use %r2 as base; %r3 holds the first PSW
 * word passed to psw_idle and is not a pointer.
 */
cleanup_idle:
	/* Copy interrupt clock & cpu timer as the idle exit values. */
	mvc	__CLOCK_IDLE_EXIT(8,%r2),__LC_INT_CLOCK
	mvc	__TIMER_IDLE_EXIT(8,%r2),__LC_ASYNC_ENTER_TIMER
	chi	%r11,__LC_SAVE_AREA_ASYNC
	je	0f
	/* Not the async save area: use the machine check values instead. */
	mvc	__CLOCK_IDLE_EXIT(8,%r2),__LC_MCCK_CLOCK
	mvc	__TIMER_IDLE_EXIT(8,%r2),__LC_MCCK_ENTER_TIMER
0:	/* Check if stck has been executed. */
	cl	%r9,BASED(cleanup_idle_insn)
	jhe	1f
	/* Enter values were not stored yet: make them equal the exit values. */
	mvc	__CLOCK_IDLE_ENTER(8,%r2),__CLOCK_IDLE_EXIT(%r2)
	mvc	__TIMER_IDLE_ENTER(8,%r2),__TIMER_IDLE_EXIT(%r2)
1:	/* Account system time going idle. */
	lm	%r9,%r10,__LC_STEAL_TIMER
	ADD64	%r9,%r10,__CLOCK_IDLE_ENTER(%r2)
	SUB64	%r9,%r10,__LC_LAST_UPDATE_CLOCK
	stm	%r9,%r10,__LC_STEAL_TIMER
	mvc	__LC_LAST_UPDATE_CLOCK(8),__CLOCK_IDLE_EXIT(%r2)
	lm	%r9,%r10,__LC_SYSTEM_TIMER
	ADD64	%r9,%r10,__LC_LAST_UPDATE_TIMER
	SUB64	%r9,%r10,__TIMER_IDLE_ENTER(%r2)
	stm	%r9,%r10,__LC_SYSTEM_TIMER
	mvc	__LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2)
	/* Prepare the return PSW. */
	n	%r8,BASED(cleanup_idle_wait)	# clear irq & wait state bits
	l	%r9,24(%r11)			# return from psw_idle
	br	%r14
cleanup_idle_insn:
	.long	psw_idle_lpsw + 0x80000000
cleanup_idle_wait:
	.long	0xfcfdffff
|
2012-03-11 22:59:27 +07:00
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/*
|
|
|
|
* Integer constants
|
|
|
|
*/
|
2011-12-27 17:27:15 +07:00
|
|
|
.align 4
|
2012-03-11 22:59:27 +07:00
|
|
|
.Lnr_syscalls:
|
|
|
|
.long NR_syscalls
|
|
|
|
.Lvtimer_max:
|
|
|
|
.quad 0x7fffffffffffffff
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Symbol constants
|
|
|
|
*/
|
2011-12-27 17:27:15 +07:00
|
|
|
/* Literal pool: addresses of functions and labels loaded via BASED(). */
.Ldo_machine_check:	.long	s390_do_machine_check
.Lhandle_mcck:		.long	s390_handle_mcck
.Ldo_IRQ:		.long	do_IRQ
.Ldo_signal:		.long	do_signal
.Ldo_notify_resume:	.long	do_notify_resume
.Ldo_per_trap:		.long	do_per_trap
.Ljump_table:		.long	pgm_check_table
.Lschedule:		.long	schedule
#ifdef CONFIG_PREEMPT
.Lpreempt_irq:		.long	preempt_schedule_irq
#endif
.Ltrace_enter:		.long	do_syscall_trace_enter
.Ltrace_exit:		.long	do_syscall_trace_exit
.Lschedule_tail:	.long	schedule_tail
.Lsysc_per:		.long	sysc_per + 0x80000000
#ifdef CONFIG_TRACE_IRQFLAGS
.Lhardirqs_on:		.long	trace_hardirqs_on_caller
.Lhardirqs_off:		.long	trace_hardirqs_off_caller
#endif
#ifdef CONFIG_LOCKDEP
.Llockdep_sys_exit:	.long	lockdep_sys_exit
#endif
/* Start address and byte length of the critical section. */
.Lcritical_start:	.long	__critical_start + 0x80000000
.Lcritical_length:	.long	__critical_end - __critical_start
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2006-09-28 21:56:37 +07:00
|
|
|
/*
 * System call table, placed in .rodata. Each SYSCALL(esa,esame,emu) line
 * of syscalls.S expands to one .long holding its first (esa) argument.
 */
		.section .rodata, "a"
#define SYSCALL(esa,esame,emu) .long esa
	.globl	sys_call_table
sys_call_table:
#include "syscalls.S"
#undef SYSCALL
|