From 3b8c88993e3709b4d44f7ca4e886044a49605394 Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Tue, 20 Nov 2007 11:13:30 +0100 Subject: [PATCH 01/12] [S390] cio: change device sense procedure to work with pav aliases Modify the sense id channel program to allow device sensing of pav alias devices which belong to a base device with ungrouped paths. Signed-off-by: Peter Oberparleiter Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/device_id.c | 45 ++++++++---------------------------- 1 file changed, 9 insertions(+), 36 deletions(-) diff --git a/drivers/s390/cio/device_id.c b/drivers/s390/cio/device_id.c index f232832f2b22..2f6bf462425e 100644 --- a/drivers/s390/cio/device_id.c +++ b/drivers/s390/cio/device_id.c @@ -113,19 +113,10 @@ __ccw_device_sense_id_start(struct ccw_device *cdev) { struct subchannel *sch; struct ccw1 *ccw; - int ret; sch = to_subchannel(cdev->dev.parent); /* Setup sense channel program. */ ccw = cdev->private->iccws; - if (sch->schib.pmcw.pim != 0x80) { - /* more than one path installed. */ - ccw->cmd_code = CCW_CMD_SUSPEND_RECONN; - ccw->cda = 0; - ccw->count = 0; - ccw->flags = CCW_FLAG_SLI | CCW_FLAG_CC; - ccw++; - } ccw->cmd_code = CCW_CMD_SENSE_ID; ccw->cda = (__u32) __pa (&cdev->private->senseid); ccw->count = sizeof (struct senseid); @@ -133,25 +124,9 @@ __ccw_device_sense_id_start(struct ccw_device *cdev) /* Reset device status. */ memset(&cdev->private->irb, 0, sizeof(struct irb)); + cdev->private->flags.intretry = 0; - /* Try on every path. */ - ret = -ENODEV; - while (cdev->private->imask != 0) { - if ((sch->opm & cdev->private->imask) != 0 && - cdev->private->iretry > 0) { - cdev->private->iretry--; - /* Reset internal retry indication. 
*/ - cdev->private->flags.intretry = 0; - ret = cio_start (sch, cdev->private->iccws, - cdev->private->imask); - /* ret is 0, -EBUSY, -EACCES or -ENODEV */ - if (ret != -EACCES) - return ret; - } - cdev->private->imask >>= 1; - cdev->private->iretry = 5; - } - return ret; + return cio_start(sch, ccw, LPM_ANYPATH); } void @@ -161,8 +136,7 @@ ccw_device_sense_id_start(struct ccw_device *cdev) memset (&cdev->private->senseid, 0, sizeof (struct senseid)); cdev->private->senseid.cu_type = 0xFFFF; - cdev->private->imask = 0x80; - cdev->private->iretry = 5; + cdev->private->iretry = 3; ret = __ccw_device_sense_id_start(cdev); if (ret && ret != -EBUSY) ccw_device_sense_id_done(cdev, ret); @@ -278,14 +252,13 @@ ccw_device_sense_id_irq(struct ccw_device *cdev, enum dev_event dev_event) ccw_device_sense_id_done(cdev, ret); break; case -EACCES: /* channel is not operational. */ - sch->lpm &= ~cdev->private->imask; - cdev->private->imask >>= 1; - cdev->private->iretry = 5; - /* fall through. */ case -EAGAIN: /* try again. */ - ret = __ccw_device_sense_id_start(cdev); - if (ret == 0 || ret == -EBUSY) - break; + cdev->private->iretry--; + if (cdev->private->iretry > 0) { + ret = __ccw_device_sense_id_start(cdev); + if (ret == 0 || ret == -EBUSY) + break; + } /* fall through. */ default: /* Sense ID failed. Try asking VM. 
*/ if (MACHINE_IS_VM) { From 7aa8dac7ac68f5c2293e2ecf5ef542aa849f541f Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 20 Nov 2007 11:13:31 +0100 Subject: [PATCH 02/12] [S390] magic sysrq: check for in_atomic before doing a console_unblank MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When doing a magic sysrq reboot on s390 the following bug message appears: SysRq : Resetting BUG: sleeping function called from invalid context at include/asm/semaphore.h:61 in_atomic():1, irqs_disabled():0 07000000004002a8 000000000fe6bc48 0000000000000002 0000000000000000 000000000fe6bce8 000000000fe6bc60 000000000fe6bc60 000000000012a79a 0000000000000000 07000000004002a8 0000000000000006 0000000000000000 0000000000000000 000000000fe6bc48 000000000000000d 000000000fe6bcb8 00000000004000c8 0000000000103234 000000000fe6bc48 000000000fe6bc90 Call Trace: ([<00000000001031b2>] show_trace+0x12e/0x148) [<000000000011ffca>] __might_sleep+0x10a/0x118 [<0000000000129fba>] acquire_console_sem+0x92/0xf4 [<000000000012a2ca>] console_unblank+0xc2/0xc8 [<0000000000107bb4>] machine_restart+0x54/0x6c [<000000000028e806>] sysrq_handle_reboot+0x26/0x30 [<000000000028e52a>] __handle_sysrq+0xa6/0x180 [<0000000000140134>] run_workqueue+0xcc/0x18c [<000000000014029a>] worker_thread+0xa6/0x108 [<00000000001458e4>] kthread+0x64/0x9c [<0000000000106f0e>] kernel_thread_starter+0x6/0xc [<0000000000106f08>] kernel_thread_starter+0x0/0xc The only reason for doing a console_unblank on s390 is to flush the log buffer. We have to check for in_atomic before doing a console_unblank as the console is otherwise filled with an unrelated bug message. 
Signed-off-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 7e1bfb984064..c4131a817412 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -347,7 +347,7 @@ void (*_machine_power_off)(void) = do_machine_power_off_nonsmp; void machine_restart(char *command) { - if (!in_interrupt() || oops_in_progress) + if ((!in_interrupt() && !in_atomic()) || oops_in_progress) /* * Only unblank the console if we are called in enabled * context or a bust_spinlocks cleared the way for us. From 411788ea7fca01ee803af8225ac35807b4d02050 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 20 Nov 2007 11:13:32 +0100 Subject: [PATCH 03/12] [S390] Fix irq tracing and lockdep_sys_exit calls. Current support for TRACE_IRQFLAGS and lockdep_sys_exit is broken. IRQ flag tracing is broken for program checks. Even worse is that the newly introduced calls to lockdep_sys_exit are in the critical section code which is not supposed to call any C functions. In addition the checks if locks are still held are also done when returning to kernel code which is broken as well. Fix all this by disabling interrupts and machine checks at the exit paths and then do the appropriate checks and calls. 
Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/entry.S | 109 ++++++++++++++++++++++++++----------- arch/s390/kernel/entry64.S | 106 +++++++++++++++++++++++++----------- arch/s390/kernel/setup.c | 4 ++ include/asm-s390/system.h | 5 ++ 4 files changed, 158 insertions(+), 66 deletions(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 139ca153d5cc..764d56177cb5 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -69,13 +69,31 @@ STACK_SIZE = 1 << STACK_SHIFT basr %r14,%r1 .endm - .macro LOCKDEP_SYS_EXIT - l %r1,BASED(.Llockdep_sys_exit) + .macro TRACE_IRQS_CHECK + tm SP_PSW(%r15),0x03 # irqs enabled? + jz 0f + l %r1,BASED(.Ltrace_irq_on) basr %r14,%r1 + j 1f +0: l %r1,BASED(.Ltrace_irq_off) + basr %r14,%r1 +1: .endm #else #define TRACE_IRQS_ON #define TRACE_IRQS_OFF +#define TRACE_IRQS_CHECK +#endif + +#ifdef CONFIG_LOCKDEP + .macro LOCKDEP_SYS_EXIT + tm SP_PSW+1(%r15),0x01 # returning to user ? + jz 0f + l %r1,BASED(.Llockdep_sys_exit) + basr %r14,%r1 +0: + .endm +#else #define LOCKDEP_SYS_EXIT #endif @@ -234,8 +252,6 @@ sysc_saveall: lh %r7,0x8a # get svc number from lowcore #ifdef CONFIG_VIRT_CPU_ACCOUNTING sysc_vtime: - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - bz BASED(sysc_do_svc) UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER sysc_stime: UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER @@ -263,19 +279,34 @@ sysc_do_restart: sysc_return: tm SP_PSW+1(%r15),0x01 # returning to user ? - bno BASED(sysc_leave) + bno BASED(sysc_restore) tm __TI_flags+3(%r9),_TIF_WORK_SVC bnz BASED(sysc_work) # there is work to do (signals etc.) 
+sysc_restore: +#ifdef CONFIG_TRACE_IRQFLAGS + la %r1,BASED(sysc_restore_trace_psw) + lpsw 0(%r1) +sysc_restore_trace: + TRACE_IRQS_CHECK LOCKDEP_SYS_EXIT +#endif sysc_leave: RESTORE_ALL __LC_RETURN_PSW,1 +sysc_done: + +#ifdef CONFIG_TRACE_IRQFLAGS + .align 8 + .globl sysc_restore_trace_psw +sysc_restore_trace_psw: + .long 0, sysc_restore_trace + 0x80000000 +#endif # # recheck if there is more work to do # sysc_work_loop: tm __TI_flags+3(%r9),_TIF_WORK_SVC - bz BASED(sysc_leave) # there is no work to do + bz BASED(sysc_restore) # there is no work to do # # One of the work bits is on. Find out which one. # @@ -290,8 +321,8 @@ sysc_work: bo BASED(sysc_restart) tm __TI_flags+3(%r9),_TIF_SINGLE_STEP bo BASED(sysc_singlestep) - LOCKDEP_SYS_EXIT - b BASED(sysc_leave) + b BASED(sysc_restore) +sysc_work_done: # # _TIF_NEED_RESCHED is set, call schedule @@ -458,6 +489,7 @@ pgm_check_handler: pgm_no_vtime: #endif l %r9,__LC_THREAD_INFO # load pointer to thread_info struct + TRACE_IRQS_OFF l %r3,__LC_PGM_ILC # load program interruption code la %r8,0x7f nr %r8,%r3 @@ -497,6 +529,7 @@ pgm_per_std: pgm_no_vtime2: #endif l %r9,__LC_THREAD_INFO # load pointer to thread_info struct + TRACE_IRQS_OFF l %r1,__TI_task(%r9) mvc __THREAD_per+__PER_atmid(2,%r1),__LC_PER_ATMID mvc __THREAD_per+__PER_address(4,%r1),__LC_PER_ADDRESS @@ -517,15 +550,13 @@ pgm_svcper: SAVE_ALL_SYNC __LC_SVC_OLD_PSW,__LC_SAVE_AREA CREATE_STACK_FRAME __LC_SVC_OLD_PSW,__LC_SAVE_AREA #ifdef CONFIG_VIRT_CPU_ACCOUNTING - tm SP_PSW+1(%r15),0x01 # interrupting from user ? 
- bz BASED(pgm_no_vtime3) UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER -pgm_no_vtime3: #endif lh %r7,0x8a # get svc number from lowcore l %r9,__LC_THREAD_INFO # load pointer to thread_info struct + TRACE_IRQS_OFF l %r1,__TI_task(%r9) mvc __THREAD_per+__PER_atmid(2,%r1),__LC_PER_ATMID mvc __THREAD_per+__PER_address(4,%r1),__LC_PER_ADDRESS @@ -542,7 +573,7 @@ kernel_per: mvi SP_TRAP+1(%r15),0x28 # set trap indication to pgm check la %r2,SP_PTREGS(%r15) # address of register-save area l %r1,BASED(.Lhandle_per) # load adr. of per handler - la %r14,BASED(sysc_leave) # load adr. of system return + la %r14,BASED(sysc_restore)# load adr. of system return br %r1 # branch to do_single_step /* @@ -569,26 +600,38 @@ io_no_vtime: l %r1,BASED(.Ldo_IRQ) # load address of do_IRQ la %r2,SP_PTREGS(%r15) # address of register-save area basr %r14,%r1 # branch to standard irq handler - TRACE_IRQS_ON - io_return: tm SP_PSW+1(%r15),0x01 # returning to user ? #ifdef CONFIG_PREEMPT bno BASED(io_preempt) # no -> check for preemptive scheduling #else - bno BASED(io_leave) # no-> skip resched & signal + bno BASED(io_restore) # no-> skip resched & signal #endif tm __TI_flags+3(%r9),_TIF_WORK_INT bnz BASED(io_work) # there is work to do (signals etc.) 
+io_restore: +#ifdef CONFIG_TRACE_IRQFLAGS + la %r1,BASED(io_restore_trace_psw) + lpsw 0(%r1) +io_restore_trace: + TRACE_IRQS_CHECK LOCKDEP_SYS_EXIT +#endif io_leave: RESTORE_ALL __LC_RETURN_PSW,0 io_done: +#ifdef CONFIG_TRACE_IRQFLAGS + .align 8 + .globl io_restore_trace_psw +io_restore_trace_psw: + .long 0, io_restore_trace + 0x80000000 +#endif + #ifdef CONFIG_PREEMPT io_preempt: icm %r0,15,__TI_precount(%r9) - bnz BASED(io_leave) + bnz BASED(io_restore) l %r1,SP_R15(%r15) s %r1,BASED(.Lc_spsize) mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15) @@ -596,12 +639,14 @@ io_preempt: lr %r15,%r1 io_resume_loop: tm __TI_flags+3(%r9),_TIF_NEED_RESCHED - bno BASED(io_leave) + bno BASED(io_restore) mvc __TI_precount(4,%r9),BASED(.Lc_pactive) + TRACE_IRQS_ON stosm __SF_EMPTY(%r15),0x03 # reenable interrupts l %r1,BASED(.Lschedule) basr %r14,%r1 # call schedule stnsm __SF_EMPTY(%r15),0xfc # disable I/O and ext. interrupts + TRACE_IRQS_OFF xc __TI_precount(4,%r9),__TI_precount(%r9) b BASED(io_resume_loop) #endif @@ -627,40 +672,42 @@ io_work_loop: bo BASED(io_reschedule) tm __TI_flags+3(%r9),(_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK) bnz BASED(io_sigpending) - LOCKDEP_SYS_EXIT - b BASED(io_leave) + b BASED(io_restore) +io_work_done: # # _TIF_MCCK_PENDING is set, call handler # io_mcck_pending: - TRACE_IRQS_OFF l %r1,BASED(.Ls390_handle_mcck) basr %r14,%r1 # TIF bit will be cleared by handler - TRACE_IRQS_ON b BASED(io_work_loop) # # _TIF_NEED_RESCHED is set, call schedule # io_reschedule: + TRACE_IRQS_ON l %r1,BASED(.Lschedule) stosm __SF_EMPTY(%r15),0x03 # reenable interrupts basr %r14,%r1 # call scheduler stnsm __SF_EMPTY(%r15),0xfc # disable I/O and ext. 
interrupts + TRACE_IRQS_OFF tm __TI_flags+3(%r9),_TIF_WORK_INT - bz BASED(io_leave) # there is no work to do + bz BASED(io_restore) # there is no work to do b BASED(io_work_loop) # # _TIF_SIGPENDING or _TIF_RESTORE_SIGMASK is set, call do_signal # io_sigpending: + TRACE_IRQS_ON stosm __SF_EMPTY(%r15),0x03 # reenable interrupts la %r2,SP_PTREGS(%r15) # load pt_regs l %r1,BASED(.Ldo_signal) basr %r14,%r1 # call do_signal stnsm __SF_EMPTY(%r15),0xfc # disable I/O and ext. interrupts + TRACE_IRQS_OFF b BASED(io_work_loop) /* @@ -688,7 +735,6 @@ ext_no_vtime: lh %r3,__LC_EXT_INT_CODE # get interruption code l %r1,BASED(.Ldo_extint) basr %r14,%r1 - TRACE_IRQS_ON b BASED(io_return) __critical_end: @@ -853,15 +899,15 @@ cleanup_table_system_call: cleanup_table_sysc_return: .long sysc_return + 0x80000000, sysc_leave + 0x80000000 cleanup_table_sysc_leave: - .long sysc_leave + 0x80000000, sysc_work_loop + 0x80000000 + .long sysc_leave + 0x80000000, sysc_done + 0x80000000 cleanup_table_sysc_work_loop: - .long sysc_work_loop + 0x80000000, sysc_reschedule + 0x80000000 + .long sysc_work_loop + 0x80000000, sysc_work_done + 0x80000000 cleanup_table_io_return: .long io_return + 0x80000000, io_leave + 0x80000000 cleanup_table_io_leave: .long io_leave + 0x80000000, io_done + 0x80000000 cleanup_table_io_work_loop: - .long io_work_loop + 0x80000000, io_mcck_pending + 0x80000000 + .long io_work_loop + 0x80000000, io_work_done + 0x80000000 cleanup_critical: clc 4(4,%r12),BASED(cleanup_table_system_call) @@ -930,8 +976,6 @@ cleanup_system_call: cleanup_vtime: clc __LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn+12) bhe BASED(cleanup_stime) - tm SP_PSW+1(%r15),0x01 # interrupting from user ? 
- bz BASED(cleanup_novtime) UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER cleanup_stime: clc __LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn+16) @@ -939,7 +983,6 @@ cleanup_stime: UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER cleanup_update: mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER -cleanup_novtime: #endif mvc __LC_RETURN_PSW+4(4),BASED(cleanup_table_system_call+4) la %r12,__LC_RETURN_PSW @@ -978,10 +1021,10 @@ cleanup_sysc_leave: 2: la %r12,__LC_RETURN_PSW br %r14 cleanup_sysc_leave_insn: + .long sysc_done - 4 + 0x80000000 #ifdef CONFIG_VIRT_CPU_ACCOUNTING - .long sysc_leave + 14 + 0x80000000 + .long sysc_done - 8 + 0x80000000 #endif - .long sysc_leave + 10 + 0x80000000 cleanup_io_return: mvc __LC_RETURN_PSW(4),0(%r12) @@ -1008,10 +1051,10 @@ cleanup_io_leave: 2: la %r12,__LC_RETURN_PSW br %r14 cleanup_io_leave_insn: + .long io_done - 4 + 0x80000000 #ifdef CONFIG_VIRT_CPU_ACCOUNTING - .long io_leave + 18 + 0x80000000 + .long io_done - 8 + 0x80000000 #endif - .long io_leave + 14 + 0x80000000 /* * Integer constants diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 05e26d1fdf40..e15c80efdd05 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -67,12 +67,28 @@ _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK | _TIF_NEED_RESCHED | \ brasl %r14,trace_hardirqs_off .endm - .macro LOCKDEP_SYS_EXIT - brasl %r14,lockdep_sys_exit + .macro TRACE_IRQS_CHECK + tm SP_PSW(%r15),0x03 # irqs enabled? + jz 0f + brasl %r14,trace_hardirqs_on + j 1f +0: brasl %r14,trace_hardirqs_off +1: .endm #else #define TRACE_IRQS_ON #define TRACE_IRQS_OFF +#define TRACE_IRQS_CHECK +#endif + +#ifdef CONFIG_LOCKDEP + .macro LOCKDEP_SYS_EXIT + tm SP_PSW+1(%r15),0x01 # returning to user ? 
+ jz 0f + brasl %r14,lockdep_sys_exit +0: + .endm +#else #define LOCKDEP_SYS_EXIT #endif @@ -222,8 +238,6 @@ sysc_saveall: llgh %r7,__LC_SVC_INT_CODE # get svc number from lowcore #ifdef CONFIG_VIRT_CPU_ACCOUNTING sysc_vtime: - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - jz sysc_do_svc UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER sysc_stime: UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER @@ -257,19 +271,34 @@ sysc_noemu: sysc_return: tm SP_PSW+1(%r15),0x01 # returning to user ? - jno sysc_leave + jno sysc_restore tm __TI_flags+7(%r9),_TIF_WORK_SVC jnz sysc_work # there is work to do (signals etc.) +sysc_restore: +#ifdef CONFIG_TRACE_IRQFLAGS + larl %r1,sysc_restore_trace_psw + lpswe 0(%r1) +sysc_restore_trace: + TRACE_IRQS_CHECK LOCKDEP_SYS_EXIT +#endif sysc_leave: RESTORE_ALL __LC_RETURN_PSW,1 +sysc_done: + +#ifdef CONFIG_TRACE_IRQFLAGS + .align 8 + .globl sysc_restore_trace_psw +sysc_restore_trace_psw: + .quad 0, sysc_restore_trace +#endif # # recheck if there is more work to do # sysc_work_loop: tm __TI_flags+7(%r9),_TIF_WORK_SVC - jz sysc_leave # there is no work to do + jz sysc_restore # there is no work to do # # One of the work bits is on. Find out which one. # @@ -284,8 +313,8 @@ sysc_work: jo sysc_restart tm __TI_flags+7(%r9),_TIF_SINGLE_STEP jo sysc_singlestep - LOCKDEP_SYS_EXIT - j sysc_leave + j sysc_restore +sysc_work_done: # # _TIF_NEED_RESCHED is set, call schedule @@ -445,6 +474,7 @@ pgm_check_handler: pgm_no_vtime: #endif lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct + TRACE_IRQS_OFF lgf %r3,__LC_PGM_ILC # load program interruption code lghi %r8,0x7f ngr %r8,%r3 @@ -484,6 +514,7 @@ pgm_per_std: pgm_no_vtime2: #endif lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct + TRACE_IRQS_OFF lg %r1,__TI_task(%r9) tm SP_PSW+1(%r15),0x01 # kernel per event ? 
jz kernel_per @@ -504,12 +535,9 @@ pgm_svcper: SAVE_ALL_SYNC __LC_SVC_OLD_PSW,__LC_SAVE_AREA CREATE_STACK_FRAME __LC_SVC_OLD_PSW,__LC_SAVE_AREA #ifdef CONFIG_VIRT_CPU_ACCOUNTING - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - jz pgm_no_vtime3 UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER -pgm_no_vtime3: #endif llgh %r7,__LC_SVC_INT_CODE # get svc number from lowcore lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct @@ -529,7 +557,7 @@ kernel_per: lhi %r0,__LC_PGM_OLD_PSW sth %r0,SP_TRAP(%r15) # set trap indication to pgm check la %r2,SP_PTREGS(%r15) # address of register-save area - larl %r14,sysc_leave # load adr. of system ret, no work + larl %r14,sysc_restore # load adr. of system ret, no work jg do_single_step # branch to do_single_step /* @@ -554,26 +582,38 @@ io_no_vtime: TRACE_IRQS_OFF la %r2,SP_PTREGS(%r15) # address of register-save area brasl %r14,do_IRQ # call standard irq handler - TRACE_IRQS_ON - io_return: tm SP_PSW+1(%r15),0x01 # returning to user ? #ifdef CONFIG_PREEMPT jno io_preempt # no -> check for preemptive scheduling #else - jno io_leave # no-> skip resched & signal + jno io_restore # no-> skip resched & signal #endif tm __TI_flags+7(%r9),_TIF_WORK_INT jnz io_work # there is work to do (signals etc.) 
+io_restore: +#ifdef CONFIG_TRACE_IRQFLAGS + larl %r1,io_restore_trace_psw + lpswe 0(%r1) +io_restore_trace: + TRACE_IRQS_CHECK LOCKDEP_SYS_EXIT +#endif io_leave: RESTORE_ALL __LC_RETURN_PSW,0 io_done: +#ifdef CONFIG_TRACE_IRQFLAGS + .align 8 + .globl io_restore_trace_psw +io_restore_trace_psw: + .quad 0, io_restore_trace +#endif + #ifdef CONFIG_PREEMPT io_preempt: icm %r0,15,__TI_precount(%r9) - jnz io_leave + jnz io_restore # switch to kernel stack lg %r1,SP_R15(%r15) aghi %r1,-SP_SIZE @@ -582,12 +622,14 @@ io_preempt: lgr %r15,%r1 io_resume_loop: tm __TI_flags+7(%r9),_TIF_NEED_RESCHED - jno io_leave + jno io_restore larl %r1,.Lc_pactive mvc __TI_precount(4,%r9),0(%r1) + TRACE_IRQS_ON stosm __SF_EMPTY(%r15),0x03 # reenable interrupts brasl %r14,schedule # call schedule stnsm __SF_EMPTY(%r15),0xfc # disable I/O and ext. interrupts + TRACE_IRQS_OFF xc __TI_precount(4,%r9),__TI_precount(%r9) j io_resume_loop #endif @@ -613,37 +655,39 @@ io_work_loop: jo io_reschedule tm __TI_flags+7(%r9),(_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK) jnz io_sigpending - LOCKDEP_SYS_EXIT - j io_leave + j io_restore +io_work_done: # # _TIF_MCCK_PENDING is set, call handler # io_mcck_pending: - TRACE_IRQS_OFF brasl %r14,s390_handle_mcck # TIF bit will be cleared by handler - TRACE_IRQS_ON j io_work_loop # # _TIF_NEED_RESCHED is set, call schedule # io_reschedule: + TRACE_IRQS_ON stosm __SF_EMPTY(%r15),0x03 # reenable interrupts brasl %r14,schedule # call scheduler stnsm __SF_EMPTY(%r15),0xfc # disable I/O and ext. interrupts + TRACE_IRQS_OFF tm __TI_flags+7(%r9),_TIF_WORK_INT - jz io_leave # there is no work to do + jz io_restore # there is no work to do j io_work_loop # # _TIF_SIGPENDING or _TIF_RESTORE_SIGMASK is set, call do_signal # io_sigpending: + TRACE_IRQS_ON stosm __SF_EMPTY(%r15),0x03 # reenable interrupts la %r2,SP_PTREGS(%r15) # load pt_regs brasl %r14,do_signal # call do_signal stnsm __SF_EMPTY(%r15),0xfc # disable I/O and ext. 
interrupts + TRACE_IRQS_OFF j io_work_loop /* @@ -669,7 +713,6 @@ ext_no_vtime: la %r2,SP_PTREGS(%r15) # address of register-save area llgh %r3,__LC_EXT_INT_CODE # get interruption code brasl %r14,do_extint - TRACE_IRQS_ON j io_return __critical_end: @@ -824,15 +867,15 @@ cleanup_table_system_call: cleanup_table_sysc_return: .quad sysc_return, sysc_leave cleanup_table_sysc_leave: - .quad sysc_leave, sysc_work_loop + .quad sysc_leave, sysc_done cleanup_table_sysc_work_loop: - .quad sysc_work_loop, sysc_reschedule + .quad sysc_work_loop, sysc_work_done cleanup_table_io_return: .quad io_return, io_leave cleanup_table_io_leave: .quad io_leave, io_done cleanup_table_io_work_loop: - .quad io_work_loop, io_mcck_pending + .quad io_work_loop, io_work_done cleanup_critical: clc 8(8,%r12),BASED(cleanup_table_system_call) @@ -901,8 +944,6 @@ cleanup_system_call: cleanup_vtime: clc __LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn+24) jhe cleanup_stime - tm SP_PSW+1(%r15),0x01 # interrupting from user ? 
- jz cleanup_novtime UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER cleanup_stime: clc __LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn+32) @@ -910,7 +951,6 @@ cleanup_stime: UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER cleanup_update: mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER -cleanup_novtime: #endif mvc __LC_RETURN_PSW+8(8),BASED(cleanup_table_system_call+8) la %r12,__LC_RETURN_PSW @@ -949,10 +989,10 @@ cleanup_sysc_leave: 2: la %r12,__LC_RETURN_PSW br %r14 cleanup_sysc_leave_insn: + .quad sysc_done - 4 #ifdef CONFIG_VIRT_CPU_ACCOUNTING - .quad sysc_leave + 16 + .quad sysc_done - 8 #endif - .quad sysc_leave + 12 cleanup_io_return: mvc __LC_RETURN_PSW(8),0(%r12) @@ -979,10 +1019,10 @@ cleanup_io_leave: 2: la %r12,__LC_RETURN_PSW br %r14 cleanup_io_leave_insn: + .quad io_done - 4 #ifdef CONFIG_VIRT_CPU_ACCOUNTING - .quad io_leave + 20 + .quad io_done - 8 #endif - .quad io_leave + 16 /* * Integer constants diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index c4131a817412..50f8f1e3760e 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -492,6 +492,10 @@ static void setup_addressing_mode(void) printk("S390 address spaces switched, "); set_amode_and_uaccess(PSW_ASC_PRIMARY, PSW32_ASC_PRIMARY); } +#ifdef CONFIG_TRACE_IRQFLAGS + sysc_restore_trace_psw.mask = psw_kernel_bits & ~PSW_MASK_MCHECK; + io_restore_trace_psw.mask = psw_kernel_bits & ~PSW_MASK_MCHECK; +#endif } static void __init diff --git a/include/asm-s390/system.h b/include/asm-s390/system.h index d866d3385556..44bda786eef7 100644 --- a/include/asm-s390/system.h +++ b/include/asm-s390/system.h @@ -388,6 +388,11 @@ extern void (*_machine_power_off)(void); #define arch_align_stack(x) (x) +#ifdef CONFIG_TRACE_IRQFLAGS +extern psw_t sysc_restore_trace_psw; +extern psw_t io_restore_trace_psw; +#endif + #endif /* __KERNEL__ */ #endif From 43ebbf119a9670d8f08b9e57968e109c770f8636 Mon Sep 17 00:00:00 2001 From: Heiko 
Carstens Date: Tue, 20 Nov 2007 11:13:33 +0100 Subject: [PATCH 04/12] [S390] cmm: remove unused binary sysctls. Remove binary sysctls that never worked due to missing strategy functions. Cc: Christian Borntraeger Signed-off-by: Heiko Carstens Signed-off-by: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Martin Schwidefsky --- arch/s390/mm/cmm.c | 3 --- include/linux/sysctl.h | 5 ----- kernel/sysctl_check.c | 3 --- 3 files changed, 11 deletions(-) diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index d4ed93dfb9c7..413c240cbca7 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -341,19 +341,16 @@ cmm_timeout_handler(ctl_table *ctl, int write, struct file *filp, static struct ctl_table cmm_table[] = { { - .ctl_name = VM_CMM_PAGES, .procname = "cmm_pages", .mode = 0644, .proc_handler = &cmm_pages_handler, }, { - .ctl_name = VM_CMM_TIMED_PAGES, .procname = "cmm_timed_pages", .mode = 0644, .proc_handler = &cmm_pages_handler, }, { - .ctl_name = VM_CMM_TIMEOUT, .procname = "cmm_timeout", .mode = 0644, .proc_handler = &cmm_timeout_handler, diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index e99171f01b4c..35b698120af8 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -207,11 +207,6 @@ enum VM_PANIC_ON_OOM=33, /* panic at out-of-memory */ VM_VDSO_ENABLED=34, /* map VDSO into new processes? 
*/ VM_MIN_SLAB=35, /* Percent pages ignored by zone reclaim */ - - /* s390 vm cmm sysctls */ - VM_CMM_PAGES=1111, - VM_CMM_TIMED_PAGES=1112, - VM_CMM_TIMEOUT=1113, }; diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c index 4abc6d2306f4..cde6d780b0ec 100644 --- a/kernel/sysctl_check.c +++ b/kernel/sysctl_check.c @@ -140,9 +140,6 @@ static struct trans_ctl_table trans_vm_table[] = { { VM_PANIC_ON_OOM, "panic_on_oom" }, { VM_VDSO_ENABLED, "vdso_enabled" }, { VM_MIN_SLAB, "min_slab_ratio" }, - { VM_CMM_PAGES, "cmm_pages" }, - { VM_CMM_TIMED_PAGES, "cmm_timed_pages" }, - { VM_CMM_TIMEOUT, "cmm_timeout" }, {} }; From 37e3a6ac5a30468021a2f366e497d455bbcb5d21 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 20 Nov 2007 11:13:34 +0100 Subject: [PATCH 05/12] [S390] appldata: remove unused binary sysctls. Remove binary sysctls that never worked due to missing strategy functions. Cc: "Eric W. Biederman" Cc: Christian Borntraeger Cc: Gerald Schaefer Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/appldata/appldata.h | 1 - arch/s390/appldata/appldata_base.c | 74 ++++++--------------------- arch/s390/appldata/appldata_mem.c | 1 - arch/s390/appldata/appldata_net_sum.c | 1 - arch/s390/appldata/appldata_os.c | 1 - include/linux/sysctl.h | 1 - kernel/sysctl_check.c | 11 ---- 7 files changed, 15 insertions(+), 75 deletions(-) diff --git a/arch/s390/appldata/appldata.h b/arch/s390/appldata/appldata.h index 4069b81f7f1d..db3ae8505103 100644 --- a/arch/s390/appldata/appldata.h +++ b/arch/s390/appldata/appldata.h @@ -45,7 +45,6 @@ struct appldata_ops { int active; /* monitoring status */ /* fill in from here */ - unsigned int ctl_nr; /* sysctl ID */ char name[APPLDATA_PROC_NAME_LENGTH]; /* name of /proc fs node */ unsigned char record_nr; /* Record Nr. 
for Product ID */ void (*callback)(void *data); /* callback function */ diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index ac61cf43a7d9..655d52543e2d 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -53,29 +53,26 @@ static int appldata_interval_handler(ctl_table *ctl, int write, static struct ctl_table_header *appldata_sysctl_header; static struct ctl_table appldata_table[] = { { - .ctl_name = CTL_APPLDATA_TIMER, .procname = "timer", .mode = S_IRUGO | S_IWUSR, .proc_handler = &appldata_timer_handler, }, { - .ctl_name = CTL_APPLDATA_INTERVAL, .procname = "interval", .mode = S_IRUGO | S_IWUSR, .proc_handler = &appldata_interval_handler, }, - { .ctl_name = 0 } + { }, }; static struct ctl_table appldata_dir_table[] = { { - .ctl_name = CTL_APPLDATA, .procname = appldata_proc_name, .maxlen = 0, .mode = S_IRUGO | S_IXUGO, .child = appldata_table, }, - { .ctl_name = 0 } + { }, }; /* @@ -441,75 +438,38 @@ appldata_generic_handler(ctl_table *ctl, int write, struct file *filp, */ int appldata_register_ops(struct appldata_ops *ops) { - struct list_head *lh; - struct appldata_ops *tmp_ops; - int i; + if ((ops->size > APPLDATA_MAX_REC_SIZE) || (ops->size < 0)) + return -EINVAL; - i = 0; - - if ((ops->size > APPLDATA_MAX_REC_SIZE) || - (ops->size < 0)){ - P_ERROR("Invalid size of %s record = %i, maximum = %i!\n", - ops->name, ops->size, APPLDATA_MAX_REC_SIZE); + ops->ctl_table = kzalloc(4 * sizeof(struct ctl_table), GFP_KERNEL); + if (!ops->ctl_table) return -ENOMEM; - } - if ((ops->ctl_nr == CTL_APPLDATA) || - (ops->ctl_nr == CTL_APPLDATA_TIMER) || - (ops->ctl_nr == CTL_APPLDATA_INTERVAL)) { - P_ERROR("ctl_nr %i already in use!\n", ops->ctl_nr); - return -EBUSY; - } - ops->ctl_table = kzalloc(4*sizeof(struct ctl_table), GFP_KERNEL); - if (ops->ctl_table == NULL) { - P_ERROR("Not enough memory for %s ctl_table!\n", ops->name); - return -ENOMEM; - } spin_lock(&appldata_ops_lock); - list_for_each(lh, 
&appldata_ops_list) { - tmp_ops = list_entry(lh, struct appldata_ops, list); - P_DEBUG("register_ops loop: %i) name = %s, ctl = %i\n", - ++i, tmp_ops->name, tmp_ops->ctl_nr); - P_DEBUG("Comparing %s (ctl %i) with %s (ctl %i)\n", - tmp_ops->name, tmp_ops->ctl_nr, ops->name, - ops->ctl_nr); - if (strncmp(tmp_ops->name, ops->name, - APPLDATA_PROC_NAME_LENGTH) == 0) { - P_ERROR("Name \"%s\" already registered!\n", ops->name); - kfree(ops->ctl_table); - spin_unlock(&appldata_ops_lock); - return -EBUSY; - } - if (tmp_ops->ctl_nr == ops->ctl_nr) { - P_ERROR("ctl_nr %i already registered!\n", ops->ctl_nr); - kfree(ops->ctl_table); - spin_unlock(&appldata_ops_lock); - return -EBUSY; - } - } list_add(&ops->list, &appldata_ops_list); spin_unlock(&appldata_ops_lock); - ops->ctl_table[0].ctl_name = CTL_APPLDATA; ops->ctl_table[0].procname = appldata_proc_name; ops->ctl_table[0].maxlen = 0; ops->ctl_table[0].mode = S_IRUGO | S_IXUGO; ops->ctl_table[0].child = &ops->ctl_table[2]; - ops->ctl_table[1].ctl_name = 0; - - ops->ctl_table[2].ctl_name = ops->ctl_nr; ops->ctl_table[2].procname = ops->name; ops->ctl_table[2].mode = S_IRUGO | S_IWUSR; ops->ctl_table[2].proc_handler = appldata_generic_handler; ops->ctl_table[2].data = ops; - ops->ctl_table[3].ctl_name = 0; - ops->sysctl_header = register_sysctl_table(ops->ctl_table); - + if (!ops->sysctl_header) + goto out; P_INFO("%s-ops registered!\n", ops->name); return 0; +out: + spin_lock(&appldata_ops_lock); + list_del(&ops->list); + spin_unlock(&appldata_ops_lock); + kfree(ops->ctl_table); + return -ENOMEM; } /* @@ -519,15 +479,11 @@ int appldata_register_ops(struct appldata_ops *ops) */ void appldata_unregister_ops(struct appldata_ops *ops) { - void *table; spin_lock(&appldata_ops_lock); list_del(&ops->list); - /* at that point any incoming access will fail */ - table = ops->ctl_table; - ops->ctl_table = NULL; spin_unlock(&appldata_ops_lock); unregister_sysctl_table(ops->sysctl_header); - kfree(table); + kfree(ops->ctl_table); 
P_INFO("%s-ops unregistered!\n", ops->name); } /********************** module-ops management **************************/ diff --git a/arch/s390/appldata/appldata_mem.c b/arch/s390/appldata/appldata_mem.c index 697eb30a68a3..51181ccdb87b 100644 --- a/arch/s390/appldata/appldata_mem.c +++ b/arch/s390/appldata/appldata_mem.c @@ -147,7 +147,6 @@ static void appldata_get_mem_data(void *data) static struct appldata_ops ops = { - .ctl_nr = CTL_APPLDATA_MEM, .name = "mem", .record_nr = APPLDATA_RECORD_MEM_ID, .size = sizeof(struct appldata_mem_data), diff --git a/arch/s390/appldata/appldata_net_sum.c b/arch/s390/appldata/appldata_net_sum.c index 6c1815a47714..4d8344336001 100644 --- a/arch/s390/appldata/appldata_net_sum.c +++ b/arch/s390/appldata/appldata_net_sum.c @@ -142,7 +142,6 @@ static void appldata_get_net_sum_data(void *data) static struct appldata_ops ops = { - .ctl_nr = CTL_APPLDATA_NET_SUM, .name = "net_sum", .record_nr = APPLDATA_RECORD_NET_SUM_ID, .size = sizeof(struct appldata_net_sum_data), diff --git a/arch/s390/appldata/appldata_os.c b/arch/s390/appldata/appldata_os.c index 76a15523ae9e..6b3eafe10453 100644 --- a/arch/s390/appldata/appldata_os.c +++ b/arch/s390/appldata/appldata_os.c @@ -82,7 +82,6 @@ struct appldata_os_data { static struct appldata_os_data *appldata_os_data; static struct appldata_ops ops = { - .ctl_nr = CTL_APPLDATA_OS, .name = "os", .record_nr = APPLDATA_RECORD_OS_ID, .owner = THIS_MODULE, diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 35b698120af8..4f5047df8a9e 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -70,7 +70,6 @@ enum CTL_ABI=9, /* Binary emulation */ CTL_CPU=10, /* CPU stuff (speed scaling, etc) */ CTL_ARLAN=254, /* arlan wireless driver */ - CTL_APPLDATA=2120, /* s390 appldata */ CTL_S390DBF=5677, /* s390 debug */ CTL_SUNRPC=7249, /* sunrpc debug */ CTL_PM=9899, /* frv power management */ diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c index cde6d780b0ec..8f5baac1eb08 
100644 --- a/kernel/sysctl_check.c +++ b/kernel/sysctl_check.c @@ -1216,16 +1216,6 @@ static struct trans_ctl_table trans_arlan_table[] = { {} }; -static struct trans_ctl_table trans_appldata_table[] = { - { CTL_APPLDATA_TIMER, "timer" }, - { CTL_APPLDATA_INTERVAL, "interval" }, - { CTL_APPLDATA_OS, "os" }, - { CTL_APPLDATA_NET_SUM, "net_sum" }, - { CTL_APPLDATA_MEM, "mem" }, - {} - -}; - static struct trans_ctl_table trans_s390dbf_table[] = { { 5678 /* CTL_S390DBF_STOPPABLE */, "debug_stoppable" }, { 5679 /* CTL_S390DBF_ACTIVE */, "debug_active" }, @@ -1270,7 +1260,6 @@ static struct trans_ctl_table trans_root_table[] = { { CTL_ABI, "abi" }, /* CTL_CPU not used */ { CTL_ARLAN, "arlan", trans_arlan_table }, - { CTL_APPLDATA, "appldata", trans_appldata_table }, { CTL_S390DBF, "s390dbf", trans_s390dbf_table }, { CTL_SUNRPC, "sunrpc", trans_sunrpc_table }, { CTL_PM, "pm", trans_pm_table }, From b8e7a54cd06b0b0174029ef3a7f5a1415a2c28f2 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 20 Nov 2007 11:13:35 +0100 Subject: [PATCH 06/12] [S390] Fix kernel preemption. When returning from IRQ handling and TIF_NEED_RESCHED is set we must call preempt_schedule_irq() instead of schedule(). Otherwise the BKL might be unlocked in schedule() and therefore everything that relies on the BKL is broken.
Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/entry.S | 15 +++++---------- arch/s390/kernel/entry64.S | 12 ++---------- 2 files changed, 7 insertions(+), 20 deletions(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 764d56177cb5..b2b2edc40eb1 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -640,15 +640,9 @@ io_preempt: io_resume_loop: tm __TI_flags+3(%r9),_TIF_NEED_RESCHED bno BASED(io_restore) - mvc __TI_precount(4,%r9),BASED(.Lc_pactive) - TRACE_IRQS_ON - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts - l %r1,BASED(.Lschedule) - basr %r14,%r1 # call schedule - stnsm __SF_EMPTY(%r15),0xfc # disable I/O and ext. interrupts - TRACE_IRQS_OFF - xc __TI_precount(4,%r9),__TI_precount(%r9) - b BASED(io_resume_loop) + l %r1,BASED(.Lpreempt_schedule_irq) + la %r14,BASED(io_resume_loop) + br %r1 # call schedule #endif # @@ -1062,7 +1056,6 @@ cleanup_io_leave_insn: .align 4 .Lc_spsize: .long SP_SIZE .Lc_overhead: .long STACK_FRAME_OVERHEAD -.Lc_pactive: .long PREEMPT_ACTIVE .Lnr_syscalls: .long NR_syscalls .L0x018: .short 0x018 .L0x020: .short 0x020 @@ -1086,6 +1079,8 @@ cleanup_io_leave_insn: .Lexecve_tail: .long execve_tail .Ljump_table: .long pgm_check_table .Lschedule: .long schedule +.Lpreempt_schedule_irq: + .long preempt_schedule_irq .Ltrace: .long syscall_trace .Lschedtail: .long schedule_tail .Lsysc_table: .long sys_call_table diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index e15c80efdd05..a3e47b893f07 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -623,15 +623,8 @@ io_preempt: io_resume_loop: tm __TI_flags+7(%r9),_TIF_NEED_RESCHED jno io_restore - larl %r1,.Lc_pactive - mvc __TI_precount(4,%r9),0(%r1) - TRACE_IRQS_ON - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts - brasl %r14,schedule # call schedule - stnsm __SF_EMPTY(%r15),0xfc # disable I/O and ext. 
interrupts - TRACE_IRQS_OFF - xc __TI_precount(4,%r9),__TI_precount(%r9) - j io_resume_loop + larl %r14,io_resume_loop + jg preempt_schedule_irq #endif # @@ -1029,7 +1022,6 @@ cleanup_io_leave_insn: */ .align 4 .Lconst: -.Lc_pactive: .long PREEMPT_ACTIVE .Lnr_syscalls: .long NR_syscalls .L0x0130: .short 0x130 .L0x0140: .short 0x140 From ce7e9fae8db07af4080e868f4588f8f095f803dc Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 20 Nov 2007 11:13:36 +0100 Subject: [PATCH 07/12] [S390] Optimize storage key handling for anonymous pages page_mkclean used to call page_clear_dirty for every given page. This is different to all other architectures, where the dirty bit in the PTEs is only reset if page_mapping() returns a non-NULL pointer. We can move the page_test_dirty/page_clear_dirty sequence into the 2nd if to avoid unnecessary iske/sske sequences, which are expensive. This change also helps kvm for s390 as the host must transfer the dirty bit into the guest status bits. By moving the page_clear_dirty operation into the 2nd if, the vm will only call page_clear_dirty for pages where it walks the mapping anyway. There it calls ptep_clear_flush for writable ptes, so we can transfer the dirty bit to the guest.
Signed-off-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky --- mm/rmap.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/mm/rmap.c b/mm/rmap.c index dc3be5f5b0da..dbc2ca2057a5 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -471,11 +471,12 @@ int page_mkclean(struct page *page) if (page_mapped(page)) { struct address_space *mapping = page_mapping(page); - if (mapping) + if (mapping) { ret = page_mkclean_file(mapping, page); - if (page_test_dirty(page)) { - page_clear_dirty(page); - ret = 1; + if (page_test_dirty(page)) { + page_clear_dirty(page); + ret = 1; + } } } From 677d762319facc20467243c6dd9487261e3515b0 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 20 Nov 2007 11:13:37 +0100 Subject: [PATCH 08/12] [S390] Dont overwrite lowcores on smp_send_stop(). Don't perform a sigp store-status-at-address on smp_send_stop(). It will overwrite the lowcores of other cpus and destroy valuable debug information. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/smp.c | 56 ++++++------------------------------------ 1 file changed, 7 insertions(+), 49 deletions(-) diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index b05ae8584258..264ea906db4c 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -193,10 +193,16 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info, } EXPORT_SYMBOL(smp_call_function_single); -static void do_send_stop(void) +void smp_send_stop(void) { int cpu, rc; + /* Disable all interrupts/machine checks */ + __load_psw_mask(psw_kernel_bits & ~PSW_MASK_MCHECK); + + /* write magic number to zero page (absolute 0) */ + lowcore_ptr[smp_processor_id()]->panic_magic = __PANIC_MAGIC; + /* stop all processors */ for_each_online_cpu(cpu) { if (cpu == smp_processor_id()) @@ -204,60 +210,12 @@ static void do_send_stop(void) do { rc = signal_processor(cpu, sigp_stop); } while (rc == sigp_busy); - } -} -static void do_store_status(void) -{
- int cpu, rc; - - /* store status of all processors in their lowcores (real 0) */ - for_each_online_cpu(cpu) { - if (cpu == smp_processor_id()) - continue; - do { - rc = signal_processor_p( - (__u32)(unsigned long) lowcore_ptr[cpu], cpu, - sigp_store_status_at_address); - } while (rc == sigp_busy); - } -} - -static void do_wait_for_stop(void) -{ - int cpu; - - /* Wait for all other cpus to enter stopped state */ - for_each_online_cpu(cpu) { - if (cpu == smp_processor_id()) - continue; while (!smp_cpu_not_running(cpu)) cpu_relax(); } } -/* - * this function sends a 'stop' sigp to all other CPUs in the system. - * it goes straight through. - */ -void smp_send_stop(void) -{ - /* Disable all interrupts/machine checks */ - __load_psw_mask(psw_kernel_bits & ~PSW_MASK_MCHECK); - - /* write magic number to zero page (absolute 0) */ - lowcore_ptr[smp_processor_id()]->panic_magic = __PANIC_MAGIC; - - /* stop other processors. */ - do_send_stop(); - - /* wait until other processors are stopped */ - do_wait_for_stop(); - - /* store status of other processors. */ - do_store_status(); -} - /* * Reboot, halt and power_off routines for SMP. */ From 70cf5035dedaeddf8f6ae5c0a74ea65dcd7356ab Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 20 Nov 2007 11:13:38 +0100 Subject: [PATCH 09/12] [S390] Explicitly code allocpercpu calls in iucv The iucv is the only user of the various functions that are used to bring parts of cpus up and down. It's the only allocpercpu user that will do I/O on per cpu objects (which is difficult to do with virtually mapped memory). And it's the only use of allocpercpu where a GFP_DMA allocation is done. Remove the allocpercpu calls from iucv and code the allocation and freeing manually. After this patch it is possible to remove a large part of the allocpercpu API.
Signed-off-by: Christoph Lameter Signed-off-by: Martin Schwidefsky --- net/iucv/iucv.c | 107 +++++++++++++++++++++++++++--------------------- 1 file changed, 61 insertions(+), 46 deletions(-) diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index a2f5a6ea3895..7698f6c459d6 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -97,7 +97,7 @@ struct iucv_irq_list { struct iucv_irq_data data; }; -static struct iucv_irq_data *iucv_irq_data; +static struct iucv_irq_data *iucv_irq_data[NR_CPUS]; static cpumask_t iucv_buffer_cpumask = CPU_MASK_NONE; static cpumask_t iucv_irq_cpumask = CPU_MASK_NONE; @@ -277,7 +277,7 @@ union iucv_param { /* * Anchor for per-cpu IUCV command parameter block. */ -static union iucv_param *iucv_param; +static union iucv_param *iucv_param[NR_CPUS]; /** * iucv_call_b2f0 @@ -356,7 +356,7 @@ static void iucv_allow_cpu(void *data) * 0x10 - Flag to allow priority message completion interrupts * 0x08 - Flag to allow IUCV control interrupts */ - parm = percpu_ptr(iucv_param, smp_processor_id()); + parm = iucv_param[cpu]; memset(parm, 0, sizeof(union iucv_param)); parm->set_mask.ipmask = 0xf8; iucv_call_b2f0(IUCV_SETMASK, parm); @@ -377,7 +377,7 @@ static void iucv_block_cpu(void *data) union iucv_param *parm; /* Disable all iucv interrupts. */ - parm = percpu_ptr(iucv_param, smp_processor_id()); + parm = iucv_param[cpu]; memset(parm, 0, sizeof(union iucv_param)); iucv_call_b2f0(IUCV_SETMASK, parm); @@ -401,9 +401,9 @@ static void iucv_declare_cpu(void *data) return; /* Declare interrupt buffer. */ - parm = percpu_ptr(iucv_param, cpu); + parm = iucv_param[cpu]; memset(parm, 0, sizeof(union iucv_param)); - parm->db.ipbfadr1 = virt_to_phys(percpu_ptr(iucv_irq_data, cpu)); + parm->db.ipbfadr1 = virt_to_phys(iucv_irq_data[cpu]); rc = iucv_call_b2f0(IUCV_DECLARE_BUFFER, parm); if (rc) { char *err = "Unknown"; @@ -458,7 +458,7 @@ static void iucv_retrieve_cpu(void *data) iucv_block_cpu(NULL); /* Retrieve interrupt buffer. 
*/ - parm = percpu_ptr(iucv_param, cpu); + parm = iucv_param[cpu]; iucv_call_b2f0(IUCV_RETRIEVE_BUFFER, parm); /* Clear indication that an iucv buffer exists for this cpu. */ @@ -558,22 +558,23 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self, switch (action) { case CPU_UP_PREPARE: case CPU_UP_PREPARE_FROZEN: - if (!percpu_populate(iucv_irq_data, - sizeof(struct iucv_irq_data), - GFP_KERNEL|GFP_DMA, cpu)) + iucv_irq_data[cpu] = kmalloc_node(sizeof(struct iucv_irq_data), + GFP_KERNEL|GFP_DMA, cpu_to_node(cpu)); + if (!iucv_irq_data[cpu]) return NOTIFY_BAD; - if (!percpu_populate(iucv_param, sizeof(union iucv_param), - GFP_KERNEL|GFP_DMA, cpu)) { - percpu_depopulate(iucv_irq_data, cpu); + iucv_param[cpu] = kmalloc_node(sizeof(union iucv_param), + GFP_KERNEL|GFP_DMA, cpu_to_node(cpu)); + if (!iucv_param[cpu]) return NOTIFY_BAD; - } break; case CPU_UP_CANCELED: case CPU_UP_CANCELED_FROZEN: case CPU_DEAD: case CPU_DEAD_FROZEN: - percpu_depopulate(iucv_param, cpu); - percpu_depopulate(iucv_irq_data, cpu); + kfree(iucv_param[cpu]); + iucv_param[cpu] = NULL; + kfree(iucv_irq_data[cpu]); + iucv_irq_data[cpu] = NULL; break; case CPU_ONLINE: case CPU_ONLINE_FROZEN: @@ -612,7 +613,7 @@ static int iucv_sever_pathid(u16 pathid, u8 userdata[16]) { union iucv_param *parm; - parm = percpu_ptr(iucv_param, smp_processor_id()); + parm = iucv_param[smp_processor_id()]; memset(parm, 0, sizeof(union iucv_param)); if (userdata) memcpy(parm->ctrl.ipuser, userdata, sizeof(parm->ctrl.ipuser)); @@ -755,7 +756,7 @@ int iucv_path_accept(struct iucv_path *path, struct iucv_handler *handler, local_bh_disable(); /* Prepare parameter block. 
*/ - parm = percpu_ptr(iucv_param, smp_processor_id()); + parm = iucv_param[smp_processor_id()]; memset(parm, 0, sizeof(union iucv_param)); parm->ctrl.ippathid = path->pathid; parm->ctrl.ipmsglim = path->msglim; @@ -799,7 +800,7 @@ int iucv_path_connect(struct iucv_path *path, struct iucv_handler *handler, BUG_ON(in_atomic()); spin_lock_bh(&iucv_table_lock); iucv_cleanup_queue(); - parm = percpu_ptr(iucv_param, smp_processor_id()); + parm = iucv_param[smp_processor_id()]; memset(parm, 0, sizeof(union iucv_param)); parm->ctrl.ipmsglim = path->msglim; parm->ctrl.ipflags1 = path->flags; @@ -854,7 +855,7 @@ int iucv_path_quiesce(struct iucv_path *path, u8 userdata[16]) int rc; local_bh_disable(); - parm = percpu_ptr(iucv_param, smp_processor_id()); + parm = iucv_param[smp_processor_id()]; memset(parm, 0, sizeof(union iucv_param)); if (userdata) memcpy(parm->ctrl.ipuser, userdata, sizeof(parm->ctrl.ipuser)); @@ -881,7 +882,7 @@ int iucv_path_resume(struct iucv_path *path, u8 userdata[16]) int rc; local_bh_disable(); - parm = percpu_ptr(iucv_param, smp_processor_id()); + parm = iucv_param[smp_processor_id()]; memset(parm, 0, sizeof(union iucv_param)); if (userdata) memcpy(parm->ctrl.ipuser, userdata, sizeof(parm->ctrl.ipuser)); @@ -936,7 +937,7 @@ int iucv_message_purge(struct iucv_path *path, struct iucv_message *msg, int rc; local_bh_disable(); - parm = percpu_ptr(iucv_param, smp_processor_id()); + parm = iucv_param[smp_processor_id()]; memset(parm, 0, sizeof(union iucv_param)); parm->purge.ippathid = path->pathid; parm->purge.ipmsgid = msg->id; @@ -1003,7 +1004,7 @@ int iucv_message_receive(struct iucv_path *path, struct iucv_message *msg, } local_bh_disable(); - parm = percpu_ptr(iucv_param, smp_processor_id()); + parm = iucv_param[smp_processor_id()]; memset(parm, 0, sizeof(union iucv_param)); parm->db.ipbfadr1 = (u32)(addr_t) buffer; parm->db.ipbfln1f = (u32) size; @@ -1040,7 +1041,7 @@ int iucv_message_reject(struct iucv_path *path, struct iucv_message *msg) int 
rc; local_bh_disable(); - parm = percpu_ptr(iucv_param, smp_processor_id()); + parm = iucv_param[smp_processor_id()]; memset(parm, 0, sizeof(union iucv_param)); parm->db.ippathid = path->pathid; parm->db.ipmsgid = msg->id; @@ -1074,7 +1075,7 @@ int iucv_message_reply(struct iucv_path *path, struct iucv_message *msg, int rc; local_bh_disable(); - parm = percpu_ptr(iucv_param, smp_processor_id()); + parm = iucv_param[smp_processor_id()]; memset(parm, 0, sizeof(union iucv_param)); if (flags & IUCV_IPRMDATA) { parm->dpl.ippathid = path->pathid; @@ -1118,7 +1119,7 @@ int iucv_message_send(struct iucv_path *path, struct iucv_message *msg, int rc; local_bh_disable(); - parm = percpu_ptr(iucv_param, smp_processor_id()); + parm = iucv_param[smp_processor_id()]; memset(parm, 0, sizeof(union iucv_param)); if (flags & IUCV_IPRMDATA) { /* Message of 8 bytes can be placed into the parameter list. */ @@ -1172,7 +1173,7 @@ int iucv_message_send2way(struct iucv_path *path, struct iucv_message *msg, int rc; local_bh_disable(); - parm = percpu_ptr(iucv_param, smp_processor_id()); + parm = iucv_param[smp_processor_id()]; memset(parm, 0, sizeof(union iucv_param)); if (flags & IUCV_IPRMDATA) { parm->dpl.ippathid = path->pathid; @@ -1559,7 +1560,7 @@ static void iucv_external_interrupt(u16 code) struct iucv_irq_data *p; struct iucv_irq_list *work; - p = percpu_ptr(iucv_irq_data, smp_processor_id()); + p = iucv_irq_data[smp_processor_id()]; if (p->ippathid >= iucv_max_pathid) { printk(KERN_WARNING "iucv_do_int: Got interrupt with " "pathid %d > max_connections (%ld)\n", @@ -1598,6 +1599,7 @@ static void iucv_external_interrupt(u16 code) static int __init iucv_init(void) { int rc; + int cpu; if (!MACHINE_IS_VM) { rc = -EPROTONOSUPPORT; @@ -1617,19 +1619,23 @@ static int __init iucv_init(void) rc = PTR_ERR(iucv_root); goto out_bus; } - /* Note: GFP_DMA used to get memory below 2G */ - iucv_irq_data = percpu_alloc(sizeof(struct iucv_irq_data), - GFP_KERNEL|GFP_DMA); - if (!iucv_irq_data) { - 
rc = -ENOMEM; - goto out_root; - } - /* Allocate parameter blocks. */ - iucv_param = percpu_alloc(sizeof(union iucv_param), - GFP_KERNEL|GFP_DMA); - if (!iucv_param) { - rc = -ENOMEM; - goto out_extint; + + for_each_online_cpu(cpu) { + /* Note: GFP_DMA used to get memory below 2G */ + iucv_irq_data[cpu] = kmalloc_node(sizeof(struct iucv_irq_data), + GFP_KERNEL|GFP_DMA, cpu_to_node(cpu)); + if (!iucv_irq_data[cpu]) { + rc = -ENOMEM; + goto out_free; + } + + /* Allocate parameter blocks. */ + iucv_param[cpu] = kmalloc_node(sizeof(union iucv_param), + GFP_KERNEL|GFP_DMA, cpu_to_node(cpu)); + if (!iucv_param[cpu]) { + rc = -ENOMEM; + goto out_free; + } } register_hotcpu_notifier(&iucv_cpu_notifier); ASCEBC(iucv_error_no_listener, 16); @@ -1638,9 +1644,13 @@ static int __init iucv_init(void) iucv_available = 1; return 0; -out_extint: - percpu_free(iucv_irq_data); -out_root: +out_free: + for_each_possible_cpu(cpu) { + kfree(iucv_param[cpu]); + iucv_param[cpu] = NULL; + kfree(iucv_irq_data[cpu]); + iucv_irq_data[cpu] = NULL; + } s390_root_dev_unregister(iucv_root); out_bus: bus_unregister(&iucv_bus); @@ -1658,6 +1668,7 @@ static int __init iucv_init(void) static void __exit iucv_exit(void) { struct iucv_irq_list *p, *n; + int cpu; spin_lock_irq(&iucv_queue_lock); list_for_each_entry_safe(p, n, &iucv_task_queue, list) @@ -1666,8 +1677,12 @@ static void __exit iucv_exit(void) kfree(p); spin_unlock_irq(&iucv_queue_lock); unregister_hotcpu_notifier(&iucv_cpu_notifier); - percpu_free(iucv_param); - percpu_free(iucv_irq_data); + for_each_possible_cpu(cpu) { + kfree(iucv_param[cpu]); + iucv_param[cpu] = NULL; + kfree(iucv_irq_data[cpu]); + iucv_irq_data[cpu] = NULL; + } s390_root_dev_unregister(iucv_root); bus_unregister(&iucv_bus); unregister_external_interrupt(0x4000, iucv_external_interrupt); From a2cb07376e397e7e788551f14acd972e22b09efd Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 20 Nov 2007 11:13:39 +0100 Subject: [PATCH 10/12] [S390] Fix memory detection. 
Before we're getting short on memory detection fixes here is the next one: if neither sclp nor diag260 report the storage size the detection loop will return immediately without detecting anything. Fix this by breaking the detection loop only if the memory end is known. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/early.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 8bf4ae1150be..1b3af7dab816 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -200,7 +200,7 @@ static noinline __init void find_memory_chunks(unsigned long memsize) cc = __tprot(addr); while (cc == old_cc) { addr += CHUNK_INCR; - if (addr >= memsize) + if (memsize && addr >= memsize) break; #ifndef CONFIG_64BIT if (addr == ADDR2G) From 06770a6e7d26ba980055caff815b9b3f5322c9db Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 20 Nov 2007 11:13:40 +0100 Subject: [PATCH 11/12] [S390] Add missing die_notifier() call to die(). Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/traps.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 8ec9def83ccb..8ed16a83fba7 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -260,6 +260,7 @@ void die(const char * str, struct pt_regs * regs, long err) bust_spinlocks(1); printk("%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter); print_modules(); + notify_die(DIE_OOPS, str, regs, err, current->thread.trap_no, SIGSEGV); show_regs(regs); bust_spinlocks(0); add_taint(TAINT_DIE); From c5d4a9997b4b2ec71cff0b219f05c6bc51f3fc79 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Tue, 20 Nov 2007 11:13:41 +0100 Subject: [PATCH 12/12] [S390] cio: Register/unregister subchannels only from kslowcrw. 
Make sure all subchannel handling is done on the slow path workqueue so that we don't have races between an old subchannel unregistering and a new subchannel with the same name registering. Signed-off-by: Cornelia Huck Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/css.c | 2 +- drivers/s390/cio/device_fsm.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index 838f7ac0dc32..6db31089d2d7 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -483,7 +483,7 @@ static DECLARE_WORK(css_reprobe_work, reprobe_all); void css_schedule_reprobe(void) { need_reprobe = 1; - queue_work(ccw_device_work, &css_reprobe_work); + queue_work(slow_path_wq, &css_reprobe_work); } EXPORT_SYMBOL_GPL(css_schedule_reprobe); diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c index 8867443b8060..bfad421cda66 100644 --- a/drivers/s390/cio/device_fsm.c +++ b/drivers/s390/cio/device_fsm.c @@ -1034,7 +1034,7 @@ device_trigger_reprobe(struct subchannel *sch) if (sch->schib.pmcw.dev != cdev->private->dev_id.devno) { PREPARE_WORK(&cdev->private->kick_work, ccw_device_move_to_orphanage); - queue_work(ccw_device_work, &cdev->private->kick_work); + queue_work(slow_path_wq, &cdev->private->kick_work); } else ccw_device_start_id(cdev, 0); }