linux_dsm_epyc7002/arch/mips/kernel/entry.S
Paul Burton d8550860d9 MIPS: Fix IRQ tracing & lockdep when rescheduling
When the scheduler sets TIF_NEED_RESCHED & we call into the scheduler
from arch/mips/kernel/entry.S we disable interrupts. This is true
regardless of whether we reach work_resched from syscall_exit_work,
resume_userspace or by looping after calling schedule(). Although we
disable interrupts in these paths we don't call trace_hardirqs_off()
before calling into C code which may acquire locks, and we therefore
leave lockdep with an inconsistent view of whether interrupts are
disabled or not when CONFIG_PROVE_LOCKING & CONFIG_DEBUG_LOCKDEP are
both enabled.

Without tracing this interrupt state lockdep will print warnings such
as the following once a task returns from a syscall via
syscall_exit_partial with TIF_NEED_RESCHED set:

[   49.927678] ------------[ cut here ]------------
[   49.934445] WARNING: CPU: 0 PID: 1 at kernel/locking/lockdep.c:3687 check_flags.part.41+0x1dc/0x1e8
[   49.946031] DEBUG_LOCKS_WARN_ON(current->hardirqs_enabled)
[   49.946355] CPU: 0 PID: 1 Comm: init Not tainted 4.10.0-00439-gc9fd5d362289-dirty #197
[   49.963505] Stack : 0000000000000000 ffffffff81bb5d6a 0000000000000006 ffffffff801ce9c4
[   49.974431]         0000000000000000 0000000000000000 0000000000000000 000000000000004a
[   49.985300]         ffffffff80b7e487 ffffffff80a24498 a8000000ff160000 ffffffff80ede8b8
[   49.996194]         0000000000000001 0000000000000000 0000000000000000 0000000077c8030c
[   50.007063]         000000007fd8a510 ffffffff801cd45c 0000000000000000 a8000000ff127c88
[   50.017945]         0000000000000000 ffffffff801cf928 0000000000000001 ffffffff80a24498
[   50.028827]         0000000000000000 0000000000000001 0000000000000000 0000000000000000
[   50.039688]         0000000000000000 a8000000ff127bd0 0000000000000000 ffffffff805509bc
[   50.050575]         00000000140084e0 0000000000000000 0000000000000000 0000000000040a00
[   50.061448]         0000000000000000 ffffffff8010e1b0 0000000000000000 ffffffff805509bc
[   50.072327]         ...
[   50.076087] Call Trace:
[   50.079869] [<ffffffff8010e1b0>] show_stack+0x80/0xa8
[   50.086577] [<ffffffff805509bc>] dump_stack+0x10c/0x190
[   50.093498] [<ffffffff8015dde0>] __warn+0xf0/0x108
[   50.099889] [<ffffffff8015de34>] warn_slowpath_fmt+0x3c/0x48
[   50.107241] [<ffffffff801c15b4>] check_flags.part.41+0x1dc/0x1e8
[   50.114961] [<ffffffff801c239c>] lock_is_held_type+0x8c/0xb0
[   50.122291] [<ffffffff809461b8>] __schedule+0x8c0/0x10f8
[   50.129221] [<ffffffff80946a60>] schedule+0x30/0x98
[   50.135659] [<ffffffff80106278>] work_resched+0x8/0x34
[   50.142397] ---[ end trace 0cb4f6ef5b99fe21 ]---
[   50.148405] possible reason: unannotated irqs-off.
[   50.154600] irq event stamp: 400463
[   50.159566] hardirqs last  enabled at (400463): [<ffffffff8094edc8>] _raw_spin_unlock_irqrestore+0x40/0xa8
[   50.171981] hardirqs last disabled at (400462): [<ffffffff8094eb98>] _raw_spin_lock_irqsave+0x30/0xb0
[   50.183897] softirqs last  enabled at (400450): [<ffffffff8016580c>] __do_softirq+0x4ac/0x6a8
[   50.195015] softirqs last disabled at (400425): [<ffffffff80165e78>] irq_exit+0x110/0x128

Fix this by using the TRACE_IRQS_OFF macro to call trace_hardirqs_off()
when CONFIG_TRACE_IRQFLAGS is enabled. This is done before invoking
schedule() following the work_resched label because:

 1) Interrupts are disabled regardless of the path we take to reach
    work_resched() & schedule().

 2) Performing the tracing here avoids the need to do it in paths which
    disable interrupts but don't call out to C code before hitting a
    path which uses the RESTORE_SOME macro that will call
    trace_hardirqs_on() or trace_hardirqs_off() as appropriate.

We call trace_hardirqs_on() using the TRACE_IRQS_ON macro before calling
syscall_trace_leave() for similar reasons, ensuring that lockdep has a
consistent view of state after we re-enable interrupts.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Fixes: 1da177e4c3 ("Linux-2.6.12-rc2")
Cc: linux-mips@linux-mips.org
Cc: stable <stable@vger.kernel.org>
Patchwork: https://patchwork.linux-mips.org/patch/15385/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2017-06-30 04:40:18 +02:00

180 lines
4.2 KiB
ArmAsm

/*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
* Copyright (C) 1994 - 2000, 2001, 2003 Ralf Baechle
* Copyright (C) 1999, 2000 Silicon Graphics, Inc.
* Copyright (C) 2001 MIPS Technologies, Inc.
*/
#include <asm/asm.h>
#include <asm/asmmacro.h>
#include <asm/compiler.h>
#include <asm/irqflags.h>
#include <asm/regdef.h>
#include <asm/mipsregs.h>
#include <asm/stackframe.h>
#include <asm/isadep.h>
#include <asm/thread_info.h>
#include <asm/war.h>
#ifndef CONFIG_PREEMPT
#define resume_kernel restore_all
#else
#define __ret_from_irq ret_from_exception
#endif
.text
.align 5
#ifndef CONFIG_PREEMPT
FEXPORT(ret_from_exception)
local_irq_disable # preempt stop
b __ret_from_irq
#endif
FEXPORT(ret_from_irq)
LONG_S s0, TI_REGS($28)
FEXPORT(__ret_from_irq)
/*
* We can be coming here from a syscall done in the kernel space,
* e.g. a failed kernel_execve().
*/
resume_userspace_check:
LONG_L t0, PT_STATUS(sp) # returning to kernel mode?
andi t0, t0, KU_USER
beqz t0, resume_kernel
resume_userspace:
local_irq_disable # make sure we dont miss an
# interrupt setting need_resched
# between sampling and return
LONG_L a2, TI_FLAGS($28) # current->work
andi t0, a2, _TIF_WORK_MASK # (ignoring syscall_trace)
bnez t0, work_pending
j restore_all
#ifdef CONFIG_PREEMPT
resume_kernel:
local_irq_disable
lw t0, TI_PRE_COUNT($28)
bnez t0, restore_all
need_resched:
LONG_L t0, TI_FLAGS($28)
andi t1, t0, _TIF_NEED_RESCHED
beqz t1, restore_all
LONG_L t0, PT_STATUS(sp) # Interrupts off?
andi t0, 1
beqz t0, restore_all
jal preempt_schedule_irq
b need_resched
#endif
FEXPORT(ret_from_kernel_thread)
jal schedule_tail # a0 = struct task_struct *prev
move a0, s1
jal s0
j syscall_exit
FEXPORT(ret_from_fork)
jal schedule_tail # a0 = struct task_struct *prev
FEXPORT(syscall_exit)
local_irq_disable # make sure need_resched and
# signals dont change between
# sampling and return
LONG_L a2, TI_FLAGS($28) # current->work
li t0, _TIF_ALLWORK_MASK
and t0, a2, t0
bnez t0, syscall_exit_work
restore_all: # restore full frame
.set noat
RESTORE_TEMP
RESTORE_AT
RESTORE_STATIC
restore_partial: # restore partial frame
#ifdef CONFIG_TRACE_IRQFLAGS
SAVE_STATIC
SAVE_AT
SAVE_TEMP
LONG_L v0, PT_STATUS(sp)
#if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX)
and v0, ST0_IEP
#else
and v0, ST0_IE
#endif
beqz v0, 1f
jal trace_hardirqs_on
b 2f
1: jal trace_hardirqs_off
2:
RESTORE_TEMP
RESTORE_AT
RESTORE_STATIC
#endif
RESTORE_SOME
RESTORE_SP_AND_RET
.set at
work_pending:
andi t0, a2, _TIF_NEED_RESCHED # a2 is preloaded with TI_FLAGS
beqz t0, work_notifysig
work_resched:
TRACE_IRQS_OFF
jal schedule
local_irq_disable # make sure need_resched and
# signals dont change between
# sampling and return
LONG_L a2, TI_FLAGS($28)
andi t0, a2, _TIF_WORK_MASK # is there any work to be done
# other than syscall tracing?
beqz t0, restore_all
andi t0, a2, _TIF_NEED_RESCHED
bnez t0, work_resched
work_notifysig: # deal with pending signals and
# notify-resume requests
move a0, sp
li a1, 0
jal do_notify_resume # a2 already loaded
j resume_userspace_check
FEXPORT(syscall_exit_partial)
local_irq_disable # make sure need_resched doesn't
# change between and return
LONG_L a2, TI_FLAGS($28) # current->work
li t0, _TIF_ALLWORK_MASK
and t0, a2
beqz t0, restore_partial
SAVE_STATIC
syscall_exit_work:
LONG_L t0, PT_STATUS(sp) # returning to kernel mode?
andi t0, t0, KU_USER
beqz t0, resume_kernel
li t0, _TIF_WORK_SYSCALL_EXIT
and t0, a2 # a2 is preloaded with TI_FLAGS
beqz t0, work_pending # trace bit set?
local_irq_enable # could let syscall_trace_leave()
# call schedule() instead
TRACE_IRQS_ON
move a0, sp
jal syscall_trace_leave
b resume_userspace
#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) || \
defined(CONFIG_MIPS_MT)
/*
* MIPS32R2 Instruction Hazard Barrier - must be called
*
* For C code use the inline version named instruction_hazard().
*/
LEAF(mips_ihb)
.set MIPS_ISA_LEVEL_RAW
jr.hb ra
nop
END(mips_ihb)
#endif /* CONFIG_CPU_MIPSR2 or CONFIG_CPU_MIPSR6 or CONFIG_MIPS_MT */