mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-18 12:56:18 +07:00
softlockup: check all tasks in hung_task
Impact: extend the scope of hung-task checks. Changed the default value of hung_task_check_count to PID_MAX_LIMIT. hung_task_batch_count was added to put an upper bound on the length of the critical section: after every hung_task_batch_count checks the RCU lock is released and re-acquired, so it is never held for too long. Keeping the critical section small minimizes the time during which preemption is disabled and keeps RCU grace periods short. To prevent following a stale pointer, get_task_struct is called on g and t. To verify that g and t have not been unhashed while outside the critical section, the task states are checked. The design was proposed by Frédéric Weisbecker. Signed-off-by: Mandeep Singh Baines <msb@google.com> Suggested-by: Frédéric Weisbecker <fweisbec@gmail.com> Acked-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
parent
5e54f5986a
commit
ce9dbe244b
@ -17,9 +17,18 @@
|
||||
#include <linux/sysctl.h>
|
||||
|
||||
/*
|
||||
* Have a reasonable limit on the number of tasks checked:
|
||||
* The number of tasks checked:
|
||||
*/
|
||||
unsigned long __read_mostly sysctl_hung_task_check_count = 1024;
|
||||
unsigned long __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;
|
||||
|
||||
/*
|
||||
* Limit number of tasks checked in a batch.
|
||||
*
|
||||
* This value controls the preemptibility of khungtaskd since preemption
|
||||
* is disabled during the critical section. It also controls the size of
|
||||
* the RCU grace period. So it needs to be upper-bounded.
|
||||
*/
|
||||
#define HUNG_TASK_BATCHING 1024
|
||||
|
||||
/*
|
||||
* Zero means infinite timeout - no checking done:
|
||||
@ -109,6 +118,24 @@ static void check_hung_task(struct task_struct *t, unsigned long now,
|
||||
panic("hung_task: blocked tasks");
|
||||
}
|
||||
|
||||
/*
|
||||
* To avoid extending the RCU grace period for an unbounded amount of time,
|
||||
* periodically exit the critical section and enter a new one.
|
||||
*
|
||||
* For preemptible RCU it is sufficient to call rcu_read_unlock in order to
|
||||
* exit the grace period. For classic RCU, a reschedule is required.
|
||||
*/
|
||||
static void rcu_lock_break(struct task_struct *g, struct task_struct *t)
|
||||
{
|
||||
/*
 * Take a reference on both tasks before leaving the critical section,
 * so that neither pointer can go stale while the RCU read lock is not
 * held.
 */
get_task_struct(g);
|
||||
get_task_struct(t);
|
||||
/* Leave the read-side critical section that the caller entered. */
rcu_read_unlock();
|
||||
/* Offer a reschedule point; classic RCU requires one here. */
cond_resched();
|
||||
/*
 * Re-enter the critical section before dropping the references taken
 * at the top of this function. The caller re-checks the state of both
 * tasks after this function returns.
 */
rcu_read_lock();
|
||||
put_task_struct(t);
|
||||
put_task_struct(g);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check whether a TASK_UNINTERRUPTIBLE does not get woken up for
|
||||
* a really long time (120 seconds). If that happens, print out
|
||||
@ -117,6 +144,7 @@ static void check_hung_task(struct task_struct *t, unsigned long now,
|
||||
static void check_hung_uninterruptible_tasks(unsigned long timeout)
|
||||
{
|
||||
int max_count = sysctl_hung_task_check_count;
|
||||
int batch_count = HUNG_TASK_BATCHING;
|
||||
unsigned long now = get_timestamp();
|
||||
struct task_struct *g, *t;
|
||||
|
||||
@ -131,6 +159,13 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
|
||||
do_each_thread(g, t) {
|
||||
if (!--max_count)
|
||||
goto unlock;
|
||||
if (!--batch_count) {
|
||||
batch_count = HUNG_TASK_BATCHING;
|
||||
rcu_lock_break(g, t);
|
||||
/* Exit if t or g was unhashed during refresh. */
|
||||
if (t->state == TASK_DEAD || g->state == TASK_DEAD)
|
||||
goto unlock;
|
||||
}
|
||||
/* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
|
||||
if (t->state == TASK_UNINTERRUPTIBLE)
|
||||
check_hung_task(t, now, timeout);
|
||||
|
Loading…
Reference in New Issue
Block a user