kernel/hung_task.c
/*
 * Detect Hung Task
 *
 * kernel/hung_task.c - kernel thread for detecting tasks stuck in D state
 *
 */

#include <linux/mm.h>
#include <linux/cpu.h>
#include <linux/nmi.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/lockdep.h>
#include <linux/module.h>
#include <linux/sysctl.h>

/*
 * The number of tasks checked:
 */
unsigned long __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;

/*
 * Limit number of tasks checked in a batch.
 *
 * This value controls the preemptibility of khungtaskd since preemption
 * is disabled during the critical section. It also controls the size of
 * the RCU grace period. So it needs to be upper-bound.
 */
#define HUNG_TASK_BATCHING 1024
/*
 * Zero means infinite timeout - no checking done:
 */
unsigned long __read_mostly sysctl_hung_task_timeout_secs = CONFIG_DEFAULT_HUNG_TASK_TIMEOUT;

unsigned long __read_mostly sysctl_hung_task_warnings = 10;

static int __read_mostly did_panic;

static struct task_struct *watchdog_task;

/*
 * Should we panic (and reboot, if panic_timeout= is set) when a
 * hung task is detected:
 */
unsigned int __read_mostly sysctl_hung_task_panic =
				CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE;

static int __init hung_task_panic_setup(char *str)
{
	sysctl_hung_task_panic = simple_strtoul(str, NULL, 0);

	return 1;
}
__setup("hung_task_panic=", hung_task_panic_setup);

static int
hung_task_panic(struct notifier_block *this, unsigned long event, void *ptr)
{
	did_panic = 1;

	return NOTIFY_DONE;
}

static struct notifier_block panic_block = {
	.notifier_call = hung_task_panic,
};
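
/*
 * check_hung_task() is called from check_hung_uninterruptible_tasks(), under
 * rcu_read_lock(), for every thread found in TASK_UNINTERRUPTIBLE. It warns
 * (and optionally panics) when the thread has not been context switched at
 * all since the previous scan.
 */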
static void check_hung_task(struct task_struct *t, unsigned long timeout)
{
	unsigned long switch_count = t->nvcsw + t->nivcsw;

	/*
	 * Ensure the task is not frozen.
	 * Also, when a freshly created task is scheduled once, it changes
	 * its state to TASK_UNINTERRUPTIBLE without ever having been
	 * switched out, so it mustn't be checked.
	 */
	if (unlikely(t->flags & PF_FROZEN || !switch_count))
		return;
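
	/*
	 * nvcsw + nivcsw is the number of voluntary plus involuntary context
	 * switches this task has done. If that sum changed since the previous
	 * scan, the task ran in the meantime and is not hung; just record the
	 * new count for the next pass.
	 */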
	if (switch_count != t->last_switch_count) {
		t->last_switch_count = switch_count;
		return;
	}
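
	/*
	 * sysctl_hung_task_warnings is a budget of reports: it is decremented
	 * once per warning and, when it reaches zero, no further hung task
	 * reports are printed until the count is raised again.
	 */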
	if (!sysctl_hung_task_warnings)
		return;
	sysctl_hung_task_warnings--;

	/*
	 * Ok, the task did not get scheduled for more than 2 minutes
	 * (the default timeout), complain:
	 */
	printk(KERN_ERR "INFO: task %s:%d blocked for more than "
			"%ld seconds.\n", t->comm, t->pid, timeout);
	printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
			" disables this message.\n");
	sched_show_task(t);
	debug_show_held_locks(t);

	touch_nmi_watchdog();

	if (sysctl_hung_task_panic)
		panic("hung_task: blocked tasks");
}

/*
 * To avoid extending the RCU grace period for an unbounded amount of time,
 * periodically exit the critical section and enter a new one.
 *
 * For preemptible RCU it is sufficient to call rcu_read_unlock in order
 * to exit the grace period. For classic RCU, a reschedule is required.
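 *
 * Note that g and t are pinned with get_task_struct() before the read-side
 * critical section is dropped, so neither task_struct can be freed while
 * this CPU is outside rcu_read_lock().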
 */
static void rcu_lock_break(struct task_struct *g, struct task_struct *t)
{
	get_task_struct(g);
	get_task_struct(t);
	rcu_read_unlock();
	cond_resched();
	rcu_read_lock();
	put_task_struct(t);
	put_task_struct(g);
}

/*
 * Check whether a TASK_UNINTERRUPTIBLE task does not get woken up for
 * a really long time (120 seconds by default). If that happens, print out
 * a warning.
 */
static void check_hung_uninterruptible_tasks(unsigned long timeout)
{
	int max_count = sysctl_hung_task_check_count;
	int batch_count = HUNG_TASK_BATCHING;
	struct task_struct *g, *t;

	/*
	 * If the system crashed already then all bets are off,
	 * do not report extra hung tasks:
	 */
	if (test_taint(TAINT_DIE) || did_panic)
		return;
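
	/*
	 * do_each_thread()/while_each_thread() walk every thread in the
	 * system. max_count bounds how many threads are inspected in total,
	 * while batch_count bounds how long we stay inside a single RCU
	 * read-side critical section before calling rcu_lock_break().
	 */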
	rcu_read_lock();
	do_each_thread(g, t) {
		if (!max_count--)
			goto unlock;
		if (!--batch_count) {
			batch_count = HUNG_TASK_BATCHING;
			rcu_lock_break(g, t);
			/* Exit if t or g was unhashed during refresh. */
			if (t->state == TASK_DEAD || g->state == TASK_DEAD)
				goto unlock;
		}
		/* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
		if (t->state == TASK_UNINTERRUPTIBLE)
			check_hung_task(t, timeout);
	} while_each_thread(g, t);
 unlock:
	rcu_read_unlock();
}

static unsigned long timeout_jiffies(unsigned long timeout)
{
	/* timeout of 0 will disable the watchdog */
	return timeout ? timeout * HZ : MAX_SCHEDULE_TIMEOUT;
}

/*
 * Process updating of timeout sysctl
 */
int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
				  void __user *buffer,
				  size_t *lenp, loff_t *ppos)
{
	int ret;

	ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);

	if (ret || !write)
		goto out;
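
	/*
	 * Writing a new value wakes up khungtaskd so that a shortened timeout
	 * takes effect immediately instead of after the previously programmed
	 * sleep interval expires.
	 */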
	wake_up_process(watchdog_task);

 out:
	return ret;
}

/*
 * kthread which checks for tasks stuck in D state
 */
static int watchdog(void *dummy)
{
	set_user_nice(current, 0);

	for ( ; ; ) {
		unsigned long timeout = sysctl_hung_task_timeout_secs;
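
		/*
		 * Sleep for the configured interval.
		 * schedule_timeout_interruptible() returns non-zero when the
		 * sleep is cut short, e.g. by the sysctl handler waking
		 * khungtaskd, in which case the timeout is re-read before
		 * going back to sleep.
		 */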
		while (schedule_timeout_interruptible(timeout_jiffies(timeout)))
			timeout = sysctl_hung_task_timeout_secs;

		check_hung_uninterruptible_tasks(timeout);
	}

	return 0;
}

static int __init hung_task_init(void)
{
	atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
	watchdog_task = kthread_run(watchdog, NULL, "khungtaskd");

	return 0;
}

module_init(hung_task_init);