Blame view

drivers/char/hangcheck-timer.c 5.09 KB
921a3d4d3   Thomas Gleixner   treewide: Replace...
1
  // SPDX-License-Identifier: GPL-2.0-only
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2
3
4
5
6
  /*
   * hangcheck-timer.c
   *
   * Driver for a little io fencing timer.
   *
696f9486d   Joel Becker   [PATCH] hangcheck...
7
   * Copyright (C) 2002, 2003 Oracle.  All rights reserved.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
8
9
   *
   * Author: Joel Becker <joel.becker@oracle.com>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
10
11
12
13
14
15
   */
  
  /*
   * The hangcheck-timer driver uses the TSC to catch delays that
   * jiffies does not notice.  A timer is set.  When the timer fires, it
   * checks whether it was delayed and if that delay exceeds a given
8dfba4d71   Joe Perches   drivers/char/: Sp...
16
   * margin of error.  The hangcheck_tick module parameter takes the timer
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
17
18
19
20
21
   * duration in seconds.  The hangcheck_margin parameter defines the
   * margin of error, in seconds.  The defaults are 180 seconds for the
   * timer and 60 seconds for the margin of error.  IOW, a timer is set
   * for 180 seconds.  When the timer fires, the callback checks the
   * actual duration that the timer waited.  If the duration exceeds the
8b932edfb   Shile Zhang   hangcheck-timer: ...
22
   * allotted time and margin (here 60 + 180, or 240 seconds), the machine
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
23
24
25
26
27
28
29
30
31
32
33
34
   * is restarted.  A healthy machine will have the duration match the
   * expected timeout very closely.
   */
  
  #include <linux/module.h>
  #include <linux/moduleparam.h>
  #include <linux/types.h>
  #include <linux/kernel.h>
  #include <linux/fs.h>
  #include <linux/mm.h>
  #include <linux/reboot.h>
  #include <linux/init.h>
696f9486d   Joel Becker   [PATCH] hangcheck...
35
  #include <linux/delay.h>
7c0f6ba68   Linus Torvalds   Replace <asm/uacc...
36
  #include <linux/uaccess.h>
696f9486d   Joel Becker   [PATCH] hangcheck...
37
  #include <linux/sysrq.h>
e8edc6e03   Alexey Dobriyan   Detach sched.h fr...
38
  #include <linux/timer.h>
2044fdb03   Thomas Gleixner   hangcheck-timer: ...
39
  #include <linux/hrtimer.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
40

940370fc8   Yury Polyanskiy   hangcheck-timer: ...
41
  #define VERSION_STR "0.9.1"
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
42
43
44
45
46
47
48
  
  /* Built-in defaults, in seconds, for the two timing parameters below. */
  #define DEFAULT_IOFENCE_MARGIN 60	/* Default fudge factor, in seconds */
  #define DEFAULT_IOFENCE_TICK 180	/* Default timer timeout, in seconds */
  
  /* Timer period in seconds; the timer is armed hangcheck_tick*HZ jiffies ahead. */
  static int hangcheck_tick = DEFAULT_IOFENCE_TICK;
  /* Extra delay, in seconds, tolerated on top of hangcheck_tick. */
  static int hangcheck_margin = DEFAULT_IOFENCE_MARGIN;
  static int hangcheck_reboot;  /* Defaults to not reboot */
696f9486d   Joel Becker   [PATCH] hangcheck...
49
  static int hangcheck_dump_tasks;  /* Defaults to not dumping SysRQ T */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
50

696f9486d   Joel Becker   [PATCH] hangcheck...
51
  /* options - modular */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
52
53
54
55
56
57
  /*
   * Each tunable is an int registered with module_param() and described
   * with MODULE_PARM_DESC().
   * NOTE(review): the permission argument is 0, which by module_param()
   * convention means no sysfs entry is created -- confirm.
   */
  module_param(hangcheck_tick, int, 0);
  MODULE_PARM_DESC(hangcheck_tick, "Timer delay.");
  module_param(hangcheck_margin, int, 0);
  MODULE_PARM_DESC(hangcheck_margin, "If the hangcheck timer has been delayed more than hangcheck_margin seconds, the driver will fire.");
  module_param(hangcheck_reboot, int, 0);
  MODULE_PARM_DESC(hangcheck_reboot, "If nonzero, the machine will reboot when the timer margin is exceeded.");
696f9486d   Joel Becker   [PATCH] hangcheck...
58
59
  module_param(hangcheck_dump_tasks, int, 0);
  MODULE_PARM_DESC(hangcheck_dump_tasks, "If nonzero, the machine will dump the system task state when the timer margin is exceeded.");
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
60

696f9486d   Joel Becker   [PATCH] hangcheck...
61
  MODULE_AUTHOR("Oracle");
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
62
63
  MODULE_DESCRIPTION("Hangcheck-timer detects when the system has gone out to lunch past a certain margin.");
  MODULE_LICENSE("GPL");
696f9486d   Joel Becker   [PATCH] hangcheck...
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
  MODULE_VERSION(VERSION_STR);
  
  /* options - nonmodular */
  #ifndef MODULE
  
  /*
   * Boot-option handler for the timer period (non-modular builds only).
   *
   * Stores the integer parsed from @str in hangcheck_tick when
   * get_option() returns non-zero; otherwise hangcheck_tick is left
   * untouched.  Always returns 1.
   */
  static int __init hangcheck_parse_tick(char *str)
  {
  	int value;

  	if (!get_option(&str, &value))
  		return 1;

  	hangcheck_tick = value;
  	return 1;
  }
  
  /*
   * Boot-option handler for the margin of error (non-modular builds only).
   *
   * Stores the integer parsed from @str in hangcheck_margin when
   * get_option() returns non-zero; otherwise hangcheck_margin is left
   * untouched.  Always returns 1.
   */
  static int __init hangcheck_parse_margin(char *str)
  {
  	int value;

  	if (!get_option(&str, &value))
  		return 1;

  	hangcheck_margin = value;
  	return 1;
  }
  
  /*
   * Boot-option handler for the reboot switch (non-modular builds only).
   *
   * Stores the integer parsed from @str in hangcheck_reboot when
   * get_option() returns non-zero; otherwise hangcheck_reboot is left
   * untouched.  Always returns 1.
   */
  static int __init hangcheck_parse_reboot(char *str)
  {
  	int value;

  	if (!get_option(&str, &value))
  		return 1;

  	hangcheck_reboot = value;
  	return 1;
  }
  
  /*
   * Boot-option handler for the task-dump switch (non-modular builds only).
   *
   * Stores the integer parsed from @str in hangcheck_dump_tasks when
   * get_option() returns non-zero; otherwise hangcheck_dump_tasks is left
   * untouched.  Always returns 1.
   */
  static int __init hangcheck_parse_dump_tasks(char *str)
  {
  	int value;

  	if (!get_option(&str, &value))
  		return 1;

  	hangcheck_dump_tasks = value;
  	return 1;
  }
  
  /*
   * Associate each option name with its parse handler via __setup().
   * Note the names use the "hcheck_" prefix, unlike the "hangcheck_"
   * names of the module parameters.
   */
  __setup("hcheck_tick", hangcheck_parse_tick);
  __setup("hcheck_margin", hangcheck_parse_margin);
  __setup("hcheck_reboot", hangcheck_parse_reboot);
  __setup("hcheck_dump_tasks", hangcheck_parse_dump_tasks);
  #endif /* not MODULE */
2044fdb03   Thomas Gleixner   hangcheck-timer: ...
106
  /*
   * Nanoseconds per second: scales the second-based tick/margin values to
   * the units of the ktime_get_ns() timestamps they are compared against.
   */
  #define TIMER_FREQ 1000000000ULL
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
107
108
109
  
  /*
   * hangcheck_tsc:        ktime_get_ns() value recorded when the timer was
   *                       last (re)armed.
   * hangcheck_tsc_margin: largest tolerated gap between two timer runs,
   *                       (hangcheck_margin + hangcheck_tick) * TIMER_FREQ.
   */
  static unsigned long long hangcheck_tsc, hangcheck_tsc_margin;
24ed960ab   Kees Cook   treewide: Switch ...
110
  static void hangcheck_fire(struct timer_list *);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
111

1d27e3e22   Kees Cook   timer: Remove exp...
112
  static DEFINE_TIMER(hangcheck_ticktock, hangcheck_fire);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
113

24ed960ab   Kees Cook   treewide: Switch ...
114
  static void hangcheck_fire(struct timer_list *unused)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
115
116
  {
  	unsigned long long cur_tsc, tsc_diff;
2044fdb03   Thomas Gleixner   hangcheck-timer: ...
117
  	cur_tsc = ktime_get_ns();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
118
119
120
121
122
123
124
  
  	if (cur_tsc > hangcheck_tsc)
  		tsc_diff = cur_tsc - hangcheck_tsc;
  	else
  		tsc_diff = (cur_tsc + (~0ULL - hangcheck_tsc)); /* or something */
  
  	if (tsc_diff > hangcheck_tsc_margin) {
696f9486d   Joel Becker   [PATCH] hangcheck...
125
126
127
128
  		if (hangcheck_dump_tasks) {
  			printk(KERN_CRIT "Hangcheck: Task state:
  ");
  #ifdef CONFIG_MAGIC_SYSRQ
f335397d1   Dmitry Torokhov   Input: sysrq - dr...
129
  			handle_sysrq('t');
696f9486d   Joel Becker   [PATCH] hangcheck...
130
131
  #endif  /* CONFIG_MAGIC_SYSRQ */
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
132
133
134
  		if (hangcheck_reboot) {
  			printk(KERN_CRIT "Hangcheck: hangcheck is restarting the machine.
  ");
970d32443   Eric W. Biederman   [PATCH] In hangch...
135
  			emergency_restart();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
136
137
138
139
140
  		} else {
  			printk(KERN_CRIT "Hangcheck: hangcheck value past margin!
  ");
  		}
  	}
940370fc8   Yury Polyanskiy   hangcheck-timer: ...
141
142
143
144
145
146
147
148
  #if 0
  	/*
  	 * Enable to investigate delays in detail
  	 */
  	printk("Hangcheck: called %Ld ns since last time (%Ld ns overshoot)
  ",
  			tsc_diff, tsc_diff - hangcheck_tick*TIMER_FREQ);
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
149
  	mod_timer(&hangcheck_ticktock, jiffies + (hangcheck_tick*HZ));
2044fdb03   Thomas Gleixner   hangcheck-timer: ...
150
  	hangcheck_tsc = ktime_get_ns();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
151
152
153
154
155
156
157
158
  }
  
  
  /*
   * hangcheck_init - module entry point.
   *
   * Logs the configured tick and margin, derives the nanosecond threshold
   * hangcheck_tsc_margin from them, records the starting timestamp and
   * arms hangcheck_ticktock for the first time.
   *
   * Returns 0.
   */
  static int __init hangcheck_init(void)
  {
  	printk("Hangcheck: starting hangcheck timer %s (tick is %d seconds, margin is %d seconds).\n",
  	       VERSION_STR, hangcheck_tick, hangcheck_margin);

  	/*
  	 * Widen to unsigned long long before adding so the sum and the
  	 * scaling by TIMER_FREQ are carried out in 64-bit arithmetic.
  	 */
  	hangcheck_tsc_margin =
  		(unsigned long long)hangcheck_margin + hangcheck_tick;
  	hangcheck_tsc_margin *= TIMER_FREQ;

  	hangcheck_tsc = ktime_get_ns();
  	mod_timer(&hangcheck_ticktock, jiffies + (hangcheck_tick*HZ));

  	return 0;
  }
  
  
  /*
   * hangcheck_exit - module exit point.
   *
   * Removes hangcheck_ticktock with del_timer_sync() and logs that the
   * hangcheck timer has been stopped.
   */
  static void __exit hangcheck_exit(void)
  {
  	del_timer_sync(&hangcheck_ticktock);
  	printk("Hangcheck: Stopped hangcheck timer.\n");
  }
  
  /* Register hangcheck_init/hangcheck_exit as the module's init and exit hooks. */
  module_init(hangcheck_init);
  module_exit(hangcheck_exit);