Commit ac08c26492a0ad4d94a25bd47d5630cd38337069

Authored by Thomas Gleixner
Committed by Linus Torvalds
1 parent e24650c2e7

[PATCH] posix-cpu-timers: prevent signal delivery starvation

The integer divisions in the timer accounting code can round the result
down to 0.  Adding 0 has no effect, so signal delivery stops.

Clamp the division result to minimum 1 to avoid this.

The problem was reported by Seongbae Park <spark@google.com>, who also
provided an initial patch.

Roland sayeth:

  I have had some more time to think about the problem, and to reproduce it
  using Toyo's test case.  For the record, if my understanding of the problem
  is correct, this happens only in one very particular case.  First, the
  expiry time has to be so soon that in cputime_t units (usually 1s/HZ ticks)
  it's < nthreads so the division yields zero.  Second, it only affects each
  thread that is so new that its CPU time accumulation is zero so now+0 is
  still zero and ->it_*_expires winds up staying zero.  For the VIRT and PROF
  clocks when cputime_t is tick granularity (or the SCHED clock on
  configurations where sched_clock's value only advances on clock ticks), this
  is not hard to arrange with new threads starting up and blocking before they
  accumulate a whole tick of CPU time.  That's what happens in Toyo's test
  case.

  Note that in general it is fine for that division to round down to zero,
  and set each thread's expiry time to its "now" time.  The problem only
  arises with threads whose "now" value is still zero, so that now+0 winds up
  0 and is interpreted as "not set" instead of ">= now".  So it would be a
  sufficient and more precise fix to just use max(ticks, 1) inside the loop
  when setting each it_*_expires value.

  But, it does no harm to round the division up to one and always advance
  every thread's expiry time.  If the thread didn't already fire timers for
  the expiry time of "now", there is no expectation that it will do so before
  the next tick anyway.  So I followed Thomas's patch in lifting the max out
  of the loops.

  This patch also covers the reload cases, which are harder to write a test
  for (and I didn't try).  I've tested it with Toyo's case and it fixes that.

[toyoa@mvista.com: fix: min_t -> max_t]
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Roland McGrath <roland@redhat.com>
Cc: Daniel Walker <dwalker@mvista.com>
Cc: Toyo Abe <toyoa@mvista.com>
Cc: john stultz <johnstul@us.ibm.com>
Cc: Roman Zippel <zippel@linux-m68k.org>
Cc: Seongbae Park <spark@google.com>
Cc: Peter Mattis <pmattis@google.com>
Cc: Rohit Seth <rohitseth@google.com>
Cc: Martin Bligh <mbligh@google.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
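
For illustration, the following is a minimal user-space sketch of the
arithmetic described above; it is not kernel code, and the names and values
(expires, nthreads, thread_now, div_non_zero) are made-up stand-ins for the
cputime_t bookkeeping in process_timer_rebalance().  It shows how the
per-thread quotient rounds down to 0 when the remaining time is smaller than
the thread count, how a brand-new thread then ends up with the "not armed"
expiry value 0, and how clamping the quotient to a minimum of 1 (what the
patch's cputime_div_non_zero() does) avoids that.

/*
 * Illustrative user-space sketch only -- not kernel code.
 */
#include <stdio.h>

/* analogue of the patch's cputime_div_non_zero(): divide, never return 0 */
static unsigned long div_non_zero(unsigned long time, unsigned long div)
{
        unsigned long res = time / div;

        return res ? res : 1;
}

int main(void)
{
        unsigned long expires = 3;     /* ticks until the process timer fires */
        unsigned long nthreads = 10;   /* more live threads than remaining ticks */
        unsigned long thread_now = 0;  /* a brand-new thread with no CPU time yet */

        unsigned long left = expires / nthreads;        /* rounds down to 0 */
        unsigned long left_fixed = div_non_zero(expires, nthreads);

        /* 0 doubles as the "timer not armed" sentinel, so this stays disarmed */
        printf("unclamped per-thread expiry: %lu\n", thread_now + left);

        /* with the clamp the expiry is at least now + 1, so the timer fires */
        printf("clamped per-thread expiry:   %lu\n", thread_now + left_fixed);

        return 0;
}

Rounding the quotient up costs at most one extra tick before a thread's timer
fires, which matches Roland's note above that no expiry before the next tick
was expected anyway.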

Showing 1 changed file with 21 additions and 6 deletions

kernel/posix-cpu-timers.c
1 /* 1 /*
2 * Implement CPU time clocks for the POSIX clock interface. 2 * Implement CPU time clocks for the POSIX clock interface.
3 */ 3 */
4 4
5 #include <linux/sched.h> 5 #include <linux/sched.h>
6 #include <linux/posix-timers.h> 6 #include <linux/posix-timers.h>
7 #include <asm/uaccess.h> 7 #include <asm/uaccess.h>
8 #include <linux/errno.h> 8 #include <linux/errno.h>
9 9
10 static int check_clock(const clockid_t which_clock) 10 static int check_clock(const clockid_t which_clock)
11 { 11 {
12 int error = 0; 12 int error = 0;
13 struct task_struct *p; 13 struct task_struct *p;
14 const pid_t pid = CPUCLOCK_PID(which_clock); 14 const pid_t pid = CPUCLOCK_PID(which_clock);
15 15
16 if (CPUCLOCK_WHICH(which_clock) >= CPUCLOCK_MAX) 16 if (CPUCLOCK_WHICH(which_clock) >= CPUCLOCK_MAX)
17 return -EINVAL; 17 return -EINVAL;
18 18
19 if (pid == 0) 19 if (pid == 0)
20 return 0; 20 return 0;
21 21
22 read_lock(&tasklist_lock); 22 read_lock(&tasklist_lock);
23 p = find_task_by_pid(pid); 23 p = find_task_by_pid(pid);
24 if (!p || (CPUCLOCK_PERTHREAD(which_clock) ? 24 if (!p || (CPUCLOCK_PERTHREAD(which_clock) ?
25 p->tgid != current->tgid : p->tgid != pid)) { 25 p->tgid != current->tgid : p->tgid != pid)) {
26 error = -EINVAL; 26 error = -EINVAL;
27 } 27 }
28 read_unlock(&tasklist_lock); 28 read_unlock(&tasklist_lock);
29 29
30 return error; 30 return error;
31 } 31 }
32 32
33 static inline union cpu_time_count 33 static inline union cpu_time_count
34 timespec_to_sample(const clockid_t which_clock, const struct timespec *tp) 34 timespec_to_sample(const clockid_t which_clock, const struct timespec *tp)
35 { 35 {
36 union cpu_time_count ret; 36 union cpu_time_count ret;
37 ret.sched = 0; /* high half always zero when .cpu used */ 37 ret.sched = 0; /* high half always zero when .cpu used */
38 if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) { 38 if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
39 ret.sched = (unsigned long long)tp->tv_sec * NSEC_PER_SEC + tp->tv_nsec; 39 ret.sched = (unsigned long long)tp->tv_sec * NSEC_PER_SEC + tp->tv_nsec;
40 } else { 40 } else {
41 ret.cpu = timespec_to_cputime(tp); 41 ret.cpu = timespec_to_cputime(tp);
42 } 42 }
43 return ret; 43 return ret;
44 } 44 }
45 45
46 static void sample_to_timespec(const clockid_t which_clock, 46 static void sample_to_timespec(const clockid_t which_clock,
47 union cpu_time_count cpu, 47 union cpu_time_count cpu,
48 struct timespec *tp) 48 struct timespec *tp)
49 { 49 {
50 if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) { 50 if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
51 tp->tv_sec = div_long_long_rem(cpu.sched, 51 tp->tv_sec = div_long_long_rem(cpu.sched,
52 NSEC_PER_SEC, &tp->tv_nsec); 52 NSEC_PER_SEC, &tp->tv_nsec);
53 } else { 53 } else {
54 cputime_to_timespec(cpu.cpu, tp); 54 cputime_to_timespec(cpu.cpu, tp);
55 } 55 }
56 } 56 }
57 57
58 static inline int cpu_time_before(const clockid_t which_clock, 58 static inline int cpu_time_before(const clockid_t which_clock,
59 union cpu_time_count now, 59 union cpu_time_count now,
60 union cpu_time_count then) 60 union cpu_time_count then)
61 { 61 {
62 if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) { 62 if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
63 return now.sched < then.sched; 63 return now.sched < then.sched;
64 } else { 64 } else {
65 return cputime_lt(now.cpu, then.cpu); 65 return cputime_lt(now.cpu, then.cpu);
66 } 66 }
67 } 67 }
68 static inline void cpu_time_add(const clockid_t which_clock, 68 static inline void cpu_time_add(const clockid_t which_clock,
69 union cpu_time_count *acc, 69 union cpu_time_count *acc,
70 union cpu_time_count val) 70 union cpu_time_count val)
71 { 71 {
72 if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) { 72 if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
73 acc->sched += val.sched; 73 acc->sched += val.sched;
74 } else { 74 } else {
75 acc->cpu = cputime_add(acc->cpu, val.cpu); 75 acc->cpu = cputime_add(acc->cpu, val.cpu);
76 } 76 }
77 } 77 }
78 static inline union cpu_time_count cpu_time_sub(const clockid_t which_clock, 78 static inline union cpu_time_count cpu_time_sub(const clockid_t which_clock,
79 union cpu_time_count a, 79 union cpu_time_count a,
80 union cpu_time_count b) 80 union cpu_time_count b)
81 { 81 {
82 if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) { 82 if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
83 a.sched -= b.sched; 83 a.sched -= b.sched;
84 } else { 84 } else {
85 a.cpu = cputime_sub(a.cpu, b.cpu); 85 a.cpu = cputime_sub(a.cpu, b.cpu);
86 } 86 }
87 return a; 87 return a;
88 } 88 }
89 89
90 /* 90 /*
91 * Divide and limit the result to res >= 1
92 *
93 * This is necessary to prevent signal delivery starvation, when the result of
94 * the division would be rounded down to 0.
95 */
96 static inline cputime_t cputime_div_non_zero(cputime_t time, unsigned long div)
97 {
98 cputime_t res = cputime_div(time, div);
99
100 return max_t(cputime_t, res, 1);
101 }
102
103 /*
91 * Update expiry time from increment, and increase overrun count, 104 * Update expiry time from increment, and increase overrun count,
92 * given the current clock sample. 105 * given the current clock sample.
93 */ 106 */
94 static void bump_cpu_timer(struct k_itimer *timer, 107 static void bump_cpu_timer(struct k_itimer *timer,
95 union cpu_time_count now) 108 union cpu_time_count now)
96 { 109 {
97 int i; 110 int i;
98 111
99 if (timer->it.cpu.incr.sched == 0) 112 if (timer->it.cpu.incr.sched == 0)
100 return; 113 return;
101 114
102 if (CPUCLOCK_WHICH(timer->it_clock) == CPUCLOCK_SCHED) { 115 if (CPUCLOCK_WHICH(timer->it_clock) == CPUCLOCK_SCHED) {
103 unsigned long long delta, incr; 116 unsigned long long delta, incr;
104 117
105 if (now.sched < timer->it.cpu.expires.sched) 118 if (now.sched < timer->it.cpu.expires.sched)
106 return; 119 return;
107 incr = timer->it.cpu.incr.sched; 120 incr = timer->it.cpu.incr.sched;
108 delta = now.sched + incr - timer->it.cpu.expires.sched; 121 delta = now.sched + incr - timer->it.cpu.expires.sched;
109 /* Don't use (incr*2 < delta), incr*2 might overflow. */ 122 /* Don't use (incr*2 < delta), incr*2 might overflow. */
110 for (i = 0; incr < delta - incr; i++) 123 for (i = 0; incr < delta - incr; i++)
111 incr = incr << 1; 124 incr = incr << 1;
112 for (; i >= 0; incr >>= 1, i--) { 125 for (; i >= 0; incr >>= 1, i--) {
113 if (delta < incr) 126 if (delta < incr)
114 continue; 127 continue;
115 timer->it.cpu.expires.sched += incr; 128 timer->it.cpu.expires.sched += incr;
116 timer->it_overrun += 1 << i; 129 timer->it_overrun += 1 << i;
117 delta -= incr; 130 delta -= incr;
118 } 131 }
119 } else { 132 } else {
120 cputime_t delta, incr; 133 cputime_t delta, incr;
121 134
122 if (cputime_lt(now.cpu, timer->it.cpu.expires.cpu)) 135 if (cputime_lt(now.cpu, timer->it.cpu.expires.cpu))
123 return; 136 return;
124 incr = timer->it.cpu.incr.cpu; 137 incr = timer->it.cpu.incr.cpu;
125 delta = cputime_sub(cputime_add(now.cpu, incr), 138 delta = cputime_sub(cputime_add(now.cpu, incr),
126 timer->it.cpu.expires.cpu); 139 timer->it.cpu.expires.cpu);
127 /* Don't use (incr*2 < delta), incr*2 might overflow. */ 140 /* Don't use (incr*2 < delta), incr*2 might overflow. */
128 for (i = 0; cputime_lt(incr, cputime_sub(delta, incr)); i++) 141 for (i = 0; cputime_lt(incr, cputime_sub(delta, incr)); i++)
129 incr = cputime_add(incr, incr); 142 incr = cputime_add(incr, incr);
130 for (; i >= 0; incr = cputime_halve(incr), i--) { 143 for (; i >= 0; incr = cputime_halve(incr), i--) {
131 if (cputime_lt(delta, incr)) 144 if (cputime_lt(delta, incr))
132 continue; 145 continue;
133 timer->it.cpu.expires.cpu = 146 timer->it.cpu.expires.cpu =
134 cputime_add(timer->it.cpu.expires.cpu, incr); 147 cputime_add(timer->it.cpu.expires.cpu, incr);
135 timer->it_overrun += 1 << i; 148 timer->it_overrun += 1 << i;
136 delta = cputime_sub(delta, incr); 149 delta = cputime_sub(delta, incr);
137 } 150 }
138 } 151 }
139 } 152 }
140 153
141 static inline cputime_t prof_ticks(struct task_struct *p) 154 static inline cputime_t prof_ticks(struct task_struct *p)
142 { 155 {
143 return cputime_add(p->utime, p->stime); 156 return cputime_add(p->utime, p->stime);
144 } 157 }
145 static inline cputime_t virt_ticks(struct task_struct *p) 158 static inline cputime_t virt_ticks(struct task_struct *p)
146 { 159 {
147 return p->utime; 160 return p->utime;
148 } 161 }
149 static inline unsigned long long sched_ns(struct task_struct *p) 162 static inline unsigned long long sched_ns(struct task_struct *p)
150 { 163 {
151 return (p == current) ? current_sched_time(p) : p->sched_time; 164 return (p == current) ? current_sched_time(p) : p->sched_time;
152 } 165 }
153 166
154 int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp) 167 int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp)
155 { 168 {
156 int error = check_clock(which_clock); 169 int error = check_clock(which_clock);
157 if (!error) { 170 if (!error) {
158 tp->tv_sec = 0; 171 tp->tv_sec = 0;
159 tp->tv_nsec = ((NSEC_PER_SEC + HZ - 1) / HZ); 172 tp->tv_nsec = ((NSEC_PER_SEC + HZ - 1) / HZ);
160 if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) { 173 if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
161 /* 174 /*
162 * If sched_clock is using a cycle counter, we 175 * If sched_clock is using a cycle counter, we
163 * don't have any idea of its true resolution 176 * don't have any idea of its true resolution
164 * exported, but it is much more than 1s/HZ. 177 * exported, but it is much more than 1s/HZ.
165 */ 178 */
166 tp->tv_nsec = 1; 179 tp->tv_nsec = 1;
167 } 180 }
168 } 181 }
169 return error; 182 return error;
170 } 183 }
171 184
172 int posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp) 185 int posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp)
173 { 186 {
174 /* 187 /*
175 * You can never reset a CPU clock, but we check for other errors 188 * You can never reset a CPU clock, but we check for other errors
176 * in the call before failing with EPERM. 189 * in the call before failing with EPERM.
177 */ 190 */
178 int error = check_clock(which_clock); 191 int error = check_clock(which_clock);
179 if (error == 0) { 192 if (error == 0) {
180 error = -EPERM; 193 error = -EPERM;
181 } 194 }
182 return error; 195 return error;
183 } 196 }
184 197
185 198
186 /* 199 /*
187 * Sample a per-thread clock for the given task. 200 * Sample a per-thread clock for the given task.
188 */ 201 */
189 static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p, 202 static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
190 union cpu_time_count *cpu) 203 union cpu_time_count *cpu)
191 { 204 {
192 switch (CPUCLOCK_WHICH(which_clock)) { 205 switch (CPUCLOCK_WHICH(which_clock)) {
193 default: 206 default:
194 return -EINVAL; 207 return -EINVAL;
195 case CPUCLOCK_PROF: 208 case CPUCLOCK_PROF:
196 cpu->cpu = prof_ticks(p); 209 cpu->cpu = prof_ticks(p);
197 break; 210 break;
198 case CPUCLOCK_VIRT: 211 case CPUCLOCK_VIRT:
199 cpu->cpu = virt_ticks(p); 212 cpu->cpu = virt_ticks(p);
200 break; 213 break;
201 case CPUCLOCK_SCHED: 214 case CPUCLOCK_SCHED:
202 cpu->sched = sched_ns(p); 215 cpu->sched = sched_ns(p);
203 break; 216 break;
204 } 217 }
205 return 0; 218 return 0;
206 } 219 }
207 220
208 /* 221 /*
209 * Sample a process (thread group) clock for the given group_leader task. 222 * Sample a process (thread group) clock for the given group_leader task.
210 * Must be called with tasklist_lock held for reading. 223 * Must be called with tasklist_lock held for reading.
211 * Must be called with tasklist_lock held for reading, and p->sighand->siglock. 224 * Must be called with tasklist_lock held for reading, and p->sighand->siglock.
212 */ 225 */
213 static int cpu_clock_sample_group_locked(unsigned int clock_idx, 226 static int cpu_clock_sample_group_locked(unsigned int clock_idx,
214 struct task_struct *p, 227 struct task_struct *p,
215 union cpu_time_count *cpu) 228 union cpu_time_count *cpu)
216 { 229 {
217 struct task_struct *t = p; 230 struct task_struct *t = p;
218 switch (clock_idx) { 231 switch (clock_idx) {
219 default: 232 default:
220 return -EINVAL; 233 return -EINVAL;
221 case CPUCLOCK_PROF: 234 case CPUCLOCK_PROF:
222 cpu->cpu = cputime_add(p->signal->utime, p->signal->stime); 235 cpu->cpu = cputime_add(p->signal->utime, p->signal->stime);
223 do { 236 do {
224 cpu->cpu = cputime_add(cpu->cpu, prof_ticks(t)); 237 cpu->cpu = cputime_add(cpu->cpu, prof_ticks(t));
225 t = next_thread(t); 238 t = next_thread(t);
226 } while (t != p); 239 } while (t != p);
227 break; 240 break;
228 case CPUCLOCK_VIRT: 241 case CPUCLOCK_VIRT:
229 cpu->cpu = p->signal->utime; 242 cpu->cpu = p->signal->utime;
230 do { 243 do {
231 cpu->cpu = cputime_add(cpu->cpu, virt_ticks(t)); 244 cpu->cpu = cputime_add(cpu->cpu, virt_ticks(t));
232 t = next_thread(t); 245 t = next_thread(t);
233 } while (t != p); 246 } while (t != p);
234 break; 247 break;
235 case CPUCLOCK_SCHED: 248 case CPUCLOCK_SCHED:
236 cpu->sched = p->signal->sched_time; 249 cpu->sched = p->signal->sched_time;
237 /* Add in each other live thread. */ 250 /* Add in each other live thread. */
238 while ((t = next_thread(t)) != p) { 251 while ((t = next_thread(t)) != p) {
239 cpu->sched += t->sched_time; 252 cpu->sched += t->sched_time;
240 } 253 }
241 cpu->sched += sched_ns(p); 254 cpu->sched += sched_ns(p);
242 break; 255 break;
243 } 256 }
244 return 0; 257 return 0;
245 } 258 }
246 259
247 /* 260 /*
248 * Sample a process (thread group) clock for the given group_leader task. 261 * Sample a process (thread group) clock for the given group_leader task.
249 * Must be called with tasklist_lock held for reading. 262 * Must be called with tasklist_lock held for reading.
250 */ 263 */
251 static int cpu_clock_sample_group(const clockid_t which_clock, 264 static int cpu_clock_sample_group(const clockid_t which_clock,
252 struct task_struct *p, 265 struct task_struct *p,
253 union cpu_time_count *cpu) 266 union cpu_time_count *cpu)
254 { 267 {
255 int ret; 268 int ret;
256 unsigned long flags; 269 unsigned long flags;
257 spin_lock_irqsave(&p->sighand->siglock, flags); 270 spin_lock_irqsave(&p->sighand->siglock, flags);
258 ret = cpu_clock_sample_group_locked(CPUCLOCK_WHICH(which_clock), p, 271 ret = cpu_clock_sample_group_locked(CPUCLOCK_WHICH(which_clock), p,
259 cpu); 272 cpu);
260 spin_unlock_irqrestore(&p->sighand->siglock, flags); 273 spin_unlock_irqrestore(&p->sighand->siglock, flags);
261 return ret; 274 return ret;
262 } 275 }
263 276
264 277
265 int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp) 278 int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
266 { 279 {
267 const pid_t pid = CPUCLOCK_PID(which_clock); 280 const pid_t pid = CPUCLOCK_PID(which_clock);
268 int error = -EINVAL; 281 int error = -EINVAL;
269 union cpu_time_count rtn; 282 union cpu_time_count rtn;
270 283
271 if (pid == 0) { 284 if (pid == 0) {
272 /* 285 /*
273 * Special case constant value for our own clocks. 286 * Special case constant value for our own clocks.
274 * We don't have to do any lookup to find ourselves. 287 * We don't have to do any lookup to find ourselves.
275 */ 288 */
276 if (CPUCLOCK_PERTHREAD(which_clock)) { 289 if (CPUCLOCK_PERTHREAD(which_clock)) {
277 /* 290 /*
278 * Sampling just ourselves we can do with no locking. 291 * Sampling just ourselves we can do with no locking.
279 */ 292 */
280 error = cpu_clock_sample(which_clock, 293 error = cpu_clock_sample(which_clock,
281 current, &rtn); 294 current, &rtn);
282 } else { 295 } else {
283 read_lock(&tasklist_lock); 296 read_lock(&tasklist_lock);
284 error = cpu_clock_sample_group(which_clock, 297 error = cpu_clock_sample_group(which_clock,
285 current, &rtn); 298 current, &rtn);
286 read_unlock(&tasklist_lock); 299 read_unlock(&tasklist_lock);
287 } 300 }
288 } else { 301 } else {
289 /* 302 /*
290 * Find the given PID, and validate that the caller 303 * Find the given PID, and validate that the caller
291 * should be able to see it. 304 * should be able to see it.
292 */ 305 */
293 struct task_struct *p; 306 struct task_struct *p;
294 read_lock(&tasklist_lock); 307 read_lock(&tasklist_lock);
295 p = find_task_by_pid(pid); 308 p = find_task_by_pid(pid);
296 if (p) { 309 if (p) {
297 if (CPUCLOCK_PERTHREAD(which_clock)) { 310 if (CPUCLOCK_PERTHREAD(which_clock)) {
298 if (p->tgid == current->tgid) { 311 if (p->tgid == current->tgid) {
299 error = cpu_clock_sample(which_clock, 312 error = cpu_clock_sample(which_clock,
300 p, &rtn); 313 p, &rtn);
301 } 314 }
302 } else if (p->tgid == pid && p->signal) { 315 } else if (p->tgid == pid && p->signal) {
303 error = cpu_clock_sample_group(which_clock, 316 error = cpu_clock_sample_group(which_clock,
304 p, &rtn); 317 p, &rtn);
305 } 318 }
306 } 319 }
307 read_unlock(&tasklist_lock); 320 read_unlock(&tasklist_lock);
308 } 321 }
309 322
310 if (error) 323 if (error)
311 return error; 324 return error;
312 sample_to_timespec(which_clock, rtn, tp); 325 sample_to_timespec(which_clock, rtn, tp);
313 return 0; 326 return 0;
314 } 327 }
315 328
316 329
317 /* 330 /*
318 * Validate the clockid_t for a new CPU-clock timer, and initialize the timer. 331 * Validate the clockid_t for a new CPU-clock timer, and initialize the timer.
319 * This is called from sys_timer_create with the new timer already locked. 332 * This is called from sys_timer_create with the new timer already locked.
320 */ 333 */
321 int posix_cpu_timer_create(struct k_itimer *new_timer) 334 int posix_cpu_timer_create(struct k_itimer *new_timer)
322 { 335 {
323 int ret = 0; 336 int ret = 0;
324 const pid_t pid = CPUCLOCK_PID(new_timer->it_clock); 337 const pid_t pid = CPUCLOCK_PID(new_timer->it_clock);
325 struct task_struct *p; 338 struct task_struct *p;
326 339
327 if (CPUCLOCK_WHICH(new_timer->it_clock) >= CPUCLOCK_MAX) 340 if (CPUCLOCK_WHICH(new_timer->it_clock) >= CPUCLOCK_MAX)
328 return -EINVAL; 341 return -EINVAL;
329 342
330 INIT_LIST_HEAD(&new_timer->it.cpu.entry); 343 INIT_LIST_HEAD(&new_timer->it.cpu.entry);
331 new_timer->it.cpu.incr.sched = 0; 344 new_timer->it.cpu.incr.sched = 0;
332 new_timer->it.cpu.expires.sched = 0; 345 new_timer->it.cpu.expires.sched = 0;
333 346
334 read_lock(&tasklist_lock); 347 read_lock(&tasklist_lock);
335 if (CPUCLOCK_PERTHREAD(new_timer->it_clock)) { 348 if (CPUCLOCK_PERTHREAD(new_timer->it_clock)) {
336 if (pid == 0) { 349 if (pid == 0) {
337 p = current; 350 p = current;
338 } else { 351 } else {
339 p = find_task_by_pid(pid); 352 p = find_task_by_pid(pid);
340 if (p && p->tgid != current->tgid) 353 if (p && p->tgid != current->tgid)
341 p = NULL; 354 p = NULL;
342 } 355 }
343 } else { 356 } else {
344 if (pid == 0) { 357 if (pid == 0) {
345 p = current->group_leader; 358 p = current->group_leader;
346 } else { 359 } else {
347 p = find_task_by_pid(pid); 360 p = find_task_by_pid(pid);
348 if (p && p->tgid != pid) 361 if (p && p->tgid != pid)
349 p = NULL; 362 p = NULL;
350 } 363 }
351 } 364 }
352 new_timer->it.cpu.task = p; 365 new_timer->it.cpu.task = p;
353 if (p) { 366 if (p) {
354 get_task_struct(p); 367 get_task_struct(p);
355 } else { 368 } else {
356 ret = -EINVAL; 369 ret = -EINVAL;
357 } 370 }
358 read_unlock(&tasklist_lock); 371 read_unlock(&tasklist_lock);
359 372
360 return ret; 373 return ret;
361 } 374 }
362 375
363 /* 376 /*
364 * Clean up a CPU-clock timer that is about to be destroyed. 377 * Clean up a CPU-clock timer that is about to be destroyed.
365 * This is called from timer deletion with the timer already locked. 378 * This is called from timer deletion with the timer already locked.
366 * If we return TIMER_RETRY, it's necessary to release the timer's lock 379 * If we return TIMER_RETRY, it's necessary to release the timer's lock
367 * and try again. (This happens when the timer is in the middle of firing.) 380 * and try again. (This happens when the timer is in the middle of firing.)
368 */ 381 */
369 int posix_cpu_timer_del(struct k_itimer *timer) 382 int posix_cpu_timer_del(struct k_itimer *timer)
370 { 383 {
371 struct task_struct *p = timer->it.cpu.task; 384 struct task_struct *p = timer->it.cpu.task;
372 int ret = 0; 385 int ret = 0;
373 386
374 if (likely(p != NULL)) { 387 if (likely(p != NULL)) {
375 read_lock(&tasklist_lock); 388 read_lock(&tasklist_lock);
376 if (unlikely(p->signal == NULL)) { 389 if (unlikely(p->signal == NULL)) {
377 /* 390 /*
378 * We raced with the reaping of the task. 391 * We raced with the reaping of the task.
379 * The deletion should have cleared us off the list. 392 * The deletion should have cleared us off the list.
380 */ 393 */
381 BUG_ON(!list_empty(&timer->it.cpu.entry)); 394 BUG_ON(!list_empty(&timer->it.cpu.entry));
382 } else { 395 } else {
383 spin_lock(&p->sighand->siglock); 396 spin_lock(&p->sighand->siglock);
384 if (timer->it.cpu.firing) 397 if (timer->it.cpu.firing)
385 ret = TIMER_RETRY; 398 ret = TIMER_RETRY;
386 else 399 else
387 list_del(&timer->it.cpu.entry); 400 list_del(&timer->it.cpu.entry);
388 spin_unlock(&p->sighand->siglock); 401 spin_unlock(&p->sighand->siglock);
389 } 402 }
390 read_unlock(&tasklist_lock); 403 read_unlock(&tasklist_lock);
391 404
392 if (!ret) 405 if (!ret)
393 put_task_struct(p); 406 put_task_struct(p);
394 } 407 }
395 408
396 return ret; 409 return ret;
397 } 410 }
398 411
399 /* 412 /*
400 * Clean out CPU timers still ticking when a thread exited. The task 413 * Clean out CPU timers still ticking when a thread exited. The task
401 * pointer is cleared, and the expiry time is replaced with the residual 414 * pointer is cleared, and the expiry time is replaced with the residual
402 * time for later timer_gettime calls to return. 415 * time for later timer_gettime calls to return.
403 * This must be called with the siglock held. 416 * This must be called with the siglock held.
404 */ 417 */
405 static void cleanup_timers(struct list_head *head, 418 static void cleanup_timers(struct list_head *head,
406 cputime_t utime, cputime_t stime, 419 cputime_t utime, cputime_t stime,
407 unsigned long long sched_time) 420 unsigned long long sched_time)
408 { 421 {
409 struct cpu_timer_list *timer, *next; 422 struct cpu_timer_list *timer, *next;
410 cputime_t ptime = cputime_add(utime, stime); 423 cputime_t ptime = cputime_add(utime, stime);
411 424
412 list_for_each_entry_safe(timer, next, head, entry) { 425 list_for_each_entry_safe(timer, next, head, entry) {
413 list_del_init(&timer->entry); 426 list_del_init(&timer->entry);
414 if (cputime_lt(timer->expires.cpu, ptime)) { 427 if (cputime_lt(timer->expires.cpu, ptime)) {
415 timer->expires.cpu = cputime_zero; 428 timer->expires.cpu = cputime_zero;
416 } else { 429 } else {
417 timer->expires.cpu = cputime_sub(timer->expires.cpu, 430 timer->expires.cpu = cputime_sub(timer->expires.cpu,
418 ptime); 431 ptime);
419 } 432 }
420 } 433 }
421 434
422 ++head; 435 ++head;
423 list_for_each_entry_safe(timer, next, head, entry) { 436 list_for_each_entry_safe(timer, next, head, entry) {
424 list_del_init(&timer->entry); 437 list_del_init(&timer->entry);
425 if (cputime_lt(timer->expires.cpu, utime)) { 438 if (cputime_lt(timer->expires.cpu, utime)) {
426 timer->expires.cpu = cputime_zero; 439 timer->expires.cpu = cputime_zero;
427 } else { 440 } else {
428 timer->expires.cpu = cputime_sub(timer->expires.cpu, 441 timer->expires.cpu = cputime_sub(timer->expires.cpu,
429 utime); 442 utime);
430 } 443 }
431 } 444 }
432 445
433 ++head; 446 ++head;
434 list_for_each_entry_safe(timer, next, head, entry) { 447 list_for_each_entry_safe(timer, next, head, entry) {
435 list_del_init(&timer->entry); 448 list_del_init(&timer->entry);
436 if (timer->expires.sched < sched_time) { 449 if (timer->expires.sched < sched_time) {
437 timer->expires.sched = 0; 450 timer->expires.sched = 0;
438 } else { 451 } else {
439 timer->expires.sched -= sched_time; 452 timer->expires.sched -= sched_time;
440 } 453 }
441 } 454 }
442 } 455 }
443 456
444 /* 457 /*
445 * These are both called with the siglock held, when the current thread 458 * These are both called with the siglock held, when the current thread
446 * is being reaped. When the final (leader) thread in the group is reaped, 459 * is being reaped. When the final (leader) thread in the group is reaped,
447 * posix_cpu_timers_exit_group will be called after posix_cpu_timers_exit. 460 * posix_cpu_timers_exit_group will be called after posix_cpu_timers_exit.
448 */ 461 */
449 void posix_cpu_timers_exit(struct task_struct *tsk) 462 void posix_cpu_timers_exit(struct task_struct *tsk)
450 { 463 {
451 cleanup_timers(tsk->cpu_timers, 464 cleanup_timers(tsk->cpu_timers,
452 tsk->utime, tsk->stime, tsk->sched_time); 465 tsk->utime, tsk->stime, tsk->sched_time);
453 466
454 } 467 }
455 void posix_cpu_timers_exit_group(struct task_struct *tsk) 468 void posix_cpu_timers_exit_group(struct task_struct *tsk)
456 { 469 {
457 cleanup_timers(tsk->signal->cpu_timers, 470 cleanup_timers(tsk->signal->cpu_timers,
458 cputime_add(tsk->utime, tsk->signal->utime), 471 cputime_add(tsk->utime, tsk->signal->utime),
459 cputime_add(tsk->stime, tsk->signal->stime), 472 cputime_add(tsk->stime, tsk->signal->stime),
460 tsk->sched_time + tsk->signal->sched_time); 473 tsk->sched_time + tsk->signal->sched_time);
461 } 474 }
462 475
463 476
464 /* 477 /*
465 * Set the expiry times of all the threads in the process so one of them 478 * Set the expiry times of all the threads in the process so one of them
466 * will go off before the process cumulative expiry total is reached. 479 * will go off before the process cumulative expiry total is reached.
467 */ 480 */
468 static void process_timer_rebalance(struct task_struct *p, 481 static void process_timer_rebalance(struct task_struct *p,
469 unsigned int clock_idx, 482 unsigned int clock_idx,
470 union cpu_time_count expires, 483 union cpu_time_count expires,
471 union cpu_time_count val) 484 union cpu_time_count val)
472 { 485 {
473 cputime_t ticks, left; 486 cputime_t ticks, left;
474 unsigned long long ns, nsleft; 487 unsigned long long ns, nsleft;
475 struct task_struct *t = p; 488 struct task_struct *t = p;
476 unsigned int nthreads = atomic_read(&p->signal->live); 489 unsigned int nthreads = atomic_read(&p->signal->live);
477 490
478 if (!nthreads) 491 if (!nthreads)
479 return; 492 return;
480 493
481 switch (clock_idx) { 494 switch (clock_idx) {
482 default: 495 default:
483 BUG(); 496 BUG();
484 break; 497 break;
485 case CPUCLOCK_PROF: 498 case CPUCLOCK_PROF:
486 left = cputime_div(cputime_sub(expires.cpu, val.cpu), 499 left = cputime_div_non_zero(cputime_sub(expires.cpu, val.cpu),
487 nthreads); 500 nthreads);
488 do { 501 do {
489 if (likely(!(t->flags & PF_EXITING))) { 502 if (likely(!(t->flags & PF_EXITING))) {
490 ticks = cputime_add(prof_ticks(t), left); 503 ticks = cputime_add(prof_ticks(t), left);
491 if (cputime_eq(t->it_prof_expires, 504 if (cputime_eq(t->it_prof_expires,
492 cputime_zero) || 505 cputime_zero) ||
493 cputime_gt(t->it_prof_expires, ticks)) { 506 cputime_gt(t->it_prof_expires, ticks)) {
494 t->it_prof_expires = ticks; 507 t->it_prof_expires = ticks;
495 } 508 }
496 } 509 }
497 t = next_thread(t); 510 t = next_thread(t);
498 } while (t != p); 511 } while (t != p);
499 break; 512 break;
500 case CPUCLOCK_VIRT: 513 case CPUCLOCK_VIRT:
501 left = cputime_div(cputime_sub(expires.cpu, val.cpu), 514 left = cputime_div_non_zero(cputime_sub(expires.cpu, val.cpu),
502 nthreads); 515 nthreads);
503 do { 516 do {
504 if (likely(!(t->flags & PF_EXITING))) { 517 if (likely(!(t->flags & PF_EXITING))) {
505 ticks = cputime_add(virt_ticks(t), left); 518 ticks = cputime_add(virt_ticks(t), left);
506 if (cputime_eq(t->it_virt_expires, 519 if (cputime_eq(t->it_virt_expires,
507 cputime_zero) || 520 cputime_zero) ||
508 cputime_gt(t->it_virt_expires, ticks)) { 521 cputime_gt(t->it_virt_expires, ticks)) {
509 t->it_virt_expires = ticks; 522 t->it_virt_expires = ticks;
510 } 523 }
511 } 524 }
512 t = next_thread(t); 525 t = next_thread(t);
513 } while (t != p); 526 } while (t != p);
514 break; 527 break;
515 case CPUCLOCK_SCHED: 528 case CPUCLOCK_SCHED:
516 nsleft = expires.sched - val.sched; 529 nsleft = expires.sched - val.sched;
517 do_div(nsleft, nthreads); 530 do_div(nsleft, nthreads);
531 nsleft = max_t(unsigned long long, nsleft, 1);
518 do { 532 do {
519 if (likely(!(t->flags & PF_EXITING))) { 533 if (likely(!(t->flags & PF_EXITING))) {
520 ns = t->sched_time + nsleft; 534 ns = t->sched_time + nsleft;
521 if (t->it_sched_expires == 0 || 535 if (t->it_sched_expires == 0 ||
522 t->it_sched_expires > ns) { 536 t->it_sched_expires > ns) {
523 t->it_sched_expires = ns; 537 t->it_sched_expires = ns;
524 } 538 }
525 } 539 }
526 t = next_thread(t); 540 t = next_thread(t);
527 } while (t != p); 541 } while (t != p);
528 break; 542 break;
529 } 543 }
530 } 544 }
531 545
532 static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now) 546 static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now)
533 { 547 {
534 /* 548 /*
535 * That's all for this thread or process. 549 * That's all for this thread or process.
536 * We leave our residual in expires to be reported. 550 * We leave our residual in expires to be reported.
537 */ 551 */
538 put_task_struct(timer->it.cpu.task); 552 put_task_struct(timer->it.cpu.task);
539 timer->it.cpu.task = NULL; 553 timer->it.cpu.task = NULL;
540 timer->it.cpu.expires = cpu_time_sub(timer->it_clock, 554 timer->it.cpu.expires = cpu_time_sub(timer->it_clock,
541 timer->it.cpu.expires, 555 timer->it.cpu.expires,
542 now); 556 now);
543 } 557 }
544 558
545 /* 559 /*
546 * Insert the timer on the appropriate list before any timers that 560 * Insert the timer on the appropriate list before any timers that
547 * expire later. This must be called with the tasklist_lock held 561 * expire later. This must be called with the tasklist_lock held
548 * for reading, and interrupts disabled. 562 * for reading, and interrupts disabled.
549 */ 563 */
550 static void arm_timer(struct k_itimer *timer, union cpu_time_count now) 564 static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
551 { 565 {
552 struct task_struct *p = timer->it.cpu.task; 566 struct task_struct *p = timer->it.cpu.task;
553 struct list_head *head, *listpos; 567 struct list_head *head, *listpos;
554 struct cpu_timer_list *const nt = &timer->it.cpu; 568 struct cpu_timer_list *const nt = &timer->it.cpu;
555 struct cpu_timer_list *next; 569 struct cpu_timer_list *next;
556 unsigned long i; 570 unsigned long i;
557 571
558 head = (CPUCLOCK_PERTHREAD(timer->it_clock) ? 572 head = (CPUCLOCK_PERTHREAD(timer->it_clock) ?
559 p->cpu_timers : p->signal->cpu_timers); 573 p->cpu_timers : p->signal->cpu_timers);
560 head += CPUCLOCK_WHICH(timer->it_clock); 574 head += CPUCLOCK_WHICH(timer->it_clock);
561 575
562 BUG_ON(!irqs_disabled()); 576 BUG_ON(!irqs_disabled());
563 spin_lock(&p->sighand->siglock); 577 spin_lock(&p->sighand->siglock);
564 578
565 listpos = head; 579 listpos = head;
566 if (CPUCLOCK_WHICH(timer->it_clock) == CPUCLOCK_SCHED) { 580 if (CPUCLOCK_WHICH(timer->it_clock) == CPUCLOCK_SCHED) {
567 list_for_each_entry(next, head, entry) { 581 list_for_each_entry(next, head, entry) {
568 if (next->expires.sched > nt->expires.sched) 582 if (next->expires.sched > nt->expires.sched)
569 break; 583 break;
570 listpos = &next->entry; 584 listpos = &next->entry;
571 } 585 }
572 } else { 586 } else {
573 list_for_each_entry(next, head, entry) { 587 list_for_each_entry(next, head, entry) {
574 if (cputime_gt(next->expires.cpu, nt->expires.cpu)) 588 if (cputime_gt(next->expires.cpu, nt->expires.cpu))
575 break; 589 break;
576 listpos = &next->entry; 590 listpos = &next->entry;
577 } 591 }
578 } 592 }
579 list_add(&nt->entry, listpos); 593 list_add(&nt->entry, listpos);
580 594
581 if (listpos == head) { 595 if (listpos == head) {
582 /* 596 /*
583 * We are the new earliest-expiring timer. 597 * We are the new earliest-expiring timer.
584 * If we are a thread timer, there can always 598 * If we are a thread timer, there can always
585 * be a process timer telling us to stop earlier. 599 * be a process timer telling us to stop earlier.
586 */ 600 */
587 601
588 if (CPUCLOCK_PERTHREAD(timer->it_clock)) { 602 if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
589 switch (CPUCLOCK_WHICH(timer->it_clock)) { 603 switch (CPUCLOCK_WHICH(timer->it_clock)) {
590 default: 604 default:
591 BUG(); 605 BUG();
592 case CPUCLOCK_PROF: 606 case CPUCLOCK_PROF:
593 if (cputime_eq(p->it_prof_expires, 607 if (cputime_eq(p->it_prof_expires,
594 cputime_zero) || 608 cputime_zero) ||
595 cputime_gt(p->it_prof_expires, 609 cputime_gt(p->it_prof_expires,
596 nt->expires.cpu)) 610 nt->expires.cpu))
597 p->it_prof_expires = nt->expires.cpu; 611 p->it_prof_expires = nt->expires.cpu;
598 break; 612 break;
599 case CPUCLOCK_VIRT: 613 case CPUCLOCK_VIRT:
600 if (cputime_eq(p->it_virt_expires, 614 if (cputime_eq(p->it_virt_expires,
601 cputime_zero) || 615 cputime_zero) ||
602 cputime_gt(p->it_virt_expires, 616 cputime_gt(p->it_virt_expires,
603 nt->expires.cpu)) 617 nt->expires.cpu))
604 p->it_virt_expires = nt->expires.cpu; 618 p->it_virt_expires = nt->expires.cpu;
605 break; 619 break;
606 case CPUCLOCK_SCHED: 620 case CPUCLOCK_SCHED:
607 if (p->it_sched_expires == 0 || 621 if (p->it_sched_expires == 0 ||
608 p->it_sched_expires > nt->expires.sched) 622 p->it_sched_expires > nt->expires.sched)
609 p->it_sched_expires = nt->expires.sched; 623 p->it_sched_expires = nt->expires.sched;
610 break; 624 break;
611 } 625 }
612 } else { 626 } else {
613 /* 627 /*
614 * For a process timer, we must balance 628 * For a process timer, we must balance
615 * all the live threads' expirations. 629 * all the live threads' expirations.
616 */ 630 */
617 switch (CPUCLOCK_WHICH(timer->it_clock)) { 631 switch (CPUCLOCK_WHICH(timer->it_clock)) {
618 default: 632 default:
619 BUG(); 633 BUG();
620 case CPUCLOCK_VIRT: 634 case CPUCLOCK_VIRT:
621 if (!cputime_eq(p->signal->it_virt_expires, 635 if (!cputime_eq(p->signal->it_virt_expires,
622 cputime_zero) && 636 cputime_zero) &&
623 cputime_lt(p->signal->it_virt_expires, 637 cputime_lt(p->signal->it_virt_expires,
624 timer->it.cpu.expires.cpu)) 638 timer->it.cpu.expires.cpu))
625 break; 639 break;
626 goto rebalance; 640 goto rebalance;
627 case CPUCLOCK_PROF: 641 case CPUCLOCK_PROF:
628 if (!cputime_eq(p->signal->it_prof_expires, 642 if (!cputime_eq(p->signal->it_prof_expires,
629 cputime_zero) && 643 cputime_zero) &&
630 cputime_lt(p->signal->it_prof_expires, 644 cputime_lt(p->signal->it_prof_expires,
631 timer->it.cpu.expires.cpu)) 645 timer->it.cpu.expires.cpu))
632 break; 646 break;
633 i = p->signal->rlim[RLIMIT_CPU].rlim_cur; 647 i = p->signal->rlim[RLIMIT_CPU].rlim_cur;
634 if (i != RLIM_INFINITY && 648 if (i != RLIM_INFINITY &&
635 i <= cputime_to_secs(timer->it.cpu.expires.cpu)) 649 i <= cputime_to_secs(timer->it.cpu.expires.cpu))
636 break; 650 break;
637 goto rebalance; 651 goto rebalance;
638 case CPUCLOCK_SCHED: 652 case CPUCLOCK_SCHED:
639 rebalance: 653 rebalance:
640 process_timer_rebalance( 654 process_timer_rebalance(
641 timer->it.cpu.task, 655 timer->it.cpu.task,
642 CPUCLOCK_WHICH(timer->it_clock), 656 CPUCLOCK_WHICH(timer->it_clock),
643 timer->it.cpu.expires, now); 657 timer->it.cpu.expires, now);
644 break; 658 break;
645 } 659 }
646 } 660 }
647 } 661 }
648 662
649 spin_unlock(&p->sighand->siglock); 663 spin_unlock(&p->sighand->siglock);
650 } 664 }
651 665
652 /* 666 /*
653 * The timer is locked, fire it and arrange for its reload. 667 * The timer is locked, fire it and arrange for its reload.
654 */ 668 */
655 static void cpu_timer_fire(struct k_itimer *timer) 669 static void cpu_timer_fire(struct k_itimer *timer)
656 { 670 {
657 if (unlikely(timer->sigq == NULL)) { 671 if (unlikely(timer->sigq == NULL)) {
658 /* 672 /*
659 * This a special case for clock_nanosleep, 673 * This a special case for clock_nanosleep,
660 * not a normal timer from sys_timer_create. 674 * not a normal timer from sys_timer_create.
661 */ 675 */
662 wake_up_process(timer->it_process); 676 wake_up_process(timer->it_process);
663 timer->it.cpu.expires.sched = 0; 677 timer->it.cpu.expires.sched = 0;
664 } else if (timer->it.cpu.incr.sched == 0) { 678 } else if (timer->it.cpu.incr.sched == 0) {
665 /* 679 /*
666 * One-shot timer. Clear it as soon as it's fired. 680 * One-shot timer. Clear it as soon as it's fired.
667 */ 681 */
668 posix_timer_event(timer, 0); 682 posix_timer_event(timer, 0);
669 timer->it.cpu.expires.sched = 0; 683 timer->it.cpu.expires.sched = 0;
670 } else if (posix_timer_event(timer, ++timer->it_requeue_pending)) { 684 } else if (posix_timer_event(timer, ++timer->it_requeue_pending)) {
671 /* 685 /*
672 * The signal did not get queued because the signal 686 * The signal did not get queued because the signal
673 * was ignored, so we won't get any callback to 687 * was ignored, so we won't get any callback to
674 * reload the timer. But we need to keep it 688 * reload the timer. But we need to keep it
675 * ticking in case the signal is deliverable next time. 689 * ticking in case the signal is deliverable next time.
676 */ 690 */
677 posix_cpu_timer_schedule(timer); 691 posix_cpu_timer_schedule(timer);
678 } 692 }
679 } 693 }
680 694
681 /* 695 /*
682 * Guts of sys_timer_settime for CPU timers. 696 * Guts of sys_timer_settime for CPU timers.
683 * This is called with the timer locked and interrupts disabled. 697 * This is called with the timer locked and interrupts disabled.
684 * If we return TIMER_RETRY, it's necessary to release the timer's lock 698 * If we return TIMER_RETRY, it's necessary to release the timer's lock
685 * and try again. (This happens when the timer is in the middle of firing.) 699 * and try again. (This happens when the timer is in the middle of firing.)
686 */ 700 */
687 int posix_cpu_timer_set(struct k_itimer *timer, int flags, 701 int posix_cpu_timer_set(struct k_itimer *timer, int flags,
688 struct itimerspec *new, struct itimerspec *old) 702 struct itimerspec *new, struct itimerspec *old)
689 { 703 {
690 struct task_struct *p = timer->it.cpu.task; 704 struct task_struct *p = timer->it.cpu.task;
691 union cpu_time_count old_expires, new_expires, val; 705 union cpu_time_count old_expires, new_expires, val;
692 int ret; 706 int ret;
693 707
694 if (unlikely(p == NULL)) { 708 if (unlikely(p == NULL)) {
695 /* 709 /*
696 * Timer refers to a dead task's clock. 710 * Timer refers to a dead task's clock.
697 */ 711 */
698 return -ESRCH; 712 return -ESRCH;
699 } 713 }
700 714
701 new_expires = timespec_to_sample(timer->it_clock, &new->it_value); 715 new_expires = timespec_to_sample(timer->it_clock, &new->it_value);
702 716
703 read_lock(&tasklist_lock); 717 read_lock(&tasklist_lock);
704 /* 718 /*
705 * We need the tasklist_lock to protect against reaping that 719 * We need the tasklist_lock to protect against reaping that
706 * clears p->signal. If p has just been reaped, we can no 720 * clears p->signal. If p has just been reaped, we can no
707 * longer get any information about it at all. 721 * longer get any information about it at all.
708 */ 722 */
709 if (unlikely(p->signal == NULL)) { 723 if (unlikely(p->signal == NULL)) {
710 read_unlock(&tasklist_lock); 724 read_unlock(&tasklist_lock);
711 put_task_struct(p); 725 put_task_struct(p);
712 timer->it.cpu.task = NULL; 726 timer->it.cpu.task = NULL;
713 return -ESRCH; 727 return -ESRCH;
714 } 728 }
715 729
716 /* 730 /*
717 * Disarm any old timer after extracting its expiry time. 731 * Disarm any old timer after extracting its expiry time.
718 */ 732 */
719 BUG_ON(!irqs_disabled()); 733 BUG_ON(!irqs_disabled());
720 734
721 ret = 0; 735 ret = 0;
722 spin_lock(&p->sighand->siglock); 736 spin_lock(&p->sighand->siglock);
723 old_expires = timer->it.cpu.expires; 737 old_expires = timer->it.cpu.expires;
724 if (unlikely(timer->it.cpu.firing)) { 738 if (unlikely(timer->it.cpu.firing)) {
725 timer->it.cpu.firing = -1; 739 timer->it.cpu.firing = -1;
726 ret = TIMER_RETRY; 740 ret = TIMER_RETRY;
727 } else 741 } else
728 list_del_init(&timer->it.cpu.entry); 742 list_del_init(&timer->it.cpu.entry);
729 spin_unlock(&p->sighand->siglock); 743 spin_unlock(&p->sighand->siglock);
730 744
731 /* 745 /*
732 * We need to sample the current value to convert the new 746 * We need to sample the current value to convert the new
733 * value from to relative and absolute, and to convert the 747 * value from to relative and absolute, and to convert the
734 * old value from absolute to relative. To set a process 748 * old value from absolute to relative. To set a process
735 * timer, we need a sample to balance the thread expiry 749 * timer, we need a sample to balance the thread expiry
736 * times (in arm_timer). With an absolute time, we must 750 * times (in arm_timer). With an absolute time, we must
737 * check if it's already passed. In short, we need a sample. 751 * check if it's already passed. In short, we need a sample.
738 */ 752 */
739 if (CPUCLOCK_PERTHREAD(timer->it_clock)) { 753 if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
740 cpu_clock_sample(timer->it_clock, p, &val); 754 cpu_clock_sample(timer->it_clock, p, &val);
741 } else { 755 } else {
742 cpu_clock_sample_group(timer->it_clock, p, &val); 756 cpu_clock_sample_group(timer->it_clock, p, &val);
743 } 757 }
744 758
745 if (old) { 759 if (old) {
746 if (old_expires.sched == 0) { 760 if (old_expires.sched == 0) {
747 old->it_value.tv_sec = 0; 761 old->it_value.tv_sec = 0;
748 old->it_value.tv_nsec = 0; 762 old->it_value.tv_nsec = 0;
749 } else { 763 } else {
750 /* 764 /*
751 * Update the timer in case it has 765 * Update the timer in case it has
752 * overrun already. If it has, 766 * overrun already. If it has,
753 * we'll report it as having overrun 767 * we'll report it as having overrun
754 * and with the next reloaded timer 768 * and with the next reloaded timer
755 * already ticking, though we are 769 * already ticking, though we are
756 * swallowing that pending 770 * swallowing that pending
757 * notification here to install the 771 * notification here to install the
758 * new setting. 772 * new setting.
759 */ 773 */
760 bump_cpu_timer(timer, val); 774 bump_cpu_timer(timer, val);
761 if (cpu_time_before(timer->it_clock, val, 775 if (cpu_time_before(timer->it_clock, val,
762 timer->it.cpu.expires)) { 776 timer->it.cpu.expires)) {
763 old_expires = cpu_time_sub( 777 old_expires = cpu_time_sub(
764 timer->it_clock, 778 timer->it_clock,
765 timer->it.cpu.expires, val); 779 timer->it.cpu.expires, val);
766 sample_to_timespec(timer->it_clock, 780 sample_to_timespec(timer->it_clock,
767 old_expires, 781 old_expires,
768 &old->it_value); 782 &old->it_value);
769 } else { 783 } else {
770 old->it_value.tv_nsec = 1; 784 old->it_value.tv_nsec = 1;
771 old->it_value.tv_sec = 0; 785 old->it_value.tv_sec = 0;
772 } 786 }
773 } 787 }
774 } 788 }
775 789
776 if (unlikely(ret)) { 790 if (unlikely(ret)) {
777 /* 791 /*
778 * We are colliding with the timer actually firing. 792 * We are colliding with the timer actually firing.
779 * Punt after filling in the timer's old value, and 793 * Punt after filling in the timer's old value, and
780 * disable this firing since we are already reporting 794 * disable this firing since we are already reporting
781 * it as an overrun (thanks to bump_cpu_timer above). 795 * it as an overrun (thanks to bump_cpu_timer above).
782 */ 796 */
783 read_unlock(&tasklist_lock); 797 read_unlock(&tasklist_lock);
784 goto out; 798 goto out;
785 } 799 }
786 800
787 if (new_expires.sched != 0 && !(flags & TIMER_ABSTIME)) { 801 if (new_expires.sched != 0 && !(flags & TIMER_ABSTIME)) {
788 cpu_time_add(timer->it_clock, &new_expires, val); 802 cpu_time_add(timer->it_clock, &new_expires, val);
789 } 803 }
790 804
791 /* 805 /*
792 * Install the new expiry time (or zero). 806 * Install the new expiry time (or zero).
793 * For a timer with no notification action, we don't actually 807 * For a timer with no notification action, we don't actually
794 * arm the timer (we'll just fake it for timer_gettime). 808 * arm the timer (we'll just fake it for timer_gettime).
795 */ 809 */
796 timer->it.cpu.expires = new_expires; 810 timer->it.cpu.expires = new_expires;
797 if (new_expires.sched != 0 && 811 if (new_expires.sched != 0 &&
798 (timer->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE && 812 (timer->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE &&
799 cpu_time_before(timer->it_clock, val, new_expires)) { 813 cpu_time_before(timer->it_clock, val, new_expires)) {
800 arm_timer(timer, val); 814 arm_timer(timer, val);
801 } 815 }
802 816
803 read_unlock(&tasklist_lock); 817 read_unlock(&tasklist_lock);
804 818
805 /* 819 /*
806 * Install the new reload setting, and 820 * Install the new reload setting, and
807 * set up the signal and overrun bookkeeping. 821 * set up the signal and overrun bookkeeping.
808 */ 822 */
809 timer->it.cpu.incr = timespec_to_sample(timer->it_clock, 823 timer->it.cpu.incr = timespec_to_sample(timer->it_clock,
810 &new->it_interval); 824 &new->it_interval);
811 825
812 /* 826 /*
813 * This acts as a modification timestamp for the timer, 827 * This acts as a modification timestamp for the timer,
814 * so any automatic reload attempt will punt on seeing 828 * so any automatic reload attempt will punt on seeing
815 * that we have reset the timer manually. 829 * that we have reset the timer manually.
816 */ 830 */
817 timer->it_requeue_pending = (timer->it_requeue_pending + 2) & 831 timer->it_requeue_pending = (timer->it_requeue_pending + 2) &
818 ~REQUEUE_PENDING; 832 ~REQUEUE_PENDING;
819 timer->it_overrun_last = 0; 833 timer->it_overrun_last = 0;
820 timer->it_overrun = -1; 834 timer->it_overrun = -1;
821 835
822 if (new_expires.sched != 0 && 836 if (new_expires.sched != 0 &&
823 (timer->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE && 837 (timer->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE &&
824 !cpu_time_before(timer->it_clock, val, new_expires)) { 838 !cpu_time_before(timer->it_clock, val, new_expires)) {
825 /* 839 /*
826 * The designated time already passed, so we notify 840 * The designated time already passed, so we notify
827 * immediately, even if the thread never runs to 841 * immediately, even if the thread never runs to
828 * accumulate more time on this clock. 842 * accumulate more time on this clock.
829 */ 843 */
830 cpu_timer_fire(timer); 844 cpu_timer_fire(timer);
831 } 845 }
832 846
833 ret = 0; 847 ret = 0;
834 out: 848 out:
835 if (old) { 849 if (old) {
836 sample_to_timespec(timer->it_clock, 850 sample_to_timespec(timer->it_clock,
837 timer->it.cpu.incr, &old->it_interval); 851 timer->it.cpu.incr, &old->it_interval);
838 } 852 }
839 return ret; 853 return ret;
840 } 854 }
841 855
842 void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) 856 void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
843 { 857 {
844 union cpu_time_count now; 858 union cpu_time_count now;
845 struct task_struct *p = timer->it.cpu.task; 859 struct task_struct *p = timer->it.cpu.task;
846 int clear_dead; 860 int clear_dead;
847 861
848 /* 862 /*
849 * Easy part: convert the reload time. 863 * Easy part: convert the reload time.
850 */ 864 */
851 sample_to_timespec(timer->it_clock, 865 sample_to_timespec(timer->it_clock,
852 timer->it.cpu.incr, &itp->it_interval); 866 timer->it.cpu.incr, &itp->it_interval);
853 867
854 if (timer->it.cpu.expires.sched == 0) { /* Timer not armed at all. */ 868 if (timer->it.cpu.expires.sched == 0) { /* Timer not armed at all. */
855 itp->it_value.tv_sec = itp->it_value.tv_nsec = 0; 869 itp->it_value.tv_sec = itp->it_value.tv_nsec = 0;
856 return; 870 return;
857 } 871 }
858 872
859 if (unlikely(p == NULL)) { 873 if (unlikely(p == NULL)) {
860 /* 874 /*
861 * This task already died and the timer will never fire. 875 * This task already died and the timer will never fire.
862 * In this case, expires is actually the dead value. 876 * In this case, expires is actually the dead value.
863 */ 877 */
864 dead: 878 dead:
865 sample_to_timespec(timer->it_clock, timer->it.cpu.expires, 879 sample_to_timespec(timer->it_clock, timer->it.cpu.expires,
866 &itp->it_value); 880 &itp->it_value);
867 return; 881 return;
868 } 882 }
869 883
870 /* 884 /*
871 * Sample the clock to take the difference with the expiry time. 885 * Sample the clock to take the difference with the expiry time.
872 */ 886 */
873 if (CPUCLOCK_PERTHREAD(timer->it_clock)) { 887 if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
874 cpu_clock_sample(timer->it_clock, p, &now); 888 cpu_clock_sample(timer->it_clock, p, &now);
875 clear_dead = p->exit_state; 889 clear_dead = p->exit_state;
876 } else { 890 } else {
877 read_lock(&tasklist_lock); 891 read_lock(&tasklist_lock);
878 if (unlikely(p->signal == NULL)) { 892 if (unlikely(p->signal == NULL)) {
879 /* 893 /*
880 * The process has been reaped. 894 * The process has been reaped.
881 * We can't even collect a sample any more. 895 * We can't even collect a sample any more.
882 * Call the timer disarmed, nothing else to do. 896 * Call the timer disarmed, nothing else to do.
883 */ 897 */
884 put_task_struct(p); 898 put_task_struct(p);
885 timer->it.cpu.task = NULL; 899 timer->it.cpu.task = NULL;
886 timer->it.cpu.expires.sched = 0; 900 timer->it.cpu.expires.sched = 0;
887 read_unlock(&tasklist_lock); 901 read_unlock(&tasklist_lock);
888 goto dead; 902 goto dead;
889 } else { 903 } else {
890 cpu_clock_sample_group(timer->it_clock, p, &now); 904 cpu_clock_sample_group(timer->it_clock, p, &now);
891 clear_dead = (unlikely(p->exit_state) && 905 clear_dead = (unlikely(p->exit_state) &&
892 thread_group_empty(p)); 906 thread_group_empty(p));
893 } 907 }
894 read_unlock(&tasklist_lock); 908 read_unlock(&tasklist_lock);
895 } 909 }
896 910
897 if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) { 911 if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) {
898 if (timer->it.cpu.incr.sched == 0 && 912 if (timer->it.cpu.incr.sched == 0 &&
899 cpu_time_before(timer->it_clock, 913 cpu_time_before(timer->it_clock,
900 timer->it.cpu.expires, now)) { 914 timer->it.cpu.expires, now)) {
901 /* 915 /*
902 * Do-nothing timer expired and has no reload, 916 * Do-nothing timer expired and has no reload,
903 * so it's as if it was never set. 917 * so it's as if it was never set.
904 */ 918 */
905 timer->it.cpu.expires.sched = 0; 919 timer->it.cpu.expires.sched = 0;
906 itp->it_value.tv_sec = itp->it_value.tv_nsec = 0; 920 itp->it_value.tv_sec = itp->it_value.tv_nsec = 0;
907 return; 921 return;
908 } 922 }
909 /* 923 /*
910 * Account for any expirations and reloads that should 924 * Account for any expirations and reloads that should
911 * have happened. 925 * have happened.
912 */ 926 */
913 bump_cpu_timer(timer, now); 927 bump_cpu_timer(timer, now);
914 } 928 }
915 929
916 if (unlikely(clear_dead)) { 930 if (unlikely(clear_dead)) {
917 /* 931 /*
918 * We've noticed that the thread is dead, but 932 * We've noticed that the thread is dead, but
919 * not yet reaped. Take this opportunity to 933 * not yet reaped. Take this opportunity to
920 * drop our task ref. 934 * drop our task ref.
921 */ 935 */
922 clear_dead_task(timer, now); 936 clear_dead_task(timer, now);
923 goto dead; 937 goto dead;
924 } 938 }
925 939
926 if (cpu_time_before(timer->it_clock, now, timer->it.cpu.expires)) { 940 if (cpu_time_before(timer->it_clock, now, timer->it.cpu.expires)) {
927 sample_to_timespec(timer->it_clock, 941 sample_to_timespec(timer->it_clock,
928 cpu_time_sub(timer->it_clock, 942 cpu_time_sub(timer->it_clock,
929 timer->it.cpu.expires, now), 943 timer->it.cpu.expires, now),
930 &itp->it_value); 944 &itp->it_value);
931 } else { 945 } else {
932 /* 946 /*
933 * The timer should have expired already, but the firing 947 * The timer should have expired already, but the firing
934 * hasn't taken place yet. Say it's just about to expire. 948 * hasn't taken place yet. Say it's just about to expire.
935 */ 949 */
936 itp->it_value.tv_nsec = 1; 950 itp->it_value.tv_nsec = 1;
937 itp->it_value.tv_sec = 0; 951 itp->it_value.tv_sec = 0;
938 } 952 }
939 } 953 }
940 954
941 /* 955 /*
942 * Check for any per-thread CPU timers that have fired and move them off 956 * Check for any per-thread CPU timers that have fired and move them off
943 * the tsk->cpu_timers[N] list onto the firing list. Here we update the 957 * the tsk->cpu_timers[N] list onto the firing list. Here we update the
944 * tsk->it_*_expires values to reflect the remaining thread CPU timers. 958 * tsk->it_*_expires values to reflect the remaining thread CPU timers.
945 */ 959 */
946 static void check_thread_timers(struct task_struct *tsk, 960 static void check_thread_timers(struct task_struct *tsk,
947 struct list_head *firing) 961 struct list_head *firing)
948 { 962 {
949 int maxfire; 963 int maxfire;
950 struct list_head *timers = tsk->cpu_timers; 964 struct list_head *timers = tsk->cpu_timers;
951 965
952 maxfire = 20; 966 maxfire = 20;
953 tsk->it_prof_expires = cputime_zero; 967 tsk->it_prof_expires = cputime_zero;
954 while (!list_empty(timers)) { 968 while (!list_empty(timers)) {
955 struct cpu_timer_list *t = list_entry(timers->next, 969 struct cpu_timer_list *t = list_entry(timers->next,
956 struct cpu_timer_list, 970 struct cpu_timer_list,
957 entry); 971 entry);
958 if (!--maxfire || cputime_lt(prof_ticks(tsk), t->expires.cpu)) { 972 if (!--maxfire || cputime_lt(prof_ticks(tsk), t->expires.cpu)) {
959 tsk->it_prof_expires = t->expires.cpu; 973 tsk->it_prof_expires = t->expires.cpu;
960 break; 974 break;
961 } 975 }
962 t->firing = 1; 976 t->firing = 1;
963 list_move_tail(&t->entry, firing); 977 list_move_tail(&t->entry, firing);
964 } 978 }
965 979
966 ++timers; 980 ++timers;
967 maxfire = 20; 981 maxfire = 20;
968 tsk->it_virt_expires = cputime_zero; 982 tsk->it_virt_expires = cputime_zero;
969 while (!list_empty(timers)) { 983 while (!list_empty(timers)) {
970 struct cpu_timer_list *t = list_entry(timers->next, 984 struct cpu_timer_list *t = list_entry(timers->next,
971 struct cpu_timer_list, 985 struct cpu_timer_list,
972 entry); 986 entry);
973 if (!--maxfire || cputime_lt(virt_ticks(tsk), t->expires.cpu)) { 987 if (!--maxfire || cputime_lt(virt_ticks(tsk), t->expires.cpu)) {
974 tsk->it_virt_expires = t->expires.cpu; 988 tsk->it_virt_expires = t->expires.cpu;
975 break; 989 break;
976 } 990 }
977 t->firing = 1; 991 t->firing = 1;
978 list_move_tail(&t->entry, firing); 992 list_move_tail(&t->entry, firing);
979 } 993 }
980 994
981 ++timers; 995 ++timers;
982 maxfire = 20; 996 maxfire = 20;
983 tsk->it_sched_expires = 0; 997 tsk->it_sched_expires = 0;
984 while (!list_empty(timers)) { 998 while (!list_empty(timers)) {
985 struct cpu_timer_list *t = list_entry(timers->next, 999 struct cpu_timer_list *t = list_entry(timers->next,
986 struct cpu_timer_list, 1000 struct cpu_timer_list,
987 entry); 1001 entry);
988 if (!--maxfire || tsk->sched_time < t->expires.sched) { 1002 if (!--maxfire || tsk->sched_time < t->expires.sched) {
989 tsk->it_sched_expires = t->expires.sched; 1003 tsk->it_sched_expires = t->expires.sched;
990 break; 1004 break;
991 } 1005 }
992 t->firing = 1; 1006 t->firing = 1;
993 list_move_tail(&t->entry, firing); 1007 list_move_tail(&t->entry, firing);
994 } 1008 }
995 } 1009 }
996 1010
997 /* 1011 /*
998 * Check for any per-process CPU timers that have fired and move them 1012 * Check for any per-process CPU timers that have fired and move them
999 * off the tsk->signal->cpu_timers[N] list onto the firing list. 1013 * off the tsk->signal->cpu_timers[N] list onto the firing list.
1000 * Per-thread timers have already been taken off. 1014 * Per-thread timers have already been taken off.
1001 */ 1015 */
1002 static void check_process_timers(struct task_struct *tsk, 1016 static void check_process_timers(struct task_struct *tsk,
1003 struct list_head *firing) 1017 struct list_head *firing)
1004 { 1018 {
1005 int maxfire; 1019 int maxfire;
1006 struct signal_struct *const sig = tsk->signal; 1020 struct signal_struct *const sig = tsk->signal;
1007 cputime_t utime, stime, ptime, virt_expires, prof_expires; 1021 cputime_t utime, stime, ptime, virt_expires, prof_expires;
1008 unsigned long long sched_time, sched_expires; 1022 unsigned long long sched_time, sched_expires;
1009 struct task_struct *t; 1023 struct task_struct *t;
1010 struct list_head *timers = sig->cpu_timers; 1024 struct list_head *timers = sig->cpu_timers;
1011 1025
1012 /* 1026 /*
1013 * Don't sample the current process CPU clocks if there are no timers. 1027 * Don't sample the current process CPU clocks if there are no timers.
1014 */ 1028 */
1015 if (list_empty(&timers[CPUCLOCK_PROF]) && 1029 if (list_empty(&timers[CPUCLOCK_PROF]) &&
1016 cputime_eq(sig->it_prof_expires, cputime_zero) && 1030 cputime_eq(sig->it_prof_expires, cputime_zero) &&
1017 sig->rlim[RLIMIT_CPU].rlim_cur == RLIM_INFINITY && 1031 sig->rlim[RLIMIT_CPU].rlim_cur == RLIM_INFINITY &&
1018 list_empty(&timers[CPUCLOCK_VIRT]) && 1032 list_empty(&timers[CPUCLOCK_VIRT]) &&
1019 cputime_eq(sig->it_virt_expires, cputime_zero) && 1033 cputime_eq(sig->it_virt_expires, cputime_zero) &&
1020 list_empty(&timers[CPUCLOCK_SCHED])) 1034 list_empty(&timers[CPUCLOCK_SCHED]))
1021 return; 1035 return;
1022 1036
1023 /* 1037 /*
1024 * Collect the current process totals. 1038 * Collect the current process totals.
1025 */ 1039 */
1026 utime = sig->utime; 1040 utime = sig->utime;
1027 stime = sig->stime; 1041 stime = sig->stime;
1028 sched_time = sig->sched_time; 1042 sched_time = sig->sched_time;
1029 t = tsk; 1043 t = tsk;
1030 do { 1044 do {
1031 utime = cputime_add(utime, t->utime); 1045 utime = cputime_add(utime, t->utime);
1032 stime = cputime_add(stime, t->stime); 1046 stime = cputime_add(stime, t->stime);
1033 sched_time += t->sched_time; 1047 sched_time += t->sched_time;
1034 t = next_thread(t); 1048 t = next_thread(t);
1035 } while (t != tsk); 1049 } while (t != tsk);
1036 ptime = cputime_add(utime, stime); 1050 ptime = cputime_add(utime, stime);
1037 1051
1038 maxfire = 20; 1052 maxfire = 20;
1039 prof_expires = cputime_zero; 1053 prof_expires = cputime_zero;
1040 while (!list_empty(timers)) { 1054 while (!list_empty(timers)) {
1041 struct cpu_timer_list *t = list_entry(timers->next, 1055 struct cpu_timer_list *t = list_entry(timers->next,
1042 struct cpu_timer_list, 1056 struct cpu_timer_list,
1043 entry); 1057 entry);
1044 if (!--maxfire || cputime_lt(ptime, t->expires.cpu)) { 1058 if (!--maxfire || cputime_lt(ptime, t->expires.cpu)) {
1045 prof_expires = t->expires.cpu; 1059 prof_expires = t->expires.cpu;
1046 break; 1060 break;
1047 } 1061 }
1048 t->firing = 1; 1062 t->firing = 1;
1049 list_move_tail(&t->entry, firing); 1063 list_move_tail(&t->entry, firing);
1050 } 1064 }
1051 1065
1052 ++timers; 1066 ++timers;
1053 maxfire = 20; 1067 maxfire = 20;
1054 virt_expires = cputime_zero; 1068 virt_expires = cputime_zero;
1055 while (!list_empty(timers)) { 1069 while (!list_empty(timers)) {
1056 struct cpu_timer_list *t = list_entry(timers->next, 1070 struct cpu_timer_list *t = list_entry(timers->next,
1057 struct cpu_timer_list, 1071 struct cpu_timer_list,
1058 entry); 1072 entry);
1059 if (!--maxfire || cputime_lt(utime, t->expires.cpu)) { 1073 if (!--maxfire || cputime_lt(utime, t->expires.cpu)) {
1060 virt_expires = t->expires.cpu; 1074 virt_expires = t->expires.cpu;
1061 break; 1075 break;
1062 } 1076 }
1063 t->firing = 1; 1077 t->firing = 1;
1064 list_move_tail(&t->entry, firing); 1078 list_move_tail(&t->entry, firing);
1065 } 1079 }
1066 1080
1067 ++timers; 1081 ++timers;
1068 maxfire = 20; 1082 maxfire = 20;
1069 sched_expires = 0; 1083 sched_expires = 0;
1070 while (!list_empty(timers)) { 1084 while (!list_empty(timers)) {
1071 struct cpu_timer_list *t = list_entry(timers->next, 1085 struct cpu_timer_list *t = list_entry(timers->next,
1072 struct cpu_timer_list, 1086 struct cpu_timer_list,
1073 entry); 1087 entry);
1074 if (!--maxfire || sched_time < t->expires.sched) { 1088 if (!--maxfire || sched_time < t->expires.sched) {
1075 sched_expires = t->expires.sched; 1089 sched_expires = t->expires.sched;
1076 break; 1090 break;
1077 } 1091 }
1078 t->firing = 1; 1092 t->firing = 1;
1079 list_move_tail(&t->entry, firing); 1093 list_move_tail(&t->entry, firing);
1080 } 1094 }
1081 1095
1082 /* 1096 /*
1083 * Check for the special case process timers. 1097 * Check for the special case process timers.
1084 */ 1098 */
1085 if (!cputime_eq(sig->it_prof_expires, cputime_zero)) { 1099 if (!cputime_eq(sig->it_prof_expires, cputime_zero)) {
1086 if (cputime_ge(ptime, sig->it_prof_expires)) { 1100 if (cputime_ge(ptime, sig->it_prof_expires)) {
1087 /* ITIMER_PROF fires and reloads. */ 1101 /* ITIMER_PROF fires and reloads. */
1088 sig->it_prof_expires = sig->it_prof_incr; 1102 sig->it_prof_expires = sig->it_prof_incr;
1089 if (!cputime_eq(sig->it_prof_expires, cputime_zero)) { 1103 if (!cputime_eq(sig->it_prof_expires, cputime_zero)) {
1090 sig->it_prof_expires = cputime_add( 1104 sig->it_prof_expires = cputime_add(
1091 sig->it_prof_expires, ptime); 1105 sig->it_prof_expires, ptime);
1092 } 1106 }
1093 __group_send_sig_info(SIGPROF, SEND_SIG_PRIV, tsk); 1107 __group_send_sig_info(SIGPROF, SEND_SIG_PRIV, tsk);
1094 } 1108 }
1095 if (!cputime_eq(sig->it_prof_expires, cputime_zero) && 1109 if (!cputime_eq(sig->it_prof_expires, cputime_zero) &&
1096 (cputime_eq(prof_expires, cputime_zero) || 1110 (cputime_eq(prof_expires, cputime_zero) ||
1097 cputime_lt(sig->it_prof_expires, prof_expires))) { 1111 cputime_lt(sig->it_prof_expires, prof_expires))) {
1098 prof_expires = sig->it_prof_expires; 1112 prof_expires = sig->it_prof_expires;
1099 } 1113 }
1100 } 1114 }
1101 if (!cputime_eq(sig->it_virt_expires, cputime_zero)) { 1115 if (!cputime_eq(sig->it_virt_expires, cputime_zero)) {
1102 if (cputime_ge(utime, sig->it_virt_expires)) { 1116 if (cputime_ge(utime, sig->it_virt_expires)) {
1103 /* ITIMER_VIRTUAL fires and reloads. */ 1117 /* ITIMER_VIRTUAL fires and reloads. */
1104 sig->it_virt_expires = sig->it_virt_incr; 1118 sig->it_virt_expires = sig->it_virt_incr;
1105 if (!cputime_eq(sig->it_virt_expires, cputime_zero)) { 1119 if (!cputime_eq(sig->it_virt_expires, cputime_zero)) {
1106 sig->it_virt_expires = cputime_add( 1120 sig->it_virt_expires = cputime_add(
1107 sig->it_virt_expires, utime); 1121 sig->it_virt_expires, utime);
1108 } 1122 }
1109 __group_send_sig_info(SIGVTALRM, SEND_SIG_PRIV, tsk); 1123 __group_send_sig_info(SIGVTALRM, SEND_SIG_PRIV, tsk);
1110 } 1124 }
1111 if (!cputime_eq(sig->it_virt_expires, cputime_zero) && 1125 if (!cputime_eq(sig->it_virt_expires, cputime_zero) &&
1112 (cputime_eq(virt_expires, cputime_zero) || 1126 (cputime_eq(virt_expires, cputime_zero) ||
1113 cputime_lt(sig->it_virt_expires, virt_expires))) { 1127 cputime_lt(sig->it_virt_expires, virt_expires))) {
1114 virt_expires = sig->it_virt_expires; 1128 virt_expires = sig->it_virt_expires;
1115 } 1129 }
1116 } 1130 }
1117 if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) { 1131 if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
1118 unsigned long psecs = cputime_to_secs(ptime); 1132 unsigned long psecs = cputime_to_secs(ptime);
1119 cputime_t x; 1133 cputime_t x;
1120 if (psecs >= sig->rlim[RLIMIT_CPU].rlim_max) { 1134 if (psecs >= sig->rlim[RLIMIT_CPU].rlim_max) {
1121 /* 1135 /*
1122 * At the hard limit, we just die. 1136 * At the hard limit, we just die.
1123 * No need to calculate anything else now. 1137 * No need to calculate anything else now.
1124 */ 1138 */
1125 __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk); 1139 __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);
1126 return; 1140 return;
1127 } 1141 }
1128 if (psecs >= sig->rlim[RLIMIT_CPU].rlim_cur) { 1142 if (psecs >= sig->rlim[RLIMIT_CPU].rlim_cur) {
1129 /* 1143 /*
1130 * At the soft limit, send a SIGXCPU every second. 1144 * At the soft limit, send a SIGXCPU every second.
1131 */ 1145 */
1132 __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk); 1146 __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
1133 if (sig->rlim[RLIMIT_CPU].rlim_cur 1147 if (sig->rlim[RLIMIT_CPU].rlim_cur
1134 < sig->rlim[RLIMIT_CPU].rlim_max) { 1148 < sig->rlim[RLIMIT_CPU].rlim_max) {
1135 sig->rlim[RLIMIT_CPU].rlim_cur++; 1149 sig->rlim[RLIMIT_CPU].rlim_cur++;
1136 } 1150 }
1137 } 1151 }
1138 x = secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur); 1152 x = secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur);
1139 if (cputime_eq(prof_expires, cputime_zero) || 1153 if (cputime_eq(prof_expires, cputime_zero) ||
1140 cputime_lt(x, prof_expires)) { 1154 cputime_lt(x, prof_expires)) {
1141 prof_expires = x; 1155 prof_expires = x;
1142 } 1156 }
1143 } 1157 }
1144 1158
1145 if (!cputime_eq(prof_expires, cputime_zero) || 1159 if (!cputime_eq(prof_expires, cputime_zero) ||
1146 !cputime_eq(virt_expires, cputime_zero) || 1160 !cputime_eq(virt_expires, cputime_zero) ||
1147 sched_expires != 0) { 1161 sched_expires != 0) {
1148 /* 1162 /*
1149 * Rebalance the threads' expiry times for the remaining 1163 * Rebalance the threads' expiry times for the remaining
1150 * process CPU timers. 1164 * process CPU timers.
1151 */ 1165 */
1152 1166
1153 cputime_t prof_left, virt_left, ticks; 1167 cputime_t prof_left, virt_left, ticks;
1154 unsigned long long sched_left, sched; 1168 unsigned long long sched_left, sched;
1155 const unsigned int nthreads = atomic_read(&sig->live); 1169 const unsigned int nthreads = atomic_read(&sig->live);
1156 1170
1157 if (!nthreads) 1171 if (!nthreads)
1158 return; 1172 return;
1159 1173
1160 prof_left = cputime_sub(prof_expires, utime); 1174 prof_left = cputime_sub(prof_expires, utime);
1161 prof_left = cputime_sub(prof_left, stime); 1175 prof_left = cputime_sub(prof_left, stime);
1162 prof_left = cputime_div(prof_left, nthreads); 1176 prof_left = cputime_div_non_zero(prof_left, nthreads);
1163 virt_left = cputime_sub(virt_expires, utime); 1177 virt_left = cputime_sub(virt_expires, utime);
1164 virt_left = cputime_div(virt_left, nthreads); 1178 virt_left = cputime_div_non_zero(virt_left, nthreads);
1165 if (sched_expires) { 1179 if (sched_expires) {
1166 sched_left = sched_expires - sched_time; 1180 sched_left = sched_expires - sched_time;
1167 do_div(sched_left, nthreads); 1181 do_div(sched_left, nthreads);
1182 sched_left = max_t(unsigned long long, sched_left, 1);
1168 } else { 1183 } else {
1169 sched_left = 0; 1184 sched_left = 0;
1170 } 1185 }
1171 t = tsk; 1186 t = tsk;
1172 do { 1187 do {
1173 if (unlikely(t->flags & PF_EXITING)) 1188 if (unlikely(t->flags & PF_EXITING))
1174 continue; 1189 continue;
1175 1190
1176 ticks = cputime_add(cputime_add(t->utime, t->stime), 1191 ticks = cputime_add(cputime_add(t->utime, t->stime),
1177 prof_left); 1192 prof_left);
1178 if (!cputime_eq(prof_expires, cputime_zero) && 1193 if (!cputime_eq(prof_expires, cputime_zero) &&
1179 (cputime_eq(t->it_prof_expires, cputime_zero) || 1194 (cputime_eq(t->it_prof_expires, cputime_zero) ||
1180 cputime_gt(t->it_prof_expires, ticks))) { 1195 cputime_gt(t->it_prof_expires, ticks))) {
1181 t->it_prof_expires = ticks; 1196 t->it_prof_expires = ticks;
1182 } 1197 }
1183 1198
1184 ticks = cputime_add(t->utime, virt_left); 1199 ticks = cputime_add(t->utime, virt_left);
1185 if (!cputime_eq(virt_expires, cputime_zero) && 1200 if (!cputime_eq(virt_expires, cputime_zero) &&
1186 (cputime_eq(t->it_virt_expires, cputime_zero) || 1201 (cputime_eq(t->it_virt_expires, cputime_zero) ||
1187 cputime_gt(t->it_virt_expires, ticks))) { 1202 cputime_gt(t->it_virt_expires, ticks))) {
1188 t->it_virt_expires = ticks; 1203 t->it_virt_expires = ticks;
1189 } 1204 }
1190 1205
1191 sched = t->sched_time + sched_left; 1206 sched = t->sched_time + sched_left;
1192 if (sched_expires && (t->it_sched_expires == 0 || 1207 if (sched_expires && (t->it_sched_expires == 0 ||
1193 t->it_sched_expires > sched)) { 1208 t->it_sched_expires > sched)) {
1194 t->it_sched_expires = sched; 1209 t->it_sched_expires = sched;
1195 } 1210 }
1196 } while ((t = next_thread(t)) != tsk); 1211 } while ((t = next_thread(t)) != tsk);
1197 } 1212 }
1198 } 1213 }
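cputime_div_non_zero() used above is defined outside this file; what follows is only a plausible sketch of such a helper, assuming the generic jiffies-based cputime_t, to show the intent (the sched clock path uses max_t directly for the same effect):

/* Sketch, not the actual definition: divide, but never return zero. */
#define cputime_div_non_zero(__a, __n)                                  \
({                                                                      \
        __typeof__(__a) __res = cputime_div(__a, __n);                  \
        cputime_eq(__res, cputime_zero) ? jiffies_to_cputime(1) : __res; \
})

Either way each thread's share is at least one tick, so a freshly started thread with zero accumulated CPU time still ends up with a non-zero it_*_expires instead of the "not set" value.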
1199 1214
1200 /* 1215 /*
1201 * This is called from the signal code (via do_schedule_next_timer) 1216 * This is called from the signal code (via do_schedule_next_timer)
1202 * when the last timer signal was delivered and we have to reload the timer. 1217 * when the last timer signal was delivered and we have to reload the timer.
1203 */ 1218 */
1204 void posix_cpu_timer_schedule(struct k_itimer *timer) 1219 void posix_cpu_timer_schedule(struct k_itimer *timer)
1205 { 1220 {
1206 struct task_struct *p = timer->it.cpu.task; 1221 struct task_struct *p = timer->it.cpu.task;
1207 union cpu_time_count now; 1222 union cpu_time_count now;
1208 1223
1209 if (unlikely(p == NULL)) 1224 if (unlikely(p == NULL))
1210 /* 1225 /*
1211 * The task was cleaned up already, no future firings. 1226 * The task was cleaned up already, no future firings.
1212 */ 1227 */
1213 goto out; 1228 goto out;
1214 1229
1215 /* 1230 /*
1216 * Fetch the current sample and update the timer's expiry time. 1231 * Fetch the current sample and update the timer's expiry time.
1217 */ 1232 */
1218 if (CPUCLOCK_PERTHREAD(timer->it_clock)) { 1233 if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
1219 cpu_clock_sample(timer->it_clock, p, &now); 1234 cpu_clock_sample(timer->it_clock, p, &now);
1220 bump_cpu_timer(timer, now); 1235 bump_cpu_timer(timer, now);
1221 if (unlikely(p->exit_state)) { 1236 if (unlikely(p->exit_state)) {
1222 clear_dead_task(timer, now); 1237 clear_dead_task(timer, now);
1223 goto out; 1238 goto out;
1224 } 1239 }
1225 read_lock(&tasklist_lock); /* arm_timer needs it. */ 1240 read_lock(&tasklist_lock); /* arm_timer needs it. */
1226 } else { 1241 } else {
1227 read_lock(&tasklist_lock); 1242 read_lock(&tasklist_lock);
1228 if (unlikely(p->signal == NULL)) { 1243 if (unlikely(p->signal == NULL)) {
1229 /* 1244 /*
1230 * The process has been reaped. 1245 * The process has been reaped.
1231 * We can't even collect a sample any more. 1246 * We can't even collect a sample any more.
1232 */ 1247 */
1233 put_task_struct(p); 1248 put_task_struct(p);
1234 timer->it.cpu.task = p = NULL; 1249 timer->it.cpu.task = p = NULL;
1235 timer->it.cpu.expires.sched = 0; 1250 timer->it.cpu.expires.sched = 0;
1236 goto out_unlock; 1251 goto out_unlock;
1237 } else if (unlikely(p->exit_state) && thread_group_empty(p)) { 1252 } else if (unlikely(p->exit_state) && thread_group_empty(p)) {
1238 /* 1253 /*
1239 * We've noticed that the thread is dead, but 1254 * We've noticed that the thread is dead, but
1240 * not yet reaped. Take this opportunity to 1255 * not yet reaped. Take this opportunity to
1241 * drop our task ref. 1256 * drop our task ref.
1242 */ 1257 */
1243 clear_dead_task(timer, now); 1258 clear_dead_task(timer, now);
1244 goto out_unlock; 1259 goto out_unlock;
1245 } 1260 }
1246 cpu_clock_sample_group(timer->it_clock, p, &now); 1261 cpu_clock_sample_group(timer->it_clock, p, &now);
1247 bump_cpu_timer(timer, now); 1262 bump_cpu_timer(timer, now);
1248 /* Leave the tasklist_lock locked for the call below. */ 1263 /* Leave the tasklist_lock locked for the call below. */
1249 } 1264 }
1250 1265
1251 /* 1266 /*
1252 * Now re-arm for the new expiry time. 1267 * Now re-arm for the new expiry time.
1253 */ 1268 */
1254 arm_timer(timer, now); 1269 arm_timer(timer, now);
1255 1270
1256 out_unlock: 1271 out_unlock:
1257 read_unlock(&tasklist_lock); 1272 read_unlock(&tasklist_lock);
1258 1273
1259 out: 1274 out:
1260 timer->it_overrun_last = timer->it_overrun; 1275 timer->it_overrun_last = timer->it_overrun;
1261 timer->it_overrun = -1; 1276 timer->it_overrun = -1;
1262 ++timer->it_requeue_pending; 1277 ++timer->it_requeue_pending;
1263 } 1278 }
1264 1279
1265 /* 1280 /*
1266 * This is called from the timer interrupt handler. The irq handler has 1281 * This is called from the timer interrupt handler. The irq handler has
1267 * already updated our counts. We need to check if any timers fire now. 1282 * already updated our counts. We need to check if any timers fire now.
1268 * Interrupts are disabled. 1283 * Interrupts are disabled.
1269 */ 1284 */
1270 void run_posix_cpu_timers(struct task_struct *tsk) 1285 void run_posix_cpu_timers(struct task_struct *tsk)
1271 { 1286 {
1272 LIST_HEAD(firing); 1287 LIST_HEAD(firing);
1273 struct k_itimer *timer, *next; 1288 struct k_itimer *timer, *next;
1274 1289
1275 BUG_ON(!irqs_disabled()); 1290 BUG_ON(!irqs_disabled());
1276 1291
1277 #define UNEXPIRED(clock) \ 1292 #define UNEXPIRED(clock) \
1278 (cputime_eq(tsk->it_##clock##_expires, cputime_zero) || \ 1293 (cputime_eq(tsk->it_##clock##_expires, cputime_zero) || \
1279 cputime_lt(clock##_ticks(tsk), tsk->it_##clock##_expires)) 1294 cputime_lt(clock##_ticks(tsk), tsk->it_##clock##_expires))
1280 1295
1281 if (UNEXPIRED(prof) && UNEXPIRED(virt) && 1296 if (UNEXPIRED(prof) && UNEXPIRED(virt) &&
1282 (tsk->it_sched_expires == 0 || 1297 (tsk->it_sched_expires == 0 ||
1283 tsk->sched_time < tsk->it_sched_expires)) 1298 tsk->sched_time < tsk->it_sched_expires))
1284 return; 1299 return;
1285 1300
1286 #undef UNEXPIRED 1301 #undef UNEXPIRED
1287 1302
1288 /* 1303 /*
1289 * Double-check with locks held. 1304 * Double-check with locks held.
1290 */ 1305 */
1291 read_lock(&tasklist_lock); 1306 read_lock(&tasklist_lock);
1292 if (likely(tsk->signal != NULL)) { 1307 if (likely(tsk->signal != NULL)) {
1293 spin_lock(&tsk->sighand->siglock); 1308 spin_lock(&tsk->sighand->siglock);
1294 1309
1295 /* 1310 /*
1296 * Here we take all the firing timers off tsk->cpu_timers[N] and 1311 * Here we take all the firing timers off tsk->cpu_timers[N] and
1297 * tsk->signal->cpu_timers[N], and put them on the firing list. 1312 * tsk->signal->cpu_timers[N], and put them on the firing list.
1298 */ 1313 */
1299 check_thread_timers(tsk, &firing); 1314 check_thread_timers(tsk, &firing);
1300 check_process_timers(tsk, &firing); 1315 check_process_timers(tsk, &firing);
1301 1316
1302 /* 1317 /*
1303 * We must release these locks before taking any timer's lock. 1318 * We must release these locks before taking any timer's lock.
1304 * There is a potential race with timer deletion here, as the 1319 * There is a potential race with timer deletion here, as the
1305 * siglock now protects our private firing list. We have set 1320 * siglock now protects our private firing list. We have set
1306 * the firing flag in each timer, so that a deletion attempt 1321 * the firing flag in each timer, so that a deletion attempt
1307 * that gets the timer lock before we do will give it up and 1322 * that gets the timer lock before we do will give it up and
1308 * spin until we've taken care of that timer below. 1323 * spin until we've taken care of that timer below.
1309 */ 1324 */
1310 spin_unlock(&tsk->sighand->siglock); 1325 spin_unlock(&tsk->sighand->siglock);
1311 } 1326 }
1312 read_unlock(&tasklist_lock); 1327 read_unlock(&tasklist_lock);
1313 1328
1314 /* 1329 /*
1315 * Now that all the timers on our list have the firing flag, 1330 * Now that all the timers on our list have the firing flag,
1316 * no one will touch their list entries but us. We'll take 1331 * no one will touch their list entries but us. We'll take
1317 * each timer's lock before clearing its firing flag, so no 1332 * each timer's lock before clearing its firing flag, so no
1318 * timer call will interfere. 1333 * timer call will interfere.
1319 */ 1334 */
1320 list_for_each_entry_safe(timer, next, &firing, it.cpu.entry) { 1335 list_for_each_entry_safe(timer, next, &firing, it.cpu.entry) {
1321 int firing; 1336 int firing;
1322 spin_lock(&timer->it_lock); 1337 spin_lock(&timer->it_lock);
1323 list_del_init(&timer->it.cpu.entry); 1338 list_del_init(&timer->it.cpu.entry);
1324 firing = timer->it.cpu.firing; 1339 firing = timer->it.cpu.firing;
1325 timer->it.cpu.firing = 0; 1340 timer->it.cpu.firing = 0;
1326 /* 1341 /*
1327 * The firing flag is -1 if we collided with a reset 1342 * The firing flag is -1 if we collided with a reset
1328 * of the timer, which already reported this 1343 * of the timer, which already reported this
1329 * almost-firing as an overrun. So don't generate an event. 1344 * almost-firing as an overrun. So don't generate an event.
1330 */ 1345 */
1331 if (likely(firing >= 0)) { 1346 if (likely(firing >= 0)) {
1332 cpu_timer_fire(timer); 1347 cpu_timer_fire(timer);
1333 } 1348 }
1334 spin_unlock(&timer->it_lock); 1349 spin_unlock(&timer->it_lock);
1335 } 1350 }
1336 } 1351 }
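For illustration only, a hypothetical userspace workload (not the reported test case) of the kind the clamping protects against: a sub-tick process-wide ITIMER_PROF plus a stream of freshly created threads. Without the clamp, a thread whose share rounds to zero keeps it_prof_expires at zero, the early return above treats its timer as unset, and SIGPROF delivery can stall:

#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <sys/time.h>
#include <unistd.h>

static volatile sig_atomic_t profs;

static void on_prof(int sig) { (void)sig; profs++; }

static void *short_lived(void *arg) { (void)arg; usleep(1000); return NULL; }

int main(void)
{
        struct itimerval it = {
                .it_interval = { .tv_usec = 1 },        /* well under one tick */
                .it_value    = { .tv_usec = 1 },
        };
        int i;

        signal(SIGPROF, on_prof);
        setitimer(ITIMER_PROF, &it, NULL);

        for (i = 0; i < 1000; i++) {
                pthread_t t;
                pthread_create(&t, NULL, short_lived, NULL);
                pthread_join(t, NULL);
        }
        printf("SIGPROF deliveries seen: %d\n", (int)profs);
        return 0;
}

Build with -lpthread; the exact behaviour depends on HZ and scheduling, so treat this as a sketch rather than a reproducer.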
1337 1352
1338 /* 1353 /*
1339 * Set one of the process-wide special case CPU timers. 1354 * Set one of the process-wide special case CPU timers.
1340 * The tasklist_lock and tsk->sighand->siglock must be held by the caller. 1355 * The tasklist_lock and tsk->sighand->siglock must be held by the caller.
1341 * The oldval argument is null for the RLIMIT_CPU timer, where *newval is 1356 * The oldval argument is null for the RLIMIT_CPU timer, where *newval is
1342 * absolute; non-null for ITIMER_*, where *newval is relative and we update 1357 * absolute; non-null for ITIMER_*, where *newval is relative and we update
1343 * it to be absolute, *oldval is absolute and we update it to be relative. 1358 * it to be absolute, *oldval is absolute and we update it to be relative.
1344 */ 1359 */
1345 void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, 1360 void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
1346 cputime_t *newval, cputime_t *oldval) 1361 cputime_t *newval, cputime_t *oldval)
1347 { 1362 {
1348 union cpu_time_count now; 1363 union cpu_time_count now;
1349 struct list_head *head; 1364 struct list_head *head;
1350 1365
1351 BUG_ON(clock_idx == CPUCLOCK_SCHED); 1366 BUG_ON(clock_idx == CPUCLOCK_SCHED);
1352 cpu_clock_sample_group_locked(clock_idx, tsk, &now); 1367 cpu_clock_sample_group_locked(clock_idx, tsk, &now);
1353 1368
1354 if (oldval) { 1369 if (oldval) {
1355 if (!cputime_eq(*oldval, cputime_zero)) { 1370 if (!cputime_eq(*oldval, cputime_zero)) {
1356 if (cputime_le(*oldval, now.cpu)) { 1371 if (cputime_le(*oldval, now.cpu)) {
1357 /* Just about to fire. */ 1372 /* Just about to fire. */
1358 *oldval = jiffies_to_cputime(1); 1373 *oldval = jiffies_to_cputime(1);
1359 } else { 1374 } else {
1360 *oldval = cputime_sub(*oldval, now.cpu); 1375 *oldval = cputime_sub(*oldval, now.cpu);
1361 } 1376 }
1362 } 1377 }
1363 1378
1364 if (cputime_eq(*newval, cputime_zero)) 1379 if (cputime_eq(*newval, cputime_zero))
1365 return; 1380 return;
1366 *newval = cputime_add(*newval, now.cpu); 1381 *newval = cputime_add(*newval, now.cpu);
1367 1382
1368 /* 1383 /*
1369 * If the RLIMIT_CPU timer will expire before the 1384 * If the RLIMIT_CPU timer will expire before the
1370 * ITIMER_PROF timer, we have nothing else to do. 1385 * ITIMER_PROF timer, we have nothing else to do.
1371 */ 1386 */
1372 if (tsk->signal->rlim[RLIMIT_CPU].rlim_cur 1387 if (tsk->signal->rlim[RLIMIT_CPU].rlim_cur
1373 < cputime_to_secs(*newval)) 1388 < cputime_to_secs(*newval))
1374 return; 1389 return;
1375 } 1390 }
1376 1391
1377 /* 1392 /*
1378 * Check whether there are any process timers already set to fire 1393 * Check whether there are any process timers already set to fire
1379 * before this one. If so, we don't have anything more to do. 1394 * before this one. If so, we don't have anything more to do.
1380 */ 1395 */
1381 head = &tsk->signal->cpu_timers[clock_idx]; 1396 head = &tsk->signal->cpu_timers[clock_idx];
1382 if (list_empty(head) || 1397 if (list_empty(head) ||
1383 cputime_ge(list_entry(head->next, 1398 cputime_ge(list_entry(head->next,
1384 struct cpu_timer_list, entry)->expires.cpu, 1399 struct cpu_timer_list, entry)->expires.cpu,
1385 *newval)) { 1400 *newval)) {
1386 /* 1401 /*
1387 * Rejigger each thread's expiry time so that one will 1402 * Rejigger each thread's expiry time so that one will
1388 * notice before we hit the process-cumulative expiry time. 1403 * notice before we hit the process-cumulative expiry time.
1389 */ 1404 */
1390 union cpu_time_count expires = { .sched = 0 }; 1405 union cpu_time_count expires = { .sched = 0 };
1391 expires.cpu = *newval; 1406 expires.cpu = *newval;
1392 process_timer_rebalance(tsk, clock_idx, expires, now); 1407 process_timer_rebalance(tsk, clock_idx, expires, now);
1393 } 1408 }
1394 } 1409 }
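setitimer(ITIMER_PROF/ITIMER_VIRTUAL) is the usual caller of set_process_cpu_timer(), with newval/oldval carrying the relative itimer values; a small hypothetical userspace sketch:

#include <signal.h>
#include <stdio.h>
#include <sys/time.h>

static volatile sig_atomic_t got_vtalrm;

static void handler(int sig) { (void)sig; got_vtalrm = 1; }

int main(void)
{
        struct itimerval it = { .it_value = { .tv_usec = 200000 } };

        signal(SIGVTALRM, handler);
        setitimer(ITIMER_VIRTUAL, &it, NULL);

        while (!got_vtalrm)
                ;               /* burn user CPU time until the timer fires */

        puts("SIGVTALRM after roughly 200ms of user CPU time");
        return 0;
}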
1395 1410
1396 static int do_cpu_nanosleep(const clockid_t which_clock, int flags, 1411 static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
1397 struct timespec *rqtp, struct itimerspec *it) 1412 struct timespec *rqtp, struct itimerspec *it)
1398 { 1413 {
1399 struct k_itimer timer; 1414 struct k_itimer timer;
1400 int error; 1415 int error;
1401 1416
1402 /* 1417 /*
1403 * Set up a temporary timer and then wait for it to go off. 1418 * Set up a temporary timer and then wait for it to go off.
1404 */ 1419 */
1405 memset(&timer, 0, sizeof timer); 1420 memset(&timer, 0, sizeof timer);
1406 spin_lock_init(&timer.it_lock); 1421 spin_lock_init(&timer.it_lock);
1407 timer.it_clock = which_clock; 1422 timer.it_clock = which_clock;
1408 timer.it_overrun = -1; 1423 timer.it_overrun = -1;
1409 error = posix_cpu_timer_create(&timer); 1424 error = posix_cpu_timer_create(&timer);
1410 timer.it_process = current; 1425 timer.it_process = current;
1411 if (!error) { 1426 if (!error) {
1412 static struct itimerspec zero_it; 1427 static struct itimerspec zero_it;
1413 1428
1414 memset(it, 0, sizeof *it); 1429 memset(it, 0, sizeof *it);
1415 it->it_value = *rqtp; 1430 it->it_value = *rqtp;
1416 1431
1417 spin_lock_irq(&timer.it_lock); 1432 spin_lock_irq(&timer.it_lock);
1418 error = posix_cpu_timer_set(&timer, flags, it, NULL); 1433 error = posix_cpu_timer_set(&timer, flags, it, NULL);
1419 if (error) { 1434 if (error) {
1420 spin_unlock_irq(&timer.it_lock); 1435 spin_unlock_irq(&timer.it_lock);
1421 return error; 1436 return error;
1422 } 1437 }
1423 1438
1424 while (!signal_pending(current)) { 1439 while (!signal_pending(current)) {
1425 if (timer.it.cpu.expires.sched == 0) { 1440 if (timer.it.cpu.expires.sched == 0) {
1426 /* 1441 /*
1427 * Our timer fired and was reset. 1442 * Our timer fired and was reset.
1428 */ 1443 */
1429 spin_unlock_irq(&timer.it_lock); 1444 spin_unlock_irq(&timer.it_lock);
1430 return 0; 1445 return 0;
1431 } 1446 }
1432 1447
1433 /* 1448 /*
1434 * Block until cpu_timer_fire (or a signal) wakes us. 1449 * Block until cpu_timer_fire (or a signal) wakes us.
1435 */ 1450 */
1436 __set_current_state(TASK_INTERRUPTIBLE); 1451 __set_current_state(TASK_INTERRUPTIBLE);
1437 spin_unlock_irq(&timer.it_lock); 1452 spin_unlock_irq(&timer.it_lock);
1438 schedule(); 1453 schedule();
1439 spin_lock_irq(&timer.it_lock); 1454 spin_lock_irq(&timer.it_lock);
1440 } 1455 }
1441 1456
1442 /* 1457 /*
1443 * We were interrupted by a signal. 1458 * We were interrupted by a signal.
1444 */ 1459 */
1445 sample_to_timespec(which_clock, timer.it.cpu.expires, rqtp); 1460 sample_to_timespec(which_clock, timer.it.cpu.expires, rqtp);
1446 posix_cpu_timer_set(&timer, 0, &zero_it, it); 1461 posix_cpu_timer_set(&timer, 0, &zero_it, it);
1447 spin_unlock_irq(&timer.it_lock); 1462 spin_unlock_irq(&timer.it_lock);
1448 1463
1449 if ((it->it_value.tv_sec | it->it_value.tv_nsec) == 0) { 1464 if ((it->it_value.tv_sec | it->it_value.tv_nsec) == 0) {
1450 /* 1465 /*
1451 * It actually did fire already. 1466 * It actually did fire already.
1452 */ 1467 */
1453 return 0; 1468 return 0;
1454 } 1469 }
1455 1470
1456 error = -ERESTART_RESTARTBLOCK; 1471 error = -ERESTART_RESTARTBLOCK;
1457 } 1472 }
1458 1473
1459 return error; 1474 return error;
1460 } 1475 }
1461 1476
1462 int posix_cpu_nsleep(const clockid_t which_clock, int flags, 1477 int posix_cpu_nsleep(const clockid_t which_clock, int flags,
1463 struct timespec *rqtp, struct timespec __user *rmtp) 1478 struct timespec *rqtp, struct timespec __user *rmtp)
1464 { 1479 {
1465 struct restart_block *restart_block = 1480 struct restart_block *restart_block =
1466 &current_thread_info()->restart_block; 1481 &current_thread_info()->restart_block;
1467 struct itimerspec it; 1482 struct itimerspec it;
1468 int error; 1483 int error;
1469 1484
1470 /* 1485 /*
1471 * Diagnose required errors first. 1486 * Diagnose required errors first.
1472 */ 1487 */
1473 if (CPUCLOCK_PERTHREAD(which_clock) && 1488 if (CPUCLOCK_PERTHREAD(which_clock) &&
1474 (CPUCLOCK_PID(which_clock) == 0 || 1489 (CPUCLOCK_PID(which_clock) == 0 ||
1475 CPUCLOCK_PID(which_clock) == current->pid)) 1490 CPUCLOCK_PID(which_clock) == current->pid))
1476 return -EINVAL; 1491 return -EINVAL;
1477 1492
1478 error = do_cpu_nanosleep(which_clock, flags, rqtp, &it); 1493 error = do_cpu_nanosleep(which_clock, flags, rqtp, &it);
1479 1494
1480 if (error == -ERESTART_RESTARTBLOCK) { 1495 if (error == -ERESTART_RESTARTBLOCK) {
1481 1496
1482 if (flags & TIMER_ABSTIME) 1497 if (flags & TIMER_ABSTIME)
1483 return -ERESTARTNOHAND; 1498 return -ERESTARTNOHAND;
1484 /* 1499 /*
1485 * Report back to the user the time still remaining. 1500 * Report back to the user the time still remaining.
1486 */ 1501 */
1487 if (rmtp != NULL && copy_to_user(rmtp, &it.it_value, sizeof *rmtp)) 1502 if (rmtp != NULL && copy_to_user(rmtp, &it.it_value, sizeof *rmtp))
1488 return -EFAULT; 1503 return -EFAULT;
1489 1504
1490 restart_block->fn = posix_cpu_nsleep_restart; 1505 restart_block->fn = posix_cpu_nsleep_restart;
1491 restart_block->arg0 = which_clock; 1506 restart_block->arg0 = which_clock;
1492 restart_block->arg1 = (unsigned long) rmtp; 1507 restart_block->arg1 = (unsigned long) rmtp;
1493 restart_block->arg2 = rqtp->tv_sec; 1508 restart_block->arg2 = rqtp->tv_sec;
1494 restart_block->arg3 = rqtp->tv_nsec; 1509 restart_block->arg3 = rqtp->tv_nsec;
1495 } 1510 }
1496 return error; 1511 return error;
1497 } 1512 }
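The userspace entry point for this path is clock_nanosleep() on a CPU-time clock. A hedged sketch of a relative sleep on the process CPU clock; a helper thread burns CPU so the clock actually advances (otherwise the call would simply keep waiting):

#include <pthread.h>
#include <stdio.h>
#include <time.h>

static void *spin(void *arg)
{
        (void)arg;
        for (;;)
                ;       /* consume process CPU time so the sleep can complete */
}

int main(void)
{
        pthread_t t;
        struct timespec budget = { .tv_sec = 1, .tv_nsec = 0 };
        int err;

        pthread_create(&t, NULL, spin, NULL);

        /* flags == 0: relative sleep, returns once the whole process has
         * consumed one more second of CPU time. */
        err = clock_nanosleep(CLOCK_PROCESS_CPUTIME_ID, 0, &budget, NULL);
        printf("clock_nanosleep returned %d\n", err);
        return err;
}

Per the -EINVAL check above, sleeping on your own thread's clock is rejected, and thread_cpu_nsleep() below rejects CLOCK_THREAD_CPUTIME_ID outright.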
1498 1513
1499 long posix_cpu_nsleep_restart(struct restart_block *restart_block) 1514 long posix_cpu_nsleep_restart(struct restart_block *restart_block)
1500 { 1515 {
1501 clockid_t which_clock = restart_block->arg0; 1516 clockid_t which_clock = restart_block->arg0;
1502 struct timespec __user *rmtp; 1517 struct timespec __user *rmtp;
1503 struct timespec t; 1518 struct timespec t;
1504 struct itimerspec it; 1519 struct itimerspec it;
1505 int error; 1520 int error;
1506 1521
1507 rmtp = (struct timespec __user *) restart_block->arg1; 1522 rmtp = (struct timespec __user *) restart_block->arg1;
1508 t.tv_sec = restart_block->arg2; 1523 t.tv_sec = restart_block->arg2;
1509 t.tv_nsec = restart_block->arg3; 1524 t.tv_nsec = restart_block->arg3;
1510 1525
1511 restart_block->fn = do_no_restart_syscall; 1526 restart_block->fn = do_no_restart_syscall;
1512 error = do_cpu_nanosleep(which_clock, TIMER_ABSTIME, &t, &it); 1527 error = do_cpu_nanosleep(which_clock, TIMER_ABSTIME, &t, &it);
1513 1528
1514 if (error == -ERESTART_RESTARTBLOCK) { 1529 if (error == -ERESTART_RESTARTBLOCK) {
1515 /* 1530 /*
1516 * Report back to the user the time still remaining. 1531 * Report back to the user the time still remaining.
1517 */ 1532 */
1518 if (rmtp != NULL && copy_to_user(rmtp, &it.it_value, sizeof *rmtp)) 1533 if (rmtp != NULL && copy_to_user(rmtp, &it.it_value, sizeof *rmtp))
1519 return -EFAULT; 1534 return -EFAULT;
1520 1535
1521 restart_block->fn = posix_cpu_nsleep_restart; 1536 restart_block->fn = posix_cpu_nsleep_restart;
1522 restart_block->arg0 = which_clock; 1537 restart_block->arg0 = which_clock;
1523 restart_block->arg1 = (unsigned long) rmtp; 1538 restart_block->arg1 = (unsigned long) rmtp;
1524 restart_block->arg2 = t.tv_sec; 1539 restart_block->arg2 = t.tv_sec;
1525 restart_block->arg3 = t.tv_nsec; 1540 restart_block->arg3 = t.tv_nsec;
1526 } 1541 }
1527 return error; 1542 return error;
1528 1543
1529 } 1544 }
1530 1545
1531 1546
1532 #define PROCESS_CLOCK MAKE_PROCESS_CPUCLOCK(0, CPUCLOCK_SCHED) 1547 #define PROCESS_CLOCK MAKE_PROCESS_CPUCLOCK(0, CPUCLOCK_SCHED)
1533 #define THREAD_CLOCK MAKE_THREAD_CPUCLOCK(0, CPUCLOCK_SCHED) 1548 #define THREAD_CLOCK MAKE_THREAD_CPUCLOCK(0, CPUCLOCK_SCHED)
1534 1549
1535 static int process_cpu_clock_getres(const clockid_t which_clock, 1550 static int process_cpu_clock_getres(const clockid_t which_clock,
1536 struct timespec *tp) 1551 struct timespec *tp)
1537 { 1552 {
1538 return posix_cpu_clock_getres(PROCESS_CLOCK, tp); 1553 return posix_cpu_clock_getres(PROCESS_CLOCK, tp);
1539 } 1554 }
1540 static int process_cpu_clock_get(const clockid_t which_clock, 1555 static int process_cpu_clock_get(const clockid_t which_clock,
1541 struct timespec *tp) 1556 struct timespec *tp)
1542 { 1557 {
1543 return posix_cpu_clock_get(PROCESS_CLOCK, tp); 1558 return posix_cpu_clock_get(PROCESS_CLOCK, tp);
1544 } 1559 }
1545 static int process_cpu_timer_create(struct k_itimer *timer) 1560 static int process_cpu_timer_create(struct k_itimer *timer)
1546 { 1561 {
1547 timer->it_clock = PROCESS_CLOCK; 1562 timer->it_clock = PROCESS_CLOCK;
1548 return posix_cpu_timer_create(timer); 1563 return posix_cpu_timer_create(timer);
1549 } 1564 }
1550 static int process_cpu_nsleep(const clockid_t which_clock, int flags, 1565 static int process_cpu_nsleep(const clockid_t which_clock, int flags,
1551 struct timespec *rqtp, 1566 struct timespec *rqtp,
1552 struct timespec __user *rmtp) 1567 struct timespec __user *rmtp)
1553 { 1568 {
1554 return posix_cpu_nsleep(PROCESS_CLOCK, flags, rqtp, rmtp); 1569 return posix_cpu_nsleep(PROCESS_CLOCK, flags, rqtp, rmtp);
1555 } 1570 }
1556 static long process_cpu_nsleep_restart(struct restart_block *restart_block) 1571 static long process_cpu_nsleep_restart(struct restart_block *restart_block)
1557 { 1572 {
1558 return -EINVAL; 1573 return -EINVAL;
1559 } 1574 }
1560 static int thread_cpu_clock_getres(const clockid_t which_clock, 1575 static int thread_cpu_clock_getres(const clockid_t which_clock,
1561 struct timespec *tp) 1576 struct timespec *tp)
1562 { 1577 {
1563 return posix_cpu_clock_getres(THREAD_CLOCK, tp); 1578 return posix_cpu_clock_getres(THREAD_CLOCK, tp);
1564 } 1579 }
1565 static int thread_cpu_clock_get(const clockid_t which_clock, 1580 static int thread_cpu_clock_get(const clockid_t which_clock,
1566 struct timespec *tp) 1581 struct timespec *tp)
1567 { 1582 {
1568 return posix_cpu_clock_get(THREAD_CLOCK, tp); 1583 return posix_cpu_clock_get(THREAD_CLOCK, tp);
1569 } 1584 }
1570 static int thread_cpu_timer_create(struct k_itimer *timer) 1585 static int thread_cpu_timer_create(struct k_itimer *timer)
1571 { 1586 {
1572 timer->it_clock = THREAD_CLOCK; 1587 timer->it_clock = THREAD_CLOCK;
1573 return posix_cpu_timer_create(timer); 1588 return posix_cpu_timer_create(timer);
1574 } 1589 }
1575 static int thread_cpu_nsleep(const clockid_t which_clock, int flags, 1590 static int thread_cpu_nsleep(const clockid_t which_clock, int flags,
1576 struct timespec *rqtp, struct timespec __user *rmtp) 1591 struct timespec *rqtp, struct timespec __user *rmtp)
1577 { 1592 {
1578 return -EINVAL; 1593 return -EINVAL;
1579 } 1594 }
1580 static long thread_cpu_nsleep_restart(struct restart_block *restart_block) 1595 static long thread_cpu_nsleep_restart(struct restart_block *restart_block)
1581 { 1596 {
1582 return -EINVAL; 1597 return -EINVAL;
1583 } 1598 }
1584 1599
1585 static __init int init_posix_cpu_timers(void) 1600 static __init int init_posix_cpu_timers(void)
1586 { 1601 {
1587 struct k_clock process = { 1602 struct k_clock process = {
1588 .clock_getres = process_cpu_clock_getres, 1603 .clock_getres = process_cpu_clock_getres,
1589 .clock_get = process_cpu_clock_get, 1604 .clock_get = process_cpu_clock_get,
1590 .clock_set = do_posix_clock_nosettime, 1605 .clock_set = do_posix_clock_nosettime,
1591 .timer_create = process_cpu_timer_create, 1606 .timer_create = process_cpu_timer_create,
1592 .nsleep = process_cpu_nsleep, 1607 .nsleep = process_cpu_nsleep,
1593 .nsleep_restart = process_cpu_nsleep_restart, 1608 .nsleep_restart = process_cpu_nsleep_restart,
1594 }; 1609 };
1595 struct k_clock thread = { 1610 struct k_clock thread = {
1596 .clock_getres = thread_cpu_clock_getres, 1611 .clock_getres = thread_cpu_clock_getres,
1597 .clock_get = thread_cpu_clock_get, 1612 .clock_get = thread_cpu_clock_get,
1598 .clock_set = do_posix_clock_nosettime, 1613 .clock_set = do_posix_clock_nosettime,
1599 .timer_create = thread_cpu_timer_create, 1614 .timer_create = thread_cpu_timer_create,
1600 .nsleep = thread_cpu_nsleep, 1615 .nsleep = thread_cpu_nsleep,
1601 .nsleep_restart = thread_cpu_nsleep_restart, 1616 .nsleep_restart = thread_cpu_nsleep_restart,
1602 }; 1617 };
1603 1618
1604 register_posix_clock(CLOCK_PROCESS_CPUTIME_ID, &process); 1619 register_posix_clock(CLOCK_PROCESS_CPUTIME_ID, &process);
1605 register_posix_clock(CLOCK_THREAD_CPUTIME_ID, &thread); 1620 register_posix_clock(CLOCK_THREAD_CPUTIME_ID, &thread);
1606 1621
1607 return 0; 1622 return 0;
1608 } 1623 }
1609 __initcall(init_posix_cpu_timers); 1624 __initcall(init_posix_cpu_timers);
1610 1625
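The two clock IDs registered above are what userspace reaches through the standard clock calls; a small hypothetical probe (behaviour as on kernels of this vintage):

#include <stdio.h>
#include <time.h>

static void show(const char *name, clockid_t id)
{
        struct timespec res, now;

        clock_getres(id, &res);         /* ends up in posix_cpu_clock_getres() */
        clock_gettime(id, &now);        /* ends up in posix_cpu_clock_get() */
        printf("%s: resolution %ld ns, value %ld.%09ld\n",
               name, res.tv_nsec, (long)now.tv_sec, now.tv_nsec);
}

int main(void)
{
        show("CLOCK_PROCESS_CPUTIME_ID", CLOCK_PROCESS_CPUTIME_ID);
        show("CLOCK_THREAD_CPUTIME_ID", CLOCK_THREAD_CPUTIME_ID);
        return 0;
}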