Commit 490dea45d00f01847ebebd007685d564aaf2cd98

Authored by Peter Zijlstra
Committed by Ingo Molnar
1 parent ede6f5aea0

itimers: remove the per-cpu-ish-ness

Either we bounce one cacheline per cpu per tick, yielding n^2 bounces,
or we just bounce a single one.

Also, using per-cpu allocations for the thread groups complicates the
per-cpu allocator in that it is currently designed to be a fixed-size
allocator, and the only possible extension to that would be vmap-based,
which is seriously constrained on 32-bit archs.

So making the per-cpu memory requirement depend on the number of
processes is an issue.

Lastly, it didn't deal with cpu-hotplug, although admittedly that might
be fixable.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

Showing 5 changed files with 46 additions and 107 deletions
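
To see the shape of the change without reading the diff hunks, here is a
minimal userspace sketch of the new scheme. It is an illustration, not kernel
code: pthread spinlocks stand in for the kernel's spinlock_t, plain integers
stand in for cputime_t, and the names account_user_time() and
snapshot_cputime() are invented stand-ins for the kernel's
account_group_user_time() and the new inline thread_group_cputime(). Both the
writer and the reader go through one lock on one shared structure, so a tick
touches a single cacheline instead of walking one per possible CPU.

#include <pthread.h>
#include <stdio.h>

/* Simplified stand-in for the post-commit struct task_cputime:
 * a single shared totals structure guarded by its own lock,
 * replacing the old alloc_percpu() array of per-CPU totals. */
struct task_cputime {
    unsigned long long utime;
    unsigned long long stime;
    unsigned long long sum_exec_runtime;
    pthread_spinlock_t lock;
};

static struct task_cputime totals;

/* Writer path, modeled on account_group_user_time():
 * take the lock, bump one field, drop the lock. */
static void account_user_time(unsigned long long delta)
{
    pthread_spin_lock(&totals.lock);
    totals.utime += delta;
    pthread_spin_unlock(&totals.lock);
}

/* Reader path, modeled on the new inline thread_group_cputime():
 * a locked structure copy replaces the old for_each_possible_cpu()
 * summation loop. The copy includes the lock field, just as the
 * kernel's *times = *totals does; the caller never touches it. */
static void snapshot_cputime(struct task_cputime *times)
{
    pthread_spin_lock(&totals.lock);
    *times = totals;
    pthread_spin_unlock(&totals.lock);
}

int main(void)
{
    struct task_cputime snap;

    pthread_spin_init(&totals.lock, PTHREAD_PROCESS_PRIVATE);
    account_user_time(42);
    snapshot_cputime(&snap);
    printf("utime=%llu\n", snap.utime);
    return 0;
}

The locked structure copy in snapshot_cputime() is the direct analogue of the
*times = *totals copy this commit adds to thread_group_cputime().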

include/linux/init_task.h
... ... @@ -48,6 +48,12 @@
48 48 .posix_timers = LIST_HEAD_INIT(sig.posix_timers), \
49 49 .cpu_timers = INIT_CPU_TIMERS(sig.cpu_timers), \
50 50 .rlim = INIT_RLIMITS, \
  51 + .cputime = { .totals = { \
  52 + .utime = cputime_zero, \
  53 + .stime = cputime_zero, \
  54 + .sum_exec_runtime = 0, \
  55 + .lock = __SPIN_LOCK_UNLOCKED(sig.cputime.totals.lock), \
  56 + }, }, \
51 57 }
52 58  
53 59 extern struct nsproxy init_nsproxy;
include/linux/sched.h
... ... @@ -450,6 +450,7 @@
450 450 cputime_t utime;
451 451 cputime_t stime;
452 452 unsigned long long sum_exec_runtime;
  453 + spinlock_t lock;
453 454 };
454 455 /* Alternate field names when used to cache expirations. */
455 456 #define prof_exp stime
... ... @@ -465,7 +466,7 @@
465 466 * used for thread group CPU clock calculations.
466 467 */
467 468 struct thread_group_cputime {
468   - struct task_cputime *totals;
  469 + struct task_cputime totals;
469 470 };
470 471  
471 472 /*
472 473  
473 474  
474 475  
475 476  
... ... @@ -2180,24 +2181,30 @@
2180 2181 * Thread group CPU time accounting.
2181 2182 */
2182 2183  
2183   -extern int thread_group_cputime_alloc(struct task_struct *);
2184   -extern void thread_group_cputime(struct task_struct *, struct task_cputime *);
2185   -
2186   -static inline void thread_group_cputime_init(struct signal_struct *sig)
  2184 +static inline
  2185 +void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
2187 2186 {
2188   - sig->cputime.totals = NULL;
  2187 + struct task_cputime *totals = &tsk->signal->cputime.totals;
  2188 + unsigned long flags;
  2189 +
  2190 + spin_lock_irqsave(&totals->lock, flags);
  2191 + *times = *totals;
  2192 + spin_unlock_irqrestore(&totals->lock, flags);
2189 2193 }
2190 2194  
2191   -static inline int thread_group_cputime_clone_thread(struct task_struct *curr)
  2195 +static inline void thread_group_cputime_init(struct signal_struct *sig)
2192 2196 {
2193   - if (curr->signal->cputime.totals)
2194   - return 0;
2195   - return thread_group_cputime_alloc(curr);
  2197 + sig->cputime.totals = (struct task_cputime){
  2198 + .utime = cputime_zero,
  2199 + .stime = cputime_zero,
  2200 + .sum_exec_runtime = 0,
  2201 + };
  2202 +
  2203 + spin_lock_init(&sig->cputime.totals.lock);
2196 2204 }
2197 2205  
2198 2206 static inline void thread_group_cputime_free(struct signal_struct *sig)
2199 2207 {
2200   - free_percpu(sig->cputime.totals);
2201 2208 }
2202 2209  
2203 2210 /*
kernel/fork.c
... ... @@ -820,14 +820,15 @@
820 820 int ret;
821 821  
822 822 if (clone_flags & CLONE_THREAD) {
823   - ret = thread_group_cputime_clone_thread(current);
824   - if (likely(!ret)) {
825   - atomic_inc(&current->signal->count);
826   - atomic_inc(&current->signal->live);
827   - }
828   - return ret;
  823 + atomic_inc(&current->signal->count);
  824 + atomic_inc(&current->signal->live);
  825 + return 0;
829 826 }
830 827 sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
  828 +
  829 + if (sig)
  830 + posix_cpu_timers_init_group(sig);
  831 +
831 832 tsk->signal = sig;
832 833 if (!sig)
833 834 return -ENOMEM;
... ... @@ -863,8 +864,6 @@
863 864 task_lock(current->group_leader);
864 865 memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
865 866 task_unlock(current->group_leader);
866   -
867   - posix_cpu_timers_init_group(sig);
868 867  
869 868 acct_init_pacct(&sig->pacct);
870 869  
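
Worth noting in the copy_signal() hunk above: posix_cpu_timers_init_group() is
now called right after the kmem_cache_alloc(), before the new signal_struct is
published via tsk->signal, rather than after the rlimit copy as before.
Plausibly this is so the spinlock embedded in cputime.totals is initialized
before any accounting path can observe the structure.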
kernel/posix-cpu-timers.c
... ... @@ -10,76 +10,6 @@
10 10 #include <linux/kernel_stat.h>
11 11  
12 12 /*
13   - * Allocate the thread_group_cputime structure appropriately and fill in the
14   - * current values of the fields. Called from copy_signal() via
15   - * thread_group_cputime_clone_thread() when adding a second or subsequent
16   - * thread to a thread group. Assumes interrupts are enabled when called.
17   - */
18   -int thread_group_cputime_alloc(struct task_struct *tsk)
19   -{
20   - struct signal_struct *sig = tsk->signal;
21   - struct task_cputime *cputime;
22   -
23   - /*
24   - * If we have multiple threads and we don't already have a
25   - * per-CPU task_cputime struct (checked in the caller), allocate
26   - * one and fill it in with the times accumulated so far. We may
27   - * race with another thread so recheck after we pick up the sighand
28   - * lock.
29   - */
30   - cputime = alloc_percpu(struct task_cputime);
31   - if (cputime == NULL)
32   - return -ENOMEM;
33   - spin_lock_irq(&tsk->sighand->siglock);
34   - if (sig->cputime.totals) {
35   - spin_unlock_irq(&tsk->sighand->siglock);
36   - free_percpu(cputime);
37   - return 0;
38   - }
39   - sig->cputime.totals = cputime;
40   - cputime = per_cpu_ptr(sig->cputime.totals, smp_processor_id());
41   - cputime->utime = tsk->utime;
42   - cputime->stime = tsk->stime;
43   - cputime->sum_exec_runtime = tsk->se.sum_exec_runtime;
44   - spin_unlock_irq(&tsk->sighand->siglock);
45   - return 0;
46   -}
47   -
48   -/**
49   - * thread_group_cputime - Sum the thread group time fields across all CPUs.
50   - *
51   - * @tsk: The task we use to identify the thread group.
52   - * @times: task_cputime structure in which we return the summed fields.
53   - *
54   - * Walk the list of CPUs to sum the per-CPU time fields in the thread group
55   - * time structure.
56   - */
57   -void thread_group_cputime(
58   - struct task_struct *tsk,
59   - struct task_cputime *times)
60   -{
61   - struct task_cputime *totals, *tot;
62   - int i;
63   -
64   - totals = tsk->signal->cputime.totals;
65   - if (!totals) {
66   - times->utime = tsk->utime;
67   - times->stime = tsk->stime;
68   - times->sum_exec_runtime = tsk->se.sum_exec_runtime;
69   - return;
70   - }
71   -
72   - times->stime = times->utime = cputime_zero;
73   - times->sum_exec_runtime = 0;
74   - for_each_possible_cpu(i) {
75   - tot = per_cpu_ptr(totals, i);
76   - times->utime = cputime_add(times->utime, tot->utime);
77   - times->stime = cputime_add(times->stime, tot->stime);
78   - times->sum_exec_runtime += tot->sum_exec_runtime;
79   - }
80   -}
81   -
82   -/*
83 13 * Called after updating RLIMIT_CPU to set timer expiration if necessary.
84 14 */
85 15 void update_rlimit_cpu(unsigned long rlim_new)
kernel/sched_stats.h
... ... @@ -296,6 +296,7 @@
296 296 static inline void account_group_user_time(struct task_struct *tsk,
297 297 cputime_t cputime)
298 298 {
  299 + struct task_cputime *times;
299 300 struct signal_struct *sig;
300 301  
301 302 /* tsk == current, ensure it is safe to use ->signal */
302 303  
... ... @@ -303,13 +304,11 @@
303 304 return;
304 305  
305 306 sig = tsk->signal;
306   - if (sig->cputime.totals) {
307   - struct task_cputime *times;
  307 + times = &sig->cputime.totals;
308 308  
309   - times = per_cpu_ptr(sig->cputime.totals, get_cpu());
310   - times->utime = cputime_add(times->utime, cputime);
311   - put_cpu_no_resched();
312   - }
  309 + spin_lock(&times->lock);
  310 + times->utime = cputime_add(times->utime, cputime);
  311 + spin_unlock(&times->lock);
313 312 }
314 313  
315 314 /**
... ... @@ -325,6 +324,7 @@
325 324 static inline void account_group_system_time(struct task_struct *tsk,
326 325 cputime_t cputime)
327 326 {
  327 + struct task_cputime *times;
328 328 struct signal_struct *sig;
329 329  
330 330 /* tsk == current, ensure it is safe to use ->signal */
331 331  
... ... @@ -332,13 +332,11 @@
332 332 return;
333 333  
334 334 sig = tsk->signal;
335   - if (sig->cputime.totals) {
336   - struct task_cputime *times;
  335 + times = &sig->cputime.totals;
337 336  
338   - times = per_cpu_ptr(sig->cputime.totals, get_cpu());
339   - times->stime = cputime_add(times->stime, cputime);
340   - put_cpu_no_resched();
341   - }
  337 + spin_lock(&times->lock);
  338 + times->stime = cputime_add(times->stime, cputime);
  339 + spin_unlock(&times->lock);
342 340 }
343 341  
344 342 /**
... ... @@ -354,6 +352,7 @@
354 352 static inline void account_group_exec_runtime(struct task_struct *tsk,
355 353 unsigned long long ns)
356 354 {
  355 + struct task_cputime *times;
357 356 struct signal_struct *sig;
358 357  
359 358 sig = tsk->signal;
360 359  
... ... @@ -362,12 +361,10 @@
362 361 if (unlikely(!sig))
363 362 return;
364 363  
365   - if (sig->cputime.totals) {
366   - struct task_cputime *times;
  364 + times = &sig->cputime.totals;
367 365  
368   - times = per_cpu_ptr(sig->cputime.totals, get_cpu());
369   - times->sum_exec_runtime += ns;
370   - put_cpu_no_resched();
371   - }
  366 + spin_lock(&times->lock);
  367 + times->sum_exec_runtime += ns;
  368 + spin_unlock(&times->lock);
372 369 }