Commit 61420f59a589c0668f70cbe725785837c78ece90
Exists in master and in 4 other branches
Merge branch 'cputime' of git://git390.osdl.marist.edu/pub/scm/linux-2.6
* 'cputime' of git://git390.osdl.marist.edu/pub/scm/linux-2.6:
  [PATCH] fast vdso implementation for CLOCK_THREAD_CPUTIME_ID
  [PATCH] improve idle cputime accounting
  [PATCH] improve precision of idle time detection.
  [PATCH] improve precision of process accounting.
  [PATCH] idle cputime accounting
  [PATCH] fix scaled & unscaled cputime accounting
Showing 30 changed files (side-by-side diff)
- arch/ia64/kernel/time.c
- arch/powerpc/kernel/process.c
- arch/powerpc/kernel/time.c
- arch/s390/include/asm/cpu.h
- arch/s390/include/asm/cputime.h
- arch/s390/include/asm/lowcore.h
- arch/s390/include/asm/system.h
- arch/s390/include/asm/thread_info.h
- arch/s390/include/asm/timer.h
- arch/s390/include/asm/vdso.h
- arch/s390/kernel/asm-offsets.c
- arch/s390/kernel/entry.S
- arch/s390/kernel/entry64.S
- arch/s390/kernel/head64.S
- arch/s390/kernel/process.c
- arch/s390/kernel/s390_ext.c
- arch/s390/kernel/setup.c
- arch/s390/kernel/smp.c
- arch/s390/kernel/vdso.c
- arch/s390/kernel/vdso64/clock_getres.S
- arch/s390/kernel/vdso64/clock_gettime.S
- arch/s390/kernel/vtime.c
- arch/x86/xen/time.c
- drivers/s390/cio/cio.c
- drivers/s390/s390mach.c
- include/linux/kernel_stat.h
- include/linux/sched.h
- kernel/sched.c
- kernel/time/tick-sched.c
- kernel/timer.c
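
Before the per-file hunks, it helps to see the accounting API this merge converges on: account_user_time() and account_system_time() now carry both the unscaled and the scaled cputime in one call, account_steal_time() loses its task argument, and time burned by the idle task is routed to a new account_idle_time() bucket. A minimal sketch of the resulting dispatch (the account_*() signatures follow the kernel_stat.h/sched.c changes in this merge; the wrapper itself is an illustration, not code from the commit):

    /*
     * Illustrative wrapper -- not commit code.  Mirrors the dispatch the
     * ia64 and powerpc hunks below now perform on every accounting delta.
     */
    #include <linux/hardirq.h>
    #include <linux/kernel_stat.h>
    #include <linux/sched.h>

    static void arch_account_delta(struct task_struct *tsk, cputime_t delta,
    			       cputime_t delta_scaled, cputime_t stolen)
    {
    	if (in_irq() || idle_task(smp_processor_id()) != tsk)
    		/* Unscaled and scaled time travel in a single call now. */
    		account_system_time(tsk, 0, delta, delta_scaled);
    	else
    		/* The idle task's time goes to the new idle bucket. */
    		account_idle_time(delta);
    	if (stolen) {
    		if (idle_task(smp_processor_id()) != tsk)
    			account_steal_time(stolen);	/* no task argument */
    		else
    			account_idle_time(stolen);	/* stolen while idle */
    	}
    }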
arch/ia64/kernel/time.c
... | ... | @@ -93,13 +93,14 @@ |
93 | 93 | now = ia64_get_itc(); |
94 | 94 | |
95 | 95 | delta_stime = cycle_to_cputime(pi->ac_stime + (now - pi->ac_stamp)); |
96 | - account_system_time(prev, 0, delta_stime); | |
97 | - account_system_time_scaled(prev, delta_stime); | |
96 | + if (idle_task(smp_processor_id()) != prev) | |
97 | + account_system_time(prev, 0, delta_stime, delta_stime); | |
98 | + else | |
99 | + account_idle_time(delta_stime); | |
98 | 100 | |
99 | 101 | if (pi->ac_utime) { |
100 | 102 | delta_utime = cycle_to_cputime(pi->ac_utime); |
101 | - account_user_time(prev, delta_utime); | |
102 | - account_user_time_scaled(prev, delta_utime); | |
103 | + account_user_time(prev, delta_utime, delta_utime); | |
103 | 104 | } |
104 | 105 | |
105 | 106 | pi->ac_stamp = ni->ac_stamp = now; |
... | ... | @@ -122,8 +123,10 @@ |
122 | 123 | now = ia64_get_itc(); |
123 | 124 | |
124 | 125 | delta_stime = cycle_to_cputime(ti->ac_stime + (now - ti->ac_stamp)); |
125 | - account_system_time(tsk, 0, delta_stime); | |
126 | - account_system_time_scaled(tsk, delta_stime); | |
126 | + if (irq_count() || idle_task(smp_processor_id()) != tsk) | |
127 | + account_system_time(tsk, 0, delta_stime, delta_stime); | |
128 | + else | |
129 | + account_idle_time(delta_stime); | |
127 | 130 | ti->ac_stime = 0; |
128 | 131 | |
129 | 132 | ti->ac_stamp = now; |
... | ... | @@ -143,8 +146,7 @@ |
143 | 146 | |
144 | 147 | if (ti->ac_utime) { |
145 | 148 | delta_utime = cycle_to_cputime(ti->ac_utime); |
146 | - account_user_time(p, delta_utime); | |
147 | - account_user_time_scaled(p, delta_utime); | |
149 | + account_user_time(p, delta_utime, delta_utime); | |
148 | 150 | ti->ac_utime = 0; |
149 | 151 | } |
150 | 152 | } |
arch/powerpc/kernel/process.c
arch/powerpc/kernel/time.c
... | ... | @@ -256,8 +256,10 @@ |
256 | 256 | delta += sys_time; |
257 | 257 | get_paca()->system_time = 0; |
258 | 258 | } |
259 | - account_system_time(tsk, 0, delta); | |
260 | - account_system_time_scaled(tsk, deltascaled); | |
259 | + if (in_irq() || idle_task(smp_processor_id()) != tsk) | |
260 | + account_system_time(tsk, 0, delta, deltascaled); | |
261 | + else | |
262 | + account_idle_time(delta); | |
261 | 263 | per_cpu(cputime_last_delta, smp_processor_id()) = delta; |
262 | 264 | per_cpu(cputime_scaled_last_delta, smp_processor_id()) = deltascaled; |
263 | 265 | local_irq_restore(flags); |
264 | 266 | |
... | ... | @@ -275,10 +277,8 @@ |
275 | 277 | |
276 | 278 | utime = get_paca()->user_time; |
277 | 279 | get_paca()->user_time = 0; |
278 | - account_user_time(tsk, utime); | |
279 | - | |
280 | 280 | utimescaled = cputime_to_scaled(utime); |
281 | - account_user_time_scaled(tsk, utimescaled); | |
281 | + account_user_time(tsk, utime, utimescaled); | |
282 | 282 | } |
283 | 283 | |
284 | 284 | /* |
... | ... | @@ -338,8 +338,12 @@ |
338 | 338 | tb = mftb(); |
339 | 339 | purr = mfspr(SPRN_PURR); |
340 | 340 | stolen = (tb - pme->tb) - (purr - pme->purr); |
341 | - if (stolen > 0) | |
342 | - account_steal_time(current, stolen); | |
341 | + if (stolen > 0) { | |
342 | + if (idle_task(smp_processor_id()) != current) | |
343 | + account_steal_time(stolen); | |
344 | + else | |
345 | + account_idle_time(stolen); | |
346 | + } | |
343 | 347 | pme->tb = tb; |
344 | 348 | pme->purr = purr; |
345 | 349 | } |
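
A worked example for the stolen-time formula above: the timebase (TB) advances with wall time, while PURR only advances while this partition is actually dispatched. With made-up sample values,

$$ \mathrm{stolen} = (tb - tb_{\mathrm{prev}}) - (purr - purr_{\mathrm{prev}}) = 1000 - 600 = 400\ \text{ticks}, $$

i.e. the hypervisor ran something else for 400 of the last 1000 ticks. The hunk now books such ticks as steal time for a real task but as idle time when the idle task was the one displaced, since an idle cpu has no useful work to be stolen from.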
arch/s390/include/asm/cpu.h
... | ... | @@ -14,7 +14,6 @@ |
14 | 14 | |
15 | 15 | struct s390_idle_data { |
16 | 16 | spinlock_t lock; |
17 | - unsigned int in_idle; | |
18 | 17 | unsigned long long idle_count; |
19 | 18 | unsigned long long idle_enter; |
20 | 19 | unsigned long long idle_time; |
21 | 20 | |
... | ... | @@ -22,12 +21,12 @@ |
22 | 21 | |
23 | 22 | DECLARE_PER_CPU(struct s390_idle_data, s390_idle); |
24 | 23 | |
25 | -void s390_idle_leave(void); | |
24 | +void vtime_start_cpu(void); | |
26 | 25 | |
27 | 26 | static inline void s390_idle_check(void) |
28 | 27 | { |
29 | - if ((&__get_cpu_var(s390_idle))->in_idle) | |
30 | - s390_idle_leave(); | |
28 | + if ((&__get_cpu_var(s390_idle))->idle_enter != 0ULL) | |
29 | + vtime_start_cpu(); | |
31 | 30 | } |
32 | 31 | |
33 | 32 | #endif /* _ASM_S390_CPU_H_ */ |
arch/s390/include/asm/cputime.h
... | ... | @@ -11,7 +11,7 @@ |
11 | 11 | |
12 | 12 | #include <asm/div64.h> |
13 | 13 | |
14 | -/* We want to use micro-second resolution. */ | |
14 | +/* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */ | |
15 | 15 | |
16 | 16 | typedef unsigned long long cputime_t; |
17 | 17 | typedef unsigned long long cputime64_t; |
18 | 18 | |
... | ... | @@ -53,9 +53,9 @@ |
53 | 53 | #define cputime_ge(__a, __b) ((__a) >= (__b)) |
54 | 54 | #define cputime_lt(__a, __b) ((__a) < (__b)) |
55 | 55 | #define cputime_le(__a, __b) ((__a) <= (__b)) |
56 | -#define cputime_to_jiffies(__ct) (__div((__ct), 1000000 / HZ)) | |
56 | +#define cputime_to_jiffies(__ct) (__div((__ct), 4096000000ULL / HZ)) | |
57 | 57 | #define cputime_to_scaled(__ct) (__ct) |
58 | -#define jiffies_to_cputime(__hz) ((cputime_t)(__hz) * (1000000 / HZ)) | |
58 | +#define jiffies_to_cputime(__hz) ((cputime_t)(__hz) * (4096000000ULL / HZ)) | |
59 | 59 | |
60 | 60 | #define cputime64_zero (0ULL) |
61 | 61 | #define cputime64_add(__a, __b) ((__a) + (__b)) |
... | ... | @@ -64,7 +64,7 @@ |
64 | 64 | static inline u64 |
65 | 65 | cputime64_to_jiffies64(cputime64_t cputime) |
66 | 66 | { |
67 | - do_div(cputime, 1000000 / HZ); | |
67 | + do_div(cputime, 4096000000ULL / HZ); | |
68 | 68 | return cputime; |
69 | 69 | } |
70 | 70 | |
71 | 71 | |
... | ... | @@ -74,13 +74,13 @@ |
74 | 74 | static inline unsigned int |
75 | 75 | cputime_to_msecs(const cputime_t cputime) |
76 | 76 | { |
77 | - return __div(cputime, 1000); | |
77 | + return __div(cputime, 4096000); | |
78 | 78 | } |
79 | 79 | |
80 | 80 | static inline cputime_t |
81 | 81 | msecs_to_cputime(const unsigned int m) |
82 | 82 | { |
83 | - return (cputime_t) m * 1000; | |
83 | + return (cputime_t) m * 4096000; | |
84 | 84 | } |
85 | 85 | |
86 | 86 | /* |
87 | 87 | |
... | ... | @@ -89,13 +89,13 @@ |
89 | 89 | static inline unsigned int |
90 | 90 | cputime_to_secs(const cputime_t cputime) |
91 | 91 | { |
92 | - return __div(cputime, 1000000); | |
92 | + return __div(cputime, 2048000000) >> 1; | |
93 | 93 | } |
94 | 94 | |
95 | 95 | static inline cputime_t |
96 | 96 | secs_to_cputime(const unsigned int s) |
97 | 97 | { |
98 | - return (cputime_t) s * 1000000; | |
98 | + return (cputime_t) s * 4096000000ULL; | |
99 | 99 | } |
100 | 100 | |
101 | 101 | /* |
... | ... | @@ -104,7 +104,7 @@ |
104 | 104 | static inline cputime_t |
105 | 105 | timespec_to_cputime(const struct timespec *value) |
106 | 106 | { |
107 | - return value->tv_nsec / 1000 + (u64) value->tv_sec * 1000000; | |
107 | + return value->tv_nsec * 4096 / 1000 + (u64) value->tv_sec * 4096000000ULL; | |
108 | 108 | } |
109 | 109 | |
110 | 110 | static inline void |
111 | 111 | |
... | ... | @@ -114,12 +114,12 @@ |
114 | 114 | register_pair rp; |
115 | 115 | |
116 | 116 | rp.pair = cputime >> 1; |
117 | - asm ("dr %0,%1" : "+d" (rp) : "d" (1000000 >> 1)); | |
118 | - value->tv_nsec = rp.subreg.even * 1000; | |
117 | + asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL)); | |
118 | + value->tv_nsec = rp.subreg.even * 1000 / 4096; | |
119 | 119 | value->tv_sec = rp.subreg.odd; |
120 | 120 | #else |
121 | - value->tv_nsec = (cputime % 1000000) * 1000; | |
122 | - value->tv_sec = cputime / 1000000; | |
121 | + value->tv_nsec = (cputime % 4096000000ULL) * 1000 / 4096; | |
122 | + value->tv_sec = cputime / 4096000000ULL; | |
123 | 123 | #endif |
124 | 124 | } |
125 | 125 | |
... | ... | @@ -131,7 +131,7 @@ |
131 | 131 | static inline cputime_t |
132 | 132 | timeval_to_cputime(const struct timeval *value) |
133 | 133 | { |
134 | - return value->tv_usec + (u64) value->tv_sec * 1000000; | |
134 | + return value->tv_usec * 4096 + (u64) value->tv_sec * 4096000000ULL; | |
135 | 135 | } |
136 | 136 | |
137 | 137 | static inline void |
138 | 138 | |
... | ... | @@ -141,12 +141,12 @@ |
141 | 141 | register_pair rp; |
142 | 142 | |
143 | 143 | rp.pair = cputime >> 1; |
144 | - asm ("dr %0,%1" : "+d" (rp) : "d" (1000000 >> 1)); | |
145 | - value->tv_usec = rp.subreg.even; | |
144 | + asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL)); | |
145 | + value->tv_usec = rp.subreg.even / 4096; | |
146 | 146 | value->tv_sec = rp.subreg.odd; |
147 | 147 | #else |
148 | - value->tv_usec = cputime % 1000000; | |
149 | - value->tv_sec = cputime / 1000000; | |
148 | + value->tv_usec = cputime % 4096000000ULL; | |
149 | + value->tv_sec = cputime / 4096000000ULL; | |
150 | 150 | #endif |
151 | 151 | } |
152 | 152 | |
153 | 153 | |
... | ... | @@ -156,13 +156,13 @@ |
156 | 156 | static inline clock_t |
157 | 157 | cputime_to_clock_t(cputime_t cputime) |
158 | 158 | { |
159 | - return __div(cputime, 1000000 / USER_HZ); | |
159 | + return __div(cputime, 4096000000ULL / USER_HZ); | |
160 | 160 | } |
161 | 161 | |
162 | 162 | static inline cputime_t |
163 | 163 | clock_t_to_cputime(unsigned long x) |
164 | 164 | { |
165 | - return (cputime_t) x * (1000000 / USER_HZ); | |
165 | + return (cputime_t) x * (4096000000ULL / USER_HZ); | |
166 | 166 | } |
167 | 167 | |
168 | 168 | /* |
... | ... | @@ -171,7 +171,7 @@ |
171 | 171 | static inline clock_t |
172 | 172 | cputime64_to_clock_t(cputime64_t cputime) |
173 | 173 | { |
174 | - return __div(cputime, 1000000 / USER_HZ); | |
174 | + return __div(cputime, 4096000000ULL / USER_HZ); | |
175 | 175 | } |
176 | 176 | |
177 | 177 | #endif /* _S390_CPUTIME_H */ |
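
To make the new constants concrete: cputime_t now counts CPU-timer units of 2^-12 microseconds, so

$$ 1\,\mu s = 4096\ \text{units}, \quad 1\,\mathrm{ms} = 4096000, \quad 1\,\mathrm{s} = 4096000000, \quad 1\ \text{jiffy} = \frac{4096000000}{HZ}\ (= 40960000\ \text{at an assumed}\ HZ = 100). $$

That also explains the odd-looking cputime_to_secs(): 4096000000 exceeds the signed 32-bit divisor range of the 31-bit divide instruction, so the code divides by half the value, 2048000000, and halves the quotient with the trailing >> 1.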
arch/s390/include/asm/lowcore.h
... | ... | @@ -67,11 +67,11 @@ |
67 | 67 | #define __LC_SYNC_ENTER_TIMER 0x248 |
68 | 68 | #define __LC_ASYNC_ENTER_TIMER 0x250 |
69 | 69 | #define __LC_EXIT_TIMER 0x258 |
70 | -#define __LC_LAST_UPDATE_TIMER 0x260 | |
71 | -#define __LC_USER_TIMER 0x268 | |
72 | -#define __LC_SYSTEM_TIMER 0x270 | |
73 | -#define __LC_LAST_UPDATE_CLOCK 0x278 | |
74 | -#define __LC_STEAL_CLOCK 0x280 | |
70 | +#define __LC_USER_TIMER 0x260 | |
71 | +#define __LC_SYSTEM_TIMER 0x268 | |
72 | +#define __LC_STEAL_TIMER 0x270 | |
73 | +#define __LC_LAST_UPDATE_TIMER 0x278 | |
74 | +#define __LC_LAST_UPDATE_CLOCK 0x280 | |
75 | 75 | #define __LC_RETURN_MCCK_PSW 0x288 |
76 | 76 | #define __LC_KERNEL_STACK 0xC40 |
77 | 77 | #define __LC_THREAD_INFO 0xC44 |
... | ... | @@ -89,11 +89,11 @@ |
89 | 89 | #define __LC_SYNC_ENTER_TIMER 0x250 |
90 | 90 | #define __LC_ASYNC_ENTER_TIMER 0x258 |
91 | 91 | #define __LC_EXIT_TIMER 0x260 |
92 | -#define __LC_LAST_UPDATE_TIMER 0x268 | |
93 | -#define __LC_USER_TIMER 0x270 | |
94 | -#define __LC_SYSTEM_TIMER 0x278 | |
95 | -#define __LC_LAST_UPDATE_CLOCK 0x280 | |
96 | -#define __LC_STEAL_CLOCK 0x288 | |
92 | +#define __LC_USER_TIMER 0x268 | |
93 | +#define __LC_SYSTEM_TIMER 0x270 | |
94 | +#define __LC_STEAL_TIMER 0x278 | |
95 | +#define __LC_LAST_UPDATE_TIMER 0x280 | |
96 | +#define __LC_LAST_UPDATE_CLOCK 0x288 | |
97 | 97 | #define __LC_RETURN_MCCK_PSW 0x290 |
98 | 98 | #define __LC_KERNEL_STACK 0xD40 |
99 | 99 | #define __LC_THREAD_INFO 0xD48 |
100 | 100 | |
... | ... | @@ -106,8 +106,10 @@ |
106 | 106 | #define __LC_IPLDEV 0xDB8 |
107 | 107 | #define __LC_CURRENT 0xDD8 |
108 | 108 | #define __LC_INT_CLOCK 0xDE8 |
109 | +#define __LC_VDSO_PER_CPU 0xE38 | |
109 | 110 | #endif /* __s390x__ */ |
110 | 111 | |
112 | +#define __LC_PASTE 0xE40 | |
111 | 113 | |
112 | 114 | #define __LC_PANIC_MAGIC 0xE00 |
113 | 115 | #ifndef __s390x__ |
... | ... | @@ -252,11 +254,11 @@ |
252 | 254 | __u64 sync_enter_timer; /* 0x248 */ |
253 | 255 | __u64 async_enter_timer; /* 0x250 */ |
254 | 256 | __u64 exit_timer; /* 0x258 */ |
255 | - __u64 last_update_timer; /* 0x260 */ | |
256 | - __u64 user_timer; /* 0x268 */ | |
257 | - __u64 system_timer; /* 0x270 */ | |
258 | - __u64 last_update_clock; /* 0x278 */ | |
259 | - __u64 steal_clock; /* 0x280 */ | |
257 | + __u64 user_timer; /* 0x260 */ | |
258 | + __u64 system_timer; /* 0x268 */ | |
259 | + __u64 steal_timer; /* 0x270 */ | |
260 | + __u64 last_update_timer; /* 0x278 */ | |
261 | + __u64 last_update_clock; /* 0x280 */ | |
260 | 262 | psw_t return_mcck_psw; /* 0x288 */ |
261 | 263 | __u8 pad8[0xc00-0x290]; /* 0x290 */ |
262 | 264 | |
... | ... | @@ -343,11 +345,11 @@ |
343 | 345 | __u64 sync_enter_timer; /* 0x250 */ |
344 | 346 | __u64 async_enter_timer; /* 0x258 */ |
345 | 347 | __u64 exit_timer; /* 0x260 */ |
346 | - __u64 last_update_timer; /* 0x268 */ | |
347 | - __u64 user_timer; /* 0x270 */ | |
348 | - __u64 system_timer; /* 0x278 */ | |
349 | - __u64 last_update_clock; /* 0x280 */ | |
350 | - __u64 steal_clock; /* 0x288 */ | |
348 | + __u64 user_timer; /* 0x268 */ | |
349 | + __u64 system_timer; /* 0x270 */ | |
350 | + __u64 steal_timer; /* 0x278 */ | |
351 | + __u64 last_update_timer; /* 0x280 */ | |
352 | + __u64 last_update_clock; /* 0x288 */ | |
351 | 353 | psw_t return_mcck_psw; /* 0x290 */ |
352 | 354 | __u8 pad8[0xc00-0x2a0]; /* 0x2a0 */ |
353 | 355 | /* System info area */ |
... | ... | @@ -381,7 +383,12 @@ |
381 | 383 | /* whether the kernel died with panic() or not */ |
382 | 384 | __u32 panic_magic; /* 0xe00 */ |
383 | 385 | |
384 | - __u8 pad13[0x11b8-0xe04]; /* 0xe04 */ | |
386 | + /* Per cpu primary space access list */ | |
387 | + __u8 pad_0xe04[0xe3c-0xe04]; /* 0xe04 */ | |
388 | + __u32 vdso_per_cpu_data; /* 0xe3c */ | |
389 | + __u32 paste[16]; /* 0xe40 */ | |
390 | + | |
391 | + __u8 pad13[0x11b8-0xe80]; /* 0xe80 */ | |
385 | 392 | |
386 | 393 | /* 64 bit extparam used for pfault, diag 250 etc */ |
387 | 394 | __u64 ext_params2; /* 0x11B8 */ |
arch/s390/include/asm/system.h
... | ... | @@ -99,7 +99,7 @@ |
99 | 99 | prev = __switch_to(prev,next); \ |
100 | 100 | } while (0) |
101 | 101 | |
102 | -extern void account_vtime(struct task_struct *); | |
102 | +extern void account_vtime(struct task_struct *, struct task_struct *); | |
103 | 103 | extern void account_tick_vtime(struct task_struct *); |
104 | 104 | extern void account_system_vtime(struct task_struct *); |
105 | 105 | |
... | ... | @@ -121,7 +121,7 @@ |
121 | 121 | |
122 | 122 | #define finish_arch_switch(prev) do { \ |
123 | 123 | set_fs(current->thread.mm_segment); \ |
124 | - account_vtime(prev); \ | |
124 | + account_vtime(prev, current); \ | |
125 | 125 | } while (0) |
126 | 126 | |
127 | 127 | #define nop() asm volatile("nop") |
arch/s390/include/asm/thread_info.h
arch/s390/include/asm/timer.h
... | ... | @@ -23,20 +23,18 @@ |
23 | 23 | __u64 expires; |
24 | 24 | __u64 interval; |
25 | 25 | |
26 | - spinlock_t lock; | |
27 | - unsigned long magic; | |
28 | - | |
29 | 26 | void (*function)(unsigned long); |
30 | 27 | unsigned long data; |
31 | 28 | }; |
32 | 29 | |
33 | -/* the offset value will wrap after ca. 71 years */ | |
30 | +/* the vtimer value will wrap after ca. 71 years */ | |
34 | 31 | struct vtimer_queue { |
35 | 32 | struct list_head list; |
36 | 33 | spinlock_t lock; |
37 | - __u64 to_expire; /* current event expire time */ | |
38 | - __u64 offset; /* list offset to zero */ | |
39 | - __u64 idle; /* temp var for idle */ | |
34 | + __u64 timer; /* last programmed timer */ | |
35 | + __u64 elapsed; /* elapsed time of timer expire values */ | |
36 | + __u64 idle; /* temp var for idle */ | |
37 | + int do_spt; /* =1: reprogram cpu timer in idle */ | |
40 | 38 | }; |
41 | 39 | |
42 | 40 | extern void init_virt_timer(struct vtimer_list *timer); |
... | ... | @@ -48,8 +46,8 @@ |
48 | 46 | extern void init_cpu_vtimer(void); |
49 | 47 | extern void vtime_init(void); |
50 | 48 | |
51 | -extern void vtime_start_cpu_timer(void); | |
52 | -extern void vtime_stop_cpu_timer(void); | |
49 | +extern void vtime_stop_cpu(void); | |
50 | +extern void vtime_start_leave(void); | |
53 | 51 | |
54 | 52 | #endif /* __KERNEL__ */ |
55 | 53 |
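
The reworked vtimer_queue drops the absolute to_expire/offset pair for a relative scheme: timer is the value last programmed into the (down-counting) CPU timer, elapsed is time already consumed relative to the expire values stored in the list, and do_spt tells the idle path whether a real expiry must be reprogrammed on wakeup. A user-space toy of the interrupt-side bookkeeping (an illustration distilled from the vtime.c hunks further below, not commit code):

    #include <stdint.h>

    struct toy_vq {
    	uint64_t timer;		/* last value programmed into the timer */
    	uint64_t elapsed;	/* consumed time vs. stored expire values */
    };

    /* enter_timer: CPU timer captured at interrupt entry (counts down). */
    static uint64_t toy_timer_interrupt(struct toy_vq *vq, uint64_t enter_timer,
    				    uint64_t *expires, int n)
    {
    	/* What was programmed minus what is left = time that passed. */
    	uint64_t elapsed = vq->elapsed + (vq->timer - enter_timer);
    	uint64_t next = UINT64_MAX;
    	int i;

    	vq->elapsed = 0;
    	for (i = 0; i < n; i++) {
    		if (expires[i] < elapsed) {
    			expires[i] = 0;		/* expired: fire callback */
    		} else {
    			expires[i] -= elapsed;	/* age the relative expiry */
    			if (expires[i] < next)
    				next = expires[i];
    		}
    	}
    	vq->timer = next;			/* reprogram with this */
    	return next;
    }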
arch/s390/include/asm/vdso.h
... | ... | @@ -12,9 +12,9 @@ |
12 | 12 | #ifndef __ASSEMBLY__ |
13 | 13 | |
14 | 14 | /* |
15 | - * Note about this structure: | |
15 | + * Note about the vdso_data and vdso_per_cpu_data structures: | |
16 | 16 | * |
17 | - * NEVER USE THIS IN USERSPACE CODE DIRECTLY. The layout of this | |
17 | + * NEVER USE THEM IN USERSPACE CODE DIRECTLY. The layout of the | |
18 | 18 | * structure is supposed to be known only to the function in the vdso |
19 | 19 | * itself and may change without notice. |
20 | 20 | */ |
21 | 21 | |
22 | 22 | |
... | ... | @@ -28,9 +28,20 @@ |
28 | 28 | __u64 wtom_clock_nsec; /* 0x28 */ |
29 | 29 | __u32 tz_minuteswest; /* Minutes west of Greenwich 0x30 */ |
30 | 30 | __u32 tz_dsttime; /* Type of dst correction 0x34 */ |
31 | + __u32 ectg_available; | |
31 | 32 | }; |
32 | 33 | |
34 | +struct vdso_per_cpu_data { | |
35 | + __u64 ectg_timer_base; | |
36 | + __u64 ectg_user_time; | |
37 | +}; | |
38 | + | |
33 | 39 | extern struct vdso_data *vdso_data; |
40 | + | |
41 | +#ifdef CONFIG_64BIT | |
42 | +int vdso_alloc_per_cpu(int cpu, struct _lowcore *lowcore); | |
43 | +void vdso_free_per_cpu(int cpu, struct _lowcore *lowcore); | |
44 | +#endif | |
34 | 45 | |
35 | 46 | #endif /* __ASSEMBLY__ */ |
36 | 47 |
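
The two new __u64 fields are the whole interface between the kernel exit path and the fast CLOCK_THREAD_CPUTIME_ID path: on every return to user space, entry64.S (below) copies the current CPU timer and the accumulated user time into them with one 16-byte mvc. A simplified C rendering of what the vdso then computes (illustrative only; current_cpu_timer() is a hypothetical stand-in for the ECTG instruction's read of the down-counting CPU timer):

    #include <stdint.h>

    struct vdso_per_cpu_data {
    	uint64_t ectg_timer_base;	/* CPU timer at last kernel exit */
    	uint64_t ectg_user_time;	/* user cputime accumulated by then */
    };

    extern uint64_t current_cpu_timer(void);	/* hypothetical accessor */

    static uint64_t thread_cputime_units(const struct vdso_per_cpu_data *d)
    {
    	/* The CPU timer counts down, so base - now is the user time
    	 * consumed since the kernel refreshed the snapshot on exit. */
    	return d->ectg_user_time + (d->ectg_timer_base - current_cpu_timer());
    }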
arch/s390/kernel/asm-offsets.c
... | ... | @@ -48,6 +48,11 @@ |
48 | 48 | DEFINE(__VDSO_WTOM_SEC, offsetof(struct vdso_data, wtom_clock_sec)); |
49 | 49 | DEFINE(__VDSO_WTOM_NSEC, offsetof(struct vdso_data, wtom_clock_nsec)); |
50 | 50 | DEFINE(__VDSO_TIMEZONE, offsetof(struct vdso_data, tz_minuteswest)); |
51 | + DEFINE(__VDSO_ECTG_OK, offsetof(struct vdso_data, ectg_available)); | |
52 | + DEFINE(__VDSO_ECTG_BASE, | |
53 | + offsetof(struct vdso_per_cpu_data, ectg_timer_base)); | |
54 | + DEFINE(__VDSO_ECTG_USER, | |
55 | + offsetof(struct vdso_per_cpu_data, ectg_user_time)); | |
51 | 56 | /* constants used by the vdso */ |
52 | 57 | DEFINE(CLOCK_REALTIME, CLOCK_REALTIME); |
53 | 58 | DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC); |
arch/s390/kernel/entry.S
... | ... | @@ -583,8 +583,8 @@ |
583 | 583 | |
584 | 584 | .globl io_int_handler |
585 | 585 | io_int_handler: |
586 | - stpt __LC_ASYNC_ENTER_TIMER | |
587 | 586 | stck __LC_INT_CLOCK |
587 | + stpt __LC_ASYNC_ENTER_TIMER | |
588 | 588 | SAVE_ALL_BASE __LC_SAVE_AREA+16 |
589 | 589 | SAVE_ALL_ASYNC __LC_IO_OLD_PSW,__LC_SAVE_AREA+16 |
590 | 590 | CREATE_STACK_FRAME __LC_IO_OLD_PSW,__LC_SAVE_AREA+16 |
591 | 591 | |
... | ... | @@ -723,8 +723,8 @@ |
723 | 723 | |
724 | 724 | .globl ext_int_handler |
725 | 725 | ext_int_handler: |
726 | - stpt __LC_ASYNC_ENTER_TIMER | |
727 | 726 | stck __LC_INT_CLOCK |
727 | + stpt __LC_ASYNC_ENTER_TIMER | |
728 | 728 | SAVE_ALL_BASE __LC_SAVE_AREA+16 |
729 | 729 | SAVE_ALL_ASYNC __LC_EXT_OLD_PSW,__LC_SAVE_AREA+16 |
730 | 730 | CREATE_STACK_FRAME __LC_EXT_OLD_PSW,__LC_SAVE_AREA+16 |
... | ... | @@ -750,6 +750,7 @@ |
750 | 750 | |
751 | 751 | .globl mcck_int_handler |
752 | 752 | mcck_int_handler: |
753 | + stck __LC_INT_CLOCK | |
753 | 754 | spt __LC_CPU_TIMER_SAVE_AREA # revalidate cpu timer |
754 | 755 | lm %r0,%r15,__LC_GPREGS_SAVE_AREA # revalidate gprs |
755 | 756 | SAVE_ALL_BASE __LC_SAVE_AREA+32 |
arch/s390/kernel/entry64.S
... | ... | @@ -177,8 +177,11 @@ |
177 | 177 | .if !\sync |
178 | 178 | ni \psworg+1,0xfd # clear wait state bit |
179 | 179 | .endif |
180 | - lmg %r0,%r15,SP_R0(%r15) # load gprs 0-15 of user | |
180 | + lg %r14,__LC_VDSO_PER_CPU | |
181 | + lmg %r0,%r13,SP_R0(%r15) # load gprs 0-13 of user | |
181 | 182 | stpt __LC_EXIT_TIMER |
183 | + mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER | |
184 | + lmg %r14,%r15,SP_R14(%r15) # load gprs 14-15 of user | 
182 | 185 | lpswe \psworg # back to caller |
183 | 186 | .endm |
184 | 187 | |
185 | 188 | |
... | ... | @@ -559,8 +562,8 @@ |
559 | 562 | */ |
560 | 563 | .globl io_int_handler |
561 | 564 | io_int_handler: |
562 | - stpt __LC_ASYNC_ENTER_TIMER | |
563 | 565 | stck __LC_INT_CLOCK |
566 | + stpt __LC_ASYNC_ENTER_TIMER | |
564 | 567 | SAVE_ALL_BASE __LC_SAVE_AREA+32 |
565 | 568 | SAVE_ALL_ASYNC __LC_IO_OLD_PSW,__LC_SAVE_AREA+32 |
566 | 569 | CREATE_STACK_FRAME __LC_IO_OLD_PSW,__LC_SAVE_AREA+32 |
567 | 570 | |
... | ... | @@ -721,8 +724,8 @@ |
721 | 724 | */ |
722 | 725 | .globl ext_int_handler |
723 | 726 | ext_int_handler: |
724 | - stpt __LC_ASYNC_ENTER_TIMER | |
725 | 727 | stck __LC_INT_CLOCK |
728 | + stpt __LC_ASYNC_ENTER_TIMER | |
726 | 729 | SAVE_ALL_BASE __LC_SAVE_AREA+32 |
727 | 730 | SAVE_ALL_ASYNC __LC_EXT_OLD_PSW,__LC_SAVE_AREA+32 |
728 | 731 | CREATE_STACK_FRAME __LC_EXT_OLD_PSW,__LC_SAVE_AREA+32 |
... | ... | @@ -746,6 +749,7 @@ |
746 | 749 | */ |
747 | 750 | .globl mcck_int_handler |
748 | 751 | mcck_int_handler: |
752 | + stck __LC_INT_CLOCK | |
749 | 753 | la %r1,4095 # revalidate r1 |
750 | 754 | spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # revalidate cpu timer |
751 | 755 | lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# revalidate gprs |
752 | 756 | |
753 | 757 | |
754 | 758 | |
755 | 759 | |
756 | 760 | |
... | ... | @@ -979,23 +983,23 @@ |
979 | 983 | |
980 | 984 | cleanup_sysc_leave: |
981 | 985 | clc 8(8,%r12),BASED(cleanup_sysc_leave_insn) |
982 | - je 2f | |
983 | - mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER | |
986 | + je 3f | |
984 | 987 | clc 8(8,%r12),BASED(cleanup_sysc_leave_insn+8) |
985 | - je 2f | |
986 | - mvc __LC_RETURN_PSW(16),SP_PSW(%r15) | |
988 | + jhe 0f | |
989 | + mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER | |
990 | +0: mvc __LC_RETURN_PSW(16),SP_PSW(%r15) | |
987 | 991 | cghi %r12,__LC_MCK_OLD_PSW |
988 | - jne 0f | |
992 | + jne 1f | |
989 | 993 | mvc __LC_SAVE_AREA+64(32),SP_R12(%r15) |
990 | - j 1f | |
991 | -0: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15) | |
992 | -1: lmg %r0,%r11,SP_R0(%r15) | |
994 | + j 2f | |
995 | +1: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15) | |
996 | +2: lmg %r0,%r11,SP_R0(%r15) | |
993 | 997 | lg %r15,SP_R15(%r15) |
994 | -2: la %r12,__LC_RETURN_PSW | |
998 | +3: la %r12,__LC_RETURN_PSW | |
995 | 999 | br %r14 |
996 | 1000 | cleanup_sysc_leave_insn: |
997 | 1001 | .quad sysc_done - 4 |
998 | - .quad sysc_done - 8 | |
1002 | + .quad sysc_done - 16 | |
999 | 1003 | |
1000 | 1004 | cleanup_io_return: |
1001 | 1005 | mvc __LC_RETURN_PSW(8),0(%r12) |
1002 | 1006 | |
1003 | 1007 | |
1004 | 1008 | |
1005 | 1009 | |
1006 | 1010 | |
... | ... | @@ -1005,23 +1009,23 @@ |
1005 | 1009 | |
1006 | 1010 | cleanup_io_leave: |
1007 | 1011 | clc 8(8,%r12),BASED(cleanup_io_leave_insn) |
1008 | - je 2f | |
1009 | - mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER | |
1012 | + je 3f | |
1010 | 1013 | clc 8(8,%r12),BASED(cleanup_io_leave_insn+8) |
1011 | - je 2f | |
1012 | - mvc __LC_RETURN_PSW(16),SP_PSW(%r15) | |
1014 | + jhe 0f | |
1015 | + mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER | |
1016 | +0: mvc __LC_RETURN_PSW(16),SP_PSW(%r15) | |
1013 | 1017 | cghi %r12,__LC_MCK_OLD_PSW |
1014 | - jne 0f | |
1018 | + jne 1f | |
1015 | 1019 | mvc __LC_SAVE_AREA+64(32),SP_R12(%r15) |
1016 | - j 1f | |
1017 | -0: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15) | |
1018 | -1: lmg %r0,%r11,SP_R0(%r15) | |
1020 | + j 2f | |
1021 | +1: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15) | |
1022 | +2: lmg %r0,%r11,SP_R0(%r15) | |
1019 | 1023 | lg %r15,SP_R15(%r15) |
1020 | -2: la %r12,__LC_RETURN_PSW | |
1024 | +3: la %r12,__LC_RETURN_PSW | |
1021 | 1025 | br %r14 |
1022 | 1026 | cleanup_io_leave_insn: |
1023 | 1027 | .quad io_done - 4 |
1024 | - .quad io_done - 8 | |
1028 | + .quad io_done - 16 | |
1025 | 1029 | |
1026 | 1030 | /* |
1027 | 1031 | * Integer constants |
arch/s390/kernel/head64.S
arch/s390/kernel/process.c
... | ... | @@ -38,6 +38,7 @@ |
38 | 38 | #include <linux/utsname.h> |
39 | 39 | #include <linux/tick.h> |
40 | 40 | #include <linux/elfcore.h> |
41 | +#include <linux/kernel_stat.h> | |
41 | 42 | #include <asm/uaccess.h> |
42 | 43 | #include <asm/pgtable.h> |
43 | 44 | #include <asm/system.h> |
... | ... | @@ -45,7 +46,6 @@ |
45 | 46 | #include <asm/processor.h> |
46 | 47 | #include <asm/irq.h> |
47 | 48 | #include <asm/timer.h> |
48 | -#include <asm/cpu.h> | |
49 | 49 | #include "entry.h" |
50 | 50 | |
51 | 51 | asmlinkage void ret_from_fork(void) asm ("ret_from_fork"); |
... | ... | @@ -75,36 +75,6 @@ |
75 | 75 | return sf->gprs[8]; |
76 | 76 | } |
77 | 77 | |
78 | -DEFINE_PER_CPU(struct s390_idle_data, s390_idle) = { | |
79 | - .lock = __SPIN_LOCK_UNLOCKED(s390_idle.lock) | |
80 | -}; | |
81 | - | |
82 | -static int s390_idle_enter(void) | |
83 | -{ | |
84 | - struct s390_idle_data *idle; | |
85 | - | |
86 | - idle = &__get_cpu_var(s390_idle); | |
87 | - spin_lock(&idle->lock); | |
88 | - idle->idle_count++; | |
89 | - idle->in_idle = 1; | |
90 | - idle->idle_enter = get_clock(); | |
91 | - spin_unlock(&idle->lock); | |
92 | - vtime_stop_cpu_timer(); | |
93 | - return NOTIFY_OK; | |
94 | -} | |
95 | - | |
96 | -void s390_idle_leave(void) | |
97 | -{ | |
98 | - struct s390_idle_data *idle; | |
99 | - | |
100 | - vtime_start_cpu_timer(); | |
101 | - idle = &__get_cpu_var(s390_idle); | |
102 | - spin_lock(&idle->lock); | |
103 | - idle->idle_time += get_clock() - idle->idle_enter; | |
104 | - idle->in_idle = 0; | |
105 | - spin_unlock(&idle->lock); | |
106 | -} | |
107 | - | |
108 | 78 | extern void s390_handle_mcck(void); |
109 | 79 | /* |
110 | 80 | * The idle loop on a S390... |
... | ... | @@ -117,10 +87,6 @@ |
117 | 87 | local_irq_enable(); |
118 | 88 | return; |
119 | 89 | } |
120 | - if (s390_idle_enter() == NOTIFY_BAD) { | |
121 | - local_irq_enable(); | |
122 | - return; | |
123 | - } | |
124 | 90 | #ifdef CONFIG_HOTPLUG_CPU |
125 | 91 | if (cpu_is_offline(smp_processor_id())) { |
126 | 92 | preempt_enable_no_resched(); |
... | ... | @@ -130,7 +96,6 @@ |
130 | 96 | local_mcck_disable(); |
131 | 97 | if (test_thread_flag(TIF_MCCK_PENDING)) { |
132 | 98 | local_mcck_enable(); |
133 | - s390_idle_leave(); | |
134 | 99 | local_irq_enable(); |
135 | 100 | s390_handle_mcck(); |
136 | 101 | return; |
... | ... | @@ -138,9 +103,9 @@ |
138 | 103 | trace_hardirqs_on(); |
139 | 104 | /* Don't trace preempt off for idle. */ |
140 | 105 | stop_critical_timings(); |
141 | - /* Wait for external, I/O or machine check interrupt. */ | |
142 | - __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT | | |
143 | - PSW_MASK_IO | PSW_MASK_EXT); | |
106 | + /* Stop virtual timer and halt the cpu. */ | |
107 | + vtime_stop_cpu(); | |
108 | + /* Reenable preemption tracer. */ | |
144 | 109 | start_critical_timings(); |
145 | 110 | } |
146 | 111 |
arch/s390/kernel/s390_ext.c
... | ... | @@ -119,8 +119,8 @@ |
119 | 119 | struct pt_regs *old_regs; |
120 | 120 | |
121 | 121 | old_regs = set_irq_regs(regs); |
122 | - irq_enter(); | |
123 | 122 | s390_idle_check(); |
123 | + irq_enter(); | |
124 | 124 | if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator) |
125 | 125 | /* Serve timer interrupts first. */ |
126 | 126 | clock_comparator_work(); |
arch/s390/kernel/setup.c
arch/s390/kernel/smp.c
... | ... | @@ -47,6 +47,7 @@ |
47 | 47 | #include <asm/lowcore.h> |
48 | 48 | #include <asm/sclp.h> |
49 | 49 | #include <asm/cpu.h> |
50 | +#include <asm/vdso.h> | |
50 | 51 | #include "entry.h" |
51 | 52 | |
52 | 53 | /* |
... | ... | @@ -500,6 +501,9 @@ |
500 | 501 | goto out; |
501 | 502 | lowcore->extended_save_area_addr = (u32) save_area; |
502 | 503 | } |
504 | +#else | |
505 | + if (vdso_alloc_per_cpu(cpu, lowcore)) | |
506 | + goto out; | |
503 | 507 | #endif |
504 | 508 | lowcore_ptr[cpu] = lowcore; |
505 | 509 | return 0; |
... | ... | @@ -522,6 +526,8 @@ |
522 | 526 | #ifndef CONFIG_64BIT |
523 | 527 | if (MACHINE_HAS_IEEE) |
524 | 528 | free_page((unsigned long) lowcore->extended_save_area_addr); |
529 | +#else | |
530 | + vdso_free_per_cpu(cpu, lowcore); | |
525 | 531 | #endif |
526 | 532 | free_page(lowcore->panic_stack - PAGE_SIZE); |
527 | 533 | free_pages(lowcore->async_stack - ASYNC_SIZE, ASYNC_ORDER); |
... | ... | @@ -664,6 +670,7 @@ |
664 | 670 | lowcore = (void *) __get_free_pages(GFP_KERNEL | GFP_DMA, lc_order); |
665 | 671 | panic_stack = __get_free_page(GFP_KERNEL); |
666 | 672 | async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER); |
673 | + BUG_ON(!lowcore || !panic_stack || !async_stack); | |
667 | 674 | #ifndef CONFIG_64BIT |
668 | 675 | if (MACHINE_HAS_IEEE) |
669 | 676 | save_area = get_zeroed_page(GFP_KERNEL); |
... | ... | @@ -677,6 +684,8 @@ |
677 | 684 | #ifndef CONFIG_64BIT |
678 | 685 | if (MACHINE_HAS_IEEE) |
679 | 686 | lowcore->extended_save_area_addr = (u32) save_area; |
687 | +#else | |
688 | + BUG_ON(vdso_alloc_per_cpu(smp_processor_id(), lowcore)); | |
680 | 689 | #endif |
681 | 690 | set_prefix((u32)(unsigned long) lowcore); |
682 | 691 | local_mcck_enable(); |
683 | 692 | |
... | ... | @@ -845,9 +854,11 @@ |
845 | 854 | unsigned long long idle_count; |
846 | 855 | |
847 | 856 | idle = &per_cpu(s390_idle, dev->id); |
848 | - spin_lock_irq(&idle->lock); | |
857 | + spin_lock(&idle->lock); | |
849 | 858 | idle_count = idle->idle_count; |
850 | - spin_unlock_irq(&idle->lock); | |
859 | + if (idle->idle_enter) | |
860 | + idle_count++; | |
861 | + spin_unlock(&idle->lock); | |
851 | 862 | return sprintf(buf, "%llu\n", idle_count); |
852 | 863 | } |
853 | 864 | static SYSDEV_ATTR(idle_count, 0444, show_idle_count, NULL); |
854 | 865 | |
... | ... | @@ -856,18 +867,17 @@ |
856 | 867 | struct sysdev_attribute *attr, char *buf) |
857 | 868 | { |
858 | 869 | struct s390_idle_data *idle; |
859 | - unsigned long long new_time; | |
870 | + unsigned long long now, idle_time, idle_enter; | |
860 | 871 | |
861 | 872 | idle = &per_cpu(s390_idle, dev->id); |
862 | - spin_lock_irq(&idle->lock); | |
863 | - if (idle->in_idle) { | |
864 | - new_time = get_clock(); | |
865 | - idle->idle_time += new_time - idle->idle_enter; | |
866 | - idle->idle_enter = new_time; | |
867 | - } | |
868 | - new_time = idle->idle_time; | |
869 | - spin_unlock_irq(&idle->lock); | |
870 | - return sprintf(buf, "%llu\n", new_time >> 12); | |
873 | + spin_lock(&idle->lock); | |
874 | + now = get_clock(); | |
875 | + idle_time = idle->idle_time; | |
876 | + idle_enter = idle->idle_enter; | |
877 | + if (idle_enter != 0ULL && idle_enter < now) | |
878 | + idle_time += now - idle_enter; | |
879 | + spin_unlock(&idle->lock); | |
880 | + return sprintf(buf, "%llu\n", idle_time >> 12); | |
871 | 881 | } |
872 | 882 | static SYSDEV_ATTR(idle_time_us, 0444, show_idle_time, NULL); |
873 | 883 |
arch/s390/kernel/vdso.c
... | ... | @@ -31,9 +31,6 @@ |
31 | 31 | #include <asm/sections.h> |
32 | 32 | #include <asm/vdso.h> |
33 | 33 | |
34 | -/* Max supported size for symbol names */ | |
35 | -#define MAX_SYMNAME 64 | |
36 | - | |
37 | 34 | #if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT) |
38 | 35 | extern char vdso32_start, vdso32_end; |
39 | 36 | static void *vdso32_kbase = &vdso32_start; |
... | ... | @@ -71,6 +68,119 @@ |
71 | 68 | struct vdso_data *vdso_data = &vdso_data_store.data; |
72 | 69 | |
73 | 70 | /* |
71 | + * Setup vdso data page. | |
72 | + */ | |
73 | +static void vdso_init_data(struct vdso_data *vd) | |
74 | +{ | |
75 | + unsigned int facility_list; | |
76 | + | |
77 | + facility_list = stfl(); | |
78 | + vd->ectg_available = switch_amode && (facility_list & 1); | |
79 | +} | |
80 | + | |
81 | +#ifdef CONFIG_64BIT | |
82 | +/* | |
83 | + * Setup per cpu vdso data page. | |
84 | + */ | |
85 | +static void vdso_init_per_cpu_data(int cpu, struct vdso_per_cpu_data *vpcd) | |
86 | +{ | |
87 | +} | |
88 | + | |
89 | +/* | |
90 | + * Allocate/free per cpu vdso data. | |
91 | + */ | |
92 | +#ifdef CONFIG_64BIT | |
93 | +#define SEGMENT_ORDER 2 | |
94 | +#else | |
95 | +#define SEGMENT_ORDER 1 | |
96 | +#endif | |
97 | + | |
98 | +int vdso_alloc_per_cpu(int cpu, struct _lowcore *lowcore) | |
99 | +{ | |
100 | + unsigned long segment_table, page_table, page_frame; | |
101 | + u32 *psal, *aste; | |
102 | + int i; | |
103 | + | |
104 | + lowcore->vdso_per_cpu_data = __LC_PASTE; | |
105 | + | |
106 | + if (!switch_amode || !vdso_enabled) | |
107 | + return 0; | |
108 | + | |
109 | + segment_table = __get_free_pages(GFP_KERNEL, SEGMENT_ORDER); | |
110 | + page_table = get_zeroed_page(GFP_KERNEL | GFP_DMA); | |
111 | + page_frame = get_zeroed_page(GFP_KERNEL); | |
112 | + if (!segment_table || !page_table || !page_frame) | |
113 | + goto out; | |
114 | + | |
115 | + clear_table((unsigned long *) segment_table, _SEGMENT_ENTRY_EMPTY, | |
116 | + PAGE_SIZE << SEGMENT_ORDER); | |
117 | + clear_table((unsigned long *) page_table, _PAGE_TYPE_EMPTY, | |
118 | + 256*sizeof(unsigned long)); | |
119 | + | |
120 | + *(unsigned long *) segment_table = _SEGMENT_ENTRY + page_table; | |
121 | + *(unsigned long *) page_table = _PAGE_RO + page_frame; | |
122 | + | |
123 | + psal = (u32 *) (page_table + 256*sizeof(unsigned long)); | |
124 | + aste = psal + 32; | |
125 | + | |
126 | + for (i = 4; i < 32; i += 4) | |
127 | + psal[i] = 0x80000000; | |
128 | + | |
129 | + lowcore->paste[4] = (u32)(addr_t) psal; | |
130 | + psal[0] = 0x20000000; | |
131 | + psal[2] = (u32)(addr_t) aste; | |
132 | + *(unsigned long *) (aste + 2) = segment_table + | |
133 | + _ASCE_TABLE_LENGTH + _ASCE_USER_BITS + _ASCE_TYPE_SEGMENT; | |
134 | + aste[4] = (u32)(addr_t) psal; | |
135 | + lowcore->vdso_per_cpu_data = page_frame; | |
136 | + | |
137 | + vdso_init_per_cpu_data(cpu, (struct vdso_per_cpu_data *) page_frame); | |
138 | + return 0; | |
139 | + | |
140 | +out: | |
141 | + free_page(page_frame); | |
142 | + free_page(page_table); | |
143 | + free_pages(segment_table, SEGMENT_ORDER); | |
144 | + return -ENOMEM; | |
145 | +} | |
146 | + | |
147 | +#ifdef CONFIG_HOTPLUG_CPU | |
148 | +void vdso_free_per_cpu(int cpu, struct _lowcore *lowcore) | |
149 | +{ | |
150 | + unsigned long segment_table, page_table, page_frame; | |
151 | + u32 *psal, *aste; | |
152 | + | |
153 | + if (!switch_amode || !vdso_enabled) | |
154 | + return; | |
155 | + | |
156 | + psal = (u32 *)(addr_t) lowcore->paste[4]; | |
157 | + aste = (u32 *)(addr_t) psal[2]; | |
158 | + segment_table = *(unsigned long *)(aste + 2) & PAGE_MASK; | |
159 | + page_table = *(unsigned long *) segment_table; | |
160 | + page_frame = *(unsigned long *) page_table; | |
161 | + | |
162 | + free_page(page_frame); | |
163 | + free_page(page_table); | |
164 | + free_pages(segment_table, SEGMENT_ORDER); | |
165 | +} | |
166 | +#endif /* CONFIG_HOTPLUG_CPU */ | |
167 | + | |
168 | +static void __vdso_init_cr5(void *dummy) | |
169 | +{ | |
170 | + unsigned long cr5; | |
171 | + | |
172 | + cr5 = offsetof(struct _lowcore, paste); | |
173 | + __ctl_load(cr5, 5, 5); | |
174 | +} | |
175 | + | |
176 | +static void vdso_init_cr5(void) | |
177 | +{ | |
178 | + if (switch_amode && vdso_enabled) | |
179 | + on_each_cpu(__vdso_init_cr5, NULL, 1); | |
180 | +} | |
181 | +#endif /* CONFIG_64BIT */ | |
182 | + | |
183 | +/* | |
74 | 184 | * This is called from binfmt_elf, we create the special vma for the |
75 | 185 | * vDSO and insert it into the mm struct tree |
76 | 186 | */ |
... | ... | @@ -172,6 +282,9 @@ |
172 | 282 | { |
173 | 283 | int i; |
174 | 284 | |
285 | + if (!vdso_enabled) | |
286 | + return 0; | |
287 | + vdso_init_data(vdso_data); | |
175 | 288 | #if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT) |
176 | 289 | /* Calculate the size of the 32 bit vDSO */ |
177 | 290 | vdso32_pages = ((&vdso32_end - &vdso32_start |
... | ... | @@ -208,6 +321,10 @@ |
208 | 321 | } |
209 | 322 | vdso64_pagelist[vdso64_pages - 1] = virt_to_page(vdso_data); |
210 | 323 | vdso64_pagelist[vdso64_pages] = NULL; |
324 | +#ifndef CONFIG_SMP | |
325 | + BUG_ON(vdso_alloc_per_cpu(0, &S390_lowcore)); | 
326 | +#endif | |
327 | + vdso_init_cr5(); | |
211 | 328 | #endif /* CONFIG_64BIT */ |
212 | 329 | |
213 | 330 | get_page(virt_to_page(vdso_data)); |
arch/s390/kernel/vdso64/clock_getres.S
... | ... | @@ -22,7 +22,12 @@ |
22 | 22 | cghi %r2,CLOCK_REALTIME |
23 | 23 | je 0f |
24 | 24 | cghi %r2,CLOCK_MONOTONIC |
25 | + je 0f | |
26 | + cghi %r2,-2 /* CLOCK_THREAD_CPUTIME_ID for this thread */ | |
25 | 27 | jne 2f |
28 | + larl %r5,_vdso_data | |
29 | + icm %r0,15,__VDSO_ECTG_OK(%r5) | 
30 | + jz 2f | |
26 | 31 | 0: ltgr %r3,%r3 |
27 | 32 | jz 1f /* res == NULL */ |
28 | 33 | larl %r1,3f |
arch/s390/kernel/vdso64/clock_gettime.S
... | ... | @@ -22,8 +22,10 @@ |
22 | 22 | larl %r5,_vdso_data |
23 | 23 | cghi %r2,CLOCK_REALTIME |
24 | 24 | je 4f |
25 | + cghi %r2,-2 /* CLOCK_THREAD_CPUTIME_ID for this thread */ | |
26 | + je 9f | |
25 | 27 | cghi %r2,CLOCK_MONOTONIC |
26 | - jne 9f | |
28 | + jne 12f | |
27 | 29 | |
28 | 30 | /* CLOCK_MONOTONIC */ |
29 | 31 | ltgr %r3,%r3 |
... | ... | @@ -42,7 +44,7 @@ |
42 | 44 | alg %r0,__VDSO_WTOM_SEC(%r5) |
43 | 45 | clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ |
44 | 46 | jne 0b |
45 | - larl %r5,10f | |
47 | + larl %r5,13f | |
46 | 48 | 1: clg %r1,0(%r5) |
47 | 49 | jl 2f |
48 | 50 | slg %r1,0(%r5) |
... | ... | @@ -68,7 +70,7 @@ |
68 | 70 | lg %r0,__VDSO_XTIME_SEC(%r5) |
69 | 71 | clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ |
70 | 72 | jne 5b |
71 | - larl %r5,10f | |
73 | + larl %r5,13f | |
72 | 74 | 6: clg %r1,0(%r5) |
73 | 75 | jl 7f |
74 | 76 | slg %r1,0(%r5) |
75 | 77 | |
76 | 78 | |
... | ... | @@ -79,12 +81,39 @@ |
79 | 81 | 8: lghi %r2,0 |
80 | 82 | br %r14 |
81 | 83 | |
84 | + /* CLOCK_THREAD_CPUTIME_ID for this thread */ | |
85 | +9: icm %r0,15,__VDSO_ECTG_OK(%r5) | |
86 | + jz 12f | |
87 | + ear %r2,%a4 | |
88 | + llilh %r4,0x0100 | |
89 | + sar %a4,%r4 | |
90 | + lghi %r4,0 | |
91 | + sacf 512 /* Magic ectg instruction */ | |
92 | + .insn ssf,0xc80100000000,__VDSO_ECTG_BASE(4),__VDSO_ECTG_USER(4),4 | |
93 | + sacf 0 | |
94 | + sar %a4,%r2 | |
95 | + algr %r1,%r0 /* r1 = cputime as TOD value */ | |
96 | + mghi %r1,1000 /* convert to nanoseconds */ | |
97 | + srlg %r1,%r1,12 /* r1 = cputime in nanosec */ | |
98 | + lgr %r4,%r1 | |
99 | + larl %r5,13f | |
100 | + srlg %r1,%r1,9 /* divide by 1000000000 */ | |
101 | + mlg %r0,8(%r5) | |
102 | + srlg %r0,%r0,11 /* r0 = tv_sec */ | |
103 | + stg %r0,0(%r3) | |
104 | + msg %r0,0(%r5) /* calculate tv_nsec */ | |
105 | + slgr %r4,%r0 /* r4 = tv_nsec */ | |
106 | + stg %r4,8(%r3) | |
107 | + lghi %r2,0 | |
108 | + br %r14 | |
109 | + | |
82 | 110 | /* Fallback to system call */ |
83 | -9: lghi %r1,__NR_clock_gettime | |
111 | +12: lghi %r1,__NR_clock_gettime | |
84 | 112 | svc 0 |
85 | 113 | br %r14 |
86 | 114 | |
87 | -10: .quad 1000000000 | |
115 | +13: .quad 1000000000 | |
116 | +14: .quad 19342813113834067 | |
88 | 117 | .cfi_endproc |
89 | 118 | .size __kernel_clock_gettime,.-__kernel_clock_gettime |
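
Two details of the new CLOCK_THREAD_CPUTIME_ID path are worth unpacking. The ECTG sequence leaves the thread's CPU time in %r1 in 2^-12 microsecond units, so mghi %r1,1000 followed by srlg %r1,%r1,12 is the conversion to nanoseconds (x1000/4096). The tv_sec/tv_nsec split then avoids a divide instruction entirely: the constant at label 14 is a fixed-point reciprocal of 10^9,

$$ M = \left\lceil 2^{84} / 10^{9} \right\rceil = 19342813113834067, $$

so the srlg 9 / mlg M / srlg 11 sequence computes, for a nanosecond count n,

$$ tv\_sec = \left\lfloor \frac{\lfloor n/2^{9}\rfloor \cdot M}{2^{64+11}} \right\rfloor \approx \left\lfloor \frac{n \cdot M}{2^{84}} \right\rfloor \approx \left\lfloor \frac{n}{10^{9}} \right\rfloor $$

(this is the standard multiply-high division trick; the precise error bound over the reachable range is not re-derived here), after which msg/slgr with the 10^9 constant at label 13 recovers tv_nsec = n - tv_sec * 10^9.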
arch/s390/kernel/vtime.c
... | ... | @@ -23,19 +23,43 @@ |
23 | 23 | #include <asm/s390_ext.h> |
24 | 24 | #include <asm/timer.h> |
25 | 25 | #include <asm/irq_regs.h> |
26 | +#include <asm/cpu.h> | |
26 | 27 | |
27 | 28 | static ext_int_info_t ext_int_info_timer; |
29 | + | |
28 | 30 | static DEFINE_PER_CPU(struct vtimer_queue, virt_cpu_timer); |
29 | 31 | |
32 | +DEFINE_PER_CPU(struct s390_idle_data, s390_idle) = { | |
33 | + .lock = __SPIN_LOCK_UNLOCKED(s390_idle.lock) | |
34 | +}; | |
35 | + | |
36 | +static inline __u64 get_vtimer(void) | |
37 | +{ | |
38 | + __u64 timer; | |
39 | + | |
40 | + asm volatile("STPT %0" : "=m" (timer)); | |
41 | + return timer; | |
42 | +} | |
43 | + | |
44 | +static inline void set_vtimer(__u64 expires) | |
45 | +{ | |
46 | + __u64 timer; | |
47 | + | |
48 | + asm volatile (" STPT %0\n" /* Store current cpu timer value */ | |
49 | + " SPT %1" /* Set new value immediatly afterwards */ | |
50 | + : "=m" (timer) : "m" (expires) ); | |
51 | + S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer; | |
52 | + S390_lowcore.last_update_timer = expires; | |
53 | +} | |
54 | + | |
30 | 55 | /* |
31 | 56 | * Update process times based on virtual cpu times stored by entry.S |
32 | 57 | * to the lowcore fields user_timer, system_timer & steal_clock. |
33 | 58 | */ |
34 | -void account_process_tick(struct task_struct *tsk, int user_tick) | |
59 | +static void do_account_vtime(struct task_struct *tsk, int hardirq_offset) | |
35 | 60 | { |
36 | - cputime_t cputime; | |
37 | - __u64 timer, clock; | |
38 | - int rcu_user_flag; | |
61 | + struct thread_info *ti = task_thread_info(tsk); | |
62 | + __u64 timer, clock, user, system, steal; | |
39 | 63 | |
40 | 64 | timer = S390_lowcore.last_update_timer; |
41 | 65 | clock = S390_lowcore.last_update_clock; |
42 | 66 | |
43 | 67 | |
44 | 68 | |
45 | 69 | |
46 | 70 | |
47 | 71 | |
48 | 72 | |
... | ... | @@ -44,50 +68,41 @@ |
44 | 68 | : "=m" (S390_lowcore.last_update_timer), |
45 | 69 | "=m" (S390_lowcore.last_update_clock) ); |
46 | 70 | S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer; |
47 | - S390_lowcore.steal_clock += S390_lowcore.last_update_clock - clock; | |
71 | + S390_lowcore.steal_timer += S390_lowcore.last_update_clock - clock; | |
48 | 72 | |
49 | - cputime = S390_lowcore.user_timer >> 12; | |
50 | - rcu_user_flag = cputime != 0; | |
51 | - S390_lowcore.user_timer -= cputime << 12; | |
52 | - S390_lowcore.steal_clock -= cputime << 12; | |
53 | - account_user_time(tsk, cputime); | |
73 | + user = S390_lowcore.user_timer - ti->user_timer; | |
74 | + S390_lowcore.steal_timer -= user; | |
75 | + ti->user_timer = S390_lowcore.user_timer; | |
76 | + account_user_time(tsk, user, user); | |
54 | 77 | |
55 | - cputime = S390_lowcore.system_timer >> 12; | |
56 | - S390_lowcore.system_timer -= cputime << 12; | |
57 | - S390_lowcore.steal_clock -= cputime << 12; | |
58 | - account_system_time(tsk, HARDIRQ_OFFSET, cputime); | |
78 | + system = S390_lowcore.system_timer - ti->system_timer; | |
79 | + S390_lowcore.steal_timer -= system; | |
80 | + ti->system_timer = S390_lowcore.system_timer; | |
81 | + account_system_time(tsk, hardirq_offset, system, system); | |
59 | 82 | |
60 | - cputime = S390_lowcore.steal_clock; | |
61 | - if ((__s64) cputime > 0) { | |
62 | - cputime >>= 12; | |
63 | - S390_lowcore.steal_clock -= cputime << 12; | |
64 | - account_steal_time(tsk, cputime); | |
83 | + steal = S390_lowcore.steal_timer; | |
84 | + if ((s64) steal > 0) { | |
85 | + S390_lowcore.steal_timer = 0; | |
86 | + account_steal_time(steal); | |
65 | 87 | } |
66 | 88 | } |
67 | 89 | |
68 | -/* | |
69 | - * Update process times based on virtual cpu times stored by entry.S | |
70 | - * to the lowcore fields user_timer, system_timer & steal_clock. | |
71 | - */ | |
72 | -void account_vtime(struct task_struct *tsk) | |
90 | +void account_vtime(struct task_struct *prev, struct task_struct *next) | |
73 | 91 | { |
74 | - cputime_t cputime; | |
75 | - __u64 timer; | |
92 | + struct thread_info *ti; | |
76 | 93 | |
77 | - timer = S390_lowcore.last_update_timer; | |
78 | - asm volatile (" STPT %0" /* Store current cpu timer value */ | |
79 | - : "=m" (S390_lowcore.last_update_timer) ); | |
80 | - S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer; | |
94 | + do_account_vtime(prev, 0); | |
95 | + ti = task_thread_info(prev); | |
96 | + ti->user_timer = S390_lowcore.user_timer; | |
97 | + ti->system_timer = S390_lowcore.system_timer; | |
98 | + ti = task_thread_info(next); | |
99 | + S390_lowcore.user_timer = ti->user_timer; | |
100 | + S390_lowcore.system_timer = ti->system_timer; | |
101 | +} | |
81 | 102 | |
82 | - cputime = S390_lowcore.user_timer >> 12; | |
83 | - S390_lowcore.user_timer -= cputime << 12; | |
84 | - S390_lowcore.steal_clock -= cputime << 12; | |
85 | - account_user_time(tsk, cputime); | |
86 | - | |
87 | - cputime = S390_lowcore.system_timer >> 12; | |
88 | - S390_lowcore.system_timer -= cputime << 12; | |
89 | - S390_lowcore.steal_clock -= cputime << 12; | |
90 | - account_system_time(tsk, 0, cputime); | |
103 | +void account_process_tick(struct task_struct *tsk, int user_tick) | |
104 | +{ | |
105 | + do_account_vtime(tsk, HARDIRQ_OFFSET); | |
91 | 106 | } |
92 | 107 | |
93 | 108 | /* |
94 | 109 | |
95 | 110 | |
96 | 111 | |
97 | 112 | |
98 | 113 | |
99 | 114 | |
100 | 115 | |
101 | 116 | |
102 | 117 | |
103 | 118 | |
104 | 119 | |
105 | 120 | |
106 | 121 | |
... | ... | @@ -96,80 +111,131 @@ |
96 | 111 | */ |
97 | 112 | void account_system_vtime(struct task_struct *tsk) |
98 | 113 | { |
99 | - cputime_t cputime; | |
100 | - __u64 timer; | |
114 | + struct thread_info *ti = task_thread_info(tsk); | |
115 | + __u64 timer, system; | |
101 | 116 | |
102 | 117 | timer = S390_lowcore.last_update_timer; |
103 | - asm volatile (" STPT %0" /* Store current cpu timer value */ | |
104 | - : "=m" (S390_lowcore.last_update_timer) ); | |
118 | + S390_lowcore.last_update_timer = get_vtimer(); | |
105 | 119 | S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer; |
106 | 120 | |
107 | - cputime = S390_lowcore.system_timer >> 12; | |
108 | - S390_lowcore.system_timer -= cputime << 12; | |
109 | - S390_lowcore.steal_clock -= cputime << 12; | |
110 | - account_system_time(tsk, 0, cputime); | |
121 | + system = S390_lowcore.system_timer - ti->system_timer; | |
122 | + S390_lowcore.steal_timer -= system; | |
123 | + ti->system_timer = S390_lowcore.system_timer; | |
124 | + account_system_time(tsk, 0, system, system); | |
111 | 125 | } |
112 | 126 | EXPORT_SYMBOL_GPL(account_system_vtime); |
113 | 127 | |
114 | -static inline void set_vtimer(__u64 expires) | |
128 | +void vtime_start_cpu(void) | |
115 | 129 | { |
116 | - __u64 timer; | |
130 | + struct s390_idle_data *idle = &__get_cpu_var(s390_idle); | |
131 | + struct vtimer_queue *vq = &__get_cpu_var(virt_cpu_timer); | |
132 | + __u64 idle_time, expires; | |
117 | 133 | |
118 | - asm volatile (" STPT %0\n" /* Store current cpu timer value */ | |
119 | - " SPT %1" /* Set new value immediatly afterwards */ | |
120 | - : "=m" (timer) : "m" (expires) ); | |
121 | - S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer; | |
122 | - S390_lowcore.last_update_timer = expires; | |
134 | + /* Account time spent with enabled wait psw loaded as idle time. */ | |
135 | + idle_time = S390_lowcore.int_clock - idle->idle_enter; | |
136 | + account_idle_time(idle_time); | |
137 | + S390_lowcore.last_update_clock = S390_lowcore.int_clock; | |
123 | 138 | |
124 | - /* store expire time for this CPU timer */ | |
125 | - __get_cpu_var(virt_cpu_timer).to_expire = expires; | |
126 | -} | |
139 | + /* Account system time spent going idle. */ | |
140 | + S390_lowcore.system_timer += S390_lowcore.last_update_timer - vq->idle; | |
141 | + S390_lowcore.last_update_timer = S390_lowcore.async_enter_timer; | |
127 | 142 | |
128 | -void vtime_start_cpu_timer(void) | |
129 | -{ | |
130 | - struct vtimer_queue *vt_list; | |
143 | + /* Restart vtime CPU timer */ | |
144 | + if (vq->do_spt) { | |
145 | + /* Program old expire value but first save progress. */ | |
146 | + expires = vq->idle - S390_lowcore.async_enter_timer; | |
147 | + expires += get_vtimer(); | |
148 | + set_vtimer(expires); | |
149 | + } else { | |
150 | + /* Don't account the CPU timer delta while the cpu was idle. */ | |
151 | + vq->elapsed -= vq->idle - S390_lowcore.async_enter_timer; | |
152 | + } | |
131 | 153 | |
132 | - vt_list = &__get_cpu_var(virt_cpu_timer); | |
133 | - | |
134 | - /* CPU timer interrupt is pending, don't reprogramm it */ | |
135 | - if (vt_list->idle & 1LL<<63) | |
136 | - return; | |
137 | - | |
138 | - if (!list_empty(&vt_list->list)) | |
139 | - set_vtimer(vt_list->idle); | |
154 | + spin_lock(&idle->lock); | |
155 | + idle->idle_time += idle_time; | |
156 | + idle->idle_enter = 0ULL; | |
157 | + idle->idle_count++; | |
158 | + spin_unlock(&idle->lock); | |
140 | 159 | } |
141 | 160 | |
142 | -void vtime_stop_cpu_timer(void) | |
161 | +void vtime_stop_cpu(void) | |
143 | 162 | { |
144 | - struct vtimer_queue *vt_list; | |
163 | + struct s390_idle_data *idle = &__get_cpu_var(s390_idle); | |
164 | + struct vtimer_queue *vq = &__get_cpu_var(virt_cpu_timer); | |
165 | + psw_t psw; | |
145 | 166 | |
146 | - vt_list = &__get_cpu_var(virt_cpu_timer); | |
167 | + /* Wait for external, I/O or machine check interrupt. */ | |
168 | + psw.mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_IO | PSW_MASK_EXT; | |
147 | 169 | |
148 | - /* nothing to do */ | |
149 | - if (list_empty(&vt_list->list)) { | |
150 | - vt_list->idle = VTIMER_MAX_SLICE; | |
151 | - goto fire; | |
170 | + /* Check if the CPU timer needs to be reprogrammed. */ | |
171 | + if (vq->do_spt) { | |
172 | + __u64 vmax = VTIMER_MAX_SLICE; | |
173 | + /* | |
174 | + * The inline assembly is equivalent to | |
175 | + * vq->idle = get_cpu_timer(); | |
176 | + * set_cpu_timer(VTIMER_MAX_SLICE); | |
177 | + * idle->idle_enter = get_clock(); | |
178 | + * __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT | | |
179 | + * PSW_MASK_IO | PSW_MASK_EXT); | |
180 | + * The difference is that the inline assembly makes sure that | |
181 | + * the last three instructions are stpt, stck and lpsw in that | 
182 | + * order. This is done to increase the precision. | |
183 | + */ | |
184 | + asm volatile( | |
185 | +#ifndef CONFIG_64BIT | |
186 | + " basr 1,0\n" | |
187 | + "0: ahi 1,1f-0b\n" | |
188 | + " st 1,4(%2)\n" | |
189 | +#else /* CONFIG_64BIT */ | |
190 | + " larl 1,1f\n" | |
191 | + " stg 1,8(%2)\n" | |
192 | +#endif /* CONFIG_64BIT */ | |
193 | + " stpt 0(%4)\n" | |
194 | + " spt 0(%5)\n" | |
195 | + " stck 0(%3)\n" | |
196 | +#ifndef CONFIG_64BIT | |
197 | + " lpsw 0(%2)\n" | |
198 | +#else /* CONFIG_64BIT */ | |
199 | + " lpswe 0(%2)\n" | |
200 | +#endif /* CONFIG_64BIT */ | |
201 | + "1:" | |
202 | + : "=m" (idle->idle_enter), "=m" (vq->idle) | |
203 | + : "a" (&psw), "a" (&idle->idle_enter), | |
204 | + "a" (&vq->idle), "a" (&vmax), "m" (vmax), "m" (psw) | |
205 | + : "memory", "cc", "1"); | |
206 | + } else { | |
207 | + /* | |
208 | + * The inline assembly is equivalent to | |
209 | + * vq->idle = get_cpu_timer(); | |
210 | + * idle->idle_enter = get_clock(); | |
211 | + * __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT | | |
212 | + * PSW_MASK_IO | PSW_MASK_EXT); | |
213 | + * The difference is that the inline assembly makes sure that | |
214 | + * the last three instructions are stpt, stck and lpsw in that | 
215 | + * order. This is done to increase the precision. | |
216 | + */ | |
217 | + asm volatile( | |
218 | +#ifndef CONFIG_64BIT | |
219 | + " basr 1,0\n" | |
220 | + "0: ahi 1,1f-0b\n" | |
221 | + " st 1,4(%2)\n" | |
222 | +#else /* CONFIG_64BIT */ | |
223 | + " larl 1,1f\n" | |
224 | + " stg 1,8(%2)\n" | |
225 | +#endif /* CONFIG_64BIT */ | |
226 | + " stpt 0(%4)\n" | |
227 | + " stck 0(%3)\n" | |
228 | +#ifndef CONFIG_64BIT | |
229 | + " lpsw 0(%2)\n" | |
230 | +#else /* CONFIG_64BIT */ | |
231 | + " lpswe 0(%2)\n" | |
232 | +#endif /* CONFIG_64BIT */ | |
233 | + "1:" | |
234 | + : "=m" (idle->idle_enter), "=m" (vq->idle) | |
235 | + : "a" (&psw), "a" (&idle->idle_enter), | |
236 | + "a" (&vq->idle), "m" (psw) | |
237 | + : "memory", "cc", "1"); | |
152 | 238 | } |
153 | - | |
154 | - /* store the actual expire value */ | |
155 | - asm volatile ("STPT %0" : "=m" (vt_list->idle)); | |
156 | - | |
157 | - /* | |
158 | - * If the CPU timer is negative we don't reprogramm | |
159 | - * it because we will get instantly an interrupt. | |
160 | - */ | |
161 | - if (vt_list->idle & 1LL<<63) | |
162 | - return; | |
163 | - | |
164 | - vt_list->offset += vt_list->to_expire - vt_list->idle; | |
165 | - | |
166 | - /* | |
167 | - * We cannot halt the CPU timer, we just write a value that | |
168 | - * nearly never expires (only after 71 years) and re-write | |
169 | - * the stored expire value if we continue the timer | |
170 | - */ | |
171 | - fire: | |
172 | - set_vtimer(VTIMER_MAX_SLICE); | |
173 | 239 | } |
174 | 240 | |
175 | 241 | /* |
176 | 242 | |
177 | 243 | |
178 | 244 | |
... | ... | @@ -195,30 +261,23 @@ |
195 | 261 | */ |
196 | 262 | static void do_callbacks(struct list_head *cb_list) |
197 | 263 | { |
198 | - struct vtimer_queue *vt_list; | |
264 | + struct vtimer_queue *vq; | |
199 | 265 | struct vtimer_list *event, *tmp; |
200 | - void (*fn)(unsigned long); | |
201 | - unsigned long data; | |
202 | 266 | |
203 | 267 | if (list_empty(cb_list)) |
204 | 268 | return; |
205 | 269 | |
206 | - vt_list = &__get_cpu_var(virt_cpu_timer); | |
270 | + vq = &__get_cpu_var(virt_cpu_timer); | |
207 | 271 | |
208 | 272 | list_for_each_entry_safe(event, tmp, cb_list, entry) { |
209 | - fn = event->function; | |
210 | - data = event->data; | |
211 | - fn(data); | |
212 | - | |
213 | - if (!event->interval) | |
214 | - /* delete one shot timer */ | |
215 | - list_del_init(&event->entry); | |
216 | - else { | |
217 | - /* move interval timer back to list */ | |
218 | - spin_lock(&vt_list->lock); | |
219 | - list_del_init(&event->entry); | |
220 | - list_add_sorted(event, &vt_list->list); | |
221 | - spin_unlock(&vt_list->lock); | |
273 | + list_del_init(&event->entry); | |
274 | + (event->function)(event->data); | |
275 | + if (event->interval) { | |
276 | + /* Recharge interval timer */ | |
277 | + event->expires = event->interval + vq->elapsed; | |
278 | + spin_lock(&vq->lock); | |
279 | + list_add_sorted(event, &vq->list); | |
280 | + spin_unlock(&vq->lock); | |
222 | 281 | } |
223 | 282 | } |
224 | 283 | } |
225 | 284 | |
226 | 285 | |
227 | 286 | |
228 | 287 | |
229 | 288 | |
230 | 289 | |
231 | 290 | |
... | ... | @@ -228,64 +287,57 @@ |
228 | 287 | */ |
229 | 288 | static void do_cpu_timer_interrupt(__u16 error_code) |
230 | 289 | { |
231 | - __u64 next, delta; | |
232 | - struct vtimer_queue *vt_list; | |
290 | + struct vtimer_queue *vq; | |
233 | 291 | struct vtimer_list *event, *tmp; |
234 | - struct list_head *ptr; | |
235 | - /* the callback queue */ | |
236 | - struct list_head cb_list; | |
292 | + struct list_head cb_list; /* the callback queue */ | |
293 | + __u64 elapsed, next; | |
237 | 294 | |
238 | 295 | INIT_LIST_HEAD(&cb_list); |
239 | - vt_list = &__get_cpu_var(virt_cpu_timer); | |
296 | + vq = &__get_cpu_var(virt_cpu_timer); | |
240 | 297 | |
241 | 298 | /* walk timer list, fire all expired events */ |
242 | - spin_lock(&vt_list->lock); | |
299 | + spin_lock(&vq->lock); | |
243 | 300 | |
244 | - if (vt_list->to_expire < VTIMER_MAX_SLICE) | |
245 | - vt_list->offset += vt_list->to_expire; | |
246 | - | |
247 | - list_for_each_entry_safe(event, tmp, &vt_list->list, entry) { | |
248 | - if (event->expires > vt_list->offset) | |
249 | - /* found first unexpired event, leave */ | |
250 | - break; | |
251 | - | |
252 | - /* re-charge interval timer, we have to add the offset */ | |
253 | - if (event->interval) | |
254 | - event->expires = event->interval + vt_list->offset; | |
255 | - | |
256 | - /* move expired timer to the callback queue */ | |
257 | - list_move_tail(&event->entry, &cb_list); | |
301 | + elapsed = vq->elapsed + (vq->timer - S390_lowcore.async_enter_timer); | |
302 | + BUG_ON((s64) elapsed < 0); | |
303 | + vq->elapsed = 0; | |
304 | + list_for_each_entry_safe(event, tmp, &vq->list, entry) { | |
305 | + if (event->expires < elapsed) | |
306 | + /* move expired timer to the callback queue */ | |
307 | + list_move_tail(&event->entry, &cb_list); | |
308 | + else | |
309 | + event->expires -= elapsed; | |
258 | 310 | } |
259 | - spin_unlock(&vt_list->lock); | |
311 | + spin_unlock(&vq->lock); | |
312 | + | |
313 | + vq->do_spt = list_empty(&cb_list); | |
260 | 314 | do_callbacks(&cb_list); |
261 | 315 | |
262 | 316 | /* next event is first in list */ |
263 | - spin_lock(&vt_list->lock); | |
264 | - if (!list_empty(&vt_list->list)) { | |
265 | - ptr = vt_list->list.next; | |
266 | - event = list_entry(ptr, struct vtimer_list, entry); | |
267 | - next = event->expires - vt_list->offset; | |
268 | - | |
269 | - /* add the expired time from this interrupt handler | |
270 | - * and the callback functions | |
271 | - */ | |
272 | - asm volatile ("STPT %0" : "=m" (delta)); | |
273 | - delta = 0xffffffffffffffffLL - delta + 1; | |
274 | - vt_list->offset += delta; | |
275 | - next -= delta; | |
276 | - } else { | |
277 | - vt_list->offset = 0; | |
278 | - next = VTIMER_MAX_SLICE; | |
279 | - } | |
280 | - spin_unlock(&vt_list->lock); | |
281 | - set_vtimer(next); | |
317 | + next = VTIMER_MAX_SLICE; | |
318 | + spin_lock(&vq->lock); | |
319 | + if (!list_empty(&vq->list)) { | |
320 | + event = list_first_entry(&vq->list, struct vtimer_list, entry); | |
321 | + next = event->expires; | |
322 | + } else | |
323 | + vq->do_spt = 0; | |
324 | + spin_unlock(&vq->lock); | |
325 | + /* | |
326 | + * To improve precision, add the time spent by the | |
327 | + * interrupt handler to the elapsed time. | |
328 | + * Note: the CPU timer counts down and we got an | |
329 | + * interrupt, so its current content is negative. | |
330 | + */ | |
331 | + elapsed = S390_lowcore.async_enter_timer - get_vtimer(); | |
332 | + set_vtimer(next - elapsed); | |
333 | + vq->timer = next - elapsed; | |
334 | + vq->elapsed = elapsed; | |
282 | 335 | } |
283 | 336 | |
284 | 337 | void init_virt_timer(struct vtimer_list *timer) |
285 | 338 | { |
286 | 339 | timer->function = NULL; |
287 | 340 | INIT_LIST_HEAD(&timer->entry); |
288 | - spin_lock_init(&timer->lock); | |
289 | 341 | } |
290 | 342 | EXPORT_SYMBOL(init_virt_timer); |
291 | 343 | |
292 | 344 | |
293 | 345 | |
294 | 346 | |
295 | 347 | |
... | ... | @@ -299,44 +351,40 @@ |
299 | 351 | */ |
300 | 352 | static void internal_add_vtimer(struct vtimer_list *timer) |
301 | 353 | { |
354 | + struct vtimer_queue *vq; | |
302 | 355 | unsigned long flags; |
303 | - __u64 done; | |
304 | - struct vtimer_list *event; | |
305 | - struct vtimer_queue *vt_list; | |
356 | + __u64 left, expires; | |
306 | 357 | |
307 | - vt_list = &per_cpu(virt_cpu_timer, timer->cpu); | |
308 | - spin_lock_irqsave(&vt_list->lock, flags); | |
358 | + vq = &per_cpu(virt_cpu_timer, timer->cpu); | |
359 | + spin_lock_irqsave(&vq->lock, flags); | |
309 | 360 | |
310 | 361 | BUG_ON(timer->cpu != smp_processor_id()); |
311 | 362 | |
312 | - /* if list is empty we only have to set the timer */ | |
313 | - if (list_empty(&vt_list->list)) { | |
314 | - /* reset the offset, this may happen if the last timer was | |
315 | - * just deleted by mod_virt_timer and the interrupt | |
316 | - * didn't happen until here | |
317 | - */ | |
318 | - vt_list->offset = 0; | |
319 | - goto fire; | |
363 | + if (list_empty(&vq->list)) { | |
364 | + /* First timer on this cpu, just program it. */ | |
365 | + list_add(&timer->entry, &vq->list); | |
366 | + set_vtimer(timer->expires); | |
367 | + vq->timer = timer->expires; | |
368 | + vq->elapsed = 0; | |
369 | + } else { | |
370 | + /* Check progress of old timers. */ | |
371 | + expires = timer->expires; | |
372 | + left = get_vtimer(); | |
373 | + if (likely((s64) expires < (s64) left)) { | |
374 | + /* The new timer expires before the current timer. */ | |
375 | + set_vtimer(expires); | |
376 | + vq->elapsed += vq->timer - left; | |
377 | + vq->timer = expires; | |
378 | + } else { | |
379 | + vq->elapsed += vq->timer - left; | |
380 | + vq->timer = left; | |
381 | + } | |
382 | + /* Insert new timer into per cpu list. */ | |
383 | + timer->expires += vq->elapsed; | |
384 | + list_add_sorted(timer, &vq->list); | |
320 | 385 | } |
321 | 386 | |
322 | - /* save progress */ | |
323 | - asm volatile ("STPT %0" : "=m" (done)); | |
324 | - | |
325 | - /* calculate completed work */ | |
326 | - done = vt_list->to_expire - done + vt_list->offset; | |
327 | - vt_list->offset = 0; | |
328 | - | |
329 | - list_for_each_entry(event, &vt_list->list, entry) | |
330 | - event->expires -= done; | |
331 | - | |
332 | - fire: | |
333 | - list_add_sorted(timer, &vt_list->list); | |
334 | - | |
335 | - /* get first element, which is the next vtimer slice */ | |
336 | - event = list_entry(vt_list->list.next, struct vtimer_list, entry); | |
337 | - | |
338 | - set_vtimer(event->expires); | |
339 | - spin_unlock_irqrestore(&vt_list->lock, flags); | |
387 | + spin_unlock_irqrestore(&vq->lock, flags); | |
340 | 388 | /* release CPU acquired in prepare_vtimer or mod_virt_timer() */ |
341 | 389 | put_cpu(); |
342 | 390 | } |
343 | 391 | |
... | ... | @@ -381,14 +429,15 @@ |
381 | 429 | * If we change a pending timer the function must be called on the CPU |
382 | 430 | * where the timer is running on, e.g. by smp_call_function_single() |
383 | 431 | * |
384 | - * The original mod_timer adds the timer if it is not pending. For compatibility | |
385 | - * we do the same. The timer will be added on the current CPU as a oneshot timer. | |
432 | + * The original mod_timer adds the timer if it is not pending. For | |
433 | + * compatibility we do the same. The timer will be added on the current | |
434 | + * CPU as a one-shot timer. | |
386 | 435 | * |
387 | 436 | * returns whether it has modified a pending timer (1) or not (0) |
388 | 437 | */ |
389 | 438 | int mod_virt_timer(struct vtimer_list *timer, __u64 expires) |
390 | 439 | { |
391 | - struct vtimer_queue *vt_list; | |
440 | + struct vtimer_queue *vq; | |
392 | 441 | unsigned long flags; |
393 | 442 | int cpu; |
394 | 443 | |
395 | 444 | |
396 | 445 | |
... | ... | @@ -404,17 +453,17 @@ |
404 | 453 | return 1; |
405 | 454 | |
406 | 455 | cpu = get_cpu(); |
407 | - vt_list = &per_cpu(virt_cpu_timer, cpu); | |
456 | + vq = &per_cpu(virt_cpu_timer, cpu); | |
408 | 457 | |
409 | 458 | /* check if we run on the right CPU */ |
410 | 459 | BUG_ON(timer->cpu != cpu); |
411 | 460 | |
412 | 461 | /* disable interrupts before test if timer is pending */ |
413 | - spin_lock_irqsave(&vt_list->lock, flags); | |
462 | + spin_lock_irqsave(&vq->lock, flags); | |
414 | 463 | |
415 | 464 | /* if timer isn't pending add it on the current CPU */ |
416 | 465 | if (!vtimer_pending(timer)) { |
417 | - spin_unlock_irqrestore(&vt_list->lock, flags); | |
466 | + spin_unlock_irqrestore(&vq->lock, flags); | |
418 | 467 | /* we do not activate an interval timer with mod_virt_timer */ |
419 | 468 | timer->interval = 0; |
420 | 469 | timer->expires = expires; |
... | ... | @@ -431,7 +480,7 @@ |
431 | 480 | timer->interval = expires; |
432 | 481 | |
433 | 482 | /* the timer can't expire anymore so we can release the lock */ |
434 | - spin_unlock_irqrestore(&vt_list->lock, flags); | |
483 | + spin_unlock_irqrestore(&vq->lock, flags); | |
435 | 484 | internal_add_vtimer(timer); |
436 | 485 | return 1; |
437 | 486 | } |
438 | 487 | |
439 | 488 | |
... | ... | @@ -445,25 +494,19 @@ |
445 | 494 | int del_virt_timer(struct vtimer_list *timer) |
446 | 495 | { |
447 | 496 | unsigned long flags; |
448 | - struct vtimer_queue *vt_list; | |
497 | + struct vtimer_queue *vq; | |
449 | 498 | |
450 | 499 | /* check if timer is pending */ |
451 | 500 | if (!vtimer_pending(timer)) |
452 | 501 | return 0; |
453 | 502 | |
454 | - vt_list = &per_cpu(virt_cpu_timer, timer->cpu); | |
455 | - spin_lock_irqsave(&vt_list->lock, flags); | |
503 | + vq = &per_cpu(virt_cpu_timer, timer->cpu); | |
504 | + spin_lock_irqsave(&vq->lock, flags); | |
456 | 505 | |
457 | 506 | /* we don't interrupt a running timer, just let it expire! */ |
458 | 507 | list_del_init(&timer->entry); |
459 | 508 | |
460 | - /* last timer removed */ | |
461 | - if (list_empty(&vt_list->list)) { | |
462 | - vt_list->to_expire = 0; | |
463 | - vt_list->offset = 0; | |
464 | - } | |
465 | - | |
466 | - spin_unlock_irqrestore(&vt_list->lock, flags); | |
509 | + spin_unlock_irqrestore(&vq->lock, flags); | |
467 | 510 | return 1; |
468 | 511 | } |
469 | 512 | EXPORT_SYMBOL(del_virt_timer); |
470 | 513 | |
471 | 514 | |
472 | 515 | |
473 | 516 | |
... | ... | @@ -473,24 +516,19 @@ |
473 | 516 | */ |
474 | 517 | void init_cpu_vtimer(void) |
475 | 518 | { |
476 | - struct vtimer_queue *vt_list; | |
519 | + struct vtimer_queue *vq; | |
477 | 520 | |
478 | 521 | /* kick the virtual timer */ |
479 | - S390_lowcore.exit_timer = VTIMER_MAX_SLICE; | |
480 | - S390_lowcore.last_update_timer = VTIMER_MAX_SLICE; | |
481 | - asm volatile ("SPT %0" : : "m" (S390_lowcore.last_update_timer)); | |
482 | 522 | asm volatile ("STCK %0" : "=m" (S390_lowcore.last_update_clock)); |
523 | + asm volatile ("STPT %0" : "=m" (S390_lowcore.last_update_timer)); | |
483 | 524 | |
525 | + /* initialize per cpu vtimer structure */ | |
526 | + vq = &__get_cpu_var(virt_cpu_timer); | |
527 | + INIT_LIST_HEAD(&vq->list); | |
528 | + spin_lock_init(&vq->lock); | |
529 | + | |
484 | 530 | /* enable cpu timer interrupts */ |
485 | 531 | __ctl_set_bit(0,10); |
486 | - | |
487 | - vt_list = &__get_cpu_var(virt_cpu_timer); | |
488 | - INIT_LIST_HEAD(&vt_list->list); | |
489 | - spin_lock_init(&vt_list->lock); | |
490 | - vt_list->to_expire = 0; | |
491 | - vt_list->offset = 0; | |
492 | - vt_list->idle = 0; | |
493 | - | |
494 | 532 | } |
495 | 533 | |
496 | 534 | void __init vtime_init(void) |
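A minimal usage sketch of the reworked vtimer interface may help here; it assumes the add_virt_timer_periodic() entry point (present in vtime.c but not part of this hunk) and purely illustrative callback and expiry values, with expiry given in raw CPU-timer units.

#include <asm/timer.h>

static struct vtimer_list sample_timer;

static void sample_fn(unsigned long data)
{
	/* runs from do_callbacks() in CPU-timer interrupt context */
}

static void sample_setup(void)
{
	init_virt_timer(&sample_timer);
	sample_timer.function = sample_fn;
	sample_timer.data = 0;
	sample_timer.expires = 1ULL << 20;	/* first expiry, CPU-timer units */
	sample_timer.interval = 1ULL << 20;	/* re-armed by do_callbacks() */
	add_virt_timer_periodic(&sample_timer);	/* assumed entry point */
}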
arch/x86/xen/time.c
... | ... | @@ -132,8 +132,7 @@ |
132 | 132 | *snap = state; |
133 | 133 | |
134 | 134 | /* Add the appropriate number of ticks of stolen time, |
135 | - including any left-overs from last time. Passing NULL to | |
136 | - account_steal_time accounts the time as stolen. */ | |
135 | + including any left-overs from last time. */ | |
137 | 136 | stolen = runnable + offline + __get_cpu_var(residual_stolen); |
138 | 137 | |
139 | 138 | if (stolen < 0) |
140 | 139 | |
... | ... | @@ -141,11 +140,10 @@ |
141 | 140 | |
142 | 141 | ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen); |
143 | 142 | __get_cpu_var(residual_stolen) = stolen; |
144 | - account_steal_time(NULL, ticks); | |
143 | + account_steal_ticks(ticks); | |
145 | 144 | |
146 | 145 | /* Add the appropriate number of ticks of blocked time, |
147 | - including any left-overs from last time. Passing idle to | |
148 | - account_steal_time accounts the time as idle/wait. */ | |
146 | + including any left-overs from last time. */ | |
149 | 147 | blocked += __get_cpu_var(residual_blocked); |
150 | 148 | |
151 | 149 | if (blocked < 0) |
... | ... | @@ -153,7 +151,7 @@ |
153 | 151 | |
154 | 152 | ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked); |
155 | 153 | __get_cpu_var(residual_blocked) = blocked; |
156 | - account_steal_time(idle_task(smp_processor_id()), ticks); | |
154 | + account_idle_ticks(ticks); | |
157 | 155 | } |
158 | 156 | |
159 | 157 | /* |
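The residual pattern above is worth spelling out: convert a running nanosecond total into whole ticks and carry the remainder to the next round so no time is lost. A sketch under the same assumptions as xen/time.c (NS_PER_TICK is its local constant; the per-cpu variable name is illustrative):

#include <linux/kernel_stat.h>
#include <linux/math64.h>
#include <linux/percpu.h>

#define NS_PER_TICK	(1000000000LL / HZ)	/* local to xen/time.c */

static DEFINE_PER_CPU(u64, residual_ns);	/* left-over from last round */

static void account_stolen_ns(u64 stolen)
{
	unsigned long ticks;

	stolen += __get_cpu_var(residual_ns);	/* include old remainder */
	ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen);
	__get_cpu_var(residual_ns) = stolen;	/* now < NS_PER_TICK */
	account_steal_ticks(ticks);		/* whole ticks only */
}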
drivers/s390/cio/cio.c
... | ... | @@ -632,8 +632,8 @@ |
632 | 632 | struct pt_regs *old_regs; |
633 | 633 | |
634 | 634 | old_regs = set_irq_regs(regs); |
635 | - irq_enter(); | |
636 | 635 | s390_idle_check(); |
636 | + irq_enter(); | |
637 | 637 | if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator) |
638 | 638 | /* Serve timer interrupts first. */ |
639 | 639 | clock_comparator_work(); |
drivers/s390/s390mach.c
... | ... | @@ -18,6 +18,7 @@ |
18 | 18 | #include <asm/etr.h> |
19 | 19 | #include <asm/lowcore.h> |
20 | 20 | #include <asm/cio.h> |
21 | +#include <asm/cpu.h> | |
21 | 22 | #include "s390mach.h" |
22 | 23 | |
23 | 24 | static struct semaphore m_sem; |
... | ... | @@ -368,6 +369,8 @@ |
368 | 369 | int umode; |
369 | 370 | |
370 | 371 | lockdep_off(); |
372 | + | |
373 | + s390_idle_check(); | |
371 | 374 | |
372 | 375 | mci = (struct mci *) &S390_lowcore.mcck_interruption_code; |
373 | 376 | mcck = &__get_cpu_var(cpu_mcck); |
include/linux/kernel_stat.h
... | ... | @@ -79,11 +79,14 @@ |
79 | 79 | } |
80 | 80 | |
81 | 81 | extern unsigned long long task_delta_exec(struct task_struct *); |
82 | -extern void account_user_time(struct task_struct *, cputime_t); | |
83 | -extern void account_user_time_scaled(struct task_struct *, cputime_t); | |
84 | -extern void account_system_time(struct task_struct *, int, cputime_t); | |
85 | -extern void account_system_time_scaled(struct task_struct *, cputime_t); | |
86 | -extern void account_steal_time(struct task_struct *, cputime_t); | |
82 | +extern void account_user_time(struct task_struct *, cputime_t, cputime_t); | |
83 | +extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t); | |
84 | +extern void account_steal_time(cputime_t); | |
85 | +extern void account_idle_time(cputime_t); | |
86 | + | |
87 | +extern void account_process_tick(struct task_struct *, int user); | |
88 | +extern void account_steal_ticks(unsigned long ticks); | |
89 | +extern void account_idle_ticks(unsigned long ticks); | |
87 | 90 | |
88 | 91 | #endif /* _LINUX_KERNEL_STAT_H */ |
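In caller terms the header change looks like this; p, delta, delta_scaled and ticks_ct are illustrative (a sketch, not taken from the patch):

/* before: two calls per update; idle vs. steal decided inside
 * account_steal_time() by comparing the task against rq->idle */
account_user_time(p, delta);
account_user_time_scaled(p, delta_scaled);
account_steal_time(idle_task(cpu), ticks_ct);

/* after: the scaled value travels with the base call and the
 * idle/steal classification is explicit at the call site */
account_user_time(p, delta, delta_scaled);
account_steal_time(ticks_ct);	/* always cpustat->steal */
account_idle_time(ticks_ct);	/* cpustat->idle or ->iowait */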
include/linux/sched.h
kernel/sched.c
... | ... | @@ -4150,13 +4150,17 @@ |
4150 | 4150 | * Account user cpu time to a process. |
4151 | 4151 | * @p: the process that the cpu time gets accounted to |
4152 | 4152 | * @cputime: the cpu time spent in user space since the last update |
4153 | + * @cputime_scaled: cputime scaled by cpu frequency | |
4153 | 4154 | */ |
4154 | -void account_user_time(struct task_struct *p, cputime_t cputime) | |
4155 | +void account_user_time(struct task_struct *p, cputime_t cputime, | |
4156 | + cputime_t cputime_scaled) | |
4155 | 4157 | { |
4156 | 4158 | struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; |
4157 | 4159 | cputime64_t tmp; |
4158 | 4160 | |
4161 | + /* Add user time to process. */ | |
4159 | 4162 | p->utime = cputime_add(p->utime, cputime); |
4163 | + p->utimescaled = cputime_add(p->utimescaled, cputime_scaled); | |
4160 | 4164 | account_group_user_time(p, cputime); |
4161 | 4165 | |
4162 | 4166 | /* Add user time to cpustat. */ |
4163 | 4167 | |
4164 | 4168 | |
4165 | 4169 | |
4166 | 4170 | |
4167 | 4171 | |
4168 | 4172 | |
4169 | 4173 | |
4170 | 4174 | |
4171 | 4175 | |
4172 | 4176 | |
4173 | 4177 | |
... | ... | @@ -4173,51 +4177,48 @@ |
4173 | 4177 | * Account guest cpu time to a process. |
4174 | 4178 | * @p: the process that the cpu time gets accounted to |
4175 | 4179 | * @cputime: the cpu time spent in virtual machine since the last update |
4180 | + * @cputime_scaled: cputime scaled by cpu frequency | |
4176 | 4181 | */ |
4177 | -static void account_guest_time(struct task_struct *p, cputime_t cputime) | |
4182 | +static void account_guest_time(struct task_struct *p, cputime_t cputime, | |
4183 | + cputime_t cputime_scaled) | |
4178 | 4184 | { |
4179 | 4185 | cputime64_t tmp; |
4180 | 4186 | struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; |
4181 | 4187 | |
4182 | 4188 | tmp = cputime_to_cputime64(cputime); |
4183 | 4189 | |
4190 | + /* Add guest time to process. */ | |
4184 | 4191 | p->utime = cputime_add(p->utime, cputime); |
4192 | + p->utimescaled = cputime_add(p->utimescaled, cputime_scaled); | |
4185 | 4193 | account_group_user_time(p, cputime); |
4186 | 4194 | p->gtime = cputime_add(p->gtime, cputime); |
4187 | 4195 | |
4196 | + /* Add guest time to cpustat. */ | |
4188 | 4197 | cpustat->user = cputime64_add(cpustat->user, tmp); |
4189 | 4198 | cpustat->guest = cputime64_add(cpustat->guest, tmp); |
4190 | 4199 | } |
4191 | 4200 | |
4192 | 4201 | /* |
4193 | - * Account scaled user cpu time to a process. | |
4194 | - * @p: the process that the cpu time gets accounted to | |
4195 | - * @cputime: the cpu time spent in user space since the last update | |
4196 | - */ | |
4197 | -void account_user_time_scaled(struct task_struct *p, cputime_t cputime) | |
4198 | -{ | |
4199 | - p->utimescaled = cputime_add(p->utimescaled, cputime); | |
4200 | -} | |
4201 | - | |
4202 | -/* | |
4203 | 4202 | * Account system cpu time to a process. |
4204 | 4203 | * @p: the process that the cpu time gets accounted to |
4205 | 4204 | * @hardirq_offset: the offset to subtract from hardirq_count() |
4206 | 4205 | * @cputime: the cpu time spent in kernel space since the last update |
4206 | + * @cputime_scaled: cputime scaled by cpu frequency | |
4207 | 4207 | */ |
4208 | 4208 | void account_system_time(struct task_struct *p, int hardirq_offset, |
4209 | - cputime_t cputime) | |
4209 | + cputime_t cputime, cputime_t cputime_scaled) | |
4210 | 4210 | { |
4211 | 4211 | struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; |
4212 | - struct rq *rq = this_rq(); | |
4213 | 4212 | cputime64_t tmp; |
4214 | 4213 | |
4215 | 4214 | if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) { |
4216 | - account_guest_time(p, cputime); | |
4215 | + account_guest_time(p, cputime, cputime_scaled); | |
4217 | 4216 | return; |
4218 | 4217 | } |
4219 | 4218 | |
4219 | + /* Add system time to process. */ | |
4220 | 4220 | p->stime = cputime_add(p->stime, cputime); |
4221 | + p->stimescaled = cputime_add(p->stimescaled, cputime_scaled); | |
4221 | 4222 | account_group_system_time(p, cputime); |
4222 | 4223 | |
4223 | 4224 | /* Add system time to cpustat. */ |
4224 | 4225 | |
4225 | 4226 | |
4226 | 4227 | |
4227 | 4228 | |
4228 | 4229 | |
4229 | 4230 | |
4230 | 4231 | |
4231 | 4232 | |
4232 | 4233 | |
... | ... | @@ -4226,47 +4227,83 @@ |
4226 | 4227 | cpustat->irq = cputime64_add(cpustat->irq, tmp); |
4227 | 4228 | else if (softirq_count()) |
4228 | 4229 | cpustat->softirq = cputime64_add(cpustat->softirq, tmp); |
4229 | - else if (p != rq->idle) | |
4230 | - cpustat->system = cputime64_add(cpustat->system, tmp); | |
4231 | - else if (atomic_read(&rq->nr_iowait) > 0) | |
4232 | - cpustat->iowait = cputime64_add(cpustat->iowait, tmp); | |
4233 | 4230 | else |
4234 | - cpustat->idle = cputime64_add(cpustat->idle, tmp); | |
4231 | + cpustat->system = cputime64_add(cpustat->system, tmp); | |
4232 | + | |
4235 | 4233 | /* Account for system time used */ |
4236 | 4234 | acct_update_integrals(p); |
4237 | 4235 | } |
4238 | 4236 | |
4239 | 4237 | /* |
4240 | - * Account scaled system cpu time to a process. | |
4241 | - * @p: the process that the cpu time gets accounted to | |
4242 | - * @hardirq_offset: the offset to subtract from hardirq_count() | |
4243 | - * @cputime: the cpu time spent in kernel space since the last update | |
4238 | + * Account for involuntary wait time. | |
4239 | + * @cputime: the cpu time spent in involuntary wait | |
4244 | 4240 | */ |
4245 | -void account_system_time_scaled(struct task_struct *p, cputime_t cputime) | |
4241 | +void account_steal_time(cputime_t cputime) | |
4246 | 4242 | { |
4247 | - p->stimescaled = cputime_add(p->stimescaled, cputime); | |
4243 | + struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; | |
4244 | + cputime64_t cputime64 = cputime_to_cputime64(cputime); | |
4245 | + | |
4246 | + cpustat->steal = cputime64_add(cpustat->steal, cputime64); | |
4248 | 4247 | } |
4249 | 4248 | |
4250 | 4249 | /* |
4251 | - * Account for involuntary wait time. | |
4252 | - * @p: the process from which the cpu time has been stolen | |
4253 | - * @steal: the cpu time spent in involuntary wait | |
4250 | + * Account for idle time. | |
4251 | + * @cputime: the cpu time spent in idle wait | |
4254 | 4252 | */ |
4255 | -void account_steal_time(struct task_struct *p, cputime_t steal) | |
4253 | +void account_idle_time(cputime_t cputime) | |
4256 | 4254 | { |
4257 | 4255 | struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; |
4258 | - cputime64_t tmp = cputime_to_cputime64(steal); | |
4256 | + cputime64_t cputime64 = cputime_to_cputime64(cputime); | |
4259 | 4257 | struct rq *rq = this_rq(); |
4260 | 4258 | |
4261 | - if (p == rq->idle) { | |
4262 | - p->stime = cputime_add(p->stime, steal); | |
4263 | - if (atomic_read(&rq->nr_iowait) > 0) | |
4264 | - cpustat->iowait = cputime64_add(cpustat->iowait, tmp); | |
4265 | - else | |
4266 | - cpustat->idle = cputime64_add(cpustat->idle, tmp); | |
4267 | - } else | |
4268 | - cpustat->steal = cputime64_add(cpustat->steal, tmp); | |
4259 | + if (atomic_read(&rq->nr_iowait) > 0) | |
4260 | + cpustat->iowait = cputime64_add(cpustat->iowait, cputime64); | |
4261 | + else | |
4262 | + cpustat->idle = cputime64_add(cpustat->idle, cputime64); | |
4269 | 4263 | } |
4264 | + | |
4265 | +#ifndef CONFIG_VIRT_CPU_ACCOUNTING | |
4266 | + | |
4267 | +/* | |
4268 | + * Account a single tick of cpu time. | |
4269 | + * @p: the process that the cpu time gets accounted to | |
4270 | + * @user_tick: indicates if the tick is a user or a system tick | |
4271 | + */ | |
4272 | +void account_process_tick(struct task_struct *p, int user_tick) | |
4273 | +{ | |
4274 | + cputime_t one_jiffy = jiffies_to_cputime(1); | |
4275 | + cputime_t one_jiffy_scaled = cputime_to_scaled(one_jiffy); | |
4276 | + struct rq *rq = this_rq(); | |
4277 | + | |
4278 | + if (user_tick) | |
4279 | + account_user_time(p, one_jiffy, one_jiffy_scaled); | |
4280 | + else if (p != rq->idle) | |
4281 | + account_system_time(p, HARDIRQ_OFFSET, one_jiffy, | |
4282 | + one_jiffy_scaled); | |
4283 | + else | |
4284 | + account_idle_time(one_jiffy); | |
4285 | +} | |
4286 | + | |
4287 | +/* | |
4288 | + * Account multiple ticks of steal time. | |
4289 | + * @ticks: number of stolen ticks | |
4290 | + * (charged straight to cpustat->steal; no task argument) | |
4291 | + */ | |
4292 | +void account_steal_ticks(unsigned long ticks) | |
4293 | +{ | |
4294 | + account_steal_time(jiffies_to_cputime(ticks)); | |
4295 | +} | |
4296 | + | |
4297 | +/* | |
4298 | + * Account multiple ticks of idle time. | |
4299 | + * @ticks: number of idle ticks | |
4300 | + */ | |
4301 | +void account_idle_ticks(unsigned long ticks) | |
4302 | +{ | |
4303 | + account_idle_time(jiffies_to_cputime(ticks)); | |
4304 | +} | |
4305 | + | |
4306 | +#endif | |
4270 | 4307 | |
4271 | 4308 | /* |
4272 | 4309 | * Use precise platform statistics if available: |
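One subtlety deserves a worked example: with PF_VCPU set and no hardirq nesting, account_system_time() reroutes the slice to account_guest_time(), which books it on the user side twice (task and cpustat) plus the guest counters. The vcpu_task below is hypothetical:

cputime_t delta = jiffies_to_cputime(4);	/* four ticks spent in the guest */

/* PF_VCPU && irq_count() - hardirq_offset == 0 takes the guest path:
 * utime += delta, utimescaled += delta, gtime += delta,
 * cpustat->user += delta, cpustat->guest += delta;
 * stime and cpustat->system stay untouched */
account_system_time(vcpu_task, 0, delta, delta);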
kernel/time/tick-sched.c
... | ... | @@ -419,7 +419,9 @@ |
419 | 419 | { |
420 | 420 | int cpu = smp_processor_id(); |
421 | 421 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
422 | +#ifndef CONFIG_VIRT_CPU_ACCOUNTING | |
422 | 423 | unsigned long ticks; |
424 | +#endif | |
423 | 425 | ktime_t now; |
424 | 426 | |
425 | 427 | local_irq_disable(); |
... | ... | @@ -441,6 +443,7 @@ |
441 | 443 | tick_do_update_jiffies64(now); |
442 | 444 | cpumask_clear_cpu(cpu, nohz_cpu_mask); |
443 | 445 | |
446 | +#ifndef CONFIG_VIRT_CPU_ACCOUNTING | |
444 | 447 | /* |
445 | 448 | * We stopped the tick in idle. Update process times would miss the |
446 | 449 | * time we slept as update_process_times does only a 1 tick |
... | ... | @@ -450,12 +453,9 @@ |
450 | 453 | /* |
451 | 454 | * We might be one off. Do not randomly account a huge number of ticks! |
452 | 455 | */ |
453 | - if (ticks && ticks < LONG_MAX) { | |
454 | - add_preempt_count(HARDIRQ_OFFSET); | |
455 | - account_system_time(current, HARDIRQ_OFFSET, | |
456 | - jiffies_to_cputime(ticks)); | |
457 | - sub_preempt_count(HARDIRQ_OFFSET); | |
458 | - } | |
456 | + if (ticks && ticks < LONG_MAX) | |
457 | + account_idle_ticks(ticks); | |
458 | +#endif | |
459 | 459 | |
460 | 460 | touch_softlockup_watchdog(); |
461 | 461 | /* |
kernel/timer.c
... | ... | @@ -1018,21 +1018,6 @@ |
1018 | 1018 | } |
1019 | 1019 | #endif |
1020 | 1020 | |
1021 | -#ifndef CONFIG_VIRT_CPU_ACCOUNTING | |
1022 | -void account_process_tick(struct task_struct *p, int user_tick) | |
1023 | -{ | |
1024 | - cputime_t one_jiffy = jiffies_to_cputime(1); | |
1025 | - | |
1026 | - if (user_tick) { | |
1027 | - account_user_time(p, one_jiffy); | |
1028 | - account_user_time_scaled(p, cputime_to_scaled(one_jiffy)); | |
1029 | - } else { | |
1030 | - account_system_time(p, HARDIRQ_OFFSET, one_jiffy); | |
1031 | - account_system_time_scaled(p, cputime_to_scaled(one_jiffy)); | |
1032 | - } | |
1033 | -} | |
1034 | -#endif | |
1035 | - | |
1036 | 1021 | /* |
1037 | 1022 | * Called from the timer interrupt handler to charge one tick to the current |
1038 | 1023 | * process. user_tick is 1 if the tick is user time, 0 for system. |