Commit 61420f59a589c0668f70cbe725785837c78ece90

Authored by Linus Torvalds

Merge branch 'cputime' of git://git390.osdl.marist.edu/pub/scm/linux-2.6

* 'cputime' of git://git390.osdl.marist.edu/pub/scm/linux-2.6:
  [PATCH] fast vdso implementation for CLOCK_THREAD_CPUTIME_ID
  [PATCH] improve idle cputime accounting
  [PATCH] improve precision of idle time detection.
  [PATCH] improve precision of process accounting.
  [PATCH] idle cputime accounting
  [PATCH] fix scaled & unscaled cputime accounting

Showing 30 changed files (side-by-side diff)

arch/ia64/kernel/time.c
... ... @@ -93,13 +93,14 @@
93 93 now = ia64_get_itc();
94 94  
95 95 delta_stime = cycle_to_cputime(pi->ac_stime + (now - pi->ac_stamp));
96   - account_system_time(prev, 0, delta_stime);
97   - account_system_time_scaled(prev, delta_stime);
  96 + if (idle_task(smp_processor_id()) != prev)
  97 + account_system_time(prev, 0, delta_stime, delta_stime);
  98 + else
  99 + account_idle_time(delta_stime);
98 100  
99 101 if (pi->ac_utime) {
100 102 delta_utime = cycle_to_cputime(pi->ac_utime);
101   - account_user_time(prev, delta_utime);
102   - account_user_time_scaled(prev, delta_utime);
  103 + account_user_time(prev, delta_utime, delta_utime);
103 104 }
104 105  
105 106 pi->ac_stamp = ni->ac_stamp = now;
... ... @@ -122,8 +123,10 @@
122 123 now = ia64_get_itc();
123 124  
124 125 delta_stime = cycle_to_cputime(ti->ac_stime + (now - ti->ac_stamp));
125   - account_system_time(tsk, 0, delta_stime);
126   - account_system_time_scaled(tsk, delta_stime);
  126 + if (irq_count() || idle_task(smp_processor_id()) != tsk)
  127 + account_system_time(tsk, 0, delta_stime, delta_stime);
  128 + else
  129 + account_idle_time(delta_stime);
127 130 ti->ac_stime = 0;
128 131  
129 132 ti->ac_stamp = now;
... ... @@ -143,8 +146,7 @@
143 146  
144 147 if (ti->ac_utime) {
145 148 delta_utime = cycle_to_cputime(ti->ac_utime);
146   - account_user_time(p, delta_utime);
147   - account_user_time_scaled(p, delta_utime);
  149 + account_user_time(p, delta_utime, delta_utime);
148 150 ti->ac_utime = 0;
149 151 }
150 152 }
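
The ia64 hunks above show the shape of the reworked accounting API: the unscaled and scaled deltas now travel in a single call, and a delta burned by the idle task outside interrupt context is routed to account_idle_time() instead of being booked as system time. Below is a minimal user-space model of that dispatch; the task ids, counters and helper names are illustrative stand-ins, not kernel interfaces.

```c
/*
 * Minimal user-space model of the dispatch introduced above: one delta is
 * booked either as system time or as idle time, depending on whether the
 * interrupted task is the per-cpu idle task. All names are illustrative.
 */
#include <stdbool.h>
#include <stdio.h>

typedef unsigned long long cputime_t;

static cputime_t system_time, idle_time;

/* Stand-in for "is this task the idle task of this cpu?" */
static bool is_idle_task(int task_id) { return task_id == 0; }

static void account_delta(int task_id, bool in_irq, cputime_t delta)
{
	if (in_irq || !is_idle_task(task_id))
		system_time += delta;	/* account_system_time(tsk, 0, delta, delta) */
	else
		idle_time += delta;	/* account_idle_time(delta) */
}

int main(void)
{
	account_delta(42, false, 100);	/* ordinary task: system time */
	account_delta(0, false, 250);	/* idle task, not in irq: idle time */
	account_delta(0, true, 30);	/* irq hitting the idle task: system time */
	printf("system=%llu idle=%llu\n", system_time, idle_time);
	return 0;
}
```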
arch/powerpc/kernel/process.c
... ... @@ -33,6 +33,7 @@
33 33 #include <linux/mqueue.h>
34 34 #include <linux/hardirq.h>
35 35 #include <linux/utsname.h>
  36 +#include <linux/kernel_stat.h>
36 37  
37 38 #include <asm/pgtable.h>
38 39 #include <asm/uaccess.h>
arch/powerpc/kernel/time.c
... ... @@ -256,8 +256,10 @@
256 256 delta += sys_time;
257 257 get_paca()->system_time = 0;
258 258 }
259   - account_system_time(tsk, 0, delta);
260   - account_system_time_scaled(tsk, deltascaled);
  259 + if (in_irq() || idle_task(smp_processor_id()) != tsk)
  260 + account_system_time(tsk, 0, delta, deltascaled);
  261 + else
  262 + account_idle_time(delta);
261 263 per_cpu(cputime_last_delta, smp_processor_id()) = delta;
262 264 per_cpu(cputime_scaled_last_delta, smp_processor_id()) = deltascaled;
263 265 local_irq_restore(flags);
264 266  
... ... @@ -275,10 +277,8 @@
275 277  
276 278 utime = get_paca()->user_time;
277 279 get_paca()->user_time = 0;
278   - account_user_time(tsk, utime);
279   -
280 280 utimescaled = cputime_to_scaled(utime);
281   - account_user_time_scaled(tsk, utimescaled);
  281 + account_user_time(tsk, utime, utimescaled);
282 282 }
283 283  
284 284 /*
... ... @@ -338,8 +338,12 @@
338 338 tb = mftb();
339 339 purr = mfspr(SPRN_PURR);
340 340 stolen = (tb - pme->tb) - (purr - pme->purr);
341   - if (stolen > 0)
342   - account_steal_time(current, stolen);
  341 + if (stolen > 0) {
  342 + if (idle_task(smp_processor_id()) != current)
  343 + account_steal_time(stolen);
  344 + else
  345 + account_idle_time(stolen);
  346 + }
343 347 pme->tb = tb;
344 348 pme->purr = purr;
345 349 }
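
On powerpc the same split is applied to stolen PURR cycles: they count as steal time for ordinary tasks and as idle time when the idle task was preempted. Both buckets end up in the aggregate cpu line of /proc/stat; the small reader below prints the idle and steal columns, assuming the field order documented in proc(5) (user nice system idle iowait irq softirq steal).

```c
/* Prints the idle and steal columns of the aggregate cpu line in /proc/stat,
 * the buckets the accounting changes above feed. Field order per proc(5). */
#include <stdio.h>

int main(void)
{
	unsigned long long f[8] = { 0 };	/* user..steal, in USER_HZ ticks */
	FILE *fp = fopen("/proc/stat", "r");
	int n;

	if (!fp)
		return 1;
	n = fscanf(fp, "cpu %llu %llu %llu %llu %llu %llu %llu %llu",
		   &f[0], &f[1], &f[2], &f[3], &f[4], &f[5], &f[6], &f[7]);
	fclose(fp);
	if (n < 4)
		return 1;
	printf("idle=%llu steal=%llu\n", f[3], f[7]);
	return 0;
}
```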
arch/s390/include/asm/cpu.h
... ... @@ -14,7 +14,6 @@
14 14  
15 15 struct s390_idle_data {
16 16 spinlock_t lock;
17   - unsigned int in_idle;
18 17 unsigned long long idle_count;
19 18 unsigned long long idle_enter;
20 19 unsigned long long idle_time;
21 20  
... ... @@ -22,12 +21,12 @@
22 21  
23 22 DECLARE_PER_CPU(struct s390_idle_data, s390_idle);
24 23  
25   -void s390_idle_leave(void);
  24 +void vtime_start_cpu(void);
26 25  
27 26 static inline void s390_idle_check(void)
28 27 {
29   - if ((&__get_cpu_var(s390_idle))->in_idle)
30   - s390_idle_leave();
  28 + if ((&__get_cpu_var(s390_idle))->idle_enter != 0ULL)
  29 + vtime_start_cpu();
31 30 }
32 31  
33 32 #endif /* _ASM_S390_CPU_H_ */
arch/s390/include/asm/cputime.h
... ... @@ -11,7 +11,7 @@
11 11  
12 12 #include <asm/div64.h>
13 13  
14   -/* We want to use micro-second resolution. */
  14 +/* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */
15 15  
16 16 typedef unsigned long long cputime_t;
17 17 typedef unsigned long long cputime64_t;
18 18  
... ... @@ -53,9 +53,9 @@
53 53 #define cputime_ge(__a, __b) ((__a) >= (__b))
54 54 #define cputime_lt(__a, __b) ((__a) < (__b))
55 55 #define cputime_le(__a, __b) ((__a) <= (__b))
56   -#define cputime_to_jiffies(__ct) (__div((__ct), 1000000 / HZ))
  56 +#define cputime_to_jiffies(__ct) (__div((__ct), 4096000000ULL / HZ))
57 57 #define cputime_to_scaled(__ct) (__ct)
58   -#define jiffies_to_cputime(__hz) ((cputime_t)(__hz) * (1000000 / HZ))
  58 +#define jiffies_to_cputime(__hz) ((cputime_t)(__hz) * (4096000000ULL / HZ))
59 59  
60 60 #define cputime64_zero (0ULL)
61 61 #define cputime64_add(__a, __b) ((__a) + (__b))
... ... @@ -64,7 +64,7 @@
64 64 static inline u64
65 65 cputime64_to_jiffies64(cputime64_t cputime)
66 66 {
67   - do_div(cputime, 1000000 / HZ);
  67 + do_div(cputime, 4096000000ULL / HZ);
68 68 return cputime;
69 69 }
70 70  
71 71  
... ... @@ -74,13 +74,13 @@
74 74 static inline unsigned int
75 75 cputime_to_msecs(const cputime_t cputime)
76 76 {
77   - return __div(cputime, 1000);
  77 + return __div(cputime, 4096000);
78 78 }
79 79  
80 80 static inline cputime_t
81 81 msecs_to_cputime(const unsigned int m)
82 82 {
83   - return (cputime_t) m * 1000;
  83 + return (cputime_t) m * 4096000;
84 84 }
85 85  
86 86 /*
87 87  
... ... @@ -89,13 +89,13 @@
89 89 static inline unsigned int
90 90 cputime_to_secs(const cputime_t cputime)
91 91 {
92   - return __div(cputime, 1000000);
  92 + return __div(cputime, 2048000000) >> 1;
93 93 }
94 94  
95 95 static inline cputime_t
96 96 secs_to_cputime(const unsigned int s)
97 97 {
98   - return (cputime_t) s * 1000000;
  98 + return (cputime_t) s * 4096000000ULL;
99 99 }
100 100  
101 101 /*
... ... @@ -104,7 +104,7 @@
104 104 static inline cputime_t
105 105 timespec_to_cputime(const struct timespec *value)
106 106 {
107   - return value->tv_nsec / 1000 + (u64) value->tv_sec * 1000000;
  107 + return value->tv_nsec * 4096 / 1000 + (u64) value->tv_sec * 4096000000ULL;
108 108 }
109 109  
110 110 static inline void
111 111  
... ... @@ -114,12 +114,12 @@
114 114 register_pair rp;
115 115  
116 116 rp.pair = cputime >> 1;
117   - asm ("dr %0,%1" : "+d" (rp) : "d" (1000000 >> 1));
118   - value->tv_nsec = rp.subreg.even * 1000;
  117 + asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL));
  118 + value->tv_nsec = rp.subreg.even * 1000 / 4096;
119 119 value->tv_sec = rp.subreg.odd;
120 120 #else
121   - value->tv_nsec = (cputime % 1000000) * 1000;
122   - value->tv_sec = cputime / 1000000;
  121 + value->tv_nsec = (cputime % 4096000000ULL) * 1000 / 4096;
  122 + value->tv_sec = cputime / 4096000000ULL;
123 123 #endif
124 124 }
125 125  
... ... @@ -131,7 +131,7 @@
131 131 static inline cputime_t
132 132 timeval_to_cputime(const struct timeval *value)
133 133 {
134   - return value->tv_usec + (u64) value->tv_sec * 1000000;
  134 + return value->tv_usec * 4096 + (u64) value->tv_sec * 4096000000ULL;
135 135 }
136 136  
137 137 static inline void
138 138  
... ... @@ -141,12 +141,12 @@
141 141 register_pair rp;
142 142  
143 143 rp.pair = cputime >> 1;
144   - asm ("dr %0,%1" : "+d" (rp) : "d" (1000000 >> 1));
145   - value->tv_usec = rp.subreg.even;
  144 + asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL));
  145 + value->tv_usec = rp.subreg.even / 4096;
146 146 value->tv_sec = rp.subreg.odd;
147 147 #else
148   - value->tv_usec = cputime % 1000000;
149   - value->tv_sec = cputime / 1000000;
  148 + value->tv_usec = cputime % 4096000000ULL;
  149 + value->tv_sec = cputime / 4096000000ULL;
150 150 #endif
151 151 }
152 152  
153 153  
... ... @@ -156,13 +156,13 @@
156 156 static inline clock_t
157 157 cputime_to_clock_t(cputime_t cputime)
158 158 {
159   - return __div(cputime, 1000000 / USER_HZ);
  159 + return __div(cputime, 4096000000ULL / USER_HZ);
160 160 }
161 161  
162 162 static inline cputime_t
163 163 clock_t_to_cputime(unsigned long x)
164 164 {
165   - return (cputime_t) x * (1000000 / USER_HZ);
  165 + return (cputime_t) x * (4096000000ULL / USER_HZ);
166 166 }
167 167  
168 168 /*
... ... @@ -171,7 +171,7 @@
171 171 static inline clock_t
172 172 cputime64_to_clock_t(cputime64_t cputime)
173 173 {
174   - return __div(cputime, 1000000 / USER_HZ);
  174 + return __div(cputime, 4096000000ULL / USER_HZ);
175 175 }
176 176  
177 177 #endif /* _S390_CPUTIME_H */
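
Every constant in this header gains a factor of 4096 because cputime_t now carries the CPU timer's full resolution of 2**-12 microseconds: 4096 units per microsecond, 4096000000 per second. cputime_to_secs() divides by 2048000000 and then shifts right once so the divisor still fits into 32 bits. The plain C restatement below is a sanity check of the arithmetic; only the numbers come from the hunk above, the function names are reused merely as labels.

```c
/* Sanity check of the new s390 cputime unit: 2**-12 microseconds, i.e.
 * 4096 units per microsecond. Only the arithmetic mirrors the header. */
#include <stdio.h>

typedef unsigned long long cputime_t;

#define CPUTIME_PER_USEC 4096ULL
#define CPUTIME_PER_SEC  (CPUTIME_PER_USEC * 1000000)	/* 4096000000 */

static unsigned int cputime_to_msecs(cputime_t ct)
{
	return ct / (CPUTIME_PER_USEC * 1000);		/* __div(ct, 4096000) */
}

static unsigned int cputime_to_secs(cputime_t ct)
{
	/* Divide by 2048000000 and shift, as in the header, so the divisor
	 * stays within 32 bits; same result as dividing by 4096000000. */
	return (ct / 2048000000ULL) >> 1;
}

int main(void)
{
	cputime_t one_and_a_half_sec = 3 * CPUTIME_PER_SEC / 2;

	printf("%u ms, %u s\n", cputime_to_msecs(one_and_a_half_sec),
	       cputime_to_secs(one_and_a_half_sec));	/* 1500 ms, 1 s */
	return 0;
}
```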
arch/s390/include/asm/lowcore.h
... ... @@ -67,11 +67,11 @@
67 67 #define __LC_SYNC_ENTER_TIMER 0x248
68 68 #define __LC_ASYNC_ENTER_TIMER 0x250
69 69 #define __LC_EXIT_TIMER 0x258
70   -#define __LC_LAST_UPDATE_TIMER 0x260
71   -#define __LC_USER_TIMER 0x268
72   -#define __LC_SYSTEM_TIMER 0x270
73   -#define __LC_LAST_UPDATE_CLOCK 0x278
74   -#define __LC_STEAL_CLOCK 0x280
  70 +#define __LC_USER_TIMER 0x260
  71 +#define __LC_SYSTEM_TIMER 0x268
  72 +#define __LC_STEAL_TIMER 0x270
  73 +#define __LC_LAST_UPDATE_TIMER 0x278
  74 +#define __LC_LAST_UPDATE_CLOCK 0x280
75 75 #define __LC_RETURN_MCCK_PSW 0x288
76 76 #define __LC_KERNEL_STACK 0xC40
77 77 #define __LC_THREAD_INFO 0xC44
... ... @@ -89,11 +89,11 @@
89 89 #define __LC_SYNC_ENTER_TIMER 0x250
90 90 #define __LC_ASYNC_ENTER_TIMER 0x258
91 91 #define __LC_EXIT_TIMER 0x260
92   -#define __LC_LAST_UPDATE_TIMER 0x268
93   -#define __LC_USER_TIMER 0x270
94   -#define __LC_SYSTEM_TIMER 0x278
95   -#define __LC_LAST_UPDATE_CLOCK 0x280
96   -#define __LC_STEAL_CLOCK 0x288
  92 +#define __LC_USER_TIMER 0x268
  93 +#define __LC_SYSTEM_TIMER 0x270
  94 +#define __LC_STEAL_TIMER 0x278
  95 +#define __LC_LAST_UPDATE_TIMER 0x280
  96 +#define __LC_LAST_UPDATE_CLOCK 0x288
97 97 #define __LC_RETURN_MCCK_PSW 0x290
98 98 #define __LC_KERNEL_STACK 0xD40
99 99 #define __LC_THREAD_INFO 0xD48
100 100  
... ... @@ -106,8 +106,10 @@
106 106 #define __LC_IPLDEV 0xDB8
107 107 #define __LC_CURRENT 0xDD8
108 108 #define __LC_INT_CLOCK 0xDE8
  109 +#define __LC_VDSO_PER_CPU 0xE38
109 110 #endif /* __s390x__ */
110 111  
  112 +#define __LC_PASTE 0xE40
111 113  
112 114 #define __LC_PANIC_MAGIC 0xE00
113 115 #ifndef __s390x__
... ... @@ -252,11 +254,11 @@
252 254 __u64 sync_enter_timer; /* 0x248 */
253 255 __u64 async_enter_timer; /* 0x250 */
254 256 __u64 exit_timer; /* 0x258 */
255   - __u64 last_update_timer; /* 0x260 */
256   - __u64 user_timer; /* 0x268 */
257   - __u64 system_timer; /* 0x270 */
258   - __u64 last_update_clock; /* 0x278 */
259   - __u64 steal_clock; /* 0x280 */
  257 + __u64 user_timer; /* 0x260 */
  258 + __u64 system_timer; /* 0x268 */
  259 + __u64 steal_timer; /* 0x270 */
  260 + __u64 last_update_timer; /* 0x278 */
  261 + __u64 last_update_clock; /* 0x280 */
260 262 psw_t return_mcck_psw; /* 0x288 */
261 263 __u8 pad8[0xc00-0x290]; /* 0x290 */
262 264  
... ... @@ -343,11 +345,11 @@
343 345 __u64 sync_enter_timer; /* 0x250 */
344 346 __u64 async_enter_timer; /* 0x258 */
345 347 __u64 exit_timer; /* 0x260 */
346   - __u64 last_update_timer; /* 0x268 */
347   - __u64 user_timer; /* 0x270 */
348   - __u64 system_timer; /* 0x278 */
349   - __u64 last_update_clock; /* 0x280 */
350   - __u64 steal_clock; /* 0x288 */
  348 + __u64 user_timer; /* 0x268 */
  349 + __u64 system_timer; /* 0x270 */
  350 + __u64 steal_timer; /* 0x278 */
  351 + __u64 last_update_timer; /* 0x280 */
  352 + __u64 last_update_clock; /* 0x288 */
351 353 psw_t return_mcck_psw; /* 0x290 */
352 354 __u8 pad8[0xc00-0x2a0]; /* 0x2a0 */
353 355 /* System info area */
... ... @@ -381,7 +383,12 @@
381 383 /* whether the kernel died with panic() or not */
382 384 __u32 panic_magic; /* 0xe00 */
383 385  
384   - __u8 pad13[0x11b8-0xe04]; /* 0xe04 */
  386 + /* Per cpu primary space access list */
  387 + __u8 pad_0xe04[0xe3c-0xe04]; /* 0xe04 */
  388 + __u32 vdso_per_cpu_data; /* 0xe3c */
  389 + __u32 paste[16]; /* 0xe40 */
  390 +
  391 + __u8 pad13[0x11b8-0xe80]; /* 0xe80 */
385 392  
386 393 /* 64 bit extparam used for pfault, diag 250 etc */
387 394 __u64 ext_params2; /* 0x11B8 */
arch/s390/include/asm/system.h
... ... @@ -99,7 +99,7 @@
99 99 prev = __switch_to(prev,next); \
100 100 } while (0)
101 101  
102   -extern void account_vtime(struct task_struct *);
  102 +extern void account_vtime(struct task_struct *, struct task_struct *);
103 103 extern void account_tick_vtime(struct task_struct *);
104 104 extern void account_system_vtime(struct task_struct *);
105 105  
... ... @@ -121,7 +121,7 @@
121 121  
122 122 #define finish_arch_switch(prev) do { \
123 123 set_fs(current->thread.mm_segment); \
124   - account_vtime(prev); \
  124 + account_vtime(prev, current); \
125 125 } while (0)
126 126  
127 127 #define nop() asm volatile("nop")
arch/s390/include/asm/thread_info.h
... ... @@ -47,6 +47,8 @@
47 47 unsigned int cpu; /* current CPU */
48 48 int preempt_count; /* 0 => preemptable, <0 => BUG */
49 49 struct restart_block restart_block;
  50 + __u64 user_timer;
  51 + __u64 system_timer;
50 52 };
51 53  
52 54 /*
arch/s390/include/asm/timer.h
... ... @@ -23,20 +23,18 @@
23 23 __u64 expires;
24 24 __u64 interval;
25 25  
26   - spinlock_t lock;
27   - unsigned long magic;
28   -
29 26 void (*function)(unsigned long);
30 27 unsigned long data;
31 28 };
32 29  
33   -/* the offset value will wrap after ca. 71 years */
  30 +/* the vtimer value will wrap after ca. 71 years */
34 31 struct vtimer_queue {
35 32 struct list_head list;
36 33 spinlock_t lock;
37   - __u64 to_expire; /* current event expire time */
38   - __u64 offset; /* list offset to zero */
39   - __u64 idle; /* temp var for idle */
  34 + __u64 timer; /* last programmed timer */
  35 + __u64 elapsed; /* elapsed time of timer expire values */
  36 + __u64 idle; /* temp var for idle */
  37 + int do_spt; /* =1: reprogram cpu timer in idle */
40 38 };
41 39  
42 40 extern void init_virt_timer(struct vtimer_list *timer);
... ... @@ -48,8 +46,8 @@
48 46 extern void init_cpu_vtimer(void);
49 47 extern void vtime_init(void);
50 48  
51   -extern void vtime_start_cpu_timer(void);
52   -extern void vtime_stop_cpu_timer(void);
  49 +extern void vtime_stop_cpu(void);
  50 +extern void vtime_start_leave(void);
53 51  
54 52 #endif /* __KERNEL__ */
55 53  
arch/s390/include/asm/vdso.h
... ... @@ -12,9 +12,9 @@
12 12 #ifndef __ASSEMBLY__
13 13  
14 14 /*
15   - * Note about this structure:
  15 + * Note about the vdso_data and vdso_per_cpu_data structures:
16 16 *
17   - * NEVER USE THIS IN USERSPACE CODE DIRECTLY. The layout of this
  17 + * NEVER USE THEM IN USERSPACE CODE DIRECTLY. The layout of the
18 18 * structure is supposed to be known only to the function in the vdso
19 19 * itself and may change without notice.
20 20 */
21 21  
22 22  
... ... @@ -28,9 +28,20 @@
28 28 __u64 wtom_clock_nsec; /* 0x28 */
29 29 __u32 tz_minuteswest; /* Minutes west of Greenwich 0x30 */
30 30 __u32 tz_dsttime; /* Type of dst correction 0x34 */
  31 + __u32 ectg_available;
31 32 };
32 33  
  34 +struct vdso_per_cpu_data {
  35 + __u64 ectg_timer_base;
  36 + __u64 ectg_user_time;
  37 +};
  38 +
33 39 extern struct vdso_data *vdso_data;
  40 +
  41 +#ifdef CONFIG_64BIT
  42 +int vdso_alloc_per_cpu(int cpu, struct _lowcore *lowcore);
  43 +void vdso_free_per_cpu(int cpu, struct _lowcore *lowcore);
  44 +#endif
34 45  
35 46 #endif /* __ASSEMBLY__ */
36 47  
arch/s390/kernel/asm-offsets.c
... ... @@ -48,6 +48,11 @@
48 48 DEFINE(__VDSO_WTOM_SEC, offsetof(struct vdso_data, wtom_clock_sec));
49 49 DEFINE(__VDSO_WTOM_NSEC, offsetof(struct vdso_data, wtom_clock_nsec));
50 50 DEFINE(__VDSO_TIMEZONE, offsetof(struct vdso_data, tz_minuteswest));
  51 + DEFINE(__VDSO_ECTG_OK, offsetof(struct vdso_data, ectg_available));
  52 + DEFINE(__VDSO_ECTG_BASE,
  53 + offsetof(struct vdso_per_cpu_data, ectg_timer_base));
  54 + DEFINE(__VDSO_ECTG_USER,
  55 + offsetof(struct vdso_per_cpu_data, ectg_user_time));
51 56 /* constants used by the vdso */
52 57 DEFINE(CLOCK_REALTIME, CLOCK_REALTIME);
53 58 DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
arch/s390/kernel/entry.S
... ... @@ -583,8 +583,8 @@
583 583  
584 584 .globl io_int_handler
585 585 io_int_handler:
586   - stpt __LC_ASYNC_ENTER_TIMER
587 586 stck __LC_INT_CLOCK
  587 + stpt __LC_ASYNC_ENTER_TIMER
588 588 SAVE_ALL_BASE __LC_SAVE_AREA+16
589 589 SAVE_ALL_ASYNC __LC_IO_OLD_PSW,__LC_SAVE_AREA+16
590 590 CREATE_STACK_FRAME __LC_IO_OLD_PSW,__LC_SAVE_AREA+16
591 591  
... ... @@ -723,8 +723,8 @@
723 723  
724 724 .globl ext_int_handler
725 725 ext_int_handler:
726   - stpt __LC_ASYNC_ENTER_TIMER
727 726 stck __LC_INT_CLOCK
  727 + stpt __LC_ASYNC_ENTER_TIMER
728 728 SAVE_ALL_BASE __LC_SAVE_AREA+16
729 729 SAVE_ALL_ASYNC __LC_EXT_OLD_PSW,__LC_SAVE_AREA+16
730 730 CREATE_STACK_FRAME __LC_EXT_OLD_PSW,__LC_SAVE_AREA+16
... ... @@ -750,6 +750,7 @@
750 750  
751 751 .globl mcck_int_handler
752 752 mcck_int_handler:
  753 + stck __LC_INT_CLOCK
753 754 spt __LC_CPU_TIMER_SAVE_AREA # revalidate cpu timer
754 755 lm %r0,%r15,__LC_GPREGS_SAVE_AREA # revalidate gprs
755 756 SAVE_ALL_BASE __LC_SAVE_AREA+32
arch/s390/kernel/entry64.S
... ... @@ -177,8 +177,11 @@
177 177 .if !\sync
178 178 ni \psworg+1,0xfd # clear wait state bit
179 179 .endif
180   - lmg %r0,%r15,SP_R0(%r15) # load gprs 0-15 of user
  180 + lg %r14,__LC_VDSO_PER_CPU
  181 + lmg %r0,%r13,SP_R0(%r15) # load gprs 0-13 of user
181 182 stpt __LC_EXIT_TIMER
  183 + mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
  184 + lmg %r14,%r15,SP_R14(%r15) # load grps 14-15 of user
182 185 lpswe \psworg # back to caller
183 186 .endm
184 187  
185 188  
... ... @@ -559,8 +562,8 @@
559 562 */
560 563 .globl io_int_handler
561 564 io_int_handler:
562   - stpt __LC_ASYNC_ENTER_TIMER
563 565 stck __LC_INT_CLOCK
  566 + stpt __LC_ASYNC_ENTER_TIMER
564 567 SAVE_ALL_BASE __LC_SAVE_AREA+32
565 568 SAVE_ALL_ASYNC __LC_IO_OLD_PSW,__LC_SAVE_AREA+32
566 569 CREATE_STACK_FRAME __LC_IO_OLD_PSW,__LC_SAVE_AREA+32
567 570  
... ... @@ -721,8 +724,8 @@
721 724 */
722 725 .globl ext_int_handler
723 726 ext_int_handler:
724   - stpt __LC_ASYNC_ENTER_TIMER
725 727 stck __LC_INT_CLOCK
  728 + stpt __LC_ASYNC_ENTER_TIMER
726 729 SAVE_ALL_BASE __LC_SAVE_AREA+32
727 730 SAVE_ALL_ASYNC __LC_EXT_OLD_PSW,__LC_SAVE_AREA+32
728 731 CREATE_STACK_FRAME __LC_EXT_OLD_PSW,__LC_SAVE_AREA+32
... ... @@ -746,6 +749,7 @@
746 749 */
747 750 .globl mcck_int_handler
748 751 mcck_int_handler:
  752 + stck __LC_INT_CLOCK
749 753 la %r1,4095 # revalidate r1
750 754 spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # revalidate cpu timer
751 755 lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# revalidate gprs
... ... @@ -979,23 +983,23 @@
979 983  
980 984 cleanup_sysc_leave:
981 985 clc 8(8,%r12),BASED(cleanup_sysc_leave_insn)
982   - je 2f
983   - mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
  986 + je 3f
984 987 clc 8(8,%r12),BASED(cleanup_sysc_leave_insn+8)
985   - je 2f
986   - mvc __LC_RETURN_PSW(16),SP_PSW(%r15)
  988 + jhe 0f
  989 + mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
  990 +0: mvc __LC_RETURN_PSW(16),SP_PSW(%r15)
987 991 cghi %r12,__LC_MCK_OLD_PSW
988   - jne 0f
  992 + jne 1f
989 993 mvc __LC_SAVE_AREA+64(32),SP_R12(%r15)
990   - j 1f
991   -0: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15)
992   -1: lmg %r0,%r11,SP_R0(%r15)
  994 + j 2f
  995 +1: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15)
  996 +2: lmg %r0,%r11,SP_R0(%r15)
993 997 lg %r15,SP_R15(%r15)
994   -2: la %r12,__LC_RETURN_PSW
  998 +3: la %r12,__LC_RETURN_PSW
995 999 br %r14
996 1000 cleanup_sysc_leave_insn:
997 1001 .quad sysc_done - 4
998   - .quad sysc_done - 8
  1002 + .quad sysc_done - 16
999 1003  
1000 1004 cleanup_io_return:
1001 1005 mvc __LC_RETURN_PSW(8),0(%r12)
... ... @@ -1005,23 +1009,23 @@
1005 1009  
1006 1010 cleanup_io_leave:
1007 1011 clc 8(8,%r12),BASED(cleanup_io_leave_insn)
1008   - je 2f
1009   - mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
  1012 + je 3f
1010 1013 clc 8(8,%r12),BASED(cleanup_io_leave_insn+8)
1011   - je 2f
1012   - mvc __LC_RETURN_PSW(16),SP_PSW(%r15)
  1014 + jhe 0f
  1015 + mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
  1016 +0: mvc __LC_RETURN_PSW(16),SP_PSW(%r15)
1013 1017 cghi %r12,__LC_MCK_OLD_PSW
1014   - jne 0f
  1018 + jne 1f
1015 1019 mvc __LC_SAVE_AREA+64(32),SP_R12(%r15)
1016   - j 1f
1017   -0: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15)
1018   -1: lmg %r0,%r11,SP_R0(%r15)
  1020 + j 2f
  1021 +1: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15)
  1022 +2: lmg %r0,%r11,SP_R0(%r15)
1019 1023 lg %r15,SP_R15(%r15)
1020   -2: la %r12,__LC_RETURN_PSW
  1024 +3: la %r12,__LC_RETURN_PSW
1021 1025 br %r14
1022 1026 cleanup_io_leave_insn:
1023 1027 .quad io_done - 4
1024   - .quad io_done - 8
  1028 + .quad io_done - 16
1025 1029  
1026 1030 /*
1027 1031 * Integer constants
arch/s390/kernel/head64.S
... ... @@ -87,6 +87,8 @@
87 87 lg %r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area
88 88 # move IPL device to lowcore
89 89 mvc __LC_IPLDEV(4),IPL_DEVICE+4-PARMAREA(%r12)
  90 + lghi %r0,__LC_PASTE
  91 + stg %r0,__LC_VDSO_PER_CPU
90 92 #
91 93 # Setup stack
92 94 #
arch/s390/kernel/process.c
... ... @@ -38,6 +38,7 @@
38 38 #include <linux/utsname.h>
39 39 #include <linux/tick.h>
40 40 #include <linux/elfcore.h>
  41 +#include <linux/kernel_stat.h>
41 42 #include <asm/uaccess.h>
42 43 #include <asm/pgtable.h>
43 44 #include <asm/system.h>
... ... @@ -45,7 +46,6 @@
45 46 #include <asm/processor.h>
46 47 #include <asm/irq.h>
47 48 #include <asm/timer.h>
48   -#include <asm/cpu.h>
49 49 #include "entry.h"
50 50  
51 51 asmlinkage void ret_from_fork(void) asm ("ret_from_fork");
... ... @@ -75,36 +75,6 @@
75 75 return sf->gprs[8];
76 76 }
77 77  
78   -DEFINE_PER_CPU(struct s390_idle_data, s390_idle) = {
79   - .lock = __SPIN_LOCK_UNLOCKED(s390_idle.lock)
80   -};
81   -
82   -static int s390_idle_enter(void)
83   -{
84   - struct s390_idle_data *idle;
85   -
86   - idle = &__get_cpu_var(s390_idle);
87   - spin_lock(&idle->lock);
88   - idle->idle_count++;
89   - idle->in_idle = 1;
90   - idle->idle_enter = get_clock();
91   - spin_unlock(&idle->lock);
92   - vtime_stop_cpu_timer();
93   - return NOTIFY_OK;
94   -}
95   -
96   -void s390_idle_leave(void)
97   -{
98   - struct s390_idle_data *idle;
99   -
100   - vtime_start_cpu_timer();
101   - idle = &__get_cpu_var(s390_idle);
102   - spin_lock(&idle->lock);
103   - idle->idle_time += get_clock() - idle->idle_enter;
104   - idle->in_idle = 0;
105   - spin_unlock(&idle->lock);
106   -}
107   -
108 78 extern void s390_handle_mcck(void);
109 79 /*
110 80 * The idle loop on a S390...
... ... @@ -117,10 +87,6 @@
117 87 local_irq_enable();
118 88 return;
119 89 }
120   - if (s390_idle_enter() == NOTIFY_BAD) {
121   - local_irq_enable();
122   - return;
123   - }
124 90 #ifdef CONFIG_HOTPLUG_CPU
125 91 if (cpu_is_offline(smp_processor_id())) {
126 92 preempt_enable_no_resched();
... ... @@ -130,7 +96,6 @@
130 96 local_mcck_disable();
131 97 if (test_thread_flag(TIF_MCCK_PENDING)) {
132 98 local_mcck_enable();
133   - s390_idle_leave();
134 99 local_irq_enable();
135 100 s390_handle_mcck();
136 101 return;
... ... @@ -138,9 +103,9 @@
138 103 trace_hardirqs_on();
139 104 /* Don't trace preempt off for idle. */
140 105 stop_critical_timings();
141   - /* Wait for external, I/O or machine check interrupt. */
142   - __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT |
143   - PSW_MASK_IO | PSW_MASK_EXT);
  106 + /* Stop virtual timer and halt the cpu. */
  107 + vtime_stop_cpu();
  108 + /* Reenable preemption tracer. */
144 109 start_critical_timings();
145 110 }
146 111  
arch/s390/kernel/s390_ext.c
... ... @@ -119,8 +119,8 @@
119 119 struct pt_regs *old_regs;
120 120  
121 121 old_regs = set_irq_regs(regs);
122   - irq_enter();
123 122 s390_idle_check();
  123 + irq_enter();
124 124 if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator)
125 125 /* Serve timer interrupts first. */
126 126 clock_comparator_work();
arch/s390/kernel/setup.c
... ... @@ -427,6 +427,8 @@
427 427 /* enable extended save area */
428 428 __ctl_set_bit(14, 29);
429 429 }
  430 +#else
  431 + lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0];
430 432 #endif
431 433 set_prefix((u32)(unsigned long) lc);
432 434 }
arch/s390/kernel/smp.c
... ... @@ -47,6 +47,7 @@
47 47 #include <asm/lowcore.h>
48 48 #include <asm/sclp.h>
49 49 #include <asm/cpu.h>
  50 +#include <asm/vdso.h>
50 51 #include "entry.h"
51 52  
52 53 /*
... ... @@ -500,6 +501,9 @@
500 501 goto out;
501 502 lowcore->extended_save_area_addr = (u32) save_area;
502 503 }
  504 +#else
  505 + if (vdso_alloc_per_cpu(cpu, lowcore))
  506 + goto out;
503 507 #endif
504 508 lowcore_ptr[cpu] = lowcore;
505 509 return 0;
... ... @@ -522,6 +526,8 @@
522 526 #ifndef CONFIG_64BIT
523 527 if (MACHINE_HAS_IEEE)
524 528 free_page((unsigned long) lowcore->extended_save_area_addr);
  529 +#else
  530 + vdso_free_per_cpu(cpu, lowcore);
525 531 #endif
526 532 free_page(lowcore->panic_stack - PAGE_SIZE);
527 533 free_pages(lowcore->async_stack - ASYNC_SIZE, ASYNC_ORDER);
... ... @@ -664,6 +670,7 @@
664 670 lowcore = (void *) __get_free_pages(GFP_KERNEL | GFP_DMA, lc_order);
665 671 panic_stack = __get_free_page(GFP_KERNEL);
666 672 async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER);
  673 + BUG_ON(!lowcore || !panic_stack || !async_stack);
667 674 #ifndef CONFIG_64BIT
668 675 if (MACHINE_HAS_IEEE)
669 676 save_area = get_zeroed_page(GFP_KERNEL);
... ... @@ -677,6 +684,8 @@
677 684 #ifndef CONFIG_64BIT
678 685 if (MACHINE_HAS_IEEE)
679 686 lowcore->extended_save_area_addr = (u32) save_area;
  687 +#else
  688 + BUG_ON(vdso_alloc_per_cpu(smp_processor_id(), lowcore));
680 689 #endif
681 690 set_prefix((u32)(unsigned long) lowcore);
682 691 local_mcck_enable();
683 692  
... ... @@ -845,9 +854,11 @@
845 854 unsigned long long idle_count;
846 855  
847 856 idle = &per_cpu(s390_idle, dev->id);
848   - spin_lock_irq(&idle->lock);
  857 + spin_lock(&idle->lock);
849 858 idle_count = idle->idle_count;
850   - spin_unlock_irq(&idle->lock);
  859 + if (idle->idle_enter)
  860 + idle_count++;
  861 + spin_unlock(&idle->lock);
851 862 return sprintf(buf, "%llu\n", idle_count);
852 863 }
853 864 static SYSDEV_ATTR(idle_count, 0444, show_idle_count, NULL);
854 865  
... ... @@ -856,18 +867,17 @@
856 867 struct sysdev_attribute *attr, char *buf)
857 868 {
858 869 struct s390_idle_data *idle;
859   - unsigned long long new_time;
  870 + unsigned long long now, idle_time, idle_enter;
860 871  
861 872 idle = &per_cpu(s390_idle, dev->id);
862   - spin_lock_irq(&idle->lock);
863   - if (idle->in_idle) {
864   - new_time = get_clock();
865   - idle->idle_time += new_time - idle->idle_enter;
866   - idle->idle_enter = new_time;
867   - }
868   - new_time = idle->idle_time;
869   - spin_unlock_irq(&idle->lock);
870   - return sprintf(buf, "%llu\n", new_time >> 12);
  873 + spin_lock(&idle->lock);
  874 + now = get_clock();
  875 + idle_time = idle->idle_time;
  876 + idle_enter = idle->idle_enter;
  877 + if (idle_enter != 0ULL && idle_enter < now)
  878 + idle_time += now - idle_enter;
  879 + spin_unlock(&idle->lock);
  880 + return sprintf(buf, "%llu\n", idle_time >> 12);
871 881 }
872 882 static SYSDEV_ATTR(idle_time_us, 0444, show_idle_time, NULL);
873 883  
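
With the in_idle flag gone, a non-zero idle_enter timestamp now means "currently idle", so show_idle_time() folds the still-running idle period into the reported value and show_idle_count() counts it as one more period. The values are exported per cpu through sysfs; the reader below assumes the usual sysdev path of this kernel generation, and the attributes themselves are s390-only.

```c
/* Reads the s390-only per-cpu idle statistics exposed by the sysfs
 * attributes above. The path is an assumption about the sysdev layout;
 * adjust it if your sysfs tree differs. */
#include <stdio.h>

int main(void)
{
	unsigned long long idle_us = 0, idle_count = 0;
	FILE *f;

	f = fopen("/sys/devices/system/cpu/cpu0/idle_time_us", "r");
	if (f) {
		if (fscanf(f, "%llu", &idle_us) != 1)
			idle_us = 0;
		fclose(f);
	}
	f = fopen("/sys/devices/system/cpu/cpu0/idle_count", "r");
	if (f) {
		if (fscanf(f, "%llu", &idle_count) != 1)
			idle_count = 0;
		fclose(f);
	}
	printf("cpu0: %llu us idle over %llu idle periods\n",
	       idle_us, idle_count);
	return 0;
}
```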
arch/s390/kernel/vdso.c
... ... @@ -31,9 +31,6 @@
31 31 #include <asm/sections.h>
32 32 #include <asm/vdso.h>
33 33  
34   -/* Max supported size for symbol names */
35   -#define MAX_SYMNAME 64
36   -
37 34 #if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT)
38 35 extern char vdso32_start, vdso32_end;
39 36 static void *vdso32_kbase = &vdso32_start;
... ... @@ -71,6 +68,119 @@
71 68 struct vdso_data *vdso_data = &vdso_data_store.data;
72 69  
73 70 /*
  71 + * Setup vdso data page.
  72 + */
  73 +static void vdso_init_data(struct vdso_data *vd)
  74 +{
  75 + unsigned int facility_list;
  76 +
  77 + facility_list = stfl();
  78 + vd->ectg_available = switch_amode && (facility_list & 1);
  79 +}
  80 +
  81 +#ifdef CONFIG_64BIT
  82 +/*
  83 + * Setup per cpu vdso data page.
  84 + */
  85 +static void vdso_init_per_cpu_data(int cpu, struct vdso_per_cpu_data *vpcd)
  86 +{
  87 +}
  88 +
  89 +/*
  90 + * Allocate/free per cpu vdso data.
  91 + */
  92 +#ifdef CONFIG_64BIT
  93 +#define SEGMENT_ORDER 2
  94 +#else
  95 +#define SEGMENT_ORDER 1
  96 +#endif
  97 +
  98 +int vdso_alloc_per_cpu(int cpu, struct _lowcore *lowcore)
  99 +{
  100 + unsigned long segment_table, page_table, page_frame;
  101 + u32 *psal, *aste;
  102 + int i;
  103 +
  104 + lowcore->vdso_per_cpu_data = __LC_PASTE;
  105 +
  106 + if (!switch_amode || !vdso_enabled)
  107 + return 0;
  108 +
  109 + segment_table = __get_free_pages(GFP_KERNEL, SEGMENT_ORDER);
  110 + page_table = get_zeroed_page(GFP_KERNEL | GFP_DMA);
  111 + page_frame = get_zeroed_page(GFP_KERNEL);
  112 + if (!segment_table || !page_table || !page_frame)
  113 + goto out;
  114 +
  115 + clear_table((unsigned long *) segment_table, _SEGMENT_ENTRY_EMPTY,
  116 + PAGE_SIZE << SEGMENT_ORDER);
  117 + clear_table((unsigned long *) page_table, _PAGE_TYPE_EMPTY,
  118 + 256*sizeof(unsigned long));
  119 +
  120 + *(unsigned long *) segment_table = _SEGMENT_ENTRY + page_table;
  121 + *(unsigned long *) page_table = _PAGE_RO + page_frame;
  122 +
  123 + psal = (u32 *) (page_table + 256*sizeof(unsigned long));
  124 + aste = psal + 32;
  125 +
  126 + for (i = 4; i < 32; i += 4)
  127 + psal[i] = 0x80000000;
  128 +
  129 + lowcore->paste[4] = (u32)(addr_t) psal;
  130 + psal[0] = 0x20000000;
  131 + psal[2] = (u32)(addr_t) aste;
  132 + *(unsigned long *) (aste + 2) = segment_table +
  133 + _ASCE_TABLE_LENGTH + _ASCE_USER_BITS + _ASCE_TYPE_SEGMENT;
  134 + aste[4] = (u32)(addr_t) psal;
  135 + lowcore->vdso_per_cpu_data = page_frame;
  136 +
  137 + vdso_init_per_cpu_data(cpu, (struct vdso_per_cpu_data *) page_frame);
  138 + return 0;
  139 +
  140 +out:
  141 + free_page(page_frame);
  142 + free_page(page_table);
  143 + free_pages(segment_table, SEGMENT_ORDER);
  144 + return -ENOMEM;
  145 +}
  146 +
  147 +#ifdef CONFIG_HOTPLUG_CPU
  148 +void vdso_free_per_cpu(int cpu, struct _lowcore *lowcore)
  149 +{
  150 + unsigned long segment_table, page_table, page_frame;
  151 + u32 *psal, *aste;
  152 +
  153 + if (!switch_amode || !vdso_enabled)
  154 + return;
  155 +
  156 + psal = (u32 *)(addr_t) lowcore->paste[4];
  157 + aste = (u32 *)(addr_t) psal[2];
  158 + segment_table = *(unsigned long *)(aste + 2) & PAGE_MASK;
  159 + page_table = *(unsigned long *) segment_table;
  160 + page_frame = *(unsigned long *) page_table;
  161 +
  162 + free_page(page_frame);
  163 + free_page(page_table);
  164 + free_pages(segment_table, SEGMENT_ORDER);
  165 +}
  166 +#endif /* CONFIG_HOTPLUG_CPU */
  167 +
  168 +static void __vdso_init_cr5(void *dummy)
  169 +{
  170 + unsigned long cr5;
  171 +
  172 + cr5 = offsetof(struct _lowcore, paste);
  173 + __ctl_load(cr5, 5, 5);
  174 +}
  175 +
  176 +static void vdso_init_cr5(void)
  177 +{
  178 + if (switch_amode && vdso_enabled)
  179 + on_each_cpu(__vdso_init_cr5, NULL, 1);
  180 +}
  181 +#endif /* CONFIG_64BIT */
  182 +
  183 +/*
74 184 * This is called from binfmt_elf, we create the special vma for the
75 185 * vDSO and insert it into the mm struct tree
76 186 */
... ... @@ -172,6 +282,9 @@
172 282 {
173 283 int i;
174 284  
  285 + if (!vdso_enabled)
  286 + return 0;
  287 + vdso_init_data(vdso_data);
175 288 #if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT)
176 289 /* Calculate the size of the 32 bit vDSO */
177 290 vdso32_pages = ((&vdso32_end - &vdso32_start
... ... @@ -208,6 +321,10 @@
208 321 }
209 322 vdso64_pagelist[vdso64_pages - 1] = virt_to_page(vdso_data);
210 323 vdso64_pagelist[vdso64_pages] = NULL;
  324 +#ifndef CONFIG_SMP
  325 + BUG_ON(vdso_alloc_per_cpu(0, S390_lowcore));
  326 +#endif
  327 + vdso_init_cr5();
211 328 #endif /* CONFIG_64BIT */
212 329  
213 330 get_page(virt_to_page(vdso_data));
arch/s390/kernel/vdso64/clock_getres.S
... ... @@ -22,7 +22,12 @@
22 22 cghi %r2,CLOCK_REALTIME
23 23 je 0f
24 24 cghi %r2,CLOCK_MONOTONIC
  25 + je 0f
  26 + cghi %r2,-2 /* CLOCK_THREAD_CPUTIME_ID for this thread */
25 27 jne 2f
  28 + larl %r5,_vdso_data
  29 + icm %r0,15,__LC_ECTG_OK(%r5)
  30 + jz 2f
26 31 0: ltgr %r3,%r3
27 32 jz 1f /* res == NULL */
28 33 larl %r1,3f
arch/s390/kernel/vdso64/clock_gettime.S
... ... @@ -22,8 +22,10 @@
22 22 larl %r5,_vdso_data
23 23 cghi %r2,CLOCK_REALTIME
24 24 je 4f
  25 + cghi %r2,-2 /* CLOCK_THREAD_CPUTIME_ID for this thread */
  26 + je 9f
25 27 cghi %r2,CLOCK_MONOTONIC
26   - jne 9f
  28 + jne 12f
27 29  
28 30 /* CLOCK_MONOTONIC */
29 31 ltgr %r3,%r3
... ... @@ -42,7 +44,7 @@
42 44 alg %r0,__VDSO_WTOM_SEC(%r5)
43 45 clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */
44 46 jne 0b
45   - larl %r5,10f
  47 + larl %r5,13f
46 48 1: clg %r1,0(%r5)
47 49 jl 2f
48 50 slg %r1,0(%r5)
... ... @@ -68,7 +70,7 @@
68 70 lg %r0,__VDSO_XTIME_SEC(%r5)
69 71 clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */
70 72 jne 5b
71   - larl %r5,10f
  73 + larl %r5,13f
72 74 6: clg %r1,0(%r5)
73 75 jl 7f
74 76 slg %r1,0(%r5)
75 77  
76 78  
... ... @@ -79,12 +81,39 @@
79 81 8: lghi %r2,0
80 82 br %r14
81 83  
  84 + /* CLOCK_THREAD_CPUTIME_ID for this thread */
  85 +9: icm %r0,15,__VDSO_ECTG_OK(%r5)
  86 + jz 12f
  87 + ear %r2,%a4
  88 + llilh %r4,0x0100
  89 + sar %a4,%r4
  90 + lghi %r4,0
  91 + sacf 512 /* Magic ectg instruction */
  92 + .insn ssf,0xc80100000000,__VDSO_ECTG_BASE(4),__VDSO_ECTG_USER(4),4
  93 + sacf 0
  94 + sar %a4,%r2
  95 + algr %r1,%r0 /* r1 = cputime as TOD value */
  96 + mghi %r1,1000 /* convert to nanoseconds */
  97 + srlg %r1,%r1,12 /* r1 = cputime in nanosec */
  98 + lgr %r4,%r1
  99 + larl %r5,13f
  100 + srlg %r1,%r1,9 /* divide by 1000000000 */
  101 + mlg %r0,8(%r5)
  102 + srlg %r0,%r0,11 /* r0 = tv_sec */
  103 + stg %r0,0(%r3)
  104 + msg %r0,0(%r5) /* calculate tv_nsec */
  105 + slgr %r4,%r0 /* r4 = tv_nsec */
  106 + stg %r4,8(%r3)
  107 + lghi %r2,0
  108 + br %r14
  109 +
82 110 /* Fallback to system call */
83   -9: lghi %r1,__NR_clock_gettime
  111 +12: lghi %r1,__NR_clock_gettime
84 112 svc 0
85 113 br %r14
86 114  
87   -10: .quad 1000000000
  115 +13: .quad 1000000000
  116 +14: .quad 19342813113834067
88 117 .cfi_endproc
89 118 .size __kernel_clock_gettime,.-__kernel_clock_gettime
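
Branch 9 serves the CPU-time clock of the calling thread entirely in user space via the ECTG instruction; the clock id -2 tested above is the kernel's internal encoding for "scheduling clock of the current thread". The new constant at label 14, 19342813113834067, appears to be ceil(2**84 / 10**9), which lets the nanosecond-to-second split be done with a multiply and shift instead of a division. From user space the feature is reached through an ordinary clock_gettime() on the thread clock; whether a given libc hits the vdso fast path or falls back to the system call depends on the clock id it passes, and the result is the same either way.

```c
/* Measures the caller's own CPU time with clock_gettime(), the call the
 * vdso fast path above is meant to serve. Older glibc needs -lrt. */
#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec a, b;
	volatile unsigned long spin = 0;
	unsigned long i;

	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &a);
	for (i = 0; i < 50000000UL; i++)
		spin += i;			/* burn some CPU time */
	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &b);

	printf("thread cpu time: %.6f s\n",
	       (b.tv_sec - a.tv_sec) + (b.tv_nsec - a.tv_nsec) / 1e9);
	return 0;
}
```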
arch/s390/kernel/vtime.c
... ... @@ -23,19 +23,43 @@
23 23 #include <asm/s390_ext.h>
24 24 #include <asm/timer.h>
25 25 #include <asm/irq_regs.h>
  26 +#include <asm/cpu.h>
26 27  
27 28 static ext_int_info_t ext_int_info_timer;
  29 +
28 30 static DEFINE_PER_CPU(struct vtimer_queue, virt_cpu_timer);
29 31  
  32 +DEFINE_PER_CPU(struct s390_idle_data, s390_idle) = {
  33 + .lock = __SPIN_LOCK_UNLOCKED(s390_idle.lock)
  34 +};
  35 +
  36 +static inline __u64 get_vtimer(void)
  37 +{
  38 + __u64 timer;
  39 +
  40 + asm volatile("STPT %0" : "=m" (timer));
  41 + return timer;
  42 +}
  43 +
  44 +static inline void set_vtimer(__u64 expires)
  45 +{
  46 + __u64 timer;
  47 +
  48 + asm volatile (" STPT %0\n" /* Store current cpu timer value */
  49 + " SPT %1" /* Set new value immediatly afterwards */
  50 + : "=m" (timer) : "m" (expires) );
  51 + S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer;
  52 + S390_lowcore.last_update_timer = expires;
  53 +}
  54 +
30 55 /*
31 56 * Update process times based on virtual cpu times stored by entry.S
32 57 * to the lowcore fields user_timer, system_timer & steal_clock.
33 58 */
34   -void account_process_tick(struct task_struct *tsk, int user_tick)
  59 +static void do_account_vtime(struct task_struct *tsk, int hardirq_offset)
35 60 {
36   - cputime_t cputime;
37   - __u64 timer, clock;
38   - int rcu_user_flag;
  61 + struct thread_info *ti = task_thread_info(tsk);
  62 + __u64 timer, clock, user, system, steal;
39 63  
40 64 timer = S390_lowcore.last_update_timer;
41 65 clock = S390_lowcore.last_update_clock;
... ... @@ -44,50 +68,41 @@
44 68 : "=m" (S390_lowcore.last_update_timer),
45 69 "=m" (S390_lowcore.last_update_clock) );
46 70 S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer;
47   - S390_lowcore.steal_clock += S390_lowcore.last_update_clock - clock;
  71 + S390_lowcore.steal_timer += S390_lowcore.last_update_clock - clock;
48 72  
49   - cputime = S390_lowcore.user_timer >> 12;
50   - rcu_user_flag = cputime != 0;
51   - S390_lowcore.user_timer -= cputime << 12;
52   - S390_lowcore.steal_clock -= cputime << 12;
53   - account_user_time(tsk, cputime);
  73 + user = S390_lowcore.user_timer - ti->user_timer;
  74 + S390_lowcore.steal_timer -= user;
  75 + ti->user_timer = S390_lowcore.user_timer;
  76 + account_user_time(tsk, user, user);
54 77  
55   - cputime = S390_lowcore.system_timer >> 12;
56   - S390_lowcore.system_timer -= cputime << 12;
57   - S390_lowcore.steal_clock -= cputime << 12;
58   - account_system_time(tsk, HARDIRQ_OFFSET, cputime);
  78 + system = S390_lowcore.system_timer - ti->system_timer;
  79 + S390_lowcore.steal_timer -= system;
  80 + ti->system_timer = S390_lowcore.system_timer;
  81 + account_system_time(tsk, hardirq_offset, system, system);
59 82  
60   - cputime = S390_lowcore.steal_clock;
61   - if ((__s64) cputime > 0) {
62   - cputime >>= 12;
63   - S390_lowcore.steal_clock -= cputime << 12;
64   - account_steal_time(tsk, cputime);
  83 + steal = S390_lowcore.steal_timer;
  84 + if ((s64) steal > 0) {
  85 + S390_lowcore.steal_timer = 0;
  86 + account_steal_time(steal);
65 87 }
66 88 }
67 89  
68   -/*
69   - * Update process times based on virtual cpu times stored by entry.S
70   - * to the lowcore fields user_timer, system_timer & steal_clock.
71   - */
72   -void account_vtime(struct task_struct *tsk)
  90 +void account_vtime(struct task_struct *prev, struct task_struct *next)
73 91 {
74   - cputime_t cputime;
75   - __u64 timer;
  92 + struct thread_info *ti;
76 93  
77   - timer = S390_lowcore.last_update_timer;
78   - asm volatile (" STPT %0" /* Store current cpu timer value */
79   - : "=m" (S390_lowcore.last_update_timer) );
80   - S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer;
  94 + do_account_vtime(prev, 0);
  95 + ti = task_thread_info(prev);
  96 + ti->user_timer = S390_lowcore.user_timer;
  97 + ti->system_timer = S390_lowcore.system_timer;
  98 + ti = task_thread_info(next);
  99 + S390_lowcore.user_timer = ti->user_timer;
  100 + S390_lowcore.system_timer = ti->system_timer;
  101 +}
81 102  
82   - cputime = S390_lowcore.user_timer >> 12;
83   - S390_lowcore.user_timer -= cputime << 12;
84   - S390_lowcore.steal_clock -= cputime << 12;
85   - account_user_time(tsk, cputime);
86   -
87   - cputime = S390_lowcore.system_timer >> 12;
88   - S390_lowcore.system_timer -= cputime << 12;
89   - S390_lowcore.steal_clock -= cputime << 12;
90   - account_system_time(tsk, 0, cputime);
  103 +void account_process_tick(struct task_struct *tsk, int user_tick)
  104 +{
  105 + do_account_vtime(tsk, HARDIRQ_OFFSET);
91 106 }
92 107  
93 108 /*
... ... @@ -96,80 +111,131 @@
96 111 */
97 112 void account_system_vtime(struct task_struct *tsk)
98 113 {
99   - cputime_t cputime;
100   - __u64 timer;
  114 + struct thread_info *ti = task_thread_info(tsk);
  115 + __u64 timer, system;
101 116  
102 117 timer = S390_lowcore.last_update_timer;
103   - asm volatile (" STPT %0" /* Store current cpu timer value */
104   - : "=m" (S390_lowcore.last_update_timer) );
  118 + S390_lowcore.last_update_timer = get_vtimer();
105 119 S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer;
106 120  
107   - cputime = S390_lowcore.system_timer >> 12;
108   - S390_lowcore.system_timer -= cputime << 12;
109   - S390_lowcore.steal_clock -= cputime << 12;
110   - account_system_time(tsk, 0, cputime);
  121 + system = S390_lowcore.system_timer - ti->system_timer;
  122 + S390_lowcore.steal_timer -= system;
  123 + ti->system_timer = S390_lowcore.system_timer;
  124 + account_system_time(tsk, 0, system, system);
111 125 }
112 126 EXPORT_SYMBOL_GPL(account_system_vtime);
113 127  
114   -static inline void set_vtimer(__u64 expires)
  128 +void vtime_start_cpu(void)
115 129 {
116   - __u64 timer;
  130 + struct s390_idle_data *idle = &__get_cpu_var(s390_idle);
  131 + struct vtimer_queue *vq = &__get_cpu_var(virt_cpu_timer);
  132 + __u64 idle_time, expires;
117 133  
118   - asm volatile (" STPT %0\n" /* Store current cpu timer value */
119   - " SPT %1" /* Set new value immediatly afterwards */
120   - : "=m" (timer) : "m" (expires) );
121   - S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer;
122   - S390_lowcore.last_update_timer = expires;
  134 + /* Account time spent with enabled wait psw loaded as idle time. */
  135 + idle_time = S390_lowcore.int_clock - idle->idle_enter;
  136 + account_idle_time(idle_time);
  137 + S390_lowcore.last_update_clock = S390_lowcore.int_clock;
123 138  
124   - /* store expire time for this CPU timer */
125   - __get_cpu_var(virt_cpu_timer).to_expire = expires;
126   -}
  139 + /* Account system time spent going idle. */
  140 + S390_lowcore.system_timer += S390_lowcore.last_update_timer - vq->idle;
  141 + S390_lowcore.last_update_timer = S390_lowcore.async_enter_timer;
127 142  
128   -void vtime_start_cpu_timer(void)
129   -{
130   - struct vtimer_queue *vt_list;
  143 + /* Restart vtime CPU timer */
  144 + if (vq->do_spt) {
  145 + /* Program old expire value but first save progress. */
  146 + expires = vq->idle - S390_lowcore.async_enter_timer;
  147 + expires += get_vtimer();
  148 + set_vtimer(expires);
  149 + } else {
  150 + /* Don't account the CPU timer delta while the cpu was idle. */
  151 + vq->elapsed -= vq->idle - S390_lowcore.async_enter_timer;
  152 + }
131 153  
132   - vt_list = &__get_cpu_var(virt_cpu_timer);
133   -
134   - /* CPU timer interrupt is pending, don't reprogramm it */
135   - if (vt_list->idle & 1LL<<63)
136   - return;
137   -
138   - if (!list_empty(&vt_list->list))
139   - set_vtimer(vt_list->idle);
  154 + spin_lock(&idle->lock);
  155 + idle->idle_time += idle_time;
  156 + idle->idle_enter = 0ULL;
  157 + idle->idle_count++;
  158 + spin_unlock(&idle->lock);
140 159 }
141 160  
142   -void vtime_stop_cpu_timer(void)
  161 +void vtime_stop_cpu(void)
143 162 {
144   - struct vtimer_queue *vt_list;
  163 + struct s390_idle_data *idle = &__get_cpu_var(s390_idle);
  164 + struct vtimer_queue *vq = &__get_cpu_var(virt_cpu_timer);
  165 + psw_t psw;
145 166  
146   - vt_list = &__get_cpu_var(virt_cpu_timer);
  167 + /* Wait for external, I/O or machine check interrupt. */
  168 + psw.mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_IO | PSW_MASK_EXT;
147 169  
148   - /* nothing to do */
149   - if (list_empty(&vt_list->list)) {
150   - vt_list->idle = VTIMER_MAX_SLICE;
151   - goto fire;
  170 + /* Check if the CPU timer needs to be reprogrammed. */
  171 + if (vq->do_spt) {
  172 + __u64 vmax = VTIMER_MAX_SLICE;
  173 + /*
  174 + * The inline assembly is equivalent to
  175 + * vq->idle = get_cpu_timer();
  176 + * set_cpu_timer(VTIMER_MAX_SLICE);
  177 + * idle->idle_enter = get_clock();
  178 + * __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT |
  179 + * PSW_MASK_IO | PSW_MASK_EXT);
  180 + * The difference is that the inline assembly makes sure that
  181 + * the last three instruction are stpt, stck and lpsw in that
  182 + * order. This is done to increase the precision.
  183 + */
  184 + asm volatile(
  185 +#ifndef CONFIG_64BIT
  186 + " basr 1,0\n"
  187 + "0: ahi 1,1f-0b\n"
  188 + " st 1,4(%2)\n"
  189 +#else /* CONFIG_64BIT */
  190 + " larl 1,1f\n"
  191 + " stg 1,8(%2)\n"
  192 +#endif /* CONFIG_64BIT */
  193 + " stpt 0(%4)\n"
  194 + " spt 0(%5)\n"
  195 + " stck 0(%3)\n"
  196 +#ifndef CONFIG_64BIT
  197 + " lpsw 0(%2)\n"
  198 +#else /* CONFIG_64BIT */
  199 + " lpswe 0(%2)\n"
  200 +#endif /* CONFIG_64BIT */
  201 + "1:"
  202 + : "=m" (idle->idle_enter), "=m" (vq->idle)
  203 + : "a" (&psw), "a" (&idle->idle_enter),
  204 + "a" (&vq->idle), "a" (&vmax), "m" (vmax), "m" (psw)
  205 + : "memory", "cc", "1");
  206 + } else {
  207 + /*
  208 + * The inline assembly is equivalent to
  209 + * vq->idle = get_cpu_timer();
  210 + * idle->idle_enter = get_clock();
  211 + * __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT |
  212 + * PSW_MASK_IO | PSW_MASK_EXT);
  213 + * The difference is that the inline assembly makes sure that
  214 + * the last three instruction are stpt, stck and lpsw in that
  215 + * order. This is done to increase the precision.
  216 + */
  217 + asm volatile(
  218 +#ifndef CONFIG_64BIT
  219 + " basr 1,0\n"
  220 + "0: ahi 1,1f-0b\n"
  221 + " st 1,4(%2)\n"
  222 +#else /* CONFIG_64BIT */
  223 + " larl 1,1f\n"
  224 + " stg 1,8(%2)\n"
  225 +#endif /* CONFIG_64BIT */
  226 + " stpt 0(%4)\n"
  227 + " stck 0(%3)\n"
  228 +#ifndef CONFIG_64BIT
  229 + " lpsw 0(%2)\n"
  230 +#else /* CONFIG_64BIT */
  231 + " lpswe 0(%2)\n"
  232 +#endif /* CONFIG_64BIT */
  233 + "1:"
  234 + : "=m" (idle->idle_enter), "=m" (vq->idle)
  235 + : "a" (&psw), "a" (&idle->idle_enter),
  236 + "a" (&vq->idle), "m" (psw)
  237 + : "memory", "cc", "1");
152 238 }
153   -
154   - /* store the actual expire value */
155   - asm volatile ("STPT %0" : "=m" (vt_list->idle));
156   -
157   - /*
158   - * If the CPU timer is negative we don't reprogramm
159   - * it because we will get instantly an interrupt.
160   - */
161   - if (vt_list->idle & 1LL<<63)
162   - return;
163   -
164   - vt_list->offset += vt_list->to_expire - vt_list->idle;
165   -
166   - /*
167   - * We cannot halt the CPU timer, we just write a value that
168   - * nearly never expires (only after 71 years) and re-write
169   - * the stored expire value if we continue the timer
170   - */
171   - fire:
172   - set_vtimer(VTIMER_MAX_SLICE);
173 239 }
174 240  
175 241 /*
... ... @@ -195,30 +261,23 @@
195 261 */
196 262 static void do_callbacks(struct list_head *cb_list)
197 263 {
198   - struct vtimer_queue *vt_list;
  264 + struct vtimer_queue *vq;
199 265 struct vtimer_list *event, *tmp;
200   - void (*fn)(unsigned long);
201   - unsigned long data;
202 266  
203 267 if (list_empty(cb_list))
204 268 return;
205 269  
206   - vt_list = &__get_cpu_var(virt_cpu_timer);
  270 + vq = &__get_cpu_var(virt_cpu_timer);
207 271  
208 272 list_for_each_entry_safe(event, tmp, cb_list, entry) {
209   - fn = event->function;
210   - data = event->data;
211   - fn(data);
212   -
213   - if (!event->interval)
214   - /* delete one shot timer */
215   - list_del_init(&event->entry);
216   - else {
217   - /* move interval timer back to list */
218   - spin_lock(&vt_list->lock);
219   - list_del_init(&event->entry);
220   - list_add_sorted(event, &vt_list->list);
221   - spin_unlock(&vt_list->lock);
  273 + list_del_init(&event->entry);
  274 + (event->function)(event->data);
  275 + if (event->interval) {
  276 + /* Recharge interval timer */
  277 + event->expires = event->interval + vq->elapsed;
  278 + spin_lock(&vq->lock);
  279 + list_add_sorted(event, &vq->list);
  280 + spin_unlock(&vq->lock);
222 281 }
223 282 }
224 283 }
... ... @@ -228,64 +287,57 @@
228 287 */
229 288 static void do_cpu_timer_interrupt(__u16 error_code)
230 289 {
231   - __u64 next, delta;
232   - struct vtimer_queue *vt_list;
  290 + struct vtimer_queue *vq;
233 291 struct vtimer_list *event, *tmp;
234   - struct list_head *ptr;
235   - /* the callback queue */
236   - struct list_head cb_list;
  292 + struct list_head cb_list; /* the callback queue */
  293 + __u64 elapsed, next;
237 294  
238 295 INIT_LIST_HEAD(&cb_list);
239   - vt_list = &__get_cpu_var(virt_cpu_timer);
  296 + vq = &__get_cpu_var(virt_cpu_timer);
240 297  
241 298 /* walk timer list, fire all expired events */
242   - spin_lock(&vt_list->lock);
  299 + spin_lock(&vq->lock);
243 300  
244   - if (vt_list->to_expire < VTIMER_MAX_SLICE)
245   - vt_list->offset += vt_list->to_expire;
246   -
247   - list_for_each_entry_safe(event, tmp, &vt_list->list, entry) {
248   - if (event->expires > vt_list->offset)
249   - /* found first unexpired event, leave */
250   - break;
251   -
252   - /* re-charge interval timer, we have to add the offset */
253   - if (event->interval)
254   - event->expires = event->interval + vt_list->offset;
255   -
256   - /* move expired timer to the callback queue */
257   - list_move_tail(&event->entry, &cb_list);
  301 + elapsed = vq->elapsed + (vq->timer - S390_lowcore.async_enter_timer);
  302 + BUG_ON((s64) elapsed < 0);
  303 + vq->elapsed = 0;
  304 + list_for_each_entry_safe(event, tmp, &vq->list, entry) {
  305 + if (event->expires < elapsed)
  306 + /* move expired timer to the callback queue */
  307 + list_move_tail(&event->entry, &cb_list);
  308 + else
  309 + event->expires -= elapsed;
258 310 }
259   - spin_unlock(&vt_list->lock);
  311 + spin_unlock(&vq->lock);
  312 +
  313 + vq->do_spt = list_empty(&cb_list);
260 314 do_callbacks(&cb_list);
261 315  
262 316 /* next event is first in list */
263   - spin_lock(&vt_list->lock);
264   - if (!list_empty(&vt_list->list)) {
265   - ptr = vt_list->list.next;
266   - event = list_entry(ptr, struct vtimer_list, entry);
267   - next = event->expires - vt_list->offset;
268   -
269   - /* add the expired time from this interrupt handler
270   - * and the callback functions
271   - */
272   - asm volatile ("STPT %0" : "=m" (delta));
273   - delta = 0xffffffffffffffffLL - delta + 1;
274   - vt_list->offset += delta;
275   - next -= delta;
276   - } else {
277   - vt_list->offset = 0;
278   - next = VTIMER_MAX_SLICE;
279   - }
280   - spin_unlock(&vt_list->lock);
281   - set_vtimer(next);
  317 + next = VTIMER_MAX_SLICE;
  318 + spin_lock(&vq->lock);
  319 + if (!list_empty(&vq->list)) {
  320 + event = list_first_entry(&vq->list, struct vtimer_list, entry);
  321 + next = event->expires;
  322 + } else
  323 + vq->do_spt = 0;
  324 + spin_unlock(&vq->lock);
  325 + /*
  326 + * To improve precision add the time spent by the
  327 + * interrupt handler to the elapsed time.
  328 + * Note: CPU timer counts down and we got an interrupt,
  329 + * the current content is negative
  330 + */
  331 + elapsed = S390_lowcore.async_enter_timer - get_vtimer();
  332 + set_vtimer(next - elapsed);
  333 + vq->timer = next - elapsed;
  334 + vq->elapsed = elapsed;
282 335 }
283 336  
284 337 void init_virt_timer(struct vtimer_list *timer)
285 338 {
286 339 timer->function = NULL;
287 340 INIT_LIST_HEAD(&timer->entry);
288   - spin_lock_init(&timer->lock);
289 341 }
290 342 EXPORT_SYMBOL(init_virt_timer);
291 343  
... ... @@ -299,44 +351,40 @@
299 351 */
300 352 static void internal_add_vtimer(struct vtimer_list *timer)
301 353 {
  354 + struct vtimer_queue *vq;
302 355 unsigned long flags;
303   - __u64 done;
304   - struct vtimer_list *event;
305   - struct vtimer_queue *vt_list;
  356 + __u64 left, expires;
306 357  
307   - vt_list = &per_cpu(virt_cpu_timer, timer->cpu);
308   - spin_lock_irqsave(&vt_list->lock, flags);
  358 + vq = &per_cpu(virt_cpu_timer, timer->cpu);
  359 + spin_lock_irqsave(&vq->lock, flags);
309 360  
310 361 BUG_ON(timer->cpu != smp_processor_id());
311 362  
312   - /* if list is empty we only have to set the timer */
313   - if (list_empty(&vt_list->list)) {
314   - /* reset the offset, this may happen if the last timer was
315   - * just deleted by mod_virt_timer and the interrupt
316   - * didn't happen until here
317   - */
318   - vt_list->offset = 0;
319   - goto fire;
  363 + if (list_empty(&vq->list)) {
  364 + /* First timer on this cpu, just program it. */
  365 + list_add(&timer->entry, &vq->list);
  366 + set_vtimer(timer->expires);
  367 + vq->timer = timer->expires;
  368 + vq->elapsed = 0;
  369 + } else {
  370 + /* Check progress of old timers. */
  371 + expires = timer->expires;
  372 + left = get_vtimer();
  373 + if (likely((s64) expires < (s64) left)) {
  374 + /* The new timer expires before the current timer. */
  375 + set_vtimer(expires);
  376 + vq->elapsed += vq->timer - left;
  377 + vq->timer = expires;
  378 + } else {
  379 + vq->elapsed += vq->timer - left;
  380 + vq->timer = left;
  381 + }
  382 + /* Insert new timer into per cpu list. */
  383 + timer->expires += vq->elapsed;
  384 + list_add_sorted(timer, &vq->list);
320 385 }
321 386  
322   - /* save progress */
323   - asm volatile ("STPT %0" : "=m" (done));
324   -
325   - /* calculate completed work */
326   - done = vt_list->to_expire - done + vt_list->offset;
327   - vt_list->offset = 0;
328   -
329   - list_for_each_entry(event, &vt_list->list, entry)
330   - event->expires -= done;
331   -
332   - fire:
333   - list_add_sorted(timer, &vt_list->list);
334   -
335   - /* get first element, which is the next vtimer slice */
336   - event = list_entry(vt_list->list.next, struct vtimer_list, entry);
337   -
338   - set_vtimer(event->expires);
339   - spin_unlock_irqrestore(&vt_list->lock, flags);
  387 + spin_unlock_irqrestore(&vq->lock, flags);
340 388 /* release CPU acquired in prepare_vtimer or mod_virt_timer() */
341 389 put_cpu();
342 390 }
343 391  
... ... @@ -381,14 +429,15 @@
381 429 * If we change a pending timer the function must be called on the CPU
382 430 * where the timer is running on, e.g. by smp_call_function_single()
383 431 *
384   - * The original mod_timer adds the timer if it is not pending. For compatibility
385   - * we do the same. The timer will be added on the current CPU as a oneshot timer.
  432 + * The original mod_timer adds the timer if it is not pending. For
  433 + * compatibility we do the same. The timer will be added on the current
  434 + * CPU as a oneshot timer.
386 435 *
387 436 * returns whether it has modified a pending timer (1) or not (0)
388 437 */
389 438 int mod_virt_timer(struct vtimer_list *timer, __u64 expires)
390 439 {
391   - struct vtimer_queue *vt_list;
  440 + struct vtimer_queue *vq;
392 441 unsigned long flags;
393 442 int cpu;
394 443  
395 444  
396 445  
... ... @@ -404,17 +453,17 @@
404 453 return 1;
405 454  
406 455 cpu = get_cpu();
407   - vt_list = &per_cpu(virt_cpu_timer, cpu);
  456 + vq = &per_cpu(virt_cpu_timer, cpu);
408 457  
409 458 /* check if we run on the right CPU */
410 459 BUG_ON(timer->cpu != cpu);
411 460  
412 461 /* disable interrupts before test if timer is pending */
413   - spin_lock_irqsave(&vt_list->lock, flags);
  462 + spin_lock_irqsave(&vq->lock, flags);
414 463  
415 464 /* if timer isn't pending add it on the current CPU */
416 465 if (!vtimer_pending(timer)) {
417   - spin_unlock_irqrestore(&vt_list->lock, flags);
  466 + spin_unlock_irqrestore(&vq->lock, flags);
418 467 /* we do not activate an interval timer with mod_virt_timer */
419 468 timer->interval = 0;
420 469 timer->expires = expires;
... ... @@ -431,7 +480,7 @@
431 480 timer->interval = expires;
432 481  
433 482 /* the timer can't expire anymore so we can release the lock */
434   - spin_unlock_irqrestore(&vt_list->lock, flags);
  483 + spin_unlock_irqrestore(&vq->lock, flags);
435 484 internal_add_vtimer(timer);
436 485 return 1;
437 486 }
438 487  
439 488  
... ... @@ -445,25 +494,19 @@
445 494 int del_virt_timer(struct vtimer_list *timer)
446 495 {
447 496 unsigned long flags;
448   - struct vtimer_queue *vt_list;
  497 + struct vtimer_queue *vq;
449 498  
450 499 /* check if timer is pending */
451 500 if (!vtimer_pending(timer))
452 501 return 0;
453 502  
454   - vt_list = &per_cpu(virt_cpu_timer, timer->cpu);
455   - spin_lock_irqsave(&vt_list->lock, flags);
  503 + vq = &per_cpu(virt_cpu_timer, timer->cpu);
  504 + spin_lock_irqsave(&vq->lock, flags);
456 505  
457 506 /* we don't interrupt a running timer, just let it expire! */
458 507 list_del_init(&timer->entry);
459 508  
460   - /* last timer removed */
461   - if (list_empty(&vt_list->list)) {
462   - vt_list->to_expire = 0;
463   - vt_list->offset = 0;
464   - }
465   -
466   - spin_unlock_irqrestore(&vt_list->lock, flags);
  509 + spin_unlock_irqrestore(&vq->lock, flags);
467 510 return 1;
468 511 }
469 512 EXPORT_SYMBOL(del_virt_timer);
470 513  
471 514  
472 515  
473 516  
... ... @@ -473,24 +516,19 @@
473 516 */
474 517 void init_cpu_vtimer(void)
475 518 {
476   - struct vtimer_queue *vt_list;
  519 + struct vtimer_queue *vq;
477 520  
478 521 /* kick the virtual timer */
479   - S390_lowcore.exit_timer = VTIMER_MAX_SLICE;
480   - S390_lowcore.last_update_timer = VTIMER_MAX_SLICE;
481   - asm volatile ("SPT %0" : : "m" (S390_lowcore.last_update_timer));
482 522 asm volatile ("STCK %0" : "=m" (S390_lowcore.last_update_clock));
  523 + asm volatile ("STPT %0" : "=m" (S390_lowcore.last_update_timer));
483 524  
  525 + /* initialize per cpu vtimer structure */
  526 + vq = &__get_cpu_var(virt_cpu_timer);
  527 + INIT_LIST_HEAD(&vq->list);
  528 + spin_lock_init(&vq->lock);
  529 +
484 530 /* enable cpu timer interrupts */
485 531 __ctl_set_bit(0,10);
486   -
487   - vt_list = &__get_cpu_var(virt_cpu_timer);
488   - INIT_LIST_HEAD(&vt_list->list);
489   - spin_lock_init(&vt_list->lock);
490   - vt_list->to_expire = 0;
491   - vt_list->offset = 0;
492   - vt_list->idle = 0;
493   -
494 532 }
495 533  
496 534 void __init vtime_init(void)
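For reference, the interface refactored above (the per-cpu vt_list becoming struct vtimer_queue *vq) is consumed through init_virt_timer(), add_virt_timer(), mod_virt_timer() and del_virt_timer(). A minimal usage sketch, not part of the commit; the callback, the expiry value and its units are assumptions:

#include <asm/timer.h>          /* struct vtimer_list, add_virt_timer(), ... */

/* hypothetical callback, runs when the virtual CPU timer expires */
static void example_vtimer_fn(unsigned long data)
{
}

static struct vtimer_list example_vtimer;

static void example_start_vtimer(void)
{
        init_virt_timer(&example_vtimer);
        example_vtimer.function = example_vtimer_fn;
        example_vtimer.data = 0;
        example_vtimer.expires = 1000 * 4096;   /* CPU-timer units (assumed) */
        add_virt_timer(&example_vtimer);        /* one-shot on the current CPU */
}

del_virt_timer(&example_vtimer) removes it again, and mod_virt_timer(&example_vtimer, new_expires) reprograms a pending timer, as shown in the hunks above.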
arch/x86/xen/time.c
... ... @@ -132,8 +132,7 @@
132 132 *snap = state;
133 133  
134 134 /* Add the appropriate number of ticks of stolen time,
135   - including any left-overs from last time. Passing NULL to
136   - account_steal_time accounts the time as stolen. */
  135 + including any left-overs from last time. */
137 136 stolen = runnable + offline + __get_cpu_var(residual_stolen);
138 137  
139 138 if (stolen < 0)
140 139  
... ... @@ -141,11 +140,10 @@
141 140  
142 141 ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen);
143 142 __get_cpu_var(residual_stolen) = stolen;
144   - account_steal_time(NULL, ticks);
  143 + account_steal_ticks(ticks);
145 144  
146 145 /* Add the appropriate number of ticks of blocked time,
147   - including any left-overs from last time. Passing idle to
148   - account_steal_time accounts the time as idle/wait. */
  146 + including any left-overs from last time. */
149 147 blocked += __get_cpu_var(residual_blocked);
150 148  
151 149 if (blocked < 0)
... ... @@ -153,7 +151,7 @@
153 151  
154 152 ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked);
155 153 __get_cpu_var(residual_blocked) = blocked;
156   - account_steal_time(idle_task(smp_processor_id()), ticks);
  154 + account_idle_ticks(ticks);
157 155 }
158 156  
159 157 /*
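The hunk above folds an accumulated nanosecond total into whole ticks with iter_div_u64_rem() and keeps the remainder for the next round, so rounding never loses time. A self-contained sketch of that pattern; the names example_residual_ns and example_fold_steal_ns are placeholders, not from the diff:

#include <linux/kernel_stat.h>  /* account_steal_ticks() */
#include <linux/math64.h>       /* iter_div_u64_rem() */
#include <linux/percpu.h>
#include <linux/time.h>         /* NSEC_PER_SEC */

#define EXAMPLE_NS_PER_TICK     (NSEC_PER_SEC / HZ)

/* nanoseconds left over after the last conversion to ticks */
static DEFINE_PER_CPU(u64, example_residual_ns);

static void example_fold_steal_ns(u64 delta_ns)
{
        u64 total, ticks;

        total = delta_ns + __get_cpu_var(example_residual_ns);
        /* split into whole ticks; the remainder ends up back in 'total' */
        ticks = iter_div_u64_rem(total, EXAMPLE_NS_PER_TICK, &total);
        __get_cpu_var(example_residual_ns) = total;
        account_steal_ticks((unsigned long)ticks);
}

The blocked-time branch is the same pattern, except that it ends in account_idle_ticks().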
drivers/s390/cio/cio.c
... ... @@ -632,8 +632,8 @@
632 632 struct pt_regs *old_regs;
633 633  
634 634 old_regs = set_irq_regs(regs);
635   - irq_enter();
636 635 s390_idle_check();
  636 + irq_enter();
637 637 if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator)
638 638 /* Serve timer interrupts first. */
639 639 clock_comparator_work();
drivers/s390/s390mach.c
... ... @@ -18,6 +18,7 @@
18 18 #include <asm/etr.h>
19 19 #include <asm/lowcore.h>
20 20 #include <asm/cio.h>
  21 +#include <asm/cpu.h>
21 22 #include "s390mach.h"
22 23  
23 24 static struct semaphore m_sem;
... ... @@ -368,6 +369,8 @@
368 369 int umode;
369 370  
370 371 lockdep_off();
  372 +
  373 + s390_idle_check();
371 374  
372 375 mci = (struct mci *) &S390_lowcore.mcck_interruption_code;
373 376 mcck = &__get_cpu_var(cpu_mcck);
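Both s390 hunks above enforce the same ordering: an interrupt or machine check that may hit an idle cpu calls s390_idle_check() first, so the idle period is closed before irq_enter() starts charging interrupt time. A sketch of the resulting entry sequence; the handler body is a placeholder:

#include <asm/cpu.h>            /* s390_idle_check() */
#include <linux/hardirq.h>      /* irq_enter(), irq_exit() */

static void example_interrupt_entry(void)
{
        s390_idle_check();      /* end the idle period before irq accounting */
        irq_enter();

        /* ... actual interrupt handling ... */

        irq_exit();
}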
include/linux/kernel_stat.h
... ... @@ -79,11 +79,14 @@
79 79 }
80 80  
81 81 extern unsigned long long task_delta_exec(struct task_struct *);
82   -extern void account_user_time(struct task_struct *, cputime_t);
83   -extern void account_user_time_scaled(struct task_struct *, cputime_t);
84   -extern void account_system_time(struct task_struct *, int, cputime_t);
85   -extern void account_system_time_scaled(struct task_struct *, cputime_t);
86   -extern void account_steal_time(struct task_struct *, cputime_t);
  82 +extern void account_user_time(struct task_struct *, cputime_t, cputime_t);
  83 +extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t);
  84 +extern void account_steal_time(cputime_t);
  85 +extern void account_idle_time(cputime_t);
  86 +
  87 +extern void account_process_tick(struct task_struct *, int user);
  88 +extern void account_steal_ticks(unsigned long ticks);
  89 +extern void account_idle_ticks(unsigned long ticks);
87 90  
88 91 #endif /* _LINUX_KERNEL_STAT_H */
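The prototype changes fold each former pair of plain/scaled calls into a single call that takes both values. A caller that used to issue two calls per category now looks roughly like this; delta and delta_scaled are placeholder names:

#include <linux/kernel_stat.h>
#include <linux/sched.h>

static void example_charge_user(struct task_struct *p, cputime_t delta,
                                cputime_t delta_scaled)
{
        /*
         * Before: account_user_time(p, delta);
         *         account_user_time_scaled(p, delta_scaled);
         * After:  one call carries both the plain and the scaled cputime.
         */
        account_user_time(p, delta, delta_scaled);
}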
include/linux/sched.h
... ... @@ -284,7 +284,6 @@
284 284  
285 285 extern void cpu_init (void);
286 286 extern void trap_init(void);
287   -extern void account_process_tick(struct task_struct *task, int user);
288 287 extern void update_process_times(int user);
289 288 extern void scheduler_tick(void);
290 289  
kernel/sched.c
... ... @@ -4150,13 +4150,17 @@
4150 4150 * Account user cpu time to a process.
4151 4151 * @p: the process that the cpu time gets accounted to
4152 4152 * @cputime: the cpu time spent in user space since the last update
  4153 + * @cputime_scaled: cputime scaled by cpu frequency
4153 4154 */
4154   -void account_user_time(struct task_struct *p, cputime_t cputime)
  4155 +void account_user_time(struct task_struct *p, cputime_t cputime,
  4156 + cputime_t cputime_scaled)
4155 4157 {
4156 4158 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
4157 4159 cputime64_t tmp;
4158 4160  
  4161 + /* Add user time to process. */
4159 4162 p->utime = cputime_add(p->utime, cputime);
  4163 + p->utimescaled = cputime_add(p->utimescaled, cputime_scaled);
4160 4164 account_group_user_time(p, cputime);
4161 4165  
4162 4166 /* Add user time to cpustat. */
4163 4167  
4164 4168  
4165 4169  
4166 4170  
4167 4171  
4168 4172  
4169 4173  
4170 4174  
4171 4175  
4172 4176  
4173 4177  
... ... @@ -4173,51 +4177,48 @@
4173 4177 * Account guest cpu time to a process.
4174 4178 * @p: the process that the cpu time gets accounted to
4175 4179 * @cputime: the cpu time spent in virtual machine since the last update
  4180 + * @cputime_scaled: cputime scaled by cpu frequency
4176 4181 */
4177   -static void account_guest_time(struct task_struct *p, cputime_t cputime)
  4182 +static void account_guest_time(struct task_struct *p, cputime_t cputime,
  4183 + cputime_t cputime_scaled)
4178 4184 {
4179 4185 cputime64_t tmp;
4180 4186 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
4181 4187  
4182 4188 tmp = cputime_to_cputime64(cputime);
4183 4189  
  4190 + /* Add guest time to process. */
4184 4191 p->utime = cputime_add(p->utime, cputime);
  4192 + p->utimescaled = cputime_add(p->utimescaled, cputime_scaled);
4185 4193 account_group_user_time(p, cputime);
4186 4194 p->gtime = cputime_add(p->gtime, cputime);
4187 4195  
  4196 + /* Add guest time to cpustat. */
4188 4197 cpustat->user = cputime64_add(cpustat->user, tmp);
4189 4198 cpustat->guest = cputime64_add(cpustat->guest, tmp);
4190 4199 }
4191 4200  
4192 4201 /*
4193   - * Account scaled user cpu time to a process.
4194   - * @p: the process that the cpu time gets accounted to
4195   - * @cputime: the cpu time spent in user space since the last update
4196   - */
4197   -void account_user_time_scaled(struct task_struct *p, cputime_t cputime)
4198   -{
4199   - p->utimescaled = cputime_add(p->utimescaled, cputime);
4200   -}
4201   -
4202   -/*
4203 4202 * Account system cpu time to a process.
4204 4203 * @p: the process that the cpu time gets accounted to
4205 4204 * @hardirq_offset: the offset to subtract from hardirq_count()
4206 4205 * @cputime: the cpu time spent in kernel space since the last update
  4206 + * @cputime_scaled: cputime scaled by cpu frequency
4207 4207 */
4208 4208 void account_system_time(struct task_struct *p, int hardirq_offset,
4209   - cputime_t cputime)
  4209 + cputime_t cputime, cputime_t cputime_scaled)
4210 4210 {
4211 4211 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
4212   - struct rq *rq = this_rq();
4213 4212 cputime64_t tmp;
4214 4213  
4215 4214 if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
4216   - account_guest_time(p, cputime);
  4215 + account_guest_time(p, cputime, cputime_scaled);
4217 4216 return;
4218 4217 }
4219 4218  
  4219 + /* Add system time to process. */
4220 4220 p->stime = cputime_add(p->stime, cputime);
  4221 + p->stimescaled = cputime_add(p->stimescaled, cputime_scaled);
4221 4222 account_group_system_time(p, cputime);
4222 4223  
4223 4224 /* Add system time to cpustat. */
4224 4225  
4225 4226  
4226 4227  
4227 4228  
4228 4229  
4229 4230  
4230 4231  
4231 4232  
4232 4233  
... ... @@ -4226,47 +4227,83 @@
4226 4227 cpustat->irq = cputime64_add(cpustat->irq, tmp);
4227 4228 else if (softirq_count())
4228 4229 cpustat->softirq = cputime64_add(cpustat->softirq, tmp);
4229   - else if (p != rq->idle)
4230   - cpustat->system = cputime64_add(cpustat->system, tmp);
4231   - else if (atomic_read(&rq->nr_iowait) > 0)
4232   - cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
4233 4230 else
4234   - cpustat->idle = cputime64_add(cpustat->idle, tmp);
  4231 + cpustat->system = cputime64_add(cpustat->system, tmp);
  4232 +
4235 4233 /* Account for system time used */
4236 4234 acct_update_integrals(p);
4237 4235 }
4238 4236  
4239 4237 /*
4240   - * Account scaled system cpu time to a process.
4241   - * @p: the process that the cpu time gets accounted to
4242   - * @hardirq_offset: the offset to subtract from hardirq_count()
4243   - * @cputime: the cpu time spent in kernel space since the last update
  4238 + * Account for involuntary wait time.
  4239 + * @cputime: the cpu time spent in involuntary wait
4244 4240 */
4245   -void account_system_time_scaled(struct task_struct *p, cputime_t cputime)
  4241 +void account_steal_time(cputime_t cputime)
4246 4242 {
4247   - p->stimescaled = cputime_add(p->stimescaled, cputime);
  4243 + struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
  4244 + cputime64_t cputime64 = cputime_to_cputime64(cputime);
  4245 +
  4246 + cpustat->steal = cputime64_add(cpustat->steal, cputime64);
4248 4247 }
4249 4248  
4250 4249 /*
4251   - * Account for involuntary wait time.
4252   - * @p: the process from which the cpu time has been stolen
4253   - * @steal: the cpu time spent in involuntary wait
  4250 + * Account for idle time.
  4251 + * @cputime: the cpu time spent in idle wait
4254 4252 */
4255   -void account_steal_time(struct task_struct *p, cputime_t steal)
  4253 +void account_idle_time(cputime_t cputime)
4256 4254 {
4257 4255 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
4258   - cputime64_t tmp = cputime_to_cputime64(steal);
  4256 + cputime64_t cputime64 = cputime_to_cputime64(cputime);
4259 4257 struct rq *rq = this_rq();
4260 4258  
4261   - if (p == rq->idle) {
4262   - p->stime = cputime_add(p->stime, steal);
4263   - if (atomic_read(&rq->nr_iowait) > 0)
4264   - cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
4265   - else
4266   - cpustat->idle = cputime64_add(cpustat->idle, tmp);
4267   - } else
4268   - cpustat->steal = cputime64_add(cpustat->steal, tmp);
  4259 + if (atomic_read(&rq->nr_iowait) > 0)
  4260 + cpustat->iowait = cputime64_add(cpustat->iowait, cputime64);
  4261 + else
  4262 + cpustat->idle = cputime64_add(cpustat->idle, cputime64);
4269 4263 }
  4264 +
  4265 +#ifndef CONFIG_VIRT_CPU_ACCOUNTING
  4266 +
  4267 +/*
  4268 + * Account a single tick of cpu time.
  4269 + * @p: the process that the cpu time gets accounted to
  4270 + * @user_tick: indicates if the tick is a user or a system tick
  4271 + */
  4272 +void account_process_tick(struct task_struct *p, int user_tick)
  4273 +{
  4274 + cputime_t one_jiffy = jiffies_to_cputime(1);
  4275 + cputime_t one_jiffy_scaled = cputime_to_scaled(one_jiffy);
  4276 + struct rq *rq = this_rq();
  4277 +
  4278 + if (user_tick)
  4279 + account_user_time(p, one_jiffy, one_jiffy_scaled);
  4280 + else if (p != rq->idle)
  4281 + account_system_time(p, HARDIRQ_OFFSET, one_jiffy,
  4282 + one_jiffy_scaled);
  4283 + else
  4284 + account_idle_time(one_jiffy);
  4285 +}
  4286 +
  4287 +/*
  4288 + * Account multiple ticks of steal time.
  4289 + * @ticks: number of ticks the cpu spent in involuntary wait
  4290 + * (i.e. stolen by the hypervisor)
  4291 + */
  4292 +void account_steal_ticks(unsigned long ticks)
  4293 +{
  4294 + account_steal_time(jiffies_to_cputime(ticks));
  4295 +}
  4296 +
  4297 +/*
  4298 + * Account multiple ticks of idle time.
  4299 + * @ticks: number of ticks the cpu spent idle
  4300 + */
  4301 +void account_idle_ticks(unsigned long ticks)
  4302 +{
  4303 + account_idle_time(jiffies_to_cputime(ticks));
  4304 +}
  4305 +
  4306 +#endif
4270 4307  
4271 4308 /*
4272 4309 * Use precise platform statistics if available:
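The cpustat buckets updated by these functions (user, nice, system, idle, iowait, irq, softirq, steal) are the per-cpu counters that end up in /proc/stat. A small userspace check, assuming the conventional field order of the aggregate cpu line; not part of the commit:

#include <stdio.h>

int main(void)
{
        unsigned long long user, nice, system, idle, iowait, irq, softirq, steal;
        FILE *f = fopen("/proc/stat", "r");

        if (!f)
                return 1;
        if (fscanf(f, "cpu %llu %llu %llu %llu %llu %llu %llu %llu",
                   &user, &nice, &system, &idle, &iowait, &irq, &softirq,
                   &steal) == 8)
                printf("idle=%llu iowait=%llu steal=%llu\n",
                       idle, iowait, steal);
        fclose(f);
        return 0;
}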
kernel/time/tick-sched.c
... ... @@ -419,7 +419,9 @@
419 419 {
420 420 int cpu = smp_processor_id();
421 421 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
  422 +#ifndef CONFIG_VIRT_CPU_ACCOUNTING
422 423 unsigned long ticks;
  424 +#endif
423 425 ktime_t now;
424 426  
425 427 local_irq_disable();
... ... @@ -441,6 +443,7 @@
441 443 tick_do_update_jiffies64(now);
442 444 cpumask_clear_cpu(cpu, nohz_cpu_mask);
443 445  
  446 +#ifndef CONFIG_VIRT_CPU_ACCOUNTING
444 447 /*
445 448 * We stopped the tick in idle. Update process times would miss the
446 449 * time we slept as update_process_times does only a 1 tick
... ... @@ -450,12 +453,9 @@
450 453 /*
451 454 * We might be one off. Do not randomly account a huge number of ticks!
452 455 */
453   - if (ticks && ticks < LONG_MAX) {
454   - add_preempt_count(HARDIRQ_OFFSET);
455   - account_system_time(current, HARDIRQ_OFFSET,
456   - jiffies_to_cputime(ticks));
457   - sub_preempt_count(HARDIRQ_OFFSET);
458   - }
  456 + if (ticks && ticks < LONG_MAX)
  457 + account_idle_ticks(ticks);
  458 +#endif
459 459  
460 460 touch_softlockup_watchdog();
461 461 /*
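While the tick is stopped for idle, update_process_times() does not run, so the jiffies slept through are charged in one go when the tick restarts; with CONFIG_VIRT_CPU_ACCOUNTING the architecture measures idle time itself and the block is compiled out. A sketch of the pattern, assuming the caller passes the jiffies value recorded when the cpu went idle:

#include <linux/jiffies.h>
#include <linux/kernel.h>       /* LONG_MAX */
#include <linux/kernel_stat.h>  /* account_idle_ticks() */

static void example_account_missed_idle(unsigned long idle_entry_jiffies)
{
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
        unsigned long ticks = jiffies - idle_entry_jiffies;

        /* we might be one off, so do not account a huge number of ticks */
        if (ticks && ticks < LONG_MAX)
                account_idle_ticks(ticks);
#endif
}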
kernel/timer.c
... ... @@ -1018,21 +1018,6 @@
1018 1018 }
1019 1019 #endif
1020 1020  
1021   -#ifndef CONFIG_VIRT_CPU_ACCOUNTING
1022   -void account_process_tick(struct task_struct *p, int user_tick)
1023   -{
1024   - cputime_t one_jiffy = jiffies_to_cputime(1);
1025   -
1026   - if (user_tick) {
1027   - account_user_time(p, one_jiffy);
1028   - account_user_time_scaled(p, cputime_to_scaled(one_jiffy));
1029   - } else {
1030   - account_system_time(p, HARDIRQ_OFFSET, one_jiffy);
1031   - account_system_time_scaled(p, cputime_to_scaled(one_jiffy));
1032   - }
1033   -}
1034   -#endif
1035   -
1036 1021 /*
1037 1022 * Called from the timer interrupt handler to charge one tick to the current
1038 1023 * process. user_tick is 1 if the tick is user time, 0 for system.
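With the old definition removed here, the remaining caller in this file is update_process_times(), which now simply charges the current task one tick via account_process_tick() and goes on with the rest of the per-tick work. A rough sketch of that caller; the exact set of other per-tick hooks (posix cpu timers, RCU, profiling) is elided:

#include <linux/kernel_stat.h>  /* account_process_tick() */
#include <linux/sched.h>        /* current, scheduler_tick() */
#include <linux/timer.h>        /* run_local_timers() */

static void example_update_process_times(int user_tick)
{
        struct task_struct *p = current;

        /* charge the tick to user, system or idle time */
        account_process_tick(p, user_tick);

        /* remaining per-tick work (other hooks elided) */
        run_local_timers();
        scheduler_tick();
}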