Commit 3b5d8510b94a95e493e8c4951ffc3d1cf6a6792d
Exists in
master
and in
20 other branches
Merge branch 'core-locking-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull core locking changes from Ingo Molnar:
 "The biggest change is the rwsem lock-steal improvements, both to the assembly optimized and the spinlock based variants. The other notable change is the clean up of the seqlock implementation to be based on the seqcount infrastructure. The rest is assorted smaller debuggability, cleanup and continued -rt locking changes."
* 'core-locking-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  - rwsem-spinlock: Implement writer lock-stealing for better scalability
  - futex: Revert "futex: Mark get_robust_list as deprecated"
  - generic: Use raw local irq variant for generic cmpxchg
  - lockdep: Selftest: convert spinlock to raw spinlock
  - seqlock: Use seqcount infrastructure
  - seqlock: Remove unused functions
  - ntp: Make ntp_lock raw
  - intel_idle: Convert i7300_idle_lock to raw_spinlock
  - locking: Various static lock initializer fixes
  - lockdep: Print more info when MAX_LOCK_DEPTH is exceeded
  - rwsem: Implement writer lock-stealing for better scalability
  - lockdep: Silence warning if CONFIG_LOCKDEP isn't set
  - watchdog: Use local_clock for get_timestamp()
  - lockdep: Rename print_unlock_inbalance_bug() to print_unlock_imbalance_bug()
  - locking/stat: Fix a typo
Showing 17 changed files Side-by-side Diff
- Documentation/lockstat.txt
- drivers/char/random.c
- drivers/idle/i7300_idle.c
- drivers/usb/chipidea/debug.c
- fs/file.c
- include/asm-generic/cmpxchg-local.h
- include/linux/idr.h
- include/linux/lockdep.h
- include/linux/seqlock.h
- kernel/futex.c
- kernel/futex_compat.c
- kernel/lockdep.c
- kernel/time/ntp.c
- kernel/watchdog.c
- lib/locking-selftest.c
- lib/rwsem-spinlock.c
- lib/rwsem.c
Documentation/lockstat.txt
drivers/char/random.c
... | ... | @@ -445,7 +445,7 @@ |
445 | 445 | .poolinfo = &poolinfo_table[0], |
446 | 446 | .name = "input", |
447 | 447 | .limit = 1, |
448 | - .lock = __SPIN_LOCK_UNLOCKED(&input_pool.lock), | |
448 | + .lock = __SPIN_LOCK_UNLOCKED(input_pool.lock), | |
449 | 449 | .pool = input_pool_data |
450 | 450 | }; |
451 | 451 | |
... | ... | @@ -454,7 +454,7 @@ |
454 | 454 | .name = "blocking", |
455 | 455 | .limit = 1, |
456 | 456 | .pull = &input_pool, |
457 | - .lock = __SPIN_LOCK_UNLOCKED(&blocking_pool.lock), | |
457 | + .lock = __SPIN_LOCK_UNLOCKED(blocking_pool.lock), | |
458 | 458 | .pool = blocking_pool_data |
459 | 459 | }; |
460 | 460 | |
... | ... | @@ -462,7 +462,7 @@ |
462 | 462 | .poolinfo = &poolinfo_table[1], |
463 | 463 | .name = "nonblocking", |
464 | 464 | .pull = &input_pool, |
465 | - .lock = __SPIN_LOCK_UNLOCKED(&nonblocking_pool.lock), | |
465 | + .lock = __SPIN_LOCK_UNLOCKED(nonblocking_pool.lock), | |
466 | 466 | .pool = nonblocking_pool_data |
467 | 467 | }; |
468 | 468 |
drivers/idle/i7300_idle.c
... | ... | @@ -75,7 +75,7 @@ |
75 | 75 | |
76 | 76 | static struct pci_dev *fbd_dev; |
77 | 77 | |
78 | -static spinlock_t i7300_idle_lock; | |
78 | +static raw_spinlock_t i7300_idle_lock; | |
79 | 79 | static int i7300_idle_active; |
80 | 80 | |
81 | 81 | static u8 i7300_idle_thrtctl_saved; |
... | ... | @@ -457,7 +457,7 @@ |
457 | 457 | idle_begin_time = ktime_get(); |
458 | 458 | } |
459 | 459 | |
460 | - spin_lock_irqsave(&i7300_idle_lock, flags); | |
460 | + raw_spin_lock_irqsave(&i7300_idle_lock, flags); | |
461 | 461 | if (val == IDLE_START) { |
462 | 462 | |
463 | 463 | cpumask_set_cpu(smp_processor_id(), idle_cpumask); |
... | ... | @@ -506,7 +506,7 @@ |
506 | 506 | } |
507 | 507 | } |
508 | 508 | end: |
509 | - spin_unlock_irqrestore(&i7300_idle_lock, flags); | |
509 | + raw_spin_unlock_irqrestore(&i7300_idle_lock, flags); | |
510 | 510 | return 0; |
511 | 511 | } |
512 | 512 | |
... | ... | @@ -548,7 +548,7 @@ |
548 | 548 | |
549 | 549 | static int __init i7300_idle_init(void) |
550 | 550 | { |
551 | - spin_lock_init(&i7300_idle_lock); | |
551 | + raw_spin_lock_init(&i7300_idle_lock); | |
552 | 552 | total_us = 0; |
553 | 553 | |
554 | 554 | if (i7300_idle_platform_probe(&fbd_dev, &ioat_dev, forceload)) |
drivers/usb/chipidea/debug.c
fs/file.c
include/asm-generic/cmpxchg-local.h
... | ... | @@ -21,7 +21,7 @@ |
21 | 21 | if (size == 8 && sizeof(unsigned long) != 8) |
22 | 22 | wrong_size_cmpxchg(ptr); |
23 | 23 | |
24 | - local_irq_save(flags); | |
24 | + raw_local_irq_save(flags); | |
25 | 25 | switch (size) { |
26 | 26 | case 1: prev = *(u8 *)ptr; |
27 | 27 | if (prev == old) |
... | ... | @@ -42,7 +42,7 @@ |
42 | 42 | default: |
43 | 43 | wrong_size_cmpxchg(ptr); |
44 | 44 | } |
45 | - local_irq_restore(flags); | |
45 | + raw_local_irq_restore(flags); | |
46 | 46 | return prev; |
47 | 47 | } |
48 | 48 | |
49 | 49 | |
... | ... | @@ -55,11 +55,11 @@ |
55 | 55 | u64 prev; |
56 | 56 | unsigned long flags; |
57 | 57 | |
58 | - local_irq_save(flags); | |
58 | + raw_local_irq_save(flags); | |
59 | 59 | prev = *(u64 *)ptr; |
60 | 60 | if (prev == old) |
61 | 61 | *(u64 *)ptr = new; |
62 | - local_irq_restore(flags); | |
62 | + raw_local_irq_restore(flags); | |
63 | 63 | return prev; |
64 | 64 | } |
65 | 65 |
include/linux/idr.h
... | ... | @@ -136,7 +136,7 @@ |
136 | 136 | struct ida_bitmap *free_bitmap; |
137 | 137 | }; |
138 | 138 | |
139 | -#define IDA_INIT(name) { .idr = IDR_INIT(name), .free_bitmap = NULL, } | |
139 | +#define IDA_INIT(name) { .idr = IDR_INIT((name).idr), .free_bitmap = NULL, } | |
140 | 140 | #define DEFINE_IDA(name) struct ida name = IDA_INIT(name) |
141 | 141 | |
142 | 142 | int ida_pre_get(struct ida *ida, gfp_t gfp_mask); |
include/linux/lockdep.h
include/linux/seqlock.h
... | ... | @@ -30,92 +30,12 @@ |
30 | 30 | #include <linux/preempt.h> |
31 | 31 | #include <asm/processor.h> |
32 | 32 | |
33 | -typedef struct { | |
34 | - unsigned sequence; | |
35 | - spinlock_t lock; | |
36 | -} seqlock_t; | |
37 | - | |
38 | 33 | /* |
39 | - * These macros triggered gcc-3.x compile-time problems. We think these are | |
40 | - * OK now. Be cautious. | |
41 | - */ | |
42 | -#define __SEQLOCK_UNLOCKED(lockname) \ | |
43 | - { 0, __SPIN_LOCK_UNLOCKED(lockname) } | |
44 | - | |
45 | -#define seqlock_init(x) \ | |
46 | - do { \ | |
47 | - (x)->sequence = 0; \ | |
48 | - spin_lock_init(&(x)->lock); \ | |
49 | - } while (0) | |
50 | - | |
51 | -#define DEFINE_SEQLOCK(x) \ | |
52 | - seqlock_t x = __SEQLOCK_UNLOCKED(x) | |
53 | - | |
54 | -/* Lock out other writers and update the count. | |
55 | - * Acts like a normal spin_lock/unlock. | |
56 | - * Don't need preempt_disable() because that is in the spin_lock already. | |
57 | - */ | |
58 | -static inline void write_seqlock(seqlock_t *sl) | |
59 | -{ | |
60 | - spin_lock(&sl->lock); | |
61 | - ++sl->sequence; | |
62 | - smp_wmb(); | |
63 | -} | |
64 | - | |
65 | -static inline void write_sequnlock(seqlock_t *sl) | |
66 | -{ | |
67 | - smp_wmb(); | |
68 | - sl->sequence++; | |
69 | - spin_unlock(&sl->lock); | |
70 | -} | |
71 | - | |
72 | -static inline int write_tryseqlock(seqlock_t *sl) | |
73 | -{ | |
74 | - int ret = spin_trylock(&sl->lock); | |
75 | - | |
76 | - if (ret) { | |
77 | - ++sl->sequence; | |
78 | - smp_wmb(); | |
79 | - } | |
80 | - return ret; | |
81 | -} | |
82 | - | |
83 | -/* Start of read calculation -- fetch last complete writer token */ | |
84 | -static __always_inline unsigned read_seqbegin(const seqlock_t *sl) | |
85 | -{ | |
86 | - unsigned ret; | |
87 | - | |
88 | -repeat: | |
89 | - ret = ACCESS_ONCE(sl->sequence); | |
90 | - if (unlikely(ret & 1)) { | |
91 | - cpu_relax(); | |
92 | - goto repeat; | |
93 | - } | |
94 | - smp_rmb(); | |
95 | - | |
96 | - return ret; | |
97 | -} | |
98 | - | |
99 | -/* | |
100 | - * Test if reader processed invalid data. | |
101 | - * | |
102 | - * If sequence value changed then writer changed data while in section. | |
103 | - */ | |
104 | -static __always_inline int read_seqretry(const seqlock_t *sl, unsigned start) | |
105 | -{ | |
106 | - smp_rmb(); | |
107 | - | |
108 | - return unlikely(sl->sequence != start); | |
109 | -} | |
110 | - | |
111 | - | |
112 | -/* | |
113 | 34 | * Version using sequence counter only. |
114 | 35 | * This can be used when code has its own mutex protecting the |
115 | 36 | * updating starting before the write_seqcountbeqin() and ending |
116 | 37 | * after the write_seqcount_end(). |
117 | 38 | */ |
118 | - | |
119 | 39 | typedef struct seqcount { |
120 | 40 | unsigned sequence; |
121 | 41 | } seqcount_t; |
... | ... | @@ -218,7 +138,6 @@ |
218 | 138 | static inline int read_seqcount_retry(const seqcount_t *s, unsigned start) |
219 | 139 | { |
220 | 140 | smp_rmb(); |
221 | - | |
222 | 141 | return __read_seqcount_retry(s, start); |
223 | 142 | } |
224 | 143 | |
225 | 144 | |
226 | 145 | |
227 | 146 | |
228 | 147 | |
229 | 148 | |
... | ... | @@ -252,32 +171,102 @@ |
252 | 171 | s->sequence+=2; |
253 | 172 | } |
254 | 173 | |
174 | +typedef struct { | |
175 | + struct seqcount seqcount; | |
176 | + spinlock_t lock; | |
177 | +} seqlock_t; | |
178 | + | |
255 | 179 | /* |
256 | - * Possible sw/hw IRQ protected versions of the interfaces. | |
180 | + * These macros triggered gcc-3.x compile-time problems. We think these are | |
181 | + * OK now. Be cautious. | |
257 | 182 | */ |
258 | -#define write_seqlock_irqsave(lock, flags) \ | |
259 | - do { local_irq_save(flags); write_seqlock(lock); } while (0) | |
260 | -#define write_seqlock_irq(lock) \ | |
261 | - do { local_irq_disable(); write_seqlock(lock); } while (0) | |
262 | -#define write_seqlock_bh(lock) \ | |
263 | - do { local_bh_disable(); write_seqlock(lock); } while (0) | |
183 | +#define __SEQLOCK_UNLOCKED(lockname) \ | |
184 | + { \ | |
185 | + .seqcount = SEQCNT_ZERO, \ | |
186 | + .lock = __SPIN_LOCK_UNLOCKED(lockname) \ | |
187 | + } | |
264 | 188 | |
265 | -#define write_sequnlock_irqrestore(lock, flags) \ | |
266 | - do { write_sequnlock(lock); local_irq_restore(flags); } while(0) | |
267 | -#define write_sequnlock_irq(lock) \ | |
268 | - do { write_sequnlock(lock); local_irq_enable(); } while(0) | |
269 | -#define write_sequnlock_bh(lock) \ | |
270 | - do { write_sequnlock(lock); local_bh_enable(); } while(0) | |
189 | +#define seqlock_init(x) \ | |
190 | + do { \ | |
191 | + seqcount_init(&(x)->seqcount); \ | |
192 | + spin_lock_init(&(x)->lock); \ | |
193 | + } while (0) | |
271 | 194 | |
272 | -#define read_seqbegin_irqsave(lock, flags) \ | |
273 | - ({ local_irq_save(flags); read_seqbegin(lock); }) | |
195 | +#define DEFINE_SEQLOCK(x) \ | |
196 | + seqlock_t x = __SEQLOCK_UNLOCKED(x) | |
274 | 197 | |
275 | -#define read_seqretry_irqrestore(lock, iv, flags) \ | |
276 | - ({ \ | |
277 | - int ret = read_seqretry(lock, iv); \ | |
278 | - local_irq_restore(flags); \ | |
279 | - ret; \ | |
280 | - }) | |
198 | +/* | |
199 | + * Read side functions for starting and finalizing a read side section. | |
200 | + */ | |
201 | +static inline unsigned read_seqbegin(const seqlock_t *sl) | |
202 | +{ | |
203 | + return read_seqcount_begin(&sl->seqcount); | |
204 | +} | |
205 | + | |
206 | +static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) | |
207 | +{ | |
208 | + return read_seqcount_retry(&sl->seqcount, start); | |
209 | +} | |
210 | + | |
211 | +/* | |
212 | + * Lock out other writers and update the count. | |
213 | + * Acts like a normal spin_lock/unlock. | |
214 | + * Don't need preempt_disable() because that is in the spin_lock already. | |
215 | + */ | |
216 | +static inline void write_seqlock(seqlock_t *sl) | |
217 | +{ | |
218 | + spin_lock(&sl->lock); | |
219 | + write_seqcount_begin(&sl->seqcount); | |
220 | +} | |
221 | + | |
222 | +static inline void write_sequnlock(seqlock_t *sl) | |
223 | +{ | |
224 | + write_seqcount_end(&sl->seqcount); | |
225 | + spin_unlock(&sl->lock); | |
226 | +} | |
227 | + | |
228 | +static inline void write_seqlock_bh(seqlock_t *sl) | |
229 | +{ | |
230 | + spin_lock_bh(&sl->lock); | |
231 | + write_seqcount_begin(&sl->seqcount); | |
232 | +} | |
233 | + | |
234 | +static inline void write_sequnlock_bh(seqlock_t *sl) | |
235 | +{ | |
236 | + write_seqcount_end(&sl->seqcount); | |
237 | + spin_unlock_bh(&sl->lock); | |
238 | +} | |
239 | + | |
240 | +static inline void write_seqlock_irq(seqlock_t *sl) | |
241 | +{ | |
242 | + spin_lock_irq(&sl->lock); | |
243 | + write_seqcount_begin(&sl->seqcount); | |
244 | +} | |
245 | + | |
246 | +static inline void write_sequnlock_irq(seqlock_t *sl) | |
247 | +{ | |
248 | + write_seqcount_end(&sl->seqcount); | |
249 | + spin_unlock_irq(&sl->lock); | |
250 | +} | |
251 | + | |
252 | +static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl) | |
253 | +{ | |
254 | + unsigned long flags; | |
255 | + | |
256 | + spin_lock_irqsave(&sl->lock, flags); | |
257 | + write_seqcount_begin(&sl->seqcount); | |
258 | + return flags; | |
259 | +} | |
260 | + | |
261 | +#define write_seqlock_irqsave(lock, flags) \ | |
262 | + do { flags = __write_seqlock_irqsave(lock); } while (0) | |
263 | + | |
264 | +static inline void | |
265 | +write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags) | |
266 | +{ | |
267 | + write_seqcount_end(&sl->seqcount); | |
268 | + spin_unlock_irqrestore(&sl->lock, flags); | |
269 | +} | |
281 | 270 | |
282 | 271 | #endif /* __LINUX_SEQLOCK_H */ |
kernel/futex.c
kernel/futex_compat.c
kernel/lockdep.c
... | ... | @@ -3190,9 +3190,14 @@ |
3190 | 3190 | #endif |
3191 | 3191 | if (unlikely(curr->lockdep_depth >= MAX_LOCK_DEPTH)) { |
3192 | 3192 | debug_locks_off(); |
3193 | - printk("BUG: MAX_LOCK_DEPTH too low!\n"); | |
3193 | + printk("BUG: MAX_LOCK_DEPTH too low, depth: %i max: %lu!\n", | |
3194 | + curr->lockdep_depth, MAX_LOCK_DEPTH); | |
3194 | 3195 | printk("turning off the locking correctness validator.\n"); |
3196 | + | |
3197 | + lockdep_print_held_locks(current); | |
3198 | + debug_show_all_locks(); | |
3195 | 3199 | dump_stack(); |
3200 | + | |
3196 | 3201 | return 0; |
3197 | 3202 | } |
3198 | 3203 | |
... | ... | @@ -3203,7 +3208,7 @@ |
3203 | 3208 | } |
3204 | 3209 | |
3205 | 3210 | static int |
3206 | -print_unlock_inbalance_bug(struct task_struct *curr, struct lockdep_map *lock, | |
3211 | +print_unlock_imbalance_bug(struct task_struct *curr, struct lockdep_map *lock, | |
3207 | 3212 | unsigned long ip) |
3208 | 3213 | { |
3209 | 3214 | if (!debug_locks_off()) |
... | ... | @@ -3246,7 +3251,7 @@ |
3246 | 3251 | return 0; |
3247 | 3252 | |
3248 | 3253 | if (curr->lockdep_depth <= 0) |
3249 | - return print_unlock_inbalance_bug(curr, lock, ip); | |
3254 | + return print_unlock_imbalance_bug(curr, lock, ip); | |
3250 | 3255 | |
3251 | 3256 | return 1; |
3252 | 3257 | } |
... | ... | @@ -3317,7 +3322,7 @@ |
3317 | 3322 | goto found_it; |
3318 | 3323 | prev_hlock = hlock; |
3319 | 3324 | } |
3320 | - return print_unlock_inbalance_bug(curr, lock, ip); | |
3325 | + return print_unlock_imbalance_bug(curr, lock, ip); | |
3321 | 3326 | |
3322 | 3327 | found_it: |
3323 | 3328 | lockdep_init_map(lock, name, key, 0); |
... | ... | @@ -3384,7 +3389,7 @@ |
3384 | 3389 | goto found_it; |
3385 | 3390 | prev_hlock = hlock; |
3386 | 3391 | } |
3387 | - return print_unlock_inbalance_bug(curr, lock, ip); | |
3392 | + return print_unlock_imbalance_bug(curr, lock, ip); | |
3388 | 3393 | |
3389 | 3394 | found_it: |
3390 | 3395 | if (hlock->instance == lock) |
kernel/time/ntp.c
... | ... | @@ -23,7 +23,7 @@ |
23 | 23 | * NTP timekeeping variables: |
24 | 24 | */ |
25 | 25 | |
26 | -DEFINE_SPINLOCK(ntp_lock); | |
26 | +DEFINE_RAW_SPINLOCK(ntp_lock); | |
27 | 27 | |
28 | 28 | |
29 | 29 | /* USER_HZ period (usecs): */ |
... | ... | @@ -348,7 +348,7 @@ |
348 | 348 | { |
349 | 349 | unsigned long flags; |
350 | 350 | |
351 | - spin_lock_irqsave(&ntp_lock, flags); | |
351 | + raw_spin_lock_irqsave(&ntp_lock, flags); | |
352 | 352 | |
353 | 353 | time_adjust = 0; /* stop active adjtime() */ |
354 | 354 | time_status |= STA_UNSYNC; |
... | ... | @@ -362,7 +362,7 @@ |
362 | 362 | |
363 | 363 | /* Clear PPS state variables */ |
364 | 364 | pps_clear(); |
365 | - spin_unlock_irqrestore(&ntp_lock, flags); | |
365 | + raw_spin_unlock_irqrestore(&ntp_lock, flags); | |
366 | 366 | |
367 | 367 | } |
368 | 368 | |
369 | 369 | |
... | ... | @@ -372,9 +372,9 @@ |
372 | 372 | unsigned long flags; |
373 | 373 | s64 ret; |
374 | 374 | |
375 | - spin_lock_irqsave(&ntp_lock, flags); | |
375 | + raw_spin_lock_irqsave(&ntp_lock, flags); | |
376 | 376 | ret = tick_length; |
377 | - spin_unlock_irqrestore(&ntp_lock, flags); | |
377 | + raw_spin_unlock_irqrestore(&ntp_lock, flags); | |
378 | 378 | return ret; |
379 | 379 | } |
380 | 380 | |
... | ... | @@ -395,7 +395,7 @@ |
395 | 395 | int leap = 0; |
396 | 396 | unsigned long flags; |
397 | 397 | |
398 | - spin_lock_irqsave(&ntp_lock, flags); | |
398 | + raw_spin_lock_irqsave(&ntp_lock, flags); | |
399 | 399 | |
400 | 400 | /* |
401 | 401 | * Leap second processing. If in leap-insert state at the end of the |
... | ... | @@ -479,7 +479,7 @@ |
479 | 479 | time_adjust = 0; |
480 | 480 | |
481 | 481 | out: |
482 | - spin_unlock_irqrestore(&ntp_lock, flags); | |
482 | + raw_spin_unlock_irqrestore(&ntp_lock, flags); | |
483 | 483 | |
484 | 484 | return leap; |
485 | 485 | } |
... | ... | @@ -672,7 +672,7 @@ |
672 | 672 | |
673 | 673 | getnstimeofday(&ts); |
674 | 674 | |
675 | - spin_lock_irq(&ntp_lock); | |
675 | + raw_spin_lock_irq(&ntp_lock); | |
676 | 676 | |
677 | 677 | if (txc->modes & ADJ_ADJTIME) { |
678 | 678 | long save_adjust = time_adjust; |
... | ... | @@ -714,7 +714,7 @@ |
714 | 714 | /* fill PPS status fields */ |
715 | 715 | pps_fill_timex(txc); |
716 | 716 | |
717 | - spin_unlock_irq(&ntp_lock); | |
717 | + raw_spin_unlock_irq(&ntp_lock); | |
718 | 718 | |
719 | 719 | txc->time.tv_sec = ts.tv_sec; |
720 | 720 | txc->time.tv_usec = ts.tv_nsec; |
... | ... | @@ -912,7 +912,7 @@ |
912 | 912 | |
913 | 913 | pts_norm = pps_normalize_ts(*phase_ts); |
914 | 914 | |
915 | - spin_lock_irqsave(&ntp_lock, flags); | |
915 | + raw_spin_lock_irqsave(&ntp_lock, flags); | |
916 | 916 | |
917 | 917 | /* clear the error bits, they will be set again if needed */ |
918 | 918 | time_status &= ~(STA_PPSJITTER | STA_PPSWANDER | STA_PPSERROR); |
... | ... | @@ -925,7 +925,7 @@ |
925 | 925 | * just start the frequency interval */ |
926 | 926 | if (unlikely(pps_fbase.tv_sec == 0)) { |
927 | 927 | pps_fbase = *raw_ts; |
928 | - spin_unlock_irqrestore(&ntp_lock, flags); | |
928 | + raw_spin_unlock_irqrestore(&ntp_lock, flags); | |
929 | 929 | return; |
930 | 930 | } |
931 | 931 | |
... | ... | @@ -940,7 +940,7 @@ |
940 | 940 | time_status |= STA_PPSJITTER; |
941 | 941 | /* restart the frequency calibration interval */ |
942 | 942 | pps_fbase = *raw_ts; |
943 | - spin_unlock_irqrestore(&ntp_lock, flags); | |
943 | + raw_spin_unlock_irqrestore(&ntp_lock, flags); | |
944 | 944 | pr_err("hardpps: PPSJITTER: bad pulse\n"); |
945 | 945 | return; |
946 | 946 | } |
... | ... | @@ -957,7 +957,7 @@ |
957 | 957 | |
958 | 958 | hardpps_update_phase(pts_norm.nsec); |
959 | 959 | |
960 | - spin_unlock_irqrestore(&ntp_lock, flags); | |
960 | + raw_spin_unlock_irqrestore(&ntp_lock, flags); | |
961 | 961 | } |
962 | 962 | EXPORT_SYMBOL(hardpps); |
963 | 963 |
kernel/watchdog.c
... | ... | @@ -113,9 +113,9 @@ |
113 | 113 | * resolution, and we don't need to waste time with a big divide when |
114 | 114 | * 2^30ns == 1.074s. |
115 | 115 | */ |
116 | -static unsigned long get_timestamp(int this_cpu) | |
116 | +static unsigned long get_timestamp(void) | |
117 | 117 | { |
118 | - return cpu_clock(this_cpu) >> 30LL; /* 2^30 ~= 10^9 */ | |
118 | + return local_clock() >> 30LL; /* 2^30 ~= 10^9 */ | |
119 | 119 | } |
120 | 120 | |
121 | 121 | static void set_sample_period(void) |
... | ... | @@ -133,9 +133,7 @@ |
133 | 133 | /* Commands for resetting the watchdog */ |
134 | 134 | static void __touch_watchdog(void) |
135 | 135 | { |
136 | - int this_cpu = smp_processor_id(); | |
137 | - | |
138 | - __this_cpu_write(watchdog_touch_ts, get_timestamp(this_cpu)); | |
136 | + __this_cpu_write(watchdog_touch_ts, get_timestamp()); | |
139 | 137 | } |
140 | 138 | |
141 | 139 | void touch_softlockup_watchdog(void) |
... | ... | @@ -196,7 +194,7 @@ |
196 | 194 | |
197 | 195 | static int is_softlockup(unsigned long touch_ts) |
198 | 196 | { |
199 | - unsigned long now = get_timestamp(smp_processor_id()); | |
197 | + unsigned long now = get_timestamp(); | |
200 | 198 | |
201 | 199 | /* Warn about unreasonable delays: */ |
202 | 200 | if (time_after(now, touch_ts + get_softlockup_thresh())) |
lib/locking-selftest.c
... | ... | @@ -47,10 +47,10 @@ |
47 | 47 | * Normal standalone locks, for the circular and irq-context |
48 | 48 | * dependency tests: |
49 | 49 | */ |
50 | -static DEFINE_SPINLOCK(lock_A); | |
51 | -static DEFINE_SPINLOCK(lock_B); | |
52 | -static DEFINE_SPINLOCK(lock_C); | |
53 | -static DEFINE_SPINLOCK(lock_D); | |
50 | +static DEFINE_RAW_SPINLOCK(lock_A); | |
51 | +static DEFINE_RAW_SPINLOCK(lock_B); | |
52 | +static DEFINE_RAW_SPINLOCK(lock_C); | |
53 | +static DEFINE_RAW_SPINLOCK(lock_D); | |
54 | 54 | |
55 | 55 | static DEFINE_RWLOCK(rwlock_A); |
56 | 56 | static DEFINE_RWLOCK(rwlock_B); |
... | ... | @@ -73,12 +73,12 @@ |
73 | 73 | * but X* and Y* are different classes. We do this so that |
74 | 74 | * we do not trigger a real lockup: |
75 | 75 | */ |
76 | -static DEFINE_SPINLOCK(lock_X1); | |
77 | -static DEFINE_SPINLOCK(lock_X2); | |
78 | -static DEFINE_SPINLOCK(lock_Y1); | |
79 | -static DEFINE_SPINLOCK(lock_Y2); | |
80 | -static DEFINE_SPINLOCK(lock_Z1); | |
81 | -static DEFINE_SPINLOCK(lock_Z2); | |
76 | +static DEFINE_RAW_SPINLOCK(lock_X1); | |
77 | +static DEFINE_RAW_SPINLOCK(lock_X2); | |
78 | +static DEFINE_RAW_SPINLOCK(lock_Y1); | |
79 | +static DEFINE_RAW_SPINLOCK(lock_Y2); | |
80 | +static DEFINE_RAW_SPINLOCK(lock_Z1); | |
81 | +static DEFINE_RAW_SPINLOCK(lock_Z2); | |
82 | 82 | |
83 | 83 | static DEFINE_RWLOCK(rwlock_X1); |
84 | 84 | static DEFINE_RWLOCK(rwlock_X2); |
85 | 85 | |
... | ... | @@ -107,10 +107,10 @@ |
107 | 107 | */ |
108 | 108 | #define INIT_CLASS_FUNC(class) \ |
109 | 109 | static noinline void \ |
110 | -init_class_##class(spinlock_t *lock, rwlock_t *rwlock, struct mutex *mutex, \ | |
111 | - struct rw_semaphore *rwsem) \ | |
110 | +init_class_##class(raw_spinlock_t *lock, rwlock_t *rwlock, \ | |
111 | + struct mutex *mutex, struct rw_semaphore *rwsem)\ | |
112 | 112 | { \ |
113 | - spin_lock_init(lock); \ | |
113 | + raw_spin_lock_init(lock); \ | |
114 | 114 | rwlock_init(rwlock); \ |
115 | 115 | mutex_init(mutex); \ |
116 | 116 | init_rwsem(rwsem); \ |
117 | 117 | |
... | ... | @@ -168,10 +168,10 @@ |
168 | 168 | * Shortcuts for lock/unlock API variants, to keep |
169 | 169 | * the testcases compact: |
170 | 170 | */ |
171 | -#define L(x) spin_lock(&lock_##x) | |
172 | -#define U(x) spin_unlock(&lock_##x) | |
171 | +#define L(x) raw_spin_lock(&lock_##x) | |
172 | +#define U(x) raw_spin_unlock(&lock_##x) | |
173 | 173 | #define LU(x) L(x); U(x) |
174 | -#define SI(x) spin_lock_init(&lock_##x) | |
174 | +#define SI(x) raw_spin_lock_init(&lock_##x) | |
175 | 175 | |
176 | 176 | #define WL(x) write_lock(&rwlock_##x) |
177 | 177 | #define WU(x) write_unlock(&rwlock_##x) |
... | ... | @@ -911,7 +911,7 @@ |
911 | 911 | |
912 | 912 | #define I2(x) \ |
913 | 913 | do { \ |
914 | - spin_lock_init(&lock_##x); \ | |
914 | + raw_spin_lock_init(&lock_##x); \ | |
915 | 915 | rwlock_init(&rwlock_##x); \ |
916 | 916 | mutex_init(&mutex_##x); \ |
917 | 917 | init_rwsem(&rwsem_##x); \ |
lib/rwsem-spinlock.c
... | ... | @@ -73,20 +73,13 @@ |
73 | 73 | goto dont_wake_writers; |
74 | 74 | } |
75 | 75 | |
76 | - /* if we are allowed to wake writers try to grant a single write lock | |
77 | - * if there's a writer at the front of the queue | |
78 | - * - we leave the 'waiting count' incremented to signify potential | |
79 | - * contention | |
76 | + /* | |
77 | + * as we support write lock stealing, we can't set sem->activity | |
78 | + * to -1 here to indicate we get the lock. Instead, we wake it up | |
79 | + * to let it go get it again. | |
80 | 80 | */ |
81 | 81 | if (waiter->flags & RWSEM_WAITING_FOR_WRITE) { |
82 | - sem->activity = -1; | |
83 | - list_del(&waiter->list); | |
84 | - tsk = waiter->task; | |
85 | - /* Don't touch waiter after ->task has been NULLed */ | |
86 | - smp_mb(); | |
87 | - waiter->task = NULL; | |
88 | - wake_up_process(tsk); | |
89 | - put_task_struct(tsk); | |
82 | + wake_up_process(waiter->task); | |
90 | 83 | goto out; |
91 | 84 | } |
92 | 85 | |
93 | 86 | |
94 | 87 | |
95 | 88 | |
... | ... | @@ -121,18 +114,10 @@ |
121 | 114 | __rwsem_wake_one_writer(struct rw_semaphore *sem) |
122 | 115 | { |
123 | 116 | struct rwsem_waiter *waiter; |
124 | - struct task_struct *tsk; | |
125 | 117 | |
126 | - sem->activity = -1; | |
127 | - | |
128 | 118 | waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); |
129 | - list_del(&waiter->list); | |
119 | + wake_up_process(waiter->task); | |
130 | 120 | |
131 | - tsk = waiter->task; | |
132 | - smp_mb(); | |
133 | - waiter->task = NULL; | |
134 | - wake_up_process(tsk); | |
135 | - put_task_struct(tsk); | |
136 | 121 | return sem; |
137 | 122 | } |
138 | 123 | |
... | ... | @@ -204,7 +189,6 @@ |
204 | 189 | |
205 | 190 | /* |
206 | 191 | * get a write lock on the semaphore |
207 | - * - we increment the waiting count anyway to indicate an exclusive lock | |
208 | 192 | */ |
209 | 193 | void __sched __down_write_nested(struct rw_semaphore *sem, int subclass) |
210 | 194 | { |
211 | 195 | |
212 | 196 | |
213 | 197 | |
214 | 198 | |
215 | 199 | |
216 | 200 | |
217 | 201 | |
218 | 202 | |
... | ... | @@ -214,37 +198,32 @@ |
214 | 198 | |
215 | 199 | raw_spin_lock_irqsave(&sem->wait_lock, flags); |
216 | 200 | |
217 | - if (sem->activity == 0 && list_empty(&sem->wait_list)) { | |
218 | - /* granted */ | |
219 | - sem->activity = -1; | |
220 | - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | |
221 | - goto out; | |
222 | - } | |
223 | - | |
224 | - tsk = current; | |
225 | - set_task_state(tsk, TASK_UNINTERRUPTIBLE); | |
226 | - | |
227 | 201 | /* set up my own style of waitqueue */ |
202 | + tsk = current; | |
228 | 203 | waiter.task = tsk; |
229 | 204 | waiter.flags = RWSEM_WAITING_FOR_WRITE; |
230 | - get_task_struct(tsk); | |
231 | - | |
232 | 205 | list_add_tail(&waiter.list, &sem->wait_list); |
233 | 206 | |
234 | - /* we don't need to touch the semaphore struct anymore */ | |
235 | - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | |
236 | - | |
237 | - /* wait to be given the lock */ | |
207 | + /* wait for someone to release the lock */ | |
238 | 208 | for (;;) { |
239 | - if (!waiter.task) | |
209 | + /* | |
210 | + * That is the key to support write lock stealing: allows the | |
211 | + * task already on CPU to get the lock soon rather than put | |
212 | + * itself into sleep and waiting for system woke it or someone | |
213 | + * else in the head of the wait list up. | |
214 | + */ | |
215 | + if (sem->activity == 0) | |
240 | 216 | break; |
241 | - schedule(); | |
242 | 217 | set_task_state(tsk, TASK_UNINTERRUPTIBLE); |
218 | + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | |
219 | + schedule(); | |
220 | + raw_spin_lock_irqsave(&sem->wait_lock, flags); | |
243 | 221 | } |
222 | + /* got the lock */ | |
223 | + sem->activity = -1; | |
224 | + list_del(&waiter.list); | |
244 | 225 | |
245 | - tsk->state = TASK_RUNNING; | |
246 | - out: | |
247 | - ; | |
226 | + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | |
248 | 227 | } |
249 | 228 | |
250 | 229 | void __sched __down_write(struct rw_semaphore *sem) |
... | ... | @@ -262,8 +241,8 @@ |
262 | 241 | |
263 | 242 | raw_spin_lock_irqsave(&sem->wait_lock, flags); |
264 | 243 | |
265 | - if (sem->activity == 0 && list_empty(&sem->wait_list)) { | |
266 | - /* granted */ | |
244 | + if (sem->activity == 0) { | |
245 | + /* got the lock */ | |
267 | 246 | sem->activity = -1; |
268 | 247 | ret = 1; |
269 | 248 | } |
lib/rwsem.c
... | ... | @@ -2,6 +2,8 @@ |
2 | 2 | * |
3 | 3 | * Written by David Howells (dhowells@redhat.com). |
4 | 4 | * Derived from arch/i386/kernel/semaphore.c |
5 | + * | |
6 | + * Writer lock-stealing by Alex Shi <alex.shi@intel.com> | |
5 | 7 | */ |
6 | 8 | #include <linux/rwsem.h> |
7 | 9 | #include <linux/sched.h> |
... | ... | @@ -60,7 +62,7 @@ |
60 | 62 | struct rwsem_waiter *waiter; |
61 | 63 | struct task_struct *tsk; |
62 | 64 | struct list_head *next; |
63 | - signed long oldcount, woken, loop, adjustment; | |
65 | + signed long woken, loop, adjustment; | |
64 | 66 | |
65 | 67 | waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); |
66 | 68 | if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE)) |
... | ... | @@ -72,30 +74,8 @@ |
72 | 74 | */ |
73 | 75 | goto out; |
74 | 76 | |
75 | - /* There's a writer at the front of the queue - try to grant it the | |
76 | - * write lock. However, we only wake this writer if we can transition | |
77 | - * the active part of the count from 0 -> 1 | |
78 | - */ | |
79 | - adjustment = RWSEM_ACTIVE_WRITE_BIAS; | |
80 | - if (waiter->list.next == &sem->wait_list) | |
81 | - adjustment -= RWSEM_WAITING_BIAS; | |
82 | - | |
83 | - try_again_write: | |
84 | - oldcount = rwsem_atomic_update(adjustment, sem) - adjustment; | |
85 | - if (oldcount & RWSEM_ACTIVE_MASK) | |
86 | - /* Someone grabbed the sem already */ | |
87 | - goto undo_write; | |
88 | - | |
89 | - /* We must be careful not to touch 'waiter' after we set ->task = NULL. | |
90 | - * It is an allocated on the waiter's stack and may become invalid at | |
91 | - * any time after that point (due to a wakeup from another source). | |
92 | - */ | |
93 | - list_del(&waiter->list); | |
94 | - tsk = waiter->task; | |
95 | - smp_mb(); | |
96 | - waiter->task = NULL; | |
97 | - wake_up_process(tsk); | |
98 | - put_task_struct(tsk); | |
77 | + /* Wake up the writing waiter and let the task grab the sem: */ | |
78 | + wake_up_process(waiter->task); | |
99 | 79 | goto out; |
100 | 80 | |
101 | 81 | readers_only: |
102 | 82 | |
103 | 83 | |
... | ... | @@ -157,12 +137,40 @@ |
157 | 137 | |
158 | 138 | out: |
159 | 139 | return sem; |
140 | +} | |
160 | 141 | |
161 | - /* undo the change to the active count, but check for a transition | |
162 | - * 1->0 */ | |
163 | - undo_write: | |
142 | +/* Try to get write sem, caller holds sem->wait_lock: */ | |
143 | +static int try_get_writer_sem(struct rw_semaphore *sem, | |
144 | + struct rwsem_waiter *waiter) | |
145 | +{ | |
146 | + struct rwsem_waiter *fwaiter; | |
147 | + long oldcount, adjustment; | |
148 | + | |
149 | + /* only steal when first waiter is writing */ | |
150 | + fwaiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); | |
151 | + if (!(fwaiter->flags & RWSEM_WAITING_FOR_WRITE)) | |
152 | + return 0; | |
153 | + | |
154 | + adjustment = RWSEM_ACTIVE_WRITE_BIAS; | |
155 | + /* Only one waiter in the queue: */ | |
156 | + if (fwaiter == waiter && waiter->list.next == &sem->wait_list) | |
157 | + adjustment -= RWSEM_WAITING_BIAS; | |
158 | + | |
159 | +try_again_write: | |
160 | + oldcount = rwsem_atomic_update(adjustment, sem) - adjustment; | |
161 | + if (!(oldcount & RWSEM_ACTIVE_MASK)) { | |
162 | + /* No active lock: */ | |
163 | + struct task_struct *tsk = waiter->task; | |
164 | + | |
165 | + list_del(&waiter->list); | |
166 | + smp_mb(); | |
167 | + put_task_struct(tsk); | |
168 | + tsk->state = TASK_RUNNING; | |
169 | + return 1; | |
170 | + } | |
171 | + /* some one grabbed the sem already */ | |
164 | 172 | if (rwsem_atomic_update(-adjustment, sem) & RWSEM_ACTIVE_MASK) |
165 | - goto out; | |
173 | + return 0; | |
166 | 174 | goto try_again_write; |
167 | 175 | } |
168 | 176 | |
... | ... | @@ -210,6 +218,15 @@ |
210 | 218 | for (;;) { |
211 | 219 | if (!waiter.task) |
212 | 220 | break; |
221 | + | |
222 | + raw_spin_lock_irq(&sem->wait_lock); | |
223 | + /* Try to get the writer sem, may steal from the head writer: */ | |
224 | + if (flags == RWSEM_WAITING_FOR_WRITE) | |
225 | + if (try_get_writer_sem(sem, &waiter)) { | |
226 | + raw_spin_unlock_irq(&sem->wait_lock); | |
227 | + return sem; | |
228 | + } | |
229 | + raw_spin_unlock_irq(&sem->wait_lock); | |
213 | 230 | schedule(); |
214 | 231 | set_task_state(tsk, TASK_UNINTERRUPTIBLE); |
215 | 232 | } |