Commit 96645678cd726e87ce42a0664de71e047e32bca4

Authored by Peter Zijlstra
Committed by Linus Torvalds
1 parent 443aef0edd

lockstat: measure lock bouncing

__acquire
        |
       lock _____
        |        \
        |    __contended
        |         |
        |        wait
        | _______/
        |/
        |
   __acquired
        |
   __release
        |
     unlock

We measure acquisition and contention bouncing.

This is done by recording a cpu stamp in each lock instance.

Contention bouncing requires the cpu stamp to be set on acquisition. Hence we
move __acquired into the generic path.

__acquired is then used to measure acquisition bouncing by comparing the
current cpu with the old stamp before replacing it.

__contended is used to measure contention bouncing (only useful for preemptable
locks)

[akpm@linux-foundation.org: cleanups]
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 4 changed files with 55 additions and 21 deletions Side-by-side Diff

include/linux/lockdep.h
... ... @@ -130,12 +130,24 @@
130 130 unsigned long nr;
131 131 };
132 132  
  133 +enum bounce_type {
  134 + bounce_acquired_write,
  135 + bounce_acquired_read,
  136 + bounce_contended_write,
  137 + bounce_contended_read,
  138 + nr_bounce_types,
  139 +
  140 + bounce_acquired = bounce_acquired_write,
  141 + bounce_contended = bounce_contended_write,
  142 +};
  143 +
133 144 struct lock_class_stats {
134 145 unsigned long contention_point[4];
135 146 struct lock_time read_waittime;
136 147 struct lock_time write_waittime;
137 148 struct lock_time read_holdtime;
138 149 struct lock_time write_holdtime;
  150 + unsigned long bounces[nr_bounce_types];
139 151 };
140 152  
141 153 struct lock_class_stats lock_stats(struct lock_class *class);
... ... @@ -150,6 +162,9 @@
150 162 struct lock_class_key *key;
151 163 struct lock_class *class_cache;
152 164 const char *name;
  165 +#ifdef CONFIG_LOCK_STAT
  166 + int cpu;
  167 +#endif
153 168 };
154 169  
155 170 /*
156 171  
... ... @@ -321,8 +336,8 @@
321 336 if (!try(_lock)) { \
322 337 lock_contended(&(_lock)->dep_map, _RET_IP_); \
323 338 lock(_lock); \
324   - lock_acquired(&(_lock)->dep_map); \
325 339 } \
  340 + lock_acquired(&(_lock)->dep_map); \
326 341 } while (0)
327 342  
328 343 #else /* CONFIG_LOCK_STAT */
... ... @@ -177,6 +177,9 @@
177 177  
178 178 lock_time_add(&pcs->read_holdtime, &stats.read_holdtime);
179 179 lock_time_add(&pcs->write_holdtime, &stats.write_holdtime);
  180 +
  181 + for (i = 0; i < ARRAY_SIZE(stats.bounces); i++)
  182 + stats.bounces[i] += pcs->bounces[i];
180 183 }
181 184  
182 185 return stats;
... ... @@ -2325,6 +2328,9 @@
2325 2328 lock->name = name;
2326 2329 lock->key = key;
2327 2330 lock->class_cache = NULL;
  2331 +#ifdef CONFIG_LOCK_STAT
  2332 + lock->cpu = raw_smp_processor_id();
  2333 +#endif
2328 2334 if (subclass)
2329 2335 register_lock_class(lock, subclass, 1);
2330 2336 }
... ... @@ -2775,6 +2781,8 @@
2775 2781 stats = get_lock_stats(hlock->class);
2776 2782 if (point < ARRAY_SIZE(stats->contention_point))
2777 2783 stats->contention_point[i]++;
  2784 + if (lock->cpu != smp_processor_id())
  2785 + stats->bounces[bounce_contended + !!hlock->read]++;
2778 2786 put_lock_stats(stats);
2779 2787 }
2780 2788  
... ... @@ -2786,8 +2794,8 @@
2786 2794 struct lock_class_stats *stats;
2787 2795 unsigned int depth;
2788 2796 u64 now;
2789   - s64 waittime;
2790   - int i;
  2797 + s64 waittime = 0;
  2798 + int i, cpu;
2791 2799  
2792 2800 depth = curr->lockdep_depth;
2793 2801 if (DEBUG_LOCKS_WARN_ON(!depth))
2794 2802  
2795 2803  
2796 2804  
... ... @@ -2809,19 +2817,25 @@
2809 2817 return;
2810 2818  
2811 2819 found_it:
2812   - if (!hlock->waittime_stamp)
2813   - return;
  2820 + cpu = smp_processor_id();
  2821 + if (hlock->waittime_stamp) {
  2822 + now = sched_clock();
  2823 + waittime = now - hlock->waittime_stamp;
  2824 + hlock->holdtime_stamp = now;
  2825 + }
2814 2826  
2815   - now = sched_clock();
2816   - waittime = now - hlock->waittime_stamp;
2817   - hlock->holdtime_stamp = now;
2818   -
2819 2827 stats = get_lock_stats(hlock->class);
2820   - if (hlock->read)
2821   - lock_time_inc(&stats->read_waittime, waittime);
2822   - else
2823   - lock_time_inc(&stats->write_waittime, waittime);
  2828 + if (waittime) {
  2829 + if (hlock->read)
  2830 + lock_time_inc(&stats->read_waittime, waittime);
  2831 + else
  2832 + lock_time_inc(&stats->write_waittime, waittime);
  2833 + }
  2834 + if (lock->cpu != cpu)
  2835 + stats->bounces[bounce_acquired + !!hlock->read]++;
2824 2836 put_lock_stats(stats);
  2837 +
  2838 + lock->cpu = cpu;
2825 2839 }
2826 2840  
2827 2841 void lock_contended(struct lockdep_map *lock, unsigned long ip)
kernel/lockdep_proc.c
... ... @@ -430,16 +430,18 @@
430 430 else
431 431 seq_printf(m, "%40s:", name);
432 432  
  433 + seq_printf(m, "%14lu ", stats->bounces[bounce_contended_write]);
433 434 seq_lock_time(m, &stats->write_waittime);
434   - seq_puts(m, " ");
  435 + seq_printf(m, " %14lu ", stats->bounces[bounce_acquired_write]);
435 436 seq_lock_time(m, &stats->write_holdtime);
436 437 seq_puts(m, "\n");
437 438 }
438 439  
439 440 if (stats->read_holdtime.nr) {
440 441 seq_printf(m, "%38s-R:", name);
  442 + seq_printf(m, "%14lu ", stats->bounces[bounce_contended_read]);
441 443 seq_lock_time(m, &stats->read_waittime);
442   - seq_puts(m, " ");
  444 + seq_printf(m, " %14lu ", stats->bounces[bounce_acquired_read]);
443 445 seq_lock_time(m, &stats->read_holdtime);
444 446 seq_puts(m, "\n");
445 447 }
446 448  
447 449  
448 450  
449 451  
... ... @@ -469,26 +471,29 @@
469 471 }
470 472 if (i) {
471 473 seq_puts(m, "\n");
472   - seq_line(m, '.', 0, 40 + 1 + 8 * (14 + 1));
  474 + seq_line(m, '.', 0, 40 + 1 + 10 * (14 + 1));
473 475 seq_puts(m, "\n");
474 476 }
475 477 }
476 478  
477 479 static void seq_header(struct seq_file *m)
478 480 {
479   - seq_printf(m, "lock_stat version 0.1\n");
480   - seq_line(m, '-', 0, 40 + 1 + 8 * (14 + 1));
481   - seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s\n",
  481 + seq_printf(m, "lock_stat version 0.2\n");
  482 + seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1));
  483 + seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s "
  484 + "%14s %14s\n",
482 485 "class name",
  486 + "con-bounces",
483 487 "contentions",
484 488 "waittime-min",
485 489 "waittime-max",
486 490 "waittime-total",
  491 + "acq-bounces",
487 492 "acquisitions",
488 493 "holdtime-min",
489 494 "holdtime-max",
490 495 "holdtime-total");
491   - seq_line(m, '-', 0, 40 + 1 + 8 * (14 + 1));
  496 + seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1));
492 497 seq_printf(m, "\n");
493 498 }
494 499  
... ... @@ -180,8 +180,8 @@
180 180 spin_lock_mutex(&lock->wait_lock, flags);
181 181 }
182 182  
183   - lock_acquired(&lock->dep_map);
184 183 done:
  184 + lock_acquired(&lock->dep_map);
185 185 /* got the lock - rejoice! */
186 186 mutex_remove_waiter(lock, &waiter, task_thread_info(task));
187 187 debug_mutex_set_owner(lock, task_thread_info(task));