Commit 9745512ce79de686df354dc70a8d1a74d801892d

Authored by Arjan van de Ven
Committed by Ingo Molnar
1 parent 326587b840

sched: latencytop support

LatencyTOP kernel infrastructure; it measures latencies in the
scheduler and tracks them system-wide and per process.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
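
For orientation, here is a minimal userspace sketch of how the interfaces
added by this commit fit together. The paths are taken from the diff below
(/proc/sys/kernel/latencytop from the kernel/sysctl.c hunk, /proc/latency_stats
from kernel/latencytop.c); the program itself is illustrative and not part of
the commit:

    /* Hypothetical reader for the LatencyTOP /proc interface. */
    #include <stdio.h>

    int main(void)
    {
        char line[4096];
        FILE *f;

        /* Enable collection via the sysctl added in kernel/sysctl.c. */
        f = fopen("/proc/sys/kernel/latencytop", "w");
        if (!f) { perror("latencytop sysctl"); return 1; }
        fputs("1\n", f);
        fclose(f);

        /* Dump the global records kept by kernel/latencytop.c. */
        f = fopen("/proc/latency_stats", "r");
        if (!f) { perror("/proc/latency_stats"); return 1; }
        while (fgets(line, sizeof(line), f))
            fputs(line, stdout);
        fclose(f);
        return 0;
    }

The commit also adds a per-task view at /proc/<pid>/latency, and writing to
either file clears the accumulated records (see the lstats_write handlers
below).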

Showing 11 changed files with 429 additions and 1 deletion

arch/x86/kernel/stacktrace.c
... ... @@ -33,6 +33,19 @@
33 33 trace->entries[trace->nr_entries++] = addr;
34 34 }
35 35  
  36 +static void save_stack_address_nosched(void *data, unsigned long addr)
  37 +{
  38 + struct stack_trace *trace = (struct stack_trace *)data;
  39 + if (in_sched_functions(addr))
  40 + return;
  41 + if (trace->skip > 0) {
  42 + trace->skip--;
  43 + return;
  44 + }
  45 + if (trace->nr_entries < trace->max_entries)
  46 + trace->entries[trace->nr_entries++] = addr;
  47 +}
  48 +
36 49 static const struct stacktrace_ops save_stack_ops = {
37 50 .warning = save_stack_warning,
38 51 .warning_symbol = save_stack_warning_symbol,
... ... @@ -40,6 +53,13 @@
40 53 .address = save_stack_address,
41 54 };
42 55  
  56 +static const struct stacktrace_ops save_stack_ops_nosched = {
  57 + .warning = save_stack_warning,
  58 + .warning_symbol = save_stack_warning_symbol,
  59 + .stack = save_stack_stack,
  60 + .address = save_stack_address_nosched,
  61 +};
  62 +
43 63 /*
44 64 * Save stack-backtrace addresses into a stack_trace buffer.
45 65 */
... ... @@ -50,4 +70,11 @@
50 70 trace->entries[trace->nr_entries++] = ULONG_MAX;
51 71 }
52 72 EXPORT_SYMBOL(save_stack_trace);
  73 +
  74 +void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
  75 +{
  76 + dump_trace(tsk, NULL, NULL, &save_stack_ops_nosched, trace);
  77 + if (trace->nr_entries < trace->max_entries)
  78 + trace->entries[trace->nr_entries++] = ULONG_MAX;
  79 +}
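
Note: save_stack_trace_tsk() records a backtrace for an arbitrary task, and
save_stack_ops_nosched uses in_sched_functions() to filter out frames that
belong to the scheduler itself, so scheduler-internal addresses do not
clutter the stored trace. The only caller this commit adds is
store_stacktrace() in kernel/latencytop.c below.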
fs/proc/base.c
... ... @@ -310,6 +310,77 @@
310 310 }
311 311 #endif
312 312  
  313 +#ifdef CONFIG_LATENCYTOP
  314 +static int lstats_show_proc(struct seq_file *m, void *v)
  315 +{
  316 + int i;
  317 + struct task_struct *task = m->private;
  318 + seq_puts(m, "Latency Top version : v0.1\n");
  319 +
  320 + for (i = 0; i < 32; i++) {
  321 + if (task->latency_record[i].backtrace[0]) {
  322 + int q;
  323 + seq_printf(m, "%i %li %li ",
  324 + task->latency_record[i].count,
  325 + task->latency_record[i].time,
  326 + task->latency_record[i].max);
  327 + for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
  328 + char sym[KSYM_NAME_LEN];
  329 + char *c;
  330 + if (!task->latency_record[i].backtrace[q])
  331 + break;
  332 + if (task->latency_record[i].backtrace[q] == ULONG_MAX)
  333 + break;
  334 + sprint_symbol(sym, task->latency_record[i].backtrace[q]);
  335 + c = strchr(sym, '+');
  336 + if (c)
  337 + *c = 0;
  338 + seq_printf(m, "%s ", sym);
  339 + }
  340 + seq_printf(m, "\n");
  341 + }
  342 +
  343 + }
  344 + return 0;
  345 +}
  346 +
  347 +static int lstats_open(struct inode *inode, struct file *file)
  348 +{
  349 + int ret;
  350 + struct seq_file *m;
  351 + struct task_struct *task = get_proc_task(inode);
  352 +
  353 + ret = single_open(file, lstats_show_proc, NULL);
  354 + if (!ret) {
  355 + m = file->private_data;
  356 + m->private = task;
  357 + }
  358 + return ret;
  359 +}
  360 +
  361 +static ssize_t lstats_write(struct file *file, const char __user *buf,
  362 + size_t count, loff_t *offs)
  363 +{
  364 + struct seq_file *m;
  365 + struct task_struct *task;
  366 +
  367 + m = file->private_data;
  368 + task = m->private;
  369 + clear_all_latency_tracing(task);
  370 +
  371 + return count;
  372 +}
  373 +
  374 +static const struct file_operations proc_lstats_operations = {
  375 + .open = lstats_open,
  376 + .read = seq_read,
  377 + .write = lstats_write,
  378 + .llseek = seq_lseek,
  379 + .release = single_release,
  380 +};
  381 +
  382 +#endif
  383 +
313 384 /* The badness from the OOM killer */
314 385 unsigned long badness(struct task_struct *p, unsigned long uptime);
315 386 static int proc_oom_score(struct task_struct *task, char *buffer)
... ... @@ -1020,6 +1091,7 @@
1020 1091 };
1021 1092 #endif
1022 1093  
  1094 +
1023 1095 #ifdef CONFIG_SCHED_DEBUG
1024 1096 /*
1025 1097 * Print out various scheduling related per-task fields:
... ... @@ -2230,6 +2302,9 @@
2230 2302 #ifdef CONFIG_SCHEDSTATS
2231 2303 INF("schedstat", S_IRUGO, pid_schedstat),
2232 2304 #endif
  2305 +#ifdef CONFIG_LATENCYTOP
  2306 + REG("latency", S_IRUGO, lstats),
  2307 +#endif
2233 2308 #ifdef CONFIG_PROC_PID_CPUSET
2234 2309 REG("cpuset", S_IRUGO, cpuset),
2235 2310 #endif
... ... @@ -2554,6 +2629,9 @@
2554 2629 #endif
2555 2630 #ifdef CONFIG_SCHEDSTATS
2556 2631 INF("schedstat", S_IRUGO, pid_schedstat),
  2632 +#endif
  2633 +#ifdef CONFIG_LATENCYTOP
  2634 + REG("latency", S_IRUGO, lstats),
2557 2635 #endif
2558 2636 #ifdef CONFIG_PROC_PID_CPUSET
2559 2637 REG("cpuset", S_IRUGO, cpuset),
include/linux/latencytop.h
  1 +/*
  2 + * latencytop.h: Infrastructure for displaying latency
  3 + *
  4 + * (C) Copyright 2008 Intel Corporation
  5 + * Author: Arjan van de Ven <arjan@linux.intel.com>
  6 + *
  7 + */
  8 +
  9 +#ifndef _INCLUDE_GUARD_LATENCYTOP_H_
  10 +#define _INCLUDE_GUARD_LATENCYTOP_H_
  11 +
  12 +#ifdef CONFIG_LATENCYTOP
  13 +
  14 +#define LT_SAVECOUNT 32
  15 +#define LT_BACKTRACEDEPTH 12
  16 +
  17 +struct latency_record {
  18 + unsigned long backtrace[LT_BACKTRACEDEPTH];
  19 + unsigned int count;
  20 + unsigned long time;
  21 + unsigned long max;
  22 +};
  23 +
  24 +
  25 +struct task_struct;
  26 +
  27 +void account_scheduler_latency(struct task_struct *task, int usecs, int inter);
  28 +
  29 +void clear_all_latency_tracing(struct task_struct *p);
  30 +
  31 +#else
  32 +
  33 +static inline void
  34 +account_scheduler_latency(struct task_struct *task, int usecs, int inter)
  35 +{
  36 +}
  37 +
  38 +static inline void clear_all_latency_tracing(struct task_struct *p)
  39 +{
  40 +}
  41 +
  42 +#endif
  43 +
  44 +#endif
include/linux/sched.h
... ... @@ -88,6 +88,7 @@
88 88 #include <linux/hrtimer.h>
89 89 #include <linux/task_io_accounting.h>
90 90 #include <linux/kobject.h>
  91 +#include <linux/latencytop.h>
91 92  
92 93 #include <asm/processor.h>
93 94  
... ... @@ -1220,6 +1221,10 @@
1220 1221 int make_it_fail;
1221 1222 #endif
1222 1223 struct prop_local_single dirties;
  1224 +#ifdef CONFIG_LATENCYTOP
  1225 + int latency_record_count;
  1226 + struct latency_record latency_record[LT_SAVECOUNT];
  1227 +#endif
1223 1228 };
1224 1229  
1225 1230 /*
include/linux/stacktrace.h
... ... @@ -9,10 +9,13 @@
9 9 };
10 10  
11 11 extern void save_stack_trace(struct stack_trace *trace);
  12 +extern void save_stack_trace_tsk(struct task_struct *tsk,
  13 + struct stack_trace *trace);
12 14  
13 15 extern void print_stack_trace(struct stack_trace *trace, int spaces);
14 16 #else
15 17 # define save_stack_trace(trace) do { } while (0)
  18 +# define save_stack_trace_tsk(tsk, trace) do { } while (0)
16 19 # define print_stack_trace(trace, spaces) do { } while (0)
17 20 #endif
18 21  
kernel/Makefile
... ... @@ -62,6 +62,7 @@
62 62 obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
63 63 obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
64 64 obj-$(CONFIG_MARKERS) += marker.o
  65 +obj-$(CONFIG_LATENCYTOP) += latencytop.o
65 66  
66 67 ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
67 68 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
kernel/fork.c
... ... @@ -1205,6 +1205,7 @@
1205 1205 #ifdef TIF_SYSCALL_EMU
1206 1206 clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
1207 1207 #endif
  1208 + clear_all_latency_tracing(p);
1208 1209  
1209 1210 /* Our parent execution domain becomes current domain
1210 1211 These must match for thread signalling to apply */
kernel/latencytop.c
  1 +/*
  2 + * latencytop.c: Latency display infrastructure
  3 + *
  4 + * (C) Copyright 2008 Intel Corporation
  5 + * Author: Arjan van de Ven <arjan@linux.intel.com>
  6 + *
  7 + * This program is free software; you can redistribute it and/or
  8 + * modify it under the terms of the GNU General Public License
  9 + * as published by the Free Software Foundation; version 2
  10 + * of the License.
  11 + */
  12 +#include <linux/latencytop.h>
  13 +#include <linux/kallsyms.h>
  14 +#include <linux/seq_file.h>
  15 +#include <linux/notifier.h>
  16 +#include <linux/spinlock.h>
  17 +#include <linux/proc_fs.h>
  18 +#include <linux/module.h>
  19 +#include <linux/sched.h>
  20 +#include <linux/list.h>
  21 +#include <linux/slab.h>
  22 +#include <linux/stacktrace.h>
  23 +
  24 +static DEFINE_SPINLOCK(latency_lock);
  25 +
  26 +#define MAXLR 128
  27 +static struct latency_record latency_record[MAXLR];
  28 +
  29 +int latencytop_enabled;
  30 +
  31 +void clear_all_latency_tracing(struct task_struct *p)
  32 +{
  33 + unsigned long flags;
  34 +
  35 + if (!latencytop_enabled)
  36 + return;
  37 +
  38 + spin_lock_irqsave(&latency_lock, flags);
  39 + memset(&p->latency_record, 0, sizeof(p->latency_record));
  40 + p->latency_record_count = 0;
  41 + spin_unlock_irqrestore(&latency_lock, flags);
  42 +}
  43 +
  44 +static void clear_global_latency_tracing(void)
  45 +{
  46 + unsigned long flags;
  47 +
  48 + spin_lock_irqsave(&latency_lock, flags);
  49 + memset(&latency_record, 0, sizeof(latency_record));
  50 + spin_unlock_irqrestore(&latency_lock, flags);
  51 +}
  52 +
  53 +static void __sched
  54 +account_global_scheduler_latency(struct task_struct *tsk, struct latency_record *lat)
  55 +{
  56 + int firstnonnull = MAXLR + 1;
  57 + int i;
  58 +
  59 + if (!latencytop_enabled)
  60 + return;
  61 +
  62 + /* skip kernel threads for now */
  63 + if (!tsk->mm)
  64 + return;
  65 +
  66 + for (i = 0; i < MAXLR; i++) {
  67 + int q;
  68 + int same = 1;
  69 + /* Nothing stored: */
  70 + if (!latency_record[i].backtrace[0]) {
  71 + if (firstnonnull > i)
  72 + firstnonnull = i;
  73 + continue;
  74 + }
  75 + for (q = 0 ; q < LT_BACKTRACEDEPTH ; q++) {
  76 + if (latency_record[i].backtrace[q] !=
  77 + lat->backtrace[q])
  78 + same = 0;
  79 + if (same && lat->backtrace[q] == 0)
  80 + break;
  81 + if (same && lat->backtrace[q] == ULONG_MAX)
  82 + break;
  83 + }
  84 + if (same) {
  85 + latency_record[i].count++;
  86 + latency_record[i].time += lat->time;
  87 + if (lat->time > latency_record[i].max)
  88 + latency_record[i].max = lat->time;
  89 + return;
  90 + }
  91 + }
  92 +
  93 + i = firstnonnull;
  94 + if (i >= MAXLR - 1)
  95 + return;
  96 +
  97 + /* Allocated a new one: */
  98 + memcpy(&latency_record[i], lat, sizeof(struct latency_record));
  99 +}
  100 +
  101 +static inline void store_stacktrace(struct task_struct *tsk, struct latency_record *lat)
  102 +{
  103 + struct stack_trace trace;
  104 +
  105 + memset(&trace, 0, sizeof(trace));
  106 + trace.max_entries = LT_BACKTRACEDEPTH;
  107 + trace.entries = &lat->backtrace[0];
  108 + trace.skip = 0;
  109 + save_stack_trace_tsk(tsk, &trace);
  110 +}
  111 +
  112 +void __sched
  113 +account_scheduler_latency(struct task_struct *tsk, int usecs, int inter)
  114 +{
  115 + unsigned long flags;
  116 + int i, q;
  117 + struct latency_record lat;
  118 +
  119 + if (!latencytop_enabled)
  120 + return;
  121 +
  122 + /* Long interruptible waits are generally user requested... */
  123 + if (inter && usecs > 5000)
  124 + return;
  125 +
  126 + memset(&lat, 0, sizeof(lat));
  127 + lat.count = 1;
  128 + lat.time = usecs;
  129 + lat.max = usecs;
  130 + store_stacktrace(tsk, &lat);
  131 +
  132 + spin_lock_irqsave(&latency_lock, flags);
  133 +
  134 + account_global_scheduler_latency(tsk, &lat);
  135 +
  136 + /*
  137 + * short term hack; if we're > 32 we stop; future we recycle:
  138 + */
  139 + tsk->latency_record_count++;
  140 + if (tsk->latency_record_count >= LT_SAVECOUNT)
  141 + goto out_unlock;
  142 +
  143 + for (i = 0; i < LT_SAVECOUNT ; i++) {
  144 + struct latency_record *mylat;
  145 + int same = 1;
  146 + mylat = &tsk->latency_record[i];
  147 + for (q = 0 ; q < LT_BACKTRACEDEPTH ; q++) {
  148 + if (mylat->backtrace[q] !=
  149 + lat.backtrace[q])
  150 + same = 0;
  151 + if (same && lat.backtrace[q] == 0)
  152 + break;
  153 + if (same && lat.backtrace[q] == ULONG_MAX)
  154 + break;
  155 + }
  156 + if (same) {
  157 + mylat->count++;
  158 + mylat->time += lat.time;
  159 + if (lat.time > mylat->max)
  160 + mylat->max = lat.time;
  161 + goto out_unlock;
  162 + }
  163 + }
  164 +
  165 + /* Allocated a new one: */
  166 + i = tsk->latency_record_count;
  167 + memcpy(&tsk->latency_record[i], &lat, sizeof(struct latency_record));
  168 +
  169 +out_unlock:
  170 + spin_unlock_irqrestore(&latency_lock, flags);
  171 +}
  172 +
  173 +static int lstats_show(struct seq_file *m, void *v)
  174 +{
  175 + int i;
  176 +
  177 + seq_puts(m, "Latency Top version : v0.1\n");
  178 +
  179 + for (i = 0; i < MAXLR; i++) {
  180 + if (latency_record[i].backtrace[0]) {
  181 + int q;
  182 + seq_printf(m, "%i %li %li ",
  183 + latency_record[i].count,
  184 + latency_record[i].time,
  185 + latency_record[i].max);
  186 + for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
  187 + char sym[KSYM_NAME_LEN];
  188 + char *c;
  189 + if (!latency_record[i].backtrace[q])
  190 + break;
  191 + if (latency_record[i].backtrace[q] == ULONG_MAX)
  192 + break;
  193 + sprint_symbol(sym, latency_record[i].backtrace[q]);
  194 + c = strchr(sym, '+');
  195 + if (c)
  196 + *c = 0;
  197 + seq_printf(m, "%s ", sym);
  198 + }
  199 + seq_printf(m, "\n");
  200 + }
  201 + }
  202 + return 0;
  203 +}
  204 +
  205 +static ssize_t
  206 +lstats_write(struct file *file, const char __user *buf, size_t count,
  207 + loff_t *offs)
  208 +{
  209 + clear_global_latency_tracing();
  210 +
  211 + return count;
  212 +}
  213 +
  214 +static int lstats_open(struct inode *inode, struct file *filp)
  215 +{
  216 + return single_open(filp, lstats_show, NULL);
  217 +}
  218 +
  219 +static struct file_operations lstats_fops = {
  220 + .open = lstats_open,
  221 + .read = seq_read,
  222 + .write = lstats_write,
  223 + .llseek = seq_lseek,
  224 + .release = single_release,
  225 +};
  226 +
  227 +static int __init init_lstats_procfs(void)
  228 +{
  229 + struct proc_dir_entry *pe;
  230 +
  231 + pe = create_proc_entry("latency_stats", 0644, NULL);
  232 + if (!pe)
  233 + return -ENOMEM;
  234 +
  235 + pe->proc_fops = &lstats_fops;
  236 +
  237 + return 0;
  238 +}
  239 +__initcall(init_lstats_procfs);
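
Based on the seq_printf() format strings in lstats_show(), the
/proc/latency_stats output is one header line followed by one record per
distinct backtrace: count, total time (usecs), maximum (usecs), then the
symbol names. Roughly like this (the numbers and symbols here are made up
for illustration):

    Latency Top version : v0.1
    12 34567 8901 do_sys_poll sys_poll sysenter_past_esp
    3 1200 800 blk_execute_rq scsi_execute sd_sync_cache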
kernel/sched_fair.c
... ... @@ -20,6 +20,8 @@
20 20 * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
21 21 */
22 22  
  23 +#include <linux/latencytop.h>
  24 +
23 25 /*
24 26 * Targeted preemption latency for CPU-bound tasks:
25 27 * (default: 20ms * (1 + ilog(ncpus)), units: nanoseconds)
... ... @@ -434,6 +436,7 @@
434 436 #ifdef CONFIG_SCHEDSTATS
435 437 if (se->sleep_start) {
436 438 u64 delta = rq_of(cfs_rq)->clock - se->sleep_start;
  439 + struct task_struct *tsk = task_of(se);
437 440  
438 441 if ((s64)delta < 0)
439 442 delta = 0;
440 443  
... ... @@ -443,9 +446,12 @@
443 446  
444 447 se->sleep_start = 0;
445 448 se->sum_sleep_runtime += delta;
  449 +
  450 + account_scheduler_latency(tsk, delta >> 10, 1);
446 451 }
447 452 if (se->block_start) {
448 453 u64 delta = rq_of(cfs_rq)->clock - se->block_start;
  454 + struct task_struct *tsk = task_of(se);
449 455  
450 456 if ((s64)delta < 0)
451 457 delta = 0;
452 458  
... ... @@ -462,11 +468,11 @@
462 468 * time that the task spent sleeping:
463 469 */
464 470 if (unlikely(prof_on == SLEEP_PROFILING)) {
465   - struct task_struct *tsk = task_of(se);
466 471  
467 472 profile_hits(SLEEP_PROFILING, (void *)get_wchan(tsk),
468 473 delta >> 20);
469 474 }
  475 + account_scheduler_latency(tsk, delta >> 10, 0);
470 476 }
471 477 #endif
472 478 }
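
Two details of the hook above are worth spelling out. The delta passed in is
in nanoseconds, and delta >> 10 divides by 1024, a cheap approximation of
the nanoseconds-to-microseconds conversion (1 ms = 1,000,000 ns, and
1,000,000 >> 10 = 976, close enough to 1000 usecs). The inter argument is 1
for interruptible sleeps and 0 for uninterruptible blocking; in
account_scheduler_latency() above, interruptible waits longer than 5000
usecs are dropped as presumably user-requested.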
kernel/sysctl.c
... ... @@ -81,6 +81,7 @@
81 81 extern int maps_protect;
82 82 extern int sysctl_stat_interval;
83 83 extern int audit_argv_kb;
  84 +extern int latencytop_enabled;
84 85  
85 86 /* Constants used for minimum and maximum */
86 87 #ifdef CONFIG_DETECT_SOFTLOCKUP
... ... @@ -414,6 +415,15 @@
414 415 .maxlen = sizeof(int),
415 416 .mode = 0644,
416 417 .proc_handler = &proc_dointvec_taint,
  418 + },
  419 +#endif
  420 +#ifdef CONFIG_LATENCYTOP
  421 + {
  422 + .procname = "latencytop",
  423 + .data = &latencytop_enabled,
  424 + .maxlen = sizeof(int),
  425 + .mode = 0644,
  426 + .proc_handler = &proc_dointvec,
417 427 },
418 428 #endif
419 429 #ifdef CONFIG_SECURITY_CAPABILITIES
lib/Kconfig.debug
... ... @@ -517,5 +517,19 @@
517 517 help
518 518 Provide stacktrace filter for fault-injection capabilities
519 519  
  520 +config LATENCYTOP
  521 + bool "Latency measuring infrastructure"
  522 + select FRAME_POINTER if !MIPS
  523 + select KALLSYMS
  524 + select KALLSYMS_ALL
  525 + select STACKTRACE
  526 + select SCHEDSTATS
  527 + select SCHED_DEBUG
  528 + depends on X86 || X86_64
  529 + help
  530 + Enable this option if you want to use the LatencyTOP tool
  531 + to find out which userspace is blocking on what kernel operations.
  532 +
  533 +
520 534 source "samples/Kconfig"