Commit 3690b5e6fd9daa030039ae9bda69044228bd476d

Authored by Lai Jiangshan
Committed by Ingo Molnar
1 parent ce5e5540c0

trace_workqueue: use percpu data for workqueue stat

Impact: use percpu data instead of a global structure

Use:

   static DEFINE_PER_CPU(struct workqueue_global_stats, all_workqueue_stat);

instead of allocating a global structure.

Per-cpu data also works well on NUMA, since each CPU's copy can be placed in node-local memory.
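
As a rough illustration of the pattern (a minimal sketch only; the struct, macro and function names below are hypothetical and not part of this patch), DEFINE_PER_CPU() creates one statically allocated copy of the structure for every possible CPU, and per_cpu() picks out a given CPU's copy, so the old all_workqueue_stat[cpu] array indexing becomes an accessor macro:

   /* Hypothetical sketch of the per-cpu pattern this patch switches to. */
   #include <linux/percpu.h>
   #include <linux/spinlock.h>
   #include <linux/list.h>
   #include <linux/cpumask.h>
   #include <linux/init.h>

   struct example_cpu_stats {                   /* hypothetical structure */
           struct list_head list;
           spinlock_t lock;
   };

   /* One copy per possible CPU; no kmalloc() of a CPU-indexed array needed. */
   static DEFINE_PER_CPU(struct example_cpu_stats, example_cpu_stats);
   #define example_stat(cpu)  (&per_cpu(example_cpu_stats, cpu))

   static void __init example_init(void)
   {
           int cpu;

           /* Every possible CPU already owns its copy; just initialize it. */
           for_each_possible_cpu(cpu) {
                   spin_lock_init(&example_stat(cpu)->lock);
                   INIT_LIST_HEAD(&example_stat(cpu)->list);
           }
   }

Compared with a kmalloc()'d array, this also removes the allocation-failure path from the early-init code.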

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

Showing 1 changed file with 29 additions and 35 deletions

kernel/trace/trace_workqueue.c
 /*
  * Workqueue statistical tracer.
  *
  * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
  *
  */


 #include <trace/workqueue.h>
 #include <linux/list.h>
+#include <linux/percpu.h>
 #include "trace_stat.h"
 #include "trace.h"


 /* A cpu workqueue thread */
 struct cpu_workqueue_stats {
 	struct list_head list;
 /* Useful to know if we print the cpu headers */
 	bool first_entry;
 	int cpu;
 	pid_t pid;
 /* Can be inserted from interrupt or user context, need to be atomic */
 	atomic_t inserted;
 /*
  * Don't need to be atomic, works are serialized in a single workqueue thread
  * on a single CPU.
  */
 	unsigned int executed;
 };

 /* List of workqueue threads on one cpu */
 struct workqueue_global_stats {
 	struct list_head list;
 	spinlock_t lock;
 };

 /* Don't need a global lock because allocated before the workqueues, and
  * never freed.
  */
-static struct workqueue_global_stats *all_workqueue_stat;
+static DEFINE_PER_CPU(struct workqueue_global_stats, all_workqueue_stat);
+#define workqueue_cpu_stat(cpu)	(&per_cpu(all_workqueue_stat, cpu))

 /* Insertion of a work */
 static void
 probe_workqueue_insertion(struct task_struct *wq_thread,
 			  struct work_struct *work)
 {
 	int cpu = cpumask_first(&wq_thread->cpus_allowed);
 	struct cpu_workqueue_stats *node, *next;
 	unsigned long flags;

-	spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
-	list_for_each_entry_safe(node, next, &all_workqueue_stat[cpu].list,
+	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
+	list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
 							list) {
 		if (node->pid == wq_thread->pid) {
 			atomic_inc(&node->inserted);
 			goto found;
 		}
 	}
 	pr_debug("trace_workqueue: entry not found\n");
 found:
-	spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
+	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
 }

 /* Execution of a work */
 static void
 probe_workqueue_execution(struct task_struct *wq_thread,
 			  struct work_struct *work)
 {
 	int cpu = cpumask_first(&wq_thread->cpus_allowed);
 	struct cpu_workqueue_stats *node, *next;
 	unsigned long flags;

-	spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
-	list_for_each_entry_safe(node, next, &all_workqueue_stat[cpu].list,
+	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
+	list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
 							list) {
 		if (node->pid == wq_thread->pid) {
 			node->executed++;
 			goto found;
 		}
 	}
 	pr_debug("trace_workqueue: entry not found\n");
 found:
-	spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
+	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
 }

 /* Creation of a cpu workqueue thread */
 static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu)
 {
 	struct cpu_workqueue_stats *cws;
 	unsigned long flags;

 	WARN_ON(cpu < 0 || cpu >= num_possible_cpus());

 	/* Workqueues are sometimes created in atomic context */
 	cws = kzalloc(sizeof(struct cpu_workqueue_stats), GFP_ATOMIC);
 	if (!cws) {
 		pr_warning("trace_workqueue: not enough memory\n");
 		return;
 	}
 	tracing_record_cmdline(wq_thread);

 	INIT_LIST_HEAD(&cws->list);
 	cws->cpu = cpu;

 	cws->pid = wq_thread->pid;

-	spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
-	if (list_empty(&all_workqueue_stat[cpu].list))
+	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
+	if (list_empty(&workqueue_cpu_stat(cpu)->list))
 		cws->first_entry = true;
-	list_add_tail(&cws->list, &all_workqueue_stat[cpu].list);
-	spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
+	list_add_tail(&cws->list, &workqueue_cpu_stat(cpu)->list);
+	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
 }

 /* Destruction of a cpu workqueue thread */
 static void probe_workqueue_destruction(struct task_struct *wq_thread)
 {
 	/* Workqueue only execute on one cpu */
 	int cpu = cpumask_first(&wq_thread->cpus_allowed);
 	struct cpu_workqueue_stats *node, *next;
 	unsigned long flags;

-	spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
-	list_for_each_entry_safe(node, next, &all_workqueue_stat[cpu].list,
+	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
+	list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
 							list) {
 		if (node->pid == wq_thread->pid) {
 			list_del(&node->list);
 			kfree(node);
 			goto found;
 		}
 	}

 	pr_debug("trace_workqueue: don't find workqueue to destroy\n");
 found:
-	spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
+	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);

 }

 static struct cpu_workqueue_stats *workqueue_stat_start_cpu(int cpu)
 {
 	unsigned long flags;
 	struct cpu_workqueue_stats *ret = NULL;


-	spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
+	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);

-	if (!list_empty(&all_workqueue_stat[cpu].list))
-		ret = list_entry(all_workqueue_stat[cpu].list.next,
+	if (!list_empty(&workqueue_cpu_stat(cpu)->list))
+		ret = list_entry(workqueue_cpu_stat(cpu)->list.next,
 				 struct cpu_workqueue_stats, list);

-	spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
+	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);

 	return ret;
 }

 static void *workqueue_stat_start(void)
 {
 	int cpu;
 	void *ret = NULL;

 	for_each_possible_cpu(cpu) {
 		ret = workqueue_stat_start_cpu(cpu);
 		if (ret)
 			return ret;
 	}
 	return NULL;
 }

 static void *workqueue_stat_next(void *prev, int idx)
 {
 	struct cpu_workqueue_stats *prev_cws = prev;
 	int cpu = prev_cws->cpu;
 	unsigned long flags;
 	void *ret = NULL;

-	spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
-	if (list_is_last(&prev_cws->list, &all_workqueue_stat[cpu].list)) {
-		spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
+	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
+	if (list_is_last(&prev_cws->list, &workqueue_cpu_stat(cpu)->list)) {
+		spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
 		for (++cpu ; cpu < num_possible_cpus(); cpu++) {
 			ret = workqueue_stat_start_cpu(cpu);
 			if (ret)
 				return ret;
 		}
 		return NULL;
 	}
-	spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
+	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);

 	return list_entry(prev_cws->list.next, struct cpu_workqueue_stats,
 			  list);
 }

 static int workqueue_stat_show(struct seq_file *s, void *p)
 {
 	struct cpu_workqueue_stats *cws = p;
 	unsigned long flags;
 	int cpu = cws->cpu;

 	seq_printf(s, "%3d %6d %6u %s\n", cws->cpu,
 		   atomic_read(&cws->inserted),
 		   cws->executed,
 		   trace_find_cmdline(cws->pid));

-	spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
-	if (&cws->list == all_workqueue_stat[cpu].list.next)
+	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
+	if (&cws->list == workqueue_cpu_stat(cpu)->list.next)
 		seq_printf(s, "\n");
-	spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
+	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);

 	return 0;
 }

 static int workqueue_stat_headers(struct seq_file *s)
 {
 	seq_printf(s, "# CPU  INSERTED  EXECUTED   NAME\n");
 	seq_printf(s, "# |      |         |          |\n\n");
 	return 0;
 }

 struct tracer_stat workqueue_stats __read_mostly = {
 	.name = "workqueues",
 	.stat_start = workqueue_stat_start,
 	.stat_next = workqueue_stat_next,
 	.stat_show = workqueue_stat_show,
 	.stat_headers = workqueue_stat_headers
 };


 int __init stat_workqueue_init(void)
 {
 	if (register_stat_tracer(&workqueue_stats)) {
 		pr_warning("Unable to register workqueue stat tracer\n");
 		return 1;
 	}

 	return 0;
 }
 fs_initcall(stat_workqueue_init);

 /*
  * Workqueues are created very early, just after pre-smp initcalls.
  * So we must register our tracepoints at this stage.
  */
 int __init trace_workqueue_early_init(void)
 {
 	int ret, cpu;

 	ret = register_trace_workqueue_insertion(probe_workqueue_insertion);
 	if (ret)
 		goto out;

 	ret = register_trace_workqueue_execution(probe_workqueue_execution);
 	if (ret)
 		goto no_insertion;

 	ret = register_trace_workqueue_creation(probe_workqueue_creation);
 	if (ret)
 		goto no_execution;

 	ret = register_trace_workqueue_destruction(probe_workqueue_destruction);
 	if (ret)
 		goto no_creation;

-	all_workqueue_stat = kmalloc(sizeof(struct workqueue_global_stats)
-				     * num_possible_cpus(), GFP_KERNEL);
-
-	if (!all_workqueue_stat) {
-		pr_warning("trace_workqueue: not enough memory\n");
-		goto no_creation;
-	}
-
 	for_each_possible_cpu(cpu) {
-		spin_lock_init(&all_workqueue_stat[cpu].lock);
-		INIT_LIST_HEAD(&all_workqueue_stat[cpu].list);
+		spin_lock_init(&workqueue_cpu_stat(cpu)->lock);
+		INIT_LIST_HEAD(&workqueue_cpu_stat(cpu)->list);
 	}

 	return 0;

 no_creation:
 	unregister_trace_workqueue_creation(probe_workqueue_creation);
 no_execution:
 	unregister_trace_workqueue_execution(probe_workqueue_execution);
 no_insertion:
 	unregister_trace_workqueue_insertion(probe_workqueue_insertion);
 out:
 	pr_warning("trace_workqueue: unable to trace workqueues\n");

 	return 1;
 }