Commit 3690b5e6fd9daa030039ae9bda69044228bd476d

Authored by Lai Jiangshan
Committed by Ingo Molnar
1 parent ce5e5540c0

trace_workqueue: use percpu data for workqueue stat

Impact: use percpu data instead of a global structure

Use:

   static DEFINE_PER_CPU(struct workqueue_global_stats, all_workqueue_stat);

instead of kmalloc()ing a global array with one entry per possible CPU.

Per-cpu data also works well on NUMA, since each CPU's instance is placed in its own node-local percpu area.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

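In short, the change replaces a kmalloc()'d array indexed by CPU number with a statically defined per-CPU variable. A minimal sketch of the resulting pattern (the struct fields mirror trace_workqueue.c; init_workqueue_stats() is only a stand-in name for the file's real init routine):

    #include <linux/cpumask.h>
    #include <linux/list.h>
    #include <linux/percpu.h>
    #include <linux/spinlock.h>

    /* Per-CPU bookkeeping for workqueue stats (fields as in trace_workqueue.c). */
    struct workqueue_global_stats {
            struct list_head list;
            spinlock_t lock;
    };

    /* One instance per possible CPU, living in that CPU's (node-local) percpu area. */
    static DEFINE_PER_CPU(struct workqueue_global_stats, all_workqueue_stat);
    #define workqueue_cpu_stat(cpu) (&per_cpu(all_workqueue_stat, cpu))

    /* No runtime allocation is needed any more: just initialize each CPU's slot. */
    static void init_workqueue_stats(void)
    {
            int cpu;

            for_each_possible_cpu(cpu) {
                    spin_lock_init(&workqueue_cpu_stat(cpu)->lock);
                    INIT_LIST_HEAD(&workqueue_cpu_stat(cpu)->list);
            }
    }
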
Showing 1 changed file with 29 additions and 35 deletions

kernel/trace/trace_workqueue.c
... ... @@ -8,6 +8,7 @@
8 8  
9 9 #include <trace/workqueue.h>
10 10 #include <linux/list.h>
  11 +#include <linux/percpu.h>
11 12 #include "trace_stat.h"
12 13 #include "trace.h"
13 14  
... ... @@ -37,7 +38,8 @@
37 38 /* Don't need a global lock because allocated before the workqueues, and
38 39 * never freed.
39 40 */
40   -static struct workqueue_global_stats *all_workqueue_stat;
  41 +static DEFINE_PER_CPU(struct workqueue_global_stats, all_workqueue_stat);
  42 +#define workqueue_cpu_stat(cpu) (&per_cpu(all_workqueue_stat, cpu))
41 43  
42 44 /* Insertion of a work */
43 45 static void
... ... @@ -48,8 +50,8 @@
48 50 struct cpu_workqueue_stats *node, *next;
49 51 unsigned long flags;
50 52  
51   - spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
52   - list_for_each_entry_safe(node, next, &all_workqueue_stat[cpu].list,
  53 + spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
  54 + list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
53 55 list) {
54 56 if (node->pid == wq_thread->pid) {
55 57 atomic_inc(&node->inserted);
... ... @@ -58,7 +60,7 @@
58 60 }
59 61 pr_debug("trace_workqueue: entry not found\n");
60 62 found:
61   - spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
  63 + spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
62 64 }
63 65  
64 66 /* Execution of a work */
... ... @@ -70,8 +72,8 @@
70 72 struct cpu_workqueue_stats *node, *next;
71 73 unsigned long flags;
72 74  
73   - spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
74   - list_for_each_entry_safe(node, next, &all_workqueue_stat[cpu].list,
  75 + spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
  76 + list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
75 77 list) {
76 78 if (node->pid == wq_thread->pid) {
77 79 node->executed++;
... ... @@ -80,7 +82,7 @@
80 82 }
81 83 pr_debug("trace_workqueue: entry not found\n");
82 84 found:
83   - spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
  85 + spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
84 86 }
85 87  
86 88 /* Creation of a cpu workqueue thread */
87 89  
... ... @@ -104,11 +106,11 @@
104 106  
105 107 cws->pid = wq_thread->pid;
106 108  
107   - spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
108   - if (list_empty(&all_workqueue_stat[cpu].list))
  109 + spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
  110 + if (list_empty(&workqueue_cpu_stat(cpu)->list))
109 111 cws->first_entry = true;
110   - list_add_tail(&cws->list, &all_workqueue_stat[cpu].list);
111   - spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
  112 + list_add_tail(&cws->list, &workqueue_cpu_stat(cpu)->list);
  113 + spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
112 114 }
113 115  
114 116 /* Destruction of a cpu workqueue thread */
... ... @@ -119,8 +121,8 @@
119 121 struct cpu_workqueue_stats *node, *next;
120 122 unsigned long flags;
121 123  
122   - spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
123   - list_for_each_entry_safe(node, next, &all_workqueue_stat[cpu].list,
  124 + spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
  125 + list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
124 126 list) {
125 127 if (node->pid == wq_thread->pid) {
126 128 list_del(&node->list);
... ... @@ -131,7 +133,7 @@
131 133  
132 134 pr_debug("trace_workqueue: don't find workqueue to destroy\n");
133 135 found:
134   - spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
  136 + spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
135 137  
136 138 }
137 139  
138 140  
139 141  
... ... @@ -141,13 +143,13 @@
141 143 struct cpu_workqueue_stats *ret = NULL;
142 144  
143 145  
144   - spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
  146 + spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
145 147  
146   - if (!list_empty(&all_workqueue_stat[cpu].list))
147   - ret = list_entry(all_workqueue_stat[cpu].list.next,
  148 + if (!list_empty(&workqueue_cpu_stat(cpu)->list))
  149 + ret = list_entry(workqueue_cpu_stat(cpu)->list.next,
148 150 struct cpu_workqueue_stats, list);
149 151  
150   - spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
  152 + spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
151 153  
152 154 return ret;
153 155 }
... ... @@ -172,9 +174,9 @@
172 174 unsigned long flags;
173 175 void *ret = NULL;
174 176  
175   - spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
176   - if (list_is_last(&prev_cws->list, &all_workqueue_stat[cpu].list)) {
177   - spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
  177 + spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
  178 + if (list_is_last(&prev_cws->list, &workqueue_cpu_stat(cpu)->list)) {
  179 + spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
178 180 for (++cpu ; cpu < num_possible_cpus(); cpu++) {
179 181 ret = workqueue_stat_start_cpu(cpu);
180 182 if (ret)
... ... @@ -182,7 +184,7 @@
182 184 }
183 185 return NULL;
184 186 }
185   - spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
  187 + spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
186 188  
187 189 return list_entry(prev_cws->list.next, struct cpu_workqueue_stats,
188 190 list);
189 191  
... ... @@ -199,10 +201,10 @@
199 201 cws->executed,
200 202 trace_find_cmdline(cws->pid));
201 203  
202   - spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
203   - if (&cws->list == all_workqueue_stat[cpu].list.next)
  204 + spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
  205 + if (&cws->list == workqueue_cpu_stat(cpu)->list.next)
204 206 seq_printf(s, "\n");
205   - spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
  207 + spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
206 208  
207 209 return 0;
208 210 }
209 211  
... ... @@ -258,17 +260,9 @@
258 260 if (ret)
259 261 goto no_creation;
260 262  
261   - all_workqueue_stat = kmalloc(sizeof(struct workqueue_global_stats)
262   - * num_possible_cpus(), GFP_KERNEL);
263   -
264   - if (!all_workqueue_stat) {
265   - pr_warning("trace_workqueue: not enough memory\n");
266   - goto no_creation;
267   - }
268   -
269 263 for_each_possible_cpu(cpu) {
270   - spin_lock_init(&all_workqueue_stat[cpu].lock);
271   - INIT_LIST_HEAD(&all_workqueue_stat[cpu].list);
  264 + spin_lock_init(&workqueue_cpu_stat(cpu)->lock);
  265 + INIT_LIST_HEAD(&workqueue_cpu_stat(cpu)->list);
272 266 }
273 267  
274 268 return 0;