Commit 068e35eee9ef98eb4cab55181977e24995d273be

Authored by Matt Helsley
Committed by Frederic Weisbecker
1 parent 89e45aac42

hw breakpoints: Fix pid namespace bug

Hardware breakpoints can't be registered within pid namespaces
because tsk->pid is passed rather than the pid in the current
namespace.

(See https://bugzilla.kernel.org/show_bug.cgi?id=17281)

This is a quick fix that demonstrates the problem, but it is not the
best way to solve it: passing pids around internally is exactly what
makes code prone to pid namespace bugs. Subsequent patches will show
a better solution.
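
To make the pid distinction concrete, here is a sketch (illustrative
only, not part of the patch; the variable names are made up). The pid
that perf_event_create_kernel_counter() expects is resolved relative to
the caller's pid namespace, which is why only the second value below is
safe to pass from inside a namespace:

    pid_t global_pid = tsk->pid;          /* pid number in the initial namespace */
    pid_t ns_pid     = task_pid_vnr(tsk); /* pid as the caller's namespace sees it */

    /* Fails (or finds the wrong task) when called inside a pid namespace: */
    perf_event_create_kernel_counter(attr, -1, global_pid, triggered);

    /* What this patch passes instead: */
    perf_event_create_kernel_counter(attr, -1, ns_pid, triggered);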

Many thanks to Frederic Weisbecker <fweisbec@gmail.com>, who did the
bulk of the work in tracking down this bug.

Reported-by: Robin Green <greenrd@greenrd.org>
Signed-off-by: Matt Helsley <matthltc@us.ibm.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Prasad <prasad@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Cc: 2.6.33-2.6.35 <stable@kernel.org>
LKML-Reference: <f63454af09fb1915717251570423eb9ddd338340.1284407762.git.matthltc@us.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>

Showing 1 changed file with 2 additions and 1 deletion

kernel/hw_breakpoint.c
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) 2007 Alan Stern
 * Copyright (C) IBM Corporation, 2009
 * Copyright (C) 2009, Frederic Weisbecker <fweisbec@gmail.com>
 *
 * Thanks to Ingo Molnar for his many suggestions.
 *
 * Authors: Alan Stern <stern@rowland.harvard.edu>
 *          K.Prasad <prasad@linux.vnet.ibm.com>
 *          Frederic Weisbecker <fweisbec@gmail.com>
 */

/*
 * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
 * using the CPU's debug registers.
 * This file contains the arch-independent routines.
 */

#include <linux/irqflags.h>
#include <linux/kallsyms.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/cpu.h>
#include <linux/smp.h>

#include <linux/hw_breakpoint.h>


/*
 * Constraints data
 */

/* Number of pinned cpu breakpoints in a cpu */
static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned[TYPE_MAX]);

/* Number of pinned task breakpoints in a cpu */
static DEFINE_PER_CPU(unsigned int *, nr_task_bp_pinned[TYPE_MAX]);

/* Number of non-pinned cpu/task breakpoints in a cpu */
static DEFINE_PER_CPU(unsigned int, nr_bp_flexible[TYPE_MAX]);

static int nr_slots[TYPE_MAX];

/* Keep track of the breakpoints attached to tasks */
static LIST_HEAD(bp_task_head);

static int constraints_initialized;

/* Gather the number of total pinned and un-pinned bp in a cpuset */
struct bp_busy_slots {
        unsigned int pinned;
        unsigned int flexible;
};

/* Serialize accesses to the above constraints */
static DEFINE_MUTEX(nr_bp_mutex);

__weak int hw_breakpoint_weight(struct perf_event *bp)
{
        return 1;
}

static inline enum bp_type_idx find_slot_idx(struct perf_event *bp)
{
        if (bp->attr.bp_type & HW_BREAKPOINT_RW)
                return TYPE_DATA;

        return TYPE_INST;
}

/*
 * Report the maximum number of pinned breakpoints a task
 * have in this cpu
 */
static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
{
        int i;
        unsigned int *tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu);

        for (i = nr_slots[type] - 1; i >= 0; i--) {
                if (tsk_pinned[i] > 0)
                        return i + 1;
        }

        return 0;
}

/*
 * Count the number of breakpoints of the same type and same task.
 * The given event must be not on the list.
 */
static int task_bp_pinned(struct perf_event *bp, enum bp_type_idx type)
{
        struct perf_event_context *ctx = bp->ctx;
        struct perf_event *iter;
        int count = 0;

        list_for_each_entry(iter, &bp_task_head, hw.bp_list) {
                if (iter->ctx == ctx && find_slot_idx(iter) == type)
                        count += hw_breakpoint_weight(iter);
        }

        return count;
}

/*
 * Report the number of pinned/un-pinned breakpoints we have in
 * a given cpu (cpu > -1) or in all of them (cpu = -1).
 */
static void
fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
                    enum bp_type_idx type)
{
        int cpu = bp->cpu;
        struct task_struct *tsk = bp->ctx->task;

        if (cpu >= 0) {
                slots->pinned = per_cpu(nr_cpu_bp_pinned[type], cpu);
                if (!tsk)
                        slots->pinned += max_task_bp_pinned(cpu, type);
                else
                        slots->pinned += task_bp_pinned(bp, type);
                slots->flexible = per_cpu(nr_bp_flexible[type], cpu);

                return;
        }

        for_each_online_cpu(cpu) {
                unsigned int nr;

                nr = per_cpu(nr_cpu_bp_pinned[type], cpu);
                if (!tsk)
                        nr += max_task_bp_pinned(cpu, type);
                else
                        nr += task_bp_pinned(bp, type);

                if (nr > slots->pinned)
                        slots->pinned = nr;

                nr = per_cpu(nr_bp_flexible[type], cpu);

                if (nr > slots->flexible)
                        slots->flexible = nr;
        }
}

/*
 * For now, continue to consider flexible as pinned, until we can
 * ensure no flexible event can ever be scheduled before a pinned event
 * in a same cpu.
 */
static void
fetch_this_slot(struct bp_busy_slots *slots, int weight)
{
        slots->pinned += weight;
}

/*
 * Add a pinned breakpoint for the given task in our constraint table
 */
static void toggle_bp_task_slot(struct perf_event *bp, int cpu, bool enable,
                                enum bp_type_idx type, int weight)
{
        unsigned int *tsk_pinned;
        int old_count = 0;
        int old_idx = 0;
        int idx = 0;

        old_count = task_bp_pinned(bp, type);
        old_idx = old_count - 1;
        idx = old_idx + weight;

        /* tsk_pinned[n] is the number of tasks having n breakpoints */
        tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu);
        if (enable) {
                tsk_pinned[idx]++;
                if (old_count > 0)
                        tsk_pinned[old_idx]--;
        } else {
                tsk_pinned[idx]--;
                if (old_count > 0)
                        tsk_pinned[old_idx]++;
        }
}

/*
 * Add/remove the given breakpoint in our constraint table
 */
static void
toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type,
               int weight)
{
        int cpu = bp->cpu;
        struct task_struct *tsk = bp->ctx->task;

        /* Pinned counter cpu profiling */
        if (!tsk) {

                if (enable)
                        per_cpu(nr_cpu_bp_pinned[type], bp->cpu) += weight;
                else
                        per_cpu(nr_cpu_bp_pinned[type], bp->cpu) -= weight;
                return;
        }

        /* Pinned counter task profiling */

        if (!enable)
                list_del(&bp->hw.bp_list);

        if (cpu >= 0) {
                toggle_bp_task_slot(bp, cpu, enable, type, weight);
        } else {
                for_each_online_cpu(cpu)
                        toggle_bp_task_slot(bp, cpu, enable, type, weight);
        }

        if (enable)
                list_add_tail(&bp->hw.bp_list, &bp_task_head);
}

/*
 * Function to perform processor-specific cleanup during unregistration
 */
__weak void arch_unregister_hw_breakpoint(struct perf_event *bp)
{
        /*
         * A weak stub function here for those archs that don't define
         * it inside arch/.../kernel/hw_breakpoint.c
         */
}

/*
 * Contraints to check before allowing this new breakpoint counter:
 *
 *  == Non-pinned counter == (Considered as pinned for now)
 *
 *   - If attached to a single cpu, check:
 *
 *       (per_cpu(nr_bp_flexible, cpu) || (per_cpu(nr_cpu_bp_pinned, cpu)
 *           + max(per_cpu(nr_task_bp_pinned, cpu)))) < HBP_NUM
 *
 *       -> If there are already non-pinned counters in this cpu, it means
 *          there is already a free slot for them.
 *          Otherwise, we check that the maximum number of per task
 *          breakpoints (for this cpu) plus the number of per cpu breakpoint
 *          (for this cpu) doesn't cover every registers.
 *
 *   - If attached to every cpus, check:
 *
 *       (per_cpu(nr_bp_flexible, *) || (max(per_cpu(nr_cpu_bp_pinned, *))
 *           + max(per_cpu(nr_task_bp_pinned, *)))) < HBP_NUM
 *
 *       -> This is roughly the same, except we check the number of per cpu
 *          bp for every cpu and we keep the max one. Same for the per tasks
 *          breakpoints.
 *
 *
 * == Pinned counter ==
 *
 *   - If attached to a single cpu, check:
 *
 *       ((per_cpu(nr_bp_flexible, cpu) > 1) + per_cpu(nr_cpu_bp_pinned, cpu)
 *            + max(per_cpu(nr_task_bp_pinned, cpu))) < HBP_NUM
 *
 *       -> Same checks as before. But now the nr_bp_flexible, if any, must keep
 *          one register at least (or they will never be fed).
 *
 *   - If attached to every cpus, check:
 *
 *       ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *))
 *            + max(per_cpu(nr_task_bp_pinned, *))) < HBP_NUM
 */
static int __reserve_bp_slot(struct perf_event *bp)
{
        struct bp_busy_slots slots = {0};
        enum bp_type_idx type;
        int weight;

        /* We couldn't initialize breakpoint constraints on boot */
        if (!constraints_initialized)
                return -ENOMEM;

        /* Basic checks */
        if (bp->attr.bp_type == HW_BREAKPOINT_EMPTY ||
            bp->attr.bp_type == HW_BREAKPOINT_INVALID)
                return -EINVAL;

        type = find_slot_idx(bp);
        weight = hw_breakpoint_weight(bp);

        fetch_bp_busy_slots(&slots, bp, type);
        /*
         * Simulate the addition of this breakpoint to the constraints
         * and see the result.
         */
        fetch_this_slot(&slots, weight);

        /* Flexible counters need to keep at least one slot */
        if (slots.pinned + (!!slots.flexible) > nr_slots[type])
                return -ENOSPC;

        toggle_bp_slot(bp, true, type, weight);

        return 0;
}

int reserve_bp_slot(struct perf_event *bp)
{
        int ret;

        mutex_lock(&nr_bp_mutex);

        ret = __reserve_bp_slot(bp);

        mutex_unlock(&nr_bp_mutex);

        return ret;
}

static void __release_bp_slot(struct perf_event *bp)
{
        enum bp_type_idx type;
        int weight;

        type = find_slot_idx(bp);
        weight = hw_breakpoint_weight(bp);
        toggle_bp_slot(bp, false, type, weight);
}

void release_bp_slot(struct perf_event *bp)
{
        mutex_lock(&nr_bp_mutex);

        arch_unregister_hw_breakpoint(bp);
        __release_bp_slot(bp);

        mutex_unlock(&nr_bp_mutex);
}

/*
 * Allow the kernel debugger to reserve breakpoint slots without
 * taking a lock using the dbg_* variant of for the reserve and
 * release breakpoint slots.
 */
int dbg_reserve_bp_slot(struct perf_event *bp)
{
        if (mutex_is_locked(&nr_bp_mutex))
                return -1;

        return __reserve_bp_slot(bp);
}

int dbg_release_bp_slot(struct perf_event *bp)
{
        if (mutex_is_locked(&nr_bp_mutex))
                return -1;

        __release_bp_slot(bp);

        return 0;
}

static int validate_hw_breakpoint(struct perf_event *bp)
{
        int ret;

        ret = arch_validate_hwbkpt_settings(bp);
        if (ret)
                return ret;

        if (arch_check_bp_in_kernelspace(bp)) {
                if (bp->attr.exclude_kernel)
                        return -EINVAL;
                /*
                 * Don't let unprivileged users set a breakpoint in the trap
                 * path to avoid trap recursion attacks.
                 */
                if (!capable(CAP_SYS_ADMIN))
                        return -EPERM;
        }

        return 0;
}

int register_perf_hw_breakpoint(struct perf_event *bp)
{
        int ret;

        ret = reserve_bp_slot(bp);
        if (ret)
                return ret;

        ret = validate_hw_breakpoint(bp);

        /* if arch_validate_hwbkpt_settings() fails then release bp slot */
        if (ret)
                release_bp_slot(bp);

        return ret;
}

/**
 * register_user_hw_breakpoint - register a hardware breakpoint for user space
 * @attr: breakpoint attributes
 * @triggered: callback to trigger when we hit the breakpoint
 * @tsk: pointer to 'task_struct' of the process to which the address belongs
 */
struct perf_event *
register_user_hw_breakpoint(struct perf_event_attr *attr,
                            perf_overflow_handler_t triggered,
                            struct task_struct *tsk)
{
-       return perf_event_create_kernel_counter(attr, -1, tsk->pid, triggered);
+       return perf_event_create_kernel_counter(attr, -1, task_pid_vnr(tsk),
+                                               triggered);
}
EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);

/**
 * modify_user_hw_breakpoint - modify a user-space hardware breakpoint
 * @bp: the breakpoint structure to modify
 * @attr: new breakpoint attributes
 * @triggered: callback to trigger when we hit the breakpoint
 * @tsk: pointer to 'task_struct' of the process to which the address belongs
 */
int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr)
{
        u64 old_addr = bp->attr.bp_addr;
        u64 old_len = bp->attr.bp_len;
        int old_type = bp->attr.bp_type;
        int err = 0;

        perf_event_disable(bp);

        bp->attr.bp_addr = attr->bp_addr;
        bp->attr.bp_type = attr->bp_type;
        bp->attr.bp_len = attr->bp_len;

        if (attr->disabled)
                goto end;

        err = validate_hw_breakpoint(bp);
        if (!err)
                perf_event_enable(bp);

        if (err) {
                bp->attr.bp_addr = old_addr;
                bp->attr.bp_type = old_type;
                bp->attr.bp_len = old_len;
                if (!bp->attr.disabled)
                        perf_event_enable(bp);

                return err;
        }

end:
        bp->attr.disabled = attr->disabled;

        return 0;
}
EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);

/**
 * unregister_hw_breakpoint - unregister a user-space hardware breakpoint
 * @bp: the breakpoint structure to unregister
 */
void unregister_hw_breakpoint(struct perf_event *bp)
{
        if (!bp)
                return;
        perf_event_release_kernel(bp);
}
EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);

/**
 * register_wide_hw_breakpoint - register a wide breakpoint in the kernel
 * @attr: breakpoint attributes
 * @triggered: callback to trigger when we hit the breakpoint
 *
 * @return a set of per_cpu pointers to perf events
 */
struct perf_event * __percpu *
register_wide_hw_breakpoint(struct perf_event_attr *attr,
                            perf_overflow_handler_t triggered)
{
        struct perf_event * __percpu *cpu_events, **pevent, *bp;
        long err;
        int cpu;

        cpu_events = alloc_percpu(typeof(*cpu_events));
        if (!cpu_events)
                return (void __percpu __force *)ERR_PTR(-ENOMEM);

        get_online_cpus();
        for_each_online_cpu(cpu) {
                pevent = per_cpu_ptr(cpu_events, cpu);
                bp = perf_event_create_kernel_counter(attr, cpu, -1, triggered);

                *pevent = bp;

                if (IS_ERR(bp)) {
                        err = PTR_ERR(bp);
                        goto fail;
                }
        }
        put_online_cpus();

        return cpu_events;

fail:
        for_each_online_cpu(cpu) {
                pevent = per_cpu_ptr(cpu_events, cpu);
                if (IS_ERR(*pevent))
                        break;
                unregister_hw_breakpoint(*pevent);
        }
        put_online_cpus();

        free_percpu(cpu_events);
        return (void __percpu __force *)ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);

/**
 * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel
 * @cpu_events: the per cpu set of events to unregister
 */
void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events)
{
        int cpu;
        struct perf_event **pevent;

        for_each_possible_cpu(cpu) {
                pevent = per_cpu_ptr(cpu_events, cpu);
                unregister_hw_breakpoint(*pevent);
        }
        free_percpu(cpu_events);
}
EXPORT_SYMBOL_GPL(unregister_wide_hw_breakpoint);

static struct notifier_block hw_breakpoint_exceptions_nb = {
        .notifier_call = hw_breakpoint_exceptions_notify,
        /* we need to be notified first */
        .priority = 0x7fffffff
};

static int __init init_hw_breakpoint(void)
{
        unsigned int **task_bp_pinned;
        int cpu, err_cpu;
        int i;

        for (i = 0; i < TYPE_MAX; i++)
                nr_slots[i] = hw_breakpoint_slots(i);

        for_each_possible_cpu(cpu) {
                for (i = 0; i < TYPE_MAX; i++) {
                        task_bp_pinned = &per_cpu(nr_task_bp_pinned[i], cpu);
                        *task_bp_pinned = kzalloc(sizeof(int) * nr_slots[i],
                                                  GFP_KERNEL);
                        if (!*task_bp_pinned)
                                goto err_alloc;
                }
        }

        constraints_initialized = 1;

        return register_die_notifier(&hw_breakpoint_exceptions_nb);

err_alloc:
        for_each_possible_cpu(err_cpu) {
                if (err_cpu == cpu)
                        break;
                for (i = 0; i < TYPE_MAX; i++)
                        kfree(per_cpu(nr_task_bp_pinned[i], cpu));
        }

        return -ENOMEM;
}
core_initcall(init_hw_breakpoint);


struct pmu perf_ops_bp = {
        .enable         = arch_install_hw_breakpoint,
        .disable        = arch_uninstall_hw_breakpoint,
        .read           = hw_breakpoint_pmu_read,
};
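For context, the register_wide_hw_breakpoint()/unregister_wide_hw_breakpoint()
pair shown above is normally driven from kernel code roughly as in the in-tree
sample module samples/hw_breakpoint/data_breakpoint.c. The sketch below is
illustrative only, not part of this patch: it assumes the perf_overflow_handler_t
signature of this kernel series (bp, nmi, data, regs), and "pid_max" is just an
arbitrary kernel symbol picked for the example.

    #include <linux/kernel.h>
    #include <linux/module.h>
    #include <linux/init.h>
    #include <linux/kallsyms.h>
    #include <linux/perf_event.h>
    #include <linux/hw_breakpoint.h>

    static struct perf_event * __percpu *sample_hbp;

    /* Called whenever the watched kernel variable is read or written */
    static void sample_hbp_handler(struct perf_event *bp, int nmi,
                                   struct perf_sample_data *data,
                                   struct pt_regs *regs)
    {
            printk(KERN_INFO "watched symbol was accessed\n");
            dump_stack();
    }

    static int __init hbp_sample_init(void)
    {
            struct perf_event_attr attr;

            hw_breakpoint_init(&attr);
            attr.bp_addr = kallsyms_lookup_name("pid_max");
            attr.bp_len  = HW_BREAKPOINT_LEN_4;
            attr.bp_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;

            /* Reserves one breakpoint slot on every online cpu */
            sample_hbp = register_wide_hw_breakpoint(&attr, sample_hbp_handler);
            if (IS_ERR((void __force *)sample_hbp))
                    return PTR_ERR((void __force *)sample_hbp);

            return 0;
    }

    static void __exit hbp_sample_exit(void)
    {
            unregister_wide_hw_breakpoint(sample_hbp);
    }

    module_init(hbp_sample_init);
    module_exit(hbp_sample_exit);
    MODULE_LICENSE("GPL");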