Blame view
kernel/trace/blktrace.c
46.3 KB
91c1e6ba3
|
1 |
// SPDX-License-Identifier: GPL-2.0 |
2056a782f
|
2 |
/* |
0fe234795
|
3 |
* Copyright (C) 2006 Jens Axboe <axboe@kernel.dk> |
2056a782f
|
4 |
* |
2056a782f
|
5 |
*/ |
2056a782f
|
6 7 8 9 10 11 |
#include <linux/kernel.h> #include <linux/blkdev.h> #include <linux/blktrace_api.h> #include <linux/percpu.h> #include <linux/init.h> #include <linux/mutex.h> |
5a0e3ad6a
|
12 |
#include <linux/slab.h> |
2056a782f
|
13 |
#include <linux/debugfs.h> |
6e5fdeedc
|
14 |
#include <linux/export.h> |
be1c63411
|
15 |
#include <linux/time.h> |
939b36697
|
16 |
#include <linux/uaccess.h> |
a404d5576
|
17 |
#include <linux/list.h> |
ca1136c99
|
18 |
#include <linux/blk-cgroup.h> |
55782138e
|
19 |
|
18fbda91c
|
20 |
#include "../../block/blk.h" |
55782138e
|
21 |
#include <trace/events/block.h> |
2db270a80
|
22 |
#include "trace_output.h" |
2056a782f
|
23 |
|
55782138e
|
24 |
#ifdef CONFIG_BLK_DEV_IO_TRACE |
2056a782f
|
25 |
static unsigned int blktrace_seq __read_mostly = 1; |
c71a89615
|
26 |
static struct trace_array *blk_tr; |
5006ea73f
|
27 |
static bool blk_tracer_enabled __read_mostly; |
c71a89615
|
28 |
|
a404d5576
|
29 30 |
static LIST_HEAD(running_trace_list); static __cacheline_aligned_in_smp DEFINE_SPINLOCK(running_trace_lock); |
c71a89615
|
31 |
/* Select an alternative, minimalistic output than the original one */ |
ef18012b2
|
32 |
#define TRACE_BLK_OPT_CLASSIC 0x1 |
ca1136c99
|
33 |
#define TRACE_BLK_OPT_CGROUP 0x2 |
69fd5c391
|
34 |
#define TRACE_BLK_OPT_CGNAME 0x4 |
c71a89615
|
35 36 37 |
static struct tracer_opt blk_tracer_opts[] = { /* Default disable the minimalistic output */ |
157f9c00e
|
38 |
{ TRACER_OPT(blk_classic, TRACE_BLK_OPT_CLASSIC) }, |
ca1136c99
|
39 40 |
#ifdef CONFIG_BLK_CGROUP { TRACER_OPT(blk_cgroup, TRACE_BLK_OPT_CGROUP) }, |
69fd5c391
|
41 |
{ TRACER_OPT(blk_cgname, TRACE_BLK_OPT_CGNAME) }, |
ca1136c99
|
42 |
#endif |
c71a89615
|
43 44 45 46 47 48 49 |
{ } }; static struct tracer_flags blk_tracer_flags = { .val = 0, .opts = blk_tracer_opts, }; |
5f3ea37c7
|
50 |
/* Global reference count of probes */ |
a6da0024f
|
51 52 |
static DEFINE_MUTEX(blk_probe_mutex); static int blk_probes_ref; |
5f3ea37c7
|
53 |
|
3c289ba7c
|
54 |
static void blk_register_tracepoints(void); |
5f3ea37c7
|
55 |
static void blk_unregister_tracepoints(void); |
2056a782f
|
56 |
/* |
be1c63411
|
57 58 |
* Send out a notify message. */ |
a863055b1
|
59 |
static void trace_note(struct blk_trace *bt, pid_t pid, int action, |
ca1136c99
|
60 61 |
const void *data, size_t len, union kernfs_node_id *cgid) |
be1c63411
|
62 63 |
{ struct blk_io_trace *t; |
18cea4591
|
64 |
struct ring_buffer_event *event = NULL; |
e77405ad8
|
65 |
struct ring_buffer *buffer = NULL; |
18cea4591
|
66 67 68 |
int pc = 0; int cpu = smp_processor_id(); bool blk_tracer = blk_tracer_enabled; |
ca1136c99
|
69 |
ssize_t cgid_len = cgid ? sizeof(*cgid) : 0; |
18cea4591
|
70 71 |
if (blk_tracer) { |
12883efb6
|
72 |
buffer = blk_tr->trace_buffer.buffer; |
18cea4591
|
73 |
pc = preempt_count(); |
e77405ad8
|
74 |
event = trace_buffer_lock_reserve(buffer, TRACE_BLK, |
ca1136c99
|
75 |
sizeof(*t) + len + cgid_len, |
18cea4591
|
76 77 78 79 80 81 |
0, pc); if (!event) return; t = ring_buffer_event_data(event); goto record_it; } |
be1c63411
|
82 |
|
c71a89615
|
83 84 |
if (!bt->rchan) return; |
ca1136c99
|
85 |
t = relay_reserve(bt->rchan, sizeof(*t) + len + cgid_len); |
d3d9d2a5e
|
86 |
if (t) { |
d3d9d2a5e
|
87 |
t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION; |
2997c8c4a
|
88 |
t->time = ktime_to_ns(ktime_get()); |
18cea4591
|
89 |
record_it: |
d3d9d2a5e
|
90 |
t->device = bt->dev; |
ca1136c99
|
91 |
t->action = action | (cgid ? __BLK_TN_CGROUP : 0); |
d3d9d2a5e
|
92 93 |
t->pid = pid; t->cpu = cpu; |
ca1136c99
|
94 95 96 97 |
t->pdu_len = len + cgid_len; if (cgid) memcpy((void *)t + sizeof(*t), cgid, cgid_len); memcpy((void *) t + sizeof(*t) + cgid_len, data, len); |
18cea4591
|
98 99 |
if (blk_tracer) |
b7f0c959e
|
100 |
trace_buffer_unlock_commit(blk_tr, buffer, event, 0, pc); |
d3d9d2a5e
|
101 |
} |
be1c63411
|
102 103 104 |
} /* |
2056a782f
|
105 106 107 |
* Send out a notify for this process, if we haven't done so since a trace * started */ |
a404d5576
|
108 |
static void trace_note_tsk(struct task_struct *tsk) |
2056a782f
|
109 |
{ |
a404d5576
|
110 111 |
unsigned long flags; struct blk_trace *bt; |
a863055b1
|
112 |
tsk->btrace_seq = blktrace_seq; |
a404d5576
|
113 114 115 |
spin_lock_irqsave(&running_trace_lock, flags); list_for_each_entry(bt, &running_trace_list, running_list) { trace_note(bt, tsk->pid, BLK_TN_PROCESS, tsk->comm, |
ca1136c99
|
116 |
sizeof(tsk->comm), NULL); |
a404d5576
|
117 118 |
} spin_unlock_irqrestore(&running_trace_lock, flags); |
be1c63411
|
119 |
} |
2056a782f
|
120 |
|
be1c63411
|
121 122 |
static void trace_note_time(struct blk_trace *bt) { |
59a37f8ba
|
123 |
struct timespec64 now; |
be1c63411
|
124 125 |
unsigned long flags; u32 words[2]; |
59a37f8ba
|
126 127 128 |
/* need to check user space to see if this breaks in y2038 or y2106 */ ktime_get_real_ts64(&now); words[0] = (u32)now.tv_sec; |
be1c63411
|
129 130 131 |
words[1] = now.tv_nsec; local_irq_save(flags); |
ca1136c99
|
132 |
trace_note(bt, 0, BLK_TN_TIMESTAMP, words, sizeof(words), NULL); |
be1c63411
|
133 |
local_irq_restore(flags); |
2056a782f
|
134 |
} |
35fe6d763
|
135 136 |
void __trace_note_message(struct blk_trace *bt, struct blkcg *blkcg, const char *fmt, ...) |
9d5f09a42
|
137 138 139 |
{ int n; va_list args; |
14a73f547
|
140 |
unsigned long flags; |
64565911c
|
141 |
char *buf; |
9d5f09a42
|
142 |
|
18cea4591
|
143 144 |
if (unlikely(bt->trace_state != Blktrace_running && !blk_tracer_enabled)) |
c71a89615
|
145 |
return; |
490da40d8
|
146 147 148 149 150 151 |
/* * If the BLK_TC_NOTIFY action mask isn't set, don't send any note * message to the trace. */ if (!(bt->act_mask & BLK_TC_NOTIFY)) return; |
14a73f547
|
152 |
local_irq_save(flags); |
d8a0349c0
|
153 |
buf = this_cpu_ptr(bt->msg_data); |
9d5f09a42
|
154 |
va_start(args, fmt); |
64565911c
|
155 |
n = vscnprintf(buf, BLK_TN_MAX_MSG, fmt, args); |
9d5f09a42
|
156 |
va_end(args); |
35fe6d763
|
157 158 159 160 161 162 |
if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP)) blkcg = NULL; #ifdef CONFIG_BLK_CGROUP trace_note(bt, 0, BLK_TN_MESSAGE, buf, n, blkcg ? cgroup_get_kernfs_id(blkcg->css.cgroup) : NULL); #else |
ca1136c99
|
163 |
trace_note(bt, 0, BLK_TN_MESSAGE, buf, n, NULL); |
35fe6d763
|
164 |
#endif |
14a73f547
|
165 |
local_irq_restore(flags); |
9d5f09a42
|
166 167 |
} EXPORT_SYMBOL_GPL(__trace_note_message); |
2056a782f
|
168 169 170 171 172 |
static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector, pid_t pid) { if (((bt->act_mask << BLK_TC_SHIFT) & what) == 0) return 1; |
d0deef5b1
|
173 |
if (sector && (sector < bt->start_lba || sector > bt->end_lba)) |
2056a782f
|
174 175 176 177 178 179 180 181 182 183 |
return 1; if (bt->pid && pid != bt->pid) return 1; return 0; } /* * Data direction bit lookup */ |
e4955c998
|
184 185 |
static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ), BLK_TC_ACT(BLK_TC_WRITE) }; |
2056a782f
|
186 |
|
7b6d91dae
|
187 |
#define BLK_TC_RAHEAD BLK_TC_AHEAD |
28a8f0d31
|
188 |
#define BLK_TC_PREFLUSH BLK_TC_FLUSH |
7b6d91dae
|
189 |
|
35ba8f708
|
190 |
/* The ilog2() calls fall out because they're constant */ |
7b6d91dae
|
191 192 |
#define MASK_TC_BIT(rw, __name) ((rw & REQ_ ## __name) << \ (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - __REQ_ ## __name)) |
2056a782f
|
193 194 195 196 197 |
/* * The worker for the various blk_add_trace*() types. Fills out a * blk_io_trace structure and places it in a per-cpu subbuffer. */ |
5f3ea37c7
|
198 |
static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, |
1b9a9ab78
|
199 |
int op, int op_flags, u32 what, int error, int pdu_len, |
ca1136c99
|
200 |
void *pdu_data, union kernfs_node_id *cgid) |
2056a782f
|
201 202 |
{ struct task_struct *tsk = current; |
c71a89615
|
203 |
struct ring_buffer_event *event = NULL; |
e77405ad8
|
204 |
struct ring_buffer *buffer = NULL; |
2056a782f
|
205 |
struct blk_io_trace *t; |
0a9877514
|
206 |
unsigned long flags = 0; |
2056a782f
|
207 208 |
unsigned long *sequence; pid_t pid; |
c71a89615
|
209 |
int cpu, pc = 0; |
18cea4591
|
210 |
bool blk_tracer = blk_tracer_enabled; |
ca1136c99
|
211 |
ssize_t cgid_len = cgid ? sizeof(*cgid) : 0; |
2056a782f
|
212 |
|
18cea4591
|
213 |
if (unlikely(bt->trace_state != Blktrace_running && !blk_tracer)) |
2056a782f
|
214 |
return; |
1b9a9ab78
|
215 216 217 218 |
what |= ddir_act[op_is_write(op) ? WRITE : READ]; what |= MASK_TC_BIT(op_flags, SYNC); what |= MASK_TC_BIT(op_flags, RAHEAD); what |= MASK_TC_BIT(op_flags, META); |
28a8f0d31
|
219 |
what |= MASK_TC_BIT(op_flags, PREFLUSH); |
1b9a9ab78
|
220 |
what |= MASK_TC_BIT(op_flags, FUA); |
7afafc8a4
|
221 |
if (op == REQ_OP_DISCARD || op == REQ_OP_SECURE_ERASE) |
1b9a9ab78
|
222 |
what |= BLK_TC_ACT(BLK_TC_DISCARD); |
3a5e02ced
|
223 224 |
if (op == REQ_OP_FLUSH) what |= BLK_TC_ACT(BLK_TC_FLUSH); |
ca1136c99
|
225 226 |
if (cgid) what |= __BLK_TA_CGROUP; |
2056a782f
|
227 228 |
pid = tsk->pid; |
d0deef5b1
|
229 |
if (act_log_check(bt, what, sector, pid)) |
2056a782f
|
230 |
return; |
c71a89615
|
231 |
cpu = raw_smp_processor_id(); |
18cea4591
|
232 |
if (blk_tracer) { |
c71a89615
|
233 |
tracing_record_cmdline(current); |
12883efb6
|
234 |
buffer = blk_tr->trace_buffer.buffer; |
51a763dd8
|
235 |
pc = preempt_count(); |
e77405ad8
|
236 |
event = trace_buffer_lock_reserve(buffer, TRACE_BLK, |
ca1136c99
|
237 |
sizeof(*t) + pdu_len + cgid_len, |
51a763dd8
|
238 |
0, pc); |
c71a89615
|
239 240 |
if (!event) return; |
51a763dd8
|
241 |
t = ring_buffer_event_data(event); |
c71a89615
|
242 243 |
goto record_it; } |
2056a782f
|
244 |
|
a404d5576
|
245 246 |
if (unlikely(tsk->btrace_seq != blktrace_seq)) trace_note_tsk(tsk); |
2056a782f
|
247 248 249 |
/* * A word about the locking here - we disable interrupts to reserve * some space in the relay per-cpu buffer, to prevent an irq |
14a73f547
|
250 |
* from coming in and stepping on our toes. |
2056a782f
|
251 252 |
*/ local_irq_save(flags); |
ca1136c99
|
253 |
t = relay_reserve(bt->rchan, sizeof(*t) + pdu_len + cgid_len); |
2056a782f
|
254 |
if (t) { |
2056a782f
|
255 256 257 258 |
sequence = per_cpu_ptr(bt->sequence, cpu); t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION; t->sequence = ++(*sequence); |
2997c8c4a
|
259 |
t->time = ktime_to_ns(ktime_get()); |
c71a89615
|
260 |
record_it: |
08a06b83f
|
261 |
/* |
939b36697
|
262 263 264 265 266 267 268 |
* These two are not needed in ftrace as they are in the * generic trace_entry, filled by tracing_generic_entry_update, * but for the trace_event->bin() synthesizer benefit we do it * here too. */ t->cpu = cpu; t->pid = pid; |
08a06b83f
|
269 |
|
2056a782f
|
270 271 272 |
t->sector = sector; t->bytes = bytes; t->action = what; |
2056a782f
|
273 |
t->device = bt->dev; |
2056a782f
|
274 |
t->error = error; |
ca1136c99
|
275 |
t->pdu_len = pdu_len + cgid_len; |
2056a782f
|
276 |
|
ca1136c99
|
277 278 |
if (cgid_len) memcpy((void *)t + sizeof(*t), cgid, cgid_len); |
2056a782f
|
279 |
if (pdu_len) |
ca1136c99
|
280 |
memcpy((void *)t + sizeof(*t) + cgid_len, pdu_data, pdu_len); |
c71a89615
|
281 |
|
18cea4591
|
282 |
if (blk_tracer) { |
b7f0c959e
|
283 |
trace_buffer_unlock_commit(blk_tr, buffer, event, 0, pc); |
c71a89615
|
284 285 |
return; } |
2056a782f
|
286 287 288 289 |
} local_irq_restore(flags); } |
ad5dd5493
|
290 |
static void blk_trace_free(struct blk_trace *bt) |
2056a782f
|
291 |
{ |
02c62304e
|
292 |
debugfs_remove(bt->msg_file); |
2056a782f
|
293 |
debugfs_remove(bt->dropped_file); |
f48fc4d32
|
294 |
relay_close(bt->rchan); |
39cbb602b
|
295 |
debugfs_remove(bt->dir); |
2056a782f
|
296 |
free_percpu(bt->sequence); |
64565911c
|
297 |
free_percpu(bt->msg_data); |
2056a782f
|
298 |
kfree(bt); |
ad5dd5493
|
299 |
} |
a6da0024f
|
300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 |
static void get_probe_ref(void) { mutex_lock(&blk_probe_mutex); if (++blk_probes_ref == 1) blk_register_tracepoints(); mutex_unlock(&blk_probe_mutex); } static void put_probe_ref(void) { mutex_lock(&blk_probe_mutex); if (!--blk_probes_ref) blk_unregister_tracepoints(); mutex_unlock(&blk_probe_mutex); } |
ad5dd5493
|
315 316 |
static void blk_trace_cleanup(struct blk_trace *bt) { |
6f9cff84d
|
317 |
synchronize_rcu(); |
ad5dd5493
|
318 |
blk_trace_free(bt); |
a6da0024f
|
319 |
put_probe_ref(); |
2056a782f
|
320 |
} |
1f2cac107
|
321 |
static int __blk_trace_remove(struct request_queue *q) |
2056a782f
|
322 323 324 325 326 327 |
{ struct blk_trace *bt; bt = xchg(&q->blk_trace, NULL); if (!bt) return -EINVAL; |
555472048
|
328 |
if (bt->trace_state != Blktrace_running) |
2056a782f
|
329 330 331 332 |
blk_trace_cleanup(bt); return 0; } |
1f2cac107
|
333 334 335 336 337 338 339 340 341 342 343 |
int blk_trace_remove(struct request_queue *q) { int ret; mutex_lock(&q->blk_trace_mutex); ret = __blk_trace_remove(q); mutex_unlock(&q->blk_trace_mutex); return ret; } |
6da127ad0
|
344 |
EXPORT_SYMBOL_GPL(blk_trace_remove); |
2056a782f
|
345 |
|
2056a782f
|
346 347 348 349 350 351 352 353 354 355 356 |
static ssize_t blk_dropped_read(struct file *filp, char __user *buffer, size_t count, loff_t *ppos) { struct blk_trace *bt = filp->private_data; char buf[16]; snprintf(buf, sizeof(buf), "%u ", atomic_read(&bt->dropped)); return simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf)); } |
2b8693c06
|
357 |
static const struct file_operations blk_dropped_fops = { |
2056a782f
|
358 |
.owner = THIS_MODULE, |
234e34058
|
359 |
.open = simple_open, |
2056a782f
|
360 |
.read = blk_dropped_read, |
6038f373a
|
361 |
.llseek = default_llseek, |
2056a782f
|
362 |
}; |
02c62304e
|
363 364 365 366 367 |
static ssize_t blk_msg_write(struct file *filp, const char __user *buffer, size_t count, loff_t *ppos) { char *msg; struct blk_trace *bt; |
7635b03ad
|
368 |
if (count >= BLK_TN_MAX_MSG) |
02c62304e
|
369 |
return -EINVAL; |
16e5c1fc3
|
370 371 372 |
msg = memdup_user_nul(buffer, count); if (IS_ERR(msg)) return PTR_ERR(msg); |
02c62304e
|
373 |
|
02c62304e
|
374 |
bt = filp->private_data; |
35fe6d763
|
375 |
__trace_note_message(bt, NULL, "%s", msg); |
02c62304e
|
376 377 378 379 380 381 382 |
kfree(msg); return count; } static const struct file_operations blk_msg_fops = { .owner = THIS_MODULE, |
234e34058
|
383 |
.open = simple_open, |
02c62304e
|
384 |
.write = blk_msg_write, |
6038f373a
|
385 |
.llseek = noop_llseek, |
02c62304e
|
386 |
}; |
2056a782f
|
387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 |
/* * Keep track of how many times we encountered a full subbuffer, to aid * the user space app in telling how many lost events there were. */ static int blk_subbuf_start_callback(struct rchan_buf *buf, void *subbuf, void *prev_subbuf, size_t prev_padding) { struct blk_trace *bt; if (!relay_buf_full(buf)) return 1; bt = buf->chan->private_data; atomic_inc(&bt->dropped); return 0; } static int blk_remove_buf_file_callback(struct dentry *dentry) { debugfs_remove(dentry); |
f48fc4d32
|
407 |
|
2056a782f
|
408 409 410 411 412 |
return 0; } static struct dentry *blk_create_buf_file_callback(const char *filename, struct dentry *parent, |
f4ae40a6a
|
413 |
umode_t mode, |
2056a782f
|
414 415 416 417 418 419 420 421 422 423 424 425 |
struct rchan_buf *buf, int *is_global) { return debugfs_create_file(filename, mode, parent, buf, &relay_file_operations); } static struct rchan_callbacks blk_relay_callbacks = { .subbuf_start = blk_subbuf_start_callback, .create_buf_file = blk_create_buf_file_callback, .remove_buf_file = blk_remove_buf_file_callback, }; |
9908c3099
|
426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 |
static void blk_trace_setup_lba(struct blk_trace *bt, struct block_device *bdev) { struct hd_struct *part = NULL; if (bdev) part = bdev->bd_part; if (part) { bt->start_lba = part->start_sect; bt->end_lba = part->start_sect + part->nr_sects; } else { bt->start_lba = 0; bt->end_lba = -1ULL; } } |
2056a782f
|
442 443 444 |
/* * Setup everything required to start tracing */ |
a428d314e
|
445 446 447 |
static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, struct block_device *bdev, struct blk_user_trace_setup *buts) |
2056a782f
|
448 |
{ |
cdea01b2b
|
449 |
struct blk_trace *bt = NULL; |
2056a782f
|
450 |
struct dentry *dir = NULL; |
ff14417c0
|
451 |
int ret; |
2056a782f
|
452 |
|
171044d44
|
453 |
if (!buts->buf_size || !buts->buf_nr) |
2056a782f
|
454 |
return -EINVAL; |
e1a413245
|
455 456 |
if (!blk_debugfs_root) return -ENOENT; |
0497b345e
|
457 458 |
strncpy(buts->name, name, BLKTRACE_BDEV_SIZE); buts->name[BLKTRACE_BDEV_SIZE - 1] = '\0'; |
2056a782f
|
459 460 461 462 463 |
/* * some device names have larger paths - convert the slashes * to underscores for this to work as expected */ |
ff14417c0
|
464 |
strreplace(buts->name, '/', '_'); |
2056a782f
|
465 |
|
2056a782f
|
466 467 |
bt = kzalloc(sizeof(*bt), GFP_KERNEL); if (!bt) |
ad5dd5493
|
468 |
return -ENOMEM; |
2056a782f
|
469 |
|
ad5dd5493
|
470 |
ret = -ENOMEM; |
2056a782f
|
471 472 473 |
bt->sequence = alloc_percpu(unsigned long); if (!bt->sequence) goto err; |
313e458f8
|
474 |
bt->msg_data = __alloc_percpu(BLK_TN_MAX_MSG, __alignof__(char)); |
64565911c
|
475 476 |
if (!bt->msg_data) goto err; |
2056a782f
|
477 |
ret = -ENOENT; |
f48fc4d32
|
478 |
|
6ac93117a
|
479 480 481 |
dir = debugfs_lookup(buts->name, blk_debugfs_root); if (!dir) bt->dir = dir = debugfs_create_dir(buts->name, blk_debugfs_root); |
2056a782f
|
482 |
|
6da127ad0
|
483 |
bt->dev = dev; |
2056a782f
|
484 |
atomic_set(&bt->dropped, 0); |
a404d5576
|
485 |
INIT_LIST_HEAD(&bt->running_list); |
2056a782f
|
486 487 |
ret = -EIO; |
939b36697
|
488 489 |
bt->dropped_file = debugfs_create_file("dropped", 0444, dir, bt, &blk_dropped_fops); |
2056a782f
|
490 |
|
02c62304e
|
491 |
bt->msg_file = debugfs_create_file("msg", 0222, dir, bt, &blk_msg_fops); |
02c62304e
|
492 |
|
171044d44
|
493 494 |
bt->rchan = relay_open("trace", dir, buts->buf_size, buts->buf_nr, &blk_relay_callbacks, bt); |
2056a782f
|
495 496 |
if (!bt->rchan) goto err; |
2056a782f
|
497 |
|
171044d44
|
498 |
bt->act_mask = buts->act_mask; |
2056a782f
|
499 500 |
if (!bt->act_mask) bt->act_mask = (u16) -1; |
9908c3099
|
501 |
blk_trace_setup_lba(bt, bdev); |
2056a782f
|
502 |
|
d0deef5b1
|
503 504 505 506 507 |
/* overwrite with user settings */ if (buts->start_lba) bt->start_lba = buts->start_lba; if (buts->end_lba) bt->end_lba = buts->end_lba; |
171044d44
|
508 |
bt->pid = buts->pid; |
2056a782f
|
509 510 511 |
bt->trace_state = Blktrace_setup; ret = -EBUSY; |
cdea01b2b
|
512 |
if (cmpxchg(&q->blk_trace, NULL, bt)) |
2056a782f
|
513 |
goto err; |
2056a782f
|
514 |
|
a6da0024f
|
515 |
get_probe_ref(); |
cbe28296e
|
516 |
|
6ac93117a
|
517 |
ret = 0; |
2056a782f
|
518 |
err: |
6ac93117a
|
519 520 521 522 |
if (dir && !bt->dir) dput(dir); if (ret) blk_trace_free(bt); |
2056a782f
|
523 524 |
return ret; } |
171044d44
|
525 |
|
1f2cac107
|
526 527 |
static int __blk_trace_setup(struct request_queue *q, char *name, dev_t dev, struct block_device *bdev, char __user *arg) |
171044d44
|
528 529 530 531 532 533 534 |
{ struct blk_user_trace_setup buts; int ret; ret = copy_from_user(&buts, arg, sizeof(buts)); if (ret) return -EFAULT; |
d0deef5b1
|
535 |
ret = do_blk_trace_setup(q, name, dev, bdev, &buts); |
171044d44
|
536 537 |
if (ret) return ret; |
9a8c28c83
|
538 |
if (copy_to_user(arg, &buts, sizeof(buts))) { |
2967acbb2
|
539 |
__blk_trace_remove(q); |
171044d44
|
540 |
return -EFAULT; |
9a8c28c83
|
541 |
} |
171044d44
|
542 543 |
return 0; } |
1f2cac107
|
544 545 546 547 548 549 550 551 552 553 554 555 556 |
int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, struct block_device *bdev, char __user *arg) { int ret; mutex_lock(&q->blk_trace_mutex); ret = __blk_trace_setup(q, name, dev, bdev, arg); mutex_unlock(&q->blk_trace_mutex); return ret; } |
6da127ad0
|
557 |
EXPORT_SYMBOL_GPL(blk_trace_setup); |
2056a782f
|
558 |
|
62c2a7d96
|
559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 |
#if defined(CONFIG_COMPAT) && defined(CONFIG_X86_64) static int compat_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, struct block_device *bdev, char __user *arg) { struct blk_user_trace_setup buts; struct compat_blk_user_trace_setup cbuts; int ret; if (copy_from_user(&cbuts, arg, sizeof(cbuts))) return -EFAULT; buts = (struct blk_user_trace_setup) { .act_mask = cbuts.act_mask, .buf_size = cbuts.buf_size, .buf_nr = cbuts.buf_nr, .start_lba = cbuts.start_lba, .end_lba = cbuts.end_lba, .pid = cbuts.pid, }; |
62c2a7d96
|
579 580 581 582 |
ret = do_blk_trace_setup(q, name, dev, bdev, &buts); if (ret) return ret; |
f8c5e9448
|
583 |
if (copy_to_user(arg, &buts.name, ARRAY_SIZE(buts.name))) { |
2967acbb2
|
584 |
__blk_trace_remove(q); |
62c2a7d96
|
585 586 587 588 589 590 |
return -EFAULT; } return 0; } #endif |
1f2cac107
|
591 |
static int __blk_trace_startstop(struct request_queue *q, int start) |
2056a782f
|
592 |
{ |
2056a782f
|
593 |
int ret; |
6f9cff84d
|
594 |
struct blk_trace *bt; |
2056a782f
|
595 |
|
6f9cff84d
|
596 597 |
bt = rcu_dereference_protected(q->blk_trace, lockdep_is_held(&q->blk_trace_mutex)); |
939b36697
|
598 |
if (bt == NULL) |
2056a782f
|
599 600 601 602 603 604 605 606 607 608 609 610 611 |
return -EINVAL; /* * For starting a trace, we can transition from a setup or stopped * trace. For stopping a trace, the state must be running */ ret = -EINVAL; if (start) { if (bt->trace_state == Blktrace_setup || bt->trace_state == Blktrace_stopped) { blktrace_seq++; smp_mb(); bt->trace_state = Blktrace_running; |
a404d5576
|
612 613 614 |
spin_lock_irq(&running_trace_lock); list_add(&bt->running_list, &running_trace_list); spin_unlock_irq(&running_trace_lock); |
be1c63411
|
615 616 |
trace_note_time(bt); |
2056a782f
|
617 618 619 620 621 |
ret = 0; } } else { if (bt->trace_state == Blktrace_running) { bt->trace_state = Blktrace_stopped; |
a404d5576
|
622 623 624 |
spin_lock_irq(&running_trace_lock); list_del_init(&bt->running_list); spin_unlock_irq(&running_trace_lock); |
2056a782f
|
625 626 627 628 629 630 631 |
relay_flush(bt->rchan); ret = 0; } } return ret; } |
1f2cac107
|
632 633 634 635 636 637 638 639 640 641 642 |
int blk_trace_startstop(struct request_queue *q, int start) { int ret; mutex_lock(&q->blk_trace_mutex); ret = __blk_trace_startstop(q, start); mutex_unlock(&q->blk_trace_mutex); return ret; } |
6da127ad0
|
643 |
EXPORT_SYMBOL_GPL(blk_trace_startstop); |
2056a782f
|
644 |
|
5acb3cc2c
|
645 646 647 648 649 |
/* * When reading or writing the blktrace sysfs files, the references to the * opened sysfs or device files should prevent the underlying block device * from being removed. So no further delete protection is really needed. */ |
2056a782f
|
650 651 652 |
/** * blk_trace_ioctl: - handle the ioctls associated with tracing * @bdev: the block device |
ef18012b2
|
653 |
* @cmd: the ioctl cmd |
2056a782f
|
654 655 656 657 658 |
* @arg: the argument data, if any * **/ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) { |
165125e1e
|
659 |
struct request_queue *q; |
2056a782f
|
660 |
int ret, start = 0; |
6da127ad0
|
661 |
char b[BDEVNAME_SIZE]; |
2056a782f
|
662 663 664 665 |
q = bdev_get_queue(bdev); if (!q) return -ENXIO; |
5acb3cc2c
|
666 |
mutex_lock(&q->blk_trace_mutex); |
2056a782f
|
667 668 669 |
switch (cmd) { case BLKTRACESETUP: |
f36f21ecc
|
670 |
bdevname(bdev, b); |
1f2cac107
|
671 |
ret = __blk_trace_setup(q, b, bdev->bd_dev, bdev, arg); |
2056a782f
|
672 |
break; |
62c2a7d96
|
673 674 675 676 677 678 |
#if defined(CONFIG_COMPAT) && defined(CONFIG_X86_64) case BLKTRACESETUP32: bdevname(bdev, b); ret = compat_blk_trace_setup(q, b, bdev->bd_dev, bdev, arg); break; #endif |
2056a782f
|
679 680 |
case BLKTRACESTART: start = 1; |
f6d85f04e
|
681 |
/* fall through */ |
2056a782f
|
682 |
case BLKTRACESTOP: |
1f2cac107
|
683 |
ret = __blk_trace_startstop(q, start); |
2056a782f
|
684 685 |
break; case BLKTRACETEARDOWN: |
1f2cac107
|
686 |
ret = __blk_trace_remove(q); |
2056a782f
|
687 688 689 690 691 |
break; default: ret = -ENOTTY; break; } |
5acb3cc2c
|
692 |
mutex_unlock(&q->blk_trace_mutex); |
2056a782f
|
693 694 695 696 697 698 699 700 |
return ret; } /** * blk_trace_shutdown: - stop and cleanup trace structures * @q: the request queue associated with the device * **/ |
165125e1e
|
701 |
void blk_trace_shutdown(struct request_queue *q) |
2056a782f
|
702 |
{ |
1f2cac107
|
703 |
mutex_lock(&q->blk_trace_mutex); |
6f9cff84d
|
704 705 |
if (rcu_dereference_protected(q->blk_trace, lockdep_is_held(&q->blk_trace_mutex))) { |
1f2cac107
|
706 707 |
__blk_trace_startstop(q, 0); __blk_trace_remove(q); |
6c5c93415
|
708 |
} |
1f2cac107
|
709 710 |
mutex_unlock(&q->blk_trace_mutex); |
2056a782f
|
711 |
} |
5f3ea37c7
|
712 |
|
ca1136c99
|
713 714 715 716 |
#ifdef CONFIG_BLK_CGROUP static union kernfs_node_id * blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio) { |
6f9cff84d
|
717 |
struct blk_trace *bt; |
ca1136c99
|
718 |
|
6f9cff84d
|
719 720 |
/* We don't use the 'bt' value here except as an optimization... */ bt = rcu_dereference_protected(q->blk_trace, 1); |
ca1136c99
|
721 722 |
if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP)) return NULL; |
db6638d7d
|
723 |
if (!bio->bi_blkg) |
ca1136c99
|
724 |
return NULL; |
db6638d7d
|
725 |
return cgroup_get_kernfs_id(bio_blkcg(bio)->css.cgroup); |
ca1136c99
|
726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 |
} #else static union kernfs_node_id * blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio) { return NULL; } #endif static union kernfs_node_id * blk_trace_request_get_cgid(struct request_queue *q, struct request *rq) { if (!rq->bio) return NULL; /* Use the first bio */ return blk_trace_bio_get_cgid(q, rq->bio); } |
5f3ea37c7
|
743 744 745 746 747 748 |
/* * blktrace probes */ /** * blk_add_trace_rq - Add a trace for a request oriented action |
5f3ea37c7
|
749 |
* @rq: the source request |
caf7df122
|
750 |
* @error: return status to log |
af5040da0
|
751 |
* @nr_bytes: number of completed bytes |
5f3ea37c7
|
752 |
* @what: the action |
ca1136c99
|
753 |
* @cgid: the cgroup info |
5f3ea37c7
|
754 755 756 757 758 |
* * Description: * Records an action against a request. Will log the bio offset + size. * **/ |
caf7df122
|
759 |
static void blk_add_trace_rq(struct request *rq, int error, |
ca1136c99
|
760 761 |
unsigned int nr_bytes, u32 what, union kernfs_node_id *cgid) |
5f3ea37c7
|
762 |
{ |
6f9cff84d
|
763 |
struct blk_trace *bt; |
5f3ea37c7
|
764 |
|
6f9cff84d
|
765 766 767 768 |
rcu_read_lock(); bt = rcu_dereference(rq->q->blk_trace); if (likely(!bt)) { rcu_read_unlock(); |
5f3ea37c7
|
769 |
return; |
6f9cff84d
|
770 |
} |
5f3ea37c7
|
771 |
|
57292b58d
|
772 |
if (blk_rq_is_passthrough(rq)) |
5f3ea37c7
|
773 |
what |= BLK_TC_ACT(BLK_TC_PC); |
48b77ad60
|
774 |
else |
5f3ea37c7
|
775 |
what |= BLK_TC_ACT(BLK_TC_FS); |
48b77ad60
|
776 777 |
__blk_add_trace(bt, blk_rq_trace_sector(rq), nr_bytes, req_op(rq), |
ca1136c99
|
778 |
rq->cmd_flags, what, error, 0, NULL, cgid); |
6f9cff84d
|
779 |
rcu_read_unlock(); |
5f3ea37c7
|
780 |
} |
38516ab59
|
781 782 |
static void blk_add_trace_rq_insert(void *ignore, struct request_queue *q, struct request *rq) |
5f3ea37c7
|
783 |
{ |
ca1136c99
|
784 785 |
blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_INSERT, blk_trace_request_get_cgid(q, rq)); |
5f3ea37c7
|
786 |
} |
38516ab59
|
787 788 |
static void blk_add_trace_rq_issue(void *ignore, struct request_queue *q, struct request *rq) |
5f3ea37c7
|
789 |
{ |
ca1136c99
|
790 791 |
blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_ISSUE, blk_trace_request_get_cgid(q, rq)); |
5f3ea37c7
|
792 |
} |
38516ab59
|
793 794 |
static void blk_add_trace_rq_requeue(void *ignore, struct request_queue *q, |
939b36697
|
795 |
struct request *rq) |
5f3ea37c7
|
796 |
{ |
ca1136c99
|
797 798 |
blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_REQUEUE, blk_trace_request_get_cgid(q, rq)); |
5f3ea37c7
|
799 |
} |
caf7df122
|
800 801 |
static void blk_add_trace_rq_complete(void *ignore, struct request *rq, int error, unsigned int nr_bytes) |
5f3ea37c7
|
802 |
{ |
ca1136c99
|
803 804 |
blk_add_trace_rq(rq, error, nr_bytes, BLK_TA_COMPLETE, blk_trace_request_get_cgid(rq->q, rq)); |
5f3ea37c7
|
805 806 807 808 809 810 811 |
} /** * blk_add_trace_bio - Add a trace for a bio oriented action * @q: queue the io is for * @bio: the source bio * @what: the action |
797a455d2
|
812 |
* @error: error, if any |
5f3ea37c7
|
813 814 815 816 817 818 |
* * Description: * Records an action against a bio. Will log the bio offset + size. * **/ static void blk_add_trace_bio(struct request_queue *q, struct bio *bio, |
1690102de
|
819 |
u32 what, int error) |
5f3ea37c7
|
820 |
{ |
6f9cff84d
|
821 |
struct blk_trace *bt; |
5f3ea37c7
|
822 |
|
6f9cff84d
|
823 824 825 826 |
rcu_read_lock(); bt = rcu_dereference(q->blk_trace); if (likely(!bt)) { rcu_read_unlock(); |
5f3ea37c7
|
827 |
return; |
6f9cff84d
|
828 |
} |
5f3ea37c7
|
829 |
|
4f024f379
|
830 |
__blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, |
1690102de
|
831 832 |
bio_op(bio), bio->bi_opf, what, error, 0, NULL, blk_trace_bio_get_cgid(q, bio)); |
6f9cff84d
|
833 |
rcu_read_unlock(); |
5f3ea37c7
|
834 |
} |
38516ab59
|
835 836 |
static void blk_add_trace_bio_bounce(void *ignore, struct request_queue *q, struct bio *bio) |
5f3ea37c7
|
837 |
{ |
1690102de
|
838 |
blk_add_trace_bio(q, bio, BLK_TA_BOUNCE, 0); |
5f3ea37c7
|
839 |
} |
0a82a8d13
|
840 841 842 |
static void blk_add_trace_bio_complete(void *ignore, struct request_queue *q, struct bio *bio, int error) |
5f3ea37c7
|
843 |
{ |
1690102de
|
844 |
blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error); |
5f3ea37c7
|
845 |
} |
38516ab59
|
846 847 |
static void blk_add_trace_bio_backmerge(void *ignore, struct request_queue *q, |
8c1cf6bb0
|
848 |
struct request *rq, |
939b36697
|
849 |
struct bio *bio) |
5f3ea37c7
|
850 |
{ |
1690102de
|
851 |
blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE, 0); |
5f3ea37c7
|
852 |
} |
38516ab59
|
853 854 |
static void blk_add_trace_bio_frontmerge(void *ignore, struct request_queue *q, |
8c1cf6bb0
|
855 |
struct request *rq, |
939b36697
|
856 |
struct bio *bio) |
5f3ea37c7
|
857 |
{ |
1690102de
|
858 |
blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE, 0); |
5f3ea37c7
|
859 |
} |
38516ab59
|
860 861 |
static void blk_add_trace_bio_queue(void *ignore, struct request_queue *q, struct bio *bio) |
5f3ea37c7
|
862 |
{ |
1690102de
|
863 |
blk_add_trace_bio(q, bio, BLK_TA_QUEUE, 0); |
5f3ea37c7
|
864 |
} |
38516ab59
|
865 866 |
static void blk_add_trace_getrq(void *ignore, struct request_queue *q, |
939b36697
|
867 |
struct bio *bio, int rw) |
5f3ea37c7
|
868 869 |
{ if (bio) |
1690102de
|
870 |
blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0); |
5f3ea37c7
|
871 |
else { |
6f9cff84d
|
872 |
struct blk_trace *bt; |
5f3ea37c7
|
873 |
|
6f9cff84d
|
874 875 |
rcu_read_lock(); bt = rcu_dereference(q->blk_trace); |
5f3ea37c7
|
876 |
if (bt) |
1b9a9ab78
|
877 |
__blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_GETRQ, 0, 0, |
ca1136c99
|
878 |
NULL, NULL); |
6f9cff84d
|
879 |
rcu_read_unlock(); |
5f3ea37c7
|
880 881 |
} } |
38516ab59
|
882 883 |
static void blk_add_trace_sleeprq(void *ignore, struct request_queue *q, |
939b36697
|
884 |
struct bio *bio, int rw) |
5f3ea37c7
|
885 886 |
{ if (bio) |
1690102de
|
887 |
blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0); |
5f3ea37c7
|
888 |
else { |
6f9cff84d
|
889 |
struct blk_trace *bt; |
5f3ea37c7
|
890 |
|
6f9cff84d
|
891 892 |
rcu_read_lock(); bt = rcu_dereference(q->blk_trace); |
5f3ea37c7
|
893 |
if (bt) |
1b9a9ab78
|
894 |
__blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_SLEEPRQ, |
ca1136c99
|
895 |
0, 0, NULL, NULL); |
6f9cff84d
|
896 |
rcu_read_unlock(); |
5f3ea37c7
|
897 898 |
} } |
/* Tracepoint probe: the queue was plugged. */
static void blk_add_trace_plug(void *ignore, struct request_queue *q)
{
	struct blk_trace *bt;

	rcu_read_lock();
	bt = rcu_dereference(q->blk_trace);
	if (bt)
		__blk_add_trace(bt, 0, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL, NULL);
	rcu_read_unlock();
}
49cac01e1
|
909 910 |
static void blk_add_trace_unplug(void *ignore, struct request_queue *q, unsigned int depth, bool explicit) |
5f3ea37c7
|
911 |
{ |
6f9cff84d
|
912 |
struct blk_trace *bt; |
5f3ea37c7
|
913 |
|
6f9cff84d
|
914 915 |
rcu_read_lock(); bt = rcu_dereference(q->blk_trace); |
5f3ea37c7
|
916 |
if (bt) { |
94b5eb28b
|
917 |
__be64 rpdu = cpu_to_be64(depth); |
49cac01e1
|
918 |
u32 what; |
5f3ea37c7
|
919 |
|
49cac01e1
|
920 921 922 923 |
if (explicit) what = BLK_TA_UNPLUG_IO; else what = BLK_TA_UNPLUG_TIMER; |
ca1136c99
|
924 |
__blk_add_trace(bt, 0, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu, NULL); |
5f3ea37c7
|
925 |
} |
6f9cff84d
|
926 |
rcu_read_unlock(); |
5f3ea37c7
|
927 |
} |
/*
 * Tracepoint probe: @bio is being split; @pdu is the sector at which the
 * remainder will continue, carried as a big-endian u64 payload.
 */
static void blk_add_trace_split(void *ignore, struct request_queue *q,
				struct bio *bio,
				unsigned int pdu)
{
	struct blk_trace *bt;

	rcu_read_lock();
	bt = rcu_dereference(q->blk_trace);
	if (bt) {
		__be64 rpdu = cpu_to_be64(pdu);

		__blk_add_trace(bt, bio->bi_iter.bi_sector,
				bio->bi_iter.bi_size, bio_op(bio), bio->bi_opf,
				BLK_TA_SPLIT, bio->bi_status, sizeof(rpdu),
				&rpdu, blk_trace_bio_get_cgid(q, bio));
	}
	rcu_read_unlock();
}
/**
 * blk_add_trace_bio_remap - Add a trace for a bio-remap operation
 * @ignore:	trace callback data parameter (not used)
 * @q:		queue the io is for
 * @bio:	the source bio
 * @dev:	target device
 * @from:	source sector
 *
 * Description:
 *     Device mapper or raid target sometimes need to split a bio because
 *     it spans a stripe (or similar). Add a trace for that action.
 *
 **/
static void blk_add_trace_bio_remap(void *ignore,
				    struct request_queue *q, struct bio *bio,
				    dev_t dev, sector_t from)
{
	struct blk_trace *bt;
	struct blk_io_trace_remap r;

	rcu_read_lock();
	bt = rcu_dereference(q->blk_trace);
	if (likely(!bt)) {
		rcu_read_unlock();
		return;
	}

	/* payload is on-disk (big-endian) format */
	r.device_from = cpu_to_be32(dev);
	r.device_to   = cpu_to_be32(bio_dev(bio));
	r.sector_from = cpu_to_be64(from);

	__blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
			bio_op(bio), bio->bi_opf, BLK_TA_REMAP, bio->bi_status,
			sizeof(r), &r, blk_trace_bio_get_cgid(q, bio));
	rcu_read_unlock();
}
/**
 * blk_add_trace_rq_remap - Add a trace for a request-remap operation
 * @ignore:	trace callback data parameter (not used)
 * @q:		queue the io is for
 * @rq:		the source request
 * @dev:	target device
 * @from:	source sector
 *
 * Description:
 *     Device mapper remaps request to other devices.
 *     Add a trace for that action.
 *
 **/
static void blk_add_trace_rq_remap(void *ignore,
				   struct request_queue *q,
				   struct request *rq, dev_t dev,
				   sector_t from)
{
	struct blk_trace *bt;
	struct blk_io_trace_remap r;

	rcu_read_lock();
	bt = rcu_dereference(q->blk_trace);
	if (likely(!bt)) {
		rcu_read_unlock();
		return;
	}

	/* payload is on-disk (big-endian) format */
	r.device_from = cpu_to_be32(dev);
	r.device_to   = cpu_to_be32(disk_devt(rq->rq_disk));
	r.sector_from = cpu_to_be64(from);

	__blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq),
			rq_data_dir(rq), 0, BLK_TA_REMAP, 0,
			sizeof(r), &r, blk_trace_request_get_cgid(q, rq));
	rcu_read_unlock();
}
/**
 * blk_add_driver_data - Add binary message with driver-specific data
 * @q:		queue the io is for
 * @rq:		io request
 * @data:	driver-specific data
 * @len:	length of driver-specific data
 *
 * Description:
 *     Some drivers might want to write driver-specific data per request.
 *
 **/
void blk_add_driver_data(struct request_queue *q,
			 struct request *rq,
			 void *data, size_t len)
{
	struct blk_trace *bt;

	rcu_read_lock();
	bt = rcu_dereference(q->blk_trace);
	if (likely(!bt)) {
		rcu_read_unlock();
		return;
	}

	__blk_add_trace(bt, blk_rq_trace_sector(rq), blk_rq_bytes(rq), 0, 0,
				BLK_TA_DRV_DATA, 0, len, data,
				blk_trace_request_get_cgid(q, rq));
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(blk_add_driver_data);
/*
 * Attach all blktrace probes to the block-layer tracepoints.  Called once
 * when the first trace is set up (refcounted by get_probe_ref()).  The
 * registration order is the reverse of blk_unregister_tracepoints().
 */
static void blk_register_tracepoints(void)
{
	int ret;

	ret = register_trace_block_rq_insert(blk_add_trace_rq_insert, NULL);
	WARN_ON(ret);
	ret = register_trace_block_rq_issue(blk_add_trace_rq_issue, NULL);
	WARN_ON(ret);
	ret = register_trace_block_rq_requeue(blk_add_trace_rq_requeue, NULL);
	WARN_ON(ret);
	ret = register_trace_block_rq_complete(blk_add_trace_rq_complete, NULL);
	WARN_ON(ret);
	ret = register_trace_block_bio_bounce(blk_add_trace_bio_bounce, NULL);
	WARN_ON(ret);
	ret = register_trace_block_bio_complete(blk_add_trace_bio_complete, NULL);
	WARN_ON(ret);
	ret = register_trace_block_bio_backmerge(blk_add_trace_bio_backmerge, NULL);
	WARN_ON(ret);
	ret = register_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge, NULL);
	WARN_ON(ret);
	ret = register_trace_block_bio_queue(blk_add_trace_bio_queue, NULL);
	WARN_ON(ret);
	ret = register_trace_block_getrq(blk_add_trace_getrq, NULL);
	WARN_ON(ret);
	ret = register_trace_block_sleeprq(blk_add_trace_sleeprq, NULL);
	WARN_ON(ret);
	ret = register_trace_block_plug(blk_add_trace_plug, NULL);
	WARN_ON(ret);
	ret = register_trace_block_unplug(blk_add_trace_unplug, NULL);
	WARN_ON(ret);
	ret = register_trace_block_split(blk_add_trace_split, NULL);
	WARN_ON(ret);
	ret = register_trace_block_bio_remap(blk_add_trace_bio_remap, NULL);
	WARN_ON(ret);
	ret = register_trace_block_rq_remap(blk_add_trace_rq_remap, NULL);
	WARN_ON(ret);
}
/*
 * Detach all blktrace probes (reverse order of registration), then wait
 * for any in-flight probe invocations to finish before the caller may
 * free per-trace state.
 */
static void blk_unregister_tracepoints(void)
{
	unregister_trace_block_rq_remap(blk_add_trace_rq_remap, NULL);
	unregister_trace_block_bio_remap(blk_add_trace_bio_remap, NULL);
	unregister_trace_block_split(blk_add_trace_split, NULL);
	unregister_trace_block_unplug(blk_add_trace_unplug, NULL);
	unregister_trace_block_plug(blk_add_trace_plug, NULL);
	unregister_trace_block_sleeprq(blk_add_trace_sleeprq, NULL);
	unregister_trace_block_getrq(blk_add_trace_getrq, NULL);
	unregister_trace_block_bio_queue(blk_add_trace_bio_queue, NULL);
	unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge, NULL);
	unregister_trace_block_bio_backmerge(blk_add_trace_bio_backmerge, NULL);
	unregister_trace_block_bio_complete(blk_add_trace_bio_complete, NULL);
	unregister_trace_block_bio_bounce(blk_add_trace_bio_bounce, NULL);
	unregister_trace_block_rq_complete(blk_add_trace_rq_complete, NULL);
	unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue, NULL);
	unregister_trace_block_rq_issue(blk_add_trace_rq_issue, NULL);
	unregister_trace_block_rq_insert(blk_add_trace_rq_insert, NULL);

	tracepoint_synchronize_unregister();
}
/*
 * struct blk_io_tracer formatting routines
 */

/*
 * Build the classic blktrace "RWBS" flag string for @t into @rwbs
 * (e.g. "WSM").  Trace-notify messages render as a bare "N".
 */
static void fill_rwbs(char *rwbs, const struct blk_io_trace *t)
{
	int i = 0;
	int tc = t->action >> BLK_TC_SHIFT;

	if ((t->action & ~__BLK_TN_CGROUP) == BLK_TN_MESSAGE) {
		rwbs[i++] = 'N';
		goto out;
	}

	if (tc & BLK_TC_FLUSH)
		rwbs[i++] = 'F';

	/* exactly one of D/W/R/N describes the data direction */
	if (tc & BLK_TC_DISCARD)
		rwbs[i++] = 'D';
	else if (tc & BLK_TC_WRITE)
		rwbs[i++] = 'W';
	else if (t->bytes)
		rwbs[i++] = 'R';
	else
		rwbs[i++] = 'N';

	if (tc & BLK_TC_FUA)
		rwbs[i++] = 'F';
	if (tc & BLK_TC_AHEAD)
		rwbs[i++] = 'A';
	if (tc & BLK_TC_SYNC)
		rwbs[i++] = 'S';
	if (tc & BLK_TC_META)
		rwbs[i++] = 'M';
out:
	rwbs[i] = '\0';
}

/* A blk trace_entry is laid out as a struct blk_io_trace. */
static inline
const struct blk_io_trace *te_blk_io_trace(const struct trace_entry *ent)
{
	return (const struct blk_io_trace *)ent;
}
/*
 * Variable payload follows the fixed record; when a cgroup id was traced
 * (has_cg) it precedes the pdu and must be skipped.
 */
static inline const void *pdu_start(const struct trace_entry *ent, bool has_cg)
{
	return (void *)(te_blk_io_trace(ent) + 1) +
		(has_cg ? sizeof(union kernfs_node_id) : 0);
}

/* The cgroup id, when present, sits directly after the fixed record. */
static inline const void *cgid_start(const struct trace_entry *ent)
{
	return (void *)(te_blk_io_trace(ent) + 1);
}

/* pdu length excluding the optional leading cgroup id. */
static inline int pdu_real_len(const struct trace_entry *ent, bool has_cg)
{
	return te_blk_io_trace(ent)->pdu_len -
			(has_cg ? sizeof(union kernfs_node_id) : 0);
}

static inline u32 t_action(const struct trace_entry *ent)
{
	return te_blk_io_trace(ent)->action;
}

static inline u32 t_bytes(const struct trace_entry *ent)
{
	return te_blk_io_trace(ent)->bytes;
}

/* I/O size in 512-byte sectors. */
static inline u32 t_sec(const struct trace_entry *ent)
{
	return te_blk_io_trace(ent)->bytes >> 9;
}

static inline unsigned long long t_sector(const struct trace_entry *ent)
{
	return te_blk_io_trace(ent)->sector;
}

static inline __u16 t_error(const struct trace_entry *ent)
{
	return te_blk_io_trace(ent)->error;
}
/*
 * Decode a single big-endian u64 payload (unplug depth, split sector).
 * NOTE(review): the pdu is read through a __u64 pointer; assumes the
 * payload is suitably aligned within the ring-buffer entry — confirm.
 */
static __u64 get_pdu_int(const struct trace_entry *ent, bool has_cg)
{
	const __u64 *val = pdu_start(ent, has_cg);
	return be64_to_cpu(*val);
}

/* Decode a struct blk_io_trace_remap payload into host byte order. */
static void get_pdu_remap(const struct trace_entry *ent,
			  struct blk_io_trace_remap *r, bool has_cg)
{
	const struct blk_io_trace_remap *__r = pdu_start(ent, has_cg);
	__u64 sector_from = __r->sector_from;

	r->device_from = be32_to_cpu(__r->device_from);
	r->device_to   = be32_to_cpu(__r->device_to);
	r->sector_from = be64_to_cpu(sector_from);
}
/* Emits the per-event prefix (device, actor, action, rwbs) for one style. */
typedef void (blk_log_action_t) (struct trace_iterator *iter, const char *act,
	bool has_cg);

/* Classic blktrace prefix: "maj,min cpu secs.nsecs pid act rwbs ". */
static void blk_log_action_classic(struct trace_iterator *iter, const char *act,
	bool has_cg)
{
	char rwbs[RWBS_LEN];
	unsigned long long ts  = iter->ts;
	/* do_div() modifies ts in place and returns the remainder */
	unsigned long nsec_rem = do_div(ts, NSEC_PER_SEC);
	unsigned secs	       = (unsigned long)ts;
	const struct blk_io_trace *t = te_blk_io_trace(iter->ent);

	fill_rwbs(rwbs, t);
	trace_seq_printf(&iter->seq,
			 "%3d,%-3d %2d %5d.%09lu %5u %2s %3s ",
			 MAJOR(t->device), MINOR(t->device), iter->cpu,
			 secs, nsec_rem, iter->ent->pid, act, rwbs);
}
/*
 * Default prefix.  When a cgroup id is attached, print either the resolved
 * cgroup path (blk_cgname option) or the raw kernfs ino/generation pair.
 */
static void blk_log_action(struct trace_iterator *iter, const char *act,
	bool has_cg)
{
	char rwbs[RWBS_LEN];
	const struct blk_io_trace *t = te_blk_io_trace(iter->ent);

	fill_rwbs(rwbs, t);
	if (has_cg) {
		const union kernfs_node_id *id = cgid_start(iter->ent);

		if (blk_tracer_flags.val & TRACE_BLK_OPT_CGNAME) {
			/* fallback shown if the path lookup cannot resolve */
			char blkcg_name_buf[NAME_MAX + 1] = "<...>";

			cgroup_path_from_kernfs_id(id, blkcg_name_buf,
				sizeof(blkcg_name_buf));
			trace_seq_printf(&iter->seq, "%3d,%-3d %s %2s %3s ",
				 MAJOR(t->device), MINOR(t->device),
				 blkcg_name_buf, act, rwbs);
		} else
			trace_seq_printf(&iter->seq,
				 "%3d,%-3d %x,%-x %2s %3s ",
				 MAJOR(t->device), MINOR(t->device),
				 id->ino, id->generation, act, rwbs);
	} else
		trace_seq_printf(&iter->seq, "%3d,%-3d %2s %3s ",
				 MAJOR(t->device), MINOR(t->device), act, rwbs);
}
/*
 * Hex-dump the event payload as "(aa bb ..)"; a run of trailing zero
 * bytes is elided and marked with "..".
 */
static void blk_log_dump_pdu(struct trace_seq *s,
	const struct trace_entry *ent, bool has_cg)
{
	const unsigned char *pdu_buf;
	int pdu_len;
	int i, end;

	pdu_buf = pdu_start(ent, has_cg);
	pdu_len = pdu_real_len(ent, has_cg);

	if (!pdu_len)
		return;

	/* find the last non-zero byte that needs to be printed */
	for (end = pdu_len - 1; end >= 0; end--)
		if (pdu_buf[end])
			break;
	end++;

	trace_seq_putc(s, '(');

	for (i = 0; i < pdu_len; i++) {

		trace_seq_printf(s, "%s%02x",
				 i == 0 ? "" : " ", pdu_buf[i]);

		/*
		 * stop when the rest is just zeroes and indicate so
		 * with a ".." appended
		 */
		if (i == end && end != pdu_len - 1) {
			trace_seq_puts(s, " ..) ");
			return;
		}
	}

	trace_seq_puts(s, ") ");
}
ca1136c99
|
1281 |
static void blk_log_generic(struct trace_seq *s, const struct trace_entry *ent, bool has_cg) |
c71a89615
|
1282 |
{ |
4ca530852
|
1283 1284 1285 |
char cmd[TASK_COMM_LEN]; trace_find_cmdline(ent->pid, cmd); |
c71a89615
|
1286 |
|
66de7792c
|
1287 |
if (t_action(ent) & BLK_TC_ACT(BLK_TC_PC)) { |
f4a1d08ce
|
1288 |
trace_seq_printf(s, "%u ", t_bytes(ent)); |
ca1136c99
|
1289 |
blk_log_dump_pdu(s, ent, has_cg); |
f4a1d08ce
|
1290 1291 |
trace_seq_printf(s, "[%s] ", cmd); |
66de7792c
|
1292 1293 |
} else { if (t_sec(ent)) |
f4a1d08ce
|
1294 1295 |
trace_seq_printf(s, "%llu + %u [%s] ", |
66de7792c
|
1296 |
t_sector(ent), t_sec(ent), cmd); |
f4a1d08ce
|
1297 1298 1299 |
else trace_seq_printf(s, "[%s] ", cmd); |
66de7792c
|
1300 |
} |
c71a89615
|
1301 |
} |
f4a1d08ce
|
1302 |
static void blk_log_with_error(struct trace_seq *s, |
ca1136c99
|
1303 |
const struct trace_entry *ent, bool has_cg) |
c71a89615
|
1304 |
{ |
66de7792c
|
1305 |
if (t_action(ent) & BLK_TC_ACT(BLK_TC_PC)) { |
ca1136c99
|
1306 |
blk_log_dump_pdu(s, ent, has_cg); |
f4a1d08ce
|
1307 1308 |
trace_seq_printf(s, "[%d] ", t_error(ent)); |
66de7792c
|
1309 1310 |
} else { if (t_sec(ent)) |
f4a1d08ce
|
1311 1312 1313 1314 1315 1316 1317 1318 |
trace_seq_printf(s, "%llu + %u [%d] ", t_sector(ent), t_sec(ent), t_error(ent)); else trace_seq_printf(s, "%llu [%d] ", t_sector(ent), t_error(ent)); |
66de7792c
|
1319 |
} |
c71a89615
|
1320 |
} |
ca1136c99
|
1321 |
static void blk_log_remap(struct trace_seq *s, const struct trace_entry *ent, bool has_cg) |
c71a89615
|
1322 |
{ |
a42aaa3bb
|
1323 |
struct blk_io_trace_remap r = { .device_from = 0, }; |
c71a89615
|
1324 |
|
ca1136c99
|
1325 |
get_pdu_remap(ent, &r, has_cg); |
f4a1d08ce
|
1326 1327 1328 1329 1330 |
trace_seq_printf(s, "%llu + %u <- (%d,%d) %llu ", t_sector(ent), t_sec(ent), MAJOR(r.device_from), MINOR(r.device_from), (unsigned long long)r.sector_from); |
c71a89615
|
1331 |
} |
ca1136c99
|
1332 |
static void blk_log_plug(struct trace_seq *s, const struct trace_entry *ent, bool has_cg) |
c71a89615
|
1333 |
{ |
4ca530852
|
1334 1335 1336 |
char cmd[TASK_COMM_LEN]; trace_find_cmdline(ent->pid, cmd); |
f4a1d08ce
|
1337 1338 |
trace_seq_printf(s, "[%s] ", cmd); |
c71a89615
|
1339 |
} |
ca1136c99
|
1340 |
static void blk_log_unplug(struct trace_seq *s, const struct trace_entry *ent, bool has_cg) |
c71a89615
|
1341 |
{ |
4ca530852
|
1342 1343 1344 |
char cmd[TASK_COMM_LEN]; trace_find_cmdline(ent->pid, cmd); |
ca1136c99
|
1345 1346 |
trace_seq_printf(s, "[%s] %llu ", cmd, get_pdu_int(ent, has_cg)); |
c71a89615
|
1347 |
} |
ca1136c99
|
1348 |
static void blk_log_split(struct trace_seq *s, const struct trace_entry *ent, bool has_cg) |
c71a89615
|
1349 |
{ |
4ca530852
|
1350 1351 1352 |
char cmd[TASK_COMM_LEN]; trace_find_cmdline(ent->pid, cmd); |
f4a1d08ce
|
1353 1354 |
trace_seq_printf(s, "%llu / %llu [%s] ", t_sector(ent), |
ca1136c99
|
1355 |
get_pdu_int(ent, has_cg), cmd); |
c71a89615
|
1356 |
} |
ca1136c99
|
1357 1358 |
static void blk_log_msg(struct trace_seq *s, const struct trace_entry *ent, bool has_cg) |
18cea4591
|
1359 |
{ |
18cea4591
|
1360 |
|
ca1136c99
|
1361 1362 |
trace_seq_putmem(s, pdu_start(ent, has_cg), pdu_real_len(ent, has_cg)); |
f4a1d08ce
|
1363 1364 |
trace_seq_putc(s, ' '); |
18cea4591
|
1365 |
} |
c71a89615
|
1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 |
/* * struct tracer operations */ static void blk_tracer_print_header(struct seq_file *m) { if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC)) return; seq_puts(m, "# DEV CPU TIMESTAMP PID ACT FLG " "# | | | | | | "); } static void blk_tracer_start(struct trace_array *tr) { |
ad5dd5493
|
1382 |
blk_tracer_enabled = true; |
c71a89615
|
1383 1384 1385 1386 1387 1388 |
} static int blk_tracer_init(struct trace_array *tr) { blk_tr = tr; blk_tracer_start(tr); |
c71a89615
|
1389 1390 1391 1392 1393 |
return 0; } static void blk_tracer_stop(struct trace_array *tr) { |
ad5dd5493
|
1394 |
blk_tracer_enabled = false; |
c71a89615
|
1395 1396 1397 1398 |
} static void blk_tracer_reset(struct trace_array *tr) { |
c71a89615
|
1399 1400 |
blk_tracer_stop(tr); } |
/*
 * Dispatch table indexed by the event's action code: short/long action
 * name (selected by the verbose flag) plus the body formatter.
 */
static const struct {
	const char *act[2];
	void	   (*print)(struct trace_seq *s, const struct trace_entry *ent,
			    bool has_cg);
} what2act[] = {
	[__BLK_TA_QUEUE]	= {{  "Q", "queue" },	   blk_log_generic },
	[__BLK_TA_BACKMERGE]	= {{  "M", "backmerge" },  blk_log_generic },
	[__BLK_TA_FRONTMERGE]	= {{  "F", "frontmerge" }, blk_log_generic },
	[__BLK_TA_GETRQ]	= {{  "G", "getrq" },	   blk_log_generic },
	[__BLK_TA_SLEEPRQ]	= {{  "S", "sleeprq" },	   blk_log_generic },
	[__BLK_TA_REQUEUE]	= {{  "R", "requeue" },	   blk_log_with_error },
	[__BLK_TA_ISSUE]	= {{  "D", "issue" },	   blk_log_generic },
	[__BLK_TA_COMPLETE]	= {{  "C", "complete" },   blk_log_with_error },
	[__BLK_TA_PLUG]		= {{  "P", "plug" },	   blk_log_plug },
	[__BLK_TA_UNPLUG_IO]	= {{  "U", "unplug_io" },  blk_log_unplug },
	[__BLK_TA_UNPLUG_TIMER]	= {{ "UT", "unplug_timer" }, blk_log_unplug },
	[__BLK_TA_INSERT]	= {{  "I", "insert" },	   blk_log_generic },
	[__BLK_TA_SPLIT]	= {{  "X", "split" },	   blk_log_split },
	[__BLK_TA_BOUNCE]	= {{  "B", "bounce" },	   blk_log_generic },
	[__BLK_TA_REMAP]	= {{  "A", "remap" },	   blk_log_remap },
};
b6a4b0c3a
|
1422 1423 |
static enum print_line_t print_one_line(struct trace_iterator *iter, bool classic) |
c71a89615
|
1424 |
{ |
983f938ae
|
1425 |
struct trace_array *tr = iter->tr; |
2c9b238eb
|
1426 |
struct trace_seq *s = &iter->seq; |
b6a4b0c3a
|
1427 1428 |
const struct blk_io_trace *t; u16 what; |
b6a4b0c3a
|
1429 1430 |
bool long_act; blk_log_action_t *log_action; |
ca1136c99
|
1431 |
bool has_cg; |
c71a89615
|
1432 |
|
b6a4b0c3a
|
1433 |
t = te_blk_io_trace(iter->ent); |
ca1136c99
|
1434 |
what = (t->action & ((1 << BLK_TC_SHIFT) - 1)) & ~__BLK_TA_CGROUP; |
983f938ae
|
1435 |
long_act = !!(tr->trace_flags & TRACE_ITER_VERBOSE); |
b6a4b0c3a
|
1436 |
log_action = classic ? &blk_log_action_classic : &blk_log_action; |
ca1136c99
|
1437 |
has_cg = t->action & __BLK_TA_CGROUP; |
08a06b83f
|
1438 |
|
ca1136c99
|
1439 1440 1441 |
if ((t->action & ~__BLK_TN_CGROUP) == BLK_TN_MESSAGE) { log_action(iter, long_act ? "message" : "m", has_cg); blk_log_msg(s, iter->ent, has_cg); |
b7d7641e2
|
1442 |
return trace_handle_return(s); |
18cea4591
|
1443 |
} |
eb08f8eb0
|
1444 |
if (unlikely(what == 0 || what >= ARRAY_SIZE(what2act))) |
f4a1d08ce
|
1445 1446 |
trace_seq_printf(s, "Unknown action %x ", what); |
c71a89615
|
1447 |
else { |
ca1136c99
|
1448 1449 |
log_action(iter, what2act[what].act[long_act], has_cg); what2act[what].print(s, iter->ent, has_cg); |
c71a89615
|
1450 |
} |
f4a1d08ce
|
1451 1452 |
return trace_handle_return(s); |
c71a89615
|
1453 |
} |
/* trace_event ->trace callback: default (non-classic) text output. */
static enum print_line_t blk_trace_event_print(struct trace_iterator *iter,
					       int flags, struct trace_event *event)
{
	return print_one_line(iter, false);
}
/*
 * Re-create the on-disk blktrace record from a ring-buffer entry so
 * binary consumers (blkparse) can read trace_pipe_raw: a fresh header
 * up to ->sector, then the entry's own tail and pdu verbatim.
 */
static void blk_trace_synthesize_old_trace(struct trace_iterator *iter)
{
	struct trace_seq *s = &iter->seq;
	struct blk_io_trace *t = (struct blk_io_trace *)iter->ent;
	const int offset = offsetof(struct blk_io_trace, sector);
	struct blk_io_trace old = {
		.magic	  = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION,
		.time     = iter->ts,
	};

	trace_seq_putmem(s, &old, offset);
	trace_seq_putmem(s, &t->sector,
			 sizeof(old) - offset + t->pdu_len);
}
/* trace_event ->binary callback: emit the legacy binary record. */
static enum print_line_t
blk_trace_event_print_binary(struct trace_iterator *iter, int flags,
			     struct trace_event *event)
{
	blk_trace_synthesize_old_trace(iter);

	return trace_handle_return(&iter->seq);
}
/* tracer ->print_line: only handles the line in blk_classic mode. */
static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter)
{
	if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC))
		return TRACE_TYPE_UNHANDLED;

	return print_one_line(iter, true);
}
/* tracer ->set_flag: toggling blk_classic also toggles context-info. */
static int blk_tracer_set_flag(struct trace_array *tr,
			       u32 old_flags, u32 bit, int set)
{
	/* don't output context-info for blk_classic output */
	if (bit == TRACE_BLK_OPT_CLASSIC) {
		if (set)
			tr->trace_flags &= ~TRACE_ITER_CONTEXT_INFO;
		else
			tr->trace_flags |= TRACE_ITER_CONTEXT_INFO;
	}
	return 0;
}
/* The "blk" tracer and the TRACE_BLK event glue for the ftrace core. */
static struct tracer blk_tracer __read_mostly = {
	.name		= "blk",
	.init		= blk_tracer_init,
	.reset		= blk_tracer_reset,
	.start		= blk_tracer_start,
	.stop		= blk_tracer_stop,
	.print_header	= blk_tracer_print_header,
	.print_line	= blk_tracer_print_line,
	.flags		= &blk_tracer_flags,
	.set_flag	= blk_tracer_set_flag,
};

static struct trace_event_functions trace_blk_event_funcs = {
	.trace		= blk_trace_event_print,
	.binary		= blk_trace_event_print_binary,
};

static struct trace_event trace_blk_event = {
	.type		= TRACE_BLK,
	.funcs		= &trace_blk_event_funcs,
};
c71a89615
|
1517 1518 |
static int __init init_blk_tracer(void) { |
9023c9309
|
1519 |
if (!register_trace_event(&trace_blk_event)) { |
a395d6a7e
|
1520 1521 |
pr_warn("Warning: could not register block events "); |
c71a89615
|
1522 1523 1524 1525 |
return 1; } if (register_tracer(&blk_tracer) != 0) { |
a395d6a7e
|
1526 1527 |
pr_warn("Warning: could not register the block tracer "); |
9023c9309
|
1528 |
unregister_trace_event(&trace_blk_event); |
c71a89615
|
1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 |
return 1; } return 0; } device_initcall(init_blk_tracer); static int blk_trace_remove_queue(struct request_queue *q) { struct blk_trace *bt; bt = xchg(&q->blk_trace, NULL); if (bt == NULL) return -EINVAL; |
a6da0024f
|
1544 |
put_probe_ref(); |
6f9cff84d
|
1545 |
synchronize_rcu(); |
ad5dd5493
|
1546 |
blk_trace_free(bt); |
c71a89615
|
1547 1548 1549 1550 1551 1552 |
return 0; } /* * Setup everything required to start tracing */ |
/*
 * Setup everything required to start tracing
 */
static int blk_trace_setup_queue(struct request_queue *q,
				 struct block_device *bdev)
{
	struct blk_trace *bt = NULL;
	int ret = -ENOMEM;

	bt = kzalloc(sizeof(*bt), GFP_KERNEL);
	if (!bt)
		return -ENOMEM;

	bt->msg_data = __alloc_percpu(BLK_TN_MAX_MSG, __alignof__(char));
	if (!bt->msg_data)
		goto free_bt;

	bt->dev = bdev->bd_dev;
	/* sysfs setup traces all action types until narrowed via act_mask */
	bt->act_mask = (u16)-1;

	blk_trace_setup_lba(bt, bdev);

	/* cmpxchg loses the race if another trace was installed meanwhile */
	ret = -EBUSY;
	if (cmpxchg(&q->blk_trace, NULL, bt))
		goto free_bt;

	get_probe_ref();
	return 0;

free_bt:
	blk_trace_free(bt);
	return ret;
}
/*
 * sysfs interface to enable and configure tracing
 */

static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
					 struct device_attribute *attr,
					 char *buf);
static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
					  struct device_attribute *attr,
					  const char *buf, size_t count);
/* All trace attributes share one show/store pair, dispatched on @attr. */
#define BLK_TRACE_DEVICE_ATTR(_name) \
	DEVICE_ATTR(_name, S_IRUGO | S_IWUSR, \
		    sysfs_blk_trace_attr_show, \
		    sysfs_blk_trace_attr_store)

static BLK_TRACE_DEVICE_ATTR(enable);
static BLK_TRACE_DEVICE_ATTR(act_mask);
static BLK_TRACE_DEVICE_ATTR(pid);
static BLK_TRACE_DEVICE_ATTR(start_lba);
static BLK_TRACE_DEVICE_ATTR(end_lba);

static struct attribute *blk_trace_attrs[] = {
	&dev_attr_enable.attr,
	&dev_attr_act_mask.attr,
	&dev_attr_pid.attr,
	&dev_attr_start_lba.attr,
	&dev_attr_end_lba.attr,
	NULL
};

/* Exposed under /sys/block/<dev>/<part>/trace/ */
struct attribute_group blk_trace_attr_group = {
	.name  = "trace",
	.attrs = blk_trace_attrs,
};
/* Mapping between act_mask bits and their sysfs string names. */
static const struct {
	int mask;
	const char *str;
} mask_maps[] = {
	{ BLK_TC_READ,		"read"		},
	{ BLK_TC_WRITE,		"write"		},
	{ BLK_TC_FLUSH,		"flush"		},
	{ BLK_TC_SYNC,		"sync"		},
	{ BLK_TC_QUEUE,		"queue"		},
	{ BLK_TC_REQUEUE,	"requeue"	},
	{ BLK_TC_ISSUE,		"issue"		},
	{ BLK_TC_COMPLETE,	"complete"	},
	{ BLK_TC_FS,		"fs"		},
	{ BLK_TC_PC,		"pc"		},
	{ BLK_TC_NOTIFY,	"notify"	},
	{ BLK_TC_AHEAD,		"ahead"		},
	{ BLK_TC_META,		"meta"		},
	{ BLK_TC_DISCARD,	"discard"	},
	{ BLK_TC_DRV_DATA,	"drv_data"	},
	{ BLK_TC_FUA,		"fua"		},
};
/*
 * Parse a comma-separated list of category names (see mask_maps) into an
 * act_mask.  Returns the mask, -EINVAL on an unknown token, or -ENOMEM.
 */
static int blk_trace_str2mask(const char *str)
{
	int i;
	int mask = 0;
	char *buf, *s, *token;

	/* strsep() modifies the string, so work on a copy */
	buf = kstrdup(str, GFP_KERNEL);
	if (buf == NULL)
		return -ENOMEM;
	s = strstrip(buf);

	while (1) {
		token = strsep(&s, ",");
		if (token == NULL)
			break;

		if (*token == '\0')
			continue;

		for (i = 0; i < ARRAY_SIZE(mask_maps); i++) {
			if (strcasecmp(token, mask_maps[i].str) == 0) {
				mask |= mask_maps[i].mask;
				break;
			}
		}
		if (i == ARRAY_SIZE(mask_maps)) {
			mask = -EINVAL;
			break;
		}
	}
	kfree(buf);

	return mask;
}
093419971
|
1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 |
static ssize_t blk_trace_mask2str(char *buf, int mask) { int i; char *p = buf; for (i = 0; i < ARRAY_SIZE(mask_maps); i++) { if (mask & mask_maps[i].mask) { p += sprintf(p, "%s%s", (p == buf) ? "" : ",", mask_maps[i].str); } } *p++ = ' '; return p - buf; } |
/* Return @bdev's request queue, or NULL if the device has no gendisk. */
static struct request_queue *blk_trace_get_queue(struct block_device *bdev)
{
	if (bdev->bd_disk == NULL)
		return NULL;

	return bdev_get_queue(bdev);
}
c71a89615
|
1693 1694 1695 1696 1697 1698 1699 |
static ssize_t sysfs_blk_trace_attr_show(struct device *dev, struct device_attribute *attr, char *buf) { struct hd_struct *p = dev_to_part(dev); struct request_queue *q; struct block_device *bdev; |
6f9cff84d
|
1700 |
struct blk_trace *bt; |
c71a89615
|
1701 |
ssize_t ret = -ENXIO; |
c71a89615
|
1702 1703 |
bdev = bdget(part_devt(p)); if (bdev == NULL) |
01b284f9b
|
1704 |
goto out; |
c71a89615
|
1705 |
|
b125130b2
|
1706 |
q = blk_trace_get_queue(bdev); |
c71a89615
|
1707 1708 |
if (q == NULL) goto out_bdput; |
b125130b2
|
1709 |
|
5acb3cc2c
|
1710 |
mutex_lock(&q->blk_trace_mutex); |
cd649b8bb
|
1711 |
|
6f9cff84d
|
1712 1713 |
bt = rcu_dereference_protected(q->blk_trace, lockdep_is_held(&q->blk_trace_mutex)); |
cd649b8bb
|
1714 |
if (attr == &dev_attr_enable) { |
6f9cff84d
|
1715 1716 |
ret = sprintf(buf, "%u ", !!bt); |
cd649b8bb
|
1717 1718 |
goto out_unlock_bdev; } |
6f9cff84d
|
1719 |
if (bt == NULL) |
c71a89615
|
1720 1721 1722 |
ret = sprintf(buf, "disabled "); else if (attr == &dev_attr_act_mask) |
6f9cff84d
|
1723 |
ret = blk_trace_mask2str(buf, bt->act_mask); |
c71a89615
|
1724 |
else if (attr == &dev_attr_pid) |
6f9cff84d
|
1725 1726 |
ret = sprintf(buf, "%u ", bt->pid); |
c71a89615
|
1727 |
else if (attr == &dev_attr_start_lba) |
6f9cff84d
|
1728 1729 |
ret = sprintf(buf, "%llu ", bt->start_lba); |
c71a89615
|
1730 |
else if (attr == &dev_attr_end_lba) |
6f9cff84d
|
1731 1732 |
ret = sprintf(buf, "%llu ", bt->end_lba); |
cd649b8bb
|
1733 1734 |
out_unlock_bdev: |
5acb3cc2c
|
1735 |
mutex_unlock(&q->blk_trace_mutex); |
c71a89615
|
1736 1737 |
out_bdput: bdput(bdev); |
01b284f9b
|
1738 |
out: |
c71a89615
|
1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 |
return ret; } static ssize_t sysfs_blk_trace_attr_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct block_device *bdev; struct request_queue *q; struct hd_struct *p; |
6f9cff84d
|
1749 |
struct blk_trace *bt; |
c71a89615
|
1750 |
u64 value; |
093419971
|
1751 |
ssize_t ret = -EINVAL; |
c71a89615
|
1752 1753 1754 1755 1756 |
if (count == 0) goto out; if (attr == &dev_attr_act_mask) { |
5f3394530
|
1757 |
if (kstrtoull(buf, 0, &value)) { |
c71a89615
|
1758 |
/* Assume it is a list of trace category names */ |
093419971
|
1759 1760 |
ret = blk_trace_str2mask(buf); if (ret < 0) |
c71a89615
|
1761 |
goto out; |
093419971
|
1762 |
value = ret; |
c71a89615
|
1763 |
} |
5f3394530
|
1764 |
} else if (kstrtoull(buf, 0, &value)) |
c71a89615
|
1765 |
goto out; |
093419971
|
1766 |
ret = -ENXIO; |
c71a89615
|
1767 1768 1769 |
p = dev_to_part(dev); bdev = bdget(part_devt(p)); if (bdev == NULL) |
01b284f9b
|
1770 |
goto out; |
c71a89615
|
1771 |
|
b125130b2
|
1772 |
q = blk_trace_get_queue(bdev); |
c71a89615
|
1773 1774 |
if (q == NULL) goto out_bdput; |
5acb3cc2c
|
1775 |
mutex_lock(&q->blk_trace_mutex); |
cd649b8bb
|
1776 |
|
6f9cff84d
|
1777 1778 |
bt = rcu_dereference_protected(q->blk_trace, lockdep_is_held(&q->blk_trace_mutex)); |
cd649b8bb
|
1779 |
if (attr == &dev_attr_enable) { |
6f9cff84d
|
1780 |
if (!!value == !!bt) { |
757d91400
|
1781 1782 1783 |
ret = 0; goto out_unlock_bdev; } |
cd649b8bb
|
1784 |
if (value) |
9908c3099
|
1785 |
ret = blk_trace_setup_queue(q, bdev); |
cd649b8bb
|
1786 1787 1788 1789 |
else ret = blk_trace_remove_queue(q); goto out_unlock_bdev; } |
c71a89615
|
1790 |
ret = 0; |
6f9cff84d
|
1791 |
if (bt == NULL) |
9908c3099
|
1792 |
ret = blk_trace_setup_queue(q, bdev); |
c71a89615
|
1793 1794 1795 |
if (ret == 0) { if (attr == &dev_attr_act_mask) |
6f9cff84d
|
1796 |
bt->act_mask = value; |
c71a89615
|
1797 |
else if (attr == &dev_attr_pid) |
6f9cff84d
|
1798 |
bt->pid = value; |
c71a89615
|
1799 |
else if (attr == &dev_attr_start_lba) |
6f9cff84d
|
1800 |
bt->start_lba = value; |
c71a89615
|
1801 |
else if (attr == &dev_attr_end_lba) |
6f9cff84d
|
1802 |
bt->end_lba = value; |
c71a89615
|
1803 |
} |
cd649b8bb
|
1804 1805 |
out_unlock_bdev: |
5acb3cc2c
|
1806 |
mutex_unlock(&q->blk_trace_mutex); |
c71a89615
|
1807 1808 |
out_bdput: bdput(bdev); |
c71a89615
|
1809 |
out: |
cd649b8bb
|
1810 |
return ret ? ret : count; |
c71a89615
|
1811 |
} |
cd649b8bb
|
1812 |
|
1d54ad6da
|
1813 1814 1815 1816 |
int blk_trace_init_sysfs(struct device *dev) { return sysfs_create_group(&dev->kobj, &blk_trace_attr_group); } |
48c0d4d4c
|
1817 1818 1819 1820 |
void blk_trace_remove_sysfs(struct device *dev) { sysfs_remove_group(&dev->kobj, &blk_trace_attr_group); } |
#endif /* CONFIG_BLK_DEV_IO_TRACE */

#ifdef CONFIG_EVENT_TRACING

/*
 * blk_fill_rwbs - render request op and flags as a short action string
 * @rwbs:  destination; worst case is "FDEFASM" plus NUL, i.e. 8 bytes
 * @op:    request op (REQ_OP_*) combined with REQ_* flag bits
 * @bytes: unused; kept so the exported signature stays stable
 *
 * Produces the "RWBS" field shown by the block tracepoints, e.g. "WS"
 * for a sync write or "FWFS" for a flush+fua sync write.
 */
void blk_fill_rwbs(char *rwbs, unsigned int op, int bytes)
{
	char *p = rwbs;

	if (op & REQ_PREFLUSH)
		*p++ = 'F';

	/* cases are mutually exclusive; order is irrelevant */
	switch (op & REQ_OP_MASK) {
	case REQ_OP_READ:
		*p++ = 'R';
		break;
	case REQ_OP_WRITE:
	case REQ_OP_WRITE_SAME:
		*p++ = 'W';
		break;
	case REQ_OP_DISCARD:
		*p++ = 'D';
		break;
	case REQ_OP_SECURE_ERASE:
		*p++ = 'D';
		*p++ = 'E';
		break;
	case REQ_OP_FLUSH:
		*p++ = 'F';
		break;
	default:
		*p++ = 'N';
	}

	if (op & REQ_FUA)
		*p++ = 'F';
	if (op & REQ_RAHEAD)
		*p++ = 'A';
	if (op & REQ_SYNC)
		*p++ = 'S';
	if (op & REQ_META)
		*p++ = 'M';

	*p = '\0';
}
EXPORT_SYMBOL_GPL(blk_fill_rwbs);

#endif /* CONFIG_EVENT_TRACING */