Commit a358324466b171e145df20bdb74fe81759906de6
1 parent 4143c5cb36
Exists in master and in 7 other branches
ring-buffer: buffer record on/off switch
Impact: enable/disable ring buffer recording API added

Several kernel developers have requested that there be a way to stop recording into the ring buffers with a simple switch that can also be enabled from userspace. This patch adds a new kernel API to the ring buffers called:

  tracing_on()
  tracing_off()

When tracing_off() is called, all ring buffers will not be able to record into their buffers. tracing_on() will enable the ring buffers again.

These two act like an on/off switch. That is, there is no counting of the number of times tracing_off or tracing_on has been called.

A new file is added to the debugfs/tracing directory called

  tracing_on

This allows userspace applications to also flip the switch.

  echo 0 > /debugfs/tracing/tracing_on

disables the tracing.

  echo 1 > /debugfs/tracing/tracing_on

enables it.

Note, this does not disable or enable any tracers. It only sets or clears a flag that needs to be set in order for the ring buffers to write to their buffers. It is a global flag, and affects all ring buffers.

The buffers start out with tracing_on enabled.

There are now three flags that control recording into the buffers:

  tracing_on: affects all ring buffer tracers.

  buffer->record_disabled: affects an allocated buffer; may be set if an anomaly is detected, and tracing is disabled.

  cpu_buffer->record_disabled: set by tracing_stop() or if an anomaly is detected. tracing_start() can not reenable this if an anomaly occurred.

The userspace debugfs/tracing/tracing_enabled is implemented with tracing_stop(), but the userspace code can not enable it if the kernel called tracing_stop().

Userspace can enable tracing_on even if the kernel disabled it. It is just a switch used to stop tracing if a condition was hit. tracing_on is not for protecting critical areas in the kernel, nor is it for stopping tracing if an anomaly occurred, because userspace can reenable it at any time.

Side effect: With this patch, I discovered a dead variable in ftrace.c called tracing_on. This patch removes it.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
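As a quick illustration (not part of the patch), here is a kernel-side sketch of using the new switch: freeze all ring buffers when a condition of interest is hit, so the trace ends right at that point. The helper name and the status check are hypothetical.

    #include <linux/kernel.h>
    #include <linux/ring_buffer.h>

    /*
     * Hypothetical helper: stop ring buffer recording when a bad status
     * is seen.  tracing_off() only clears the global record flag; the
     * tracers themselves stay registered and running.
     */
    static void my_check_status(int status)
    {
            if (status < 0) {
                    tracing_off();
                    pr_info("bad status %d, ring buffer recording stopped\n",
                            status);
            }
    }

Recording can later be resumed either from the kernel with tracing_on() or from userspace with echo 1 > /debugfs/tracing/tracing_on; because userspace can flip the switch back at any time, this is not suitable for protecting critical sections.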
Showing 3 changed files with 106 additions and 6 deletions (inline diff)
include/linux/ring_buffer.h
1 | #ifndef _LINUX_RING_BUFFER_H | 1 | #ifndef _LINUX_RING_BUFFER_H |
2 | #define _LINUX_RING_BUFFER_H | 2 | #define _LINUX_RING_BUFFER_H |
3 | 3 | ||
4 | #include <linux/mm.h> | 4 | #include <linux/mm.h> |
5 | #include <linux/seq_file.h> | 5 | #include <linux/seq_file.h> |
6 | 6 | ||
7 | struct ring_buffer; | 7 | struct ring_buffer; |
8 | struct ring_buffer_iter; | 8 | struct ring_buffer_iter; |
9 | 9 | ||
10 | /* | 10 | /* |
11 | * Don't reference this struct directly, use functions below. | 11 | * Don't reference this struct directly, use functions below. |
12 | */ | 12 | */ |
13 | struct ring_buffer_event { | 13 | struct ring_buffer_event { |
14 | u32 type:2, len:3, time_delta:27; | 14 | u32 type:2, len:3, time_delta:27; |
15 | u32 array[]; | 15 | u32 array[]; |
16 | }; | 16 | }; |
17 | 17 | ||
18 | /** | 18 | /** |
19 | * enum ring_buffer_type - internal ring buffer types | 19 | * enum ring_buffer_type - internal ring buffer types |
20 | * | 20 | * |
21 | * @RINGBUF_TYPE_PADDING: Left over page padding | 21 | * @RINGBUF_TYPE_PADDING: Left over page padding |
22 | * array is ignored | 22 | * array is ignored |
23 | * size is variable depending on how much | 23 | * size is variable depending on how much |
24 | * padding is needed | 24 | * padding is needed |
25 | * | 25 | * |
26 | * @RINGBUF_TYPE_TIME_EXTEND: Extend the time delta | 26 | * @RINGBUF_TYPE_TIME_EXTEND: Extend the time delta |
27 | * array[0] = time delta (28 .. 59) | 27 | * array[0] = time delta (28 .. 59) |
28 | * size = 8 bytes | 28 | * size = 8 bytes |
29 | * | 29 | * |
30 | * @RINGBUF_TYPE_TIME_STAMP: Sync time stamp with external clock | 30 | * @RINGBUF_TYPE_TIME_STAMP: Sync time stamp with external clock |
31 | * array[0] = tv_nsec | 31 | * array[0] = tv_nsec |
32 | * array[1] = tv_sec | 32 | * array[1] = tv_sec |
33 | * size = 16 bytes | 33 | * size = 16 bytes |
34 | * | 34 | * |
35 | * @RINGBUF_TYPE_DATA: Data record | 35 | * @RINGBUF_TYPE_DATA: Data record |
36 | * If len is zero: | 36 | * If len is zero: |
37 | * array[0] holds the actual length | 37 | * array[0] holds the actual length |
38 | * array[1..(length+3)/4-1] holds data | 38 | * array[1..(length+3)/4-1] holds data |
39 | * else | 39 | * else |
40 | * length = len << 2 | 40 | * length = len << 2 |
41 | * array[0..(length+3)/4] holds data | 41 | * array[0..(length+3)/4] holds data |
42 | */ | 42 | */ |
43 | enum ring_buffer_type { | 43 | enum ring_buffer_type { |
44 | RINGBUF_TYPE_PADDING, | 44 | RINGBUF_TYPE_PADDING, |
45 | RINGBUF_TYPE_TIME_EXTEND, | 45 | RINGBUF_TYPE_TIME_EXTEND, |
46 | /* FIXME: RINGBUF_TYPE_TIME_STAMP not implemented */ | 46 | /* FIXME: RINGBUF_TYPE_TIME_STAMP not implemented */ |
47 | RINGBUF_TYPE_TIME_STAMP, | 47 | RINGBUF_TYPE_TIME_STAMP, |
48 | RINGBUF_TYPE_DATA, | 48 | RINGBUF_TYPE_DATA, |
49 | }; | 49 | }; |
50 | 50 | ||
51 | unsigned ring_buffer_event_length(struct ring_buffer_event *event); | 51 | unsigned ring_buffer_event_length(struct ring_buffer_event *event); |
52 | void *ring_buffer_event_data(struct ring_buffer_event *event); | 52 | void *ring_buffer_event_data(struct ring_buffer_event *event); |
53 | 53 | ||
54 | /** | 54 | /** |
55 | * ring_buffer_event_time_delta - return the delta timestamp of the event | 55 | * ring_buffer_event_time_delta - return the delta timestamp of the event |
56 | * @event: the event to get the delta timestamp of | 56 | * @event: the event to get the delta timestamp of |
57 | * | 57 | * |
58 | * The delta timestamp is the 27 bit timestamp since the last event. | 58 | * The delta timestamp is the 27 bit timestamp since the last event. |
59 | */ | 59 | */ |
60 | static inline unsigned | 60 | static inline unsigned |
61 | ring_buffer_event_time_delta(struct ring_buffer_event *event) | 61 | ring_buffer_event_time_delta(struct ring_buffer_event *event) |
62 | { | 62 | { |
63 | return event->time_delta; | 63 | return event->time_delta; |
64 | } | 64 | } |
65 | 65 | ||
66 | /* | 66 | /* |
67 | * size is in bytes for each per CPU buffer. | 67 | * size is in bytes for each per CPU buffer. |
68 | */ | 68 | */ |
69 | struct ring_buffer * | 69 | struct ring_buffer * |
70 | ring_buffer_alloc(unsigned long size, unsigned flags); | 70 | ring_buffer_alloc(unsigned long size, unsigned flags); |
71 | void ring_buffer_free(struct ring_buffer *buffer); | 71 | void ring_buffer_free(struct ring_buffer *buffer); |
72 | 72 | ||
73 | int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size); | 73 | int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size); |
74 | 74 | ||
75 | struct ring_buffer_event * | 75 | struct ring_buffer_event * |
76 | ring_buffer_lock_reserve(struct ring_buffer *buffer, | 76 | ring_buffer_lock_reserve(struct ring_buffer *buffer, |
77 | unsigned long length, | 77 | unsigned long length, |
78 | unsigned long *flags); | 78 | unsigned long *flags); |
79 | int ring_buffer_unlock_commit(struct ring_buffer *buffer, | 79 | int ring_buffer_unlock_commit(struct ring_buffer *buffer, |
80 | struct ring_buffer_event *event, | 80 | struct ring_buffer_event *event, |
81 | unsigned long flags); | 81 | unsigned long flags); |
82 | int ring_buffer_write(struct ring_buffer *buffer, | 82 | int ring_buffer_write(struct ring_buffer *buffer, |
83 | unsigned long length, void *data); | 83 | unsigned long length, void *data); |
84 | 84 | ||
85 | struct ring_buffer_event * | 85 | struct ring_buffer_event * |
86 | ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts); | 86 | ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts); |
87 | struct ring_buffer_event * | 87 | struct ring_buffer_event * |
88 | ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts); | 88 | ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts); |
89 | 89 | ||
90 | struct ring_buffer_iter * | 90 | struct ring_buffer_iter * |
91 | ring_buffer_read_start(struct ring_buffer *buffer, int cpu); | 91 | ring_buffer_read_start(struct ring_buffer *buffer, int cpu); |
92 | void ring_buffer_read_finish(struct ring_buffer_iter *iter); | 92 | void ring_buffer_read_finish(struct ring_buffer_iter *iter); |
93 | 93 | ||
94 | struct ring_buffer_event * | 94 | struct ring_buffer_event * |
95 | ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts); | 95 | ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts); |
96 | struct ring_buffer_event * | 96 | struct ring_buffer_event * |
97 | ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts); | 97 | ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts); |
98 | void ring_buffer_iter_reset(struct ring_buffer_iter *iter); | 98 | void ring_buffer_iter_reset(struct ring_buffer_iter *iter); |
99 | int ring_buffer_iter_empty(struct ring_buffer_iter *iter); | 99 | int ring_buffer_iter_empty(struct ring_buffer_iter *iter); |
100 | 100 | ||
101 | unsigned long ring_buffer_size(struct ring_buffer *buffer); | 101 | unsigned long ring_buffer_size(struct ring_buffer *buffer); |
102 | 102 | ||
103 | void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu); | 103 | void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu); |
104 | void ring_buffer_reset(struct ring_buffer *buffer); | 104 | void ring_buffer_reset(struct ring_buffer *buffer); |
105 | 105 | ||
106 | int ring_buffer_swap_cpu(struct ring_buffer *buffer_a, | 106 | int ring_buffer_swap_cpu(struct ring_buffer *buffer_a, |
107 | struct ring_buffer *buffer_b, int cpu); | 107 | struct ring_buffer *buffer_b, int cpu); |
108 | 108 | ||
109 | int ring_buffer_empty(struct ring_buffer *buffer); | 109 | int ring_buffer_empty(struct ring_buffer *buffer); |
110 | int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu); | 110 | int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu); |
111 | 111 | ||
112 | void ring_buffer_record_disable(struct ring_buffer *buffer); | 112 | void ring_buffer_record_disable(struct ring_buffer *buffer); |
113 | void ring_buffer_record_enable(struct ring_buffer *buffer); | 113 | void ring_buffer_record_enable(struct ring_buffer *buffer); |
114 | void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu); | 114 | void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu); |
115 | void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu); | 115 | void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu); |
116 | 116 | ||
117 | unsigned long ring_buffer_entries(struct ring_buffer *buffer); | 117 | unsigned long ring_buffer_entries(struct ring_buffer *buffer); |
118 | unsigned long ring_buffer_overruns(struct ring_buffer *buffer); | 118 | unsigned long ring_buffer_overruns(struct ring_buffer *buffer); |
119 | 119 | ||
120 | u64 ring_buffer_time_stamp(int cpu); | 120 | u64 ring_buffer_time_stamp(int cpu); |
121 | void ring_buffer_normalize_time_stamp(int cpu, u64 *ts); | 121 | void ring_buffer_normalize_time_stamp(int cpu, u64 *ts); |
122 | 122 | ||
123 | void tracing_on(void); | ||
124 | void tracing_off(void); | ||
125 | |||
123 | enum ring_buffer_flags { | 126 | enum ring_buffer_flags { |
124 | RB_FL_OVERWRITE = 1 << 0, | 127 | RB_FL_OVERWRITE = 1 << 0, |
125 | }; | 128 | }; |
126 | 129 | ||
127 | #endif /* _LINUX_RING_BUFFER_H */ | 130 | #endif /* _LINUX_RING_BUFFER_H */ |
128 | 131 |
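The tracing_on()/tracing_off() declarations added above are the kernel side of the switch; the matching userspace knob is the tracing_on file in debugfs described in the commit message. Below is a minimal C sketch of flipping it from a userspace program; the /debugfs mount point is taken from the commit message and may differ on a given system, and the helper is hypothetical.

    #include <stdio.h>
    #include <stdlib.h>

    /* Assumes debugfs is mounted at /debugfs, as in the commit message. */
    #define TRACING_ON_PATH "/debugfs/tracing/tracing_on"

    /* Write "0" to stop ring buffer recording, "1" to resume it. */
    static int set_tracing_on(int on)
    {
            FILE *f = fopen(TRACING_ON_PATH, "w");

            if (!f) {
                    perror("fopen " TRACING_ON_PATH);
                    return -1;
            }
            fprintf(f, "%d\n", on ? 1 : 0);
            fclose(f);
            return 0;
    }

    int main(void)
    {
            /* Stop recording; pass 1 instead to resume. */
            return set_tracing_on(0) ? EXIT_FAILURE : EXIT_SUCCESS;
    }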
kernel/trace/ftrace.c
1 | /* | 1 | /* |
2 | * Infrastructure for profiling code inserted by 'gcc -pg'. | 2 | * Infrastructure for profiling code inserted by 'gcc -pg'. |
3 | * | 3 | * |
4 | * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com> | 4 | * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com> |
5 | * Copyright (C) 2004-2008 Ingo Molnar <mingo@redhat.com> | 5 | * Copyright (C) 2004-2008 Ingo Molnar <mingo@redhat.com> |
6 | * | 6 | * |
7 | * Originally ported from the -rt patch by: | 7 | * Originally ported from the -rt patch by: |
8 | * Copyright (C) 2007 Arnaldo Carvalho de Melo <acme@redhat.com> | 8 | * Copyright (C) 2007 Arnaldo Carvalho de Melo <acme@redhat.com> |
9 | * | 9 | * |
10 | * Based on code in the latency_tracer, that is: | 10 | * Based on code in the latency_tracer, that is: |
11 | * | 11 | * |
12 | * Copyright (C) 2004-2006 Ingo Molnar | 12 | * Copyright (C) 2004-2006 Ingo Molnar |
13 | * Copyright (C) 2004 William Lee Irwin III | 13 | * Copyright (C) 2004 William Lee Irwin III |
14 | */ | 14 | */ |
15 | 15 | ||
16 | #include <linux/stop_machine.h> | 16 | #include <linux/stop_machine.h> |
17 | #include <linux/clocksource.h> | 17 | #include <linux/clocksource.h> |
18 | #include <linux/kallsyms.h> | 18 | #include <linux/kallsyms.h> |
19 | #include <linux/seq_file.h> | 19 | #include <linux/seq_file.h> |
20 | #include <linux/debugfs.h> | 20 | #include <linux/debugfs.h> |
21 | #include <linux/hardirq.h> | 21 | #include <linux/hardirq.h> |
22 | #include <linux/kthread.h> | 22 | #include <linux/kthread.h> |
23 | #include <linux/uaccess.h> | 23 | #include <linux/uaccess.h> |
24 | #include <linux/kprobes.h> | 24 | #include <linux/kprobes.h> |
25 | #include <linux/ftrace.h> | 25 | #include <linux/ftrace.h> |
26 | #include <linux/sysctl.h> | 26 | #include <linux/sysctl.h> |
27 | #include <linux/ctype.h> | 27 | #include <linux/ctype.h> |
28 | #include <linux/list.h> | 28 | #include <linux/list.h> |
29 | 29 | ||
30 | #include <asm/ftrace.h> | 30 | #include <asm/ftrace.h> |
31 | 31 | ||
32 | #include "trace.h" | 32 | #include "trace.h" |
33 | 33 | ||
34 | #define FTRACE_WARN_ON(cond) \ | 34 | #define FTRACE_WARN_ON(cond) \ |
35 | do { \ | 35 | do { \ |
36 | if (WARN_ON(cond)) \ | 36 | if (WARN_ON(cond)) \ |
37 | ftrace_kill(); \ | 37 | ftrace_kill(); \ |
38 | } while (0) | 38 | } while (0) |
39 | 39 | ||
40 | #define FTRACE_WARN_ON_ONCE(cond) \ | 40 | #define FTRACE_WARN_ON_ONCE(cond) \ |
41 | do { \ | 41 | do { \ |
42 | if (WARN_ON_ONCE(cond)) \ | 42 | if (WARN_ON_ONCE(cond)) \ |
43 | ftrace_kill(); \ | 43 | ftrace_kill(); \ |
44 | } while (0) | 44 | } while (0) |
45 | 45 | ||
46 | /* ftrace_enabled is a method to turn ftrace on or off */ | 46 | /* ftrace_enabled is a method to turn ftrace on or off */ |
47 | int ftrace_enabled __read_mostly; | 47 | int ftrace_enabled __read_mostly; |
48 | static int last_ftrace_enabled; | 48 | static int last_ftrace_enabled; |
49 | 49 | ||
50 | /* | 50 | /* |
51 | * ftrace_disabled is set when an anomaly is discovered. | 51 | * ftrace_disabled is set when an anomaly is discovered. |
52 | * ftrace_disabled is much stronger than ftrace_enabled. | 52 | * ftrace_disabled is much stronger than ftrace_enabled. |
53 | */ | 53 | */ |
54 | static int ftrace_disabled __read_mostly; | 54 | static int ftrace_disabled __read_mostly; |
55 | 55 | ||
56 | static DEFINE_SPINLOCK(ftrace_lock); | 56 | static DEFINE_SPINLOCK(ftrace_lock); |
57 | static DEFINE_MUTEX(ftrace_sysctl_lock); | 57 | static DEFINE_MUTEX(ftrace_sysctl_lock); |
58 | 58 | ||
59 | static struct ftrace_ops ftrace_list_end __read_mostly = | 59 | static struct ftrace_ops ftrace_list_end __read_mostly = |
60 | { | 60 | { |
61 | .func = ftrace_stub, | 61 | .func = ftrace_stub, |
62 | }; | 62 | }; |
63 | 63 | ||
64 | static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end; | 64 | static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end; |
65 | ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; | 65 | ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; |
66 | 66 | ||
67 | static void ftrace_list_func(unsigned long ip, unsigned long parent_ip) | 67 | static void ftrace_list_func(unsigned long ip, unsigned long parent_ip) |
68 | { | 68 | { |
69 | struct ftrace_ops *op = ftrace_list; | 69 | struct ftrace_ops *op = ftrace_list; |
70 | 70 | ||
71 | /* in case someone actually ports this to alpha! */ | 71 | /* in case someone actually ports this to alpha! */ |
72 | read_barrier_depends(); | 72 | read_barrier_depends(); |
73 | 73 | ||
74 | while (op != &ftrace_list_end) { | 74 | while (op != &ftrace_list_end) { |
75 | /* silly alpha */ | 75 | /* silly alpha */ |
76 | read_barrier_depends(); | 76 | read_barrier_depends(); |
77 | op->func(ip, parent_ip); | 77 | op->func(ip, parent_ip); |
78 | op = op->next; | 78 | op = op->next; |
79 | }; | 79 | }; |
80 | } | 80 | } |
81 | 81 | ||
82 | /** | 82 | /** |
83 | * clear_ftrace_function - reset the ftrace function | 83 | * clear_ftrace_function - reset the ftrace function |
84 | * | 84 | * |
85 | * This NULLs the ftrace function and in essence stops | 85 | * This NULLs the ftrace function and in essence stops |
86 | * tracing. There may be lag | 86 | * tracing. There may be lag |
87 | */ | 87 | */ |
88 | void clear_ftrace_function(void) | 88 | void clear_ftrace_function(void) |
89 | { | 89 | { |
90 | ftrace_trace_function = ftrace_stub; | 90 | ftrace_trace_function = ftrace_stub; |
91 | } | 91 | } |
92 | 92 | ||
93 | static int __register_ftrace_function(struct ftrace_ops *ops) | 93 | static int __register_ftrace_function(struct ftrace_ops *ops) |
94 | { | 94 | { |
95 | /* should not be called from interrupt context */ | 95 | /* should not be called from interrupt context */ |
96 | spin_lock(&ftrace_lock); | 96 | spin_lock(&ftrace_lock); |
97 | 97 | ||
98 | ops->next = ftrace_list; | 98 | ops->next = ftrace_list; |
99 | /* | 99 | /* |
100 | * We are entering ops into the ftrace_list but another | 100 | * We are entering ops into the ftrace_list but another |
101 | * CPU might be walking that list. We need to make sure | 101 | * CPU might be walking that list. We need to make sure |
102 | * the ops->next pointer is valid before another CPU sees | 102 | * the ops->next pointer is valid before another CPU sees |
103 | * the ops pointer included into the ftrace_list. | 103 | * the ops pointer included into the ftrace_list. |
104 | */ | 104 | */ |
105 | smp_wmb(); | 105 | smp_wmb(); |
106 | ftrace_list = ops; | 106 | ftrace_list = ops; |
107 | 107 | ||
108 | if (ftrace_enabled) { | 108 | if (ftrace_enabled) { |
109 | /* | 109 | /* |
110 | * For one func, simply call it directly. | 110 | * For one func, simply call it directly. |
111 | * For more than one func, call the chain. | 111 | * For more than one func, call the chain. |
112 | */ | 112 | */ |
113 | if (ops->next == &ftrace_list_end) | 113 | if (ops->next == &ftrace_list_end) |
114 | ftrace_trace_function = ops->func; | 114 | ftrace_trace_function = ops->func; |
115 | else | 115 | else |
116 | ftrace_trace_function = ftrace_list_func; | 116 | ftrace_trace_function = ftrace_list_func; |
117 | } | 117 | } |
118 | 118 | ||
119 | spin_unlock(&ftrace_lock); | 119 | spin_unlock(&ftrace_lock); |
120 | 120 | ||
121 | return 0; | 121 | return 0; |
122 | } | 122 | } |
123 | 123 | ||
124 | static int __unregister_ftrace_function(struct ftrace_ops *ops) | 124 | static int __unregister_ftrace_function(struct ftrace_ops *ops) |
125 | { | 125 | { |
126 | struct ftrace_ops **p; | 126 | struct ftrace_ops **p; |
127 | int ret = 0; | 127 | int ret = 0; |
128 | 128 | ||
129 | /* should not be called from interrupt context */ | 129 | /* should not be called from interrupt context */ |
130 | spin_lock(&ftrace_lock); | 130 | spin_lock(&ftrace_lock); |
131 | 131 | ||
132 | /* | 132 | /* |
133 | * If we are removing the last function, then simply point | 133 | * If we are removing the last function, then simply point |
134 | * to the ftrace_stub. | 134 | * to the ftrace_stub. |
135 | */ | 135 | */ |
136 | if (ftrace_list == ops && ops->next == &ftrace_list_end) { | 136 | if (ftrace_list == ops && ops->next == &ftrace_list_end) { |
137 | ftrace_trace_function = ftrace_stub; | 137 | ftrace_trace_function = ftrace_stub; |
138 | ftrace_list = &ftrace_list_end; | 138 | ftrace_list = &ftrace_list_end; |
139 | goto out; | 139 | goto out; |
140 | } | 140 | } |
141 | 141 | ||
142 | for (p = &ftrace_list; *p != &ftrace_list_end; p = &(*p)->next) | 142 | for (p = &ftrace_list; *p != &ftrace_list_end; p = &(*p)->next) |
143 | if (*p == ops) | 143 | if (*p == ops) |
144 | break; | 144 | break; |
145 | 145 | ||
146 | if (*p != ops) { | 146 | if (*p != ops) { |
147 | ret = -1; | 147 | ret = -1; |
148 | goto out; | 148 | goto out; |
149 | } | 149 | } |
150 | 150 | ||
151 | *p = (*p)->next; | 151 | *p = (*p)->next; |
152 | 152 | ||
153 | if (ftrace_enabled) { | 153 | if (ftrace_enabled) { |
154 | /* If we only have one func left, then call that directly */ | 154 | /* If we only have one func left, then call that directly */ |
155 | if (ftrace_list == &ftrace_list_end || | 155 | if (ftrace_list == &ftrace_list_end || |
156 | ftrace_list->next == &ftrace_list_end) | 156 | ftrace_list->next == &ftrace_list_end) |
157 | ftrace_trace_function = ftrace_list->func; | 157 | ftrace_trace_function = ftrace_list->func; |
158 | } | 158 | } |
159 | 159 | ||
160 | out: | 160 | out: |
161 | spin_unlock(&ftrace_lock); | 161 | spin_unlock(&ftrace_lock); |
162 | 162 | ||
163 | return ret; | 163 | return ret; |
164 | } | 164 | } |
165 | 165 | ||
166 | #ifdef CONFIG_DYNAMIC_FTRACE | 166 | #ifdef CONFIG_DYNAMIC_FTRACE |
167 | #ifndef CONFIG_FTRACE_MCOUNT_RECORD | 167 | #ifndef CONFIG_FTRACE_MCOUNT_RECORD |
168 | # error Dynamic ftrace depends on MCOUNT_RECORD | 168 | # error Dynamic ftrace depends on MCOUNT_RECORD |
169 | #endif | 169 | #endif |
170 | 170 | ||
171 | /* | 171 | /* |
172 | * Since MCOUNT_ADDR may point to mcount itself, we do not want | 172 | * Since MCOUNT_ADDR may point to mcount itself, we do not want |
173 | * to get it confused by reading a reference in the code as we | 173 | * to get it confused by reading a reference in the code as we |
174 | * are parsing on objcopy output of text. Use a variable for | 174 | * are parsing on objcopy output of text. Use a variable for |
175 | * it instead. | 175 | * it instead. |
176 | */ | 176 | */ |
177 | static unsigned long mcount_addr = MCOUNT_ADDR; | 177 | static unsigned long mcount_addr = MCOUNT_ADDR; |
178 | 178 | ||
179 | enum { | 179 | enum { |
180 | FTRACE_ENABLE_CALLS = (1 << 0), | 180 | FTRACE_ENABLE_CALLS = (1 << 0), |
181 | FTRACE_DISABLE_CALLS = (1 << 1), | 181 | FTRACE_DISABLE_CALLS = (1 << 1), |
182 | FTRACE_UPDATE_TRACE_FUNC = (1 << 2), | 182 | FTRACE_UPDATE_TRACE_FUNC = (1 << 2), |
183 | FTRACE_ENABLE_MCOUNT = (1 << 3), | 183 | FTRACE_ENABLE_MCOUNT = (1 << 3), |
184 | FTRACE_DISABLE_MCOUNT = (1 << 4), | 184 | FTRACE_DISABLE_MCOUNT = (1 << 4), |
185 | }; | 185 | }; |
186 | 186 | ||
187 | static int ftrace_filtered; | 187 | static int ftrace_filtered; |
188 | static int tracing_on; | ||
189 | 188 | ||
190 | static LIST_HEAD(ftrace_new_addrs); | 189 | static LIST_HEAD(ftrace_new_addrs); |
191 | 190 | ||
192 | static DEFINE_MUTEX(ftrace_regex_lock); | 191 | static DEFINE_MUTEX(ftrace_regex_lock); |
193 | 192 | ||
194 | struct ftrace_page { | 193 | struct ftrace_page { |
195 | struct ftrace_page *next; | 194 | struct ftrace_page *next; |
196 | unsigned long index; | 195 | unsigned long index; |
197 | struct dyn_ftrace records[]; | 196 | struct dyn_ftrace records[]; |
198 | }; | 197 | }; |
199 | 198 | ||
200 | #define ENTRIES_PER_PAGE \ | 199 | #define ENTRIES_PER_PAGE \ |
201 | ((PAGE_SIZE - sizeof(struct ftrace_page)) / sizeof(struct dyn_ftrace)) | 200 | ((PAGE_SIZE - sizeof(struct ftrace_page)) / sizeof(struct dyn_ftrace)) |
202 | 201 | ||
203 | /* estimate from running different kernels */ | 202 | /* estimate from running different kernels */ |
204 | #define NR_TO_INIT 10000 | 203 | #define NR_TO_INIT 10000 |
205 | 204 | ||
206 | static struct ftrace_page *ftrace_pages_start; | 205 | static struct ftrace_page *ftrace_pages_start; |
207 | static struct ftrace_page *ftrace_pages; | 206 | static struct ftrace_page *ftrace_pages; |
208 | 207 | ||
209 | static struct dyn_ftrace *ftrace_free_records; | 208 | static struct dyn_ftrace *ftrace_free_records; |
210 | 209 | ||
211 | 210 | ||
212 | #ifdef CONFIG_KPROBES | 211 | #ifdef CONFIG_KPROBES |
213 | 212 | ||
214 | static int frozen_record_count; | 213 | static int frozen_record_count; |
215 | 214 | ||
216 | static inline void freeze_record(struct dyn_ftrace *rec) | 215 | static inline void freeze_record(struct dyn_ftrace *rec) |
217 | { | 216 | { |
218 | if (!(rec->flags & FTRACE_FL_FROZEN)) { | 217 | if (!(rec->flags & FTRACE_FL_FROZEN)) { |
219 | rec->flags |= FTRACE_FL_FROZEN; | 218 | rec->flags |= FTRACE_FL_FROZEN; |
220 | frozen_record_count++; | 219 | frozen_record_count++; |
221 | } | 220 | } |
222 | } | 221 | } |
223 | 222 | ||
224 | static inline void unfreeze_record(struct dyn_ftrace *rec) | 223 | static inline void unfreeze_record(struct dyn_ftrace *rec) |
225 | { | 224 | { |
226 | if (rec->flags & FTRACE_FL_FROZEN) { | 225 | if (rec->flags & FTRACE_FL_FROZEN) { |
227 | rec->flags &= ~FTRACE_FL_FROZEN; | 226 | rec->flags &= ~FTRACE_FL_FROZEN; |
228 | frozen_record_count--; | 227 | frozen_record_count--; |
229 | } | 228 | } |
230 | } | 229 | } |
231 | 230 | ||
232 | static inline int record_frozen(struct dyn_ftrace *rec) | 231 | static inline int record_frozen(struct dyn_ftrace *rec) |
233 | { | 232 | { |
234 | return rec->flags & FTRACE_FL_FROZEN; | 233 | return rec->flags & FTRACE_FL_FROZEN; |
235 | } | 234 | } |
236 | #else | 235 | #else |
237 | # define freeze_record(rec) ({ 0; }) | 236 | # define freeze_record(rec) ({ 0; }) |
238 | # define unfreeze_record(rec) ({ 0; }) | 237 | # define unfreeze_record(rec) ({ 0; }) |
239 | # define record_frozen(rec) ({ 0; }) | 238 | # define record_frozen(rec) ({ 0; }) |
240 | #endif /* CONFIG_KPROBES */ | 239 | #endif /* CONFIG_KPROBES */ |
241 | 240 | ||
242 | static void ftrace_free_rec(struct dyn_ftrace *rec) | 241 | static void ftrace_free_rec(struct dyn_ftrace *rec) |
243 | { | 242 | { |
244 | rec->ip = (unsigned long)ftrace_free_records; | 243 | rec->ip = (unsigned long)ftrace_free_records; |
245 | ftrace_free_records = rec; | 244 | ftrace_free_records = rec; |
246 | rec->flags |= FTRACE_FL_FREE; | 245 | rec->flags |= FTRACE_FL_FREE; |
247 | } | 246 | } |
248 | 247 | ||
249 | void ftrace_release(void *start, unsigned long size) | 248 | void ftrace_release(void *start, unsigned long size) |
250 | { | 249 | { |
251 | struct dyn_ftrace *rec; | 250 | struct dyn_ftrace *rec; |
252 | struct ftrace_page *pg; | 251 | struct ftrace_page *pg; |
253 | unsigned long s = (unsigned long)start; | 252 | unsigned long s = (unsigned long)start; |
254 | unsigned long e = s + size; | 253 | unsigned long e = s + size; |
255 | int i; | 254 | int i; |
256 | 255 | ||
257 | if (ftrace_disabled || !start) | 256 | if (ftrace_disabled || !start) |
258 | return; | 257 | return; |
259 | 258 | ||
260 | /* should not be called from interrupt context */ | 259 | /* should not be called from interrupt context */ |
261 | spin_lock(&ftrace_lock); | 260 | spin_lock(&ftrace_lock); |
262 | 261 | ||
263 | for (pg = ftrace_pages_start; pg; pg = pg->next) { | 262 | for (pg = ftrace_pages_start; pg; pg = pg->next) { |
264 | for (i = 0; i < pg->index; i++) { | 263 | for (i = 0; i < pg->index; i++) { |
265 | rec = &pg->records[i]; | 264 | rec = &pg->records[i]; |
266 | 265 | ||
267 | if ((rec->ip >= s) && (rec->ip < e)) | 266 | if ((rec->ip >= s) && (rec->ip < e)) |
268 | ftrace_free_rec(rec); | 267 | ftrace_free_rec(rec); |
269 | } | 268 | } |
270 | } | 269 | } |
271 | spin_unlock(&ftrace_lock); | 270 | spin_unlock(&ftrace_lock); |
272 | } | 271 | } |
273 | 272 | ||
274 | static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) | 273 | static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) |
275 | { | 274 | { |
276 | struct dyn_ftrace *rec; | 275 | struct dyn_ftrace *rec; |
277 | 276 | ||
278 | /* First check for freed records */ | 277 | /* First check for freed records */ |
279 | if (ftrace_free_records) { | 278 | if (ftrace_free_records) { |
280 | rec = ftrace_free_records; | 279 | rec = ftrace_free_records; |
281 | 280 | ||
282 | if (unlikely(!(rec->flags & FTRACE_FL_FREE))) { | 281 | if (unlikely(!(rec->flags & FTRACE_FL_FREE))) { |
283 | FTRACE_WARN_ON_ONCE(1); | 282 | FTRACE_WARN_ON_ONCE(1); |
284 | ftrace_free_records = NULL; | 283 | ftrace_free_records = NULL; |
285 | return NULL; | 284 | return NULL; |
286 | } | 285 | } |
287 | 286 | ||
288 | ftrace_free_records = (void *)rec->ip; | 287 | ftrace_free_records = (void *)rec->ip; |
289 | memset(rec, 0, sizeof(*rec)); | 288 | memset(rec, 0, sizeof(*rec)); |
290 | return rec; | 289 | return rec; |
291 | } | 290 | } |
292 | 291 | ||
293 | if (ftrace_pages->index == ENTRIES_PER_PAGE) { | 292 | if (ftrace_pages->index == ENTRIES_PER_PAGE) { |
294 | if (!ftrace_pages->next) { | 293 | if (!ftrace_pages->next) { |
295 | /* allocate another page */ | 294 | /* allocate another page */ |
296 | ftrace_pages->next = | 295 | ftrace_pages->next = |
297 | (void *)get_zeroed_page(GFP_KERNEL); | 296 | (void *)get_zeroed_page(GFP_KERNEL); |
298 | if (!ftrace_pages->next) | 297 | if (!ftrace_pages->next) |
299 | return NULL; | 298 | return NULL; |
300 | } | 299 | } |
301 | ftrace_pages = ftrace_pages->next; | 300 | ftrace_pages = ftrace_pages->next; |
302 | } | 301 | } |
303 | 302 | ||
304 | return &ftrace_pages->records[ftrace_pages->index++]; | 303 | return &ftrace_pages->records[ftrace_pages->index++]; |
305 | } | 304 | } |
306 | 305 | ||
307 | static struct dyn_ftrace * | 306 | static struct dyn_ftrace * |
308 | ftrace_record_ip(unsigned long ip) | 307 | ftrace_record_ip(unsigned long ip) |
309 | { | 308 | { |
310 | struct dyn_ftrace *rec; | 309 | struct dyn_ftrace *rec; |
311 | 310 | ||
312 | if (!ftrace_enabled || ftrace_disabled) | 311 | if (!ftrace_enabled || ftrace_disabled) |
313 | return NULL; | 312 | return NULL; |
314 | 313 | ||
315 | rec = ftrace_alloc_dyn_node(ip); | 314 | rec = ftrace_alloc_dyn_node(ip); |
316 | if (!rec) | 315 | if (!rec) |
317 | return NULL; | 316 | return NULL; |
318 | 317 | ||
319 | rec->ip = ip; | 318 | rec->ip = ip; |
320 | 319 | ||
321 | list_add(&rec->list, &ftrace_new_addrs); | 320 | list_add(&rec->list, &ftrace_new_addrs); |
322 | 321 | ||
323 | return rec; | 322 | return rec; |
324 | } | 323 | } |
325 | 324 | ||
326 | #define FTRACE_ADDR ((long)(ftrace_caller)) | 325 | #define FTRACE_ADDR ((long)(ftrace_caller)) |
327 | 326 | ||
328 | static int | 327 | static int |
329 | __ftrace_replace_code(struct dyn_ftrace *rec, | 328 | __ftrace_replace_code(struct dyn_ftrace *rec, |
330 | unsigned char *old, unsigned char *new, int enable) | 329 | unsigned char *old, unsigned char *new, int enable) |
331 | { | 330 | { |
332 | unsigned long ip, fl; | 331 | unsigned long ip, fl; |
333 | 332 | ||
334 | ip = rec->ip; | 333 | ip = rec->ip; |
335 | 334 | ||
336 | if (ftrace_filtered && enable) { | 335 | if (ftrace_filtered && enable) { |
337 | /* | 336 | /* |
338 | * If filtering is on: | 337 | * If filtering is on: |
339 | * | 338 | * |
340 | * If this record is set to be filtered and | 339 | * If this record is set to be filtered and |
341 | * is enabled then do nothing. | 340 | * is enabled then do nothing. |
342 | * | 341 | * |
343 | * If this record is set to be filtered and | 342 | * If this record is set to be filtered and |
344 | * it is not enabled, enable it. | 343 | * it is not enabled, enable it. |
345 | * | 344 | * |
346 | * If this record is not set to be filtered | 345 | * If this record is not set to be filtered |
347 | * and it is not enabled do nothing. | 346 | * and it is not enabled do nothing. |
348 | * | 347 | * |
349 | * If this record is set not to trace then | 348 | * If this record is set not to trace then |
350 | * do nothing. | 349 | * do nothing. |
351 | * | 350 | * |
352 | * If this record is set not to trace and | 351 | * If this record is set not to trace and |
353 | * it is enabled then disable it. | 352 | * it is enabled then disable it. |
354 | * | 353 | * |
355 | * If this record is not set to be filtered and | 354 | * If this record is not set to be filtered and |
356 | * it is enabled, disable it. | 355 | * it is enabled, disable it. |
357 | */ | 356 | */ |
358 | 357 | ||
359 | fl = rec->flags & (FTRACE_FL_FILTER | FTRACE_FL_NOTRACE | | 358 | fl = rec->flags & (FTRACE_FL_FILTER | FTRACE_FL_NOTRACE | |
360 | FTRACE_FL_ENABLED); | 359 | FTRACE_FL_ENABLED); |
361 | 360 | ||
362 | if ((fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED)) || | 361 | if ((fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED)) || |
363 | (fl == (FTRACE_FL_FILTER | FTRACE_FL_NOTRACE)) || | 362 | (fl == (FTRACE_FL_FILTER | FTRACE_FL_NOTRACE)) || |
364 | !fl || (fl == FTRACE_FL_NOTRACE)) | 363 | !fl || (fl == FTRACE_FL_NOTRACE)) |
365 | return 0; | 364 | return 0; |
366 | 365 | ||
367 | /* | 366 | /* |
368 | * If it is enabled disable it, | 367 | * If it is enabled disable it, |
369 | * otherwise enable it! | 368 | * otherwise enable it! |
370 | */ | 369 | */ |
371 | if (fl & FTRACE_FL_ENABLED) { | 370 | if (fl & FTRACE_FL_ENABLED) { |
372 | /* swap new and old */ | 371 | /* swap new and old */ |
373 | new = old; | 372 | new = old; |
374 | old = ftrace_call_replace(ip, FTRACE_ADDR); | 373 | old = ftrace_call_replace(ip, FTRACE_ADDR); |
375 | rec->flags &= ~FTRACE_FL_ENABLED; | 374 | rec->flags &= ~FTRACE_FL_ENABLED; |
376 | } else { | 375 | } else { |
377 | new = ftrace_call_replace(ip, FTRACE_ADDR); | 376 | new = ftrace_call_replace(ip, FTRACE_ADDR); |
378 | rec->flags |= FTRACE_FL_ENABLED; | 377 | rec->flags |= FTRACE_FL_ENABLED; |
379 | } | 378 | } |
380 | } else { | 379 | } else { |
381 | 380 | ||
382 | if (enable) { | 381 | if (enable) { |
383 | /* | 382 | /* |
384 | * If this record is set not to trace and is | 383 | * If this record is set not to trace and is |
385 | * not enabled, do nothing. | 384 | * not enabled, do nothing. |
386 | */ | 385 | */ |
387 | fl = rec->flags & (FTRACE_FL_NOTRACE | FTRACE_FL_ENABLED); | 386 | fl = rec->flags & (FTRACE_FL_NOTRACE | FTRACE_FL_ENABLED); |
388 | if (fl == FTRACE_FL_NOTRACE) | 387 | if (fl == FTRACE_FL_NOTRACE) |
389 | return 0; | 388 | return 0; |
390 | 389 | ||
391 | new = ftrace_call_replace(ip, FTRACE_ADDR); | 390 | new = ftrace_call_replace(ip, FTRACE_ADDR); |
392 | } else | 391 | } else |
393 | old = ftrace_call_replace(ip, FTRACE_ADDR); | 392 | old = ftrace_call_replace(ip, FTRACE_ADDR); |
394 | 393 | ||
395 | if (enable) { | 394 | if (enable) { |
396 | if (rec->flags & FTRACE_FL_ENABLED) | 395 | if (rec->flags & FTRACE_FL_ENABLED) |
397 | return 0; | 396 | return 0; |
398 | rec->flags |= FTRACE_FL_ENABLED; | 397 | rec->flags |= FTRACE_FL_ENABLED; |
399 | } else { | 398 | } else { |
400 | if (!(rec->flags & FTRACE_FL_ENABLED)) | 399 | if (!(rec->flags & FTRACE_FL_ENABLED)) |
401 | return 0; | 400 | return 0; |
402 | rec->flags &= ~FTRACE_FL_ENABLED; | 401 | rec->flags &= ~FTRACE_FL_ENABLED; |
403 | } | 402 | } |
404 | } | 403 | } |
405 | 404 | ||
406 | return ftrace_modify_code(ip, old, new); | 405 | return ftrace_modify_code(ip, old, new); |
407 | } | 406 | } |
408 | 407 | ||
409 | static void ftrace_replace_code(int enable) | 408 | static void ftrace_replace_code(int enable) |
410 | { | 409 | { |
411 | int i, failed; | 410 | int i, failed; |
412 | unsigned char *new = NULL, *old = NULL; | 411 | unsigned char *new = NULL, *old = NULL; |
413 | struct dyn_ftrace *rec; | 412 | struct dyn_ftrace *rec; |
414 | struct ftrace_page *pg; | 413 | struct ftrace_page *pg; |
415 | 414 | ||
416 | if (enable) | 415 | if (enable) |
417 | old = ftrace_nop_replace(); | 416 | old = ftrace_nop_replace(); |
418 | else | 417 | else |
419 | new = ftrace_nop_replace(); | 418 | new = ftrace_nop_replace(); |
420 | 419 | ||
421 | for (pg = ftrace_pages_start; pg; pg = pg->next) { | 420 | for (pg = ftrace_pages_start; pg; pg = pg->next) { |
422 | for (i = 0; i < pg->index; i++) { | 421 | for (i = 0; i < pg->index; i++) { |
423 | rec = &pg->records[i]; | 422 | rec = &pg->records[i]; |
424 | 423 | ||
425 | /* don't modify code that has already faulted */ | 424 | /* don't modify code that has already faulted */ |
426 | if (rec->flags & FTRACE_FL_FAILED) | 425 | if (rec->flags & FTRACE_FL_FAILED) |
427 | continue; | 426 | continue; |
428 | 427 | ||
429 | /* ignore updates to this record's mcount site */ | 428 | /* ignore updates to this record's mcount site */ |
430 | if (get_kprobe((void *)rec->ip)) { | 429 | if (get_kprobe((void *)rec->ip)) { |
431 | freeze_record(rec); | 430 | freeze_record(rec); |
432 | continue; | 431 | continue; |
433 | } else { | 432 | } else { |
434 | unfreeze_record(rec); | 433 | unfreeze_record(rec); |
435 | } | 434 | } |
436 | 435 | ||
437 | failed = __ftrace_replace_code(rec, old, new, enable); | 436 | failed = __ftrace_replace_code(rec, old, new, enable); |
438 | if (failed && (rec->flags & FTRACE_FL_CONVERTED)) { | 437 | if (failed && (rec->flags & FTRACE_FL_CONVERTED)) { |
439 | rec->flags |= FTRACE_FL_FAILED; | 438 | rec->flags |= FTRACE_FL_FAILED; |
440 | if ((system_state == SYSTEM_BOOTING) || | 439 | if ((system_state == SYSTEM_BOOTING) || |
441 | !core_kernel_text(rec->ip)) { | 440 | !core_kernel_text(rec->ip)) { |
442 | ftrace_free_rec(rec); | 441 | ftrace_free_rec(rec); |
443 | } | 442 | } |
444 | } | 443 | } |
445 | } | 444 | } |
446 | } | 445 | } |
447 | } | 446 | } |
448 | 447 | ||
449 | static void print_ip_ins(const char *fmt, unsigned char *p) | 448 | static void print_ip_ins(const char *fmt, unsigned char *p) |
450 | { | 449 | { |
451 | int i; | 450 | int i; |
452 | 451 | ||
453 | printk(KERN_CONT "%s", fmt); | 452 | printk(KERN_CONT "%s", fmt); |
454 | 453 | ||
455 | for (i = 0; i < MCOUNT_INSN_SIZE; i++) | 454 | for (i = 0; i < MCOUNT_INSN_SIZE; i++) |
456 | printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]); | 455 | printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]); |
457 | } | 456 | } |
458 | 457 | ||
459 | static int | 458 | static int |
460 | ftrace_code_disable(struct dyn_ftrace *rec) | 459 | ftrace_code_disable(struct dyn_ftrace *rec) |
461 | { | 460 | { |
462 | unsigned long ip; | 461 | unsigned long ip; |
463 | unsigned char *nop, *call; | 462 | unsigned char *nop, *call; |
464 | int ret; | 463 | int ret; |
465 | 464 | ||
466 | ip = rec->ip; | 465 | ip = rec->ip; |
467 | 466 | ||
468 | nop = ftrace_nop_replace(); | 467 | nop = ftrace_nop_replace(); |
469 | call = ftrace_call_replace(ip, mcount_addr); | 468 | call = ftrace_call_replace(ip, mcount_addr); |
470 | 469 | ||
471 | ret = ftrace_modify_code(ip, call, nop); | 470 | ret = ftrace_modify_code(ip, call, nop); |
472 | if (ret) { | 471 | if (ret) { |
473 | switch (ret) { | 472 | switch (ret) { |
474 | case -EFAULT: | 473 | case -EFAULT: |
475 | FTRACE_WARN_ON_ONCE(1); | 474 | FTRACE_WARN_ON_ONCE(1); |
476 | pr_info("ftrace faulted on modifying "); | 475 | pr_info("ftrace faulted on modifying "); |
477 | print_ip_sym(ip); | 476 | print_ip_sym(ip); |
478 | break; | 477 | break; |
479 | case -EINVAL: | 478 | case -EINVAL: |
480 | FTRACE_WARN_ON_ONCE(1); | 479 | FTRACE_WARN_ON_ONCE(1); |
481 | pr_info("ftrace failed to modify "); | 480 | pr_info("ftrace failed to modify "); |
482 | print_ip_sym(ip); | 481 | print_ip_sym(ip); |
483 | print_ip_ins(" expected: ", call); | 482 | print_ip_ins(" expected: ", call); |
484 | print_ip_ins(" actual: ", (unsigned char *)ip); | 483 | print_ip_ins(" actual: ", (unsigned char *)ip); |
485 | print_ip_ins(" replace: ", nop); | 484 | print_ip_ins(" replace: ", nop); |
486 | printk(KERN_CONT "\n"); | 485 | printk(KERN_CONT "\n"); |
487 | break; | 486 | break; |
488 | case -EPERM: | 487 | case -EPERM: |
489 | FTRACE_WARN_ON_ONCE(1); | 488 | FTRACE_WARN_ON_ONCE(1); |
490 | pr_info("ftrace faulted on writing "); | 489 | pr_info("ftrace faulted on writing "); |
491 | print_ip_sym(ip); | 490 | print_ip_sym(ip); |
492 | break; | 491 | break; |
493 | default: | 492 | default: |
494 | FTRACE_WARN_ON_ONCE(1); | 493 | FTRACE_WARN_ON_ONCE(1); |
495 | pr_info("ftrace faulted on unknown error "); | 494 | pr_info("ftrace faulted on unknown error "); |
496 | print_ip_sym(ip); | 495 | print_ip_sym(ip); |
497 | } | 496 | } |
498 | 497 | ||
499 | rec->flags |= FTRACE_FL_FAILED; | 498 | rec->flags |= FTRACE_FL_FAILED; |
500 | return 0; | 499 | return 0; |
501 | } | 500 | } |
502 | return 1; | 501 | return 1; |
503 | } | 502 | } |
504 | 503 | ||
505 | static int __ftrace_modify_code(void *data) | 504 | static int __ftrace_modify_code(void *data) |
506 | { | 505 | { |
507 | int *command = data; | 506 | int *command = data; |
508 | 507 | ||
509 | if (*command & FTRACE_ENABLE_CALLS) { | 508 | if (*command & FTRACE_ENABLE_CALLS) |
510 | ftrace_replace_code(1); | 509 | ftrace_replace_code(1); |
511 | tracing_on = 1; | 510 | else if (*command & FTRACE_DISABLE_CALLS) |
512 | } else if (*command & FTRACE_DISABLE_CALLS) { | ||
513 | ftrace_replace_code(0); | 511 | ftrace_replace_code(0); |
514 | tracing_on = 0; | ||
515 | } | ||
516 | 512 | ||
517 | if (*command & FTRACE_UPDATE_TRACE_FUNC) | 513 | if (*command & FTRACE_UPDATE_TRACE_FUNC) |
518 | ftrace_update_ftrace_func(ftrace_trace_function); | 514 | ftrace_update_ftrace_func(ftrace_trace_function); |
519 | 515 | ||
520 | return 0; | 516 | return 0; |
521 | } | 517 | } |
522 | 518 | ||
523 | static void ftrace_run_update_code(int command) | 519 | static void ftrace_run_update_code(int command) |
524 | { | 520 | { |
525 | stop_machine(__ftrace_modify_code, &command, NULL); | 521 | stop_machine(__ftrace_modify_code, &command, NULL); |
526 | } | 522 | } |
527 | 523 | ||
528 | static ftrace_func_t saved_ftrace_func; | 524 | static ftrace_func_t saved_ftrace_func; |
529 | static int ftrace_start; | 525 | static int ftrace_start; |
530 | static DEFINE_MUTEX(ftrace_start_lock); | 526 | static DEFINE_MUTEX(ftrace_start_lock); |
531 | 527 | ||
532 | static void ftrace_startup(void) | 528 | static void ftrace_startup(void) |
533 | { | 529 | { |
534 | int command = 0; | 530 | int command = 0; |
535 | 531 | ||
536 | if (unlikely(ftrace_disabled)) | 532 | if (unlikely(ftrace_disabled)) |
537 | return; | 533 | return; |
538 | 534 | ||
539 | mutex_lock(&ftrace_start_lock); | 535 | mutex_lock(&ftrace_start_lock); |
540 | ftrace_start++; | 536 | ftrace_start++; |
541 | if (ftrace_start == 1) | 537 | if (ftrace_start == 1) |
542 | command |= FTRACE_ENABLE_CALLS; | 538 | command |= FTRACE_ENABLE_CALLS; |
543 | 539 | ||
544 | if (saved_ftrace_func != ftrace_trace_function) { | 540 | if (saved_ftrace_func != ftrace_trace_function) { |
545 | saved_ftrace_func = ftrace_trace_function; | 541 | saved_ftrace_func = ftrace_trace_function; |
546 | command |= FTRACE_UPDATE_TRACE_FUNC; | 542 | command |= FTRACE_UPDATE_TRACE_FUNC; |
547 | } | 543 | } |
548 | 544 | ||
549 | if (!command || !ftrace_enabled) | 545 | if (!command || !ftrace_enabled) |
550 | goto out; | 546 | goto out; |
551 | 547 | ||
552 | ftrace_run_update_code(command); | 548 | ftrace_run_update_code(command); |
553 | out: | 549 | out: |
554 | mutex_unlock(&ftrace_start_lock); | 550 | mutex_unlock(&ftrace_start_lock); |
555 | } | 551 | } |
556 | 552 | ||
557 | static void ftrace_shutdown(void) | 553 | static void ftrace_shutdown(void) |
558 | { | 554 | { |
559 | int command = 0; | 555 | int command = 0; |
560 | 556 | ||
561 | if (unlikely(ftrace_disabled)) | 557 | if (unlikely(ftrace_disabled)) |
562 | return; | 558 | return; |
563 | 559 | ||
564 | mutex_lock(&ftrace_start_lock); | 560 | mutex_lock(&ftrace_start_lock); |
565 | ftrace_start--; | 561 | ftrace_start--; |
566 | if (!ftrace_start) | 562 | if (!ftrace_start) |
567 | command |= FTRACE_DISABLE_CALLS; | 563 | command |= FTRACE_DISABLE_CALLS; |
568 | 564 | ||
569 | if (saved_ftrace_func != ftrace_trace_function) { | 565 | if (saved_ftrace_func != ftrace_trace_function) { |
570 | saved_ftrace_func = ftrace_trace_function; | 566 | saved_ftrace_func = ftrace_trace_function; |
571 | command |= FTRACE_UPDATE_TRACE_FUNC; | 567 | command |= FTRACE_UPDATE_TRACE_FUNC; |
572 | } | 568 | } |
573 | 569 | ||
574 | if (!command || !ftrace_enabled) | 570 | if (!command || !ftrace_enabled) |
575 | goto out; | 571 | goto out; |
576 | 572 | ||
577 | ftrace_run_update_code(command); | 573 | ftrace_run_update_code(command); |
578 | out: | 574 | out: |
579 | mutex_unlock(&ftrace_start_lock); | 575 | mutex_unlock(&ftrace_start_lock); |
580 | } | 576 | } |
581 | 577 | ||
582 | static void ftrace_startup_sysctl(void) | 578 | static void ftrace_startup_sysctl(void) |
583 | { | 579 | { |
584 | int command = FTRACE_ENABLE_MCOUNT; | 580 | int command = FTRACE_ENABLE_MCOUNT; |
585 | 581 | ||
586 | if (unlikely(ftrace_disabled)) | 582 | if (unlikely(ftrace_disabled)) |
587 | return; | 583 | return; |
588 | 584 | ||
589 | mutex_lock(&ftrace_start_lock); | 585 | mutex_lock(&ftrace_start_lock); |
590 | /* Force update next time */ | 586 | /* Force update next time */ |
591 | saved_ftrace_func = NULL; | 587 | saved_ftrace_func = NULL; |
592 | /* ftrace_start is true if we want ftrace running */ | 588 | /* ftrace_start is true if we want ftrace running */ |
593 | if (ftrace_start) | 589 | if (ftrace_start) |
594 | command |= FTRACE_ENABLE_CALLS; | 590 | command |= FTRACE_ENABLE_CALLS; |
595 | 591 | ||
596 | ftrace_run_update_code(command); | 592 | ftrace_run_update_code(command); |
597 | mutex_unlock(&ftrace_start_lock); | 593 | mutex_unlock(&ftrace_start_lock); |
598 | } | 594 | } |
599 | 595 | ||
600 | static void ftrace_shutdown_sysctl(void) | 596 | static void ftrace_shutdown_sysctl(void) |
601 | { | 597 | { |
602 | int command = FTRACE_DISABLE_MCOUNT; | 598 | int command = FTRACE_DISABLE_MCOUNT; |
603 | 599 | ||
604 | if (unlikely(ftrace_disabled)) | 600 | if (unlikely(ftrace_disabled)) |
605 | return; | 601 | return; |
606 | 602 | ||
607 | mutex_lock(&ftrace_start_lock); | 603 | mutex_lock(&ftrace_start_lock); |
608 | /* ftrace_start is true if ftrace is running */ | 604 | /* ftrace_start is true if ftrace is running */ |
609 | if (ftrace_start) | 605 | if (ftrace_start) |
610 | command |= FTRACE_DISABLE_CALLS; | 606 | command |= FTRACE_DISABLE_CALLS; |
611 | 607 | ||
612 | ftrace_run_update_code(command); | 608 | ftrace_run_update_code(command); |
613 | mutex_unlock(&ftrace_start_lock); | 609 | mutex_unlock(&ftrace_start_lock); |
614 | } | 610 | } |
615 | 611 | ||
616 | static cycle_t ftrace_update_time; | 612 | static cycle_t ftrace_update_time; |
617 | static unsigned long ftrace_update_cnt; | 613 | static unsigned long ftrace_update_cnt; |
618 | unsigned long ftrace_update_tot_cnt; | 614 | unsigned long ftrace_update_tot_cnt; |
619 | 615 | ||
620 | static int ftrace_update_code(void) | 616 | static int ftrace_update_code(void) |
621 | { | 617 | { |
622 | struct dyn_ftrace *p, *t; | 618 | struct dyn_ftrace *p, *t; |
623 | cycle_t start, stop; | 619 | cycle_t start, stop; |
624 | 620 | ||
625 | start = ftrace_now(raw_smp_processor_id()); | 621 | start = ftrace_now(raw_smp_processor_id()); |
626 | ftrace_update_cnt = 0; | 622 | ftrace_update_cnt = 0; |
627 | 623 | ||
628 | list_for_each_entry_safe(p, t, &ftrace_new_addrs, list) { | 624 | list_for_each_entry_safe(p, t, &ftrace_new_addrs, list) { |
629 | 625 | ||
630 | /* If something went wrong, bail without enabling anything */ | 626 | /* If something went wrong, bail without enabling anything */ |
631 | if (unlikely(ftrace_disabled)) | 627 | if (unlikely(ftrace_disabled)) |
632 | return -1; | 628 | return -1; |
633 | 629 | ||
634 | list_del_init(&p->list); | 630 | list_del_init(&p->list); |
635 | 631 | ||
636 | /* convert record (i.e, patch mcount-call with NOP) */ | 632 | /* convert record (i.e, patch mcount-call with NOP) */ |
637 | if (ftrace_code_disable(p)) { | 633 | if (ftrace_code_disable(p)) { |
638 | p->flags |= FTRACE_FL_CONVERTED; | 634 | p->flags |= FTRACE_FL_CONVERTED; |
639 | ftrace_update_cnt++; | 635 | ftrace_update_cnt++; |
640 | } else | 636 | } else |
641 | ftrace_free_rec(p); | 637 | ftrace_free_rec(p); |
642 | } | 638 | } |
643 | 639 | ||
644 | stop = ftrace_now(raw_smp_processor_id()); | 640 | stop = ftrace_now(raw_smp_processor_id()); |
645 | ftrace_update_time = stop - start; | 641 | ftrace_update_time = stop - start; |
646 | ftrace_update_tot_cnt += ftrace_update_cnt; | 642 | ftrace_update_tot_cnt += ftrace_update_cnt; |
647 | 643 | ||
648 | return 0; | 644 | return 0; |
649 | } | 645 | } |
650 | 646 | ||
651 | static int __init ftrace_dyn_table_alloc(unsigned long num_to_init) | 647 | static int __init ftrace_dyn_table_alloc(unsigned long num_to_init) |
652 | { | 648 | { |
653 | struct ftrace_page *pg; | 649 | struct ftrace_page *pg; |
654 | int cnt; | 650 | int cnt; |
655 | int i; | 651 | int i; |
656 | 652 | ||
657 | /* allocate a few pages */ | 653 | /* allocate a few pages */ |
658 | ftrace_pages_start = (void *)get_zeroed_page(GFP_KERNEL); | 654 | ftrace_pages_start = (void *)get_zeroed_page(GFP_KERNEL); |
659 | if (!ftrace_pages_start) | 655 | if (!ftrace_pages_start) |
660 | return -1; | 656 | return -1; |
661 | 657 | ||
662 | /* | 658 | /* |
663 | * Allocate a few more pages. | 659 | * Allocate a few more pages. |
664 | * | 660 | * |
665 | * TODO: have some parser search vmlinux before | 661 | * TODO: have some parser search vmlinux before |
666 | * final linking to find all calls to ftrace. | 662 | * final linking to find all calls to ftrace. |
667 | * Then we can: | 663 | * Then we can: |
668 | * a) know how many pages to allocate. | 664 | * a) know how many pages to allocate. |
669 | * and/or | 665 | * and/or |
670 | * b) set up the table then. | 666 | * b) set up the table then. |
671 | * | 667 | * |
672 | * The dynamic code is still necessary for | 668 | * The dynamic code is still necessary for |
673 | * modules. | 669 | * modules. |
674 | */ | 670 | */ |
675 | 671 | ||
676 | pg = ftrace_pages = ftrace_pages_start; | 672 | pg = ftrace_pages = ftrace_pages_start; |
677 | 673 | ||
678 | cnt = num_to_init / ENTRIES_PER_PAGE; | 674 | cnt = num_to_init / ENTRIES_PER_PAGE; |
679 | pr_info("ftrace: allocating %ld entries in %d pages\n", | 675 | pr_info("ftrace: allocating %ld entries in %d pages\n", |
680 | num_to_init, cnt); | 676 | num_to_init, cnt); |
681 | 677 | ||
682 | for (i = 0; i < cnt; i++) { | 678 | for (i = 0; i < cnt; i++) { |
683 | pg->next = (void *)get_zeroed_page(GFP_KERNEL); | 679 | pg->next = (void *)get_zeroed_page(GFP_KERNEL); |
684 | 680 | ||
685 | /* If we fail, we'll try later anyway */ | 681 | /* If we fail, we'll try later anyway */ |
686 | if (!pg->next) | 682 | if (!pg->next) |
687 | break; | 683 | break; |
688 | 684 | ||
689 | pg = pg->next; | 685 | pg = pg->next; |
690 | } | 686 | } |
691 | 687 | ||
692 | return 0; | 688 | return 0; |
693 | } | 689 | } |
694 | 690 | ||
695 | enum { | 691 | enum { |
696 | FTRACE_ITER_FILTER = (1 << 0), | 692 | FTRACE_ITER_FILTER = (1 << 0), |
697 | FTRACE_ITER_CONT = (1 << 1), | 693 | FTRACE_ITER_CONT = (1 << 1), |
698 | FTRACE_ITER_NOTRACE = (1 << 2), | 694 | FTRACE_ITER_NOTRACE = (1 << 2), |
699 | FTRACE_ITER_FAILURES = (1 << 3), | 695 | FTRACE_ITER_FAILURES = (1 << 3), |
700 | }; | 696 | }; |
701 | 697 | ||
702 | #define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ | 698 | #define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ |
703 | 699 | ||
704 | struct ftrace_iterator { | 700 | struct ftrace_iterator { |
705 | loff_t pos; | 701 | loff_t pos; |
706 | struct ftrace_page *pg; | 702 | struct ftrace_page *pg; |
707 | unsigned idx; | 703 | unsigned idx; |
708 | unsigned flags; | 704 | unsigned flags; |
709 | unsigned char buffer[FTRACE_BUFF_MAX+1]; | 705 | unsigned char buffer[FTRACE_BUFF_MAX+1]; |
710 | unsigned buffer_idx; | 706 | unsigned buffer_idx; |
711 | unsigned filtered; | 707 | unsigned filtered; |
712 | }; | 708 | }; |
713 | 709 | ||
714 | static void * | 710 | static void * |
715 | t_next(struct seq_file *m, void *v, loff_t *pos) | 711 | t_next(struct seq_file *m, void *v, loff_t *pos) |
716 | { | 712 | { |
717 | struct ftrace_iterator *iter = m->private; | 713 | struct ftrace_iterator *iter = m->private; |
718 | struct dyn_ftrace *rec = NULL; | 714 | struct dyn_ftrace *rec = NULL; |
719 | 715 | ||
720 | (*pos)++; | 716 | (*pos)++; |
721 | 717 | ||
722 | /* should not be called from interrupt context */ | 718 | /* should not be called from interrupt context */ |
723 | spin_lock(&ftrace_lock); | 719 | spin_lock(&ftrace_lock); |
724 | retry: | 720 | retry: |
725 | if (iter->idx >= iter->pg->index) { | 721 | if (iter->idx >= iter->pg->index) { |
726 | if (iter->pg->next) { | 722 | if (iter->pg->next) { |
727 | iter->pg = iter->pg->next; | 723 | iter->pg = iter->pg->next; |
728 | iter->idx = 0; | 724 | iter->idx = 0; |
729 | goto retry; | 725 | goto retry; |
730 | } | 726 | } |
731 | } else { | 727 | } else { |
732 | rec = &iter->pg->records[iter->idx++]; | 728 | rec = &iter->pg->records[iter->idx++]; |
733 | if ((rec->flags & FTRACE_FL_FREE) || | 729 | if ((rec->flags & FTRACE_FL_FREE) || |
734 | 730 | ||
735 | (!(iter->flags & FTRACE_ITER_FAILURES) && | 731 | (!(iter->flags & FTRACE_ITER_FAILURES) && |
736 | (rec->flags & FTRACE_FL_FAILED)) || | 732 | (rec->flags & FTRACE_FL_FAILED)) || |
737 | 733 | ||
738 | ((iter->flags & FTRACE_ITER_FAILURES) && | 734 | ((iter->flags & FTRACE_ITER_FAILURES) && |
739 | !(rec->flags & FTRACE_FL_FAILED)) || | 735 | !(rec->flags & FTRACE_FL_FAILED)) || |
740 | 736 | ||
741 | ((iter->flags & FTRACE_ITER_NOTRACE) && | 737 | ((iter->flags & FTRACE_ITER_NOTRACE) && |
742 | !(rec->flags & FTRACE_FL_NOTRACE))) { | 738 | !(rec->flags & FTRACE_FL_NOTRACE))) { |
743 | rec = NULL; | 739 | rec = NULL; |
744 | goto retry; | 740 | goto retry; |
745 | } | 741 | } |
746 | } | 742 | } |
747 | spin_unlock(&ftrace_lock); | 743 | spin_unlock(&ftrace_lock); |
748 | 744 | ||
749 | iter->pos = *pos; | 745 | iter->pos = *pos; |
750 | 746 | ||
751 | return rec; | 747 | return rec; |
752 | } | 748 | } |
753 | 749 | ||
754 | static void *t_start(struct seq_file *m, loff_t *pos) | 750 | static void *t_start(struct seq_file *m, loff_t *pos) |
755 | { | 751 | { |
756 | struct ftrace_iterator *iter = m->private; | 752 | struct ftrace_iterator *iter = m->private; |
757 | void *p = NULL; | 753 | void *p = NULL; |
758 | loff_t l = -1; | 754 | loff_t l = -1; |
759 | 755 | ||
760 | if (*pos != iter->pos) { | 756 | if (*pos != iter->pos) { |
761 | for (p = t_next(m, p, &l); p && l < *pos; p = t_next(m, p, &l)) | 757 | for (p = t_next(m, p, &l); p && l < *pos; p = t_next(m, p, &l)) |
762 | ; | 758 | ; |
763 | } else { | 759 | } else { |
764 | l = *pos; | 760 | l = *pos; |
765 | p = t_next(m, p, &l); | 761 | p = t_next(m, p, &l); |
766 | } | 762 | } |
767 | 763 | ||
768 | return p; | 764 | return p; |
769 | } | 765 | } |
770 | 766 | ||
771 | static void t_stop(struct seq_file *m, void *p) | 767 | static void t_stop(struct seq_file *m, void *p) |
772 | { | 768 | { |
773 | } | 769 | } |
774 | 770 | ||
775 | static int t_show(struct seq_file *m, void *v) | 771 | static int t_show(struct seq_file *m, void *v) |
776 | { | 772 | { |
777 | struct dyn_ftrace *rec = v; | 773 | struct dyn_ftrace *rec = v; |
778 | char str[KSYM_SYMBOL_LEN]; | 774 | char str[KSYM_SYMBOL_LEN]; |
779 | 775 | ||
780 | if (!rec) | 776 | if (!rec) |
781 | return 0; | 777 | return 0; |
782 | 778 | ||
783 | kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); | 779 | kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); |
784 | 780 | ||
785 | seq_printf(m, "%s\n", str); | 781 | seq_printf(m, "%s\n", str); |
786 | 782 | ||
787 | return 0; | 783 | return 0; |
788 | } | 784 | } |
789 | 785 | ||
790 | static struct seq_operations show_ftrace_seq_ops = { | 786 | static struct seq_operations show_ftrace_seq_ops = { |
791 | .start = t_start, | 787 | .start = t_start, |
792 | .next = t_next, | 788 | .next = t_next, |
793 | .stop = t_stop, | 789 | .stop = t_stop, |
794 | .show = t_show, | 790 | .show = t_show, |
795 | }; | 791 | }; |
796 | 792 | ||
797 | static int | 793 | static int |
798 | ftrace_avail_open(struct inode *inode, struct file *file) | 794 | ftrace_avail_open(struct inode *inode, struct file *file) |
799 | { | 795 | { |
800 | struct ftrace_iterator *iter; | 796 | struct ftrace_iterator *iter; |
801 | int ret; | 797 | int ret; |
802 | 798 | ||
803 | if (unlikely(ftrace_disabled)) | 799 | if (unlikely(ftrace_disabled)) |
804 | return -ENODEV; | 800 | return -ENODEV; |
805 | 801 | ||
806 | iter = kzalloc(sizeof(*iter), GFP_KERNEL); | 802 | iter = kzalloc(sizeof(*iter), GFP_KERNEL); |
807 | if (!iter) | 803 | if (!iter) |
808 | return -ENOMEM; | 804 | return -ENOMEM; |
809 | 805 | ||
810 | iter->pg = ftrace_pages_start; | 806 | iter->pg = ftrace_pages_start; |
811 | iter->pos = -1; | 807 | iter->pos = -1; |
812 | 808 | ||
813 | ret = seq_open(file, &show_ftrace_seq_ops); | 809 | ret = seq_open(file, &show_ftrace_seq_ops); |
814 | if (!ret) { | 810 | if (!ret) { |
815 | struct seq_file *m = file->private_data; | 811 | struct seq_file *m = file->private_data; |
816 | 812 | ||
817 | m->private = iter; | 813 | m->private = iter; |
818 | } else { | 814 | } else { |
819 | kfree(iter); | 815 | kfree(iter); |
820 | } | 816 | } |
821 | 817 | ||
822 | return ret; | 818 | return ret; |
823 | } | 819 | } |
824 | 820 | ||
825 | int ftrace_avail_release(struct inode *inode, struct file *file) | 821 | int ftrace_avail_release(struct inode *inode, struct file *file) |
826 | { | 822 | { |
827 | struct seq_file *m = (struct seq_file *)file->private_data; | 823 | struct seq_file *m = (struct seq_file *)file->private_data; |
828 | struct ftrace_iterator *iter = m->private; | 824 | struct ftrace_iterator *iter = m->private; |
829 | 825 | ||
830 | seq_release(inode, file); | 826 | seq_release(inode, file); |
831 | kfree(iter); | 827 | kfree(iter); |
832 | 828 | ||
833 | return 0; | 829 | return 0; |
834 | } | 830 | } |
835 | 831 | ||
836 | static int | 832 | static int |
837 | ftrace_failures_open(struct inode *inode, struct file *file) | 833 | ftrace_failures_open(struct inode *inode, struct file *file) |
838 | { | 834 | { |
839 | int ret; | 835 | int ret; |
840 | struct seq_file *m; | 836 | struct seq_file *m; |
841 | struct ftrace_iterator *iter; | 837 | struct ftrace_iterator *iter; |
842 | 838 | ||
843 | ret = ftrace_avail_open(inode, file); | 839 | ret = ftrace_avail_open(inode, file); |
844 | if (!ret) { | 840 | if (!ret) { |
845 | m = (struct seq_file *)file->private_data; | 841 | m = (struct seq_file *)file->private_data; |
846 | iter = (struct ftrace_iterator *)m->private; | 842 | iter = (struct ftrace_iterator *)m->private; |
847 | iter->flags = FTRACE_ITER_FAILURES; | 843 | iter->flags = FTRACE_ITER_FAILURES; |
848 | } | 844 | } |
849 | 845 | ||
850 | return ret; | 846 | return ret; |
851 | } | 847 | } |
852 | 848 | ||
853 | 849 | ||
854 | static void ftrace_filter_reset(int enable) | 850 | static void ftrace_filter_reset(int enable) |
855 | { | 851 | { |
856 | struct ftrace_page *pg; | 852 | struct ftrace_page *pg; |
857 | struct dyn_ftrace *rec; | 853 | struct dyn_ftrace *rec; |
858 | unsigned long type = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; | 854 | unsigned long type = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; |
859 | unsigned i; | 855 | unsigned i; |
860 | 856 | ||
861 | /* should not be called from interrupt context */ | 857 | /* should not be called from interrupt context */ |
862 | spin_lock(&ftrace_lock); | 858 | spin_lock(&ftrace_lock); |
863 | if (enable) | 859 | if (enable) |
864 | ftrace_filtered = 0; | 860 | ftrace_filtered = 0; |
865 | pg = ftrace_pages_start; | 861 | pg = ftrace_pages_start; |
866 | while (pg) { | 862 | while (pg) { |
867 | for (i = 0; i < pg->index; i++) { | 863 | for (i = 0; i < pg->index; i++) { |
868 | rec = &pg->records[i]; | 864 | rec = &pg->records[i]; |
869 | if (rec->flags & FTRACE_FL_FAILED) | 865 | if (rec->flags & FTRACE_FL_FAILED) |
870 | continue; | 866 | continue; |
871 | rec->flags &= ~type; | 867 | rec->flags &= ~type; |
872 | } | 868 | } |
873 | pg = pg->next; | 869 | pg = pg->next; |
874 | } | 870 | } |
875 | spin_unlock(&ftrace_lock); | 871 | spin_unlock(&ftrace_lock); |
876 | } | 872 | } |
877 | 873 | ||
878 | static int | 874 | static int |
879 | ftrace_regex_open(struct inode *inode, struct file *file, int enable) | 875 | ftrace_regex_open(struct inode *inode, struct file *file, int enable) |
880 | { | 876 | { |
881 | struct ftrace_iterator *iter; | 877 | struct ftrace_iterator *iter; |
882 | int ret = 0; | 878 | int ret = 0; |
883 | 879 | ||
884 | if (unlikely(ftrace_disabled)) | 880 | if (unlikely(ftrace_disabled)) |
885 | return -ENODEV; | 881 | return -ENODEV; |
886 | 882 | ||
887 | iter = kzalloc(sizeof(*iter), GFP_KERNEL); | 883 | iter = kzalloc(sizeof(*iter), GFP_KERNEL); |
888 | if (!iter) | 884 | if (!iter) |
889 | return -ENOMEM; | 885 | return -ENOMEM; |
890 | 886 | ||
891 | mutex_lock(&ftrace_regex_lock); | 887 | mutex_lock(&ftrace_regex_lock); |
892 | if ((file->f_mode & FMODE_WRITE) && | 888 | if ((file->f_mode & FMODE_WRITE) && |
893 | !(file->f_flags & O_APPEND)) | 889 | !(file->f_flags & O_APPEND)) |
894 | ftrace_filter_reset(enable); | 890 | ftrace_filter_reset(enable); |
895 | 891 | ||
896 | if (file->f_mode & FMODE_READ) { | 892 | if (file->f_mode & FMODE_READ) { |
897 | iter->pg = ftrace_pages_start; | 893 | iter->pg = ftrace_pages_start; |
898 | iter->pos = -1; | 894 | iter->pos = -1; |
899 | iter->flags = enable ? FTRACE_ITER_FILTER : | 895 | iter->flags = enable ? FTRACE_ITER_FILTER : |
900 | FTRACE_ITER_NOTRACE; | 896 | FTRACE_ITER_NOTRACE; |
901 | 897 | ||
902 | ret = seq_open(file, &show_ftrace_seq_ops); | 898 | ret = seq_open(file, &show_ftrace_seq_ops); |
903 | if (!ret) { | 899 | if (!ret) { |
904 | struct seq_file *m = file->private_data; | 900 | struct seq_file *m = file->private_data; |
905 | m->private = iter; | 901 | m->private = iter; |
906 | } else | 902 | } else |
907 | kfree(iter); | 903 | kfree(iter); |
908 | } else | 904 | } else |
909 | file->private_data = iter; | 905 | file->private_data = iter; |
910 | mutex_unlock(&ftrace_regex_lock); | 906 | mutex_unlock(&ftrace_regex_lock); |
911 | 907 | ||
912 | return ret; | 908 | return ret; |
913 | } | 909 | } |
914 | 910 | ||
915 | static int | 911 | static int |
916 | ftrace_filter_open(struct inode *inode, struct file *file) | 912 | ftrace_filter_open(struct inode *inode, struct file *file) |
917 | { | 913 | { |
918 | return ftrace_regex_open(inode, file, 1); | 914 | return ftrace_regex_open(inode, file, 1); |
919 | } | 915 | } |
920 | 916 | ||
921 | static int | 917 | static int |
922 | ftrace_notrace_open(struct inode *inode, struct file *file) | 918 | ftrace_notrace_open(struct inode *inode, struct file *file) |
923 | { | 919 | { |
924 | return ftrace_regex_open(inode, file, 0); | 920 | return ftrace_regex_open(inode, file, 0); |
925 | } | 921 | } |
926 | 922 | ||
927 | static ssize_t | 923 | static ssize_t |
928 | ftrace_regex_read(struct file *file, char __user *ubuf, | 924 | ftrace_regex_read(struct file *file, char __user *ubuf, |
929 | size_t cnt, loff_t *ppos) | 925 | size_t cnt, loff_t *ppos) |
930 | { | 926 | { |
931 | if (file->f_mode & FMODE_READ) | 927 | if (file->f_mode & FMODE_READ) |
932 | return seq_read(file, ubuf, cnt, ppos); | 928 | return seq_read(file, ubuf, cnt, ppos); |
933 | else | 929 | else |
934 | return -EPERM; | 930 | return -EPERM; |
935 | } | 931 | } |
936 | 932 | ||
937 | static loff_t | 933 | static loff_t |
938 | ftrace_regex_lseek(struct file *file, loff_t offset, int origin) | 934 | ftrace_regex_lseek(struct file *file, loff_t offset, int origin) |
939 | { | 935 | { |
940 | loff_t ret; | 936 | loff_t ret; |
941 | 937 | ||
942 | if (file->f_mode & FMODE_READ) | 938 | if (file->f_mode & FMODE_READ) |
943 | ret = seq_lseek(file, offset, origin); | 939 | ret = seq_lseek(file, offset, origin); |
944 | else | 940 | else |
945 | file->f_pos = ret = 1; | 941 | file->f_pos = ret = 1; |
946 | 942 | ||
947 | return ret; | 943 | return ret; |
948 | } | 944 | } |
949 | 945 | ||
950 | enum { | 946 | enum { |
951 | MATCH_FULL, | 947 | MATCH_FULL, |
952 | MATCH_FRONT_ONLY, | 948 | MATCH_FRONT_ONLY, |
953 | MATCH_MIDDLE_ONLY, | 949 | MATCH_MIDDLE_ONLY, |
954 | MATCH_END_ONLY, | 950 | MATCH_END_ONLY, |
955 | }; | 951 | }; |
956 | 952 | ||
957 | static void | 953 | static void |
958 | ftrace_match(unsigned char *buff, int len, int enable) | 954 | ftrace_match(unsigned char *buff, int len, int enable) |
959 | { | 955 | { |
960 | char str[KSYM_SYMBOL_LEN]; | 956 | char str[KSYM_SYMBOL_LEN]; |
961 | char *search = NULL; | 957 | char *search = NULL; |
962 | struct ftrace_page *pg; | 958 | struct ftrace_page *pg; |
963 | struct dyn_ftrace *rec; | 959 | struct dyn_ftrace *rec; |
964 | int type = MATCH_FULL; | 960 | int type = MATCH_FULL; |
965 | unsigned long flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; | 961 | unsigned long flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; |
966 | unsigned i, match = 0, search_len = 0; | 962 | unsigned i, match = 0, search_len = 0; |
967 | 963 | ||
968 | for (i = 0; i < len; i++) { | 964 | for (i = 0; i < len; i++) { |
969 | if (buff[i] == '*') { | 965 | if (buff[i] == '*') { |
970 | if (!i) { | 966 | if (!i) { |
971 | search = buff + i + 1; | 967 | search = buff + i + 1; |
972 | type = MATCH_END_ONLY; | 968 | type = MATCH_END_ONLY; |
973 | search_len = len - (i + 1); | 969 | search_len = len - (i + 1); |
974 | } else { | 970 | } else { |
975 | if (type == MATCH_END_ONLY) { | 971 | if (type == MATCH_END_ONLY) { |
976 | type = MATCH_MIDDLE_ONLY; | 972 | type = MATCH_MIDDLE_ONLY; |
977 | } else { | 973 | } else { |
978 | match = i; | 974 | match = i; |
979 | type = MATCH_FRONT_ONLY; | 975 | type = MATCH_FRONT_ONLY; |
980 | } | 976 | } |
981 | buff[i] = 0; | 977 | buff[i] = 0; |
982 | break; | 978 | break; |
983 | } | 979 | } |
984 | } | 980 | } |
985 | } | 981 | } |
986 | 982 | ||
987 | /* should not be called from interrupt context */ | 983 | /* should not be called from interrupt context */ |
988 | spin_lock(&ftrace_lock); | 984 | spin_lock(&ftrace_lock); |
989 | if (enable) | 985 | if (enable) |
990 | ftrace_filtered = 1; | 986 | ftrace_filtered = 1; |
991 | pg = ftrace_pages_start; | 987 | pg = ftrace_pages_start; |
992 | while (pg) { | 988 | while (pg) { |
993 | for (i = 0; i < pg->index; i++) { | 989 | for (i = 0; i < pg->index; i++) { |
994 | int matched = 0; | 990 | int matched = 0; |
995 | char *ptr; | 991 | char *ptr; |
996 | 992 | ||
997 | rec = &pg->records[i]; | 993 | rec = &pg->records[i]; |
998 | if (rec->flags & FTRACE_FL_FAILED) | 994 | if (rec->flags & FTRACE_FL_FAILED) |
999 | continue; | 995 | continue; |
1000 | kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); | 996 | kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); |
1001 | switch (type) { | 997 | switch (type) { |
1002 | case MATCH_FULL: | 998 | case MATCH_FULL: |
1003 | if (strcmp(str, buff) == 0) | 999 | if (strcmp(str, buff) == 0) |
1004 | matched = 1; | 1000 | matched = 1; |
1005 | break; | 1001 | break; |
1006 | case MATCH_FRONT_ONLY: | 1002 | case MATCH_FRONT_ONLY: |
1007 | if (memcmp(str, buff, match) == 0) | 1003 | if (memcmp(str, buff, match) == 0) |
1008 | matched = 1; | 1004 | matched = 1; |
1009 | break; | 1005 | break; |
1010 | case MATCH_MIDDLE_ONLY: | 1006 | case MATCH_MIDDLE_ONLY: |
1011 | if (strstr(str, search)) | 1007 | if (strstr(str, search)) |
1012 | matched = 1; | 1008 | matched = 1; |
1013 | break; | 1009 | break; |
1014 | case MATCH_END_ONLY: | 1010 | case MATCH_END_ONLY: |
1015 | ptr = strstr(str, search); | 1011 | ptr = strstr(str, search); |
1016 | if (ptr && (ptr[search_len] == 0)) | 1012 | if (ptr && (ptr[search_len] == 0)) |
1017 | matched = 1; | 1013 | matched = 1; |
1018 | break; | 1014 | break; |
1019 | } | 1015 | } |
1020 | if (matched) | 1016 | if (matched) |
1021 | rec->flags |= flag; | 1017 | rec->flags |= flag; |
1022 | } | 1018 | } |
1023 | pg = pg->next; | 1019 | pg = pg->next; |
1024 | } | 1020 | } |
1025 | spin_unlock(&ftrace_lock); | 1021 | spin_unlock(&ftrace_lock); |
1026 | } | 1022 | } |
1027 | 1023 | ||
1028 | static ssize_t | 1024 | static ssize_t |
1029 | ftrace_regex_write(struct file *file, const char __user *ubuf, | 1025 | ftrace_regex_write(struct file *file, const char __user *ubuf, |
1030 | size_t cnt, loff_t *ppos, int enable) | 1026 | size_t cnt, loff_t *ppos, int enable) |
1031 | { | 1027 | { |
1032 | struct ftrace_iterator *iter; | 1028 | struct ftrace_iterator *iter; |
1033 | char ch; | 1029 | char ch; |
1034 | size_t read = 0; | 1030 | size_t read = 0; |
1035 | ssize_t ret; | 1031 | ssize_t ret; |
1036 | 1032 | ||
1037 | if (!cnt || cnt < 0) | 1033 | if (!cnt || cnt < 0) |
1038 | return 0; | 1034 | return 0; |
1039 | 1035 | ||
1040 | mutex_lock(&ftrace_regex_lock); | 1036 | mutex_lock(&ftrace_regex_lock); |
1041 | 1037 | ||
1042 | if (file->f_mode & FMODE_READ) { | 1038 | if (file->f_mode & FMODE_READ) { |
1043 | struct seq_file *m = file->private_data; | 1039 | struct seq_file *m = file->private_data; |
1044 | iter = m->private; | 1040 | iter = m->private; |
1045 | } else | 1041 | } else |
1046 | iter = file->private_data; | 1042 | iter = file->private_data; |
1047 | 1043 | ||
1048 | if (!*ppos) { | 1044 | if (!*ppos) { |
1049 | iter->flags &= ~FTRACE_ITER_CONT; | 1045 | iter->flags &= ~FTRACE_ITER_CONT; |
1050 | iter->buffer_idx = 0; | 1046 | iter->buffer_idx = 0; |
1051 | } | 1047 | } |
1052 | 1048 | ||
1053 | ret = get_user(ch, ubuf++); | 1049 | ret = get_user(ch, ubuf++); |
1054 | if (ret) | 1050 | if (ret) |
1055 | goto out; | 1051 | goto out; |
1056 | read++; | 1052 | read++; |
1057 | cnt--; | 1053 | cnt--; |
1058 | 1054 | ||
1059 | if (!(iter->flags & ~FTRACE_ITER_CONT)) { | 1055 | if (!(iter->flags & ~FTRACE_ITER_CONT)) { |
1060 | /* skip white space */ | 1056 | /* skip white space */ |
1061 | while (cnt && isspace(ch)) { | 1057 | while (cnt && isspace(ch)) { |
1062 | ret = get_user(ch, ubuf++); | 1058 | ret = get_user(ch, ubuf++); |
1063 | if (ret) | 1059 | if (ret) |
1064 | goto out; | 1060 | goto out; |
1065 | read++; | 1061 | read++; |
1066 | cnt--; | 1062 | cnt--; |
1067 | } | 1063 | } |
1068 | 1064 | ||
1069 | if (isspace(ch)) { | 1065 | if (isspace(ch)) { |
1070 | file->f_pos += read; | 1066 | file->f_pos += read; |
1071 | ret = read; | 1067 | ret = read; |
1072 | goto out; | 1068 | goto out; |
1073 | } | 1069 | } |
1074 | 1070 | ||
1075 | iter->buffer_idx = 0; | 1071 | iter->buffer_idx = 0; |
1076 | } | 1072 | } |
1077 | 1073 | ||
1078 | while (cnt && !isspace(ch)) { | 1074 | while (cnt && !isspace(ch)) { |
1079 | if (iter->buffer_idx < FTRACE_BUFF_MAX) | 1075 | if (iter->buffer_idx < FTRACE_BUFF_MAX) |
1080 | iter->buffer[iter->buffer_idx++] = ch; | 1076 | iter->buffer[iter->buffer_idx++] = ch; |
1081 | else { | 1077 | else { |
1082 | ret = -EINVAL; | 1078 | ret = -EINVAL; |
1083 | goto out; | 1079 | goto out; |
1084 | } | 1080 | } |
1085 | ret = get_user(ch, ubuf++); | 1081 | ret = get_user(ch, ubuf++); |
1086 | if (ret) | 1082 | if (ret) |
1087 | goto out; | 1083 | goto out; |
1088 | read++; | 1084 | read++; |
1089 | cnt--; | 1085 | cnt--; |
1090 | } | 1086 | } |
1091 | 1087 | ||
1092 | if (isspace(ch)) { | 1088 | if (isspace(ch)) { |
1093 | iter->filtered++; | 1089 | iter->filtered++; |
1094 | iter->buffer[iter->buffer_idx] = 0; | 1090 | iter->buffer[iter->buffer_idx] = 0; |
1095 | ftrace_match(iter->buffer, iter->buffer_idx, enable); | 1091 | ftrace_match(iter->buffer, iter->buffer_idx, enable); |
1096 | iter->buffer_idx = 0; | 1092 | iter->buffer_idx = 0; |
1097 | } else | 1093 | } else |
1098 | iter->flags |= FTRACE_ITER_CONT; | 1094 | iter->flags |= FTRACE_ITER_CONT; |
1099 | 1095 | ||
1100 | 1096 | ||
1101 | file->f_pos += read; | 1097 | file->f_pos += read; |
1102 | 1098 | ||
1103 | ret = read; | 1099 | ret = read; |
1104 | out: | 1100 | out: |
1105 | mutex_unlock(&ftrace_regex_lock); | 1101 | mutex_unlock(&ftrace_regex_lock); |
1106 | 1102 | ||
1107 | return ret; | 1103 | return ret; |
1108 | } | 1104 | } |
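The write handler above accumulates whitespace-separated tokens and hands each completed token to ftrace_match(), whose parsing loop gives the wildcard rules: a trailing '*' selects a front match, a leading '*' an end match, a '*' on both ends a substring match, and no '*' an exact symbol match. A minimal userspace sketch that feeds it two patterns follows; the debugfs mount point is an assumption, not something this patch defines. Note that opening for write without O_APPEND clears the previous filter first (see ftrace_regex_open() above).

/*
 * Hedged userspace sketch: push two filter patterns into ftrace.
 * The debugfs mount point below is an assumption; adjust it to
 * wherever debugfs is mounted.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        /* "sched_*" -> front match, "*rcu*" -> substring match */
        static const char patterns[] = "sched_* *rcu*\n";
        int fd = open("/sys/kernel/debug/tracing/set_ftrace_filter", O_WRONLY);

        if (fd < 0) {
                perror("set_ftrace_filter");
                return 1;
        }
        if (write(fd, patterns, strlen(patterns)) < 0)
                perror("write");
        close(fd);
        return 0;
}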
1109 | 1105 | ||
1110 | static ssize_t | 1106 | static ssize_t |
1111 | ftrace_filter_write(struct file *file, const char __user *ubuf, | 1107 | ftrace_filter_write(struct file *file, const char __user *ubuf, |
1112 | size_t cnt, loff_t *ppos) | 1108 | size_t cnt, loff_t *ppos) |
1113 | { | 1109 | { |
1114 | return ftrace_regex_write(file, ubuf, cnt, ppos, 1); | 1110 | return ftrace_regex_write(file, ubuf, cnt, ppos, 1); |
1115 | } | 1111 | } |
1116 | 1112 | ||
1117 | static ssize_t | 1113 | static ssize_t |
1118 | ftrace_notrace_write(struct file *file, const char __user *ubuf, | 1114 | ftrace_notrace_write(struct file *file, const char __user *ubuf, |
1119 | size_t cnt, loff_t *ppos) | 1115 | size_t cnt, loff_t *ppos) |
1120 | { | 1116 | { |
1121 | return ftrace_regex_write(file, ubuf, cnt, ppos, 0); | 1117 | return ftrace_regex_write(file, ubuf, cnt, ppos, 0); |
1122 | } | 1118 | } |
1123 | 1119 | ||
1124 | static void | 1120 | static void |
1125 | ftrace_set_regex(unsigned char *buf, int len, int reset, int enable) | 1121 | ftrace_set_regex(unsigned char *buf, int len, int reset, int enable) |
1126 | { | 1122 | { |
1127 | if (unlikely(ftrace_disabled)) | 1123 | if (unlikely(ftrace_disabled)) |
1128 | return; | 1124 | return; |
1129 | 1125 | ||
1130 | mutex_lock(&ftrace_regex_lock); | 1126 | mutex_lock(&ftrace_regex_lock); |
1131 | if (reset) | 1127 | if (reset) |
1132 | ftrace_filter_reset(enable); | 1128 | ftrace_filter_reset(enable); |
1133 | if (buf) | 1129 | if (buf) |
1134 | ftrace_match(buf, len, enable); | 1130 | ftrace_match(buf, len, enable); |
1135 | mutex_unlock(&ftrace_regex_lock); | 1131 | mutex_unlock(&ftrace_regex_lock); |
1136 | } | 1132 | } |
1137 | 1133 | ||
1138 | /** | 1134 | /** |
1139 | * ftrace_set_filter - set a function to filter on in ftrace | 1135 | * ftrace_set_filter - set a function to filter on in ftrace |
1140 | * @buf - the string that holds the function filter text. | 1136 | * @buf - the string that holds the function filter text. |
1141 | * @len - the length of the string. | 1137 | * @len - the length of the string. |
1142 | * @reset - non-zero to reset all filters before applying this filter. | 1138 | * @reset - non-zero to reset all filters before applying this filter. |
1143 | * | 1139 | * |
1144 | * Filters denote which functions should be enabled when tracing is enabled. | 1140 | * Filters denote which functions should be enabled when tracing is enabled. |
1145 | * If @buf is NULL and reset is set, all functions will be enabled for tracing. | 1141 | * If @buf is NULL and reset is set, all functions will be enabled for tracing. |
1146 | */ | 1142 | */ |
1147 | void ftrace_set_filter(unsigned char *buf, int len, int reset) | 1143 | void ftrace_set_filter(unsigned char *buf, int len, int reset) |
1148 | { | 1144 | { |
1149 | ftrace_set_regex(buf, len, reset, 1); | 1145 | ftrace_set_regex(buf, len, reset, 1); |
1150 | } | 1146 | } |
1151 | 1147 | ||
1152 | /** | 1148 | /** |
1153 | * ftrace_set_notrace - set a function to not trace in ftrace | 1149 | * ftrace_set_notrace - set a function to not trace in ftrace |
1154 | * @buf - the string that holds the function notrace text. | 1150 | * @buf - the string that holds the function notrace text. |
1155 | * @len - the length of the string. | 1151 | * @len - the length of the string. |
1156 | * @reset - non-zero to reset all filters before applying this filter. | 1152 | * @reset - non-zero to reset all filters before applying this filter. |
1157 | * | 1153 | * |
1158 | * Notrace Filters denote which functions should not be enabled when tracing | 1154 | * Notrace Filters denote which functions should not be enabled when tracing |
1159 | * is enabled. If @buf is NULL and reset is set, all functions will be enabled | 1155 | * is enabled. If @buf is NULL and reset is set, all functions will be enabled |
1160 | * for tracing. | 1156 | * for tracing. |
1161 | */ | 1157 | */ |
1162 | void ftrace_set_notrace(unsigned char *buf, int len, int reset) | 1158 | void ftrace_set_notrace(unsigned char *buf, int len, int reset) |
1163 | { | 1159 | { |
1164 | ftrace_set_regex(buf, len, reset, 0); | 1160 | ftrace_set_regex(buf, len, reset, 0); |
1165 | } | 1161 | } |
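ftrace_set_filter() and ftrace_set_notrace() above are the in-kernel equivalents of the filter files. A minimal sketch, assuming it runs from some setup path after ftrace is initialized; the symbol names are illustrative, and the buffers must be writable because ftrace_match() truncates them in place at the first '*'.

#include <linux/ftrace.h>

/* buffers must not be string literals: ftrace_match() writes into them */
static unsigned char example_filter[]  = "try_to_wake_up";
static unsigned char example_notrace[] = "*spin_lock*";

static void example_setup_filters(void)
{
        /* reset=1: drop any previous filter, then trace only this symbol */
        ftrace_set_filter(example_filter, sizeof(example_filter) - 1, 1);
        /* reset=0: add to the notrace list without clearing it */
        ftrace_set_notrace(example_notrace, sizeof(example_notrace) - 1, 0);
}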
1166 | 1162 | ||
1167 | static int | 1163 | static int |
1168 | ftrace_regex_release(struct inode *inode, struct file *file, int enable) | 1164 | ftrace_regex_release(struct inode *inode, struct file *file, int enable) |
1169 | { | 1165 | { |
1170 | struct seq_file *m = (struct seq_file *)file->private_data; | 1166 | struct seq_file *m = (struct seq_file *)file->private_data; |
1171 | struct ftrace_iterator *iter; | 1167 | struct ftrace_iterator *iter; |
1172 | 1168 | ||
1173 | mutex_lock(&ftrace_regex_lock); | 1169 | mutex_lock(&ftrace_regex_lock); |
1174 | if (file->f_mode & FMODE_READ) { | 1170 | if (file->f_mode & FMODE_READ) { |
1175 | iter = m->private; | 1171 | iter = m->private; |
1176 | 1172 | ||
1177 | seq_release(inode, file); | 1173 | seq_release(inode, file); |
1178 | } else | 1174 | } else |
1179 | iter = file->private_data; | 1175 | iter = file->private_data; |
1180 | 1176 | ||
1181 | if (iter->buffer_idx) { | 1177 | if (iter->buffer_idx) { |
1182 | iter->filtered++; | 1178 | iter->filtered++; |
1183 | iter->buffer[iter->buffer_idx] = 0; | 1179 | iter->buffer[iter->buffer_idx] = 0; |
1184 | ftrace_match(iter->buffer, iter->buffer_idx, enable); | 1180 | ftrace_match(iter->buffer, iter->buffer_idx, enable); |
1185 | } | 1181 | } |
1186 | 1182 | ||
1187 | mutex_lock(&ftrace_sysctl_lock); | 1183 | mutex_lock(&ftrace_sysctl_lock); |
1188 | mutex_lock(&ftrace_start_lock); | 1184 | mutex_lock(&ftrace_start_lock); |
1189 | if (iter->filtered && ftrace_start && ftrace_enabled) | 1185 | if (iter->filtered && ftrace_start && ftrace_enabled) |
1190 | ftrace_run_update_code(FTRACE_ENABLE_CALLS); | 1186 | ftrace_run_update_code(FTRACE_ENABLE_CALLS); |
1191 | mutex_unlock(&ftrace_start_lock); | 1187 | mutex_unlock(&ftrace_start_lock); |
1192 | mutex_unlock(&ftrace_sysctl_lock); | 1188 | mutex_unlock(&ftrace_sysctl_lock); |
1193 | 1189 | ||
1194 | kfree(iter); | 1190 | kfree(iter); |
1195 | mutex_unlock(&ftrace_regex_lock); | 1191 | mutex_unlock(&ftrace_regex_lock); |
1196 | return 0; | 1192 | return 0; |
1197 | } | 1193 | } |
1198 | 1194 | ||
1199 | static int | 1195 | static int |
1200 | ftrace_filter_release(struct inode *inode, struct file *file) | 1196 | ftrace_filter_release(struct inode *inode, struct file *file) |
1201 | { | 1197 | { |
1202 | return ftrace_regex_release(inode, file, 1); | 1198 | return ftrace_regex_release(inode, file, 1); |
1203 | } | 1199 | } |
1204 | 1200 | ||
1205 | static int | 1201 | static int |
1206 | ftrace_notrace_release(struct inode *inode, struct file *file) | 1202 | ftrace_notrace_release(struct inode *inode, struct file *file) |
1207 | { | 1203 | { |
1208 | return ftrace_regex_release(inode, file, 0); | 1204 | return ftrace_regex_release(inode, file, 0); |
1209 | } | 1205 | } |
1210 | 1206 | ||
1211 | static struct file_operations ftrace_avail_fops = { | 1207 | static struct file_operations ftrace_avail_fops = { |
1212 | .open = ftrace_avail_open, | 1208 | .open = ftrace_avail_open, |
1213 | .read = seq_read, | 1209 | .read = seq_read, |
1214 | .llseek = seq_lseek, | 1210 | .llseek = seq_lseek, |
1215 | .release = ftrace_avail_release, | 1211 | .release = ftrace_avail_release, |
1216 | }; | 1212 | }; |
1217 | 1213 | ||
1218 | static struct file_operations ftrace_failures_fops = { | 1214 | static struct file_operations ftrace_failures_fops = { |
1219 | .open = ftrace_failures_open, | 1215 | .open = ftrace_failures_open, |
1220 | .read = seq_read, | 1216 | .read = seq_read, |
1221 | .llseek = seq_lseek, | 1217 | .llseek = seq_lseek, |
1222 | .release = ftrace_avail_release, | 1218 | .release = ftrace_avail_release, |
1223 | }; | 1219 | }; |
1224 | 1220 | ||
1225 | static struct file_operations ftrace_filter_fops = { | 1221 | static struct file_operations ftrace_filter_fops = { |
1226 | .open = ftrace_filter_open, | 1222 | .open = ftrace_filter_open, |
1227 | .read = ftrace_regex_read, | 1223 | .read = ftrace_regex_read, |
1228 | .write = ftrace_filter_write, | 1224 | .write = ftrace_filter_write, |
1229 | .llseek = ftrace_regex_lseek, | 1225 | .llseek = ftrace_regex_lseek, |
1230 | .release = ftrace_filter_release, | 1226 | .release = ftrace_filter_release, |
1231 | }; | 1227 | }; |
1232 | 1228 | ||
1233 | static struct file_operations ftrace_notrace_fops = { | 1229 | static struct file_operations ftrace_notrace_fops = { |
1234 | .open = ftrace_notrace_open, | 1230 | .open = ftrace_notrace_open, |
1235 | .read = ftrace_regex_read, | 1231 | .read = ftrace_regex_read, |
1236 | .write = ftrace_notrace_write, | 1232 | .write = ftrace_notrace_write, |
1237 | .llseek = ftrace_regex_lseek, | 1233 | .llseek = ftrace_regex_lseek, |
1238 | .release = ftrace_notrace_release, | 1234 | .release = ftrace_notrace_release, |
1239 | }; | 1235 | }; |
1240 | 1236 | ||
1241 | static __init int ftrace_init_debugfs(void) | 1237 | static __init int ftrace_init_debugfs(void) |
1242 | { | 1238 | { |
1243 | struct dentry *d_tracer; | 1239 | struct dentry *d_tracer; |
1244 | struct dentry *entry; | 1240 | struct dentry *entry; |
1245 | 1241 | ||
1246 | d_tracer = tracing_init_dentry(); | 1242 | d_tracer = tracing_init_dentry(); |
1247 | 1243 | ||
1248 | entry = debugfs_create_file("available_filter_functions", 0444, | 1244 | entry = debugfs_create_file("available_filter_functions", 0444, |
1249 | d_tracer, NULL, &ftrace_avail_fops); | 1245 | d_tracer, NULL, &ftrace_avail_fops); |
1250 | if (!entry) | 1246 | if (!entry) |
1251 | pr_warning("Could not create debugfs " | 1247 | pr_warning("Could not create debugfs " |
1252 | "'available_filter_functions' entry\n"); | 1248 | "'available_filter_functions' entry\n"); |
1253 | 1249 | ||
1254 | entry = debugfs_create_file("failures", 0444, | 1250 | entry = debugfs_create_file("failures", 0444, |
1255 | d_tracer, NULL, &ftrace_failures_fops); | 1251 | d_tracer, NULL, &ftrace_failures_fops); |
1256 | if (!entry) | 1252 | if (!entry) |
1257 | pr_warning("Could not create debugfs 'failures' entry\n"); | 1253 | pr_warning("Could not create debugfs 'failures' entry\n"); |
1258 | 1254 | ||
1259 | entry = debugfs_create_file("set_ftrace_filter", 0644, d_tracer, | 1255 | entry = debugfs_create_file("set_ftrace_filter", 0644, d_tracer, |
1260 | NULL, &ftrace_filter_fops); | 1256 | NULL, &ftrace_filter_fops); |
1261 | if (!entry) | 1257 | if (!entry) |
1262 | pr_warning("Could not create debugfs " | 1258 | pr_warning("Could not create debugfs " |
1263 | "'set_ftrace_filter' entry\n"); | 1259 | "'set_ftrace_filter' entry\n"); |
1264 | 1260 | ||
1265 | entry = debugfs_create_file("set_ftrace_notrace", 0644, d_tracer, | 1261 | entry = debugfs_create_file("set_ftrace_notrace", 0644, d_tracer, |
1266 | NULL, &ftrace_notrace_fops); | 1262 | NULL, &ftrace_notrace_fops); |
1267 | if (!entry) | 1263 | if (!entry) |
1268 | pr_warning("Could not create debugfs " | 1264 | pr_warning("Could not create debugfs " |
1269 | "'set_ftrace_notrace' entry\n"); | 1265 | "'set_ftrace_notrace' entry\n"); |
1270 | 1266 | ||
1271 | return 0; | 1267 | return 0; |
1272 | } | 1268 | } |
1273 | 1269 | ||
1274 | fs_initcall(ftrace_init_debugfs); | 1270 | fs_initcall(ftrace_init_debugfs); |
1275 | 1271 | ||
1276 | static int ftrace_convert_nops(unsigned long *start, | 1272 | static int ftrace_convert_nops(unsigned long *start, |
1277 | unsigned long *end) | 1273 | unsigned long *end) |
1278 | { | 1274 | { |
1279 | unsigned long *p; | 1275 | unsigned long *p; |
1280 | unsigned long addr; | 1276 | unsigned long addr; |
1281 | unsigned long flags; | 1277 | unsigned long flags; |
1282 | 1278 | ||
1283 | mutex_lock(&ftrace_start_lock); | 1279 | mutex_lock(&ftrace_start_lock); |
1284 | p = start; | 1280 | p = start; |
1285 | while (p < end) { | 1281 | while (p < end) { |
1286 | addr = ftrace_call_adjust(*p++); | 1282 | addr = ftrace_call_adjust(*p++); |
1287 | ftrace_record_ip(addr); | 1283 | ftrace_record_ip(addr); |
1288 | } | 1284 | } |
1289 | 1285 | ||
1290 | /* disable interrupts to prevent kstop machine */ | 1286 | /* disable interrupts to prevent kstop machine */ |
1291 | local_irq_save(flags); | 1287 | local_irq_save(flags); |
1292 | ftrace_update_code(); | 1288 | ftrace_update_code(); |
1293 | local_irq_restore(flags); | 1289 | local_irq_restore(flags); |
1294 | mutex_unlock(&ftrace_start_lock); | 1290 | mutex_unlock(&ftrace_start_lock); |
1295 | 1291 | ||
1296 | return 0; | 1292 | return 0; |
1297 | } | 1293 | } |
1298 | 1294 | ||
1299 | void ftrace_init_module(unsigned long *start, unsigned long *end) | 1295 | void ftrace_init_module(unsigned long *start, unsigned long *end) |
1300 | { | 1296 | { |
1301 | if (ftrace_disabled || start == end) | 1297 | if (ftrace_disabled || start == end) |
1302 | return; | 1298 | return; |
1303 | ftrace_convert_nops(start, end); | 1299 | ftrace_convert_nops(start, end); |
1304 | } | 1300 | } |
1305 | 1301 | ||
1306 | extern unsigned long __start_mcount_loc[]; | 1302 | extern unsigned long __start_mcount_loc[]; |
1307 | extern unsigned long __stop_mcount_loc[]; | 1303 | extern unsigned long __stop_mcount_loc[]; |
1308 | 1304 | ||
1309 | void __init ftrace_init(void) | 1305 | void __init ftrace_init(void) |
1310 | { | 1306 | { |
1311 | unsigned long count, addr, flags; | 1307 | unsigned long count, addr, flags; |
1312 | int ret; | 1308 | int ret; |
1313 | 1309 | ||
1314 | /* Keep the ftrace pointer to the stub */ | 1310 | /* Keep the ftrace pointer to the stub */ |
1315 | addr = (unsigned long)ftrace_stub; | 1311 | addr = (unsigned long)ftrace_stub; |
1316 | 1312 | ||
1317 | local_irq_save(flags); | 1313 | local_irq_save(flags); |
1318 | ftrace_dyn_arch_init(&addr); | 1314 | ftrace_dyn_arch_init(&addr); |
1319 | local_irq_restore(flags); | 1315 | local_irq_restore(flags); |
1320 | 1316 | ||
1321 | /* ftrace_dyn_arch_init places the return code in addr */ | 1317 | /* ftrace_dyn_arch_init places the return code in addr */ |
1322 | if (addr) | 1318 | if (addr) |
1323 | goto failed; | 1319 | goto failed; |
1324 | 1320 | ||
1325 | count = __stop_mcount_loc - __start_mcount_loc; | 1321 | count = __stop_mcount_loc - __start_mcount_loc; |
1326 | 1322 | ||
1327 | ret = ftrace_dyn_table_alloc(count); | 1323 | ret = ftrace_dyn_table_alloc(count); |
1328 | if (ret) | 1324 | if (ret) |
1329 | goto failed; | 1325 | goto failed; |
1330 | 1326 | ||
1331 | last_ftrace_enabled = ftrace_enabled = 1; | 1327 | last_ftrace_enabled = ftrace_enabled = 1; |
1332 | 1328 | ||
1333 | ret = ftrace_convert_nops(__start_mcount_loc, | 1329 | ret = ftrace_convert_nops(__start_mcount_loc, |
1334 | __stop_mcount_loc); | 1330 | __stop_mcount_loc); |
1335 | 1331 | ||
1336 | return; | 1332 | return; |
1337 | failed: | 1333 | failed: |
1338 | ftrace_disabled = 1; | 1334 | ftrace_disabled = 1; |
1339 | } | 1335 | } |
1340 | 1336 | ||
1341 | #else | 1337 | #else |
1342 | 1338 | ||
1343 | static int __init ftrace_nodyn_init(void) | 1339 | static int __init ftrace_nodyn_init(void) |
1344 | { | 1340 | { |
1345 | ftrace_enabled = 1; | 1341 | ftrace_enabled = 1; |
1346 | return 0; | 1342 | return 0; |
1347 | } | 1343 | } |
1348 | device_initcall(ftrace_nodyn_init); | 1344 | device_initcall(ftrace_nodyn_init); |
1349 | 1345 | ||
1350 | # define ftrace_startup() do { } while (0) | 1346 | # define ftrace_startup() do { } while (0) |
1351 | # define ftrace_shutdown() do { } while (0) | 1347 | # define ftrace_shutdown() do { } while (0) |
1352 | # define ftrace_startup_sysctl() do { } while (0) | 1348 | # define ftrace_startup_sysctl() do { } while (0) |
1353 | # define ftrace_shutdown_sysctl() do { } while (0) | 1349 | # define ftrace_shutdown_sysctl() do { } while (0) |
1354 | #endif /* CONFIG_DYNAMIC_FTRACE */ | 1350 | #endif /* CONFIG_DYNAMIC_FTRACE */ |
1355 | 1351 | ||
1356 | /** | 1352 | /** |
1357 | * ftrace_kill - kill ftrace | 1353 | * ftrace_kill - kill ftrace |
1358 | * | 1354 | * |
1359 | * This function should be used by panic code. It stops ftrace | 1355 | * This function should be used by panic code. It stops ftrace |
1360 | * but in a not so nice way: it simply marks ftrace as disabled | 1356 | * but in a not so nice way: it simply marks ftrace as disabled |
1361 | * and resets the trace function, with no synchronization. | 1357 | * and resets the trace function, with no synchronization. |
1362 | */ | 1358 | */ |
1363 | void ftrace_kill(void) | 1359 | void ftrace_kill(void) |
1364 | { | 1360 | { |
1365 | ftrace_disabled = 1; | 1361 | ftrace_disabled = 1; |
1366 | ftrace_enabled = 0; | 1362 | ftrace_enabled = 0; |
1367 | clear_ftrace_function(); | 1363 | clear_ftrace_function(); |
1368 | } | 1364 | } |
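ftrace_kill() only sets ftrace_disabled/ftrace_enabled and resets the trace function, so it is cheap enough for a panic path. A hedged sketch of wiring it into a panic notifier; the notifier registration is an assumption about the caller, not something this file provides.

#include <linux/ftrace.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/notifier.h>

static int example_panic_notify(struct notifier_block *nb,
                                unsigned long event, void *ptr)
{
        ftrace_kill();          /* stop all function tracing immediately */
        return NOTIFY_DONE;
}

static struct notifier_block example_panic_nb = {
        .notifier_call = example_panic_notify,
};

static int __init example_panic_init(void)
{
        atomic_notifier_chain_register(&panic_notifier_list,
                                       &example_panic_nb);
        return 0;
}
late_initcall(example_panic_init);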
1369 | 1365 | ||
1370 | /** | 1366 | /** |
1371 | * register_ftrace_function - register a function for profiling | 1367 | * register_ftrace_function - register a function for profiling |
1372 | * @ops - ops structure that holds the function for profiling. | 1368 | * @ops - ops structure that holds the function for profiling. |
1373 | * | 1369 | * |
1374 | * Register a function to be called by all functions in the | 1370 | * Register a function to be called by all functions in the |
1375 | * kernel. | 1371 | * kernel. |
1376 | * | 1372 | * |
1377 | * Note: @ops->func and all the functions it calls must be labeled | 1373 | * Note: @ops->func and all the functions it calls must be labeled |
1378 | * with "notrace", otherwise it will go into a | 1374 | * with "notrace", otherwise it will go into a |
1379 | * recursive loop. | 1375 | * recursive loop. |
1380 | */ | 1376 | */ |
1381 | int register_ftrace_function(struct ftrace_ops *ops) | 1377 | int register_ftrace_function(struct ftrace_ops *ops) |
1382 | { | 1378 | { |
1383 | int ret; | 1379 | int ret; |
1384 | 1380 | ||
1385 | if (unlikely(ftrace_disabled)) | 1381 | if (unlikely(ftrace_disabled)) |
1386 | return -1; | 1382 | return -1; |
1387 | 1383 | ||
1388 | mutex_lock(&ftrace_sysctl_lock); | 1384 | mutex_lock(&ftrace_sysctl_lock); |
1389 | ret = __register_ftrace_function(ops); | 1385 | ret = __register_ftrace_function(ops); |
1390 | ftrace_startup(); | 1386 | ftrace_startup(); |
1391 | mutex_unlock(&ftrace_sysctl_lock); | 1387 | mutex_unlock(&ftrace_sysctl_lock); |
1392 | 1388 | ||
1393 | return ret; | 1389 | return ret; |
1394 | } | 1390 | } |
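A minimal sketch of a caller, assuming the two-argument ftrace_func_t callback signature used in this version of the API; the hit counter is illustrative only. As the comment above warns, the callback and everything it calls must be notrace.

#include <linux/ftrace.h>
#include <linux/init.h>

static unsigned long example_calls;     /* racy on SMP; fine for a sketch */

/* must be notrace: it runs inside every traced function */
static void notrace example_tracer(unsigned long ip, unsigned long parent_ip)
{
        example_calls++;
}

static struct ftrace_ops example_ops = {
        .func = example_tracer,
};

static int __init example_tracer_init(void)
{
        return register_ftrace_function(&example_ops);
}

static void example_tracer_teardown(void)
{
        unregister_ftrace_function(&example_ops);
}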
1395 | 1391 | ||
1396 | /** | 1392 | /** |
1397 | * unregister_ftrace_function - unregister a function for profiling. | 1393 | * unregister_ftrace_function - unregister a function for profiling. |
1398 | * @ops - ops structure that holds the function to unregister | 1394 | * @ops - ops structure that holds the function to unregister |
1399 | * | 1395 | * |
1400 | * Unregister a function that was added to be called by ftrace profiling. | 1396 | * Unregister a function that was added to be called by ftrace profiling. |
1401 | */ | 1397 | */ |
1402 | int unregister_ftrace_function(struct ftrace_ops *ops) | 1398 | int unregister_ftrace_function(struct ftrace_ops *ops) |
1403 | { | 1399 | { |
1404 | int ret; | 1400 | int ret; |
1405 | 1401 | ||
1406 | mutex_lock(&ftrace_sysctl_lock); | 1402 | mutex_lock(&ftrace_sysctl_lock); |
1407 | ret = __unregister_ftrace_function(ops); | 1403 | ret = __unregister_ftrace_function(ops); |
1408 | ftrace_shutdown(); | 1404 | ftrace_shutdown(); |
1409 | mutex_unlock(&ftrace_sysctl_lock); | 1405 | mutex_unlock(&ftrace_sysctl_lock); |
1410 | 1406 | ||
1411 | return ret; | 1407 | return ret; |
1412 | } | 1408 | } |
1413 | 1409 | ||
1414 | int | 1410 | int |
1415 | ftrace_enable_sysctl(struct ctl_table *table, int write, | 1411 | ftrace_enable_sysctl(struct ctl_table *table, int write, |
1416 | struct file *file, void __user *buffer, size_t *lenp, | 1412 | struct file *file, void __user *buffer, size_t *lenp, |
1417 | loff_t *ppos) | 1413 | loff_t *ppos) |
1418 | { | 1414 | { |
1419 | int ret; | 1415 | int ret; |
1420 | 1416 | ||
1421 | if (unlikely(ftrace_disabled)) | 1417 | if (unlikely(ftrace_disabled)) |
1422 | return -ENODEV; | 1418 | return -ENODEV; |
1423 | 1419 | ||
1424 | mutex_lock(&ftrace_sysctl_lock); | 1420 | mutex_lock(&ftrace_sysctl_lock); |
1425 | 1421 | ||
1426 | ret = proc_dointvec(table, write, file, buffer, lenp, ppos); | 1422 | ret = proc_dointvec(table, write, file, buffer, lenp, ppos); |
1427 | 1423 | ||
1428 | if (ret || !write || (last_ftrace_enabled == ftrace_enabled)) | 1424 | if (ret || !write || (last_ftrace_enabled == ftrace_enabled)) |
1429 | goto out; | 1425 | goto out; |
1430 | 1426 | ||
1431 | last_ftrace_enabled = ftrace_enabled; | 1427 | last_ftrace_enabled = ftrace_enabled; |
1432 | 1428 | ||
1433 | if (ftrace_enabled) { | 1429 | if (ftrace_enabled) { |
1434 | 1430 | ||
1435 | ftrace_startup_sysctl(); | 1431 | ftrace_startup_sysctl(); |
1436 | 1432 | ||
1437 | /* we are starting ftrace again */ | 1433 | /* we are starting ftrace again */ |
1438 | if (ftrace_list != &ftrace_list_end) { | 1434 | if (ftrace_list != &ftrace_list_end) { |
1439 | if (ftrace_list->next == &ftrace_list_end) | 1435 | if (ftrace_list->next == &ftrace_list_end) |
1440 | ftrace_trace_function = ftrace_list->func; | 1436 | ftrace_trace_function = ftrace_list->func; |
1441 | else | 1437 | else |
1442 | ftrace_trace_function = ftrace_list_func; | 1438 | ftrace_trace_function = ftrace_list_func; |
1443 | } | 1439 | } |
1444 | 1440 | ||
1445 | } else { | 1441 | } else { |
1446 | /* stopping ftrace calls (just send to ftrace_stub) */ | 1442 | /* stopping ftrace calls (just send to ftrace_stub) */ |
1447 | ftrace_trace_function = ftrace_stub; | 1443 | ftrace_trace_function = ftrace_stub; |
1448 | 1444 | ||
1449 | ftrace_shutdown_sysctl(); | 1445 | ftrace_shutdown_sysctl(); |
1450 | } | 1446 | } |
1451 | 1447 | ||
1452 | out: | 1448 | out: |
1453 | mutex_unlock(&ftrace_sysctl_lock); | 1449 | mutex_unlock(&ftrace_sysctl_lock); |
1454 | return ret; | 1450 | return ret; |
1455 | } | 1451 | } |
1456 | 1452 | ||
1457 | 1453 |
kernel/trace/ring_buffer.c
1 | /* | 1 | /* |
2 | * Generic ring buffer | 2 | * Generic ring buffer |
3 | * | 3 | * |
4 | * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com> | 4 | * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com> |
5 | */ | 5 | */ |
6 | #include <linux/ring_buffer.h> | 6 | #include <linux/ring_buffer.h> |
7 | #include <linux/spinlock.h> | 7 | #include <linux/spinlock.h> |
8 | #include <linux/debugfs.h> | 8 | #include <linux/debugfs.h> |
9 | #include <linux/uaccess.h> | 9 | #include <linux/uaccess.h> |
10 | #include <linux/module.h> | 10 | #include <linux/module.h> |
11 | #include <linux/percpu.h> | 11 | #include <linux/percpu.h> |
12 | #include <linux/mutex.h> | 12 | #include <linux/mutex.h> |
13 | #include <linux/sched.h> /* used for sched_clock() (for now) */ | 13 | #include <linux/sched.h> /* used for sched_clock() (for now) */ |
14 | #include <linux/init.h> | 14 | #include <linux/init.h> |
15 | #include <linux/hash.h> | 15 | #include <linux/hash.h> |
16 | #include <linux/list.h> | 16 | #include <linux/list.h> |
17 | #include <linux/fs.h> | 17 | #include <linux/fs.h> |
18 | 18 | ||
19 | #include "trace.h" | ||
20 | |||
21 | /* Global flag to disable all recording to ring buffers */ | ||
22 | static int ring_buffers_off __read_mostly; | ||
23 | |||
24 | /** | ||
25 | * tracing_on - enable all tracing buffers | ||
26 | * | ||
27 | * This function enables all tracing buffers that may have been | ||
28 | * disabled with tracing_off. | ||
29 | */ | ||
30 | void tracing_on(void) | ||
31 | { | ||
32 | ring_buffers_off = 0; | ||
33 | } | ||
34 | |||
35 | /** | ||
36 | * tracing_off - turn off all tracing buffers | ||
37 | * | ||
38 | * This function stops all tracing buffers from recording data. | ||
39 | * It does not disable any overhead the tracers themselves may | ||
40 | * be causing. This function simply causes all recording to | ||
41 | * the ring buffers to fail. | ||
42 | */ | ||
43 | void tracing_off(void) | ||
44 | { | ||
45 | ring_buffers_off = 1; | ||
46 | } | ||
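tracing_on() and tracing_off() above just assign one global flag, so they are safe to call from almost anywhere. A hedged sketch of the intended in-kernel use, freezing the buffers at the moment a suspicious condition is detected; the latency check and message are illustrative, and the sketch assumes the prototypes are exported through linux/ring_buffer.h as part of this patch. tracing_on() re-enables recording once the buffers have been examined.

#include <linux/kernel.h>
#include <linux/ring_buffer.h>

static void example_check_latency(u64 delta_ns, u64 max_ns)
{
        if (delta_ns > max_ns) {
                printk(KERN_WARNING "latency %llu ns over budget, "
                       "freezing ring buffers\n",
                       (unsigned long long)delta_ns);
                tracing_off();  /* every ring buffer stops recording */
        }
}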
47 | |||
19 | /* Up this if you want to test the TIME_EXTENTS and normalization */ | 48 | /* Up this if you want to test the TIME_EXTENTS and normalization */ |
20 | #define DEBUG_SHIFT 0 | 49 | #define DEBUG_SHIFT 0 |
21 | 50 | ||
22 | /* FIXME!!! */ | 51 | /* FIXME!!! */ |
23 | u64 ring_buffer_time_stamp(int cpu) | 52 | u64 ring_buffer_time_stamp(int cpu) |
24 | { | 53 | { |
25 | /* shift to debug/test normalization and TIME_EXTENTS */ | 54 | /* shift to debug/test normalization and TIME_EXTENTS */ |
26 | return sched_clock() << DEBUG_SHIFT; | 55 | return sched_clock() << DEBUG_SHIFT; |
27 | } | 56 | } |
28 | 57 | ||
29 | void ring_buffer_normalize_time_stamp(int cpu, u64 *ts) | 58 | void ring_buffer_normalize_time_stamp(int cpu, u64 *ts) |
30 | { | 59 | { |
31 | /* Just stupid testing the normalize function and deltas */ | 60 | /* Just stupid testing the normalize function and deltas */ |
32 | *ts >>= DEBUG_SHIFT; | 61 | *ts >>= DEBUG_SHIFT; |
33 | } | 62 | } |
34 | 63 | ||
35 | #define RB_EVNT_HDR_SIZE (sizeof(struct ring_buffer_event)) | 64 | #define RB_EVNT_HDR_SIZE (sizeof(struct ring_buffer_event)) |
36 | #define RB_ALIGNMENT_SHIFT 2 | 65 | #define RB_ALIGNMENT_SHIFT 2 |
37 | #define RB_ALIGNMENT (1 << RB_ALIGNMENT_SHIFT) | 66 | #define RB_ALIGNMENT (1 << RB_ALIGNMENT_SHIFT) |
38 | #define RB_MAX_SMALL_DATA 28 | 67 | #define RB_MAX_SMALL_DATA 28 |
39 | 68 | ||
40 | enum { | 69 | enum { |
41 | RB_LEN_TIME_EXTEND = 8, | 70 | RB_LEN_TIME_EXTEND = 8, |
42 | RB_LEN_TIME_STAMP = 16, | 71 | RB_LEN_TIME_STAMP = 16, |
43 | }; | 72 | }; |
44 | 73 | ||
45 | /* inline for ring buffer fast paths */ | 74 | /* inline for ring buffer fast paths */ |
46 | static inline unsigned | 75 | static inline unsigned |
47 | rb_event_length(struct ring_buffer_event *event) | 76 | rb_event_length(struct ring_buffer_event *event) |
48 | { | 77 | { |
49 | unsigned length; | 78 | unsigned length; |
50 | 79 | ||
51 | switch (event->type) { | 80 | switch (event->type) { |
52 | case RINGBUF_TYPE_PADDING: | 81 | case RINGBUF_TYPE_PADDING: |
53 | /* undefined */ | 82 | /* undefined */ |
54 | return -1; | 83 | return -1; |
55 | 84 | ||
56 | case RINGBUF_TYPE_TIME_EXTEND: | 85 | case RINGBUF_TYPE_TIME_EXTEND: |
57 | return RB_LEN_TIME_EXTEND; | 86 | return RB_LEN_TIME_EXTEND; |
58 | 87 | ||
59 | case RINGBUF_TYPE_TIME_STAMP: | 88 | case RINGBUF_TYPE_TIME_STAMP: |
60 | return RB_LEN_TIME_STAMP; | 89 | return RB_LEN_TIME_STAMP; |
61 | 90 | ||
62 | case RINGBUF_TYPE_DATA: | 91 | case RINGBUF_TYPE_DATA: |
63 | if (event->len) | 92 | if (event->len) |
64 | length = event->len << RB_ALIGNMENT_SHIFT; | 93 | length = event->len << RB_ALIGNMENT_SHIFT; |
65 | else | 94 | else |
66 | length = event->array[0]; | 95 | length = event->array[0]; |
67 | return length + RB_EVNT_HDR_SIZE; | 96 | return length + RB_EVNT_HDR_SIZE; |
68 | default: | 97 | default: |
69 | BUG(); | 98 | BUG(); |
70 | } | 99 | } |
71 | /* not hit */ | 100 | /* not hit */ |
72 | return 0; | 101 | return 0; |
73 | } | 102 | } |
74 | 103 | ||
75 | /** | 104 | /** |
76 | * ring_buffer_event_length - return the length of the event | 105 | * ring_buffer_event_length - return the length of the event |
77 | * @event: the event to get the length of | 106 | * @event: the event to get the length of |
78 | */ | 107 | */ |
79 | unsigned ring_buffer_event_length(struct ring_buffer_event *event) | 108 | unsigned ring_buffer_event_length(struct ring_buffer_event *event) |
80 | { | 109 | { |
81 | return rb_event_length(event); | 110 | return rb_event_length(event); |
82 | } | 111 | } |
83 | 112 | ||
84 | /* inline for ring buffer fast paths */ | 113 | /* inline for ring buffer fast paths */ |
85 | static inline void * | 114 | static inline void * |
86 | rb_event_data(struct ring_buffer_event *event) | 115 | rb_event_data(struct ring_buffer_event *event) |
87 | { | 116 | { |
88 | BUG_ON(event->type != RINGBUF_TYPE_DATA); | 117 | BUG_ON(event->type != RINGBUF_TYPE_DATA); |
89 | /* If length is in len field, then array[0] has the data */ | 118 | /* If length is in len field, then array[0] has the data */ |
90 | if (event->len) | 119 | if (event->len) |
91 | return (void *)&event->array[0]; | 120 | return (void *)&event->array[0]; |
92 | /* Otherwise length is in array[0] and array[1] has the data */ | 121 | /* Otherwise length is in array[0] and array[1] has the data */ |
93 | return (void *)&event->array[1]; | 122 | return (void *)&event->array[1]; |
94 | } | 123 | } |
95 | 124 | ||
96 | /** | 125 | /** |
97 | * ring_buffer_event_data - return the data of the event | 126 | * ring_buffer_event_data - return the data of the event |
98 | * @event: the event to get the data from | 127 | * @event: the event to get the data from |
99 | */ | 128 | */ |
100 | void *ring_buffer_event_data(struct ring_buffer_event *event) | 129 | void *ring_buffer_event_data(struct ring_buffer_event *event) |
101 | { | 130 | { |
102 | return rb_event_data(event); | 131 | return rb_event_data(event); |
103 | } | 132 | } |
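A hedged consumer-side sketch using the two accessors above. It assumes ring_buffer_consume() takes (buffer, cpu, &ts) and returns only data events, which is how the rest of this API is used; note that the length reported here includes the event header.

#include <linux/kernel.h>
#include <linux/ring_buffer.h>

static void example_drain_cpu(struct ring_buffer *buffer, int cpu)
{
        struct ring_buffer_event *event;
        u64 ts;

        /* ring_buffer_consume() signature is an assumption, see above */
        while ((event = ring_buffer_consume(buffer, cpu, &ts)) != NULL) {
                void *payload = ring_buffer_event_data(event);
                unsigned len = ring_buffer_event_length(event);

                printk(KERN_DEBUG "cpu%d ts=%llu len=%u byte0=%02x\n",
                       cpu, (unsigned long long)ts, len,
                       *(unsigned char *)payload);
        }
}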
104 | 133 | ||
105 | #define for_each_buffer_cpu(buffer, cpu) \ | 134 | #define for_each_buffer_cpu(buffer, cpu) \ |
106 | for_each_cpu_mask(cpu, buffer->cpumask) | 135 | for_each_cpu_mask(cpu, buffer->cpumask) |
107 | 136 | ||
108 | #define TS_SHIFT 27 | 137 | #define TS_SHIFT 27 |
109 | #define TS_MASK ((1ULL << TS_SHIFT) - 1) | 138 | #define TS_MASK ((1ULL << TS_SHIFT) - 1) |
110 | #define TS_DELTA_TEST (~TS_MASK) | 139 | #define TS_DELTA_TEST (~TS_MASK) |
111 | 140 | ||
112 | /* | 141 | /* |
113 | * This hack stolen from mm/slob.c. | 142 | * This hack stolen from mm/slob.c. |
114 | * We can store per page timing information in the page frame of the page. | 143 | * We can store per page timing information in the page frame of the page. |
115 | * Thanks to Peter Zijlstra for suggesting this idea. | 144 | * Thanks to Peter Zijlstra for suggesting this idea. |
116 | */ | 145 | */ |
117 | struct buffer_page { | 146 | struct buffer_page { |
118 | u64 time_stamp; /* page time stamp */ | 147 | u64 time_stamp; /* page time stamp */ |
119 | local_t write; /* index for next write */ | 148 | local_t write; /* index for next write */ |
120 | local_t commit; /* write committed index */ | 149 | local_t commit; /* write committed index */ |
121 | unsigned read; /* index for next read */ | 150 | unsigned read; /* index for next read */ |
122 | struct list_head list; /* list of free pages */ | 151 | struct list_head list; /* list of free pages */ |
123 | void *page; /* Actual data page */ | 152 | void *page; /* Actual data page */ |
124 | }; | 153 | }; |
125 | 154 | ||
126 | /* | 155 | /* |
127 | * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing | 156 | * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing |
128 | * this issue out. | 157 | * this issue out. |
129 | */ | 158 | */ |
130 | static inline void free_buffer_page(struct buffer_page *bpage) | 159 | static inline void free_buffer_page(struct buffer_page *bpage) |
131 | { | 160 | { |
132 | if (bpage->page) | 161 | if (bpage->page) |
133 | free_page((unsigned long)bpage->page); | 162 | free_page((unsigned long)bpage->page); |
134 | kfree(bpage); | 163 | kfree(bpage); |
135 | } | 164 | } |
136 | 165 | ||
137 | /* | 166 | /* |
138 | * We need to fit the time_stamp delta into 27 bits. | 167 | * We need to fit the time_stamp delta into 27 bits. |
139 | */ | 168 | */ |
140 | static inline int test_time_stamp(u64 delta) | 169 | static inline int test_time_stamp(u64 delta) |
141 | { | 170 | { |
142 | if (delta & TS_DELTA_TEST) | 171 | if (delta & TS_DELTA_TEST) |
143 | return 1; | 172 | return 1; |
144 | return 0; | 173 | return 0; |
145 | } | 174 | } |
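For scale: TS_SHIFT is 27, so when sched_clock() counts nanoseconds a bare delta can cover at most 2^27 - 1 ns, roughly 134 ms; a larger gap is emitted as a RINGBUF_TYPE_TIME_EXTEND record instead of a plain delta. A small sketch of the same bound, assuming nanosecond timestamps:

/* Largest gap a bare 27-bit delta can represent (~134 ms at 1 ns/tick). */
#define RB_MAX_BARE_DELTA_NS    ((1ULL << TS_SHIFT) - 1)        /* 134217727 */

static inline int rb_delta_needs_extend(u64 delta)
{
        /* equivalent to test_time_stamp(): some bit above TS_MASK is set */
        return delta > RB_MAX_BARE_DELTA_NS;
}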
146 | 175 | ||
147 | #define BUF_PAGE_SIZE PAGE_SIZE | 176 | #define BUF_PAGE_SIZE PAGE_SIZE |
148 | 177 | ||
149 | /* | 178 | /* |
150 | * head_page == tail_page && head == tail then buffer is empty. | 179 | * head_page == tail_page && head == tail then buffer is empty. |
151 | */ | 180 | */ |
152 | struct ring_buffer_per_cpu { | 181 | struct ring_buffer_per_cpu { |
153 | int cpu; | 182 | int cpu; |
154 | struct ring_buffer *buffer; | 183 | struct ring_buffer *buffer; |
155 | spinlock_t lock; | 184 | spinlock_t lock; |
156 | struct lock_class_key lock_key; | 185 | struct lock_class_key lock_key; |
157 | struct list_head pages; | 186 | struct list_head pages; |
158 | struct buffer_page *head_page; /* read from head */ | 187 | struct buffer_page *head_page; /* read from head */ |
159 | struct buffer_page *tail_page; /* write to tail */ | 188 | struct buffer_page *tail_page; /* write to tail */ |
160 | struct buffer_page *commit_page; /* committed pages */ | 189 | struct buffer_page *commit_page; /* committed pages */ |
161 | struct buffer_page *reader_page; | 190 | struct buffer_page *reader_page; |
162 | unsigned long overrun; | 191 | unsigned long overrun; |
163 | unsigned long entries; | 192 | unsigned long entries; |
164 | u64 write_stamp; | 193 | u64 write_stamp; |
165 | u64 read_stamp; | 194 | u64 read_stamp; |
166 | atomic_t record_disabled; | 195 | atomic_t record_disabled; |
167 | }; | 196 | }; |
168 | 197 | ||
169 | struct ring_buffer { | 198 | struct ring_buffer { |
170 | unsigned long size; | 199 | unsigned long size; |
171 | unsigned pages; | 200 | unsigned pages; |
172 | unsigned flags; | 201 | unsigned flags; |
173 | int cpus; | 202 | int cpus; |
174 | cpumask_t cpumask; | 203 | cpumask_t cpumask; |
175 | atomic_t record_disabled; | 204 | atomic_t record_disabled; |
176 | 205 | ||
177 | struct mutex mutex; | 206 | struct mutex mutex; |
178 | 207 | ||
179 | struct ring_buffer_per_cpu **buffers; | 208 | struct ring_buffer_per_cpu **buffers; |
180 | }; | 209 | }; |
181 | 210 | ||
182 | struct ring_buffer_iter { | 211 | struct ring_buffer_iter { |
183 | struct ring_buffer_per_cpu *cpu_buffer; | 212 | struct ring_buffer_per_cpu *cpu_buffer; |
184 | unsigned long head; | 213 | unsigned long head; |
185 | struct buffer_page *head_page; | 214 | struct buffer_page *head_page; |
186 | u64 read_stamp; | 215 | u64 read_stamp; |
187 | }; | 216 | }; |
188 | 217 | ||
189 | #define RB_WARN_ON(buffer, cond) \ | 218 | #define RB_WARN_ON(buffer, cond) \ |
190 | do { \ | 219 | do { \ |
191 | if (unlikely(cond)) { \ | 220 | if (unlikely(cond)) { \ |
192 | atomic_inc(&buffer->record_disabled); \ | 221 | atomic_inc(&buffer->record_disabled); \ |
193 | WARN_ON(1); \ | 222 | WARN_ON(1); \ |
194 | } \ | 223 | } \ |
195 | } while (0) | 224 | } while (0) |
196 | 225 | ||
197 | #define RB_WARN_ON_RET(buffer, cond) \ | 226 | #define RB_WARN_ON_RET(buffer, cond) \ |
198 | do { \ | 227 | do { \ |
199 | if (unlikely(cond)) { \ | 228 | if (unlikely(cond)) { \ |
200 | atomic_inc(&buffer->record_disabled); \ | 229 | atomic_inc(&buffer->record_disabled); \ |
201 | WARN_ON(1); \ | 230 | WARN_ON(1); \ |
202 | return -1; \ | 231 | return -1; \ |
203 | } \ | 232 | } \ |
204 | } while (0) | 233 | } while (0) |
205 | 234 | ||
206 | #define RB_WARN_ON_ONCE(buffer, cond) \ | 235 | #define RB_WARN_ON_ONCE(buffer, cond) \ |
207 | do { \ | 236 | do { \ |
208 | static int once; \ | 237 | static int once; \ |
209 | if (unlikely(cond) && !once) { \ | 238 | if (unlikely(cond) && !once) { \ |
210 | once++; \ | 239 | once++; \ |
211 | atomic_inc(&buffer->record_disabled); \ | 240 | atomic_inc(&buffer->record_disabled); \ |
212 | WARN_ON(1); \ | 241 | WARN_ON(1); \ |
213 | } \ | 242 | } \ |
214 | } while (0) | 243 | } while (0) |
215 | 244 | ||
216 | /** | 245 | /** |
217 | * check_pages - integrity check of buffer pages | 246 | * check_pages - integrity check of buffer pages |
218 | * @cpu_buffer: CPU buffer with pages to test | 247 | * @cpu_buffer: CPU buffer with pages to test |
219 | * | 248 | * |
220 | * As a safety measure we check to make sure the data pages have not | 249 | * As a safety measure we check to make sure the data pages have not |
221 | * been corrupted. | 250 | * been corrupted. |
222 | */ | 251 | */ |
223 | static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) | 252 | static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) |
224 | { | 253 | { |
225 | struct list_head *head = &cpu_buffer->pages; | 254 | struct list_head *head = &cpu_buffer->pages; |
226 | struct buffer_page *page, *tmp; | 255 | struct buffer_page *page, *tmp; |
227 | 256 | ||
228 | RB_WARN_ON_RET(cpu_buffer, head->next->prev != head); | 257 | RB_WARN_ON_RET(cpu_buffer, head->next->prev != head); |
229 | RB_WARN_ON_RET(cpu_buffer, head->prev->next != head); | 258 | RB_WARN_ON_RET(cpu_buffer, head->prev->next != head); |
230 | 259 | ||
231 | list_for_each_entry_safe(page, tmp, head, list) { | 260 | list_for_each_entry_safe(page, tmp, head, list) { |
232 | RB_WARN_ON_RET(cpu_buffer, | 261 | RB_WARN_ON_RET(cpu_buffer, |
233 | page->list.next->prev != &page->list); | 262 | page->list.next->prev != &page->list); |
234 | RB_WARN_ON_RET(cpu_buffer, | 263 | RB_WARN_ON_RET(cpu_buffer, |
235 | page->list.prev->next != &page->list); | 264 | page->list.prev->next != &page->list); |
236 | } | 265 | } |
237 | 266 | ||
238 | return 0; | 267 | return 0; |
239 | } | 268 | } |
240 | 269 | ||
241 | static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, | 270 | static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, |
242 | unsigned nr_pages) | 271 | unsigned nr_pages) |
243 | { | 272 | { |
244 | struct list_head *head = &cpu_buffer->pages; | 273 | struct list_head *head = &cpu_buffer->pages; |
245 | struct buffer_page *page, *tmp; | 274 | struct buffer_page *page, *tmp; |
246 | unsigned long addr; | 275 | unsigned long addr; |
247 | LIST_HEAD(pages); | 276 | LIST_HEAD(pages); |
248 | unsigned i; | 277 | unsigned i; |
249 | 278 | ||
250 | for (i = 0; i < nr_pages; i++) { | 279 | for (i = 0; i < nr_pages; i++) { |
251 | page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()), | 280 | page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()), |
252 | GFP_KERNEL, cpu_to_node(cpu_buffer->cpu)); | 281 | GFP_KERNEL, cpu_to_node(cpu_buffer->cpu)); |
253 | if (!page) | 282 | if (!page) |
254 | goto free_pages; | 283 | goto free_pages; |
255 | list_add(&page->list, &pages); | 284 | list_add(&page->list, &pages); |
256 | 285 | ||
257 | addr = __get_free_page(GFP_KERNEL); | 286 | addr = __get_free_page(GFP_KERNEL); |
258 | if (!addr) | 287 | if (!addr) |
259 | goto free_pages; | 288 | goto free_pages; |
260 | page->page = (void *)addr; | 289 | page->page = (void *)addr; |
261 | } | 290 | } |
262 | 291 | ||
263 | list_splice(&pages, head); | 292 | list_splice(&pages, head); |
264 | 293 | ||
265 | rb_check_pages(cpu_buffer); | 294 | rb_check_pages(cpu_buffer); |
266 | 295 | ||
267 | return 0; | 296 | return 0; |
268 | 297 | ||
269 | free_pages: | 298 | free_pages: |
270 | list_for_each_entry_safe(page, tmp, &pages, list) { | 299 | list_for_each_entry_safe(page, tmp, &pages, list) { |
271 | list_del_init(&page->list); | 300 | list_del_init(&page->list); |
272 | free_buffer_page(page); | 301 | free_buffer_page(page); |
273 | } | 302 | } |
274 | return -ENOMEM; | 303 | return -ENOMEM; |
275 | } | 304 | } |
276 | 305 | ||
277 | static struct ring_buffer_per_cpu * | 306 | static struct ring_buffer_per_cpu * |
278 | rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) | 307 | rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) |
279 | { | 308 | { |
280 | struct ring_buffer_per_cpu *cpu_buffer; | 309 | struct ring_buffer_per_cpu *cpu_buffer; |
281 | struct buffer_page *page; | 310 | struct buffer_page *page; |
282 | unsigned long addr; | 311 | unsigned long addr; |
283 | int ret; | 312 | int ret; |
284 | 313 | ||
285 | cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()), | 314 | cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()), |
286 | GFP_KERNEL, cpu_to_node(cpu)); | 315 | GFP_KERNEL, cpu_to_node(cpu)); |
287 | if (!cpu_buffer) | 316 | if (!cpu_buffer) |
288 | return NULL; | 317 | return NULL; |
289 | 318 | ||
290 | cpu_buffer->cpu = cpu; | 319 | cpu_buffer->cpu = cpu; |
291 | cpu_buffer->buffer = buffer; | 320 | cpu_buffer->buffer = buffer; |
292 | spin_lock_init(&cpu_buffer->lock); | 321 | spin_lock_init(&cpu_buffer->lock); |
293 | INIT_LIST_HEAD(&cpu_buffer->pages); | 322 | INIT_LIST_HEAD(&cpu_buffer->pages); |
294 | 323 | ||
295 | page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()), | 324 | page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()), |
296 | GFP_KERNEL, cpu_to_node(cpu)); | 325 | GFP_KERNEL, cpu_to_node(cpu)); |
297 | if (!page) | 326 | if (!page) |
298 | goto fail_free_buffer; | 327 | goto fail_free_buffer; |
299 | 328 | ||
300 | cpu_buffer->reader_page = page; | 329 | cpu_buffer->reader_page = page; |
301 | addr = __get_free_page(GFP_KERNEL); | 330 | addr = __get_free_page(GFP_KERNEL); |
302 | if (!addr) | 331 | if (!addr) |
303 | goto fail_free_reader; | 332 | goto fail_free_reader; |
304 | page->page = (void *)addr; | 333 | page->page = (void *)addr; |
305 | 334 | ||
306 | INIT_LIST_HEAD(&cpu_buffer->reader_page->list); | 335 | INIT_LIST_HEAD(&cpu_buffer->reader_page->list); |
307 | 336 | ||
308 | ret = rb_allocate_pages(cpu_buffer, buffer->pages); | 337 | ret = rb_allocate_pages(cpu_buffer, buffer->pages); |
309 | if (ret < 0) | 338 | if (ret < 0) |
310 | goto fail_free_reader; | 339 | goto fail_free_reader; |
311 | 340 | ||
312 | cpu_buffer->head_page | 341 | cpu_buffer->head_page |
313 | = list_entry(cpu_buffer->pages.next, struct buffer_page, list); | 342 | = list_entry(cpu_buffer->pages.next, struct buffer_page, list); |
314 | cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page; | 343 | cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page; |
315 | 344 | ||
316 | return cpu_buffer; | 345 | return cpu_buffer; |
317 | 346 | ||
318 | fail_free_reader: | 347 | fail_free_reader: |
319 | free_buffer_page(cpu_buffer->reader_page); | 348 | free_buffer_page(cpu_buffer->reader_page); |
320 | 349 | ||
321 | fail_free_buffer: | 350 | fail_free_buffer: |
322 | kfree(cpu_buffer); | 351 | kfree(cpu_buffer); |
323 | return NULL; | 352 | return NULL; |
324 | } | 353 | } |
325 | 354 | ||
326 | static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) | 355 | static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) |
327 | { | 356 | { |
328 | struct list_head *head = &cpu_buffer->pages; | 357 | struct list_head *head = &cpu_buffer->pages; |
329 | struct buffer_page *page, *tmp; | 358 | struct buffer_page *page, *tmp; |
330 | 359 | ||
331 | list_del_init(&cpu_buffer->reader_page->list); | 360 | list_del_init(&cpu_buffer->reader_page->list); |
332 | free_buffer_page(cpu_buffer->reader_page); | 361 | free_buffer_page(cpu_buffer->reader_page); |
333 | 362 | ||
334 | list_for_each_entry_safe(page, tmp, head, list) { | 363 | list_for_each_entry_safe(page, tmp, head, list) { |
335 | list_del_init(&page->list); | 364 | list_del_init(&page->list); |
336 | free_buffer_page(page); | 365 | free_buffer_page(page); |
337 | } | 366 | } |
338 | kfree(cpu_buffer); | 367 | kfree(cpu_buffer); |
339 | } | 368 | } |
340 | 369 | ||
341 | /* | 370 | /* |
342 | * Causes compile errors if the struct buffer_page gets bigger | 371 | * Causes compile errors if the struct buffer_page gets bigger |
343 | * than the struct page. | 372 | * than the struct page. |
344 | */ | 373 | */ |
345 | extern int ring_buffer_page_too_big(void); | 374 | extern int ring_buffer_page_too_big(void); |
346 | 375 | ||
347 | /** | 376 | /** |
348 | * ring_buffer_alloc - allocate a new ring_buffer | 377 | * ring_buffer_alloc - allocate a new ring_buffer |
349 | * @size: the size in bytes that is needed. | 378 | * @size: the size in bytes that is needed. |
350 | * @flags: attributes to set for the ring buffer. | 379 | * @flags: attributes to set for the ring buffer. |
351 | * | 380 | * |
352 | * Currently the only flag that is available is the RB_FL_OVERWRITE | 381 | * Currently the only flag that is available is the RB_FL_OVERWRITE |
353 | * flag. This flag means that the buffer will overwrite old data | 382 | * flag. This flag means that the buffer will overwrite old data |
354 | * when the buffer wraps. If this flag is not set, the buffer will | 383 | * when the buffer wraps. If this flag is not set, the buffer will |
355 | * drop data when the tail hits the head. | 384 | * drop data when the tail hits the head. |
356 | */ | 385 | */ |
357 | struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags) | 386 | struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags) |
358 | { | 387 | { |
359 | struct ring_buffer *buffer; | 388 | struct ring_buffer *buffer; |
360 | int bsize; | 389 | int bsize; |
361 | int cpu; | 390 | int cpu; |
362 | 391 | ||
363 | /* Paranoid! Optimizes out when all is well */ | 392 | /* Paranoid! Optimizes out when all is well */ |
364 | if (sizeof(struct buffer_page) > sizeof(struct page)) | 393 | if (sizeof(struct buffer_page) > sizeof(struct page)) |
365 | ring_buffer_page_too_big(); | 394 | ring_buffer_page_too_big(); |
366 | 395 | ||
367 | 396 | ||
368 | /* keep it in its own cache line */ | 397 | /* keep it in its own cache line */ |
369 | buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()), | 398 | buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()), |
370 | GFP_KERNEL); | 399 | GFP_KERNEL); |
371 | if (!buffer) | 400 | if (!buffer) |
372 | return NULL; | 401 | return NULL; |
373 | 402 | ||
374 | buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); | 403 | buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); |
375 | buffer->flags = flags; | 404 | buffer->flags = flags; |
376 | 405 | ||
377 | /* need at least two pages */ | 406 | /* need at least two pages */ |
378 | if (buffer->pages == 1) | 407 | if (buffer->pages == 1) |
379 | buffer->pages++; | 408 | buffer->pages++; |
380 | 409 | ||
381 | buffer->cpumask = cpu_possible_map; | 410 | buffer->cpumask = cpu_possible_map; |
382 | buffer->cpus = nr_cpu_ids; | 411 | buffer->cpus = nr_cpu_ids; |
383 | 412 | ||
384 | bsize = sizeof(void *) * nr_cpu_ids; | 413 | bsize = sizeof(void *) * nr_cpu_ids; |
385 | buffer->buffers = kzalloc(ALIGN(bsize, cache_line_size()), | 414 | buffer->buffers = kzalloc(ALIGN(bsize, cache_line_size()), |
386 | GFP_KERNEL); | 415 | GFP_KERNEL); |
387 | if (!buffer->buffers) | 416 | if (!buffer->buffers) |
388 | goto fail_free_buffer; | 417 | goto fail_free_buffer; |
389 | 418 | ||
390 | for_each_buffer_cpu(buffer, cpu) { | 419 | for_each_buffer_cpu(buffer, cpu) { |
391 | buffer->buffers[cpu] = | 420 | buffer->buffers[cpu] = |
392 | rb_allocate_cpu_buffer(buffer, cpu); | 421 | rb_allocate_cpu_buffer(buffer, cpu); |
393 | if (!buffer->buffers[cpu]) | 422 | if (!buffer->buffers[cpu]) |
394 | goto fail_free_buffers; | 423 | goto fail_free_buffers; |
395 | } | 424 | } |
396 | 425 | ||
397 | mutex_init(&buffer->mutex); | 426 | mutex_init(&buffer->mutex); |
398 | 427 | ||
399 | return buffer; | 428 | return buffer; |
400 | 429 | ||
401 | fail_free_buffers: | 430 | fail_free_buffers: |
402 | for_each_buffer_cpu(buffer, cpu) { | 431 | for_each_buffer_cpu(buffer, cpu) { |
403 | if (buffer->buffers[cpu]) | 432 | if (buffer->buffers[cpu]) |
404 | rb_free_cpu_buffer(buffer->buffers[cpu]); | 433 | rb_free_cpu_buffer(buffer->buffers[cpu]); |
405 | } | 434 | } |
406 | kfree(buffer->buffers); | 435 | kfree(buffer->buffers); |
407 | 436 | ||
408 | fail_free_buffer: | 437 | fail_free_buffer: |
409 | kfree(buffer); | 438 | kfree(buffer); |
410 | return NULL; | 439 | return NULL; |
411 | } | 440 | } |
412 | 441 | ||
413 | /** | 442 | /** |
414 | * ring_buffer_free - free a ring buffer. | 443 | * ring_buffer_free - free a ring buffer. |
415 | * @buffer: the buffer to free. | 444 | * @buffer: the buffer to free. |
416 | */ | 445 | */ |
417 | void | 446 | void |
418 | ring_buffer_free(struct ring_buffer *buffer) | 447 | ring_buffer_free(struct ring_buffer *buffer) |
419 | { | 448 | { |
420 | int cpu; | 449 | int cpu; |
421 | 450 | ||
422 | for_each_buffer_cpu(buffer, cpu) | 451 | for_each_buffer_cpu(buffer, cpu) |
423 | rb_free_cpu_buffer(buffer->buffers[cpu]); | 452 | rb_free_cpu_buffer(buffer->buffers[cpu]); |
424 | 453 | ||
425 | kfree(buffer); | 454 | kfree(buffer); |
426 | } | 455 | } |
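ring_buffer_alloc() and ring_buffer_free() are the top-level setup and teardown for a tracer. A minimal sketch of that pairing, assuming a kernel-module context and a purely illustrative 1 MB size (per the code above, the size is rounded up to whole buffer pages, each CPU receives that many pages, and a two-page minimum is enforced):

#include <linux/ring_buffer.h>

/* Hypothetical example, not part of this patch. */
static struct ring_buffer *example_buffer;

static int example_setup(void)
{
	/*
	 * RB_FL_OVERWRITE makes the buffer overwrite the oldest events
	 * when it wraps; without it, new writes are dropped instead.
	 */
	example_buffer = ring_buffer_alloc(1 << 20, RB_FL_OVERWRITE);
	if (!example_buffer)
		return -ENOMEM;
	return 0;
}

static void example_teardown(void)
{
	/* Frees every per-cpu buffer along with its pages. */
	ring_buffer_free(example_buffer);
}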
427 | 456 | ||
428 | static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer); | 457 | static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer); |
429 | 458 | ||
430 | static void | 459 | static void |
431 | rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages) | 460 | rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages) |
432 | { | 461 | { |
433 | struct buffer_page *page; | 462 | struct buffer_page *page; |
434 | struct list_head *p; | 463 | struct list_head *p; |
435 | unsigned i; | 464 | unsigned i; |
436 | 465 | ||
437 | atomic_inc(&cpu_buffer->record_disabled); | 466 | atomic_inc(&cpu_buffer->record_disabled); |
438 | synchronize_sched(); | 467 | synchronize_sched(); |
439 | 468 | ||
440 | for (i = 0; i < nr_pages; i++) { | 469 | for (i = 0; i < nr_pages; i++) { |
441 | BUG_ON(list_empty(&cpu_buffer->pages)); | 470 | BUG_ON(list_empty(&cpu_buffer->pages)); |
442 | p = cpu_buffer->pages.next; | 471 | p = cpu_buffer->pages.next; |
443 | page = list_entry(p, struct buffer_page, list); | 472 | page = list_entry(p, struct buffer_page, list); |
444 | list_del_init(&page->list); | 473 | list_del_init(&page->list); |
445 | free_buffer_page(page); | 474 | free_buffer_page(page); |
446 | } | 475 | } |
447 | BUG_ON(list_empty(&cpu_buffer->pages)); | 476 | BUG_ON(list_empty(&cpu_buffer->pages)); |
448 | 477 | ||
449 | rb_reset_cpu(cpu_buffer); | 478 | rb_reset_cpu(cpu_buffer); |
450 | 479 | ||
451 | rb_check_pages(cpu_buffer); | 480 | rb_check_pages(cpu_buffer); |
452 | 481 | ||
453 | atomic_dec(&cpu_buffer->record_disabled); | 482 | atomic_dec(&cpu_buffer->record_disabled); |
454 | 483 | ||
455 | } | 484 | } |
456 | 485 | ||
457 | static void | 486 | static void |
458 | rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer, | 487 | rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer, |
459 | struct list_head *pages, unsigned nr_pages) | 488 | struct list_head *pages, unsigned nr_pages) |
460 | { | 489 | { |
461 | struct buffer_page *page; | 490 | struct buffer_page *page; |
462 | struct list_head *p; | 491 | struct list_head *p; |
463 | unsigned i; | 492 | unsigned i; |
464 | 493 | ||
465 | atomic_inc(&cpu_buffer->record_disabled); | 494 | atomic_inc(&cpu_buffer->record_disabled); |
466 | synchronize_sched(); | 495 | synchronize_sched(); |
467 | 496 | ||
468 | for (i = 0; i < nr_pages; i++) { | 497 | for (i = 0; i < nr_pages; i++) { |
469 | BUG_ON(list_empty(pages)); | 498 | BUG_ON(list_empty(pages)); |
470 | p = pages->next; | 499 | p = pages->next; |
471 | page = list_entry(p, struct buffer_page, list); | 500 | page = list_entry(p, struct buffer_page, list); |
472 | list_del_init(&page->list); | 501 | list_del_init(&page->list); |
473 | list_add_tail(&page->list, &cpu_buffer->pages); | 502 | list_add_tail(&page->list, &cpu_buffer->pages); |
474 | } | 503 | } |
475 | rb_reset_cpu(cpu_buffer); | 504 | rb_reset_cpu(cpu_buffer); |
476 | 505 | ||
477 | rb_check_pages(cpu_buffer); | 506 | rb_check_pages(cpu_buffer); |
478 | 507 | ||
479 | atomic_dec(&cpu_buffer->record_disabled); | 508 | atomic_dec(&cpu_buffer->record_disabled); |
480 | } | 509 | } |
481 | 510 | ||
482 | /** | 511 | /** |
483 | * ring_buffer_resize - resize the ring buffer | 512 | * ring_buffer_resize - resize the ring buffer |
484 | * @buffer: the buffer to resize. | 513 | * @buffer: the buffer to resize. |
485 | * @size: the new size. | 514 | * @size: the new size. |
486 | * | 515 | * |
487 | * The tracer is responsible for making sure that the buffer is | 516 | * The tracer is responsible for making sure that the buffer is |
488 | * not being used while changing the size. | 517 | * not being used while changing the size. |
489 | * Note: We may be able to change the above requirement by using | 518 | * Note: We may be able to change the above requirement by using |
490 | * RCU synchronizations. | 519 | * RCU synchronizations. |
491 | * | 520 | * |
492 | * Minimum size is 2 * BUF_PAGE_SIZE. | 521 | * Minimum size is 2 * BUF_PAGE_SIZE. |
493 | * | 522 | * |
 494 | * Returns -ENOMEM on failure, otherwise the new size. | 523 | * Returns -ENOMEM on failure, otherwise the new size. |
495 | */ | 524 | */ |
496 | int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) | 525 | int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) |
497 | { | 526 | { |
498 | struct ring_buffer_per_cpu *cpu_buffer; | 527 | struct ring_buffer_per_cpu *cpu_buffer; |
499 | unsigned nr_pages, rm_pages, new_pages; | 528 | unsigned nr_pages, rm_pages, new_pages; |
500 | struct buffer_page *page, *tmp; | 529 | struct buffer_page *page, *tmp; |
501 | unsigned long buffer_size; | 530 | unsigned long buffer_size; |
502 | unsigned long addr; | 531 | unsigned long addr; |
503 | LIST_HEAD(pages); | 532 | LIST_HEAD(pages); |
504 | int i, cpu; | 533 | int i, cpu; |
505 | 534 | ||
506 | size = DIV_ROUND_UP(size, BUF_PAGE_SIZE); | 535 | size = DIV_ROUND_UP(size, BUF_PAGE_SIZE); |
507 | size *= BUF_PAGE_SIZE; | 536 | size *= BUF_PAGE_SIZE; |
508 | buffer_size = buffer->pages * BUF_PAGE_SIZE; | 537 | buffer_size = buffer->pages * BUF_PAGE_SIZE; |
509 | 538 | ||
510 | /* we need a minimum of two pages */ | 539 | /* we need a minimum of two pages */ |
511 | if (size < BUF_PAGE_SIZE * 2) | 540 | if (size < BUF_PAGE_SIZE * 2) |
512 | size = BUF_PAGE_SIZE * 2; | 541 | size = BUF_PAGE_SIZE * 2; |
513 | 542 | ||
514 | if (size == buffer_size) | 543 | if (size == buffer_size) |
515 | return size; | 544 | return size; |
516 | 545 | ||
517 | mutex_lock(&buffer->mutex); | 546 | mutex_lock(&buffer->mutex); |
518 | 547 | ||
519 | nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); | 548 | nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); |
520 | 549 | ||
521 | if (size < buffer_size) { | 550 | if (size < buffer_size) { |
522 | 551 | ||
523 | /* easy case, just free pages */ | 552 | /* easy case, just free pages */ |
524 | BUG_ON(nr_pages >= buffer->pages); | 553 | BUG_ON(nr_pages >= buffer->pages); |
525 | 554 | ||
526 | rm_pages = buffer->pages - nr_pages; | 555 | rm_pages = buffer->pages - nr_pages; |
527 | 556 | ||
528 | for_each_buffer_cpu(buffer, cpu) { | 557 | for_each_buffer_cpu(buffer, cpu) { |
529 | cpu_buffer = buffer->buffers[cpu]; | 558 | cpu_buffer = buffer->buffers[cpu]; |
530 | rb_remove_pages(cpu_buffer, rm_pages); | 559 | rb_remove_pages(cpu_buffer, rm_pages); |
531 | } | 560 | } |
532 | goto out; | 561 | goto out; |
533 | } | 562 | } |
534 | 563 | ||
535 | /* | 564 | /* |
536 | * This is a bit more difficult. We only want to add pages | 565 | * This is a bit more difficult. We only want to add pages |
537 | * when we can allocate enough for all CPUs. We do this | 566 | * when we can allocate enough for all CPUs. We do this |
538 | * by allocating all the pages and storing them on a local | 567 | * by allocating all the pages and storing them on a local |
 539 | * linked list. If we succeed in our allocation, then we | 568 | * linked list. If we succeed in our allocation, then we |
540 | * add these pages to the cpu_buffers. Otherwise we just free | 569 | * add these pages to the cpu_buffers. Otherwise we just free |
541 | * them all and return -ENOMEM; | 570 | * them all and return -ENOMEM; |
542 | */ | 571 | */ |
543 | BUG_ON(nr_pages <= buffer->pages); | 572 | BUG_ON(nr_pages <= buffer->pages); |
544 | new_pages = nr_pages - buffer->pages; | 573 | new_pages = nr_pages - buffer->pages; |
545 | 574 | ||
546 | for_each_buffer_cpu(buffer, cpu) { | 575 | for_each_buffer_cpu(buffer, cpu) { |
547 | for (i = 0; i < new_pages; i++) { | 576 | for (i = 0; i < new_pages; i++) { |
548 | page = kzalloc_node(ALIGN(sizeof(*page), | 577 | page = kzalloc_node(ALIGN(sizeof(*page), |
549 | cache_line_size()), | 578 | cache_line_size()), |
550 | GFP_KERNEL, cpu_to_node(cpu)); | 579 | GFP_KERNEL, cpu_to_node(cpu)); |
551 | if (!page) | 580 | if (!page) |
552 | goto free_pages; | 581 | goto free_pages; |
553 | list_add(&page->list, &pages); | 582 | list_add(&page->list, &pages); |
554 | addr = __get_free_page(GFP_KERNEL); | 583 | addr = __get_free_page(GFP_KERNEL); |
555 | if (!addr) | 584 | if (!addr) |
556 | goto free_pages; | 585 | goto free_pages; |
557 | page->page = (void *)addr; | 586 | page->page = (void *)addr; |
558 | } | 587 | } |
559 | } | 588 | } |
560 | 589 | ||
561 | for_each_buffer_cpu(buffer, cpu) { | 590 | for_each_buffer_cpu(buffer, cpu) { |
562 | cpu_buffer = buffer->buffers[cpu]; | 591 | cpu_buffer = buffer->buffers[cpu]; |
563 | rb_insert_pages(cpu_buffer, &pages, new_pages); | 592 | rb_insert_pages(cpu_buffer, &pages, new_pages); |
564 | } | 593 | } |
565 | 594 | ||
566 | BUG_ON(!list_empty(&pages)); | 595 | BUG_ON(!list_empty(&pages)); |
567 | 596 | ||
568 | out: | 597 | out: |
569 | buffer->pages = nr_pages; | 598 | buffer->pages = nr_pages; |
570 | mutex_unlock(&buffer->mutex); | 599 | mutex_unlock(&buffer->mutex); |
571 | 600 | ||
572 | return size; | 601 | return size; |
573 | 602 | ||
574 | free_pages: | 603 | free_pages: |
575 | list_for_each_entry_safe(page, tmp, &pages, list) { | 604 | list_for_each_entry_safe(page, tmp, &pages, list) { |
576 | list_del_init(&page->list); | 605 | list_del_init(&page->list); |
577 | free_buffer_page(page); | 606 | free_buffer_page(page); |
578 | } | 607 | } |
579 | return -ENOMEM; | 608 | return -ENOMEM; |
580 | } | 609 | } |
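Because the grow path allocates the complete set of new pages for every CPU before splicing any of them in, a caller only needs to check the return value; on failure no pages were added. A rough sketch of how a tracer might grow an existing buffer (the 2 MB figure and the helper name are illustrative only):

#include <linux/ring_buffer.h>

/* Hypothetical helper; "buffer" is assumed to already exist. */
static int example_grow(struct ring_buffer *buffer)
{
	int ret;

	/*
	 * The caller must make sure the buffer is not being written to
	 * or read from while the size changes, as the kernel-doc above
	 * requires.
	 */
	ret = ring_buffer_resize(buffer, 2 * 1024 * 1024);
	if (ret < 0)
		return ret;	/* -ENOMEM, the buffer is unchanged */

	/* On success the rounded-up size in bytes is returned. */
	return 0;
}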
581 | 610 | ||
582 | static inline int rb_null_event(struct ring_buffer_event *event) | 611 | static inline int rb_null_event(struct ring_buffer_event *event) |
583 | { | 612 | { |
584 | return event->type == RINGBUF_TYPE_PADDING; | 613 | return event->type == RINGBUF_TYPE_PADDING; |
585 | } | 614 | } |
586 | 615 | ||
587 | static inline void *__rb_page_index(struct buffer_page *page, unsigned index) | 616 | static inline void *__rb_page_index(struct buffer_page *page, unsigned index) |
588 | { | 617 | { |
589 | return page->page + index; | 618 | return page->page + index; |
590 | } | 619 | } |
591 | 620 | ||
592 | static inline struct ring_buffer_event * | 621 | static inline struct ring_buffer_event * |
593 | rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer) | 622 | rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer) |
594 | { | 623 | { |
595 | return __rb_page_index(cpu_buffer->reader_page, | 624 | return __rb_page_index(cpu_buffer->reader_page, |
596 | cpu_buffer->reader_page->read); | 625 | cpu_buffer->reader_page->read); |
597 | } | 626 | } |
598 | 627 | ||
599 | static inline struct ring_buffer_event * | 628 | static inline struct ring_buffer_event * |
600 | rb_head_event(struct ring_buffer_per_cpu *cpu_buffer) | 629 | rb_head_event(struct ring_buffer_per_cpu *cpu_buffer) |
601 | { | 630 | { |
602 | return __rb_page_index(cpu_buffer->head_page, | 631 | return __rb_page_index(cpu_buffer->head_page, |
603 | cpu_buffer->head_page->read); | 632 | cpu_buffer->head_page->read); |
604 | } | 633 | } |
605 | 634 | ||
606 | static inline struct ring_buffer_event * | 635 | static inline struct ring_buffer_event * |
607 | rb_iter_head_event(struct ring_buffer_iter *iter) | 636 | rb_iter_head_event(struct ring_buffer_iter *iter) |
608 | { | 637 | { |
609 | return __rb_page_index(iter->head_page, iter->head); | 638 | return __rb_page_index(iter->head_page, iter->head); |
610 | } | 639 | } |
611 | 640 | ||
612 | static inline unsigned rb_page_write(struct buffer_page *bpage) | 641 | static inline unsigned rb_page_write(struct buffer_page *bpage) |
613 | { | 642 | { |
614 | return local_read(&bpage->write); | 643 | return local_read(&bpage->write); |
615 | } | 644 | } |
616 | 645 | ||
617 | static inline unsigned rb_page_commit(struct buffer_page *bpage) | 646 | static inline unsigned rb_page_commit(struct buffer_page *bpage) |
618 | { | 647 | { |
619 | return local_read(&bpage->commit); | 648 | return local_read(&bpage->commit); |
620 | } | 649 | } |
621 | 650 | ||
 622 | /* Size is determined by what has been committed */ | 651 | /* Size is determined by what has been committed */ |
623 | static inline unsigned rb_page_size(struct buffer_page *bpage) | 652 | static inline unsigned rb_page_size(struct buffer_page *bpage) |
624 | { | 653 | { |
625 | return rb_page_commit(bpage); | 654 | return rb_page_commit(bpage); |
626 | } | 655 | } |
627 | 656 | ||
628 | static inline unsigned | 657 | static inline unsigned |
629 | rb_commit_index(struct ring_buffer_per_cpu *cpu_buffer) | 658 | rb_commit_index(struct ring_buffer_per_cpu *cpu_buffer) |
630 | { | 659 | { |
631 | return rb_page_commit(cpu_buffer->commit_page); | 660 | return rb_page_commit(cpu_buffer->commit_page); |
632 | } | 661 | } |
633 | 662 | ||
634 | static inline unsigned rb_head_size(struct ring_buffer_per_cpu *cpu_buffer) | 663 | static inline unsigned rb_head_size(struct ring_buffer_per_cpu *cpu_buffer) |
635 | { | 664 | { |
636 | return rb_page_commit(cpu_buffer->head_page); | 665 | return rb_page_commit(cpu_buffer->head_page); |
637 | } | 666 | } |
638 | 667 | ||
639 | /* | 668 | /* |
640 | * When the tail hits the head and the buffer is in overwrite mode, | 669 | * When the tail hits the head and the buffer is in overwrite mode, |
641 | * the head jumps to the next page and all content on the previous | 670 | * the head jumps to the next page and all content on the previous |
642 | * page is discarded. But before doing so, we update the overrun | 671 | * page is discarded. But before doing so, we update the overrun |
643 | * variable of the buffer. | 672 | * variable of the buffer. |
644 | */ | 673 | */ |
645 | static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer) | 674 | static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer) |
646 | { | 675 | { |
647 | struct ring_buffer_event *event; | 676 | struct ring_buffer_event *event; |
648 | unsigned long head; | 677 | unsigned long head; |
649 | 678 | ||
650 | for (head = 0; head < rb_head_size(cpu_buffer); | 679 | for (head = 0; head < rb_head_size(cpu_buffer); |
651 | head += rb_event_length(event)) { | 680 | head += rb_event_length(event)) { |
652 | 681 | ||
653 | event = __rb_page_index(cpu_buffer->head_page, head); | 682 | event = __rb_page_index(cpu_buffer->head_page, head); |
654 | BUG_ON(rb_null_event(event)); | 683 | BUG_ON(rb_null_event(event)); |
655 | /* Only count data entries */ | 684 | /* Only count data entries */ |
656 | if (event->type != RINGBUF_TYPE_DATA) | 685 | if (event->type != RINGBUF_TYPE_DATA) |
657 | continue; | 686 | continue; |
658 | cpu_buffer->overrun++; | 687 | cpu_buffer->overrun++; |
659 | cpu_buffer->entries--; | 688 | cpu_buffer->entries--; |
660 | } | 689 | } |
661 | } | 690 | } |
662 | 691 | ||
663 | static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer, | 692 | static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer, |
664 | struct buffer_page **page) | 693 | struct buffer_page **page) |
665 | { | 694 | { |
666 | struct list_head *p = (*page)->list.next; | 695 | struct list_head *p = (*page)->list.next; |
667 | 696 | ||
668 | if (p == &cpu_buffer->pages) | 697 | if (p == &cpu_buffer->pages) |
669 | p = p->next; | 698 | p = p->next; |
670 | 699 | ||
671 | *page = list_entry(p, struct buffer_page, list); | 700 | *page = list_entry(p, struct buffer_page, list); |
672 | } | 701 | } |
673 | 702 | ||
674 | static inline unsigned | 703 | static inline unsigned |
675 | rb_event_index(struct ring_buffer_event *event) | 704 | rb_event_index(struct ring_buffer_event *event) |
676 | { | 705 | { |
677 | unsigned long addr = (unsigned long)event; | 706 | unsigned long addr = (unsigned long)event; |
678 | 707 | ||
679 | return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE); | 708 | return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE); |
680 | } | 709 | } |
681 | 710 | ||
682 | static inline int | 711 | static inline int |
683 | rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer, | 712 | rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer, |
684 | struct ring_buffer_event *event) | 713 | struct ring_buffer_event *event) |
685 | { | 714 | { |
686 | unsigned long addr = (unsigned long)event; | 715 | unsigned long addr = (unsigned long)event; |
687 | unsigned long index; | 716 | unsigned long index; |
688 | 717 | ||
689 | index = rb_event_index(event); | 718 | index = rb_event_index(event); |
690 | addr &= PAGE_MASK; | 719 | addr &= PAGE_MASK; |
691 | 720 | ||
692 | return cpu_buffer->commit_page->page == (void *)addr && | 721 | return cpu_buffer->commit_page->page == (void *)addr && |
693 | rb_commit_index(cpu_buffer) == index; | 722 | rb_commit_index(cpu_buffer) == index; |
694 | } | 723 | } |
695 | 724 | ||
696 | static inline void | 725 | static inline void |
697 | rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer, | 726 | rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer, |
698 | struct ring_buffer_event *event) | 727 | struct ring_buffer_event *event) |
699 | { | 728 | { |
700 | unsigned long addr = (unsigned long)event; | 729 | unsigned long addr = (unsigned long)event; |
701 | unsigned long index; | 730 | unsigned long index; |
702 | 731 | ||
703 | index = rb_event_index(event); | 732 | index = rb_event_index(event); |
704 | addr &= PAGE_MASK; | 733 | addr &= PAGE_MASK; |
705 | 734 | ||
706 | while (cpu_buffer->commit_page->page != (void *)addr) { | 735 | while (cpu_buffer->commit_page->page != (void *)addr) { |
707 | RB_WARN_ON(cpu_buffer, | 736 | RB_WARN_ON(cpu_buffer, |
708 | cpu_buffer->commit_page == cpu_buffer->tail_page); | 737 | cpu_buffer->commit_page == cpu_buffer->tail_page); |
709 | cpu_buffer->commit_page->commit = | 738 | cpu_buffer->commit_page->commit = |
710 | cpu_buffer->commit_page->write; | 739 | cpu_buffer->commit_page->write; |
711 | rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); | 740 | rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); |
712 | cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp; | 741 | cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp; |
713 | } | 742 | } |
714 | 743 | ||
715 | /* Now set the commit to the event's index */ | 744 | /* Now set the commit to the event's index */ |
716 | local_set(&cpu_buffer->commit_page->commit, index); | 745 | local_set(&cpu_buffer->commit_page->commit, index); |
717 | } | 746 | } |
718 | 747 | ||
719 | static inline void | 748 | static inline void |
720 | rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) | 749 | rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) |
721 | { | 750 | { |
722 | /* | 751 | /* |
723 | * We only race with interrupts and NMIs on this CPU. | 752 | * We only race with interrupts and NMIs on this CPU. |
724 | * If we own the commit event, then we can commit | 753 | * If we own the commit event, then we can commit |
725 | * all others that interrupted us, since the interruptions | 754 | * all others that interrupted us, since the interruptions |
726 | * are in stack format (they finish before they come | 755 | * are in stack format (they finish before they come |
727 | * back to us). This allows us to do a simple loop to | 756 | * back to us). This allows us to do a simple loop to |
728 | * assign the commit to the tail. | 757 | * assign the commit to the tail. |
729 | */ | 758 | */ |
730 | while (cpu_buffer->commit_page != cpu_buffer->tail_page) { | 759 | while (cpu_buffer->commit_page != cpu_buffer->tail_page) { |
731 | cpu_buffer->commit_page->commit = | 760 | cpu_buffer->commit_page->commit = |
732 | cpu_buffer->commit_page->write; | 761 | cpu_buffer->commit_page->write; |
733 | rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); | 762 | rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); |
734 | cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp; | 763 | cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp; |
735 | /* add barrier to keep gcc from optimizing too much */ | 764 | /* add barrier to keep gcc from optimizing too much */ |
736 | barrier(); | 765 | barrier(); |
737 | } | 766 | } |
738 | while (rb_commit_index(cpu_buffer) != | 767 | while (rb_commit_index(cpu_buffer) != |
739 | rb_page_write(cpu_buffer->commit_page)) { | 768 | rb_page_write(cpu_buffer->commit_page)) { |
740 | cpu_buffer->commit_page->commit = | 769 | cpu_buffer->commit_page->commit = |
741 | cpu_buffer->commit_page->write; | 770 | cpu_buffer->commit_page->write; |
742 | barrier(); | 771 | barrier(); |
743 | } | 772 | } |
744 | } | 773 | } |
745 | 774 | ||
746 | static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer) | 775 | static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer) |
747 | { | 776 | { |
748 | cpu_buffer->read_stamp = cpu_buffer->reader_page->time_stamp; | 777 | cpu_buffer->read_stamp = cpu_buffer->reader_page->time_stamp; |
749 | cpu_buffer->reader_page->read = 0; | 778 | cpu_buffer->reader_page->read = 0; |
750 | } | 779 | } |
751 | 780 | ||
752 | static inline void rb_inc_iter(struct ring_buffer_iter *iter) | 781 | static inline void rb_inc_iter(struct ring_buffer_iter *iter) |
753 | { | 782 | { |
754 | struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; | 783 | struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; |
755 | 784 | ||
756 | /* | 785 | /* |
757 | * The iterator could be on the reader page (it starts there). | 786 | * The iterator could be on the reader page (it starts there). |
758 | * But the head could have moved, since the reader was | 787 | * But the head could have moved, since the reader was |
759 | * found. Check for this case and assign the iterator | 788 | * found. Check for this case and assign the iterator |
760 | * to the head page instead of next. | 789 | * to the head page instead of next. |
761 | */ | 790 | */ |
762 | if (iter->head_page == cpu_buffer->reader_page) | 791 | if (iter->head_page == cpu_buffer->reader_page) |
763 | iter->head_page = cpu_buffer->head_page; | 792 | iter->head_page = cpu_buffer->head_page; |
764 | else | 793 | else |
765 | rb_inc_page(cpu_buffer, &iter->head_page); | 794 | rb_inc_page(cpu_buffer, &iter->head_page); |
766 | 795 | ||
767 | iter->read_stamp = iter->head_page->time_stamp; | 796 | iter->read_stamp = iter->head_page->time_stamp; |
768 | iter->head = 0; | 797 | iter->head = 0; |
769 | } | 798 | } |
770 | 799 | ||
771 | /** | 800 | /** |
772 | * ring_buffer_update_event - update event type and data | 801 | * ring_buffer_update_event - update event type and data |
 773 | * @event: the event to update | 802 | * @event: the event to update |
774 | * @type: the type of event | 803 | * @type: the type of event |
775 | * @length: the size of the event field in the ring buffer | 804 | * @length: the size of the event field in the ring buffer |
776 | * | 805 | * |
777 | * Update the type and data fields of the event. The length | 806 | * Update the type and data fields of the event. The length |
778 | * is the actual size that is written to the ring buffer, | 807 | * is the actual size that is written to the ring buffer, |
779 | * and with this, we can determine what to place into the | 808 | * and with this, we can determine what to place into the |
780 | * data field. | 809 | * data field. |
781 | */ | 810 | */ |
782 | static inline void | 811 | static inline void |
783 | rb_update_event(struct ring_buffer_event *event, | 812 | rb_update_event(struct ring_buffer_event *event, |
784 | unsigned type, unsigned length) | 813 | unsigned type, unsigned length) |
785 | { | 814 | { |
786 | event->type = type; | 815 | event->type = type; |
787 | 816 | ||
788 | switch (type) { | 817 | switch (type) { |
789 | 818 | ||
790 | case RINGBUF_TYPE_PADDING: | 819 | case RINGBUF_TYPE_PADDING: |
791 | break; | 820 | break; |
792 | 821 | ||
793 | case RINGBUF_TYPE_TIME_EXTEND: | 822 | case RINGBUF_TYPE_TIME_EXTEND: |
794 | event->len = | 823 | event->len = |
795 | (RB_LEN_TIME_EXTEND + (RB_ALIGNMENT-1)) | 824 | (RB_LEN_TIME_EXTEND + (RB_ALIGNMENT-1)) |
796 | >> RB_ALIGNMENT_SHIFT; | 825 | >> RB_ALIGNMENT_SHIFT; |
797 | break; | 826 | break; |
798 | 827 | ||
799 | case RINGBUF_TYPE_TIME_STAMP: | 828 | case RINGBUF_TYPE_TIME_STAMP: |
800 | event->len = | 829 | event->len = |
801 | (RB_LEN_TIME_STAMP + (RB_ALIGNMENT-1)) | 830 | (RB_LEN_TIME_STAMP + (RB_ALIGNMENT-1)) |
802 | >> RB_ALIGNMENT_SHIFT; | 831 | >> RB_ALIGNMENT_SHIFT; |
803 | break; | 832 | break; |
804 | 833 | ||
805 | case RINGBUF_TYPE_DATA: | 834 | case RINGBUF_TYPE_DATA: |
806 | length -= RB_EVNT_HDR_SIZE; | 835 | length -= RB_EVNT_HDR_SIZE; |
807 | if (length > RB_MAX_SMALL_DATA) { | 836 | if (length > RB_MAX_SMALL_DATA) { |
808 | event->len = 0; | 837 | event->len = 0; |
809 | event->array[0] = length; | 838 | event->array[0] = length; |
810 | } else | 839 | } else |
811 | event->len = | 840 | event->len = |
812 | (length + (RB_ALIGNMENT-1)) | 841 | (length + (RB_ALIGNMENT-1)) |
813 | >> RB_ALIGNMENT_SHIFT; | 842 | >> RB_ALIGNMENT_SHIFT; |
814 | break; | 843 | break; |
815 | default: | 844 | default: |
816 | BUG(); | 845 | BUG(); |
817 | } | 846 | } |
818 | } | 847 | } |
819 | 848 | ||
820 | static inline unsigned rb_calculate_event_length(unsigned length) | 849 | static inline unsigned rb_calculate_event_length(unsigned length) |
821 | { | 850 | { |
822 | struct ring_buffer_event event; /* Used only for sizeof array */ | 851 | struct ring_buffer_event event; /* Used only for sizeof array */ |
823 | 852 | ||
 824 | /* zero length can cause confusion */ | 853 | /* zero length can cause confusion */ |
825 | if (!length) | 854 | if (!length) |
826 | length = 1; | 855 | length = 1; |
827 | 856 | ||
828 | if (length > RB_MAX_SMALL_DATA) | 857 | if (length > RB_MAX_SMALL_DATA) |
829 | length += sizeof(event.array[0]); | 858 | length += sizeof(event.array[0]); |
830 | 859 | ||
831 | length += RB_EVNT_HDR_SIZE; | 860 | length += RB_EVNT_HDR_SIZE; |
832 | length = ALIGN(length, RB_ALIGNMENT); | 861 | length = ALIGN(length, RB_ALIGNMENT); |
833 | 862 | ||
834 | return length; | 863 | return length; |
835 | } | 864 | } |
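rb_calculate_event_length() and the DATA case of rb_update_event() are two halves of the same encoding: the total reservation is the payload plus the header, rounded up to the event alignment, and the stored len field counts the payload in alignment units (spilling into array[0] when it is too large). A worked sketch of that arithmetic; the constant values below are assumptions for illustration, since they are not defined in this hunk:

#include <linux/kernel.h>
#include <linux/types.h>

/* Assumed values, for illustration only. */
#define EX_ALIGNMENT_SHIFT	2
#define EX_ALIGNMENT		(1 << EX_ALIGNMENT_SHIFT)
#define EX_EVNT_HDR_SIZE	4
#define EX_MAX_SMALL_DATA	28

/* Mirrors rb_calculate_event_length() for a small payload. */
static unsigned ex_event_length(unsigned length)
{
	if (!length)
		length = 1;
	if (length > EX_MAX_SMALL_DATA)
		length += sizeof(u32);		/* room for array[0] */
	length += EX_EVNT_HDR_SIZE;
	return ALIGN(length, EX_ALIGNMENT);
}

/*
 * Example: a 5-byte payload.
 *   ex_event_length(5) = ALIGN(5 + 4, 4) = 12 bytes reserved.
 * rb_update_event() then subtracts the header again (12 - 4 = 8)
 * and, since 8 <= EX_MAX_SMALL_DATA, stores
 *   len = (8 + 3) >> 2 = 2 alignment units of payload.
 */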
836 | 865 | ||
837 | static struct ring_buffer_event * | 866 | static struct ring_buffer_event * |
838 | __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | 867 | __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, |
839 | unsigned type, unsigned long length, u64 *ts) | 868 | unsigned type, unsigned long length, u64 *ts) |
840 | { | 869 | { |
841 | struct buffer_page *tail_page, *head_page, *reader_page; | 870 | struct buffer_page *tail_page, *head_page, *reader_page; |
842 | unsigned long tail, write; | 871 | unsigned long tail, write; |
843 | struct ring_buffer *buffer = cpu_buffer->buffer; | 872 | struct ring_buffer *buffer = cpu_buffer->buffer; |
844 | struct ring_buffer_event *event; | 873 | struct ring_buffer_event *event; |
845 | unsigned long flags; | 874 | unsigned long flags; |
846 | 875 | ||
847 | tail_page = cpu_buffer->tail_page; | 876 | tail_page = cpu_buffer->tail_page; |
848 | write = local_add_return(length, &tail_page->write); | 877 | write = local_add_return(length, &tail_page->write); |
849 | tail = write - length; | 878 | tail = write - length; |
850 | 879 | ||
 851 | /* See if we shot past the end of this buffer page */ | 880 | /* See if we shot past the end of this buffer page */ |
852 | if (write > BUF_PAGE_SIZE) { | 881 | if (write > BUF_PAGE_SIZE) { |
853 | struct buffer_page *next_page = tail_page; | 882 | struct buffer_page *next_page = tail_page; |
854 | 883 | ||
855 | spin_lock_irqsave(&cpu_buffer->lock, flags); | 884 | spin_lock_irqsave(&cpu_buffer->lock, flags); |
856 | 885 | ||
857 | rb_inc_page(cpu_buffer, &next_page); | 886 | rb_inc_page(cpu_buffer, &next_page); |
858 | 887 | ||
859 | head_page = cpu_buffer->head_page; | 888 | head_page = cpu_buffer->head_page; |
860 | reader_page = cpu_buffer->reader_page; | 889 | reader_page = cpu_buffer->reader_page; |
861 | 890 | ||
862 | /* we grabbed the lock before incrementing */ | 891 | /* we grabbed the lock before incrementing */ |
863 | RB_WARN_ON(cpu_buffer, next_page == reader_page); | 892 | RB_WARN_ON(cpu_buffer, next_page == reader_page); |
864 | 893 | ||
865 | /* | 894 | /* |
866 | * If for some reason, we had an interrupt storm that made | 895 | * If for some reason, we had an interrupt storm that made |
867 | * it all the way around the buffer, bail, and warn | 896 | * it all the way around the buffer, bail, and warn |
868 | * about it. | 897 | * about it. |
869 | */ | 898 | */ |
870 | if (unlikely(next_page == cpu_buffer->commit_page)) { | 899 | if (unlikely(next_page == cpu_buffer->commit_page)) { |
871 | WARN_ON_ONCE(1); | 900 | WARN_ON_ONCE(1); |
872 | goto out_unlock; | 901 | goto out_unlock; |
873 | } | 902 | } |
874 | 903 | ||
875 | if (next_page == head_page) { | 904 | if (next_page == head_page) { |
876 | if (!(buffer->flags & RB_FL_OVERWRITE)) { | 905 | if (!(buffer->flags & RB_FL_OVERWRITE)) { |
877 | /* reset write */ | 906 | /* reset write */ |
878 | if (tail <= BUF_PAGE_SIZE) | 907 | if (tail <= BUF_PAGE_SIZE) |
879 | local_set(&tail_page->write, tail); | 908 | local_set(&tail_page->write, tail); |
880 | goto out_unlock; | 909 | goto out_unlock; |
881 | } | 910 | } |
882 | 911 | ||
883 | /* tail_page has not moved yet? */ | 912 | /* tail_page has not moved yet? */ |
884 | if (tail_page == cpu_buffer->tail_page) { | 913 | if (tail_page == cpu_buffer->tail_page) { |
885 | /* count overflows */ | 914 | /* count overflows */ |
886 | rb_update_overflow(cpu_buffer); | 915 | rb_update_overflow(cpu_buffer); |
887 | 916 | ||
888 | rb_inc_page(cpu_buffer, &head_page); | 917 | rb_inc_page(cpu_buffer, &head_page); |
889 | cpu_buffer->head_page = head_page; | 918 | cpu_buffer->head_page = head_page; |
890 | cpu_buffer->head_page->read = 0; | 919 | cpu_buffer->head_page->read = 0; |
891 | } | 920 | } |
892 | } | 921 | } |
893 | 922 | ||
894 | /* | 923 | /* |
895 | * If the tail page is still the same as what we think | 924 | * If the tail page is still the same as what we think |
896 | * it is, then it is up to us to update the tail | 925 | * it is, then it is up to us to update the tail |
897 | * pointer. | 926 | * pointer. |
898 | */ | 927 | */ |
899 | if (tail_page == cpu_buffer->tail_page) { | 928 | if (tail_page == cpu_buffer->tail_page) { |
900 | local_set(&next_page->write, 0); | 929 | local_set(&next_page->write, 0); |
901 | local_set(&next_page->commit, 0); | 930 | local_set(&next_page->commit, 0); |
902 | cpu_buffer->tail_page = next_page; | 931 | cpu_buffer->tail_page = next_page; |
903 | 932 | ||
904 | /* reread the time stamp */ | 933 | /* reread the time stamp */ |
905 | *ts = ring_buffer_time_stamp(cpu_buffer->cpu); | 934 | *ts = ring_buffer_time_stamp(cpu_buffer->cpu); |
906 | cpu_buffer->tail_page->time_stamp = *ts; | 935 | cpu_buffer->tail_page->time_stamp = *ts; |
907 | } | 936 | } |
908 | 937 | ||
909 | /* | 938 | /* |
910 | * The actual tail page has moved forward. | 939 | * The actual tail page has moved forward. |
911 | */ | 940 | */ |
912 | if (tail < BUF_PAGE_SIZE) { | 941 | if (tail < BUF_PAGE_SIZE) { |
913 | /* Mark the rest of the page with padding */ | 942 | /* Mark the rest of the page with padding */ |
914 | event = __rb_page_index(tail_page, tail); | 943 | event = __rb_page_index(tail_page, tail); |
915 | event->type = RINGBUF_TYPE_PADDING; | 944 | event->type = RINGBUF_TYPE_PADDING; |
916 | } | 945 | } |
917 | 946 | ||
918 | if (tail <= BUF_PAGE_SIZE) | 947 | if (tail <= BUF_PAGE_SIZE) |
919 | /* Set the write back to the previous setting */ | 948 | /* Set the write back to the previous setting */ |
920 | local_set(&tail_page->write, tail); | 949 | local_set(&tail_page->write, tail); |
921 | 950 | ||
922 | /* | 951 | /* |
923 | * If this was a commit entry that failed, | 952 | * If this was a commit entry that failed, |
924 | * increment that too | 953 | * increment that too |
925 | */ | 954 | */ |
926 | if (tail_page == cpu_buffer->commit_page && | 955 | if (tail_page == cpu_buffer->commit_page && |
927 | tail == rb_commit_index(cpu_buffer)) { | 956 | tail == rb_commit_index(cpu_buffer)) { |
928 | rb_set_commit_to_write(cpu_buffer); | 957 | rb_set_commit_to_write(cpu_buffer); |
929 | } | 958 | } |
930 | 959 | ||
931 | spin_unlock_irqrestore(&cpu_buffer->lock, flags); | 960 | spin_unlock_irqrestore(&cpu_buffer->lock, flags); |
932 | 961 | ||
933 | /* fail and let the caller try again */ | 962 | /* fail and let the caller try again */ |
934 | return ERR_PTR(-EAGAIN); | 963 | return ERR_PTR(-EAGAIN); |
935 | } | 964 | } |
936 | 965 | ||
937 | /* We reserved something on the buffer */ | 966 | /* We reserved something on the buffer */ |
938 | 967 | ||
939 | BUG_ON(write > BUF_PAGE_SIZE); | 968 | BUG_ON(write > BUF_PAGE_SIZE); |
940 | 969 | ||
941 | event = __rb_page_index(tail_page, tail); | 970 | event = __rb_page_index(tail_page, tail); |
942 | rb_update_event(event, type, length); | 971 | rb_update_event(event, type, length); |
943 | 972 | ||
944 | /* | 973 | /* |
945 | * If this is a commit and the tail is zero, then update | 974 | * If this is a commit and the tail is zero, then update |
946 | * this page's time stamp. | 975 | * this page's time stamp. |
947 | */ | 976 | */ |
948 | if (!tail && rb_is_commit(cpu_buffer, event)) | 977 | if (!tail && rb_is_commit(cpu_buffer, event)) |
949 | cpu_buffer->commit_page->time_stamp = *ts; | 978 | cpu_buffer->commit_page->time_stamp = *ts; |
950 | 979 | ||
951 | return event; | 980 | return event; |
952 | 981 | ||
953 | out_unlock: | 982 | out_unlock: |
954 | spin_unlock_irqrestore(&cpu_buffer->lock, flags); | 983 | spin_unlock_irqrestore(&cpu_buffer->lock, flags); |
955 | return NULL; | 984 | return NULL; |
956 | } | 985 | } |
957 | 986 | ||
958 | static int | 987 | static int |
959 | rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer, | 988 | rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer, |
960 | u64 *ts, u64 *delta) | 989 | u64 *ts, u64 *delta) |
961 | { | 990 | { |
962 | struct ring_buffer_event *event; | 991 | struct ring_buffer_event *event; |
963 | static int once; | 992 | static int once; |
964 | int ret; | 993 | int ret; |
965 | 994 | ||
966 | if (unlikely(*delta > (1ULL << 59) && !once++)) { | 995 | if (unlikely(*delta > (1ULL << 59) && !once++)) { |
967 | printk(KERN_WARNING "Delta way too big! %llu" | 996 | printk(KERN_WARNING "Delta way too big! %llu" |
968 | " ts=%llu write stamp = %llu\n", | 997 | " ts=%llu write stamp = %llu\n", |
969 | (unsigned long long)*delta, | 998 | (unsigned long long)*delta, |
970 | (unsigned long long)*ts, | 999 | (unsigned long long)*ts, |
971 | (unsigned long long)cpu_buffer->write_stamp); | 1000 | (unsigned long long)cpu_buffer->write_stamp); |
972 | WARN_ON(1); | 1001 | WARN_ON(1); |
973 | } | 1002 | } |
974 | 1003 | ||
975 | /* | 1004 | /* |
 976 | * The delta is too big, we need to add a | 1005 | * The delta is too big, we need to add a |
977 | * new timestamp. | 1006 | * new timestamp. |
978 | */ | 1007 | */ |
979 | event = __rb_reserve_next(cpu_buffer, | 1008 | event = __rb_reserve_next(cpu_buffer, |
980 | RINGBUF_TYPE_TIME_EXTEND, | 1009 | RINGBUF_TYPE_TIME_EXTEND, |
981 | RB_LEN_TIME_EXTEND, | 1010 | RB_LEN_TIME_EXTEND, |
982 | ts); | 1011 | ts); |
983 | if (!event) | 1012 | if (!event) |
984 | return -EBUSY; | 1013 | return -EBUSY; |
985 | 1014 | ||
986 | if (PTR_ERR(event) == -EAGAIN) | 1015 | if (PTR_ERR(event) == -EAGAIN) |
987 | return -EAGAIN; | 1016 | return -EAGAIN; |
988 | 1017 | ||
 989 | /* Only a committed time event can update the write stamp */ | 1018 | /* Only a committed time event can update the write stamp */ |
990 | if (rb_is_commit(cpu_buffer, event)) { | 1019 | if (rb_is_commit(cpu_buffer, event)) { |
991 | /* | 1020 | /* |
992 | * If this is the first on the page, then we need to | 1021 | * If this is the first on the page, then we need to |
993 | * update the page itself, and just put in a zero. | 1022 | * update the page itself, and just put in a zero. |
994 | */ | 1023 | */ |
995 | if (rb_event_index(event)) { | 1024 | if (rb_event_index(event)) { |
996 | event->time_delta = *delta & TS_MASK; | 1025 | event->time_delta = *delta & TS_MASK; |
997 | event->array[0] = *delta >> TS_SHIFT; | 1026 | event->array[0] = *delta >> TS_SHIFT; |
998 | } else { | 1027 | } else { |
999 | cpu_buffer->commit_page->time_stamp = *ts; | 1028 | cpu_buffer->commit_page->time_stamp = *ts; |
1000 | event->time_delta = 0; | 1029 | event->time_delta = 0; |
1001 | event->array[0] = 0; | 1030 | event->array[0] = 0; |
1002 | } | 1031 | } |
1003 | cpu_buffer->write_stamp = *ts; | 1032 | cpu_buffer->write_stamp = *ts; |
1004 | /* let the caller know this was the commit */ | 1033 | /* let the caller know this was the commit */ |
1005 | ret = 1; | 1034 | ret = 1; |
1006 | } else { | 1035 | } else { |
1007 | /* Darn, this is just wasted space */ | 1036 | /* Darn, this is just wasted space */ |
1008 | event->time_delta = 0; | 1037 | event->time_delta = 0; |
1009 | event->array[0] = 0; | 1038 | event->array[0] = 0; |
1010 | ret = 0; | 1039 | ret = 0; |
1011 | } | 1040 | } |
1012 | 1041 | ||
1013 | *delta = 0; | 1042 | *delta = 0; |
1014 | 1043 | ||
1015 | return ret; | 1044 | return ret; |
1016 | } | 1045 | } |
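When the delta between the current timestamp and write_stamp no longer fits in an event's time_delta field, the TIME_EXTEND event above splits it: the low bits go into time_delta and the overflow goes into array[0]. A small sketch of how the two halves are recombined on the read side, on the assumption that TS_MASK covers exactly the low TS_SHIFT bits (i.e. TS_MASK == (1ULL << TS_SHIFT) - 1):

#include <linux/types.h>

/*
 * Illustrative reader-side helper (hypothetical name), mirroring the
 * split performed in rb_add_time_stamp():
 *   event->time_delta = delta & TS_MASK;
 *   event->array[0]   = delta >> TS_SHIFT;
 */
static u64 ex_read_time_extend(u32 time_delta, u32 overflow, unsigned ts_shift)
{
	/* Recombine the overflow word with the low bits. */
	return ((u64)overflow << ts_shift) | time_delta;
}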
1017 | 1046 | ||
1018 | static struct ring_buffer_event * | 1047 | static struct ring_buffer_event * |
1019 | rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, | 1048 | rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, |
1020 | unsigned type, unsigned long length) | 1049 | unsigned type, unsigned long length) |
1021 | { | 1050 | { |
1022 | struct ring_buffer_event *event; | 1051 | struct ring_buffer_event *event; |
1023 | u64 ts, delta; | 1052 | u64 ts, delta; |
1024 | int commit = 0; | 1053 | int commit = 0; |
1025 | int nr_loops = 0; | 1054 | int nr_loops = 0; |
1026 | 1055 | ||
1027 | again: | 1056 | again: |
1028 | /* | 1057 | /* |
1029 | * We allow for interrupts to reenter here and do a trace. | 1058 | * We allow for interrupts to reenter here and do a trace. |
1030 | * If one does, it will cause this original code to loop | 1059 | * If one does, it will cause this original code to loop |
1031 | * back here. Even with heavy interrupts happening, this | 1060 | * back here. Even with heavy interrupts happening, this |
1032 | * should only happen a few times in a row. If this happens | 1061 | * should only happen a few times in a row. If this happens |
1033 | * 1000 times in a row, there must be either an interrupt | 1062 | * 1000 times in a row, there must be either an interrupt |
1034 | * storm or we have something buggy. | 1063 | * storm or we have something buggy. |
1035 | * Bail! | 1064 | * Bail! |
1036 | */ | 1065 | */ |
1037 | if (unlikely(++nr_loops > 1000)) { | 1066 | if (unlikely(++nr_loops > 1000)) { |
1038 | RB_WARN_ON(cpu_buffer, 1); | 1067 | RB_WARN_ON(cpu_buffer, 1); |
1039 | return NULL; | 1068 | return NULL; |
1040 | } | 1069 | } |
1041 | 1070 | ||
1042 | ts = ring_buffer_time_stamp(cpu_buffer->cpu); | 1071 | ts = ring_buffer_time_stamp(cpu_buffer->cpu); |
1043 | 1072 | ||
1044 | /* | 1073 | /* |
1045 | * Only the first commit can update the timestamp. | 1074 | * Only the first commit can update the timestamp. |
1046 | * Yes there is a race here. If an interrupt comes in | 1075 | * Yes there is a race here. If an interrupt comes in |
1047 | * just after the conditional and it traces too, then it | 1076 | * just after the conditional and it traces too, then it |
1048 | * will also check the deltas. More than one timestamp may | 1077 | * will also check the deltas. More than one timestamp may |
1049 | * also be made. But only the entry that did the actual | 1078 | * also be made. But only the entry that did the actual |
1050 | * commit will be something other than zero. | 1079 | * commit will be something other than zero. |
1051 | */ | 1080 | */ |
1052 | if (cpu_buffer->tail_page == cpu_buffer->commit_page && | 1081 | if (cpu_buffer->tail_page == cpu_buffer->commit_page && |
1053 | rb_page_write(cpu_buffer->tail_page) == | 1082 | rb_page_write(cpu_buffer->tail_page) == |
1054 | rb_commit_index(cpu_buffer)) { | 1083 | rb_commit_index(cpu_buffer)) { |
1055 | 1084 | ||
1056 | delta = ts - cpu_buffer->write_stamp; | 1085 | delta = ts - cpu_buffer->write_stamp; |
1057 | 1086 | ||
1058 | /* make sure this delta is calculated here */ | 1087 | /* make sure this delta is calculated here */ |
1059 | barrier(); | 1088 | barrier(); |
1060 | 1089 | ||
1061 | /* Did the write stamp get updated already? */ | 1090 | /* Did the write stamp get updated already? */ |
1062 | if (unlikely(ts < cpu_buffer->write_stamp)) | 1091 | if (unlikely(ts < cpu_buffer->write_stamp)) |
1063 | delta = 0; | 1092 | delta = 0; |
1064 | 1093 | ||
1065 | if (test_time_stamp(delta)) { | 1094 | if (test_time_stamp(delta)) { |
1066 | 1095 | ||
1067 | commit = rb_add_time_stamp(cpu_buffer, &ts, &delta); | 1096 | commit = rb_add_time_stamp(cpu_buffer, &ts, &delta); |
1068 | 1097 | ||
1069 | if (commit == -EBUSY) | 1098 | if (commit == -EBUSY) |
1070 | return NULL; | 1099 | return NULL; |
1071 | 1100 | ||
1072 | if (commit == -EAGAIN) | 1101 | if (commit == -EAGAIN) |
1073 | goto again; | 1102 | goto again; |
1074 | 1103 | ||
1075 | RB_WARN_ON(cpu_buffer, commit < 0); | 1104 | RB_WARN_ON(cpu_buffer, commit < 0); |
1076 | } | 1105 | } |
1077 | } else | 1106 | } else |
1078 | /* Non commits have zero deltas */ | 1107 | /* Non commits have zero deltas */ |
1079 | delta = 0; | 1108 | delta = 0; |
1080 | 1109 | ||
1081 | event = __rb_reserve_next(cpu_buffer, type, length, &ts); | 1110 | event = __rb_reserve_next(cpu_buffer, type, length, &ts); |
1082 | if (PTR_ERR(event) == -EAGAIN) | 1111 | if (PTR_ERR(event) == -EAGAIN) |
1083 | goto again; | 1112 | goto again; |
1084 | 1113 | ||
1085 | if (!event) { | 1114 | if (!event) { |
1086 | if (unlikely(commit)) | 1115 | if (unlikely(commit)) |
1087 | /* | 1116 | /* |
 1088 | * Ouch! We needed a timestamp and it was committed. But | 1117 | * Ouch! We needed a timestamp and it was committed. But |
1089 | * we didn't get our event reserved. | 1118 | * we didn't get our event reserved. |
1090 | */ | 1119 | */ |
1091 | rb_set_commit_to_write(cpu_buffer); | 1120 | rb_set_commit_to_write(cpu_buffer); |
1092 | return NULL; | 1121 | return NULL; |
1093 | } | 1122 | } |
1094 | 1123 | ||
1095 | /* | 1124 | /* |
 1096 | * If the timestamp was committed, make the commit our entry | 1125 | * If the timestamp was committed, make the commit our entry |
1097 | * now so that we will update it when needed. | 1126 | * now so that we will update it when needed. |
1098 | */ | 1127 | */ |
1099 | if (commit) | 1128 | if (commit) |
1100 | rb_set_commit_event(cpu_buffer, event); | 1129 | rb_set_commit_event(cpu_buffer, event); |
1101 | else if (!rb_is_commit(cpu_buffer, event)) | 1130 | else if (!rb_is_commit(cpu_buffer, event)) |
1102 | delta = 0; | 1131 | delta = 0; |
1103 | 1132 | ||
1104 | event->time_delta = delta; | 1133 | event->time_delta = delta; |
1105 | 1134 | ||
1106 | return event; | 1135 | return event; |
1107 | } | 1136 | } |
1108 | 1137 | ||
1109 | static DEFINE_PER_CPU(int, rb_need_resched); | 1138 | static DEFINE_PER_CPU(int, rb_need_resched); |
1110 | 1139 | ||
1111 | /** | 1140 | /** |
1112 | * ring_buffer_lock_reserve - reserve a part of the buffer | 1141 | * ring_buffer_lock_reserve - reserve a part of the buffer |
1113 | * @buffer: the ring buffer to reserve from | 1142 | * @buffer: the ring buffer to reserve from |
1114 | * @length: the length of the data to reserve (excluding event header) | 1143 | * @length: the length of the data to reserve (excluding event header) |
1115 | * @flags: a pointer to save the interrupt flags | 1144 | * @flags: a pointer to save the interrupt flags |
1116 | * | 1145 | * |
 1117 | * Returns a reserved event on the ring buffer to copy directly to. | 1146 | * Returns a reserved event on the ring buffer to copy directly to. |
1118 | * The user of this interface will need to get the body to write into | 1147 | * The user of this interface will need to get the body to write into |
1119 | * and can use the ring_buffer_event_data() interface. | 1148 | * and can use the ring_buffer_event_data() interface. |
1120 | * | 1149 | * |
1121 | * The length is the length of the data needed, not the event length | 1150 | * The length is the length of the data needed, not the event length |
1122 | * which also includes the event header. | 1151 | * which also includes the event header. |
1123 | * | 1152 | * |
1124 | * Must be paired with ring_buffer_unlock_commit, unless NULL is returned. | 1153 | * Must be paired with ring_buffer_unlock_commit, unless NULL is returned. |
1125 | * If NULL is returned, then nothing has been allocated or locked. | 1154 | * If NULL is returned, then nothing has been allocated or locked. |
1126 | */ | 1155 | */ |
1127 | struct ring_buffer_event * | 1156 | struct ring_buffer_event * |
1128 | ring_buffer_lock_reserve(struct ring_buffer *buffer, | 1157 | ring_buffer_lock_reserve(struct ring_buffer *buffer, |
1129 | unsigned long length, | 1158 | unsigned long length, |
1130 | unsigned long *flags) | 1159 | unsigned long *flags) |
1131 | { | 1160 | { |
1132 | struct ring_buffer_per_cpu *cpu_buffer; | 1161 | struct ring_buffer_per_cpu *cpu_buffer; |
1133 | struct ring_buffer_event *event; | 1162 | struct ring_buffer_event *event; |
1134 | int cpu, resched; | 1163 | int cpu, resched; |
1135 | 1164 | ||
1165 | if (ring_buffers_off) | ||
1166 | return NULL; | ||
1167 | |||
1136 | if (atomic_read(&buffer->record_disabled)) | 1168 | if (atomic_read(&buffer->record_disabled)) |
1137 | return NULL; | 1169 | return NULL; |
1138 | 1170 | ||
1139 | /* If we are tracing schedule, we don't want to recurse */ | 1171 | /* If we are tracing schedule, we don't want to recurse */ |
1140 | resched = need_resched(); | 1172 | resched = need_resched(); |
1141 | preempt_disable_notrace(); | 1173 | preempt_disable_notrace(); |
1142 | 1174 | ||
1143 | cpu = raw_smp_processor_id(); | 1175 | cpu = raw_smp_processor_id(); |
1144 | 1176 | ||
1145 | if (!cpu_isset(cpu, buffer->cpumask)) | 1177 | if (!cpu_isset(cpu, buffer->cpumask)) |
1146 | goto out; | 1178 | goto out; |
1147 | 1179 | ||
1148 | cpu_buffer = buffer->buffers[cpu]; | 1180 | cpu_buffer = buffer->buffers[cpu]; |
1149 | 1181 | ||
1150 | if (atomic_read(&cpu_buffer->record_disabled)) | 1182 | if (atomic_read(&cpu_buffer->record_disabled)) |
1151 | goto out; | 1183 | goto out; |
1152 | 1184 | ||
1153 | length = rb_calculate_event_length(length); | 1185 | length = rb_calculate_event_length(length); |
1154 | if (length > BUF_PAGE_SIZE) | 1186 | if (length > BUF_PAGE_SIZE) |
1155 | goto out; | 1187 | goto out; |
1156 | 1188 | ||
1157 | event = rb_reserve_next_event(cpu_buffer, RINGBUF_TYPE_DATA, length); | 1189 | event = rb_reserve_next_event(cpu_buffer, RINGBUF_TYPE_DATA, length); |
1158 | if (!event) | 1190 | if (!event) |
1159 | goto out; | 1191 | goto out; |
1160 | 1192 | ||
1161 | /* | 1193 | /* |
1162 | * Need to store resched state on this cpu. | 1194 | * Need to store resched state on this cpu. |
1163 | * Only the first needs to. | 1195 | * Only the first needs to. |
1164 | */ | 1196 | */ |
1165 | 1197 | ||
1166 | if (preempt_count() == 1) | 1198 | if (preempt_count() == 1) |
1167 | per_cpu(rb_need_resched, cpu) = resched; | 1199 | per_cpu(rb_need_resched, cpu) = resched; |
1168 | 1200 | ||
1169 | return event; | 1201 | return event; |
1170 | 1202 | ||
1171 | out: | 1203 | out: |
1172 | if (resched) | 1204 | if (resched) |
 1173 | preempt_enable_no_resched_notrace(); | 1205 | preempt_enable_no_resched_notrace(); |
1174 | else | 1206 | else |
1175 | preempt_enable_notrace(); | 1207 | preempt_enable_notrace(); |
1176 | return NULL; | 1208 | return NULL; |
1177 | } | 1209 | } |
1178 | 1210 | ||
1179 | static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, | 1211 | static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, |
1180 | struct ring_buffer_event *event) | 1212 | struct ring_buffer_event *event) |
1181 | { | 1213 | { |
1182 | cpu_buffer->entries++; | 1214 | cpu_buffer->entries++; |
1183 | 1215 | ||
1184 | /* Only process further if we own the commit */ | 1216 | /* Only process further if we own the commit */ |
1185 | if (!rb_is_commit(cpu_buffer, event)) | 1217 | if (!rb_is_commit(cpu_buffer, event)) |
1186 | return; | 1218 | return; |
1187 | 1219 | ||
1188 | cpu_buffer->write_stamp += event->time_delta; | 1220 | cpu_buffer->write_stamp += event->time_delta; |
1189 | 1221 | ||
1190 | rb_set_commit_to_write(cpu_buffer); | 1222 | rb_set_commit_to_write(cpu_buffer); |
1191 | } | 1223 | } |
1192 | 1224 | ||
1193 | /** | 1225 | /** |
 1194 | * ring_buffer_unlock_commit - commit a reserved event | 1226 | * ring_buffer_unlock_commit - commit a reserved event |
1195 | * @buffer: The buffer to commit to | 1227 | * @buffer: The buffer to commit to |
1196 | * @event: The event pointer to commit. | 1228 | * @event: The event pointer to commit. |
1197 | * @flags: the interrupt flags received from ring_buffer_lock_reserve. | 1229 | * @flags: the interrupt flags received from ring_buffer_lock_reserve. |
1198 | * | 1230 | * |
1199 | * This commits the data to the ring buffer, and releases any locks held. | 1231 | * This commits the data to the ring buffer, and releases any locks held. |
1200 | * | 1232 | * |
1201 | * Must be paired with ring_buffer_lock_reserve. | 1233 | * Must be paired with ring_buffer_lock_reserve. |
1202 | */ | 1234 | */ |
1203 | int ring_buffer_unlock_commit(struct ring_buffer *buffer, | 1235 | int ring_buffer_unlock_commit(struct ring_buffer *buffer, |
1204 | struct ring_buffer_event *event, | 1236 | struct ring_buffer_event *event, |
1205 | unsigned long flags) | 1237 | unsigned long flags) |
1206 | { | 1238 | { |
1207 | struct ring_buffer_per_cpu *cpu_buffer; | 1239 | struct ring_buffer_per_cpu *cpu_buffer; |
1208 | int cpu = raw_smp_processor_id(); | 1240 | int cpu = raw_smp_processor_id(); |
1209 | 1241 | ||
1210 | cpu_buffer = buffer->buffers[cpu]; | 1242 | cpu_buffer = buffer->buffers[cpu]; |
1211 | 1243 | ||
1212 | rb_commit(cpu_buffer, event); | 1244 | rb_commit(cpu_buffer, event); |
1213 | 1245 | ||
1214 | /* | 1246 | /* |
1215 | * Only the last preempt count needs to restore preemption. | 1247 | * Only the last preempt count needs to restore preemption. |
1216 | */ | 1248 | */ |
1217 | if (preempt_count() == 1) { | 1249 | if (preempt_count() == 1) { |
1218 | if (per_cpu(rb_need_resched, cpu)) | 1250 | if (per_cpu(rb_need_resched, cpu)) |
1219 | preempt_enable_no_resched_notrace(); | 1251 | preempt_enable_no_resched_notrace(); |
1220 | else | 1252 | else |
1221 | preempt_enable_notrace(); | 1253 | preempt_enable_notrace(); |
1222 | } else | 1254 | } else |
1223 | preempt_enable_no_resched_notrace(); | 1255 | preempt_enable_no_resched_notrace(); |
1224 | 1256 | ||
1225 | return 0; | 1257 | return 0; |
1226 | } | 1258 | } |
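For reference, a minimal sketch of the reserve/commit pairing the kernel-doc above requires. It is not part of the patch: the payload struct and helper name are hypothetical, and the prototypes are assumed from include/linux/ring_buffer.h as it stands at this commit (ring_buffer_lock_reserve() still hands back interrupt flags here).

        /* Sketch only: a hypothetical tracer writing one fixed-size record. */
        #include <linux/ring_buffer.h>

        struct my_entry {                       /* hypothetical payload layout */
                unsigned long ip;
                unsigned long parent_ip;
        };

        static int write_one_entry(struct ring_buffer *buffer,
                                   unsigned long ip, unsigned long parent_ip)
        {
                struct ring_buffer_event *event;
                struct my_entry *entry;
                unsigned long flags;

                /* Reserve room for the payload; the event header is added for us. */
                event = ring_buffer_lock_reserve(buffer, sizeof(*entry), &flags);
                if (!event)
                        return -EBUSY;  /* recording is switched off or disabled */

                entry = ring_buffer_event_data(event);
                entry->ip = ip;
                entry->parent_ip = parent_ip;

                /* Must be paired with the reserve above. */
                return ring_buffer_unlock_commit(buffer, event, flags);
        }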
1227 | 1259 | ||
1228 | /** | 1260 | /** |
1229 | * ring_buffer_write - write data to the buffer without reserving | 1261 | * ring_buffer_write - write data to the buffer without reserving |
1230 | * @buffer: The ring buffer to write to. | 1262 | * @buffer: The ring buffer to write to. |
1231 | * @length: The length of the data being written (excluding the event header) | 1263 | * @length: The length of the data being written (excluding the event header) |
1232 | * @data: The data to write to the buffer. | 1264 | * @data: The data to write to the buffer. |
1233 | * | 1265 | * |
1234 | * This is like ring_buffer_lock_reserve and ring_buffer_unlock_commit as | 1266 | * This is like ring_buffer_lock_reserve and ring_buffer_unlock_commit as |
1235 | * one function. If you already have the data to write to the buffer, it | 1267 | * one function. If you already have the data to write to the buffer, it |
1236 | * may be easier to simply call this function. | 1268 | * may be easier to simply call this function. |
1237 | * | 1269 | * |
1238 | * Note, like ring_buffer_lock_reserve, the length is the length of the data | 1270 | * Note, like ring_buffer_lock_reserve, the length is the length of the data |
1239 | * and not the length of the event which would hold the header. | 1271 | * and not the length of the event which would hold the header. |
1240 | */ | 1272 | */ |
1241 | int ring_buffer_write(struct ring_buffer *buffer, | 1273 | int ring_buffer_write(struct ring_buffer *buffer, |
1242 | unsigned long length, | 1274 | unsigned long length, |
1243 | void *data) | 1275 | void *data) |
1244 | { | 1276 | { |
1245 | struct ring_buffer_per_cpu *cpu_buffer; | 1277 | struct ring_buffer_per_cpu *cpu_buffer; |
1246 | struct ring_buffer_event *event; | 1278 | struct ring_buffer_event *event; |
1247 | unsigned long event_length; | 1279 | unsigned long event_length; |
1248 | void *body; | 1280 | void *body; |
1249 | int ret = -EBUSY; | 1281 | int ret = -EBUSY; |
1250 | int cpu, resched; | 1282 | int cpu, resched; |
1251 | 1283 | ||
1284 | if (ring_buffers_off) | ||
1285 | return -EBUSY; | ||
1286 | |||
1252 | if (atomic_read(&buffer->record_disabled)) | 1287 | if (atomic_read(&buffer->record_disabled)) |
1253 | return -EBUSY; | 1288 | return -EBUSY; |
1254 | 1289 | ||
1255 | resched = need_resched(); | 1290 | resched = need_resched(); |
1256 | preempt_disable_notrace(); | 1291 | preempt_disable_notrace(); |
1257 | 1292 | ||
1258 | cpu = raw_smp_processor_id(); | 1293 | cpu = raw_smp_processor_id(); |
1259 | 1294 | ||
1260 | if (!cpu_isset(cpu, buffer->cpumask)) | 1295 | if (!cpu_isset(cpu, buffer->cpumask)) |
1261 | goto out; | 1296 | goto out; |
1262 | 1297 | ||
1263 | cpu_buffer = buffer->buffers[cpu]; | 1298 | cpu_buffer = buffer->buffers[cpu]; |
1264 | 1299 | ||
1265 | if (atomic_read(&cpu_buffer->record_disabled)) | 1300 | if (atomic_read(&cpu_buffer->record_disabled)) |
1266 | goto out; | 1301 | goto out; |
1267 | 1302 | ||
1268 | event_length = rb_calculate_event_length(length); | 1303 | event_length = rb_calculate_event_length(length); |
1269 | event = rb_reserve_next_event(cpu_buffer, | 1304 | event = rb_reserve_next_event(cpu_buffer, |
1270 | RINGBUF_TYPE_DATA, event_length); | 1305 | RINGBUF_TYPE_DATA, event_length); |
1271 | if (!event) | 1306 | if (!event) |
1272 | goto out; | 1307 | goto out; |
1273 | 1308 | ||
1274 | body = rb_event_data(event); | 1309 | body = rb_event_data(event); |
1275 | 1310 | ||
1276 | memcpy(body, data, length); | 1311 | memcpy(body, data, length); |
1277 | 1312 | ||
1278 | rb_commit(cpu_buffer, event); | 1313 | rb_commit(cpu_buffer, event); |
1279 | 1314 | ||
1280 | ret = 0; | 1315 | ret = 0; |
1281 | out: | 1316 | out: |
1282 | if (resched) | 1317 | if (resched) |
1283 | preempt_enable_no_resched_notrace(); | 1318 | preempt_enable_no_resched_notrace(); |
1284 | else | 1319 | else |
1285 | preempt_enable_notrace(); | 1320 | preempt_enable_notrace(); |
1286 | 1321 | ||
1287 | return ret; | 1322 | return ret; |
1288 | } | 1323 | } |
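A hedged usage sketch of the one-call path above, for data that already exists in memory. log_message() is a made-up helper and the buffer is assumed to come from ring_buffer_alloc(); the length passed is the payload only, as the comment above notes.

        #include <linux/ring_buffer.h>
        #include <linux/string.h>

        /* Sketch: copy an existing NUL-terminated string straight into the buffer. */
        static int log_message(struct ring_buffer *buffer, const char *msg)
        {
                unsigned long len = strlen(msg) + 1;

                /* Returns -EBUSY when tracing_on is cleared or recording is disabled. */
                return ring_buffer_write(buffer, len, (void *)msg);
        }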
1289 | 1324 | ||
1290 | static inline int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer) | 1325 | static inline int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer) |
1291 | { | 1326 | { |
1292 | struct buffer_page *reader = cpu_buffer->reader_page; | 1327 | struct buffer_page *reader = cpu_buffer->reader_page; |
1293 | struct buffer_page *head = cpu_buffer->head_page; | 1328 | struct buffer_page *head = cpu_buffer->head_page; |
1294 | struct buffer_page *commit = cpu_buffer->commit_page; | 1329 | struct buffer_page *commit = cpu_buffer->commit_page; |
1295 | 1330 | ||
1296 | return reader->read == rb_page_commit(reader) && | 1331 | return reader->read == rb_page_commit(reader) && |
1297 | (commit == reader || | 1332 | (commit == reader || |
1298 | (commit == head && | 1333 | (commit == head && |
1299 | head->read == rb_page_commit(commit))); | 1334 | head->read == rb_page_commit(commit))); |
1300 | } | 1335 | } |
1301 | 1336 | ||
1302 | /** | 1337 | /** |
1303 | * ring_buffer_record_disable - stop all writes into the buffer | 1338 | * ring_buffer_record_disable - stop all writes into the buffer |
1304 | * @buffer: The ring buffer to stop writes to. | 1339 | * @buffer: The ring buffer to stop writes to. |
1305 | * | 1340 | * |
1306 | * This prevents all writes to the buffer. Any attempt to write | 1341 | * This prevents all writes to the buffer. Any attempt to write |
1307 | * to the buffer after this will fail and return NULL. | 1342 | * to the buffer after this will fail and return NULL. |
1308 | * | 1343 | * |
1309 | * The caller should call synchronize_sched() after this. | 1344 | * The caller should call synchronize_sched() after this. |
1310 | */ | 1345 | */ |
1311 | void ring_buffer_record_disable(struct ring_buffer *buffer) | 1346 | void ring_buffer_record_disable(struct ring_buffer *buffer) |
1312 | { | 1347 | { |
1313 | atomic_inc(&buffer->record_disabled); | 1348 | atomic_inc(&buffer->record_disabled); |
1314 | } | 1349 | } |
1315 | 1350 | ||
1316 | /** | 1351 | /** |
1317 | * ring_buffer_record_enable - enable writes to the buffer | 1352 | * ring_buffer_record_enable - enable writes to the buffer |
1318 | * @buffer: The ring buffer to enable writes | 1353 | * @buffer: The ring buffer to enable writes |
1319 | * | 1354 | * |
1320 | * Note, multiple disables will need the same number of enables | 1355 | * Note, multiple disables will need the same number of enables |
1321 | * to truly enable the writing (much like preempt_disable). | 1356 | * to truly enable the writing (much like preempt_disable). |
1322 | */ | 1357 | */ |
1323 | void ring_buffer_record_enable(struct ring_buffer *buffer) | 1358 | void ring_buffer_record_enable(struct ring_buffer *buffer) |
1324 | { | 1359 | { |
1325 | atomic_dec(&buffer->record_disabled); | 1360 | atomic_dec(&buffer->record_disabled); |
1326 | } | 1361 | } |
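A small sketch of the counted disable/enable discipline described in the two comments above, including the synchronize_sched() step the disable side asks for. The quiesce helper name is illustrative only.

        #include <linux/ring_buffer.h>
        #include <linux/rcupdate.h>

        /* Sketch: stop writers, wait for in-flight ones, inspect, then re-enable. */
        static void inspect_quiesced(struct ring_buffer *buffer)
        {
                ring_buffer_record_disable(buffer);
                synchronize_sched();            /* let writers already inside finish */

                /* ... walk or copy the buffer contents here ... */

                ring_buffer_record_enable(buffer);      /* one enable per disable */
        }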
1327 | 1362 | ||
1328 | /** | 1363 | /** |
1329 | * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer | 1364 | * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer |
1330 | * @buffer: The ring buffer to stop writes to. | 1365 | * @buffer: The ring buffer to stop writes to. |
1331 | * @cpu: The CPU buffer to stop | 1366 | * @cpu: The CPU buffer to stop |
1332 | * | 1367 | * |
1333 | * This prevents all writes to the buffer. Any attempt to write | 1368 | * This prevents all writes to the buffer. Any attempt to write |
1334 | * to the buffer after this will fail and return NULL. | 1369 | * to the buffer after this will fail and return NULL. |
1335 | * | 1370 | * |
1336 | * The caller should call synchronize_sched() after this. | 1371 | * The caller should call synchronize_sched() after this. |
1337 | */ | 1372 | */ |
1338 | void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu) | 1373 | void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu) |
1339 | { | 1374 | { |
1340 | struct ring_buffer_per_cpu *cpu_buffer; | 1375 | struct ring_buffer_per_cpu *cpu_buffer; |
1341 | 1376 | ||
1342 | if (!cpu_isset(cpu, buffer->cpumask)) | 1377 | if (!cpu_isset(cpu, buffer->cpumask)) |
1343 | return; | 1378 | return; |
1344 | 1379 | ||
1345 | cpu_buffer = buffer->buffers[cpu]; | 1380 | cpu_buffer = buffer->buffers[cpu]; |
1346 | atomic_inc(&cpu_buffer->record_disabled); | 1381 | atomic_inc(&cpu_buffer->record_disabled); |
1347 | } | 1382 | } |
1348 | 1383 | ||
1349 | /** | 1384 | /** |
1350 | * ring_buffer_record_enable_cpu - enable writes to the buffer | 1385 | * ring_buffer_record_enable_cpu - enable writes to the buffer |
1351 | * @buffer: The ring buffer to enable writes | 1386 | * @buffer: The ring buffer to enable writes |
1352 | * @cpu: The CPU to enable. | 1387 | * @cpu: The CPU to enable. |
1353 | * | 1388 | * |
1354 | * Note, multiple disables will need the same number of enables | 1389 | * Note, multiple disables will need the same number of enables |
1355 | * to truly enable the writing (much like preempt_disable). | 1390 | * to truly enable the writing (much like preempt_disable). |
1356 | */ | 1391 | */ |
1357 | void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu) | 1392 | void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu) |
1358 | { | 1393 | { |
1359 | struct ring_buffer_per_cpu *cpu_buffer; | 1394 | struct ring_buffer_per_cpu *cpu_buffer; |
1360 | 1395 | ||
1361 | if (!cpu_isset(cpu, buffer->cpumask)) | 1396 | if (!cpu_isset(cpu, buffer->cpumask)) |
1362 | return; | 1397 | return; |
1363 | 1398 | ||
1364 | cpu_buffer = buffer->buffers[cpu]; | 1399 | cpu_buffer = buffer->buffers[cpu]; |
1365 | atomic_dec(&cpu_buffer->record_disabled); | 1400 | atomic_dec(&cpu_buffer->record_disabled); |
1366 | } | 1401 | } |
1367 | 1402 | ||
1368 | /** | 1403 | /** |
1369 | * ring_buffer_entries_cpu - get the number of entries in a cpu buffer | 1404 | * ring_buffer_entries_cpu - get the number of entries in a cpu buffer |
1370 | * @buffer: The ring buffer | 1405 | * @buffer: The ring buffer |
1371 | * @cpu: The per CPU buffer to get the entries from. | 1406 | * @cpu: The per CPU buffer to get the entries from. |
1372 | */ | 1407 | */ |
1373 | unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu) | 1408 | unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu) |
1374 | { | 1409 | { |
1375 | struct ring_buffer_per_cpu *cpu_buffer; | 1410 | struct ring_buffer_per_cpu *cpu_buffer; |
1376 | 1411 | ||
1377 | if (!cpu_isset(cpu, buffer->cpumask)) | 1412 | if (!cpu_isset(cpu, buffer->cpumask)) |
1378 | return 0; | 1413 | return 0; |
1379 | 1414 | ||
1380 | cpu_buffer = buffer->buffers[cpu]; | 1415 | cpu_buffer = buffer->buffers[cpu]; |
1381 | return cpu_buffer->entries; | 1416 | return cpu_buffer->entries; |
1382 | } | 1417 | } |
1383 | 1418 | ||
1384 | /** | 1419 | /** |
1385 | * ring_buffer_overrun_cpu - get the number of overruns in a cpu_buffer | 1420 | * ring_buffer_overrun_cpu - get the number of overruns in a cpu_buffer |
1386 | * @buffer: The ring buffer | 1421 | * @buffer: The ring buffer |
1387 | * @cpu: The per CPU buffer to get the number of overruns from | 1422 | * @cpu: The per CPU buffer to get the number of overruns from |
1388 | */ | 1423 | */ |
1389 | unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu) | 1424 | unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu) |
1390 | { | 1425 | { |
1391 | struct ring_buffer_per_cpu *cpu_buffer; | 1426 | struct ring_buffer_per_cpu *cpu_buffer; |
1392 | 1427 | ||
1393 | if (!cpu_isset(cpu, buffer->cpumask)) | 1428 | if (!cpu_isset(cpu, buffer->cpumask)) |
1394 | return 0; | 1429 | return 0; |
1395 | 1430 | ||
1396 | cpu_buffer = buffer->buffers[cpu]; | 1431 | cpu_buffer = buffer->buffers[cpu]; |
1397 | return cpu_buffer->overrun; | 1432 | return cpu_buffer->overrun; |
1398 | } | 1433 | } |
1399 | 1434 | ||
1400 | /** | 1435 | /** |
1401 | * ring_buffer_entries - get the number of entries in a buffer | 1436 | * ring_buffer_entries - get the number of entries in a buffer |
1402 | * @buffer: The ring buffer | 1437 | * @buffer: The ring buffer |
1403 | * | 1438 | * |
1404 | * Returns the total number of entries in the ring buffer | 1439 | * Returns the total number of entries in the ring buffer |
1405 | * (all CPU entries) | 1440 | * (all CPU entries) |
1406 | */ | 1441 | */ |
1407 | unsigned long ring_buffer_entries(struct ring_buffer *buffer) | 1442 | unsigned long ring_buffer_entries(struct ring_buffer *buffer) |
1408 | { | 1443 | { |
1409 | struct ring_buffer_per_cpu *cpu_buffer; | 1444 | struct ring_buffer_per_cpu *cpu_buffer; |
1410 | unsigned long entries = 0; | 1445 | unsigned long entries = 0; |
1411 | int cpu; | 1446 | int cpu; |
1412 | 1447 | ||
1413 | /* if you care about this being correct, lock the buffer */ | 1448 | /* if you care about this being correct, lock the buffer */ |
1414 | for_each_buffer_cpu(buffer, cpu) { | 1449 | for_each_buffer_cpu(buffer, cpu) { |
1415 | cpu_buffer = buffer->buffers[cpu]; | 1450 | cpu_buffer = buffer->buffers[cpu]; |
1416 | entries += cpu_buffer->entries; | 1451 | entries += cpu_buffer->entries; |
1417 | } | 1452 | } |
1418 | 1453 | ||
1419 | return entries; | 1454 | return entries; |
1420 | } | 1455 | } |
1421 | 1456 | ||
1422 | /** | 1457 | /** |
1423 | * ring_buffer_overruns - get the number of overruns in buffer | 1458 | * ring_buffer_overruns - get the number of overruns in buffer |
1424 | * @buffer: The ring buffer | 1459 | * @buffer: The ring buffer |
1425 | * | 1460 | * |
1426 | * Returns the total number of overruns in the ring buffer | 1461 | * Returns the total number of overruns in the ring buffer |
1427 | * (all CPU entries) | 1462 | * (all CPU entries) |
1428 | */ | 1463 | */ |
1429 | unsigned long ring_buffer_overruns(struct ring_buffer *buffer) | 1464 | unsigned long ring_buffer_overruns(struct ring_buffer *buffer) |
1430 | { | 1465 | { |
1431 | struct ring_buffer_per_cpu *cpu_buffer; | 1466 | struct ring_buffer_per_cpu *cpu_buffer; |
1432 | unsigned long overruns = 0; | 1467 | unsigned long overruns = 0; |
1433 | int cpu; | 1468 | int cpu; |
1434 | 1469 | ||
1435 | /* if you care about this being correct, lock the buffer */ | 1470 | /* if you care about this being correct, lock the buffer */ |
1436 | for_each_buffer_cpu(buffer, cpu) { | 1471 | for_each_buffer_cpu(buffer, cpu) { |
1437 | cpu_buffer = buffer->buffers[cpu]; | 1472 | cpu_buffer = buffer->buffers[cpu]; |
1438 | overruns += cpu_buffer->overrun; | 1473 | overruns += cpu_buffer->overrun; |
1439 | } | 1474 | } |
1440 | 1475 | ||
1441 | return overruns; | 1476 | return overruns; |
1442 | } | 1477 | } |
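For illustration, a sketch that combines the per-CPU and global counters above into a simple report. The output format is made up, and CPUs outside buffer->cpumask simply contribute zero, since the per-CPU helpers return 0 for them.

        #include <linux/ring_buffer.h>
        #include <linux/cpumask.h>
        #include <linux/kernel.h>

        /* Sketch: dump entry/overrun counts per CPU and in total. */
        static void report_stats(struct ring_buffer *buffer)
        {
                int cpu;

                for_each_online_cpu(cpu)
                        printk(KERN_INFO "cpu%d: %lu entries, %lu overruns\n", cpu,
                               ring_buffer_entries_cpu(buffer, cpu),
                               ring_buffer_overrun_cpu(buffer, cpu));

                printk(KERN_INFO "total: %lu entries, %lu overruns\n",
                       ring_buffer_entries(buffer),
                       ring_buffer_overruns(buffer));
        }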
1443 | 1478 | ||
1444 | /** | 1479 | /** |
1445 | * ring_buffer_iter_reset - reset an iterator | 1480 | * ring_buffer_iter_reset - reset an iterator |
1446 | * @iter: The iterator to reset | 1481 | * @iter: The iterator to reset |
1447 | * | 1482 | * |
1448 | * Resets the iterator, so that it will start from the beginning | 1483 | * Resets the iterator, so that it will start from the beginning |
1449 | * again. | 1484 | * again. |
1450 | */ | 1485 | */ |
1451 | void ring_buffer_iter_reset(struct ring_buffer_iter *iter) | 1486 | void ring_buffer_iter_reset(struct ring_buffer_iter *iter) |
1452 | { | 1487 | { |
1453 | struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; | 1488 | struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; |
1454 | 1489 | ||
1455 | /* Iterator usage is expected to have record disabled */ | 1490 | /* Iterator usage is expected to have record disabled */ |
1456 | if (list_empty(&cpu_buffer->reader_page->list)) { | 1491 | if (list_empty(&cpu_buffer->reader_page->list)) { |
1457 | iter->head_page = cpu_buffer->head_page; | 1492 | iter->head_page = cpu_buffer->head_page; |
1458 | iter->head = cpu_buffer->head_page->read; | 1493 | iter->head = cpu_buffer->head_page->read; |
1459 | } else { | 1494 | } else { |
1460 | iter->head_page = cpu_buffer->reader_page; | 1495 | iter->head_page = cpu_buffer->reader_page; |
1461 | iter->head = cpu_buffer->reader_page->read; | 1496 | iter->head = cpu_buffer->reader_page->read; |
1462 | } | 1497 | } |
1463 | if (iter->head) | 1498 | if (iter->head) |
1464 | iter->read_stamp = cpu_buffer->read_stamp; | 1499 | iter->read_stamp = cpu_buffer->read_stamp; |
1465 | else | 1500 | else |
1466 | iter->read_stamp = iter->head_page->time_stamp; | 1501 | iter->read_stamp = iter->head_page->time_stamp; |
1467 | } | 1502 | } |
1468 | 1503 | ||
1469 | /** | 1504 | /** |
1470 | * ring_buffer_iter_empty - check if an iterator has no more to read | 1505 | * ring_buffer_iter_empty - check if an iterator has no more to read |
1471 | * @iter: The iterator to check | 1506 | * @iter: The iterator to check |
1472 | */ | 1507 | */ |
1473 | int ring_buffer_iter_empty(struct ring_buffer_iter *iter) | 1508 | int ring_buffer_iter_empty(struct ring_buffer_iter *iter) |
1474 | { | 1509 | { |
1475 | struct ring_buffer_per_cpu *cpu_buffer; | 1510 | struct ring_buffer_per_cpu *cpu_buffer; |
1476 | 1511 | ||
1477 | cpu_buffer = iter->cpu_buffer; | 1512 | cpu_buffer = iter->cpu_buffer; |
1478 | 1513 | ||
1479 | return iter->head_page == cpu_buffer->commit_page && | 1514 | return iter->head_page == cpu_buffer->commit_page && |
1480 | iter->head == rb_commit_index(cpu_buffer); | 1515 | iter->head == rb_commit_index(cpu_buffer); |
1481 | } | 1516 | } |
1482 | 1517 | ||
1483 | static void | 1518 | static void |
1484 | rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer, | 1519 | rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer, |
1485 | struct ring_buffer_event *event) | 1520 | struct ring_buffer_event *event) |
1486 | { | 1521 | { |
1487 | u64 delta; | 1522 | u64 delta; |
1488 | 1523 | ||
1489 | switch (event->type) { | 1524 | switch (event->type) { |
1490 | case RINGBUF_TYPE_PADDING: | 1525 | case RINGBUF_TYPE_PADDING: |
1491 | return; | 1526 | return; |
1492 | 1527 | ||
1493 | case RINGBUF_TYPE_TIME_EXTEND: | 1528 | case RINGBUF_TYPE_TIME_EXTEND: |
1494 | delta = event->array[0]; | 1529 | delta = event->array[0]; |
1495 | delta <<= TS_SHIFT; | 1530 | delta <<= TS_SHIFT; |
1496 | delta += event->time_delta; | 1531 | delta += event->time_delta; |
1497 | cpu_buffer->read_stamp += delta; | 1532 | cpu_buffer->read_stamp += delta; |
1498 | return; | 1533 | return; |
1499 | 1534 | ||
1500 | case RINGBUF_TYPE_TIME_STAMP: | 1535 | case RINGBUF_TYPE_TIME_STAMP: |
1501 | /* FIXME: not implemented */ | 1536 | /* FIXME: not implemented */ |
1502 | return; | 1537 | return; |
1503 | 1538 | ||
1504 | case RINGBUF_TYPE_DATA: | 1539 | case RINGBUF_TYPE_DATA: |
1505 | cpu_buffer->read_stamp += event->time_delta; | 1540 | cpu_buffer->read_stamp += event->time_delta; |
1506 | return; | 1541 | return; |
1507 | 1542 | ||
1508 | default: | 1543 | default: |
1509 | BUG(); | 1544 | BUG(); |
1510 | } | 1545 | } |
1511 | return; | 1546 | return; |
1512 | } | 1547 | } |
1513 | 1548 | ||
1514 | static void | 1549 | static void |
1515 | rb_update_iter_read_stamp(struct ring_buffer_iter *iter, | 1550 | rb_update_iter_read_stamp(struct ring_buffer_iter *iter, |
1516 | struct ring_buffer_event *event) | 1551 | struct ring_buffer_event *event) |
1517 | { | 1552 | { |
1518 | u64 delta; | 1553 | u64 delta; |
1519 | 1554 | ||
1520 | switch (event->type) { | 1555 | switch (event->type) { |
1521 | case RINGBUF_TYPE_PADDING: | 1556 | case RINGBUF_TYPE_PADDING: |
1522 | return; | 1557 | return; |
1523 | 1558 | ||
1524 | case RINGBUF_TYPE_TIME_EXTEND: | 1559 | case RINGBUF_TYPE_TIME_EXTEND: |
1525 | delta = event->array[0]; | 1560 | delta = event->array[0]; |
1526 | delta <<= TS_SHIFT; | 1561 | delta <<= TS_SHIFT; |
1527 | delta += event->time_delta; | 1562 | delta += event->time_delta; |
1528 | iter->read_stamp += delta; | 1563 | iter->read_stamp += delta; |
1529 | return; | 1564 | return; |
1530 | 1565 | ||
1531 | case RINGBUF_TYPE_TIME_STAMP: | 1566 | case RINGBUF_TYPE_TIME_STAMP: |
1532 | /* FIXME: not implemented */ | 1567 | /* FIXME: not implemented */ |
1533 | return; | 1568 | return; |
1534 | 1569 | ||
1535 | case RINGBUF_TYPE_DATA: | 1570 | case RINGBUF_TYPE_DATA: |
1536 | iter->read_stamp += event->time_delta; | 1571 | iter->read_stamp += event->time_delta; |
1537 | return; | 1572 | return; |
1538 | 1573 | ||
1539 | default: | 1574 | default: |
1540 | BUG(); | 1575 | BUG(); |
1541 | } | 1576 | } |
1542 | return; | 1577 | return; |
1543 | } | 1578 | } |
1544 | 1579 | ||
1545 | static struct buffer_page * | 1580 | static struct buffer_page * |
1546 | rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) | 1581 | rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) |
1547 | { | 1582 | { |
1548 | struct buffer_page *reader = NULL; | 1583 | struct buffer_page *reader = NULL; |
1549 | unsigned long flags; | 1584 | unsigned long flags; |
1550 | int nr_loops = 0; | 1585 | int nr_loops = 0; |
1551 | 1586 | ||
1552 | spin_lock_irqsave(&cpu_buffer->lock, flags); | 1587 | spin_lock_irqsave(&cpu_buffer->lock, flags); |
1553 | 1588 | ||
1554 | again: | 1589 | again: |
1555 | /* | 1590 | /* |
1556 | * This should normally only loop twice. But because the | 1591 | * This should normally only loop twice. But because the |
1557 | * start of the reader inserts an empty page, it causes | 1592 | * start of the reader inserts an empty page, it causes |
1558 | * a case where we will loop three times. There should be no | 1593 | * a case where we will loop three times. There should be no |
1559 | * reason to loop four times (that I know of). | 1594 | * reason to loop four times (that I know of). |
1560 | */ | 1595 | */ |
1561 | if (unlikely(++nr_loops > 3)) { | 1596 | if (unlikely(++nr_loops > 3)) { |
1562 | RB_WARN_ON(cpu_buffer, 1); | 1597 | RB_WARN_ON(cpu_buffer, 1); |
1563 | reader = NULL; | 1598 | reader = NULL; |
1564 | goto out; | 1599 | goto out; |
1565 | } | 1600 | } |
1566 | 1601 | ||
1567 | reader = cpu_buffer->reader_page; | 1602 | reader = cpu_buffer->reader_page; |
1568 | 1603 | ||
1569 | /* If there's more to read, return this page */ | 1604 | /* If there's more to read, return this page */ |
1570 | if (cpu_buffer->reader_page->read < rb_page_size(reader)) | 1605 | if (cpu_buffer->reader_page->read < rb_page_size(reader)) |
1571 | goto out; | 1606 | goto out; |
1572 | 1607 | ||
1573 | /* Never should we have an index greater than the size */ | 1608 | /* Never should we have an index greater than the size */ |
1574 | RB_WARN_ON(cpu_buffer, | 1609 | RB_WARN_ON(cpu_buffer, |
1575 | cpu_buffer->reader_page->read > rb_page_size(reader)); | 1610 | cpu_buffer->reader_page->read > rb_page_size(reader)); |
1576 | 1611 | ||
1577 | /* check if we caught up to the tail */ | 1612 | /* check if we caught up to the tail */ |
1578 | reader = NULL; | 1613 | reader = NULL; |
1579 | if (cpu_buffer->commit_page == cpu_buffer->reader_page) | 1614 | if (cpu_buffer->commit_page == cpu_buffer->reader_page) |
1580 | goto out; | 1615 | goto out; |
1581 | 1616 | ||
1582 | /* | 1617 | /* |
1583 | * Splice the empty reader page into the list around the head. | 1618 | * Splice the empty reader page into the list around the head. |
1584 | * Reset the reader page to size zero. | 1619 | * Reset the reader page to size zero. |
1585 | */ | 1620 | */ |
1586 | 1621 | ||
1587 | reader = cpu_buffer->head_page; | 1622 | reader = cpu_buffer->head_page; |
1588 | cpu_buffer->reader_page->list.next = reader->list.next; | 1623 | cpu_buffer->reader_page->list.next = reader->list.next; |
1589 | cpu_buffer->reader_page->list.prev = reader->list.prev; | 1624 | cpu_buffer->reader_page->list.prev = reader->list.prev; |
1590 | 1625 | ||
1591 | local_set(&cpu_buffer->reader_page->write, 0); | 1626 | local_set(&cpu_buffer->reader_page->write, 0); |
1592 | local_set(&cpu_buffer->reader_page->commit, 0); | 1627 | local_set(&cpu_buffer->reader_page->commit, 0); |
1593 | 1628 | ||
1594 | /* Make the reader page now replace the head */ | 1629 | /* Make the reader page now replace the head */ |
1595 | reader->list.prev->next = &cpu_buffer->reader_page->list; | 1630 | reader->list.prev->next = &cpu_buffer->reader_page->list; |
1596 | reader->list.next->prev = &cpu_buffer->reader_page->list; | 1631 | reader->list.next->prev = &cpu_buffer->reader_page->list; |
1597 | 1632 | ||
1598 | /* | 1633 | /* |
1599 | * If the tail is on the reader, then we must set the head | 1634 | * If the tail is on the reader, then we must set the head |
1600 | * to the inserted page, otherwise we set it one before. | 1635 | * to the inserted page, otherwise we set it one before. |
1601 | */ | 1636 | */ |
1602 | cpu_buffer->head_page = cpu_buffer->reader_page; | 1637 | cpu_buffer->head_page = cpu_buffer->reader_page; |
1603 | 1638 | ||
1604 | if (cpu_buffer->commit_page != reader) | 1639 | if (cpu_buffer->commit_page != reader) |
1605 | rb_inc_page(cpu_buffer, &cpu_buffer->head_page); | 1640 | rb_inc_page(cpu_buffer, &cpu_buffer->head_page); |
1606 | 1641 | ||
1607 | /* Finally update the reader page to the new head */ | 1642 | /* Finally update the reader page to the new head */ |
1608 | cpu_buffer->reader_page = reader; | 1643 | cpu_buffer->reader_page = reader; |
1609 | rb_reset_reader_page(cpu_buffer); | 1644 | rb_reset_reader_page(cpu_buffer); |
1610 | 1645 | ||
1611 | goto again; | 1646 | goto again; |
1612 | 1647 | ||
1613 | out: | 1648 | out: |
1614 | spin_unlock_irqrestore(&cpu_buffer->lock, flags); | 1649 | spin_unlock_irqrestore(&cpu_buffer->lock, flags); |
1615 | 1650 | ||
1616 | return reader; | 1651 | return reader; |
1617 | } | 1652 | } |
1618 | 1653 | ||
1619 | static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer) | 1654 | static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer) |
1620 | { | 1655 | { |
1621 | struct ring_buffer_event *event; | 1656 | struct ring_buffer_event *event; |
1622 | struct buffer_page *reader; | 1657 | struct buffer_page *reader; |
1623 | unsigned length; | 1658 | unsigned length; |
1624 | 1659 | ||
1625 | reader = rb_get_reader_page(cpu_buffer); | 1660 | reader = rb_get_reader_page(cpu_buffer); |
1626 | 1661 | ||
1627 | /* This function should not be called when buffer is empty */ | 1662 | /* This function should not be called when buffer is empty */ |
1628 | BUG_ON(!reader); | 1663 | BUG_ON(!reader); |
1629 | 1664 | ||
1630 | event = rb_reader_event(cpu_buffer); | 1665 | event = rb_reader_event(cpu_buffer); |
1631 | 1666 | ||
1632 | if (event->type == RINGBUF_TYPE_DATA) | 1667 | if (event->type == RINGBUF_TYPE_DATA) |
1633 | cpu_buffer->entries--; | 1668 | cpu_buffer->entries--; |
1634 | 1669 | ||
1635 | rb_update_read_stamp(cpu_buffer, event); | 1670 | rb_update_read_stamp(cpu_buffer, event); |
1636 | 1671 | ||
1637 | length = rb_event_length(event); | 1672 | length = rb_event_length(event); |
1638 | cpu_buffer->reader_page->read += length; | 1673 | cpu_buffer->reader_page->read += length; |
1639 | } | 1674 | } |
1640 | 1675 | ||
1641 | static void rb_advance_iter(struct ring_buffer_iter *iter) | 1676 | static void rb_advance_iter(struct ring_buffer_iter *iter) |
1642 | { | 1677 | { |
1643 | struct ring_buffer *buffer; | 1678 | struct ring_buffer *buffer; |
1644 | struct ring_buffer_per_cpu *cpu_buffer; | 1679 | struct ring_buffer_per_cpu *cpu_buffer; |
1645 | struct ring_buffer_event *event; | 1680 | struct ring_buffer_event *event; |
1646 | unsigned length; | 1681 | unsigned length; |
1647 | 1682 | ||
1648 | cpu_buffer = iter->cpu_buffer; | 1683 | cpu_buffer = iter->cpu_buffer; |
1649 | buffer = cpu_buffer->buffer; | 1684 | buffer = cpu_buffer->buffer; |
1650 | 1685 | ||
1651 | /* | 1686 | /* |
1652 | * Check if we are at the end of the buffer. | 1687 | * Check if we are at the end of the buffer. |
1653 | */ | 1688 | */ |
1654 | if (iter->head >= rb_page_size(iter->head_page)) { | 1689 | if (iter->head >= rb_page_size(iter->head_page)) { |
1655 | BUG_ON(iter->head_page == cpu_buffer->commit_page); | 1690 | BUG_ON(iter->head_page == cpu_buffer->commit_page); |
1656 | rb_inc_iter(iter); | 1691 | rb_inc_iter(iter); |
1657 | return; | 1692 | return; |
1658 | } | 1693 | } |
1659 | 1694 | ||
1660 | event = rb_iter_head_event(iter); | 1695 | event = rb_iter_head_event(iter); |
1661 | 1696 | ||
1662 | length = rb_event_length(event); | 1697 | length = rb_event_length(event); |
1663 | 1698 | ||
1664 | /* | 1699 | /* |
1665 | * This should not be called to advance the header if we are | 1700 | * This should not be called to advance the header if we are |
1666 | * at the tail of the buffer. | 1701 | * at the tail of the buffer. |
1667 | */ | 1702 | */ |
1668 | BUG_ON((iter->head_page == cpu_buffer->commit_page) && | 1703 | BUG_ON((iter->head_page == cpu_buffer->commit_page) && |
1669 | (iter->head + length > rb_commit_index(cpu_buffer))); | 1704 | (iter->head + length > rb_commit_index(cpu_buffer))); |
1670 | 1705 | ||
1671 | rb_update_iter_read_stamp(iter, event); | 1706 | rb_update_iter_read_stamp(iter, event); |
1672 | 1707 | ||
1673 | iter->head += length; | 1708 | iter->head += length; |
1674 | 1709 | ||
1675 | /* check for end of page padding */ | 1710 | /* check for end of page padding */ |
1676 | if ((iter->head >= rb_page_size(iter->head_page)) && | 1711 | if ((iter->head >= rb_page_size(iter->head_page)) && |
1677 | (iter->head_page != cpu_buffer->commit_page)) | 1712 | (iter->head_page != cpu_buffer->commit_page)) |
1678 | rb_advance_iter(iter); | 1713 | rb_advance_iter(iter); |
1679 | } | 1714 | } |
1680 | 1715 | ||
1681 | /** | 1716 | /** |
1682 | * ring_buffer_peek - peek at the next event to be read | 1717 | * ring_buffer_peek - peek at the next event to be read |
1683 | * @buffer: The ring buffer to read | 1718 | * @buffer: The ring buffer to read |
1684 | * @cpu: The cpu to peek at | 1719 | * @cpu: The cpu to peek at |
1685 | * @ts: The timestamp counter of this event. | 1720 | * @ts: The timestamp counter of this event. |
1686 | * | 1721 | * |
1687 | * This will return the event that will be read next, but does | 1722 | * This will return the event that will be read next, but does |
1688 | * not consume the data. | 1723 | * not consume the data. |
1689 | */ | 1724 | */ |
1690 | struct ring_buffer_event * | 1725 | struct ring_buffer_event * |
1691 | ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) | 1726 | ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) |
1692 | { | 1727 | { |
1693 | struct ring_buffer_per_cpu *cpu_buffer; | 1728 | struct ring_buffer_per_cpu *cpu_buffer; |
1694 | struct ring_buffer_event *event; | 1729 | struct ring_buffer_event *event; |
1695 | struct buffer_page *reader; | 1730 | struct buffer_page *reader; |
1696 | int nr_loops = 0; | 1731 | int nr_loops = 0; |
1697 | 1732 | ||
1698 | if (!cpu_isset(cpu, buffer->cpumask)) | 1733 | if (!cpu_isset(cpu, buffer->cpumask)) |
1699 | return NULL; | 1734 | return NULL; |
1700 | 1735 | ||
1701 | cpu_buffer = buffer->buffers[cpu]; | 1736 | cpu_buffer = buffer->buffers[cpu]; |
1702 | 1737 | ||
1703 | again: | 1738 | again: |
1704 | /* | 1739 | /* |
1705 | * We repeat when a timestamp is encountered. It is possible | 1740 | * We repeat when a timestamp is encountered. It is possible |
1706 | * to get multiple timestamps from an interrupt entering just | 1741 | * to get multiple timestamps from an interrupt entering just |
1707 | * as one timestamp is about to be written. The max times | 1742 | * as one timestamp is about to be written. The max times |
1708 | * that this can happen is the number of nested interrupts we | 1743 | * that this can happen is the number of nested interrupts we |
1709 | * can have. Nesting 10 deep of interrupts is clearly | 1744 | * can have. Nesting 10 deep of interrupts is clearly |
1710 | * an anomaly. | 1745 | * an anomaly. |
1711 | */ | 1746 | */ |
1712 | if (unlikely(++nr_loops > 10)) { | 1747 | if (unlikely(++nr_loops > 10)) { |
1713 | RB_WARN_ON(cpu_buffer, 1); | 1748 | RB_WARN_ON(cpu_buffer, 1); |
1714 | return NULL; | 1749 | return NULL; |
1715 | } | 1750 | } |
1716 | 1751 | ||
1717 | reader = rb_get_reader_page(cpu_buffer); | 1752 | reader = rb_get_reader_page(cpu_buffer); |
1718 | if (!reader) | 1753 | if (!reader) |
1719 | return NULL; | 1754 | return NULL; |
1720 | 1755 | ||
1721 | event = rb_reader_event(cpu_buffer); | 1756 | event = rb_reader_event(cpu_buffer); |
1722 | 1757 | ||
1723 | switch (event->type) { | 1758 | switch (event->type) { |
1724 | case RINGBUF_TYPE_PADDING: | 1759 | case RINGBUF_TYPE_PADDING: |
1725 | RB_WARN_ON(cpu_buffer, 1); | 1760 | RB_WARN_ON(cpu_buffer, 1); |
1726 | rb_advance_reader(cpu_buffer); | 1761 | rb_advance_reader(cpu_buffer); |
1727 | return NULL; | 1762 | return NULL; |
1728 | 1763 | ||
1729 | case RINGBUF_TYPE_TIME_EXTEND: | 1764 | case RINGBUF_TYPE_TIME_EXTEND: |
1730 | /* Internal data, OK to advance */ | 1765 | /* Internal data, OK to advance */ |
1731 | rb_advance_reader(cpu_buffer); | 1766 | rb_advance_reader(cpu_buffer); |
1732 | goto again; | 1767 | goto again; |
1733 | 1768 | ||
1734 | case RINGBUF_TYPE_TIME_STAMP: | 1769 | case RINGBUF_TYPE_TIME_STAMP: |
1735 | /* FIXME: not implemented */ | 1770 | /* FIXME: not implemented */ |
1736 | rb_advance_reader(cpu_buffer); | 1771 | rb_advance_reader(cpu_buffer); |
1737 | goto again; | 1772 | goto again; |
1738 | 1773 | ||
1739 | case RINGBUF_TYPE_DATA: | 1774 | case RINGBUF_TYPE_DATA: |
1740 | if (ts) { | 1775 | if (ts) { |
1741 | *ts = cpu_buffer->read_stamp + event->time_delta; | 1776 | *ts = cpu_buffer->read_stamp + event->time_delta; |
1742 | ring_buffer_normalize_time_stamp(cpu_buffer->cpu, ts); | 1777 | ring_buffer_normalize_time_stamp(cpu_buffer->cpu, ts); |
1743 | } | 1778 | } |
1744 | return event; | 1779 | return event; |
1745 | 1780 | ||
1746 | default: | 1781 | default: |
1747 | BUG(); | 1782 | BUG(); |
1748 | } | 1783 | } |
1749 | 1784 | ||
1750 | return NULL; | 1785 | return NULL; |
1751 | } | 1786 | } |
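A sketch of the peek-then-consume pattern the function above enables. The wanted() predicate is hypothetical, and a single consumer per CPU buffer is assumed so the peeked event is still the next one when it is consumed.

        #include <linux/ring_buffer.h>
        #include <linux/types.h>

        /* Sketch: look at the next event and only consume it if the caller wants it. */
        static struct ring_buffer_event *
        maybe_take(struct ring_buffer *buffer, int cpu, u64 *ts,
                   bool (*wanted)(struct ring_buffer_event *event))
        {
                struct ring_buffer_event *event;

                event = ring_buffer_peek(buffer, cpu, ts);
                if (!event || !wanted(event))
                        return NULL;

                /* With one consumer, consuming now returns the event just peeked at. */
                return ring_buffer_consume(buffer, cpu, ts);
        }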
1752 | 1787 | ||
1753 | /** | 1788 | /** |
1754 | * ring_buffer_iter_peek - peek at the next event to be read | 1789 | * ring_buffer_iter_peek - peek at the next event to be read |
1755 | * @iter: The ring buffer iterator | 1790 | * @iter: The ring buffer iterator |
1756 | * @ts: The timestamp counter of this event. | 1791 | * @ts: The timestamp counter of this event. |
1757 | * | 1792 | * |
1758 | * This will return the event that will be read next, but does | 1793 | * This will return the event that will be read next, but does |
1759 | * not increment the iterator. | 1794 | * not increment the iterator. |
1760 | */ | 1795 | */ |
1761 | struct ring_buffer_event * | 1796 | struct ring_buffer_event * |
1762 | ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts) | 1797 | ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts) |
1763 | { | 1798 | { |
1764 | struct ring_buffer *buffer; | 1799 | struct ring_buffer *buffer; |
1765 | struct ring_buffer_per_cpu *cpu_buffer; | 1800 | struct ring_buffer_per_cpu *cpu_buffer; |
1766 | struct ring_buffer_event *event; | 1801 | struct ring_buffer_event *event; |
1767 | int nr_loops = 0; | 1802 | int nr_loops = 0; |
1768 | 1803 | ||
1769 | if (ring_buffer_iter_empty(iter)) | 1804 | if (ring_buffer_iter_empty(iter)) |
1770 | return NULL; | 1805 | return NULL; |
1771 | 1806 | ||
1772 | cpu_buffer = iter->cpu_buffer; | 1807 | cpu_buffer = iter->cpu_buffer; |
1773 | buffer = cpu_buffer->buffer; | 1808 | buffer = cpu_buffer->buffer; |
1774 | 1809 | ||
1775 | again: | 1810 | again: |
1776 | /* | 1811 | /* |
1777 | * We repeat when a timestamp is encountered. It is possible | 1812 | * We repeat when a timestamp is encountered. It is possible |
1778 | * to get multiple timestamps from an interrupt entering just | 1813 | * to get multiple timestamps from an interrupt entering just |
1779 | * as one timestamp is about to be written. The max times | 1814 | * as one timestamp is about to be written. The max times |
1780 | * that this can happen is the number of nested interrupts we | 1815 | * that this can happen is the number of nested interrupts we |
1781 | * can have. Nesting 10 deep of interrupts is clearly | 1816 | * can have. Nesting 10 deep of interrupts is clearly |
1782 | * an anomaly. | 1817 | * an anomaly. |
1783 | */ | 1818 | */ |
1784 | if (unlikely(++nr_loops > 10)) { | 1819 | if (unlikely(++nr_loops > 10)) { |
1785 | RB_WARN_ON(cpu_buffer, 1); | 1820 | RB_WARN_ON(cpu_buffer, 1); |
1786 | return NULL; | 1821 | return NULL; |
1787 | } | 1822 | } |
1788 | 1823 | ||
1789 | if (rb_per_cpu_empty(cpu_buffer)) | 1824 | if (rb_per_cpu_empty(cpu_buffer)) |
1790 | return NULL; | 1825 | return NULL; |
1791 | 1826 | ||
1792 | event = rb_iter_head_event(iter); | 1827 | event = rb_iter_head_event(iter); |
1793 | 1828 | ||
1794 | switch (event->type) { | 1829 | switch (event->type) { |
1795 | case RINGBUF_TYPE_PADDING: | 1830 | case RINGBUF_TYPE_PADDING: |
1796 | rb_inc_iter(iter); | 1831 | rb_inc_iter(iter); |
1797 | goto again; | 1832 | goto again; |
1798 | 1833 | ||
1799 | case RINGBUF_TYPE_TIME_EXTEND: | 1834 | case RINGBUF_TYPE_TIME_EXTEND: |
1800 | /* Internal data, OK to advance */ | 1835 | /* Internal data, OK to advance */ |
1801 | rb_advance_iter(iter); | 1836 | rb_advance_iter(iter); |
1802 | goto again; | 1837 | goto again; |
1803 | 1838 | ||
1804 | case RINGBUF_TYPE_TIME_STAMP: | 1839 | case RINGBUF_TYPE_TIME_STAMP: |
1805 | /* FIXME: not implemented */ | 1840 | /* FIXME: not implemented */ |
1806 | rb_advance_iter(iter); | 1841 | rb_advance_iter(iter); |
1807 | goto again; | 1842 | goto again; |
1808 | 1843 | ||
1809 | case RINGBUF_TYPE_DATA: | 1844 | case RINGBUF_TYPE_DATA: |
1810 | if (ts) { | 1845 | if (ts) { |
1811 | *ts = iter->read_stamp + event->time_delta; | 1846 | *ts = iter->read_stamp + event->time_delta; |
1812 | ring_buffer_normalize_time_stamp(cpu_buffer->cpu, ts); | 1847 | ring_buffer_normalize_time_stamp(cpu_buffer->cpu, ts); |
1813 | } | 1848 | } |
1814 | return event; | 1849 | return event; |
1815 | 1850 | ||
1816 | default: | 1851 | default: |
1817 | BUG(); | 1852 | BUG(); |
1818 | } | 1853 | } |
1819 | 1854 | ||
1820 | return NULL; | 1855 | return NULL; |
1821 | } | 1856 | } |
1822 | 1857 | ||
1823 | /** | 1858 | /** |
1824 | * ring_buffer_consume - return an event and consume it | 1859 | * ring_buffer_consume - return an event and consume it |
1825 | * @buffer: The ring buffer to get the next event from | 1860 | * @buffer: The ring buffer to get the next event from |
1826 | * | 1861 | * |
1827 | * Returns the next event in the ring buffer, and that event is consumed. | 1862 | * Returns the next event in the ring buffer, and that event is consumed. |
1828 | * Meaning that sequential reads will keep returning a different event, | 1863 | * Meaning that sequential reads will keep returning a different event, |
1829 | * and eventually empty the ring buffer if the producer is slower. | 1864 | * and eventually empty the ring buffer if the producer is slower. |
1830 | */ | 1865 | */ |
1831 | struct ring_buffer_event * | 1866 | struct ring_buffer_event * |
1832 | ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) | 1867 | ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) |
1833 | { | 1868 | { |
1834 | struct ring_buffer_per_cpu *cpu_buffer; | 1869 | struct ring_buffer_per_cpu *cpu_buffer; |
1835 | struct ring_buffer_event *event; | 1870 | struct ring_buffer_event *event; |
1836 | 1871 | ||
1837 | if (!cpu_isset(cpu, buffer->cpumask)) | 1872 | if (!cpu_isset(cpu, buffer->cpumask)) |
1838 | return NULL; | 1873 | return NULL; |
1839 | 1874 | ||
1840 | event = ring_buffer_peek(buffer, cpu, ts); | 1875 | event = ring_buffer_peek(buffer, cpu, ts); |
1841 | if (!event) | 1876 | if (!event) |
1842 | return NULL; | 1877 | return NULL; |
1843 | 1878 | ||
1844 | cpu_buffer = buffer->buffers[cpu]; | 1879 | cpu_buffer = buffer->buffers[cpu]; |
1845 | rb_advance_reader(cpu_buffer); | 1880 | rb_advance_reader(cpu_buffer); |
1846 | 1881 | ||
1847 | return event; | 1882 | return event; |
1848 | } | 1883 | } |
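And a draining loop built on the consuming read above. The event accessors used are the public ring_buffer_event_data()/ring_buffer_event_length() helpers; the per-event processing is left as a comment.

        #include <linux/ring_buffer.h>

        /* Sketch: empty one CPU buffer, event by event. */
        static void drain_cpu(struct ring_buffer *buffer, int cpu)
        {
                struct ring_buffer_event *event;
                u64 ts;

                while ((event = ring_buffer_consume(buffer, cpu, &ts))) {
                        void *data = ring_buffer_event_data(event);
                        unsigned int len = ring_buffer_event_length(event);

                        /* hand data/len and the timestamp ts to the reader here */
                        (void)data;
                        (void)len;
                }
        }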
1849 | 1884 | ||
1850 | /** | 1885 | /** |
1851 | * ring_buffer_read_start - start a non consuming read of the buffer | 1886 | * ring_buffer_read_start - start a non consuming read of the buffer |
1852 | * @buffer: The ring buffer to read from | 1887 | * @buffer: The ring buffer to read from |
1853 | * @cpu: The cpu buffer to iterate over | 1888 | * @cpu: The cpu buffer to iterate over |
1854 | * | 1889 | * |
1855 | * This starts up an iteration through the buffer. It also disables | 1890 | * This starts up an iteration through the buffer. It also disables |
1856 | * the recording to the buffer until the reading is finished. | 1891 | * the recording to the buffer until the reading is finished. |
1857 | * This prevents the reading from being corrupted. This is not | 1892 | * This prevents the reading from being corrupted. This is not |
1858 | * a consuming read, so a producer is not expected. | 1893 | * a consuming read, so a producer is not expected. |
1859 | * | 1894 | * |
1860 | * Must be paired with ring_buffer_read_finish. | 1895 | * Must be paired with ring_buffer_read_finish. |
1861 | */ | 1896 | */ |
1862 | struct ring_buffer_iter * | 1897 | struct ring_buffer_iter * |
1863 | ring_buffer_read_start(struct ring_buffer *buffer, int cpu) | 1898 | ring_buffer_read_start(struct ring_buffer *buffer, int cpu) |
1864 | { | 1899 | { |
1865 | struct ring_buffer_per_cpu *cpu_buffer; | 1900 | struct ring_buffer_per_cpu *cpu_buffer; |
1866 | struct ring_buffer_iter *iter; | 1901 | struct ring_buffer_iter *iter; |
1867 | unsigned long flags; | 1902 | unsigned long flags; |
1868 | 1903 | ||
1869 | if (!cpu_isset(cpu, buffer->cpumask)) | 1904 | if (!cpu_isset(cpu, buffer->cpumask)) |
1870 | return NULL; | 1905 | return NULL; |
1871 | 1906 | ||
1872 | iter = kmalloc(sizeof(*iter), GFP_KERNEL); | 1907 | iter = kmalloc(sizeof(*iter), GFP_KERNEL); |
1873 | if (!iter) | 1908 | if (!iter) |
1874 | return NULL; | 1909 | return NULL; |
1875 | 1910 | ||
1876 | cpu_buffer = buffer->buffers[cpu]; | 1911 | cpu_buffer = buffer->buffers[cpu]; |
1877 | 1912 | ||
1878 | iter->cpu_buffer = cpu_buffer; | 1913 | iter->cpu_buffer = cpu_buffer; |
1879 | 1914 | ||
1880 | atomic_inc(&cpu_buffer->record_disabled); | 1915 | atomic_inc(&cpu_buffer->record_disabled); |
1881 | synchronize_sched(); | 1916 | synchronize_sched(); |
1882 | 1917 | ||
1883 | spin_lock_irqsave(&cpu_buffer->lock, flags); | 1918 | spin_lock_irqsave(&cpu_buffer->lock, flags); |
1884 | ring_buffer_iter_reset(iter); | 1919 | ring_buffer_iter_reset(iter); |
1885 | spin_unlock_irqrestore(&cpu_buffer->lock, flags); | 1920 | spin_unlock_irqrestore(&cpu_buffer->lock, flags); |
1886 | 1921 | ||
1887 | return iter; | 1922 | return iter; |
1888 | } | 1923 | } |
1889 | 1924 | ||
1890 | /** | 1925 | /** |
1891 | * ring_buffer_read_finish - finish reading the iterator of the buffer | 1926 | * ring_buffer_read_finish - finish reading the iterator of the buffer |
1892 | * @iter: The iterator retrieved by ring_buffer_read_start | 1927 | * @iter: The iterator retrieved by ring_buffer_read_start |
1893 | * | 1928 | * |
1894 | * This re-enables the recording to the buffer, and frees the | 1929 | * This re-enables the recording to the buffer, and frees the |
1895 | * iterator. | 1930 | * iterator. |
1896 | */ | 1931 | */ |
1897 | void | 1932 | void |
1898 | ring_buffer_read_finish(struct ring_buffer_iter *iter) | 1933 | ring_buffer_read_finish(struct ring_buffer_iter *iter) |
1899 | { | 1934 | { |
1900 | struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; | 1935 | struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; |
1901 | 1936 | ||
1902 | atomic_dec(&cpu_buffer->record_disabled); | 1937 | atomic_dec(&cpu_buffer->record_disabled); |
1903 | kfree(iter); | 1938 | kfree(iter); |
1904 | } | 1939 | } |
1905 | 1940 | ||
1906 | /** | 1941 | /** |
1907 | * ring_buffer_read - read the next item in the ring buffer by the iterator | 1942 | * ring_buffer_read - read the next item in the ring buffer by the iterator |
1908 | * @iter: The ring buffer iterator | 1943 | * @iter: The ring buffer iterator |
1909 | * @ts: The time stamp of the event read. | 1944 | * @ts: The time stamp of the event read. |
1910 | * | 1945 | * |
1911 | * This reads the next event in the ring buffer and increments the iterator. | 1946 | * This reads the next event in the ring buffer and increments the iterator. |
1912 | */ | 1947 | */ |
1913 | struct ring_buffer_event * | 1948 | struct ring_buffer_event * |
1914 | ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts) | 1949 | ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts) |
1915 | { | 1950 | { |
1916 | struct ring_buffer_event *event; | 1951 | struct ring_buffer_event *event; |
1917 | 1952 | ||
1918 | event = ring_buffer_iter_peek(iter, ts); | 1953 | event = ring_buffer_iter_peek(iter, ts); |
1919 | if (!event) | 1954 | if (!event) |
1920 | return NULL; | 1955 | return NULL; |
1921 | 1956 | ||
1922 | rb_advance_iter(iter); | 1957 | rb_advance_iter(iter); |
1923 | 1958 | ||
1924 | return event; | 1959 | return event; |
1925 | } | 1960 | } |
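A sketch of a full non-consuming pass using the iterator API documented above. Recording on that CPU buffer stays disabled between read_start and read_finish, as the comments note.

        #include <linux/ring_buffer.h>

        /* Sketch: iterate one CPU buffer without consuming its events. */
        static void walk_cpu(struct ring_buffer *buffer, int cpu)
        {
                struct ring_buffer_iter *iter;
                struct ring_buffer_event *event;
                u64 ts;

                iter = ring_buffer_read_start(buffer, cpu);
                if (!iter)
                        return;

                while ((event = ring_buffer_read(iter, &ts))) {
                        /* inspect ring_buffer_event_data(event); nothing is consumed */
                }

                ring_buffer_read_finish(iter);  /* re-enables recording, frees iter */
        }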
1926 | 1961 | ||
1927 | /** | 1962 | /** |
1928 | * ring_buffer_size - return the size of the ring buffer (in bytes) | 1963 | * ring_buffer_size - return the size of the ring buffer (in bytes) |
1929 | * @buffer: The ring buffer. | 1964 | * @buffer: The ring buffer. |
1930 | */ | 1965 | */ |
1931 | unsigned long ring_buffer_size(struct ring_buffer *buffer) | 1966 | unsigned long ring_buffer_size(struct ring_buffer *buffer) |
1932 | { | 1967 | { |
1933 | return BUF_PAGE_SIZE * buffer->pages; | 1968 | return BUF_PAGE_SIZE * buffer->pages; |
1934 | } | 1969 | } |
1935 | 1970 | ||
1936 | static void | 1971 | static void |
1937 | rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) | 1972 | rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) |
1938 | { | 1973 | { |
1939 | cpu_buffer->head_page | 1974 | cpu_buffer->head_page |
1940 | = list_entry(cpu_buffer->pages.next, struct buffer_page, list); | 1975 | = list_entry(cpu_buffer->pages.next, struct buffer_page, list); |
1941 | local_set(&cpu_buffer->head_page->write, 0); | 1976 | local_set(&cpu_buffer->head_page->write, 0); |
1942 | local_set(&cpu_buffer->head_page->commit, 0); | 1977 | local_set(&cpu_buffer->head_page->commit, 0); |
1943 | 1978 | ||
1944 | cpu_buffer->head_page->read = 0; | 1979 | cpu_buffer->head_page->read = 0; |
1945 | 1980 | ||
1946 | cpu_buffer->tail_page = cpu_buffer->head_page; | 1981 | cpu_buffer->tail_page = cpu_buffer->head_page; |
1947 | cpu_buffer->commit_page = cpu_buffer->head_page; | 1982 | cpu_buffer->commit_page = cpu_buffer->head_page; |
1948 | 1983 | ||
1949 | INIT_LIST_HEAD(&cpu_buffer->reader_page->list); | 1984 | INIT_LIST_HEAD(&cpu_buffer->reader_page->list); |
1950 | local_set(&cpu_buffer->reader_page->write, 0); | 1985 | local_set(&cpu_buffer->reader_page->write, 0); |
1951 | local_set(&cpu_buffer->reader_page->commit, 0); | 1986 | local_set(&cpu_buffer->reader_page->commit, 0); |
1952 | cpu_buffer->reader_page->read = 0; | 1987 | cpu_buffer->reader_page->read = 0; |
1953 | 1988 | ||
1954 | cpu_buffer->overrun = 0; | 1989 | cpu_buffer->overrun = 0; |
1955 | cpu_buffer->entries = 0; | 1990 | cpu_buffer->entries = 0; |
1956 | } | 1991 | } |
1957 | 1992 | ||
1958 | /** | 1993 | /** |
1959 | * ring_buffer_reset_cpu - reset a ring buffer per CPU buffer | 1994 | * ring_buffer_reset_cpu - reset a ring buffer per CPU buffer |
1960 | * @buffer: The ring buffer to reset a per cpu buffer of | 1995 | * @buffer: The ring buffer to reset a per cpu buffer of |
1961 | * @cpu: The CPU buffer to be reset | 1996 | * @cpu: The CPU buffer to be reset |
1962 | */ | 1997 | */ |
1963 | void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu) | 1998 | void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu) |
1964 | { | 1999 | { |
1965 | struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; | 2000 | struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; |
1966 | unsigned long flags; | 2001 | unsigned long flags; |
1967 | 2002 | ||
1968 | if (!cpu_isset(cpu, buffer->cpumask)) | 2003 | if (!cpu_isset(cpu, buffer->cpumask)) |
1969 | return; | 2004 | return; |
1970 | 2005 | ||
1971 | spin_lock_irqsave(&cpu_buffer->lock, flags); | 2006 | spin_lock_irqsave(&cpu_buffer->lock, flags); |
1972 | 2007 | ||
1973 | rb_reset_cpu(cpu_buffer); | 2008 | rb_reset_cpu(cpu_buffer); |
1974 | 2009 | ||
1975 | spin_unlock_irqrestore(&cpu_buffer->lock, flags); | 2010 | spin_unlock_irqrestore(&cpu_buffer->lock, flags); |
1976 | } | 2011 | } |
1977 | 2012 | ||
1978 | /** | 2013 | /** |
1979 | * ring_buffer_reset - reset a ring buffer | 2014 | * ring_buffer_reset - reset a ring buffer |
1980 | * @buffer: The ring buffer to reset all cpu buffers | 2015 | * @buffer: The ring buffer to reset all cpu buffers |
1981 | */ | 2016 | */ |
1982 | void ring_buffer_reset(struct ring_buffer *buffer) | 2017 | void ring_buffer_reset(struct ring_buffer *buffer) |
1983 | { | 2018 | { |
1984 | int cpu; | 2019 | int cpu; |
1985 | 2020 | ||
1986 | for_each_buffer_cpu(buffer, cpu) | 2021 | for_each_buffer_cpu(buffer, cpu) |
1987 | ring_buffer_reset_cpu(buffer, cpu); | 2022 | ring_buffer_reset_cpu(buffer, cpu); |
1988 | } | 2023 | } |
1989 | 2024 | ||
1990 | /** | 2025 | /** |
1991 | * ring_buffer_empty - is the ring buffer empty? | 2026 | * ring_buffer_empty - is the ring buffer empty? |
1992 | * @buffer: The ring buffer to test | 2027 | * @buffer: The ring buffer to test |
1993 | */ | 2028 | */ |
1994 | int ring_buffer_empty(struct ring_buffer *buffer) | 2029 | int ring_buffer_empty(struct ring_buffer *buffer) |
1995 | { | 2030 | { |
1996 | struct ring_buffer_per_cpu *cpu_buffer; | 2031 | struct ring_buffer_per_cpu *cpu_buffer; |
1997 | int cpu; | 2032 | int cpu; |
1998 | 2033 | ||
1999 | /* yes this is racy, but if you don't like the race, lock the buffer */ | 2034 | /* yes this is racy, but if you don't like the race, lock the buffer */ |
2000 | for_each_buffer_cpu(buffer, cpu) { | 2035 | for_each_buffer_cpu(buffer, cpu) { |
2001 | cpu_buffer = buffer->buffers[cpu]; | 2036 | cpu_buffer = buffer->buffers[cpu]; |
2002 | if (!rb_per_cpu_empty(cpu_buffer)) | 2037 | if (!rb_per_cpu_empty(cpu_buffer)) |
2003 | return 0; | 2038 | return 0; |
2004 | } | 2039 | } |
2005 | return 1; | 2040 | return 1; |
2006 | } | 2041 | } |
2007 | 2042 | ||
2008 | /** | 2043 | /** |
2009 | * ring_buffer_empty_cpu - is a cpu buffer of a ring buffer empty? | 2044 | * ring_buffer_empty_cpu - is a cpu buffer of a ring buffer empty? |
2010 | * @buffer: The ring buffer | 2045 | * @buffer: The ring buffer |
2011 | * @cpu: The CPU buffer to test | 2046 | * @cpu: The CPU buffer to test |
2012 | */ | 2047 | */ |
2013 | int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu) | 2048 | int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu) |
2014 | { | 2049 | { |
2015 | struct ring_buffer_per_cpu *cpu_buffer; | 2050 | struct ring_buffer_per_cpu *cpu_buffer; |
2016 | 2051 | ||
2017 | if (!cpu_isset(cpu, buffer->cpumask)) | 2052 | if (!cpu_isset(cpu, buffer->cpumask)) |
2018 | return 1; | 2053 | return 1; |
2019 | 2054 | ||
2020 | cpu_buffer = buffer->buffers[cpu]; | 2055 | cpu_buffer = buffer->buffers[cpu]; |
2021 | return rb_per_cpu_empty(cpu_buffer); | 2056 | return rb_per_cpu_empty(cpu_buffer); |
2022 | } | 2057 | } |
2023 | 2058 | ||
2024 | /** | 2059 | /** |
2025 | * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers | 2060 | * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers |
2026 | * @buffer_a: One buffer to swap with | 2061 | * @buffer_a: One buffer to swap with |
2027 | * @buffer_b: The other buffer to swap with | 2062 | * @buffer_b: The other buffer to swap with |
2028 | * | 2063 | * |
2029 | * This function is useful for tracers that want to take a "snapshot" | 2064 | * This function is useful for tracers that want to take a "snapshot" |
2030 | * of a CPU buffer and have another backup buffer lying around. | 2065 | * of a CPU buffer and have another backup buffer lying around. |
2031 | * It is expected that the tracer handles the cpu buffer not being | 2066 | * It is expected that the tracer handles the cpu buffer not being |
2032 | * used at the moment. | 2067 | * used at the moment. |
2033 | */ | 2068 | */ |
2034 | int ring_buffer_swap_cpu(struct ring_buffer *buffer_a, | 2069 | int ring_buffer_swap_cpu(struct ring_buffer *buffer_a, |
2035 | struct ring_buffer *buffer_b, int cpu) | 2070 | struct ring_buffer *buffer_b, int cpu) |
2036 | { | 2071 | { |
2037 | struct ring_buffer_per_cpu *cpu_buffer_a; | 2072 | struct ring_buffer_per_cpu *cpu_buffer_a; |
2038 | struct ring_buffer_per_cpu *cpu_buffer_b; | 2073 | struct ring_buffer_per_cpu *cpu_buffer_b; |
2039 | 2074 | ||
2040 | if (!cpu_isset(cpu, buffer_a->cpumask) || | 2075 | if (!cpu_isset(cpu, buffer_a->cpumask) || |
2041 | !cpu_isset(cpu, buffer_b->cpumask)) | 2076 | !cpu_isset(cpu, buffer_b->cpumask)) |
2042 | return -EINVAL; | 2077 | return -EINVAL; |
2043 | 2078 | ||
2044 | /* At least make sure the two buffers are somewhat the same */ | 2079 | /* At least make sure the two buffers are somewhat the same */ |
2045 | if (buffer_a->size != buffer_b->size || | 2080 | if (buffer_a->size != buffer_b->size || |
2046 | buffer_a->pages != buffer_b->pages) | 2081 | buffer_a->pages != buffer_b->pages) |
2047 | return -EINVAL; | 2082 | return -EINVAL; |
2048 | 2083 | ||
2049 | cpu_buffer_a = buffer_a->buffers[cpu]; | 2084 | cpu_buffer_a = buffer_a->buffers[cpu]; |
2050 | cpu_buffer_b = buffer_b->buffers[cpu]; | 2085 | cpu_buffer_b = buffer_b->buffers[cpu]; |
2051 | 2086 | ||
2052 | /* | 2087 | /* |
2053 | * We can't do a synchronize_sched here because this | 2088 | * We can't do a synchronize_sched here because this |
2054 | * function can be called in atomic context. | 2089 | * function can be called in atomic context. |
2055 | * Normally this will be called from the same CPU as cpu. | 2090 | * Normally this will be called from the same CPU as cpu. |
2056 | * If not it's up to the caller to protect this. | 2091 | * If not it's up to the caller to protect this. |
2057 | */ | 2092 | */ |
2058 | atomic_inc(&cpu_buffer_a->record_disabled); | 2093 | atomic_inc(&cpu_buffer_a->record_disabled); |
2059 | atomic_inc(&cpu_buffer_b->record_disabled); | 2094 | atomic_inc(&cpu_buffer_b->record_disabled); |
2060 | 2095 | ||
2061 | buffer_a->buffers[cpu] = cpu_buffer_b; | 2096 | buffer_a->buffers[cpu] = cpu_buffer_b; |
2062 | buffer_b->buffers[cpu] = cpu_buffer_a; | 2097 | buffer_b->buffers[cpu] = cpu_buffer_a; |
2063 | 2098 | ||
2064 | cpu_buffer_b->buffer = buffer_a; | 2099 | cpu_buffer_b->buffer = buffer_a; |
2065 | cpu_buffer_a->buffer = buffer_b; | 2100 | cpu_buffer_a->buffer = buffer_b; |
2066 | 2101 | ||
2067 | atomic_dec(&cpu_buffer_a->record_disabled); | 2102 | atomic_dec(&cpu_buffer_a->record_disabled); |
2068 | atomic_dec(&cpu_buffer_b->record_disabled); | 2103 | atomic_dec(&cpu_buffer_b->record_disabled); |
2069 | 2104 | ||
2070 | return 0; | 2105 | return 0; |
2071 | } | 2106 | } |
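A minimal sketch of the snapshot use the comment above mentions. live and spare are assumed to have been allocated with the same size so the swap does not return -EINVAL, and the caller is assumed to run on the CPU in question or to provide its own protection.

        #include <linux/ring_buffer.h>

        /* Sketch: capture one CPU's data by swapping in a spare buffer. */
        static int snapshot_cpu(struct ring_buffer *live,
                                struct ring_buffer *spare, int cpu)
        {
                int ret;

                ret = ring_buffer_swap_cpu(live, spare, cpu);
                if (ret)
                        return ret;

                /* 'spare' now holds the captured events; read them at leisure. */
                return 0;
        }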
2107 | |||
2108 | static ssize_t | ||
2109 | rb_simple_read(struct file *filp, char __user *ubuf, | ||
2110 | size_t cnt, loff_t *ppos) | ||
2111 | { | ||
2112 | int *p = filp->private_data; | ||
2113 | char buf[64]; | ||
2114 | int r; | ||
2115 | |||
2116 | /* !ring_buffers_off == tracing_on */ | ||
2117 | r = sprintf(buf, "%d\n", !*p); | ||
2118 | |||
2119 | return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); | ||
2120 | } | ||
2121 | |||
2122 | static ssize_t | ||
2123 | rb_simple_write(struct file *filp, const char __user *ubuf, | ||
2124 | size_t cnt, loff_t *ppos) | ||
2125 | { | ||
2126 | int *p = filp->private_data; | ||
2127 | char buf[64]; | ||
2128 | long val; | ||
2129 | int ret; | ||
2130 | |||
2131 | if (cnt >= sizeof(buf)) | ||
2132 | return -EINVAL; | ||
2133 | |||
2134 | if (copy_from_user(&buf, ubuf, cnt)) | ||
2135 | return -EFAULT; | ||
2136 | |||
2137 | buf[cnt] = 0; | ||
2138 | |||
2139 | ret = strict_strtoul(buf, 10, &val); | ||
2140 | if (ret < 0) | ||
2141 | return ret; | ||
2142 | |||
2143 | /* !ring_buffers_off == tracing_on */ | ||
2144 | *p = !val; | ||
2145 | |||
2146 | (*ppos)++; | ||
2147 | |||
2148 | return cnt; | ||
2149 | } | ||
2150 | |||
2151 | static struct file_operations rb_simple_fops = { | ||
2152 | .open = tracing_open_generic, | ||
2153 | .read = rb_simple_read, | ||
2154 | .write = rb_simple_write, | ||
2155 | }; | ||
2156 | |||
2157 | |||
2158 | static __init int rb_init_debugfs(void) | ||
2159 | { | ||
2160 | struct dentry *d_tracer; | ||
2161 | struct dentry *entry; | ||
2162 | |||
2163 | d_tracer = tracing_init_dentry(); | ||
2164 | |||
2165 | entry = debugfs_create_file("tracing_on", 0644, d_tracer, | ||
2166 | &ring_buffers_off, &rb_simple_fops); | ||
2167 | if (!entry) | ||
2168 | pr_warning("Could not create debugfs 'tracing_on' entry\n"); | ||
2169 | |||
2170 | return 0; | ||
2171 | } | ||
2172 | |||
2173 | fs_initcall(rb_init_debugfs); | ||
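Finally, a hedged sketch of using the new switch from kernel code: tracing_off() clears the same global flag the tracing_on debugfs file registered above exposes, so the buffers can be frozen the moment a condition of interest is hit. The declaration is assumed to come from the ring buffer header touched by this patch, and the condition itself is hypothetical.

        #include <linux/ring_buffer.h>

        /* Sketch: freeze all ring buffers when something suspicious is seen. */
        static void check_condition(int looks_wrong)
        {
                if (looks_wrong)
                        tracing_off();  /* every ring buffer stops recording */
                /* userspace, or a later tracing_on() call, can flip the switch back */
        }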
2072 | 2174 |