Commit 884bfe89a462fcc85c8abd96171519cf2fe70929
Committed by Steven Rostedt
1 parent f43c738bfa
Exists in smarc-l5.0.0_1.0.0-ga and in 5 other branches
ring-buffer: Add a 'dropped events' counter
The existing 'overrun' counter is incremented when the ring buffer wraps around, with overflow on (the default). We wanted a way to count requests lost from the buffer filling up with overflow off, too. I decided to add a new counter instead of retro-fitting the existing one because it seems like a different statistic to count conceptually, and also because of how the code was structured.

Link: http://lkml.kernel.org/r/1310765038-26399-1-git-send-email-slavapestov@google.com
Signed-off-by: Slava Pestov <slavapestov@google.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
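As a rough sketch of how a caller might consume the new statistic, the snippet below sums the per-CPU counts through the ring_buffer_dropped_events_cpu() accessor that this commit exports. The helper name total_dropped_events() and the loop over online CPUs are assumptions made for illustration, not part of the patch; in practice a caller would usually query a single CPU's count alongside the existing overrun and commit-overrun counters.

#include <linux/ring_buffer.h>
#include <linux/cpumask.h>

/*
 * Illustration only: sum the per-CPU 'dropped events' counts.
 * With overwrite turned off (see ring_buffer_change_overwrite()),
 * writes that find the buffer full are rejected rather than wrapping,
 * and the new counter records those rejected events separately from
 * the existing 'overrun' counter.
 */
static unsigned long total_dropped_events(struct ring_buffer *buffer)
{
	unsigned long dropped = 0;
	int cpu;

	for_each_online_cpu(cpu)
		dropped += ring_buffer_dropped_events_cpu(buffer, cpu);

	return dropped;
}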
Showing 3 changed files with 39 additions and 6 deletions
include/linux/ring_buffer.h
1 | #ifndef _LINUX_RING_BUFFER_H | 1 | #ifndef _LINUX_RING_BUFFER_H |
2 | #define _LINUX_RING_BUFFER_H | 2 | #define _LINUX_RING_BUFFER_H |
3 | 3 | ||
4 | #include <linux/kmemcheck.h> | 4 | #include <linux/kmemcheck.h> |
5 | #include <linux/mm.h> | 5 | #include <linux/mm.h> |
6 | #include <linux/seq_file.h> | 6 | #include <linux/seq_file.h> |
7 | 7 | ||
8 | struct ring_buffer; | 8 | struct ring_buffer; |
9 | struct ring_buffer_iter; | 9 | struct ring_buffer_iter; |
10 | 10 | ||
11 | /* | 11 | /* |
12 | * Don't refer to this struct directly, use functions below. | 12 | * Don't refer to this struct directly, use functions below. |
13 | */ | 13 | */ |
14 | struct ring_buffer_event { | 14 | struct ring_buffer_event { |
15 | kmemcheck_bitfield_begin(bitfield); | 15 | kmemcheck_bitfield_begin(bitfield); |
16 | u32 type_len:5, time_delta:27; | 16 | u32 type_len:5, time_delta:27; |
17 | kmemcheck_bitfield_end(bitfield); | 17 | kmemcheck_bitfield_end(bitfield); |
18 | 18 | ||
19 | u32 array[]; | 19 | u32 array[]; |
20 | }; | 20 | }; |
21 | 21 | ||
22 | /** | 22 | /** |
23 | * enum ring_buffer_type - internal ring buffer types | 23 | * enum ring_buffer_type - internal ring buffer types |
24 | * | 24 | * |
25 | * @RINGBUF_TYPE_PADDING: Left over page padding or discarded event | 25 | * @RINGBUF_TYPE_PADDING: Left over page padding or discarded event |
26 | * If time_delta is 0: | 26 | * If time_delta is 0: |
27 | * array is ignored | 27 | * array is ignored |
28 | * size is variable depending on how much | 28 | * size is variable depending on how much |
29 | * padding is needed | 29 | * padding is needed |
30 | * If time_delta is non zero: | 30 | * If time_delta is non zero: |
31 | * array[0] holds the actual length | 31 | * array[0] holds the actual length |
32 | * size = 4 + length (bytes) | 32 | * size = 4 + length (bytes) |
33 | * | 33 | * |
34 | * @RINGBUF_TYPE_TIME_EXTEND: Extend the time delta | 34 | * @RINGBUF_TYPE_TIME_EXTEND: Extend the time delta |
35 | * array[0] = time delta (28 .. 59) | 35 | * array[0] = time delta (28 .. 59) |
36 | * size = 8 bytes | 36 | * size = 8 bytes |
37 | * | 37 | * |
38 | * @RINGBUF_TYPE_TIME_STAMP: Sync time stamp with external clock | 38 | * @RINGBUF_TYPE_TIME_STAMP: Sync time stamp with external clock |
39 | * array[0] = tv_nsec | 39 | * array[0] = tv_nsec |
40 | * array[1..2] = tv_sec | 40 | * array[1..2] = tv_sec |
41 | * size = 16 bytes | 41 | * size = 16 bytes |
42 | * | 42 | * |
43 | * <= @RINGBUF_TYPE_DATA_TYPE_LEN_MAX: | 43 | * <= @RINGBUF_TYPE_DATA_TYPE_LEN_MAX: |
44 | * Data record | 44 | * Data record |
45 | * If type_len is zero: | 45 | * If type_len is zero: |
46 | * array[0] holds the actual length | 46 | * array[0] holds the actual length |
47 | * array[1..(length+3)/4] holds data | 47 | * array[1..(length+3)/4] holds data |
48 | * size = 4 + length (bytes) | 48 | * size = 4 + length (bytes) |
49 | * else | 49 | * else |
50 | * length = type_len << 2 | 50 | * length = type_len << 2 |
51 | * array[0..(length+3)/4-1] holds data | 51 | * array[0..(length+3)/4-1] holds data |
52 | * size = 4 + length (bytes) | 52 | * size = 4 + length (bytes) |
53 | */ | 53 | */ |
54 | enum ring_buffer_type { | 54 | enum ring_buffer_type { |
55 | RINGBUF_TYPE_DATA_TYPE_LEN_MAX = 28, | 55 | RINGBUF_TYPE_DATA_TYPE_LEN_MAX = 28, |
56 | RINGBUF_TYPE_PADDING, | 56 | RINGBUF_TYPE_PADDING, |
57 | RINGBUF_TYPE_TIME_EXTEND, | 57 | RINGBUF_TYPE_TIME_EXTEND, |
58 | /* FIXME: RINGBUF_TYPE_TIME_STAMP not implemented */ | 58 | /* FIXME: RINGBUF_TYPE_TIME_STAMP not implemented */ |
59 | RINGBUF_TYPE_TIME_STAMP, | 59 | RINGBUF_TYPE_TIME_STAMP, |
60 | }; | 60 | }; |
61 | 61 | ||
62 | unsigned ring_buffer_event_length(struct ring_buffer_event *event); | 62 | unsigned ring_buffer_event_length(struct ring_buffer_event *event); |
63 | void *ring_buffer_event_data(struct ring_buffer_event *event); | 63 | void *ring_buffer_event_data(struct ring_buffer_event *event); |
64 | 64 | ||
65 | /* | 65 | /* |
66 | * ring_buffer_discard_commit will remove an event that has not | 66 | * ring_buffer_discard_commit will remove an event that has not |
67 | * ben committed yet. If this is used, then ring_buffer_unlock_commit | 67 | * ben committed yet. If this is used, then ring_buffer_unlock_commit |
68 | * must not be called on the discarded event. This function | 68 | * must not be called on the discarded event. This function |
69 | * will try to remove the event from the ring buffer completely | 69 | * will try to remove the event from the ring buffer completely |
70 | * if another event has not been written after it. | 70 | * if another event has not been written after it. |
71 | * | 71 | * |
72 | * Example use: | 72 | * Example use: |
73 | * | 73 | * |
74 | * if (some_condition) | 74 | * if (some_condition) |
75 | * ring_buffer_discard_commit(buffer, event); | 75 | * ring_buffer_discard_commit(buffer, event); |
76 | * else | 76 | * else |
77 | * ring_buffer_unlock_commit(buffer, event); | 77 | * ring_buffer_unlock_commit(buffer, event); |
78 | */ | 78 | */ |
79 | void ring_buffer_discard_commit(struct ring_buffer *buffer, | 79 | void ring_buffer_discard_commit(struct ring_buffer *buffer, |
80 | struct ring_buffer_event *event); | 80 | struct ring_buffer_event *event); |
81 | 81 | ||
82 | /* | 82 | /* |
83 | * size is in bytes for each per CPU buffer. | 83 | * size is in bytes for each per CPU buffer. |
84 | */ | 84 | */ |
85 | struct ring_buffer * | 85 | struct ring_buffer * |
86 | __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *key); | 86 | __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *key); |
87 | 87 | ||
88 | /* | 88 | /* |
89 | * Because the ring buffer is generic, if other users of the ring buffer get | 89 | * Because the ring buffer is generic, if other users of the ring buffer get |
90 | * traced by ftrace, it can produce lockdep warnings. We need to keep each | 90 | * traced by ftrace, it can produce lockdep warnings. We need to keep each |
91 | * ring buffer's lock class separate. | 91 | * ring buffer's lock class separate. |
92 | */ | 92 | */ |
93 | #define ring_buffer_alloc(size, flags) \ | 93 | #define ring_buffer_alloc(size, flags) \ |
94 | ({ \ | 94 | ({ \ |
95 | static struct lock_class_key __key; \ | 95 | static struct lock_class_key __key; \ |
96 | __ring_buffer_alloc((size), (flags), &__key); \ | 96 | __ring_buffer_alloc((size), (flags), &__key); \ |
97 | }) | 97 | }) |
98 | 98 | ||
99 | #define RING_BUFFER_ALL_CPUS -1 | 99 | #define RING_BUFFER_ALL_CPUS -1 |
100 | 100 | ||
101 | void ring_buffer_free(struct ring_buffer *buffer); | 101 | void ring_buffer_free(struct ring_buffer *buffer); |
102 | 102 | ||
103 | int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size, int cpu); | 103 | int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size, int cpu); |
104 | 104 | ||
105 | void ring_buffer_change_overwrite(struct ring_buffer *buffer, int val); | 105 | void ring_buffer_change_overwrite(struct ring_buffer *buffer, int val); |
106 | 106 | ||
107 | struct ring_buffer_event *ring_buffer_lock_reserve(struct ring_buffer *buffer, | 107 | struct ring_buffer_event *ring_buffer_lock_reserve(struct ring_buffer *buffer, |
108 | unsigned long length); | 108 | unsigned long length); |
109 | int ring_buffer_unlock_commit(struct ring_buffer *buffer, | 109 | int ring_buffer_unlock_commit(struct ring_buffer *buffer, |
110 | struct ring_buffer_event *event); | 110 | struct ring_buffer_event *event); |
111 | int ring_buffer_write(struct ring_buffer *buffer, | 111 | int ring_buffer_write(struct ring_buffer *buffer, |
112 | unsigned long length, void *data); | 112 | unsigned long length, void *data); |
113 | 113 | ||
114 | struct ring_buffer_event * | 114 | struct ring_buffer_event * |
115 | ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts, | 115 | ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts, |
116 | unsigned long *lost_events); | 116 | unsigned long *lost_events); |
117 | struct ring_buffer_event * | 117 | struct ring_buffer_event * |
118 | ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts, | 118 | ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts, |
119 | unsigned long *lost_events); | 119 | unsigned long *lost_events); |
120 | 120 | ||
121 | struct ring_buffer_iter * | 121 | struct ring_buffer_iter * |
122 | ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu); | 122 | ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu); |
123 | void ring_buffer_read_prepare_sync(void); | 123 | void ring_buffer_read_prepare_sync(void); |
124 | void ring_buffer_read_start(struct ring_buffer_iter *iter); | 124 | void ring_buffer_read_start(struct ring_buffer_iter *iter); |
125 | void ring_buffer_read_finish(struct ring_buffer_iter *iter); | 125 | void ring_buffer_read_finish(struct ring_buffer_iter *iter); |
126 | 126 | ||
127 | struct ring_buffer_event * | 127 | struct ring_buffer_event * |
128 | ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts); | 128 | ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts); |
129 | struct ring_buffer_event * | 129 | struct ring_buffer_event * |
130 | ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts); | 130 | ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts); |
131 | void ring_buffer_iter_reset(struct ring_buffer_iter *iter); | 131 | void ring_buffer_iter_reset(struct ring_buffer_iter *iter); |
132 | int ring_buffer_iter_empty(struct ring_buffer_iter *iter); | 132 | int ring_buffer_iter_empty(struct ring_buffer_iter *iter); |
133 | 133 | ||
134 | unsigned long ring_buffer_size(struct ring_buffer *buffer, int cpu); | 134 | unsigned long ring_buffer_size(struct ring_buffer *buffer, int cpu); |
135 | 135 | ||
136 | void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu); | 136 | void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu); |
137 | void ring_buffer_reset(struct ring_buffer *buffer); | 137 | void ring_buffer_reset(struct ring_buffer *buffer); |
138 | 138 | ||
139 | #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP | 139 | #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP |
140 | int ring_buffer_swap_cpu(struct ring_buffer *buffer_a, | 140 | int ring_buffer_swap_cpu(struct ring_buffer *buffer_a, |
141 | struct ring_buffer *buffer_b, int cpu); | 141 | struct ring_buffer *buffer_b, int cpu); |
142 | #else | 142 | #else |
143 | static inline int | 143 | static inline int |
144 | ring_buffer_swap_cpu(struct ring_buffer *buffer_a, | 144 | ring_buffer_swap_cpu(struct ring_buffer *buffer_a, |
145 | struct ring_buffer *buffer_b, int cpu) | 145 | struct ring_buffer *buffer_b, int cpu) |
146 | { | 146 | { |
147 | return -ENODEV; | 147 | return -ENODEV; |
148 | } | 148 | } |
149 | #endif | 149 | #endif |
150 | 150 | ||
151 | int ring_buffer_empty(struct ring_buffer *buffer); | 151 | int ring_buffer_empty(struct ring_buffer *buffer); |
152 | int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu); | 152 | int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu); |
153 | 153 | ||
154 | void ring_buffer_record_disable(struct ring_buffer *buffer); | 154 | void ring_buffer_record_disable(struct ring_buffer *buffer); |
155 | void ring_buffer_record_enable(struct ring_buffer *buffer); | 155 | void ring_buffer_record_enable(struct ring_buffer *buffer); |
156 | void ring_buffer_record_off(struct ring_buffer *buffer); | 156 | void ring_buffer_record_off(struct ring_buffer *buffer); |
157 | void ring_buffer_record_on(struct ring_buffer *buffer); | 157 | void ring_buffer_record_on(struct ring_buffer *buffer); |
158 | int ring_buffer_record_is_on(struct ring_buffer *buffer); | 158 | int ring_buffer_record_is_on(struct ring_buffer *buffer); |
159 | void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu); | 159 | void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu); |
160 | void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu); | 160 | void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu); |
161 | 161 | ||
162 | unsigned long ring_buffer_oldest_event_ts(struct ring_buffer *buffer, int cpu); | 162 | unsigned long ring_buffer_oldest_event_ts(struct ring_buffer *buffer, int cpu); |
163 | unsigned long ring_buffer_bytes_cpu(struct ring_buffer *buffer, int cpu); | 163 | unsigned long ring_buffer_bytes_cpu(struct ring_buffer *buffer, int cpu); |
164 | unsigned long ring_buffer_entries(struct ring_buffer *buffer); | 164 | unsigned long ring_buffer_entries(struct ring_buffer *buffer); |
165 | unsigned long ring_buffer_overruns(struct ring_buffer *buffer); | 165 | unsigned long ring_buffer_overruns(struct ring_buffer *buffer); |
166 | unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu); | 166 | unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu); |
167 | unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu); | 167 | unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu); |
168 | unsigned long ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu); | 168 | unsigned long ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu); |
| | 169 | unsigned long ring_buffer_dropped_events_cpu(struct ring_buffer *buffer, int cpu); |
169 | 170 | ||
170 | u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu); | 171 | u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu); |
171 | void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer, | 172 | void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer, |
172 | int cpu, u64 *ts); | 173 | int cpu, u64 *ts); |
173 | void ring_buffer_set_clock(struct ring_buffer *buffer, | 174 | void ring_buffer_set_clock(struct ring_buffer *buffer, |
174 | u64 (*clock)(void)); | 175 | u64 (*clock)(void)); |
175 | 176 | ||
176 | size_t ring_buffer_page_len(void *page); | 177 | size_t ring_buffer_page_len(void *page); |
177 | 178 | ||
178 | 179 | ||
179 | void *ring_buffer_alloc_read_page(struct ring_buffer *buffer, int cpu); | 180 | void *ring_buffer_alloc_read_page(struct ring_buffer *buffer, int cpu); |
180 | void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data); | 181 | void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data); |
181 | int ring_buffer_read_page(struct ring_buffer *buffer, void **data_page, | 182 | int ring_buffer_read_page(struct ring_buffer *buffer, void **data_page, |
182 | size_t len, int cpu, int full); | 183 | size_t len, int cpu, int full); |
183 | 184 | ||
184 | struct trace_seq; | 185 | struct trace_seq; |
185 | 186 | ||
186 | int ring_buffer_print_entry_header(struct trace_seq *s); | 187 | int ring_buffer_print_entry_header(struct trace_seq *s); |
187 | int ring_buffer_print_page_header(struct trace_seq *s); | 188 | int ring_buffer_print_page_header(struct trace_seq *s); |
188 | 189 | ||
189 | enum ring_buffer_flags { | 190 | enum ring_buffer_flags { |
190 | RB_FL_OVERWRITE = 1 << 0, | 191 | RB_FL_OVERWRITE = 1 << 0, |
191 | }; | 192 | }; |
192 | 193 | ||
193 | #endif /* _LINUX_RING_BUFFER_H */ | 194 | #endif /* _LINUX_RING_BUFFER_H */ |
194 | 195 |
kernel/trace/ring_buffer.c
1 | /* | 1 | /* |
2 | * Generic ring buffer | 2 | * Generic ring buffer |
3 | * | 3 | * |
4 | * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com> | 4 | * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com> |
5 | */ | 5 | */ |
6 | #include <linux/ring_buffer.h> | 6 | #include <linux/ring_buffer.h> |
7 | #include <linux/trace_clock.h> | 7 | #include <linux/trace_clock.h> |
8 | #include <linux/spinlock.h> | 8 | #include <linux/spinlock.h> |
9 | #include <linux/debugfs.h> | 9 | #include <linux/debugfs.h> |
10 | #include <linux/uaccess.h> | 10 | #include <linux/uaccess.h> |
11 | #include <linux/hardirq.h> | 11 | #include <linux/hardirq.h> |
12 | #include <linux/kmemcheck.h> | 12 | #include <linux/kmemcheck.h> |
13 | #include <linux/module.h> | 13 | #include <linux/module.h> |
14 | #include <linux/percpu.h> | 14 | #include <linux/percpu.h> |
15 | #include <linux/mutex.h> | 15 | #include <linux/mutex.h> |
16 | #include <linux/slab.h> | 16 | #include <linux/slab.h> |
17 | #include <linux/init.h> | 17 | #include <linux/init.h> |
18 | #include <linux/hash.h> | 18 | #include <linux/hash.h> |
19 | #include <linux/list.h> | 19 | #include <linux/list.h> |
20 | #include <linux/cpu.h> | 20 | #include <linux/cpu.h> |
21 | #include <linux/fs.h> | 21 | #include <linux/fs.h> |
22 | 22 | ||
23 | #include <asm/local.h> | 23 | #include <asm/local.h> |
24 | #include "trace.h" | 24 | #include "trace.h" |
25 | 25 | ||
26 | static void update_pages_handler(struct work_struct *work); | 26 | static void update_pages_handler(struct work_struct *work); |
27 | 27 | ||
28 | /* | 28 | /* |
29 | * The ring buffer header is special. We must manually up keep it. | 29 | * The ring buffer header is special. We must manually up keep it. |
30 | */ | 30 | */ |
31 | int ring_buffer_print_entry_header(struct trace_seq *s) | 31 | int ring_buffer_print_entry_header(struct trace_seq *s) |
32 | { | 32 | { |
33 | int ret; | 33 | int ret; |
34 | 34 | ||
35 | ret = trace_seq_printf(s, "# compressed entry header\n"); | 35 | ret = trace_seq_printf(s, "# compressed entry header\n"); |
36 | ret = trace_seq_printf(s, "\ttype_len : 5 bits\n"); | 36 | ret = trace_seq_printf(s, "\ttype_len : 5 bits\n"); |
37 | ret = trace_seq_printf(s, "\ttime_delta : 27 bits\n"); | 37 | ret = trace_seq_printf(s, "\ttime_delta : 27 bits\n"); |
38 | ret = trace_seq_printf(s, "\tarray : 32 bits\n"); | 38 | ret = trace_seq_printf(s, "\tarray : 32 bits\n"); |
39 | ret = trace_seq_printf(s, "\n"); | 39 | ret = trace_seq_printf(s, "\n"); |
40 | ret = trace_seq_printf(s, "\tpadding : type == %d\n", | 40 | ret = trace_seq_printf(s, "\tpadding : type == %d\n", |
41 | RINGBUF_TYPE_PADDING); | 41 | RINGBUF_TYPE_PADDING); |
42 | ret = trace_seq_printf(s, "\ttime_extend : type == %d\n", | 42 | ret = trace_seq_printf(s, "\ttime_extend : type == %d\n", |
43 | RINGBUF_TYPE_TIME_EXTEND); | 43 | RINGBUF_TYPE_TIME_EXTEND); |
44 | ret = trace_seq_printf(s, "\tdata max type_len == %d\n", | 44 | ret = trace_seq_printf(s, "\tdata max type_len == %d\n", |
45 | RINGBUF_TYPE_DATA_TYPE_LEN_MAX); | 45 | RINGBUF_TYPE_DATA_TYPE_LEN_MAX); |
46 | 46 | ||
47 | return ret; | 47 | return ret; |
48 | } | 48 | } |
49 | 49 | ||
50 | /* | 50 | /* |
51 | * The ring buffer is made up of a list of pages. A separate list of pages is | 51 | * The ring buffer is made up of a list of pages. A separate list of pages is |
52 | * allocated for each CPU. A writer may only write to a buffer that is | 52 | * allocated for each CPU. A writer may only write to a buffer that is |
53 | * associated with the CPU it is currently executing on. A reader may read | 53 | * associated with the CPU it is currently executing on. A reader may read |
54 | * from any per cpu buffer. | 54 | * from any per cpu buffer. |
55 | * | 55 | * |
56 | * The reader is special. For each per cpu buffer, the reader has its own | 56 | * The reader is special. For each per cpu buffer, the reader has its own |
57 | * reader page. When a reader has read the entire reader page, this reader | 57 | * reader page. When a reader has read the entire reader page, this reader |
58 | * page is swapped with another page in the ring buffer. | 58 | * page is swapped with another page in the ring buffer. |
59 | * | 59 | * |
60 | * Now, as long as the writer is off the reader page, the reader can do what | 60 | * Now, as long as the writer is off the reader page, the reader can do what |
61 | * ever it wants with that page. The writer will never write to that page | 61 | * ever it wants with that page. The writer will never write to that page |
62 | * again (as long as it is out of the ring buffer). | 62 | * again (as long as it is out of the ring buffer). |
63 | * | 63 | * |
64 | * Here's some silly ASCII art. | 64 | * Here's some silly ASCII art. |
65 | * | 65 | * |
66 | * +------+ | 66 | * +------+ |
67 | * |reader| RING BUFFER | 67 | * |reader| RING BUFFER |
68 | * |page | | 68 | * |page | |
69 | * +------+ +---+ +---+ +---+ | 69 | * +------+ +---+ +---+ +---+ |
70 | * | |-->| |-->| | | 70 | * | |-->| |-->| | |
71 | * +---+ +---+ +---+ | 71 | * +---+ +---+ +---+ |
72 | * ^ | | 72 | * ^ | |
73 | * | | | 73 | * | | |
74 | * +---------------+ | 74 | * +---------------+ |
75 | * | 75 | * |
76 | * | 76 | * |
77 | * +------+ | 77 | * +------+ |
78 | * |reader| RING BUFFER | 78 | * |reader| RING BUFFER |
79 | * |page |------------------v | 79 | * |page |------------------v |
80 | * +------+ +---+ +---+ +---+ | 80 | * +------+ +---+ +---+ +---+ |
81 | * | |-->| |-->| | | 81 | * | |-->| |-->| | |
82 | * +---+ +---+ +---+ | 82 | * +---+ +---+ +---+ |
83 | * ^ | | 83 | * ^ | |
84 | * | | | 84 | * | | |
85 | * +---------------+ | 85 | * +---------------+ |
86 | * | 86 | * |
87 | * | 87 | * |
88 | * +------+ | 88 | * +------+ |
89 | * |reader| RING BUFFER | 89 | * |reader| RING BUFFER |
90 | * |page |------------------v | 90 | * |page |------------------v |
91 | * +------+ +---+ +---+ +---+ | 91 | * +------+ +---+ +---+ +---+ |
92 | * ^ | |-->| |-->| | | 92 | * ^ | |-->| |-->| | |
93 | * | +---+ +---+ +---+ | 93 | * | +---+ +---+ +---+ |
94 | * | | | 94 | * | | |
95 | * | | | 95 | * | | |
96 | * +------------------------------+ | 96 | * +------------------------------+ |
97 | * | 97 | * |
98 | * | 98 | * |
99 | * +------+ | 99 | * +------+ |
100 | * |buffer| RING BUFFER | 100 | * |buffer| RING BUFFER |
101 | * |page |------------------v | 101 | * |page |------------------v |
102 | * +------+ +---+ +---+ +---+ | 102 | * +------+ +---+ +---+ +---+ |
103 | * ^ | | | |-->| | | 103 | * ^ | | | |-->| | |
104 | * | New +---+ +---+ +---+ | 104 | * | New +---+ +---+ +---+ |
105 | * | Reader------^ | | 105 | * | Reader------^ | |
106 | * | page | | 106 | * | page | |
107 | * +------------------------------+ | 107 | * +------------------------------+ |
108 | * | 108 | * |
109 | * | 109 | * |
110 | * After we make this swap, the reader can hand this page off to the splice | 110 | * After we make this swap, the reader can hand this page off to the splice |
111 | * code and be done with it. It can even allocate a new page if it needs to | 111 | * code and be done with it. It can even allocate a new page if it needs to |
112 | * and swap that into the ring buffer. | 112 | * and swap that into the ring buffer. |
113 | * | 113 | * |
114 | * We will be using cmpxchg soon to make all this lockless. | 114 | * We will be using cmpxchg soon to make all this lockless. |
115 | * | 115 | * |
116 | */ | 116 | */ |
117 | 117 | ||
118 | /* | 118 | /* |
119 | * A fast way to enable or disable all ring buffers is to | 119 | * A fast way to enable or disable all ring buffers is to |
120 | * call tracing_on or tracing_off. Turning off the ring buffers | 120 | * call tracing_on or tracing_off. Turning off the ring buffers |
121 | * prevents all ring buffers from being recorded to. | 121 | * prevents all ring buffers from being recorded to. |
122 | * Turning this switch on, makes it OK to write to the | 122 | * Turning this switch on, makes it OK to write to the |
123 | * ring buffer, if the ring buffer is enabled itself. | 123 | * ring buffer, if the ring buffer is enabled itself. |
124 | * | 124 | * |
125 | * There's three layers that must be on in order to write | 125 | * There's three layers that must be on in order to write |
126 | * to the ring buffer. | 126 | * to the ring buffer. |
127 | * | 127 | * |
128 | * 1) This global flag must be set. | 128 | * 1) This global flag must be set. |
129 | * 2) The ring buffer must be enabled for recording. | 129 | * 2) The ring buffer must be enabled for recording. |
130 | * 3) The per cpu buffer must be enabled for recording. | 130 | * 3) The per cpu buffer must be enabled for recording. |
131 | * | 131 | * |
132 | * In case of an anomaly, this global flag has a bit set that | 132 | * In case of an anomaly, this global flag has a bit set that |
133 | * will permantly disable all ring buffers. | 133 | * will permantly disable all ring buffers. |
134 | */ | 134 | */ |
135 | 135 | ||
136 | /* | 136 | /* |
137 | * Global flag to disable all recording to ring buffers | 137 | * Global flag to disable all recording to ring buffers |
138 | * This has two bits: ON, DISABLED | 138 | * This has two bits: ON, DISABLED |
139 | * | 139 | * |
140 | * ON DISABLED | 140 | * ON DISABLED |
141 | * ---- ---------- | 141 | * ---- ---------- |
142 | * 0 0 : ring buffers are off | 142 | * 0 0 : ring buffers are off |
143 | * 1 0 : ring buffers are on | 143 | * 1 0 : ring buffers are on |
144 | * X 1 : ring buffers are permanently disabled | 144 | * X 1 : ring buffers are permanently disabled |
145 | */ | 145 | */ |
146 | 146 | ||
147 | enum { | 147 | enum { |
148 | RB_BUFFERS_ON_BIT = 0, | 148 | RB_BUFFERS_ON_BIT = 0, |
149 | RB_BUFFERS_DISABLED_BIT = 1, | 149 | RB_BUFFERS_DISABLED_BIT = 1, |
150 | }; | 150 | }; |
151 | 151 | ||
152 | enum { | 152 | enum { |
153 | RB_BUFFERS_ON = 1 << RB_BUFFERS_ON_BIT, | 153 | RB_BUFFERS_ON = 1 << RB_BUFFERS_ON_BIT, |
154 | RB_BUFFERS_DISABLED = 1 << RB_BUFFERS_DISABLED_BIT, | 154 | RB_BUFFERS_DISABLED = 1 << RB_BUFFERS_DISABLED_BIT, |
155 | }; | 155 | }; |
156 | 156 | ||
157 | static unsigned long ring_buffer_flags __read_mostly = RB_BUFFERS_ON; | 157 | static unsigned long ring_buffer_flags __read_mostly = RB_BUFFERS_ON; |
158 | 158 | ||
159 | /* Used for individual buffers (after the counter) */ | 159 | /* Used for individual buffers (after the counter) */ |
160 | #define RB_BUFFER_OFF (1 << 20) | 160 | #define RB_BUFFER_OFF (1 << 20) |
161 | 161 | ||
162 | #define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data) | 162 | #define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data) |
163 | 163 | ||
164 | /** | 164 | /** |
165 | * tracing_off_permanent - permanently disable ring buffers | 165 | * tracing_off_permanent - permanently disable ring buffers |
166 | * | 166 | * |
167 | * This function, once called, will disable all ring buffers | 167 | * This function, once called, will disable all ring buffers |
168 | * permanently. | 168 | * permanently. |
169 | */ | 169 | */ |
170 | void tracing_off_permanent(void) | 170 | void tracing_off_permanent(void) |
171 | { | 171 | { |
172 | set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags); | 172 | set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags); |
173 | } | 173 | } |
174 | 174 | ||
175 | #define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array)) | 175 | #define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array)) |
176 | #define RB_ALIGNMENT 4U | 176 | #define RB_ALIGNMENT 4U |
177 | #define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) | 177 | #define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) |
178 | #define RB_EVNT_MIN_SIZE 8U /* two 32bit words */ | 178 | #define RB_EVNT_MIN_SIZE 8U /* two 32bit words */ |
179 | 179 | ||
180 | #if !defined(CONFIG_64BIT) || defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) | 180 | #if !defined(CONFIG_64BIT) || defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) |
181 | # define RB_FORCE_8BYTE_ALIGNMENT 0 | 181 | # define RB_FORCE_8BYTE_ALIGNMENT 0 |
182 | # define RB_ARCH_ALIGNMENT RB_ALIGNMENT | 182 | # define RB_ARCH_ALIGNMENT RB_ALIGNMENT |
183 | #else | 183 | #else |
184 | # define RB_FORCE_8BYTE_ALIGNMENT 1 | 184 | # define RB_FORCE_8BYTE_ALIGNMENT 1 |
185 | # define RB_ARCH_ALIGNMENT 8U | 185 | # define RB_ARCH_ALIGNMENT 8U |
186 | #endif | 186 | #endif |
187 | 187 | ||
188 | /* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */ | 188 | /* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */ |
189 | #define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX | 189 | #define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX |
190 | 190 | ||
191 | enum { | 191 | enum { |
192 | RB_LEN_TIME_EXTEND = 8, | 192 | RB_LEN_TIME_EXTEND = 8, |
193 | RB_LEN_TIME_STAMP = 16, | 193 | RB_LEN_TIME_STAMP = 16, |
194 | }; | 194 | }; |
195 | 195 | ||
196 | #define skip_time_extend(event) \ | 196 | #define skip_time_extend(event) \ |
197 | ((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND)) | 197 | ((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND)) |
198 | 198 | ||
199 | static inline int rb_null_event(struct ring_buffer_event *event) | 199 | static inline int rb_null_event(struct ring_buffer_event *event) |
200 | { | 200 | { |
201 | return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta; | 201 | return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta; |
202 | } | 202 | } |
203 | 203 | ||
204 | static void rb_event_set_padding(struct ring_buffer_event *event) | 204 | static void rb_event_set_padding(struct ring_buffer_event *event) |
205 | { | 205 | { |
206 | /* padding has a NULL time_delta */ | 206 | /* padding has a NULL time_delta */ |
207 | event->type_len = RINGBUF_TYPE_PADDING; | 207 | event->type_len = RINGBUF_TYPE_PADDING; |
208 | event->time_delta = 0; | 208 | event->time_delta = 0; |
209 | } | 209 | } |
210 | 210 | ||
211 | static unsigned | 211 | static unsigned |
212 | rb_event_data_length(struct ring_buffer_event *event) | 212 | rb_event_data_length(struct ring_buffer_event *event) |
213 | { | 213 | { |
214 | unsigned length; | 214 | unsigned length; |
215 | 215 | ||
216 | if (event->type_len) | 216 | if (event->type_len) |
217 | length = event->type_len * RB_ALIGNMENT; | 217 | length = event->type_len * RB_ALIGNMENT; |
218 | else | 218 | else |
219 | length = event->array[0]; | 219 | length = event->array[0]; |
220 | return length + RB_EVNT_HDR_SIZE; | 220 | return length + RB_EVNT_HDR_SIZE; |
221 | } | 221 | } |
222 | 222 | ||
223 | /* | 223 | /* |
224 | * Return the length of the given event. Will return | 224 | * Return the length of the given event. Will return |
225 | * the length of the time extend if the event is a | 225 | * the length of the time extend if the event is a |
226 | * time extend. | 226 | * time extend. |
227 | */ | 227 | */ |
228 | static inline unsigned | 228 | static inline unsigned |
229 | rb_event_length(struct ring_buffer_event *event) | 229 | rb_event_length(struct ring_buffer_event *event) |
230 | { | 230 | { |
231 | switch (event->type_len) { | 231 | switch (event->type_len) { |
232 | case RINGBUF_TYPE_PADDING: | 232 | case RINGBUF_TYPE_PADDING: |
233 | if (rb_null_event(event)) | 233 | if (rb_null_event(event)) |
234 | /* undefined */ | 234 | /* undefined */ |
235 | return -1; | 235 | return -1; |
236 | return event->array[0] + RB_EVNT_HDR_SIZE; | 236 | return event->array[0] + RB_EVNT_HDR_SIZE; |
237 | 237 | ||
238 | case RINGBUF_TYPE_TIME_EXTEND: | 238 | case RINGBUF_TYPE_TIME_EXTEND: |
239 | return RB_LEN_TIME_EXTEND; | 239 | return RB_LEN_TIME_EXTEND; |
240 | 240 | ||
241 | case RINGBUF_TYPE_TIME_STAMP: | 241 | case RINGBUF_TYPE_TIME_STAMP: |
242 | return RB_LEN_TIME_STAMP; | 242 | return RB_LEN_TIME_STAMP; |
243 | 243 | ||
244 | case RINGBUF_TYPE_DATA: | 244 | case RINGBUF_TYPE_DATA: |
245 | return rb_event_data_length(event); | 245 | return rb_event_data_length(event); |
246 | default: | 246 | default: |
247 | BUG(); | 247 | BUG(); |
248 | } | 248 | } |
249 | /* not hit */ | 249 | /* not hit */ |
250 | return 0; | 250 | return 0; |
251 | } | 251 | } |
252 | 252 | ||
253 | /* | 253 | /* |
254 | * Return total length of time extend and data, | 254 | * Return total length of time extend and data, |
255 | * or just the event length for all other events. | 255 | * or just the event length for all other events. |
256 | */ | 256 | */ |
257 | static inline unsigned | 257 | static inline unsigned |
258 | rb_event_ts_length(struct ring_buffer_event *event) | 258 | rb_event_ts_length(struct ring_buffer_event *event) |
259 | { | 259 | { |
260 | unsigned len = 0; | 260 | unsigned len = 0; |
261 | 261 | ||
262 | if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) { | 262 | if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) { |
263 | /* time extends include the data event after it */ | 263 | /* time extends include the data event after it */ |
264 | len = RB_LEN_TIME_EXTEND; | 264 | len = RB_LEN_TIME_EXTEND; |
265 | event = skip_time_extend(event); | 265 | event = skip_time_extend(event); |
266 | } | 266 | } |
267 | return len + rb_event_length(event); | 267 | return len + rb_event_length(event); |
268 | } | 268 | } |
269 | 269 | ||
270 | /** | 270 | /** |
271 | * ring_buffer_event_length - return the length of the event | 271 | * ring_buffer_event_length - return the length of the event |
272 | * @event: the event to get the length of | 272 | * @event: the event to get the length of |
273 | * | 273 | * |
274 | * Returns the size of the data load of a data event. | 274 | * Returns the size of the data load of a data event. |
275 | * If the event is something other than a data event, it | 275 | * If the event is something other than a data event, it |
276 | * returns the size of the event itself. With the exception | 276 | * returns the size of the event itself. With the exception |
277 | * of a TIME EXTEND, where it still returns the size of the | 277 | * of a TIME EXTEND, where it still returns the size of the |
278 | * data load of the data event after it. | 278 | * data load of the data event after it. |
279 | */ | 279 | */ |
280 | unsigned ring_buffer_event_length(struct ring_buffer_event *event) | 280 | unsigned ring_buffer_event_length(struct ring_buffer_event *event) |
281 | { | 281 | { |
282 | unsigned length; | 282 | unsigned length; |
283 | 283 | ||
284 | if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) | 284 | if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) |
285 | event = skip_time_extend(event); | 285 | event = skip_time_extend(event); |
286 | 286 | ||
287 | length = rb_event_length(event); | 287 | length = rb_event_length(event); |
288 | if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX) | 288 | if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX) |
289 | return length; | 289 | return length; |
290 | length -= RB_EVNT_HDR_SIZE; | 290 | length -= RB_EVNT_HDR_SIZE; |
291 | if (length > RB_MAX_SMALL_DATA + sizeof(event->array[0])) | 291 | if (length > RB_MAX_SMALL_DATA + sizeof(event->array[0])) |
292 | length -= sizeof(event->array[0]); | 292 | length -= sizeof(event->array[0]); |
293 | return length; | 293 | return length; |
294 | } | 294 | } |
295 | EXPORT_SYMBOL_GPL(ring_buffer_event_length); | 295 | EXPORT_SYMBOL_GPL(ring_buffer_event_length); |
296 | 296 | ||
297 | /* inline for ring buffer fast paths */ | 297 | /* inline for ring buffer fast paths */ |
298 | static void * | 298 | static void * |
299 | rb_event_data(struct ring_buffer_event *event) | 299 | rb_event_data(struct ring_buffer_event *event) |
300 | { | 300 | { |
301 | if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) | 301 | if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) |
302 | event = skip_time_extend(event); | 302 | event = skip_time_extend(event); |
303 | BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX); | 303 | BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX); |
304 | /* If length is in len field, then array[0] has the data */ | 304 | /* If length is in len field, then array[0] has the data */ |
305 | if (event->type_len) | 305 | if (event->type_len) |
306 | return (void *)&event->array[0]; | 306 | return (void *)&event->array[0]; |
307 | /* Otherwise length is in array[0] and array[1] has the data */ | 307 | /* Otherwise length is in array[0] and array[1] has the data */ |
308 | return (void *)&event->array[1]; | 308 | return (void *)&event->array[1]; |
309 | } | 309 | } |
310 | 310 | ||
311 | /** | 311 | /** |
312 | * ring_buffer_event_data - return the data of the event | 312 | * ring_buffer_event_data - return the data of the event |
313 | * @event: the event to get the data from | 313 | * @event: the event to get the data from |
314 | */ | 314 | */ |
315 | void *ring_buffer_event_data(struct ring_buffer_event *event) | 315 | void *ring_buffer_event_data(struct ring_buffer_event *event) |
316 | { | 316 | { |
317 | return rb_event_data(event); | 317 | return rb_event_data(event); |
318 | } | 318 | } |
319 | EXPORT_SYMBOL_GPL(ring_buffer_event_data); | 319 | EXPORT_SYMBOL_GPL(ring_buffer_event_data); |
320 | 320 | ||
321 | #define for_each_buffer_cpu(buffer, cpu) \ | 321 | #define for_each_buffer_cpu(buffer, cpu) \ |
322 | for_each_cpu(cpu, buffer->cpumask) | 322 | for_each_cpu(cpu, buffer->cpumask) |
323 | 323 | ||
324 | #define TS_SHIFT 27 | 324 | #define TS_SHIFT 27 |
325 | #define TS_MASK ((1ULL << TS_SHIFT) - 1) | 325 | #define TS_MASK ((1ULL << TS_SHIFT) - 1) |
326 | #define TS_DELTA_TEST (~TS_MASK) | 326 | #define TS_DELTA_TEST (~TS_MASK) |
327 | 327 | ||
328 | /* Flag when events were overwritten */ | 328 | /* Flag when events were overwritten */ |
329 | #define RB_MISSED_EVENTS (1 << 31) | 329 | #define RB_MISSED_EVENTS (1 << 31) |
330 | /* Missed count stored at end */ | 330 | /* Missed count stored at end */ |
331 | #define RB_MISSED_STORED (1 << 30) | 331 | #define RB_MISSED_STORED (1 << 30) |
332 | 332 | ||
333 | struct buffer_data_page { | 333 | struct buffer_data_page { |
334 | u64 time_stamp; /* page time stamp */ | 334 | u64 time_stamp; /* page time stamp */ |
335 | local_t commit; /* write committed index */ | 335 | local_t commit; /* write committed index */ |
336 | unsigned char data[]; /* data of buffer page */ | 336 | unsigned char data[]; /* data of buffer page */ |
337 | }; | 337 | }; |
338 | 338 | ||
339 | /* | 339 | /* |
340 | * Note, the buffer_page list must be first. The buffer pages | 340 | * Note, the buffer_page list must be first. The buffer pages |
341 | * are allocated in cache lines, which means that each buffer | 341 | * are allocated in cache lines, which means that each buffer |
342 | * page will be at the beginning of a cache line, and thus | 342 | * page will be at the beginning of a cache line, and thus |
343 | * the least significant bits will be zero. We use this to | 343 | * the least significant bits will be zero. We use this to |
344 | * add flags in the list struct pointers, to make the ring buffer | 344 | * add flags in the list struct pointers, to make the ring buffer |
345 | * lockless. | 345 | * lockless. |
346 | */ | 346 | */ |
347 | struct buffer_page { | 347 | struct buffer_page { |
348 | struct list_head list; /* list of buffer pages */ | 348 | struct list_head list; /* list of buffer pages */ |
349 | local_t write; /* index for next write */ | 349 | local_t write; /* index for next write */ |
350 | unsigned read; /* index for next read */ | 350 | unsigned read; /* index for next read */ |
351 | local_t entries; /* entries on this page */ | 351 | local_t entries; /* entries on this page */ |
352 | unsigned long real_end; /* real end of data */ | 352 | unsigned long real_end; /* real end of data */ |
353 | struct buffer_data_page *page; /* Actual data page */ | 353 | struct buffer_data_page *page; /* Actual data page */ |
354 | }; | 354 | }; |
355 | 355 | ||
356 | /* | 356 | /* |
357 | * The buffer page counters, write and entries, must be reset | 357 | * The buffer page counters, write and entries, must be reset |
358 | * atomically when crossing page boundaries. To synchronize this | 358 | * atomically when crossing page boundaries. To synchronize this |
359 | * update, two counters are inserted into the number. One is | 359 | * update, two counters are inserted into the number. One is |
360 | * the actual counter for the write position or count on the page. | 360 | * the actual counter for the write position or count on the page. |
361 | * | 361 | * |
362 | * The other is a counter of updaters. Before an update happens | 362 | * The other is a counter of updaters. Before an update happens |
363 | * the update partition of the counter is incremented. This will | 363 | * the update partition of the counter is incremented. This will |
364 | * allow the updater to update the counter atomically. | 364 | * allow the updater to update the counter atomically. |
365 | * | 365 | * |
366 | * The counter is 20 bits, and the state data is 12. | 366 | * The counter is 20 bits, and the state data is 12. |
367 | */ | 367 | */ |
368 | #define RB_WRITE_MASK 0xfffff | 368 | #define RB_WRITE_MASK 0xfffff |
369 | #define RB_WRITE_INTCNT (1 << 20) | 369 | #define RB_WRITE_INTCNT (1 << 20) |
370 | 370 | ||
371 | static void rb_init_page(struct buffer_data_page *bpage) | 371 | static void rb_init_page(struct buffer_data_page *bpage) |
372 | { | 372 | { |
373 | local_set(&bpage->commit, 0); | 373 | local_set(&bpage->commit, 0); |
374 | } | 374 | } |
375 | 375 | ||
376 | /** | 376 | /** |
377 | * ring_buffer_page_len - the size of data on the page. | 377 | * ring_buffer_page_len - the size of data on the page. |
378 | * @page: The page to read | 378 | * @page: The page to read |
379 | * | 379 | * |
380 | * Returns the amount of data on the page, including buffer page header. | 380 | * Returns the amount of data on the page, including buffer page header. |
381 | */ | 381 | */ |
382 | size_t ring_buffer_page_len(void *page) | 382 | size_t ring_buffer_page_len(void *page) |
383 | { | 383 | { |
384 | return local_read(&((struct buffer_data_page *)page)->commit) | 384 | return local_read(&((struct buffer_data_page *)page)->commit) |
385 | + BUF_PAGE_HDR_SIZE; | 385 | + BUF_PAGE_HDR_SIZE; |
386 | } | 386 | } |
387 | 387 | ||
388 | /* | 388 | /* |
389 | * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing | 389 | * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing |
390 | * this issue out. | 390 | * this issue out. |
391 | */ | 391 | */ |
392 | static void free_buffer_page(struct buffer_page *bpage) | 392 | static void free_buffer_page(struct buffer_page *bpage) |
393 | { | 393 | { |
394 | free_page((unsigned long)bpage->page); | 394 | free_page((unsigned long)bpage->page); |
395 | kfree(bpage); | 395 | kfree(bpage); |
396 | } | 396 | } |
397 | 397 | ||
398 | /* | 398 | /* |
399 | * We need to fit the time_stamp delta into 27 bits. | 399 | * We need to fit the time_stamp delta into 27 bits. |
400 | */ | 400 | */ |
401 | static inline int test_time_stamp(u64 delta) | 401 | static inline int test_time_stamp(u64 delta) |
402 | { | 402 | { |
403 | if (delta & TS_DELTA_TEST) | 403 | if (delta & TS_DELTA_TEST) |
404 | return 1; | 404 | return 1; |
405 | return 0; | 405 | return 0; |
406 | } | 406 | } |
407 | 407 | ||
408 | #define BUF_PAGE_SIZE (PAGE_SIZE - BUF_PAGE_HDR_SIZE) | 408 | #define BUF_PAGE_SIZE (PAGE_SIZE - BUF_PAGE_HDR_SIZE) |
409 | 409 | ||
410 | /* Max payload is BUF_PAGE_SIZE - header (8bytes) */ | 410 | /* Max payload is BUF_PAGE_SIZE - header (8bytes) */ |
411 | #define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2)) | 411 | #define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2)) |
412 | 412 | ||
413 | int ring_buffer_print_page_header(struct trace_seq *s) | 413 | int ring_buffer_print_page_header(struct trace_seq *s) |
414 | { | 414 | { |
415 | struct buffer_data_page field; | 415 | struct buffer_data_page field; |
416 | int ret; | 416 | int ret; |
417 | 417 | ||
418 | ret = trace_seq_printf(s, "\tfield: u64 timestamp;\t" | 418 | ret = trace_seq_printf(s, "\tfield: u64 timestamp;\t" |
419 | "offset:0;\tsize:%u;\tsigned:%u;\n", | 419 | "offset:0;\tsize:%u;\tsigned:%u;\n", |
420 | (unsigned int)sizeof(field.time_stamp), | 420 | (unsigned int)sizeof(field.time_stamp), |
421 | (unsigned int)is_signed_type(u64)); | 421 | (unsigned int)is_signed_type(u64)); |
422 | 422 | ||
423 | ret = trace_seq_printf(s, "\tfield: local_t commit;\t" | 423 | ret = trace_seq_printf(s, "\tfield: local_t commit;\t" |
424 | "offset:%u;\tsize:%u;\tsigned:%u;\n", | 424 | "offset:%u;\tsize:%u;\tsigned:%u;\n", |
425 | (unsigned int)offsetof(typeof(field), commit), | 425 | (unsigned int)offsetof(typeof(field), commit), |
426 | (unsigned int)sizeof(field.commit), | 426 | (unsigned int)sizeof(field.commit), |
427 | (unsigned int)is_signed_type(long)); | 427 | (unsigned int)is_signed_type(long)); |
428 | 428 | ||
429 | ret = trace_seq_printf(s, "\tfield: int overwrite;\t" | 429 | ret = trace_seq_printf(s, "\tfield: int overwrite;\t" |
430 | "offset:%u;\tsize:%u;\tsigned:%u;\n", | 430 | "offset:%u;\tsize:%u;\tsigned:%u;\n", |
431 | (unsigned int)offsetof(typeof(field), commit), | 431 | (unsigned int)offsetof(typeof(field), commit), |
432 | 1, | 432 | 1, |
433 | (unsigned int)is_signed_type(long)); | 433 | (unsigned int)is_signed_type(long)); |
434 | 434 | ||
435 | ret = trace_seq_printf(s, "\tfield: char data;\t" | 435 | ret = trace_seq_printf(s, "\tfield: char data;\t" |
436 | "offset:%u;\tsize:%u;\tsigned:%u;\n", | 436 | "offset:%u;\tsize:%u;\tsigned:%u;\n", |
437 | (unsigned int)offsetof(typeof(field), data), | 437 | (unsigned int)offsetof(typeof(field), data), |
438 | (unsigned int)BUF_PAGE_SIZE, | 438 | (unsigned int)BUF_PAGE_SIZE, |
439 | (unsigned int)is_signed_type(char)); | 439 | (unsigned int)is_signed_type(char)); |
440 | 440 | ||
441 | return ret; | 441 | return ret; |
442 | } | 442 | } |
443 | 443 | ||
444 | /* | 444 | /* |
445 | * head_page == tail_page && head == tail then buffer is empty. | 445 | * head_page == tail_page && head == tail then buffer is empty. |
446 | */ | 446 | */ |
447 | struct ring_buffer_per_cpu { | 447 | struct ring_buffer_per_cpu { |
448 | int cpu; | 448 | int cpu; |
449 | atomic_t record_disabled; | 449 | atomic_t record_disabled; |
450 | struct ring_buffer *buffer; | 450 | struct ring_buffer *buffer; |
451 | raw_spinlock_t reader_lock; /* serialize readers */ | 451 | raw_spinlock_t reader_lock; /* serialize readers */ |
452 | arch_spinlock_t lock; | 452 | arch_spinlock_t lock; |
453 | struct lock_class_key lock_key; | 453 | struct lock_class_key lock_key; |
454 | unsigned int nr_pages; | 454 | unsigned int nr_pages; |
455 | struct list_head *pages; | 455 | struct list_head *pages; |
456 | struct buffer_page *head_page; /* read from head */ | 456 | struct buffer_page *head_page; /* read from head */ |
457 | struct buffer_page *tail_page; /* write to tail */ | 457 | struct buffer_page *tail_page; /* write to tail */ |
458 | struct buffer_page *commit_page; /* committed pages */ | 458 | struct buffer_page *commit_page; /* committed pages */ |
459 | struct buffer_page *reader_page; | 459 | struct buffer_page *reader_page; |
460 | unsigned long lost_events; | 460 | unsigned long lost_events; |
461 | unsigned long last_overrun; | 461 | unsigned long last_overrun; |
462 | local_t entries_bytes; | 462 | local_t entries_bytes; |
463 | local_t commit_overrun; | | |
464 | local_t overrun; | | |
465 | local_t entries; | 463 | local_t entries; |
| | 464 | local_t overrun; |
| | 465 | local_t commit_overrun; |
| | 466 | local_t dropped_events; |
466 | local_t committing; | 467 | local_t committing; |
467 | local_t commits; | 468 | local_t commits; |
468 | unsigned long read; | 469 | unsigned long read; |
469 | unsigned long read_bytes; | 470 | unsigned long read_bytes; |
470 | u64 write_stamp; | 471 | u64 write_stamp; |
471 | u64 read_stamp; | 472 | u64 read_stamp; |
472 | /* ring buffer pages to update, > 0 to add, < 0 to remove */ | 473 | /* ring buffer pages to update, > 0 to add, < 0 to remove */ |
473 | int nr_pages_to_update; | 474 | int nr_pages_to_update; |
474 | struct list_head new_pages; /* new pages to add */ | 475 | struct list_head new_pages; /* new pages to add */ |
475 | struct work_struct update_pages_work; | 476 | struct work_struct update_pages_work; |
476 | struct completion update_done; | 477 | struct completion update_done; |
477 | }; | 478 | }; |
478 | 479 | ||
479 | struct ring_buffer { | 480 | struct ring_buffer { |
480 | unsigned flags; | 481 | unsigned flags; |
481 | int cpus; | 482 | int cpus; |
482 | atomic_t record_disabled; | 483 | atomic_t record_disabled; |
483 | atomic_t resize_disabled; | 484 | atomic_t resize_disabled; |
484 | cpumask_var_t cpumask; | 485 | cpumask_var_t cpumask; |
485 | 486 | ||
486 | struct lock_class_key *reader_lock_key; | 487 | struct lock_class_key *reader_lock_key; |
487 | 488 | ||
488 | struct mutex mutex; | 489 | struct mutex mutex; |
489 | 490 | ||
490 | struct ring_buffer_per_cpu **buffers; | 491 | struct ring_buffer_per_cpu **buffers; |
491 | 492 | ||
492 | #ifdef CONFIG_HOTPLUG_CPU | 493 | #ifdef CONFIG_HOTPLUG_CPU |
493 | struct notifier_block cpu_notify; | 494 | struct notifier_block cpu_notify; |
494 | #endif | 495 | #endif |
495 | u64 (*clock)(void); | 496 | u64 (*clock)(void); |
496 | }; | 497 | }; |
497 | 498 | ||
498 | struct ring_buffer_iter { | 499 | struct ring_buffer_iter { |
499 | struct ring_buffer_per_cpu *cpu_buffer; | 500 | struct ring_buffer_per_cpu *cpu_buffer; |
500 | unsigned long head; | 501 | unsigned long head; |
501 | struct buffer_page *head_page; | 502 | struct buffer_page *head_page; |
502 | struct buffer_page *cache_reader_page; | 503 | struct buffer_page *cache_reader_page; |
503 | unsigned long cache_read; | 504 | unsigned long cache_read; |
504 | u64 read_stamp; | 505 | u64 read_stamp; |
505 | }; | 506 | }; |
506 | 507 | ||
507 | /* buffer may be either ring_buffer or ring_buffer_per_cpu */ | 508 | /* buffer may be either ring_buffer or ring_buffer_per_cpu */ |
508 | #define RB_WARN_ON(b, cond) \ | 509 | #define RB_WARN_ON(b, cond) \ |
509 | ({ \ | 510 | ({ \ |
510 | int _____ret = unlikely(cond); \ | 511 | int _____ret = unlikely(cond); \ |
511 | if (_____ret) { \ | 512 | if (_____ret) { \ |
512 | if (__same_type(*(b), struct ring_buffer_per_cpu)) { \ | 513 | if (__same_type(*(b), struct ring_buffer_per_cpu)) { \ |
513 | struct ring_buffer_per_cpu *__b = \ | 514 | struct ring_buffer_per_cpu *__b = \ |
514 | (void *)b; \ | 515 | (void *)b; \ |
515 | atomic_inc(&__b->buffer->record_disabled); \ | 516 | atomic_inc(&__b->buffer->record_disabled); \ |
516 | } else \ | 517 | } else \ |
517 | atomic_inc(&b->record_disabled); \ | 518 | atomic_inc(&b->record_disabled); \ |
518 | WARN_ON(1); \ | 519 | WARN_ON(1); \ |
519 | } \ | 520 | } \ |
520 | _____ret; \ | 521 | _____ret; \ |
521 | }) | 522 | }) |
522 | 523 | ||
523 | /* Up this if you want to test the TIME_EXTENTS and normalization */ | 524 | /* Up this if you want to test the TIME_EXTENTS and normalization */ |
524 | #define DEBUG_SHIFT 0 | 525 | #define DEBUG_SHIFT 0 |
525 | 526 | ||
526 | static inline u64 rb_time_stamp(struct ring_buffer *buffer) | 527 | static inline u64 rb_time_stamp(struct ring_buffer *buffer) |
527 | { | 528 | { |
528 | /* shift to debug/test normalization and TIME_EXTENTS */ | 529 | /* shift to debug/test normalization and TIME_EXTENTS */ |
529 | return buffer->clock() << DEBUG_SHIFT; | 530 | return buffer->clock() << DEBUG_SHIFT; |
530 | } | 531 | } |
531 | 532 | ||
532 | u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu) | 533 | u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu) |
533 | { | 534 | { |
534 | u64 time; | 535 | u64 time; |
535 | 536 | ||
536 | preempt_disable_notrace(); | 537 | preempt_disable_notrace(); |
537 | time = rb_time_stamp(buffer); | 538 | time = rb_time_stamp(buffer); |
538 | preempt_enable_no_resched_notrace(); | 539 | preempt_enable_no_resched_notrace(); |
539 | 540 | ||
540 | return time; | 541 | return time; |
541 | } | 542 | } |
542 | EXPORT_SYMBOL_GPL(ring_buffer_time_stamp); | 543 | EXPORT_SYMBOL_GPL(ring_buffer_time_stamp); |
543 | 544 | ||
544 | void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer, | 545 | void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer, |
545 | int cpu, u64 *ts) | 546 | int cpu, u64 *ts) |
546 | { | 547 | { |
547 | /* Just stupid testing the normalize function and deltas */ | 548 | /* Just stupid testing the normalize function and deltas */ |
548 | *ts >>= DEBUG_SHIFT; | 549 | *ts >>= DEBUG_SHIFT; |
549 | } | 550 | } |
550 | EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp); | 551 | EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp); |
551 | 552 | ||
552 | /* | 553 | /* |
553 | * Making the ring buffer lockless makes things tricky. | 554 | * Making the ring buffer lockless makes things tricky. |
554 | * Although writes only happen on the CPU that they are on, | 555 | * Although writes only happen on the CPU that they are on, |
555 | * and they only need to worry about interrupts. Reads can | 556 | * and they only need to worry about interrupts. Reads can |
556 | * happen on any CPU. | 557 | * happen on any CPU. |
557 | * | 558 | * |
558 | * The reader page is always off the ring buffer, but when the | 559 | * The reader page is always off the ring buffer, but when the |
559 | * reader finishes with a page, it needs to swap its page with | 560 | * reader finishes with a page, it needs to swap its page with |
560 | * a new one from the buffer. The reader needs to take from | 561 | * a new one from the buffer. The reader needs to take from |
561 | * the head (writes go to the tail). But if a writer is in overwrite | 562 | * the head (writes go to the tail). But if a writer is in overwrite |
562 | * mode and wraps, it must push the head page forward. | 563 | * mode and wraps, it must push the head page forward. |
563 | * | 564 | * |
564 | * Here lies the problem. | 565 | * Here lies the problem. |
565 | * | 566 | * |
566 | * The reader must be careful to replace only the head page, and | 567 | * The reader must be careful to replace only the head page, and |
567 | * not another one. As described at the top of the file in the | 568 | * not another one. As described at the top of the file in the |
568 | * ASCII art, the reader sets its old page to point to the next | 569 | * ASCII art, the reader sets its old page to point to the next |
569 | * page after head. It then sets the page after head to point to | 570 | * page after head. It then sets the page after head to point to |
570 | * the old reader page. But if the writer moves the head page | 571 | * the old reader page. But if the writer moves the head page |
571 | * during this operation, the reader could end up with the tail. | 572 | * during this operation, the reader could end up with the tail. |
572 | * | 573 | * |
573 | * We use cmpxchg to help prevent this race. We also do something | 574 | * We use cmpxchg to help prevent this race. We also do something |
574 | * special with the page before head. We set the LSB to 1. | 575 | * special with the page before head. We set the LSB to 1. |
575 | * | 576 | * |
576 | * When the writer must push the page forward, it will clear the | 577 | * When the writer must push the page forward, it will clear the |
577 | * bit that points to the head page, move the head, and then set | 578 | * bit that points to the head page, move the head, and then set |
578 | * the bit that points to the new head page. | 579 | * the bit that points to the new head page. |
579 | * | 580 | * |
580 | * We also don't want an interrupt coming in and moving the head | 581 | * We also don't want an interrupt coming in and moving the head |
581 | * page on another writer. Thus we use the second LSB to catch | 582 | * page on another writer. Thus we use the second LSB to catch |
582 | * that too. Thus: | 583 | * that too. Thus: |
583 | * | 584 | * |
584 | * head->list->prev->next bit 1 bit 0 | 585 | * head->list->prev->next bit 1 bit 0 |
585 | * ------- ------- | 586 | * ------- ------- |
586 | * Normal page 0 0 | 587 | * Normal page 0 0 |
587 | * Points to head page 0 1 | 588 | * Points to head page 0 1 |
588 | * New head page 1 0 | 589 | * New head page 1 0 |
589 | * | 590 | * |
590 | * Note we can not trust the prev pointer of the head page, because: | 591 | * Note we can not trust the prev pointer of the head page, because: |
591 | * | 592 | * |
592 | * +----+ +-----+ +-----+ | 593 | * +----+ +-----+ +-----+ |
593 | * | |------>| T |---X--->| N | | 594 | * | |------>| T |---X--->| N | |
594 | * | |<------| | | | | 595 | * | |<------| | | | |
595 | * +----+ +-----+ +-----+ | 596 | * +----+ +-----+ +-----+ |
596 | * ^ ^ | | 597 | * ^ ^ | |
597 | * | +-----+ | | | 598 | * | +-----+ | | |
598 | * +----------| R |----------+ | | 599 | * +----------| R |----------+ | |
599 | * | |<-----------+ | 600 | * | |<-----------+ |
600 | * +-----+ | 601 | * +-----+ |
601 | * | 602 | * |
602 | * Key: ---X--> HEAD flag set in pointer | 603 | * Key: ---X--> HEAD flag set in pointer |
603 | * T Tail page | 604 | * T Tail page |
604 | * R Reader page | 605 | * R Reader page |
605 | * N Next page | 606 | * N Next page |
606 | * | 607 | * |
607 | * (see __rb_reserve_next() to see where this happens) | 608 | * (see __rb_reserve_next() to see where this happens) |
608 | * | 609 | * |
609 | * What the above shows is that the reader just swapped out | 610 | * What the above shows is that the reader just swapped out |
610 | * the reader page with a page in the buffer, but before it | 611 | * the reader page with a page in the buffer, but before it |
611 | * could make the new header point back to the new page added | 612 | * could make the new header point back to the new page added |
612 | * it was preempted by a writer. The writer moved forward onto | 613 | * it was preempted by a writer. The writer moved forward onto |
613 | * the new page added by the reader and is about to move forward | 614 | * the new page added by the reader and is about to move forward |
614 | * again. | 615 | * again. |
615 | * | 616 | * |
616 | * You can see that it is legitimate for the previous pointer of | 617 | * You can see that it is legitimate for the previous pointer of |
617 | * the head (or any page) not to point back to itself. But only | 618 | * the head (or any page) not to point back to itself. But only |
618 | * temporarily. | 619 | * temporarily. |
619 | */ | 620 | */ |
620 | 621 | ||
621 | #define RB_PAGE_NORMAL 0UL | 622 | #define RB_PAGE_NORMAL 0UL |
622 | #define RB_PAGE_HEAD 1UL | 623 | #define RB_PAGE_HEAD 1UL |
623 | #define RB_PAGE_UPDATE 2UL | 624 | #define RB_PAGE_UPDATE 2UL |
624 | 625 | ||
625 | 626 | ||
626 | #define RB_FLAG_MASK 3UL | 627 | #define RB_FLAG_MASK 3UL |
627 | 628 | ||
628 | /* PAGE_MOVED is not part of the mask */ | 629 | /* PAGE_MOVED is not part of the mask */ |
629 | #define RB_PAGE_MOVED 4UL | 630 | #define RB_PAGE_MOVED 4UL |
630 | 631 | ||
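To make the flag scheme above concrete, here is a small stand-alone C sketch (illustrative only, not part of this file) of how the low two bits of a next pointer can carry state while the real pointer is recovered by masking, which is what rb_list_head() below does:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define FLAG_MASK   3UL   /* low two bits are free because nodes are pointer-aligned */
#define FLAG_HEAD   1UL
#define FLAG_UPDATE 2UL

struct node { struct node *next; };

/* Strip any flag bits to recover the real pointer (cf. rb_list_head()). */
static struct node *ptr_clean(struct node *p)
{
    return (struct node *)((uintptr_t)p & ~FLAG_MASK);
}

/* Read just the flag bits carried in the pointer. */
static unsigned long ptr_flags(struct node *p)
{
    return (uintptr_t)p & FLAG_MASK;
}

int main(void)
{
    struct node b = { NULL };
    struct node a = { (struct node *)((uintptr_t)&b | FLAG_HEAD) };

    assert(ptr_clean(a.next) == &b);        /* the link still reaches b      */
    assert(ptr_flags(a.next) == FLAG_HEAD); /* and it marks b as "head" page */
    printf("flags = %lu\n", ptr_flags(a.next));
    return 0;
}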
631 | /* | 632 | /* |
632 | * rb_list_head - remove any bit | 633 | * rb_list_head - remove any bit |
633 | */ | 634 | */ |
634 | static struct list_head *rb_list_head(struct list_head *list) | 635 | static struct list_head *rb_list_head(struct list_head *list) |
635 | { | 636 | { |
636 | unsigned long val = (unsigned long)list; | 637 | unsigned long val = (unsigned long)list; |
637 | 638 | ||
638 | return (struct list_head *)(val & ~RB_FLAG_MASK); | 639 | return (struct list_head *)(val & ~RB_FLAG_MASK); |
639 | } | 640 | } |
640 | 641 | ||
641 | /* | 642 | /* |
642 | * rb_is_head_page - test if the given page is the head page | 643 | * rb_is_head_page - test if the given page is the head page |
643 | * | 644 | * |
644 | * Because the reader may move the head_page pointer, we can | 645 | * Because the reader may move the head_page pointer, we can |
645 | * not trust what the head page is (it may be pointing to | 646 | * not trust what the head page is (it may be pointing to |
646 | * the reader page). But if the next page is a header page, | 647 | * the reader page). But if the next page is a header page, |
647 | * its flags will be non-zero. | 648 | * its flags will be non-zero. |
648 | */ | 649 | */ |
649 | static inline int | 650 | static inline int |
650 | rb_is_head_page(struct ring_buffer_per_cpu *cpu_buffer, | 651 | rb_is_head_page(struct ring_buffer_per_cpu *cpu_buffer, |
651 | struct buffer_page *page, struct list_head *list) | 652 | struct buffer_page *page, struct list_head *list) |
652 | { | 653 | { |
653 | unsigned long val; | 654 | unsigned long val; |
654 | 655 | ||
655 | val = (unsigned long)list->next; | 656 | val = (unsigned long)list->next; |
656 | 657 | ||
657 | if ((val & ~RB_FLAG_MASK) != (unsigned long)&page->list) | 658 | if ((val & ~RB_FLAG_MASK) != (unsigned long)&page->list) |
658 | return RB_PAGE_MOVED; | 659 | return RB_PAGE_MOVED; |
659 | 660 | ||
660 | return val & RB_FLAG_MASK; | 661 | return val & RB_FLAG_MASK; |
661 | } | 662 | } |
662 | 663 | ||
663 | /* | 664 | /* |
664 | * rb_is_reader_page | 665 | * rb_is_reader_page |
665 | * | 666 | * |
666 | * The unique thing about the reader page is that, if the | 667 | * The unique thing about the reader page is that, if the |
667 | * writer is ever on it, the previous pointer never points | 668 | * writer is ever on it, the previous pointer never points |
668 | * back to the reader page. | 669 | * back to the reader page. |
669 | */ | 670 | */ |
670 | static int rb_is_reader_page(struct buffer_page *page) | 671 | static int rb_is_reader_page(struct buffer_page *page) |
671 | { | 672 | { |
672 | struct list_head *list = page->list.prev; | 673 | struct list_head *list = page->list.prev; |
673 | 674 | ||
674 | return rb_list_head(list->next) != &page->list; | 675 | return rb_list_head(list->next) != &page->list; |
675 | } | 676 | } |
676 | 677 | ||
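The asymmetry that rb_is_reader_page() tests can be shown with plain structs. A rough user-space illustration (names are illustrative and flag bits are ignored for simplicity):

#include <assert.h>

struct page { struct page *next, *prev; };

/*
 * A page is "off the ring" when the page its prev pointer names does
 * not link back to it, which is the test rb_is_reader_page() performs.
 */
static int is_reader_page(struct page *p)
{
    return p->prev->next != p;
}

int main(void)
{
    struct page a, b, reader;

    /* Two-page ring: a <-> b <-> a. */
    a.next = &b; a.prev = &b;
    b.next = &a; b.prev = &a;

    /* The reader page hangs off the ring: it points at a and b,
     * but neither of them points back at it. */
    reader.prev = &a;
    reader.next = &b;

    assert(!is_reader_page(&a));
    assert(!is_reader_page(&b));
    assert(is_reader_page(&reader));
    return 0;
}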
677 | /* | 678 | /* |
678 | * rb_set_list_to_head - set a list_head to be pointing to head. | 679 | * rb_set_list_to_head - set a list_head to be pointing to head. |
679 | */ | 680 | */ |
680 | static void rb_set_list_to_head(struct ring_buffer_per_cpu *cpu_buffer, | 681 | static void rb_set_list_to_head(struct ring_buffer_per_cpu *cpu_buffer, |
681 | struct list_head *list) | 682 | struct list_head *list) |
682 | { | 683 | { |
683 | unsigned long *ptr; | 684 | unsigned long *ptr; |
684 | 685 | ||
685 | ptr = (unsigned long *)&list->next; | 686 | ptr = (unsigned long *)&list->next; |
686 | *ptr |= RB_PAGE_HEAD; | 687 | *ptr |= RB_PAGE_HEAD; |
687 | *ptr &= ~RB_PAGE_UPDATE; | 688 | *ptr &= ~RB_PAGE_UPDATE; |
688 | } | 689 | } |
689 | 690 | ||
690 | /* | 691 | /* |
691 | * rb_head_page_activate - sets up head page | 692 | * rb_head_page_activate - sets up head page |
692 | */ | 693 | */ |
693 | static void rb_head_page_activate(struct ring_buffer_per_cpu *cpu_buffer) | 694 | static void rb_head_page_activate(struct ring_buffer_per_cpu *cpu_buffer) |
694 | { | 695 | { |
695 | struct buffer_page *head; | 696 | struct buffer_page *head; |
696 | 697 | ||
697 | head = cpu_buffer->head_page; | 698 | head = cpu_buffer->head_page; |
698 | if (!head) | 699 | if (!head) |
699 | return; | 700 | return; |
700 | 701 | ||
701 | /* | 702 | /* |
702 | * Set the previous list pointer to have the HEAD flag. | 703 | * Set the previous list pointer to have the HEAD flag. |
703 | */ | 704 | */ |
704 | rb_set_list_to_head(cpu_buffer, head->list.prev); | 705 | rb_set_list_to_head(cpu_buffer, head->list.prev); |
705 | } | 706 | } |
706 | 707 | ||
707 | static void rb_list_head_clear(struct list_head *list) | 708 | static void rb_list_head_clear(struct list_head *list) |
708 | { | 709 | { |
709 | unsigned long *ptr = (unsigned long *)&list->next; | 710 | unsigned long *ptr = (unsigned long *)&list->next; |
710 | 711 | ||
711 | *ptr &= ~RB_FLAG_MASK; | 712 | *ptr &= ~RB_FLAG_MASK; |
712 | } | 713 | } |
713 | 714 | ||
714 | /* | 715 | /* |
715 | * rb_head_page_deactivate - clears head page ptr (for free list) | 716 | * rb_head_page_deactivate - clears head page ptr (for free list) |
716 | */ | 717 | */ |
717 | static void | 718 | static void |
718 | rb_head_page_deactivate(struct ring_buffer_per_cpu *cpu_buffer) | 719 | rb_head_page_deactivate(struct ring_buffer_per_cpu *cpu_buffer) |
719 | { | 720 | { |
720 | struct list_head *hd; | 721 | struct list_head *hd; |
721 | 722 | ||
722 | /* Go through the whole list and clear any pointers found. */ | 723 | /* Go through the whole list and clear any pointers found. */ |
723 | rb_list_head_clear(cpu_buffer->pages); | 724 | rb_list_head_clear(cpu_buffer->pages); |
724 | 725 | ||
725 | list_for_each(hd, cpu_buffer->pages) | 726 | list_for_each(hd, cpu_buffer->pages) |
726 | rb_list_head_clear(hd); | 727 | rb_list_head_clear(hd); |
727 | } | 728 | } |
728 | 729 | ||
729 | static int rb_head_page_set(struct ring_buffer_per_cpu *cpu_buffer, | 730 | static int rb_head_page_set(struct ring_buffer_per_cpu *cpu_buffer, |
730 | struct buffer_page *head, | 731 | struct buffer_page *head, |
731 | struct buffer_page *prev, | 732 | struct buffer_page *prev, |
732 | int old_flag, int new_flag) | 733 | int old_flag, int new_flag) |
733 | { | 734 | { |
734 | struct list_head *list; | 735 | struct list_head *list; |
735 | unsigned long val = (unsigned long)&head->list; | 736 | unsigned long val = (unsigned long)&head->list; |
736 | unsigned long ret; | 737 | unsigned long ret; |
737 | 738 | ||
738 | list = &prev->list; | 739 | list = &prev->list; |
739 | 740 | ||
740 | val &= ~RB_FLAG_MASK; | 741 | val &= ~RB_FLAG_MASK; |
741 | 742 | ||
742 | ret = cmpxchg((unsigned long *)&list->next, | 743 | ret = cmpxchg((unsigned long *)&list->next, |
743 | val | old_flag, val | new_flag); | 744 | val | old_flag, val | new_flag); |
744 | 745 | ||
745 | /* check if the reader took the page */ | 746 | /* check if the reader took the page */ |
746 | if ((ret & ~RB_FLAG_MASK) != val) | 747 | if ((ret & ~RB_FLAG_MASK) != val) |
747 | return RB_PAGE_MOVED; | 748 | return RB_PAGE_MOVED; |
748 | 749 | ||
749 | return ret & RB_FLAG_MASK; | 750 | return ret & RB_FLAG_MASK; |
750 | } | 751 | } |
751 | 752 | ||
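rb_head_page_set() only flips the flag when nobody has changed the pointer underneath it. A rough user-space analogue using C11 atomics in place of the kernel's cmpxchg() on the list pointer (names and values below are illustrative, not kernel code):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define FLAG_MASK   3UL
#define PAGE_HEAD   1UL
#define PAGE_UPDATE 2UL
#define PAGE_MOVED  4UL   /* "the pointer itself changed", not a pointer flag */

/*
 * Switch the flag stored in *slot from old_flag to new_flag, but only
 * if the pointer part still equals base.  Mirrors rb_head_page_set():
 * success returns the old flag, a changed pointer returns PAGE_MOVED,
 * and a changed flag returns whatever flag is there now.
 */
static unsigned long flag_transition(_Atomic uintptr_t *slot, uintptr_t base,
                                     unsigned long old_flag,
                                     unsigned long new_flag)
{
    uintptr_t expected = base | old_flag;

    if (atomic_compare_exchange_strong(slot, &expected, base | new_flag))
        return old_flag;
    if ((expected & ~FLAG_MASK) != base)
        return PAGE_MOVED;
    return expected & FLAG_MASK;
}

int main(void)
{
    uintptr_t page = 0x1000;                 /* stand-in for &head->list */
    _Atomic uintptr_t next = page | PAGE_HEAD;

    printf("old flag: %lu\n", flag_transition(&next, page, PAGE_HEAD, PAGE_UPDATE));
    printf("new flag: %lu\n", (unsigned long)(atomic_load(&next) & FLAG_MASK));
    return 0;
}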
752 | static int rb_head_page_set_update(struct ring_buffer_per_cpu *cpu_buffer, | 753 | static int rb_head_page_set_update(struct ring_buffer_per_cpu *cpu_buffer, |
753 | struct buffer_page *head, | 754 | struct buffer_page *head, |
754 | struct buffer_page *prev, | 755 | struct buffer_page *prev, |
755 | int old_flag) | 756 | int old_flag) |
756 | { | 757 | { |
757 | return rb_head_page_set(cpu_buffer, head, prev, | 758 | return rb_head_page_set(cpu_buffer, head, prev, |
758 | old_flag, RB_PAGE_UPDATE); | 759 | old_flag, RB_PAGE_UPDATE); |
759 | } | 760 | } |
760 | 761 | ||
761 | static int rb_head_page_set_head(struct ring_buffer_per_cpu *cpu_buffer, | 762 | static int rb_head_page_set_head(struct ring_buffer_per_cpu *cpu_buffer, |
762 | struct buffer_page *head, | 763 | struct buffer_page *head, |
763 | struct buffer_page *prev, | 764 | struct buffer_page *prev, |
764 | int old_flag) | 765 | int old_flag) |
765 | { | 766 | { |
766 | return rb_head_page_set(cpu_buffer, head, prev, | 767 | return rb_head_page_set(cpu_buffer, head, prev, |
767 | old_flag, RB_PAGE_HEAD); | 768 | old_flag, RB_PAGE_HEAD); |
768 | } | 769 | } |
769 | 770 | ||
770 | static int rb_head_page_set_normal(struct ring_buffer_per_cpu *cpu_buffer, | 771 | static int rb_head_page_set_normal(struct ring_buffer_per_cpu *cpu_buffer, |
771 | struct buffer_page *head, | 772 | struct buffer_page *head, |
772 | struct buffer_page *prev, | 773 | struct buffer_page *prev, |
773 | int old_flag) | 774 | int old_flag) |
774 | { | 775 | { |
775 | return rb_head_page_set(cpu_buffer, head, prev, | 776 | return rb_head_page_set(cpu_buffer, head, prev, |
776 | old_flag, RB_PAGE_NORMAL); | 777 | old_flag, RB_PAGE_NORMAL); |
777 | } | 778 | } |
778 | 779 | ||
779 | static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer, | 780 | static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer, |
780 | struct buffer_page **bpage) | 781 | struct buffer_page **bpage) |
781 | { | 782 | { |
782 | struct list_head *p = rb_list_head((*bpage)->list.next); | 783 | struct list_head *p = rb_list_head((*bpage)->list.next); |
783 | 784 | ||
784 | *bpage = list_entry(p, struct buffer_page, list); | 785 | *bpage = list_entry(p, struct buffer_page, list); |
785 | } | 786 | } |
786 | 787 | ||
787 | static struct buffer_page * | 788 | static struct buffer_page * |
788 | rb_set_head_page(struct ring_buffer_per_cpu *cpu_buffer) | 789 | rb_set_head_page(struct ring_buffer_per_cpu *cpu_buffer) |
789 | { | 790 | { |
790 | struct buffer_page *head; | 791 | struct buffer_page *head; |
791 | struct buffer_page *page; | 792 | struct buffer_page *page; |
792 | struct list_head *list; | 793 | struct list_head *list; |
793 | int i; | 794 | int i; |
794 | 795 | ||
795 | if (RB_WARN_ON(cpu_buffer, !cpu_buffer->head_page)) | 796 | if (RB_WARN_ON(cpu_buffer, !cpu_buffer->head_page)) |
796 | return NULL; | 797 | return NULL; |
797 | 798 | ||
798 | /* sanity check */ | 799 | /* sanity check */ |
799 | list = cpu_buffer->pages; | 800 | list = cpu_buffer->pages; |
800 | if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev->next) != list)) | 801 | if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev->next) != list)) |
801 | return NULL; | 802 | return NULL; |
802 | 803 | ||
803 | page = head = cpu_buffer->head_page; | 804 | page = head = cpu_buffer->head_page; |
804 | /* | 805 | /* |
805 | * It is possible that the writer moves the header behind | 806 | * It is possible that the writer moves the header behind |
806 | * where we started, and we miss in one loop. | 807 | * where we started, and we miss in one loop. |
807 | * A second loop should grab the header, but we'll do | 808 | * A second loop should grab the header, but we'll do |
808 | * three loops just because I'm paranoid. | 809 | * three loops just because I'm paranoid. |
809 | */ | 810 | */ |
810 | for (i = 0; i < 3; i++) { | 811 | for (i = 0; i < 3; i++) { |
811 | do { | 812 | do { |
812 | if (rb_is_head_page(cpu_buffer, page, page->list.prev)) { | 813 | if (rb_is_head_page(cpu_buffer, page, page->list.prev)) { |
813 | cpu_buffer->head_page = page; | 814 | cpu_buffer->head_page = page; |
814 | return page; | 815 | return page; |
815 | } | 816 | } |
816 | rb_inc_page(cpu_buffer, &page); | 817 | rb_inc_page(cpu_buffer, &page); |
817 | } while (page != head); | 818 | } while (page != head); |
818 | } | 819 | } |
819 | 820 | ||
820 | RB_WARN_ON(cpu_buffer, 1); | 821 | RB_WARN_ON(cpu_buffer, 1); |
821 | 822 | ||
822 | return NULL; | 823 | return NULL; |
823 | } | 824 | } |
824 | 825 | ||
825 | static int rb_head_page_replace(struct buffer_page *old, | 826 | static int rb_head_page_replace(struct buffer_page *old, |
826 | struct buffer_page *new) | 827 | struct buffer_page *new) |
827 | { | 828 | { |
828 | unsigned long *ptr = (unsigned long *)&old->list.prev->next; | 829 | unsigned long *ptr = (unsigned long *)&old->list.prev->next; |
829 | unsigned long val; | 830 | unsigned long val; |
830 | unsigned long ret; | 831 | unsigned long ret; |
831 | 832 | ||
832 | val = *ptr & ~RB_FLAG_MASK; | 833 | val = *ptr & ~RB_FLAG_MASK; |
833 | val |= RB_PAGE_HEAD; | 834 | val |= RB_PAGE_HEAD; |
834 | 835 | ||
835 | ret = cmpxchg(ptr, val, (unsigned long)&new->list); | 836 | ret = cmpxchg(ptr, val, (unsigned long)&new->list); |
836 | 837 | ||
837 | return ret == val; | 838 | return ret == val; |
838 | } | 839 | } |
839 | 840 | ||
840 | /* | 841 | /* |
841 | * rb_tail_page_update - move the tail page forward | 842 | * rb_tail_page_update - move the tail page forward |
842 | * | 843 | * |
843 | * Returns 1 if moved tail page, 0 if someone else did. | 844 | * Returns 1 if moved tail page, 0 if someone else did. |
844 | */ | 845 | */ |
845 | static int rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer, | 846 | static int rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer, |
846 | struct buffer_page *tail_page, | 847 | struct buffer_page *tail_page, |
847 | struct buffer_page *next_page) | 848 | struct buffer_page *next_page) |
848 | { | 849 | { |
849 | struct buffer_page *old_tail; | 850 | struct buffer_page *old_tail; |
850 | unsigned long old_entries; | 851 | unsigned long old_entries; |
851 | unsigned long old_write; | 852 | unsigned long old_write; |
852 | int ret = 0; | 853 | int ret = 0; |
853 | 854 | ||
854 | /* | 855 | /* |
855 | * The tail page now needs to be moved forward. | 856 | * The tail page now needs to be moved forward. |
856 | * | 857 | * |
857 | * We need to reset the tail page, but without messing | 858 | * We need to reset the tail page, but without messing |
858 | * with possible erasing of data brought in by interrupts | 859 | * with possible erasing of data brought in by interrupts |
859 | * that have moved the tail page and are currently on it. | 860 | * that have moved the tail page and are currently on it. |
860 | * | 861 | * |
861 | * We add a counter to the write field to denote this. | 862 | * We add a counter to the write field to denote this. |
862 | */ | 863 | */ |
863 | old_write = local_add_return(RB_WRITE_INTCNT, &next_page->write); | 864 | old_write = local_add_return(RB_WRITE_INTCNT, &next_page->write); |
864 | old_entries = local_add_return(RB_WRITE_INTCNT, &next_page->entries); | 865 | old_entries = local_add_return(RB_WRITE_INTCNT, &next_page->entries); |
865 | 866 | ||
866 | /* | 867 | /* |
867 | * Just make sure we have seen our old_write and synchronize | 868 | * Just make sure we have seen our old_write and synchronize |
868 | * with any interrupts that come in. | 869 | * with any interrupts that come in. |
869 | */ | 870 | */ |
870 | barrier(); | 871 | barrier(); |
871 | 872 | ||
872 | /* | 873 | /* |
873 | * If the tail page is still the same as what we think | 874 | * If the tail page is still the same as what we think |
874 | * it is, then it is up to us to update the tail | 875 | * it is, then it is up to us to update the tail |
875 | * pointer. | 876 | * pointer. |
876 | */ | 877 | */ |
877 | if (tail_page == cpu_buffer->tail_page) { | 878 | if (tail_page == cpu_buffer->tail_page) { |
878 | /* Zero the write counter */ | 879 | /* Zero the write counter */ |
879 | unsigned long val = old_write & ~RB_WRITE_MASK; | 880 | unsigned long val = old_write & ~RB_WRITE_MASK; |
880 | unsigned long eval = old_entries & ~RB_WRITE_MASK; | 881 | unsigned long eval = old_entries & ~RB_WRITE_MASK; |
881 | 882 | ||
882 | /* | 883 | /* |
883 | * This will only succeed if an interrupt did | 884 | * This will only succeed if an interrupt did |
884 | * not come in and change it. In which case, we | 885 | * not come in and change it. In which case, we |
885 | * do not want to modify it. | 886 | * do not want to modify it. |
886 | * | 887 | * |
887 | * We add (void) to let the compiler know that we do not care | 888 | * We add (void) to let the compiler know that we do not care |
888 | * about the return value of these functions. We use the | 889 | * about the return value of these functions. We use the |
889 | * cmpxchg to only update if an interrupt did not already | 890 | * cmpxchg to only update if an interrupt did not already |
890 | * do it for us. If the cmpxchg fails, we don't care. | 891 | * do it for us. If the cmpxchg fails, we don't care. |
891 | */ | 892 | */ |
892 | (void)local_cmpxchg(&next_page->write, old_write, val); | 893 | (void)local_cmpxchg(&next_page->write, old_write, val); |
893 | (void)local_cmpxchg(&next_page->entries, old_entries, eval); | 894 | (void)local_cmpxchg(&next_page->entries, old_entries, eval); |
894 | 895 | ||
895 | /* | 896 | /* |
896 | * No need to worry about races with clearing out the commit. | 897 | * No need to worry about races with clearing out the commit. |
897 | * It can only increment when a commit takes place. But that | 898 | * It can only increment when a commit takes place. But that |
898 | * only happens in the outermost nested commit. | 899 | * only happens in the outermost nested commit. |
899 | */ | 900 | */ |
900 | local_set(&next_page->page->commit, 0); | 901 | local_set(&next_page->page->commit, 0); |
901 | 902 | ||
902 | old_tail = cmpxchg(&cpu_buffer->tail_page, | 903 | old_tail = cmpxchg(&cpu_buffer->tail_page, |
903 | tail_page, next_page); | 904 | tail_page, next_page); |
904 | 905 | ||
905 | if (old_tail == tail_page) | 906 | if (old_tail == tail_page) |
906 | ret = 1; | 907 | ret = 1; |
907 | } | 908 | } |
908 | 909 | ||
909 | return ret; | 910 | return ret; |
910 | } | 911 | } |
911 | 912 | ||
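The write and entries fields are split into a low index plus a high "an interrupt touched this page" counter, which is what the RB_WRITE_INTCNT additions above rely on. A simplified user-space sketch of that trick (the bit split below is an assumption for illustration, not the kernel's exact constants):

#include <stdatomic.h>
#include <stdio.h>

/* Low bits hold the write index, the bits above count interrupt activity. */
#define WRITE_MASK 0xfffffUL
#define INTCNT     (1UL << 20)

static _Atomic unsigned long write_word;

/* Reset the write index, but only if no interrupt slipped in between
 * reading the word and writing it back (cf. rb_tail_page_update()). */
static void reset_write_index(void)
{
    unsigned long old = atomic_fetch_add(&write_word, INTCNT) + INTCNT;
    unsigned long val = old & ~WRITE_MASK;   /* keep the counter, zero the index */

    atomic_compare_exchange_strong(&write_word, &old, val);
}

int main(void)
{
    atomic_store(&write_word, 123);          /* pretend 123 bytes were written */
    reset_write_index();
    printf("index=%lu intr-count=%lu\n",
           atomic_load(&write_word) & WRITE_MASK,
           atomic_load(&write_word) >> 20);
    return 0;
}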
912 | static int rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer, | 913 | static int rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer, |
913 | struct buffer_page *bpage) | 914 | struct buffer_page *bpage) |
914 | { | 915 | { |
915 | unsigned long val = (unsigned long)bpage; | 916 | unsigned long val = (unsigned long)bpage; |
916 | 917 | ||
917 | if (RB_WARN_ON(cpu_buffer, val & RB_FLAG_MASK)) | 918 | if (RB_WARN_ON(cpu_buffer, val & RB_FLAG_MASK)) |
918 | return 1; | 919 | return 1; |
919 | 920 | ||
920 | return 0; | 921 | return 0; |
921 | } | 922 | } |
922 | 923 | ||
923 | /** | 924 | /** |
924 | * rb_check_list - make sure a pointer to a list has the last bits zero | 925 | * rb_check_list - make sure a pointer to a list has the last bits zero |
925 | */ | 926 | */ |
926 | static int rb_check_list(struct ring_buffer_per_cpu *cpu_buffer, | 927 | static int rb_check_list(struct ring_buffer_per_cpu *cpu_buffer, |
927 | struct list_head *list) | 928 | struct list_head *list) |
928 | { | 929 | { |
929 | if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev) != list->prev)) | 930 | if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev) != list->prev)) |
930 | return 1; | 931 | return 1; |
931 | if (RB_WARN_ON(cpu_buffer, rb_list_head(list->next) != list->next)) | 932 | if (RB_WARN_ON(cpu_buffer, rb_list_head(list->next) != list->next)) |
932 | return 1; | 933 | return 1; |
933 | return 0; | 934 | return 0; |
934 | } | 935 | } |
935 | 936 | ||
936 | /** | 937 | /** |
937 | * check_pages - integrity check of buffer pages | 938 | * check_pages - integrity check of buffer pages |
938 | * @cpu_buffer: CPU buffer with pages to test | 939 | * @cpu_buffer: CPU buffer with pages to test |
939 | * | 940 | * |
940 | * As a safety measure we check to make sure the data pages have not | 941 | * As a safety measure we check to make sure the data pages have not |
941 | * been corrupted. | 942 | * been corrupted. |
942 | */ | 943 | */ |
943 | static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) | 944 | static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) |
944 | { | 945 | { |
945 | struct list_head *head = cpu_buffer->pages; | 946 | struct list_head *head = cpu_buffer->pages; |
946 | struct buffer_page *bpage, *tmp; | 947 | struct buffer_page *bpage, *tmp; |
947 | 948 | ||
948 | /* Reset the head page if it exists */ | 949 | /* Reset the head page if it exists */ |
949 | if (cpu_buffer->head_page) | 950 | if (cpu_buffer->head_page) |
950 | rb_set_head_page(cpu_buffer); | 951 | rb_set_head_page(cpu_buffer); |
951 | 952 | ||
952 | rb_head_page_deactivate(cpu_buffer); | 953 | rb_head_page_deactivate(cpu_buffer); |
953 | 954 | ||
954 | if (RB_WARN_ON(cpu_buffer, head->next->prev != head)) | 955 | if (RB_WARN_ON(cpu_buffer, head->next->prev != head)) |
955 | return -1; | 956 | return -1; |
956 | if (RB_WARN_ON(cpu_buffer, head->prev->next != head)) | 957 | if (RB_WARN_ON(cpu_buffer, head->prev->next != head)) |
957 | return -1; | 958 | return -1; |
958 | 959 | ||
959 | if (rb_check_list(cpu_buffer, head)) | 960 | if (rb_check_list(cpu_buffer, head)) |
960 | return -1; | 961 | return -1; |
961 | 962 | ||
962 | list_for_each_entry_safe(bpage, tmp, head, list) { | 963 | list_for_each_entry_safe(bpage, tmp, head, list) { |
963 | if (RB_WARN_ON(cpu_buffer, | 964 | if (RB_WARN_ON(cpu_buffer, |
964 | bpage->list.next->prev != &bpage->list)) | 965 | bpage->list.next->prev != &bpage->list)) |
965 | return -1; | 966 | return -1; |
966 | if (RB_WARN_ON(cpu_buffer, | 967 | if (RB_WARN_ON(cpu_buffer, |
967 | bpage->list.prev->next != &bpage->list)) | 968 | bpage->list.prev->next != &bpage->list)) |
968 | return -1; | 969 | return -1; |
969 | if (rb_check_list(cpu_buffer, &bpage->list)) | 970 | if (rb_check_list(cpu_buffer, &bpage->list)) |
970 | return -1; | 971 | return -1; |
971 | } | 972 | } |
972 | 973 | ||
973 | rb_head_page_activate(cpu_buffer); | 974 | rb_head_page_activate(cpu_buffer); |
974 | 975 | ||
975 | return 0; | 976 | return 0; |
976 | } | 977 | } |
977 | 978 | ||
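The same next->prev / prev->next consistency rule that rb_check_pages() asserts can be written for any circular doubly linked list. A stand-alone sketch (hypothetical struct, no flag bits):

#include <assert.h>

struct node { struct node *next, *prev; };

/* Walk the ring once and verify every link agrees with its neighbour. */
static int ring_is_consistent(struct node *head)
{
    struct node *n = head;

    do {
        if (n->next->prev != n || n->prev->next != n)
            return 0;
        n = n->next;
    } while (n != head);

    return 1;
}

int main(void)
{
    struct node a, b, c;

    a.next = &b; b.next = &c; c.next = &a;
    a.prev = &c; b.prev = &a; c.prev = &b;
    assert(ring_is_consistent(&a));

    b.prev = &c;                     /* corrupt one back-pointer */
    assert(!ring_is_consistent(&a));
    return 0;
}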
978 | static int __rb_allocate_pages(int nr_pages, struct list_head *pages, int cpu) | 979 | static int __rb_allocate_pages(int nr_pages, struct list_head *pages, int cpu) |
979 | { | 980 | { |
980 | int i; | 981 | int i; |
981 | struct buffer_page *bpage, *tmp; | 982 | struct buffer_page *bpage, *tmp; |
982 | 983 | ||
983 | for (i = 0; i < nr_pages; i++) { | 984 | for (i = 0; i < nr_pages; i++) { |
984 | struct page *page; | 985 | struct page *page; |
985 | /* | 986 | /* |
986 | * __GFP_NORETRY flag makes sure that the allocation fails | 987 | * __GFP_NORETRY flag makes sure that the allocation fails |
987 | * gracefully without invoking oom-killer and the system is | 988 | * gracefully without invoking oom-killer and the system is |
988 | * not destabilized. | 989 | * not destabilized. |
989 | */ | 990 | */ |
990 | bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), | 991 | bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), |
991 | GFP_KERNEL | __GFP_NORETRY, | 992 | GFP_KERNEL | __GFP_NORETRY, |
992 | cpu_to_node(cpu)); | 993 | cpu_to_node(cpu)); |
993 | if (!bpage) | 994 | if (!bpage) |
994 | goto free_pages; | 995 | goto free_pages; |
995 | 996 | ||
996 | list_add(&bpage->list, pages); | 997 | list_add(&bpage->list, pages); |
997 | 998 | ||
998 | page = alloc_pages_node(cpu_to_node(cpu), | 999 | page = alloc_pages_node(cpu_to_node(cpu), |
999 | GFP_KERNEL | __GFP_NORETRY, 0); | 1000 | GFP_KERNEL | __GFP_NORETRY, 0); |
1000 | if (!page) | 1001 | if (!page) |
1001 | goto free_pages; | 1002 | goto free_pages; |
1002 | bpage->page = page_address(page); | 1003 | bpage->page = page_address(page); |
1003 | rb_init_page(bpage->page); | 1004 | rb_init_page(bpage->page); |
1004 | } | 1005 | } |
1005 | 1006 | ||
1006 | return 0; | 1007 | return 0; |
1007 | 1008 | ||
1008 | free_pages: | 1009 | free_pages: |
1009 | list_for_each_entry_safe(bpage, tmp, pages, list) { | 1010 | list_for_each_entry_safe(bpage, tmp, pages, list) { |
1010 | list_del_init(&bpage->list); | 1011 | list_del_init(&bpage->list); |
1011 | free_buffer_page(bpage); | 1012 | free_buffer_page(bpage); |
1012 | } | 1013 | } |
1013 | 1014 | ||
1014 | return -ENOMEM; | 1015 | return -ENOMEM; |
1015 | } | 1016 | } |
1016 | 1017 | ||
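The shape of __rb_allocate_pages() (allocate in a loop, and on any failure unwind everything built so far) is a common kernel pattern. A user-space sketch of the same unwind, with malloc standing in for the GFP allocations (names are illustrative):

#include <stdlib.h>

struct page_desc {
    struct page_desc *next;
    void *data;
};

/* Allocate nr descriptors plus their data pages; on any failure, free
 * what was already built and report -1 (mirrors the free_pages: label). */
static int allocate_pages(struct page_desc **out, int nr, size_t page_size)
{
    struct page_desc *list = NULL, *d;
    int i;

    for (i = 0; i < nr; i++) {
        d = calloc(1, sizeof(*d));
        if (!d)
            goto free_pages;
        d->next = list;
        list = d;

        d->data = malloc(page_size);
        if (!d->data)
            goto free_pages;
    }

    *out = list;
    return 0;

free_pages:
    while (list) {
        d = list;
        list = list->next;
        free(d->data);          /* free(NULL) is harmless */
        free(d);
    }
    return -1;
}

int main(void)
{
    struct page_desc *pages = NULL, *d;

    if (allocate_pages(&pages, 4, 4096))
        return 1;

    while (pages) {             /* tear down again */
        d = pages;
        pages = pages->next;
        free(d->data);
        free(d);
    }
    return 0;
}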
1017 | static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, | 1018 | static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, |
1018 | unsigned nr_pages) | 1019 | unsigned nr_pages) |
1019 | { | 1020 | { |
1020 | LIST_HEAD(pages); | 1021 | LIST_HEAD(pages); |
1021 | 1022 | ||
1022 | WARN_ON(!nr_pages); | 1023 | WARN_ON(!nr_pages); |
1023 | 1024 | ||
1024 | if (__rb_allocate_pages(nr_pages, &pages, cpu_buffer->cpu)) | 1025 | if (__rb_allocate_pages(nr_pages, &pages, cpu_buffer->cpu)) |
1025 | return -ENOMEM; | 1026 | return -ENOMEM; |
1026 | 1027 | ||
1027 | /* | 1028 | /* |
1028 | * The ring buffer page list is a circular list that does not | 1029 | * The ring buffer page list is a circular list that does not |
1029 | * start and end with a list head. All page list items point to | 1030 | * start and end with a list head. All page list items point to |
1030 | * other pages. | 1031 | * other pages. |
1031 | */ | 1032 | */ |
1032 | cpu_buffer->pages = pages.next; | 1033 | cpu_buffer->pages = pages.next; |
1033 | list_del(&pages); | 1034 | list_del(&pages); |
1034 | 1035 | ||
1035 | cpu_buffer->nr_pages = nr_pages; | 1036 | cpu_buffer->nr_pages = nr_pages; |
1036 | 1037 | ||
1037 | rb_check_pages(cpu_buffer); | 1038 | rb_check_pages(cpu_buffer); |
1038 | 1039 | ||
1039 | return 0; | 1040 | return 0; |
1040 | } | 1041 | } |
1041 | 1042 | ||
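The trick in rb_allocate_pages() is that after the pages are strung onto a temporary LIST_HEAD, the head itself is deleted, leaving a ring made purely of pages. A user-space sketch of "splice out the anchor" (minimal list helpers, hypothetical names):

#include <assert.h>

struct list_head { struct list_head *next, *prev; };

static void list_add(struct list_head *item, struct list_head *head)
{
    item->next = head->next;
    item->prev = head;
    head->next->prev = item;
    head->next = item;
}

/* Unlink entry; its neighbours then point straight at each other. */
static void list_del(struct list_head *entry)
{
    entry->prev->next = entry->next;
    entry->next->prev = entry->prev;
}

int main(void)
{
    struct list_head anchor = { &anchor, &anchor };
    struct list_head a, b;
    struct list_head *pages;

    list_add(&a, &anchor);
    list_add(&b, &anchor);

    /* Keep a pointer into the ring, then splice the anchor out, as
     * rb_allocate_pages() does with its on-stack LIST_HEAD(pages). */
    pages = anchor.next;
    list_del(&anchor);

    assert(pages == &b);
    assert(pages->next == &a && a.next == &b);  /* pure two-node ring */
    return 0;
}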
1042 | static struct ring_buffer_per_cpu * | 1043 | static struct ring_buffer_per_cpu * |
1043 | rb_allocate_cpu_buffer(struct ring_buffer *buffer, int nr_pages, int cpu) | 1044 | rb_allocate_cpu_buffer(struct ring_buffer *buffer, int nr_pages, int cpu) |
1044 | { | 1045 | { |
1045 | struct ring_buffer_per_cpu *cpu_buffer; | 1046 | struct ring_buffer_per_cpu *cpu_buffer; |
1046 | struct buffer_page *bpage; | 1047 | struct buffer_page *bpage; |
1047 | struct page *page; | 1048 | struct page *page; |
1048 | int ret; | 1049 | int ret; |
1049 | 1050 | ||
1050 | cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()), | 1051 | cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()), |
1051 | GFP_KERNEL, cpu_to_node(cpu)); | 1052 | GFP_KERNEL, cpu_to_node(cpu)); |
1052 | if (!cpu_buffer) | 1053 | if (!cpu_buffer) |
1053 | return NULL; | 1054 | return NULL; |
1054 | 1055 | ||
1055 | cpu_buffer->cpu = cpu; | 1056 | cpu_buffer->cpu = cpu; |
1056 | cpu_buffer->buffer = buffer; | 1057 | cpu_buffer->buffer = buffer; |
1057 | raw_spin_lock_init(&cpu_buffer->reader_lock); | 1058 | raw_spin_lock_init(&cpu_buffer->reader_lock); |
1058 | lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key); | 1059 | lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key); |
1059 | cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; | 1060 | cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; |
1060 | INIT_WORK(&cpu_buffer->update_pages_work, update_pages_handler); | 1061 | INIT_WORK(&cpu_buffer->update_pages_work, update_pages_handler); |
1061 | init_completion(&cpu_buffer->update_done); | 1062 | init_completion(&cpu_buffer->update_done); |
1062 | 1063 | ||
1063 | bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), | 1064 | bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), |
1064 | GFP_KERNEL, cpu_to_node(cpu)); | 1065 | GFP_KERNEL, cpu_to_node(cpu)); |
1065 | if (!bpage) | 1066 | if (!bpage) |
1066 | goto fail_free_buffer; | 1067 | goto fail_free_buffer; |
1067 | 1068 | ||
1068 | rb_check_bpage(cpu_buffer, bpage); | 1069 | rb_check_bpage(cpu_buffer, bpage); |
1069 | 1070 | ||
1070 | cpu_buffer->reader_page = bpage; | 1071 | cpu_buffer->reader_page = bpage; |
1071 | page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, 0); | 1072 | page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, 0); |
1072 | if (!page) | 1073 | if (!page) |
1073 | goto fail_free_reader; | 1074 | goto fail_free_reader; |
1074 | bpage->page = page_address(page); | 1075 | bpage->page = page_address(page); |
1075 | rb_init_page(bpage->page); | 1076 | rb_init_page(bpage->page); |
1076 | 1077 | ||
1077 | INIT_LIST_HEAD(&cpu_buffer->reader_page->list); | 1078 | INIT_LIST_HEAD(&cpu_buffer->reader_page->list); |
1078 | INIT_LIST_HEAD(&cpu_buffer->new_pages); | 1079 | INIT_LIST_HEAD(&cpu_buffer->new_pages); |
1079 | 1080 | ||
1080 | ret = rb_allocate_pages(cpu_buffer, nr_pages); | 1081 | ret = rb_allocate_pages(cpu_buffer, nr_pages); |
1081 | if (ret < 0) | 1082 | if (ret < 0) |
1082 | goto fail_free_reader; | 1083 | goto fail_free_reader; |
1083 | 1084 | ||
1084 | cpu_buffer->head_page | 1085 | cpu_buffer->head_page |
1085 | = list_entry(cpu_buffer->pages, struct buffer_page, list); | 1086 | = list_entry(cpu_buffer->pages, struct buffer_page, list); |
1086 | cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page; | 1087 | cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page; |
1087 | 1088 | ||
1088 | rb_head_page_activate(cpu_buffer); | 1089 | rb_head_page_activate(cpu_buffer); |
1089 | 1090 | ||
1090 | return cpu_buffer; | 1091 | return cpu_buffer; |
1091 | 1092 | ||
1092 | fail_free_reader: | 1093 | fail_free_reader: |
1093 | free_buffer_page(cpu_buffer->reader_page); | 1094 | free_buffer_page(cpu_buffer->reader_page); |
1094 | 1095 | ||
1095 | fail_free_buffer: | 1096 | fail_free_buffer: |
1096 | kfree(cpu_buffer); | 1097 | kfree(cpu_buffer); |
1097 | return NULL; | 1098 | return NULL; |
1098 | } | 1099 | } |
1099 | 1100 | ||
1100 | static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) | 1101 | static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) |
1101 | { | 1102 | { |
1102 | struct list_head *head = cpu_buffer->pages; | 1103 | struct list_head *head = cpu_buffer->pages; |
1103 | struct buffer_page *bpage, *tmp; | 1104 | struct buffer_page *bpage, *tmp; |
1104 | 1105 | ||
1105 | free_buffer_page(cpu_buffer->reader_page); | 1106 | free_buffer_page(cpu_buffer->reader_page); |
1106 | 1107 | ||
1107 | rb_head_page_deactivate(cpu_buffer); | 1108 | rb_head_page_deactivate(cpu_buffer); |
1108 | 1109 | ||
1109 | if (head) { | 1110 | if (head) { |
1110 | list_for_each_entry_safe(bpage, tmp, head, list) { | 1111 | list_for_each_entry_safe(bpage, tmp, head, list) { |
1111 | list_del_init(&bpage->list); | 1112 | list_del_init(&bpage->list); |
1112 | free_buffer_page(bpage); | 1113 | free_buffer_page(bpage); |
1113 | } | 1114 | } |
1114 | bpage = list_entry(head, struct buffer_page, list); | 1115 | bpage = list_entry(head, struct buffer_page, list); |
1115 | free_buffer_page(bpage); | 1116 | free_buffer_page(bpage); |
1116 | } | 1117 | } |
1117 | 1118 | ||
1118 | kfree(cpu_buffer); | 1119 | kfree(cpu_buffer); |
1119 | } | 1120 | } |
1120 | 1121 | ||
1121 | #ifdef CONFIG_HOTPLUG_CPU | 1122 | #ifdef CONFIG_HOTPLUG_CPU |
1122 | static int rb_cpu_notify(struct notifier_block *self, | 1123 | static int rb_cpu_notify(struct notifier_block *self, |
1123 | unsigned long action, void *hcpu); | 1124 | unsigned long action, void *hcpu); |
1124 | #endif | 1125 | #endif |
1125 | 1126 | ||
1126 | /** | 1127 | /** |
1127 | * ring_buffer_alloc - allocate a new ring_buffer | 1128 | * ring_buffer_alloc - allocate a new ring_buffer |
1128 | * @size: the size in bytes per cpu that is needed. | 1129 | * @size: the size in bytes per cpu that is needed. |
1129 | * @flags: attributes to set for the ring buffer. | 1130 | * @flags: attributes to set for the ring buffer. |
1130 | * | 1131 | * |
1131 | * Currently the only flag that is available is the RB_FL_OVERWRITE | 1132 | * Currently the only flag that is available is the RB_FL_OVERWRITE |
1132 | * flag. This flag means that the buffer will overwrite old data | 1133 | * flag. This flag means that the buffer will overwrite old data |
1133 | * when the buffer wraps. If this flag is not set, the buffer will | 1134 | * when the buffer wraps. If this flag is not set, the buffer will |
1134 | * drop data when the tail hits the head. | 1135 | * drop data when the tail hits the head. |
1135 | */ | 1136 | */ |
1136 | struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags, | 1137 | struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags, |
1137 | struct lock_class_key *key) | 1138 | struct lock_class_key *key) |
1138 | { | 1139 | { |
1139 | struct ring_buffer *buffer; | 1140 | struct ring_buffer *buffer; |
1140 | int bsize; | 1141 | int bsize; |
1141 | int cpu, nr_pages; | 1142 | int cpu, nr_pages; |
1142 | 1143 | ||
1143 | /* keep it in its own cache line */ | 1144 | /* keep it in its own cache line */ |
1144 | buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()), | 1145 | buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()), |
1145 | GFP_KERNEL); | 1146 | GFP_KERNEL); |
1146 | if (!buffer) | 1147 | if (!buffer) |
1147 | return NULL; | 1148 | return NULL; |
1148 | 1149 | ||
1149 | if (!alloc_cpumask_var(&buffer->cpumask, GFP_KERNEL)) | 1150 | if (!alloc_cpumask_var(&buffer->cpumask, GFP_KERNEL)) |
1150 | goto fail_free_buffer; | 1151 | goto fail_free_buffer; |
1151 | 1152 | ||
1152 | nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); | 1153 | nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); |
1153 | buffer->flags = flags; | 1154 | buffer->flags = flags; |
1154 | buffer->clock = trace_clock_local; | 1155 | buffer->clock = trace_clock_local; |
1155 | buffer->reader_lock_key = key; | 1156 | buffer->reader_lock_key = key; |
1156 | 1157 | ||
1157 | /* need at least two pages */ | 1158 | /* need at least two pages */ |
1158 | if (nr_pages < 2) | 1159 | if (nr_pages < 2) |
1159 | nr_pages = 2; | 1160 | nr_pages = 2; |
1160 | 1161 | ||
1161 | /* | 1162 | /* |
1162 | * In case of non-hotplug cpu, if the ring-buffer is allocated | 1163 | * In case of non-hotplug cpu, if the ring-buffer is allocated |
1163 | * in early initcall, it will not be notified of secondary cpus. | 1164 | * in early initcall, it will not be notified of secondary cpus. |
1164 | * In that case, we need to allocate for all possible cpus. | 1165 | * In that case, we need to allocate for all possible cpus. |
1165 | */ | 1166 | */ |
1166 | #ifdef CONFIG_HOTPLUG_CPU | 1167 | #ifdef CONFIG_HOTPLUG_CPU |
1167 | get_online_cpus(); | 1168 | get_online_cpus(); |
1168 | cpumask_copy(buffer->cpumask, cpu_online_mask); | 1169 | cpumask_copy(buffer->cpumask, cpu_online_mask); |
1169 | #else | 1170 | #else |
1170 | cpumask_copy(buffer->cpumask, cpu_possible_mask); | 1171 | cpumask_copy(buffer->cpumask, cpu_possible_mask); |
1171 | #endif | 1172 | #endif |
1172 | buffer->cpus = nr_cpu_ids; | 1173 | buffer->cpus = nr_cpu_ids; |
1173 | 1174 | ||
1174 | bsize = sizeof(void *) * nr_cpu_ids; | 1175 | bsize = sizeof(void *) * nr_cpu_ids; |
1175 | buffer->buffers = kzalloc(ALIGN(bsize, cache_line_size()), | 1176 | buffer->buffers = kzalloc(ALIGN(bsize, cache_line_size()), |
1176 | GFP_KERNEL); | 1177 | GFP_KERNEL); |
1177 | if (!buffer->buffers) | 1178 | if (!buffer->buffers) |
1178 | goto fail_free_cpumask; | 1179 | goto fail_free_cpumask; |
1179 | 1180 | ||
1180 | for_each_buffer_cpu(buffer, cpu) { | 1181 | for_each_buffer_cpu(buffer, cpu) { |
1181 | buffer->buffers[cpu] = | 1182 | buffer->buffers[cpu] = |
1182 | rb_allocate_cpu_buffer(buffer, nr_pages, cpu); | 1183 | rb_allocate_cpu_buffer(buffer, nr_pages, cpu); |
1183 | if (!buffer->buffers[cpu]) | 1184 | if (!buffer->buffers[cpu]) |
1184 | goto fail_free_buffers; | 1185 | goto fail_free_buffers; |
1185 | } | 1186 | } |
1186 | 1187 | ||
1187 | #ifdef CONFIG_HOTPLUG_CPU | 1188 | #ifdef CONFIG_HOTPLUG_CPU |
1188 | buffer->cpu_notify.notifier_call = rb_cpu_notify; | 1189 | buffer->cpu_notify.notifier_call = rb_cpu_notify; |
1189 | buffer->cpu_notify.priority = 0; | 1190 | buffer->cpu_notify.priority = 0; |
1190 | register_cpu_notifier(&buffer->cpu_notify); | 1191 | register_cpu_notifier(&buffer->cpu_notify); |
1191 | #endif | 1192 | #endif |
1192 | 1193 | ||
1193 | put_online_cpus(); | 1194 | put_online_cpus(); |
1194 | mutex_init(&buffer->mutex); | 1195 | mutex_init(&buffer->mutex); |
1195 | 1196 | ||
1196 | return buffer; | 1197 | return buffer; |
1197 | 1198 | ||
1198 | fail_free_buffers: | 1199 | fail_free_buffers: |
1199 | for_each_buffer_cpu(buffer, cpu) { | 1200 | for_each_buffer_cpu(buffer, cpu) { |
1200 | if (buffer->buffers[cpu]) | 1201 | if (buffer->buffers[cpu]) |
1201 | rb_free_cpu_buffer(buffer->buffers[cpu]); | 1202 | rb_free_cpu_buffer(buffer->buffers[cpu]); |
1202 | } | 1203 | } |
1203 | kfree(buffer->buffers); | 1204 | kfree(buffer->buffers); |
1204 | 1205 | ||
1205 | fail_free_cpumask: | 1206 | fail_free_cpumask: |
1206 | free_cpumask_var(buffer->cpumask); | 1207 | free_cpumask_var(buffer->cpumask); |
1207 | put_online_cpus(); | 1208 | put_online_cpus(); |
1208 | 1209 | ||
1209 | fail_free_buffer: | 1210 | fail_free_buffer: |
1210 | kfree(buffer); | 1211 | kfree(buffer); |
1211 | return NULL; | 1212 | return NULL; |
1212 | } | 1213 | } |
1213 | EXPORT_SYMBOL_GPL(__ring_buffer_alloc); | 1214 | EXPORT_SYMBOL_GPL(__ring_buffer_alloc); |
1214 | 1215 | ||
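For context, callers normally go through the ring_buffer_alloc() macro (which supplies the lock_class_key) rather than __ring_buffer_alloc() directly. A hedged sketch of typical kernel-side usage; the my_* names are hypothetical and error handling is trimmed:

#include <linux/errno.h>
#include <linux/ring_buffer.h>

static struct ring_buffer *my_rb;   /* hypothetical owner of the buffer */

static int my_rb_init(void)
{
    /* Roughly 1MB per CPU; old events are overwritten when the buffer wraps. */
    my_rb = ring_buffer_alloc(1 << 20, RB_FL_OVERWRITE);
    if (!my_rb)
        return -ENOMEM;
    return 0;
}

static void my_rb_exit(void)
{
    ring_buffer_free(my_rb);
}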
1215 | /** | 1216 | /** |
1216 | * ring_buffer_free - free a ring buffer. | 1217 | * ring_buffer_free - free a ring buffer. |
1217 | * @buffer: the buffer to free. | 1218 | * @buffer: the buffer to free. |
1218 | */ | 1219 | */ |
1219 | void | 1220 | void |
1220 | ring_buffer_free(struct ring_buffer *buffer) | 1221 | ring_buffer_free(struct ring_buffer *buffer) |
1221 | { | 1222 | { |
1222 | int cpu; | 1223 | int cpu; |
1223 | 1224 | ||
1224 | get_online_cpus(); | 1225 | get_online_cpus(); |
1225 | 1226 | ||
1226 | #ifdef CONFIG_HOTPLUG_CPU | 1227 | #ifdef CONFIG_HOTPLUG_CPU |
1227 | unregister_cpu_notifier(&buffer->cpu_notify); | 1228 | unregister_cpu_notifier(&buffer->cpu_notify); |
1228 | #endif | 1229 | #endif |
1229 | 1230 | ||
1230 | for_each_buffer_cpu(buffer, cpu) | 1231 | for_each_buffer_cpu(buffer, cpu) |
1231 | rb_free_cpu_buffer(buffer->buffers[cpu]); | 1232 | rb_free_cpu_buffer(buffer->buffers[cpu]); |
1232 | 1233 | ||
1233 | put_online_cpus(); | 1234 | put_online_cpus(); |
1234 | 1235 | ||
1235 | kfree(buffer->buffers); | 1236 | kfree(buffer->buffers); |
1236 | free_cpumask_var(buffer->cpumask); | 1237 | free_cpumask_var(buffer->cpumask); |
1237 | 1238 | ||
1238 | kfree(buffer); | 1239 | kfree(buffer); |
1239 | } | 1240 | } |
1240 | EXPORT_SYMBOL_GPL(ring_buffer_free); | 1241 | EXPORT_SYMBOL_GPL(ring_buffer_free); |
1241 | 1242 | ||
1242 | void ring_buffer_set_clock(struct ring_buffer *buffer, | 1243 | void ring_buffer_set_clock(struct ring_buffer *buffer, |
1243 | u64 (*clock)(void)) | 1244 | u64 (*clock)(void)) |
1244 | { | 1245 | { |
1245 | buffer->clock = clock; | 1246 | buffer->clock = clock; |
1246 | } | 1247 | } |
1247 | 1248 | ||
1248 | static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer); | 1249 | static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer); |
1249 | 1250 | ||
1250 | static inline unsigned long rb_page_entries(struct buffer_page *bpage) | 1251 | static inline unsigned long rb_page_entries(struct buffer_page *bpage) |
1251 | { | 1252 | { |
1252 | return local_read(&bpage->entries) & RB_WRITE_MASK; | 1253 | return local_read(&bpage->entries) & RB_WRITE_MASK; |
1253 | } | 1254 | } |
1254 | 1255 | ||
1255 | static inline unsigned long rb_page_write(struct buffer_page *bpage) | 1256 | static inline unsigned long rb_page_write(struct buffer_page *bpage) |
1256 | { | 1257 | { |
1257 | return local_read(&bpage->write) & RB_WRITE_MASK; | 1258 | return local_read(&bpage->write) & RB_WRITE_MASK; |
1258 | } | 1259 | } |
1259 | 1260 | ||
1260 | static int | 1261 | static int |
1261 | rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned int nr_pages) | 1262 | rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned int nr_pages) |
1262 | { | 1263 | { |
1263 | struct list_head *tail_page, *to_remove, *next_page; | 1264 | struct list_head *tail_page, *to_remove, *next_page; |
1264 | struct buffer_page *to_remove_page, *tmp_iter_page; | 1265 | struct buffer_page *to_remove_page, *tmp_iter_page; |
1265 | struct buffer_page *last_page, *first_page; | 1266 | struct buffer_page *last_page, *first_page; |
1266 | unsigned int nr_removed; | 1267 | unsigned int nr_removed; |
1267 | unsigned long head_bit; | 1268 | unsigned long head_bit; |
1268 | int page_entries; | 1269 | int page_entries; |
1269 | 1270 | ||
1270 | head_bit = 0; | 1271 | head_bit = 0; |
1271 | 1272 | ||
1272 | raw_spin_lock_irq(&cpu_buffer->reader_lock); | 1273 | raw_spin_lock_irq(&cpu_buffer->reader_lock); |
1273 | atomic_inc(&cpu_buffer->record_disabled); | 1274 | atomic_inc(&cpu_buffer->record_disabled); |
1274 | /* | 1275 | /* |
1275 | * We don't race with the readers since we have acquired the reader | 1276 | * We don't race with the readers since we have acquired the reader |
1276 | * lock. We also don't race with writers after disabling recording. | 1277 | * lock. We also don't race with writers after disabling recording. |
1277 | * This makes it easy to figure out the first and the last page to be | 1278 | * This makes it easy to figure out the first and the last page to be |
1278 | * removed from the list. We unlink all the pages in between including | 1279 | * removed from the list. We unlink all the pages in between including |
1279 | * the first and last pages. This is done in a busy loop so that we | 1280 | * the first and last pages. This is done in a busy loop so that we |
1280 | * lose the least number of traces. | 1281 | * lose the least number of traces. |
1281 | * The pages are freed after we restart recording and unlock readers. | 1282 | * The pages are freed after we restart recording and unlock readers. |
1282 | */ | 1283 | */ |
1283 | tail_page = &cpu_buffer->tail_page->list; | 1284 | tail_page = &cpu_buffer->tail_page->list; |
1284 | 1285 | ||
1285 | /* | 1286 | /* |
1286 | * tail page might be on reader page, we remove the next page | 1287 | * tail page might be on reader page, we remove the next page |
1287 | * from the ring buffer | 1288 | * from the ring buffer |
1288 | */ | 1289 | */ |
1289 | if (cpu_buffer->tail_page == cpu_buffer->reader_page) | 1290 | if (cpu_buffer->tail_page == cpu_buffer->reader_page) |
1290 | tail_page = rb_list_head(tail_page->next); | 1291 | tail_page = rb_list_head(tail_page->next); |
1291 | to_remove = tail_page; | 1292 | to_remove = tail_page; |
1292 | 1293 | ||
1293 | /* start of pages to remove */ | 1294 | /* start of pages to remove */ |
1294 | first_page = list_entry(rb_list_head(to_remove->next), | 1295 | first_page = list_entry(rb_list_head(to_remove->next), |
1295 | struct buffer_page, list); | 1296 | struct buffer_page, list); |
1296 | 1297 | ||
1297 | for (nr_removed = 0; nr_removed < nr_pages; nr_removed++) { | 1298 | for (nr_removed = 0; nr_removed < nr_pages; nr_removed++) { |
1298 | to_remove = rb_list_head(to_remove)->next; | 1299 | to_remove = rb_list_head(to_remove)->next; |
1299 | head_bit |= (unsigned long)to_remove & RB_PAGE_HEAD; | 1300 | head_bit |= (unsigned long)to_remove & RB_PAGE_HEAD; |
1300 | } | 1301 | } |
1301 | 1302 | ||
1302 | next_page = rb_list_head(to_remove)->next; | 1303 | next_page = rb_list_head(to_remove)->next; |
1303 | 1304 | ||
1304 | /* | 1305 | /* |
1305 | * Now we remove all pages between tail_page and next_page. | 1306 | * Now we remove all pages between tail_page and next_page. |
1306 | * Make sure that we have head_bit value preserved for the | 1307 | * Make sure that we have head_bit value preserved for the |
1307 | * next page | 1308 | * next page |
1308 | */ | 1309 | */ |
1309 | tail_page->next = (struct list_head *)((unsigned long)next_page | | 1310 | tail_page->next = (struct list_head *)((unsigned long)next_page | |
1310 | head_bit); | 1311 | head_bit); |
1311 | next_page = rb_list_head(next_page); | 1312 | next_page = rb_list_head(next_page); |
1312 | next_page->prev = tail_page; | 1313 | next_page->prev = tail_page; |
1313 | 1314 | ||
1314 | /* make sure pages points to a valid page in the ring buffer */ | 1315 | /* make sure pages points to a valid page in the ring buffer */ |
1315 | cpu_buffer->pages = next_page; | 1316 | cpu_buffer->pages = next_page; |
1316 | 1317 | ||
1317 | /* update head page */ | 1318 | /* update head page */ |
1318 | if (head_bit) | 1319 | if (head_bit) |
1319 | cpu_buffer->head_page = list_entry(next_page, | 1320 | cpu_buffer->head_page = list_entry(next_page, |
1320 | struct buffer_page, list); | 1321 | struct buffer_page, list); |
1321 | 1322 | ||
1322 | /* | 1323 | /* |
1323 | * change read pointer to make sure any read iterators reset | 1324 | * change read pointer to make sure any read iterators reset |
1324 | * themselves | 1325 | * themselves |
1325 | */ | 1326 | */ |
1326 | cpu_buffer->read = 0; | 1327 | cpu_buffer->read = 0; |
1327 | 1328 | ||
1328 | /* pages are removed, resume tracing and then free the pages */ | 1329 | /* pages are removed, resume tracing and then free the pages */ |
1329 | atomic_dec(&cpu_buffer->record_disabled); | 1330 | atomic_dec(&cpu_buffer->record_disabled); |
1330 | raw_spin_unlock_irq(&cpu_buffer->reader_lock); | 1331 | raw_spin_unlock_irq(&cpu_buffer->reader_lock); |
1331 | 1332 | ||
1332 | RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)); | 1333 | RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)); |
1333 | 1334 | ||
1334 | /* last buffer page to remove */ | 1335 | /* last buffer page to remove */ |
1335 | last_page = list_entry(rb_list_head(to_remove), struct buffer_page, | 1336 | last_page = list_entry(rb_list_head(to_remove), struct buffer_page, |
1336 | list); | 1337 | list); |
1337 | tmp_iter_page = first_page; | 1338 | tmp_iter_page = first_page; |
1338 | 1339 | ||
1339 | do { | 1340 | do { |
1340 | to_remove_page = tmp_iter_page; | 1341 | to_remove_page = tmp_iter_page; |
1341 | rb_inc_page(cpu_buffer, &tmp_iter_page); | 1342 | rb_inc_page(cpu_buffer, &tmp_iter_page); |
1342 | 1343 | ||
1343 | /* update the counters */ | 1344 | /* update the counters */ |
1344 | page_entries = rb_page_entries(to_remove_page); | 1345 | page_entries = rb_page_entries(to_remove_page); |
1345 | if (page_entries) { | 1346 | if (page_entries) { |
1346 | /* | 1347 | /* |
1347 | * If something was added to this page, it was full | 1348 | * If something was added to this page, it was full |
1348 | * since it is not the tail page. So we deduct the | 1349 | * since it is not the tail page. So we deduct the |
1349 | * bytes consumed in ring buffer from here. | 1350 | * bytes consumed in ring buffer from here. |
1350 | * Increment overrun to account for the lost events. | 1351 | * Increment overrun to account for the lost events. |
1351 | */ | 1352 | */ |
1352 | local_add(page_entries, &cpu_buffer->overrun); | 1353 | local_add(page_entries, &cpu_buffer->overrun); |
1353 | local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes); | 1354 | local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes); |
1354 | } | 1355 | } |
1355 | 1356 | ||
1356 | /* | 1357 | /* |
1357 | * We have already removed references to this list item, just | 1358 | * We have already removed references to this list item, just |
1358 | * free up the buffer_page and its page | 1359 | * free up the buffer_page and its page |
1359 | */ | 1360 | */ |
1360 | free_buffer_page(to_remove_page); | 1361 | free_buffer_page(to_remove_page); |
1361 | nr_removed--; | 1362 | nr_removed--; |
1362 | 1363 | ||
1363 | } while (to_remove_page != last_page); | 1364 | } while (to_remove_page != last_page); |
1364 | 1365 | ||
1365 | RB_WARN_ON(cpu_buffer, nr_removed); | 1366 | RB_WARN_ON(cpu_buffer, nr_removed); |
1366 | 1367 | ||
1367 | return nr_removed == 0; | 1368 | return nr_removed == 0; |
1368 | } | 1369 | } |
1369 | 1370 | ||
1370 | static int | 1371 | static int |
1371 | rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer) | 1372 | rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer) |
1372 | { | 1373 | { |
1373 | struct list_head *pages = &cpu_buffer->new_pages; | 1374 | struct list_head *pages = &cpu_buffer->new_pages; |
1374 | int retries, success; | 1375 | int retries, success; |
1375 | 1376 | ||
1376 | raw_spin_lock_irq(&cpu_buffer->reader_lock); | 1377 | raw_spin_lock_irq(&cpu_buffer->reader_lock); |
1377 | /* | 1378 | /* |
1378 | * We are holding the reader lock, so the reader page won't be swapped | 1379 | * We are holding the reader lock, so the reader page won't be swapped |
1379 | * in the ring buffer. Now we are racing with the writer trying to | 1380 | * in the ring buffer. Now we are racing with the writer trying to |
1380 | * move head page and the tail page. | 1381 | * move head page and the tail page. |
1381 | * We are going to adapt the reader page update process where: | 1382 | * We are going to adapt the reader page update process where: |
1382 | * 1. We first splice the start and end of list of new pages between | 1383 | * 1. We first splice the start and end of list of new pages between |
1383 | * the head page and its previous page. | 1384 | * the head page and its previous page. |
1384 | * 2. We cmpxchg the prev_page->next to point from head page to the | 1385 | * 2. We cmpxchg the prev_page->next to point from head page to the |
1385 | * start of new pages list. | 1386 | * start of new pages list. |
1386 | * 3. Finally, we update the head->prev to the end of new list. | 1387 | * 3. Finally, we update the head->prev to the end of new list. |
1387 | * | 1388 | * |
1388 | * We will try this process 10 times, to make sure that we don't keep | 1389 | * We will try this process 10 times, to make sure that we don't keep |
1389 | * spinning. | 1390 | * spinning. |
1390 | */ | 1391 | */ |
1391 | retries = 10; | 1392 | retries = 10; |
1392 | success = 0; | 1393 | success = 0; |
1393 | while (retries--) { | 1394 | while (retries--) { |
1394 | struct list_head *head_page, *prev_page, *r; | 1395 | struct list_head *head_page, *prev_page, *r; |
1395 | struct list_head *last_page, *first_page; | 1396 | struct list_head *last_page, *first_page; |
1396 | struct list_head *head_page_with_bit; | 1397 | struct list_head *head_page_with_bit; |
1397 | 1398 | ||
1398 | head_page = &rb_set_head_page(cpu_buffer)->list; | 1399 | head_page = &rb_set_head_page(cpu_buffer)->list; |
1399 | prev_page = head_page->prev; | 1400 | prev_page = head_page->prev; |
1400 | 1401 | ||
1401 | first_page = pages->next; | 1402 | first_page = pages->next; |
1402 | last_page = pages->prev; | 1403 | last_page = pages->prev; |
1403 | 1404 | ||
1404 | head_page_with_bit = (struct list_head *) | 1405 | head_page_with_bit = (struct list_head *) |
1405 | ((unsigned long)head_page | RB_PAGE_HEAD); | 1406 | ((unsigned long)head_page | RB_PAGE_HEAD); |
1406 | 1407 | ||
1407 | last_page->next = head_page_with_bit; | 1408 | last_page->next = head_page_with_bit; |
1408 | first_page->prev = prev_page; | 1409 | first_page->prev = prev_page; |
1409 | 1410 | ||
1410 | r = cmpxchg(&prev_page->next, head_page_with_bit, first_page); | 1411 | r = cmpxchg(&prev_page->next, head_page_with_bit, first_page); |
1411 | 1412 | ||
1412 | if (r == head_page_with_bit) { | 1413 | if (r == head_page_with_bit) { |
1413 | /* | 1414 | /* |
1414 | * yay, we replaced the page pointer to our new list, | 1415 | * yay, we replaced the page pointer to our new list, |
1415 | * now, we just have to update the head page's prev | 1416 | * now, we just have to update the head page's prev |
1416 | * pointer to point to the end of the list | 1417 | * pointer to point to the end of the list |
1417 | */ | 1418 | */ |
1418 | head_page->prev = last_page; | 1419 | head_page->prev = last_page; |
1419 | success = 1; | 1420 | success = 1; |
1420 | break; | 1421 | break; |
1421 | } | 1422 | } |
1422 | } | 1423 | } |
1423 | 1424 | ||
1424 | if (success) | 1425 | if (success) |
1425 | INIT_LIST_HEAD(pages); | 1426 | INIT_LIST_HEAD(pages); |
1426 | /* | 1427 | /* |
1427 | * If we weren't successful in adding in new pages, warn and stop | 1428 | * If we weren't successful in adding in new pages, warn and stop |
1428 | * tracing | 1429 | * tracing |
1429 | */ | 1430 | */ |
1430 | RB_WARN_ON(cpu_buffer, !success); | 1431 | RB_WARN_ON(cpu_buffer, !success); |
1431 | raw_spin_unlock_irq(&cpu_buffer->reader_lock); | 1432 | raw_spin_unlock_irq(&cpu_buffer->reader_lock); |
1432 | 1433 | ||
1433 | /* free pages if they weren't inserted */ | 1434 | /* free pages if they weren't inserted */ |
1434 | if (!success) { | 1435 | if (!success) { |
1435 | struct buffer_page *bpage, *tmp; | 1436 | struct buffer_page *bpage, *tmp; |
1436 | list_for_each_entry_safe(bpage, tmp, &cpu_buffer->new_pages, | 1437 | list_for_each_entry_safe(bpage, tmp, &cpu_buffer->new_pages, |
1437 | list) { | 1438 | list) { |
1438 | list_del_init(&bpage->list); | 1439 | list_del_init(&bpage->list); |
1439 | free_buffer_page(bpage); | 1440 | free_buffer_page(bpage); |
1440 | } | 1441 | } |
1441 | } | 1442 | } |
1442 | return success; | 1443 | return success; |
1443 | } | 1444 | } |
1444 | 1445 | ||
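The three-step insert above hinges on a single atomic update of prev_page->next; everything else is prepared privately first. A simplified user-space rendering of that shape (retry loop and per-CPU locking dropped; names are illustrative):

#include <assert.h>
#include <stdatomic.h>
#include <stdint.h>

#define PAGE_HEAD 1UL

struct node { _Atomic uintptr_t next; struct node *prev; };

/*
 * Splice the already-linked chain [first..last] in front of head:
 * prepare the new links privately, then publish them with a single
 * compare-exchange on prev->next, the same shape as rb_insert_pages().
 */
static int splice_before_head(struct node *head, struct node *first,
                              struct node *last)
{
    struct node *prev = head->prev;
    uintptr_t head_with_bit = (uintptr_t)head | PAGE_HEAD;
    uintptr_t expected = head_with_bit;

    atomic_store(&last->next, head_with_bit);   /* new tail keeps the HEAD bit */
    first->prev = prev;

    /* Only wins if prev still points at the flagged head page. */
    if (!atomic_compare_exchange_strong(&prev->next, &expected,
                                        (uintptr_t)first))
        return 0;

    head->prev = last;                          /* fix the back-pointer last */
    return 1;
}

int main(void)
{
    struct node head, prev, a, b;

    prev.next = (uintptr_t)&head | PAGE_HEAD;   /* prev -> head (HEAD flag set) */
    head.prev = &prev;

    a.next = (uintptr_t)&b;                     /* new pages already chained */
    b.prev = &a;

    assert(splice_before_head(&head, &a, &b));
    assert(atomic_load(&prev.next) == (uintptr_t)&a);
    assert(head.prev == &b);
    return 0;
}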
1445 | static void rb_update_pages(struct ring_buffer_per_cpu *cpu_buffer) | 1446 | static void rb_update_pages(struct ring_buffer_per_cpu *cpu_buffer) |
1446 | { | 1447 | { |
1447 | int success; | 1448 | int success; |
1448 | 1449 | ||
1449 | if (cpu_buffer->nr_pages_to_update > 0) | 1450 | if (cpu_buffer->nr_pages_to_update > 0) |
1450 | success = rb_insert_pages(cpu_buffer); | 1451 | success = rb_insert_pages(cpu_buffer); |
1451 | else | 1452 | else |
1452 | success = rb_remove_pages(cpu_buffer, | 1453 | success = rb_remove_pages(cpu_buffer, |
1453 | -cpu_buffer->nr_pages_to_update); | 1454 | -cpu_buffer->nr_pages_to_update); |
1454 | 1455 | ||
1455 | if (success) | 1456 | if (success) |
1456 | cpu_buffer->nr_pages += cpu_buffer->nr_pages_to_update; | 1457 | cpu_buffer->nr_pages += cpu_buffer->nr_pages_to_update; |
1457 | } | 1458 | } |
1458 | 1459 | ||
1459 | static void update_pages_handler(struct work_struct *work) | 1460 | static void update_pages_handler(struct work_struct *work) |
1460 | { | 1461 | { |
1461 | struct ring_buffer_per_cpu *cpu_buffer = container_of(work, | 1462 | struct ring_buffer_per_cpu *cpu_buffer = container_of(work, |
1462 | struct ring_buffer_per_cpu, update_pages_work); | 1463 | struct ring_buffer_per_cpu, update_pages_work); |
1463 | rb_update_pages(cpu_buffer); | 1464 | rb_update_pages(cpu_buffer); |
1464 | complete(&cpu_buffer->update_done); | 1465 | complete(&cpu_buffer->update_done); |
1465 | } | 1466 | } |
1466 | 1467 | ||
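update_pages_handler() pairs a work item with a completion so that whoever scheduled the resize can wait for it to finish on the target CPU. A user-space analogue of that handshake (assumption: pthreads stand in for the kernel's workqueue and struct completion; this is not kernel code):

#include <pthread.h>
#include <stdio.h>

/* Shared state standing in for update_pages_work + update_done. */
struct update_work {
    pthread_mutex_t lock;
    pthread_cond_t  done_cond;
    int             done;
};

static void *update_pages_worker(void *arg)
{
    struct update_work *w = arg;

    /* ... resize the per-cpu page list here ... */

    pthread_mutex_lock(&w->lock);
    w->done = 1;                        /* "complete(&update_done)" */
    pthread_cond_signal(&w->done_cond);
    pthread_mutex_unlock(&w->lock);
    return NULL;
}

int main(void)                          /* build with: cc -pthread */
{
    struct update_work w = { .done = 0 };
    pthread_t t;

    pthread_mutex_init(&w.lock, NULL);
    pthread_cond_init(&w.done_cond, NULL);

    pthread_create(&t, NULL, update_pages_worker, &w);  /* "schedule the work" */

    pthread_mutex_lock(&w.lock);
    while (!w.done)                     /* "wait for completion" */
        pthread_cond_wait(&w.done_cond, &w.lock);
    pthread_mutex_unlock(&w.lock);

    pthread_join(t, NULL);
    puts("pages updated");
    return 0;
}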
1467 | /** | 1468 | /** |
1468 | * ring_buffer_resize - resize the ring buffer | 1469 | * ring_buffer_resize - resize the ring buffer |
1469 | * @buffer: the buffer to resize. | 1470 | * @buffer: the buffer to resize. |
1470 | * @size: the new size. | 1471 | * @size: the new size. |
1471 | * | 1472 | * |
1472 | * Minimum size is 2 * BUF_PAGE_SIZE. | 1473 | * Minimum size is 2 * BUF_PAGE_SIZE. |
1473 | * | 1474 | * |
1474 | * Returns the page-aligned buffer size on success and < 0 on failure. | 1475 | * Returns the page-aligned buffer size on success and < 0 on failure. |
1475 | */ | 1476 | */ |
1476 | int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size, | 1477 | int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size, |
1477 | int cpu_id) | 1478 | int cpu_id) |
1478 | { | 1479 | { |
1479 | struct ring_buffer_per_cpu *cpu_buffer; | 1480 | struct ring_buffer_per_cpu *cpu_buffer; |
1480 | unsigned nr_pages; | 1481 | unsigned nr_pages; |
1481 | int cpu, err = 0; | 1482 | int cpu, err = 0; |
1482 | 1483 | ||
1483 | /* | 1484 | /* |
1484 | * Always succeed at resizing a non-existent buffer: | 1485 | * Always succeed at resizing a non-existent buffer: |
1485 | */ | 1486 | */ |
1486 | if (!buffer) | 1487 | if (!buffer) |
1487 | return size; | 1488 | return size; |
1488 | 1489 | ||
1489 | /* Make sure the requested buffer exists */ | 1490 | /* Make sure the requested buffer exists */ |
1490 | if (cpu_id != RING_BUFFER_ALL_CPUS && | 1491 | if (cpu_id != RING_BUFFER_ALL_CPUS && |
1491 | !cpumask_test_cpu(cpu_id, buffer->cpumask)) | 1492 | !cpumask_test_cpu(cpu_id, buffer->cpumask)) |
1492 | return size; | 1493 | return size; |
1493 | 1494 | ||
1494 | size = DIV_ROUND_UP(size, BUF_PAGE_SIZE); | 1495 | size = DIV_ROUND_UP(size, BUF_PAGE_SIZE); |
1495 | size *= BUF_PAGE_SIZE; | 1496 | size *= BUF_PAGE_SIZE; |
1496 | 1497 | ||
1497 | /* we need a minimum of two pages */ | 1498 | /* we need a minimum of two pages */ |
1498 | if (size < BUF_PAGE_SIZE * 2) | 1499 | if (size < BUF_PAGE_SIZE * 2) |
1499 | size = BUF_PAGE_SIZE * 2; | 1500 | size = BUF_PAGE_SIZE * 2; |
1500 | 1501 | ||
1501 | nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); | 1502 | nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); |
1502 | 1503 | ||
1503 | /* | 1504 | /* |
1504 | * Don't succeed if resizing is disabled, as a reader might be | 1505 | * Don't succeed if resizing is disabled, as a reader might be |
1505 | * manipulating the ring buffer and is expecting a sane state while | 1506 | * manipulating the ring buffer and is expecting a sane state while |
1506 | * this is true. | 1507 | * this is true. |
1507 | */ | 1508 | */ |
1508 | if (atomic_read(&buffer->resize_disabled)) | 1509 | if (atomic_read(&buffer->resize_disabled)) |
1509 | return -EBUSY; | 1510 | return -EBUSY; |
1510 | 1511 | ||
1511 | /* prevent another thread from changing buffer sizes */ | 1512 | /* prevent another thread from changing buffer sizes */ |
1512 | mutex_lock(&buffer->mutex); | 1513 | mutex_lock(&buffer->mutex); |
1513 | 1514 | ||
1514 | if (cpu_id == RING_BUFFER_ALL_CPUS) { | 1515 | if (cpu_id == RING_BUFFER_ALL_CPUS) { |
1515 | /* calculate the pages to update */ | 1516 | /* calculate the pages to update */ |
1516 | for_each_buffer_cpu(buffer, cpu) { | 1517 | for_each_buffer_cpu(buffer, cpu) { |
1517 | cpu_buffer = buffer->buffers[cpu]; | 1518 | cpu_buffer = buffer->buffers[cpu]; |
1518 | 1519 | ||
1519 | cpu_buffer->nr_pages_to_update = nr_pages - | 1520 | cpu_buffer->nr_pages_to_update = nr_pages - |
1520 | cpu_buffer->nr_pages; | 1521 | cpu_buffer->nr_pages; |
1521 | /* | 1522 | /* |
1522 | * nothing more to do for removing pages or no update | 1523 | * nothing more to do for removing pages or no update |
1523 | */ | 1524 | */ |
1524 | if (cpu_buffer->nr_pages_to_update <= 0) | 1525 | if (cpu_buffer->nr_pages_to_update <= 0) |
1525 | continue; | 1526 | continue; |
1526 | /* | 1527 | /* |
1527 | * to add pages, make sure all new pages can be | 1528 | * to add pages, make sure all new pages can be |
1528 | * allocated without receiving ENOMEM | 1529 | * allocated without receiving ENOMEM |
1529 | */ | 1530 | */ |
1530 | INIT_LIST_HEAD(&cpu_buffer->new_pages); | 1531 | INIT_LIST_HEAD(&cpu_buffer->new_pages); |
1531 | if (__rb_allocate_pages(cpu_buffer->nr_pages_to_update, | 1532 | if (__rb_allocate_pages(cpu_buffer->nr_pages_to_update, |
1532 | &cpu_buffer->new_pages, cpu)) { | 1533 | &cpu_buffer->new_pages, cpu)) { |
1533 | /* not enough memory for new pages */ | 1534 | /* not enough memory for new pages */ |
1534 | err = -ENOMEM; | 1535 | err = -ENOMEM; |
1535 | goto out_err; | 1536 | goto out_err; |
1536 | } | 1537 | } |
1537 | } | 1538 | } |
1538 | 1539 | ||
1539 | get_online_cpus(); | 1540 | get_online_cpus(); |
1540 | /* | 1541 | /* |
1541 | * Fire off all the required work handlers | 1542 | * Fire off all the required work handlers |
1542 | * We can't schedule on offline CPUs, but it's not necessary | 1543 | * We can't schedule on offline CPUs, but it's not necessary |
1543 | * since we can change their buffer sizes without any race. | 1544 | * since we can change their buffer sizes without any race. |
1544 | */ | 1545 | */ |
1545 | for_each_buffer_cpu(buffer, cpu) { | 1546 | for_each_buffer_cpu(buffer, cpu) { |
1546 | cpu_buffer = buffer->buffers[cpu]; | 1547 | cpu_buffer = buffer->buffers[cpu]; |
1547 | if (!cpu_buffer->nr_pages_to_update) | 1548 | if (!cpu_buffer->nr_pages_to_update) |
1548 | continue; | 1549 | continue; |
1549 | 1550 | ||
1550 | if (cpu_online(cpu)) | 1551 | if (cpu_online(cpu)) |
1551 | schedule_work_on(cpu, | 1552 | schedule_work_on(cpu, |
1552 | &cpu_buffer->update_pages_work); | 1553 | &cpu_buffer->update_pages_work); |
1553 | else | 1554 | else |
1554 | rb_update_pages(cpu_buffer); | 1555 | rb_update_pages(cpu_buffer); |
1555 | } | 1556 | } |
1556 | 1557 | ||
1557 | /* wait for all the updates to complete */ | 1558 | /* wait for all the updates to complete */ |
1558 | for_each_buffer_cpu(buffer, cpu) { | 1559 | for_each_buffer_cpu(buffer, cpu) { |
1559 | cpu_buffer = buffer->buffers[cpu]; | 1560 | cpu_buffer = buffer->buffers[cpu]; |
1560 | if (!cpu_buffer->nr_pages_to_update) | 1561 | if (!cpu_buffer->nr_pages_to_update) |
1561 | continue; | 1562 | continue; |
1562 | 1563 | ||
1563 | if (cpu_online(cpu)) | 1564 | if (cpu_online(cpu)) |
1564 | wait_for_completion(&cpu_buffer->update_done); | 1565 | wait_for_completion(&cpu_buffer->update_done); |
1565 | cpu_buffer->nr_pages_to_update = 0; | 1566 | cpu_buffer->nr_pages_to_update = 0; |
1566 | } | 1567 | } |
1567 | 1568 | ||
1568 | put_online_cpus(); | 1569 | put_online_cpus(); |
1569 | } else { | 1570 | } else { |
1570 | /* Make sure this CPU has been initialized */ | 1571 | /* Make sure this CPU has been initialized */ |
1571 | if (!cpumask_test_cpu(cpu_id, buffer->cpumask)) | 1572 | if (!cpumask_test_cpu(cpu_id, buffer->cpumask)) |
1572 | goto out; | 1573 | goto out; |
1573 | 1574 | ||
1574 | cpu_buffer = buffer->buffers[cpu_id]; | 1575 | cpu_buffer = buffer->buffers[cpu_id]; |
1575 | 1576 | ||
1576 | if (nr_pages == cpu_buffer->nr_pages) | 1577 | if (nr_pages == cpu_buffer->nr_pages) |
1577 | goto out; | 1578 | goto out; |
1578 | 1579 | ||
1579 | cpu_buffer->nr_pages_to_update = nr_pages - | 1580 | cpu_buffer->nr_pages_to_update = nr_pages - |
1580 | cpu_buffer->nr_pages; | 1581 | cpu_buffer->nr_pages; |
1581 | 1582 | ||
1582 | INIT_LIST_HEAD(&cpu_buffer->new_pages); | 1583 | INIT_LIST_HEAD(&cpu_buffer->new_pages); |
1583 | if (cpu_buffer->nr_pages_to_update > 0 && | 1584 | if (cpu_buffer->nr_pages_to_update > 0 && |
1584 | __rb_allocate_pages(cpu_buffer->nr_pages_to_update, | 1585 | __rb_allocate_pages(cpu_buffer->nr_pages_to_update, |
1585 | &cpu_buffer->new_pages, cpu_id)) { | 1586 | &cpu_buffer->new_pages, cpu_id)) { |
1586 | err = -ENOMEM; | 1587 | err = -ENOMEM; |
1587 | goto out_err; | 1588 | goto out_err; |
1588 | } | 1589 | } |
1589 | 1590 | ||
1590 | get_online_cpus(); | 1591 | get_online_cpus(); |
1591 | 1592 | ||
1592 | if (cpu_online(cpu_id)) { | 1593 | if (cpu_online(cpu_id)) { |
1593 | schedule_work_on(cpu_id, | 1594 | schedule_work_on(cpu_id, |
1594 | &cpu_buffer->update_pages_work); | 1595 | &cpu_buffer->update_pages_work); |
1595 | wait_for_completion(&cpu_buffer->update_done); | 1596 | wait_for_completion(&cpu_buffer->update_done); |
1596 | } else | 1597 | } else |
1597 | rb_update_pages(cpu_buffer); | 1598 | rb_update_pages(cpu_buffer); |
1598 | 1599 | ||
1599 | cpu_buffer->nr_pages_to_update = 0; | 1600 | cpu_buffer->nr_pages_to_update = 0; |
1600 | put_online_cpus(); | 1601 | put_online_cpus(); |
1601 | } | 1602 | } |
1602 | 1603 | ||
1603 | out: | 1604 | out: |
1604 | /* | 1605 | /* |
1605 | * The ring buffer resize can happen with the ring buffer | 1606 | * The ring buffer resize can happen with the ring buffer |
1606 | * enabled, so that the update disturbs the tracing as little | 1607 | * enabled, so that the update disturbs the tracing as little |
1607 | * as possible. But if the buffer is disabled, we do not need | 1608 | * as possible. But if the buffer is disabled, we do not need |
1608 | * to worry about that, and we can take the time to verify | 1609 | * to worry about that, and we can take the time to verify |
1609 | * that the buffer is not corrupt. | 1610 | * that the buffer is not corrupt. |
1610 | */ | 1611 | */ |
1611 | if (atomic_read(&buffer->record_disabled)) { | 1612 | if (atomic_read(&buffer->record_disabled)) { |
1612 | atomic_inc(&buffer->record_disabled); | 1613 | atomic_inc(&buffer->record_disabled); |
1613 | /* | 1614 | /* |
1614 | * Even though the buffer was disabled, we must make sure | 1615 | * Even though the buffer was disabled, we must make sure |
1615 | * that it is truly disabled before calling rb_check_pages. | 1616 | * that it is truly disabled before calling rb_check_pages. |
1616 | * There could have been a race between checking | 1617 | * There could have been a race between checking |
1617 | * record_disable and incrementing it. | 1618 | * record_disable and incrementing it. |
1618 | */ | 1619 | */ |
1619 | synchronize_sched(); | 1620 | synchronize_sched(); |
1620 | for_each_buffer_cpu(buffer, cpu) { | 1621 | for_each_buffer_cpu(buffer, cpu) { |
1621 | cpu_buffer = buffer->buffers[cpu]; | 1622 | cpu_buffer = buffer->buffers[cpu]; |
1622 | rb_check_pages(cpu_buffer); | 1623 | rb_check_pages(cpu_buffer); |
1623 | } | 1624 | } |
1624 | atomic_dec(&buffer->record_disabled); | 1625 | atomic_dec(&buffer->record_disabled); |
1625 | } | 1626 | } |
1626 | 1627 | ||
1627 | mutex_unlock(&buffer->mutex); | 1628 | mutex_unlock(&buffer->mutex); |
1628 | return size; | 1629 | return size; |
1629 | 1630 | ||
1630 | out_err: | 1631 | out_err: |
1631 | for_each_buffer_cpu(buffer, cpu) { | 1632 | for_each_buffer_cpu(buffer, cpu) { |
1632 | struct buffer_page *bpage, *tmp; | 1633 | struct buffer_page *bpage, *tmp; |
1633 | 1634 | ||
1634 | cpu_buffer = buffer->buffers[cpu]; | 1635 | cpu_buffer = buffer->buffers[cpu]; |
1635 | cpu_buffer->nr_pages_to_update = 0; | 1636 | cpu_buffer->nr_pages_to_update = 0; |
1636 | 1637 | ||
1637 | if (list_empty(&cpu_buffer->new_pages)) | 1638 | if (list_empty(&cpu_buffer->new_pages)) |
1638 | continue; | 1639 | continue; |
1639 | 1640 | ||
1640 | list_for_each_entry_safe(bpage, tmp, &cpu_buffer->new_pages, | 1641 | list_for_each_entry_safe(bpage, tmp, &cpu_buffer->new_pages, |
1641 | list) { | 1642 | list) { |
1642 | list_del_init(&bpage->list); | 1643 | list_del_init(&bpage->list); |
1643 | free_buffer_page(bpage); | 1644 | free_buffer_page(bpage); |
1644 | } | 1645 | } |
1645 | } | 1646 | } |
1646 | mutex_unlock(&buffer->mutex); | 1647 | mutex_unlock(&buffer->mutex); |
1647 | return err; | 1648 | return err; |
1648 | } | 1649 | } |
1649 | EXPORT_SYMBOL_GPL(ring_buffer_resize); | 1650 | EXPORT_SYMBOL_GPL(ring_buffer_resize); |
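A minimal usage sketch of the resize path above (not part of this patch; the wrapper name is illustrative). The size argument is in bytes and is rounded up to whole pages internally; on success the rounded size is returned, while -EBUSY or -ENOMEM is returned on failure.

    /* Hypothetical caller: grow every per-CPU buffer to roughly 1 MB. */
    static int example_grow_buffer(struct ring_buffer *buffer)
    {
            int ret;

            ret = ring_buffer_resize(buffer, 1024 * 1024, RING_BUFFER_ALL_CPUS);
            if (ret < 0)
                    return ret;     /* -EBUSY while resizing is disabled, -ENOMEM, ... */
            return 0;               /* ret holds the page-rounded size in bytes */
    }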
1650 | 1651 | ||
1651 | void ring_buffer_change_overwrite(struct ring_buffer *buffer, int val) | 1652 | void ring_buffer_change_overwrite(struct ring_buffer *buffer, int val) |
1652 | { | 1653 | { |
1653 | mutex_lock(&buffer->mutex); | 1654 | mutex_lock(&buffer->mutex); |
1654 | if (val) | 1655 | if (val) |
1655 | buffer->flags |= RB_FL_OVERWRITE; | 1656 | buffer->flags |= RB_FL_OVERWRITE; |
1656 | else | 1657 | else |
1657 | buffer->flags &= ~RB_FL_OVERWRITE; | 1658 | buffer->flags &= ~RB_FL_OVERWRITE; |
1658 | mutex_unlock(&buffer->mutex); | 1659 | mutex_unlock(&buffer->mutex); |
1659 | } | 1660 | } |
1660 | EXPORT_SYMBOL_GPL(ring_buffer_change_overwrite); | 1661 | EXPORT_SYMBOL_GPL(ring_buffer_change_overwrite); |
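A short sketch of how a caller might turn overwrite mode off (not from this patch; the wrapper name is hypothetical). With RB_FL_OVERWRITE cleared, a writer that finds the buffer full gives up and increments cpu_buffer->dropped_events in rb_move_tail() further down, instead of overwriting the oldest page.

    /* Hypothetical helper: stop overwriting old events when the buffer is full. */
    static void example_disable_overwrite(struct ring_buffer *buffer)
    {
            ring_buffer_change_overwrite(buffer, 0);  /* clears RB_FL_OVERWRITE */
    }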
1661 | 1662 | ||
1662 | static inline void * | 1663 | static inline void * |
1663 | __rb_data_page_index(struct buffer_data_page *bpage, unsigned index) | 1664 | __rb_data_page_index(struct buffer_data_page *bpage, unsigned index) |
1664 | { | 1665 | { |
1665 | return bpage->data + index; | 1666 | return bpage->data + index; |
1666 | } | 1667 | } |
1667 | 1668 | ||
1668 | static inline void *__rb_page_index(struct buffer_page *bpage, unsigned index) | 1669 | static inline void *__rb_page_index(struct buffer_page *bpage, unsigned index) |
1669 | { | 1670 | { |
1670 | return bpage->page->data + index; | 1671 | return bpage->page->data + index; |
1671 | } | 1672 | } |
1672 | 1673 | ||
1673 | static inline struct ring_buffer_event * | 1674 | static inline struct ring_buffer_event * |
1674 | rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer) | 1675 | rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer) |
1675 | { | 1676 | { |
1676 | return __rb_page_index(cpu_buffer->reader_page, | 1677 | return __rb_page_index(cpu_buffer->reader_page, |
1677 | cpu_buffer->reader_page->read); | 1678 | cpu_buffer->reader_page->read); |
1678 | } | 1679 | } |
1679 | 1680 | ||
1680 | static inline struct ring_buffer_event * | 1681 | static inline struct ring_buffer_event * |
1681 | rb_iter_head_event(struct ring_buffer_iter *iter) | 1682 | rb_iter_head_event(struct ring_buffer_iter *iter) |
1682 | { | 1683 | { |
1683 | return __rb_page_index(iter->head_page, iter->head); | 1684 | return __rb_page_index(iter->head_page, iter->head); |
1684 | } | 1685 | } |
1685 | 1686 | ||
1686 | static inline unsigned rb_page_commit(struct buffer_page *bpage) | 1687 | static inline unsigned rb_page_commit(struct buffer_page *bpage) |
1687 | { | 1688 | { |
1688 | return local_read(&bpage->page->commit); | 1689 | return local_read(&bpage->page->commit); |
1689 | } | 1690 | } |
1690 | 1691 | ||
1691 | /* Size is determined by what has been committed */ | 1692 | /* Size is determined by what has been committed */ |
1692 | static inline unsigned rb_page_size(struct buffer_page *bpage) | 1693 | static inline unsigned rb_page_size(struct buffer_page *bpage) |
1693 | { | 1694 | { |
1694 | return rb_page_commit(bpage); | 1695 | return rb_page_commit(bpage); |
1695 | } | 1696 | } |
1696 | 1697 | ||
1697 | static inline unsigned | 1698 | static inline unsigned |
1698 | rb_commit_index(struct ring_buffer_per_cpu *cpu_buffer) | 1699 | rb_commit_index(struct ring_buffer_per_cpu *cpu_buffer) |
1699 | { | 1700 | { |
1700 | return rb_page_commit(cpu_buffer->commit_page); | 1701 | return rb_page_commit(cpu_buffer->commit_page); |
1701 | } | 1702 | } |
1702 | 1703 | ||
1703 | static inline unsigned | 1704 | static inline unsigned |
1704 | rb_event_index(struct ring_buffer_event *event) | 1705 | rb_event_index(struct ring_buffer_event *event) |
1705 | { | 1706 | { |
1706 | unsigned long addr = (unsigned long)event; | 1707 | unsigned long addr = (unsigned long)event; |
1707 | 1708 | ||
1708 | return (addr & ~PAGE_MASK) - BUF_PAGE_HDR_SIZE; | 1709 | return (addr & ~PAGE_MASK) - BUF_PAGE_HDR_SIZE; |
1709 | } | 1710 | } |
1710 | 1711 | ||
1711 | static inline int | 1712 | static inline int |
1712 | rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer, | 1713 | rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer, |
1713 | struct ring_buffer_event *event) | 1714 | struct ring_buffer_event *event) |
1714 | { | 1715 | { |
1715 | unsigned long addr = (unsigned long)event; | 1716 | unsigned long addr = (unsigned long)event; |
1716 | unsigned long index; | 1717 | unsigned long index; |
1717 | 1718 | ||
1718 | index = rb_event_index(event); | 1719 | index = rb_event_index(event); |
1719 | addr &= PAGE_MASK; | 1720 | addr &= PAGE_MASK; |
1720 | 1721 | ||
1721 | return cpu_buffer->commit_page->page == (void *)addr && | 1722 | return cpu_buffer->commit_page->page == (void *)addr && |
1722 | rb_commit_index(cpu_buffer) == index; | 1723 | rb_commit_index(cpu_buffer) == index; |
1723 | } | 1724 | } |
1724 | 1725 | ||
1725 | static void | 1726 | static void |
1726 | rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) | 1727 | rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) |
1727 | { | 1728 | { |
1728 | unsigned long max_count; | 1729 | unsigned long max_count; |
1729 | 1730 | ||
1730 | /* | 1731 | /* |
1731 | * We only race with interrupts and NMIs on this CPU. | 1732 | * We only race with interrupts and NMIs on this CPU. |
1732 | * If we own the commit event, then we can commit | 1733 | * If we own the commit event, then we can commit |
1733 | * all others that interrupted us, since the interruptions | 1734 | * all others that interrupted us, since the interruptions |
1734 | * are in stack format (they finish before they come | 1735 | * are in stack format (they finish before they come |
1735 | * back to us). This allows us to do a simple loop to | 1736 | * back to us). This allows us to do a simple loop to |
1736 | * assign the commit to the tail. | 1737 | * assign the commit to the tail. |
1737 | */ | 1738 | */ |
1738 | again: | 1739 | again: |
1739 | max_count = cpu_buffer->nr_pages * 100; | 1740 | max_count = cpu_buffer->nr_pages * 100; |
1740 | 1741 | ||
1741 | while (cpu_buffer->commit_page != cpu_buffer->tail_page) { | 1742 | while (cpu_buffer->commit_page != cpu_buffer->tail_page) { |
1742 | if (RB_WARN_ON(cpu_buffer, !(--max_count))) | 1743 | if (RB_WARN_ON(cpu_buffer, !(--max_count))) |
1743 | return; | 1744 | return; |
1744 | if (RB_WARN_ON(cpu_buffer, | 1745 | if (RB_WARN_ON(cpu_buffer, |
1745 | rb_is_reader_page(cpu_buffer->tail_page))) | 1746 | rb_is_reader_page(cpu_buffer->tail_page))) |
1746 | return; | 1747 | return; |
1747 | local_set(&cpu_buffer->commit_page->page->commit, | 1748 | local_set(&cpu_buffer->commit_page->page->commit, |
1748 | rb_page_write(cpu_buffer->commit_page)); | 1749 | rb_page_write(cpu_buffer->commit_page)); |
1749 | rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); | 1750 | rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); |
1750 | cpu_buffer->write_stamp = | 1751 | cpu_buffer->write_stamp = |
1751 | cpu_buffer->commit_page->page->time_stamp; | 1752 | cpu_buffer->commit_page->page->time_stamp; |
1752 | /* add barrier to keep gcc from optimizing too much */ | 1753 | /* add barrier to keep gcc from optimizing too much */ |
1753 | barrier(); | 1754 | barrier(); |
1754 | } | 1755 | } |
1755 | while (rb_commit_index(cpu_buffer) != | 1756 | while (rb_commit_index(cpu_buffer) != |
1756 | rb_page_write(cpu_buffer->commit_page)) { | 1757 | rb_page_write(cpu_buffer->commit_page)) { |
1757 | 1758 | ||
1758 | local_set(&cpu_buffer->commit_page->page->commit, | 1759 | local_set(&cpu_buffer->commit_page->page->commit, |
1759 | rb_page_write(cpu_buffer->commit_page)); | 1760 | rb_page_write(cpu_buffer->commit_page)); |
1760 | RB_WARN_ON(cpu_buffer, | 1761 | RB_WARN_ON(cpu_buffer, |
1761 | local_read(&cpu_buffer->commit_page->page->commit) & | 1762 | local_read(&cpu_buffer->commit_page->page->commit) & |
1762 | ~RB_WRITE_MASK); | 1763 | ~RB_WRITE_MASK); |
1763 | barrier(); | 1764 | barrier(); |
1764 | } | 1765 | } |
1765 | 1766 | ||
1766 | /* again, keep gcc from optimizing */ | 1767 | /* again, keep gcc from optimizing */ |
1767 | barrier(); | 1768 | barrier(); |
1768 | 1769 | ||
1769 | /* | 1770 | /* |
1770 | * If an interrupt came in just after the first while loop | 1771 | * If an interrupt came in just after the first while loop |
1771 | * and pushed the tail page forward, we will be left with | 1772 | * and pushed the tail page forward, we will be left with |
1772 | * a dangling commit that will never go forward. | 1773 | * a dangling commit that will never go forward. |
1773 | */ | 1774 | */ |
1774 | if (unlikely(cpu_buffer->commit_page != cpu_buffer->tail_page)) | 1775 | if (unlikely(cpu_buffer->commit_page != cpu_buffer->tail_page)) |
1775 | goto again; | 1776 | goto again; |
1776 | } | 1777 | } |
1777 | 1778 | ||
1778 | static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer) | 1779 | static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer) |
1779 | { | 1780 | { |
1780 | cpu_buffer->read_stamp = cpu_buffer->reader_page->page->time_stamp; | 1781 | cpu_buffer->read_stamp = cpu_buffer->reader_page->page->time_stamp; |
1781 | cpu_buffer->reader_page->read = 0; | 1782 | cpu_buffer->reader_page->read = 0; |
1782 | } | 1783 | } |
1783 | 1784 | ||
1784 | static void rb_inc_iter(struct ring_buffer_iter *iter) | 1785 | static void rb_inc_iter(struct ring_buffer_iter *iter) |
1785 | { | 1786 | { |
1786 | struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; | 1787 | struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; |
1787 | 1788 | ||
1788 | /* | 1789 | /* |
1789 | * The iterator could be on the reader page (it starts there). | 1790 | * The iterator could be on the reader page (it starts there). |
1790 | * But the head could have moved, since the reader was | 1791 | * But the head could have moved, since the reader was |
1791 | * found. Check for this case and assign the iterator | 1792 | * found. Check for this case and assign the iterator |
1792 | * to the head page instead of next. | 1793 | * to the head page instead of next. |
1793 | */ | 1794 | */ |
1794 | if (iter->head_page == cpu_buffer->reader_page) | 1795 | if (iter->head_page == cpu_buffer->reader_page) |
1795 | iter->head_page = rb_set_head_page(cpu_buffer); | 1796 | iter->head_page = rb_set_head_page(cpu_buffer); |
1796 | else | 1797 | else |
1797 | rb_inc_page(cpu_buffer, &iter->head_page); | 1798 | rb_inc_page(cpu_buffer, &iter->head_page); |
1798 | 1799 | ||
1799 | iter->read_stamp = iter->head_page->page->time_stamp; | 1800 | iter->read_stamp = iter->head_page->page->time_stamp; |
1800 | iter->head = 0; | 1801 | iter->head = 0; |
1801 | } | 1802 | } |
1802 | 1803 | ||
1803 | /* Slow path, do not inline */ | 1804 | /* Slow path, do not inline */ |
1804 | static noinline struct ring_buffer_event * | 1805 | static noinline struct ring_buffer_event * |
1805 | rb_add_time_stamp(struct ring_buffer_event *event, u64 delta) | 1806 | rb_add_time_stamp(struct ring_buffer_event *event, u64 delta) |
1806 | { | 1807 | { |
1807 | event->type_len = RINGBUF_TYPE_TIME_EXTEND; | 1808 | event->type_len = RINGBUF_TYPE_TIME_EXTEND; |
1808 | 1809 | ||
1809 | /* Not the first event on the page? */ | 1810 | /* Not the first event on the page? */ |
1810 | if (rb_event_index(event)) { | 1811 | if (rb_event_index(event)) { |
1811 | event->time_delta = delta & TS_MASK; | 1812 | event->time_delta = delta & TS_MASK; |
1812 | event->array[0] = delta >> TS_SHIFT; | 1813 | event->array[0] = delta >> TS_SHIFT; |
1813 | } else { | 1814 | } else { |
1814 | /* nope, just zero it */ | 1815 | /* nope, just zero it */ |
1815 | event->time_delta = 0; | 1816 | event->time_delta = 0; |
1816 | event->array[0] = 0; | 1817 | event->array[0] = 0; |
1817 | } | 1818 | } |
1818 | 1819 | ||
1819 | return skip_time_extend(event); | 1820 | return skip_time_extend(event); |
1820 | } | 1821 | } |
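A worked example of the time-extend split above, written as plain user-space C; the 27-bit shift is an assumption here (the usual width of the event's time_delta field), not a value taken from this file.

    #include <stdint.h>
    #include <stdio.h>

    #define EX_TS_SHIFT 27                             /* assumed TS_SHIFT */
    #define EX_TS_MASK  ((1ULL << EX_TS_SHIFT) - 1)    /* assumed TS_MASK  */

    int main(void)
    {
            uint64_t delta = 0x123456789AULL;              /* too big for 27 bits   */
            uint32_t time_delta = delta & EX_TS_MASK;      /* low bits in the event */
            uint32_t array0     = delta >> EX_TS_SHIFT;    /* high bits in array[0] */

            /* a reader recombines the two halves the same way */
            uint64_t back = ((uint64_t)array0 << EX_TS_SHIFT) | time_delta;
            printf("%#llx\n", (unsigned long long)back);   /* prints 0x123456789a   */
            return 0;
    }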
1821 | 1822 | ||
1822 | /** | 1823 | /** |
1823 | * ring_buffer_update_event - update event type and data | 1824 | * ring_buffer_update_event - update event type and data |
1824 | * @event: the event to update | 1825 | * @event: the event to update |
1825 | * @type: the type of event | 1826 | * @type: the type of event |
1826 | * @length: the size of the event field in the ring buffer | 1827 | * @length: the size of the event field in the ring buffer |
1827 | * | 1828 | * |
1828 | * Update the type and data fields of the event. The length | 1829 | * Update the type and data fields of the event. The length |
1829 | * is the actual size that is written to the ring buffer, | 1830 | * is the actual size that is written to the ring buffer, |
1830 | * and with this, we can determine what to place into the | 1831 | * and with this, we can determine what to place into the |
1831 | * data field. | 1832 | * data field. |
1832 | */ | 1833 | */ |
1833 | static void | 1834 | static void |
1834 | rb_update_event(struct ring_buffer_per_cpu *cpu_buffer, | 1835 | rb_update_event(struct ring_buffer_per_cpu *cpu_buffer, |
1835 | struct ring_buffer_event *event, unsigned length, | 1836 | struct ring_buffer_event *event, unsigned length, |
1836 | int add_timestamp, u64 delta) | 1837 | int add_timestamp, u64 delta) |
1837 | { | 1838 | { |
1838 | /* Only a commit updates the timestamp */ | 1839 | /* Only a commit updates the timestamp */ |
1839 | if (unlikely(!rb_event_is_commit(cpu_buffer, event))) | 1840 | if (unlikely(!rb_event_is_commit(cpu_buffer, event))) |
1840 | delta = 0; | 1841 | delta = 0; |
1841 | 1842 | ||
1842 | /* | 1843 | /* |
1843 | * If we need to add a timestamp, then we | 1844 | * If we need to add a timestamp, then we |
1844 | * add it to the start of the reserved space. | 1845 | * add it to the start of the reserved space. |
1845 | */ | 1846 | */ |
1846 | if (unlikely(add_timestamp)) { | 1847 | if (unlikely(add_timestamp)) { |
1847 | event = rb_add_time_stamp(event, delta); | 1848 | event = rb_add_time_stamp(event, delta); |
1848 | length -= RB_LEN_TIME_EXTEND; | 1849 | length -= RB_LEN_TIME_EXTEND; |
1849 | delta = 0; | 1850 | delta = 0; |
1850 | } | 1851 | } |
1851 | 1852 | ||
1852 | event->time_delta = delta; | 1853 | event->time_delta = delta; |
1853 | length -= RB_EVNT_HDR_SIZE; | 1854 | length -= RB_EVNT_HDR_SIZE; |
1854 | if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) { | 1855 | if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) { |
1855 | event->type_len = 0; | 1856 | event->type_len = 0; |
1856 | event->array[0] = length; | 1857 | event->array[0] = length; |
1857 | } else | 1858 | } else |
1858 | event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT); | 1859 | event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT); |
1859 | } | 1860 | } |
1860 | 1861 | ||
1861 | /* | 1862 | /* |
1862 | * rb_handle_head_page - writer hit the head page | 1863 | * rb_handle_head_page - writer hit the head page |
1863 | * | 1864 | * |
1864 | * Returns: +1 to retry page | 1865 | * Returns: +1 to retry page |
1865 | * 0 to continue | 1866 | * 0 to continue |
1866 | * -1 on error | 1867 | * -1 on error |
1867 | */ | 1868 | */ |
1868 | static int | 1869 | static int |
1869 | rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer, | 1870 | rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer, |
1870 | struct buffer_page *tail_page, | 1871 | struct buffer_page *tail_page, |
1871 | struct buffer_page *next_page) | 1872 | struct buffer_page *next_page) |
1872 | { | 1873 | { |
1873 | struct buffer_page *new_head; | 1874 | struct buffer_page *new_head; |
1874 | int entries; | 1875 | int entries; |
1875 | int type; | 1876 | int type; |
1876 | int ret; | 1877 | int ret; |
1877 | 1878 | ||
1878 | entries = rb_page_entries(next_page); | 1879 | entries = rb_page_entries(next_page); |
1879 | 1880 | ||
1880 | /* | 1881 | /* |
1881 | * The hard part is here. We need to move the head | 1882 | * The hard part is here. We need to move the head |
1882 | * forward, and protect against both readers on | 1883 | * forward, and protect against both readers on |
1883 | * other CPUs and writers coming in via interrupts. | 1884 | * other CPUs and writers coming in via interrupts. |
1884 | */ | 1885 | */ |
1885 | type = rb_head_page_set_update(cpu_buffer, next_page, tail_page, | 1886 | type = rb_head_page_set_update(cpu_buffer, next_page, tail_page, |
1886 | RB_PAGE_HEAD); | 1887 | RB_PAGE_HEAD); |
1887 | 1888 | ||
1888 | /* | 1889 | /* |
1889 | * type can be one of four: | 1890 | * type can be one of four: |
1890 | * NORMAL - an interrupt already moved it for us | 1891 | * NORMAL - an interrupt already moved it for us |
1891 | * HEAD - we are the first to get here. | 1892 | * HEAD - we are the first to get here. |
1892 | * UPDATE - we are the interrupt interrupting | 1893 | * UPDATE - we are the interrupt interrupting |
1893 | * a current move. | 1894 | * a current move. |
1894 | * MOVED - a reader on another CPU moved the next | 1895 | * MOVED - a reader on another CPU moved the next |
1895 | * pointer to its reader page. Give up | 1896 | * pointer to its reader page. Give up |
1896 | * and try again. | 1897 | * and try again. |
1897 | */ | 1898 | */ |
1898 | 1899 | ||
1899 | switch (type) { | 1900 | switch (type) { |
1900 | case RB_PAGE_HEAD: | 1901 | case RB_PAGE_HEAD: |
1901 | /* | 1902 | /* |
1902 | * We changed the head to UPDATE, thus | 1903 | * We changed the head to UPDATE, thus |
1903 | * it is our responsibility to update | 1904 | * it is our responsibility to update |
1904 | * the counters. | 1905 | * the counters. |
1905 | */ | 1906 | */ |
1906 | local_add(entries, &cpu_buffer->overrun); | 1907 | local_add(entries, &cpu_buffer->overrun); |
1907 | local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes); | 1908 | local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes); |
1908 | 1909 | ||
1909 | /* | 1910 | /* |
1910 | * The entries will be zeroed out when we move the | 1911 | * The entries will be zeroed out when we move the |
1911 | * tail page. | 1912 | * tail page. |
1912 | */ | 1913 | */ |
1913 | 1914 | ||
1914 | /* still more to do */ | 1915 | /* still more to do */ |
1915 | break; | 1916 | break; |
1916 | 1917 | ||
1917 | case RB_PAGE_UPDATE: | 1918 | case RB_PAGE_UPDATE: |
1918 | /* | 1919 | /* |
1919 | * This is an interrupt that interrupted the | 1920 | * This is an interrupt that interrupted the |
1920 | * previous update. Still more to do. | 1921 | * previous update. Still more to do. |
1921 | */ | 1922 | */ |
1922 | break; | 1923 | break; |
1923 | case RB_PAGE_NORMAL: | 1924 | case RB_PAGE_NORMAL: |
1924 | /* | 1925 | /* |
1925 | * An interrupt came in before the update | 1926 | * An interrupt came in before the update |
1926 | * and processed this for us. | 1927 | * and processed this for us. |
1927 | * Nothing left to do. | 1928 | * Nothing left to do. |
1928 | */ | 1929 | */ |
1929 | return 1; | 1930 | return 1; |
1930 | case RB_PAGE_MOVED: | 1931 | case RB_PAGE_MOVED: |
1931 | /* | 1932 | /* |
1932 | * The reader is on another CPU and just did | 1933 | * The reader is on another CPU and just did |
1933 | * a swap with our next_page. | 1934 | * a swap with our next_page. |
1934 | * Try again. | 1935 | * Try again. |
1935 | */ | 1936 | */ |
1936 | return 1; | 1937 | return 1; |
1937 | default: | 1938 | default: |
1938 | RB_WARN_ON(cpu_buffer, 1); /* WTF??? */ | 1939 | RB_WARN_ON(cpu_buffer, 1); /* WTF??? */ |
1939 | return -1; | 1940 | return -1; |
1940 | } | 1941 | } |
1941 | 1942 | ||
1942 | /* | 1943 | /* |
1943 | * Now that we are here, the old head pointer is | 1944 | * Now that we are here, the old head pointer is |
1944 | * set to UPDATE. This will keep the reader from | 1945 | * set to UPDATE. This will keep the reader from |
1945 | * swapping the head page with the reader page. | 1946 | * swapping the head page with the reader page. |
1946 | * The reader (on another CPU) will spin till | 1947 | * The reader (on another CPU) will spin till |
1947 | * we are finished. | 1948 | * we are finished. |
1948 | * | 1949 | * |
1949 | * We just need to protect against interrupts | 1950 | * We just need to protect against interrupts |
1950 | * doing the job. We will set the next pointer | 1951 | * doing the job. We will set the next pointer |
1951 | * to HEAD. After that, we set the old pointer | 1952 | * to HEAD. After that, we set the old pointer |
1952 | * to NORMAL, but only if it was HEAD before. | 1953 | * to NORMAL, but only if it was HEAD before. |
1953 | * otherwise we are an interrupt, and only | 1954 | * otherwise we are an interrupt, and only |
1954 | * want the outer most commit to reset it. | 1955 | * want the outer most commit to reset it. |
1955 | */ | 1956 | */ |
1956 | new_head = next_page; | 1957 | new_head = next_page; |
1957 | rb_inc_page(cpu_buffer, &new_head); | 1958 | rb_inc_page(cpu_buffer, &new_head); |
1958 | 1959 | ||
1959 | ret = rb_head_page_set_head(cpu_buffer, new_head, next_page, | 1960 | ret = rb_head_page_set_head(cpu_buffer, new_head, next_page, |
1960 | RB_PAGE_NORMAL); | 1961 | RB_PAGE_NORMAL); |
1961 | 1962 | ||
1962 | /* | 1963 | /* |
1963 | * Valid returns are: | 1964 | * Valid returns are: |
1964 | * HEAD - an interrupt came in and already set it. | 1965 | * HEAD - an interrupt came in and already set it. |
1965 | * NORMAL - One of two things: | 1966 | * NORMAL - One of two things: |
1966 | * 1) We really set it. | 1967 | * 1) We really set it. |
1967 | * 2) A bunch of interrupts came in and moved | 1968 | * 2) A bunch of interrupts came in and moved |
1968 | * the page forward again. | 1969 | * the page forward again. |
1969 | */ | 1970 | */ |
1970 | switch (ret) { | 1971 | switch (ret) { |
1971 | case RB_PAGE_HEAD: | 1972 | case RB_PAGE_HEAD: |
1972 | case RB_PAGE_NORMAL: | 1973 | case RB_PAGE_NORMAL: |
1973 | /* OK */ | 1974 | /* OK */ |
1974 | break; | 1975 | break; |
1975 | default: | 1976 | default: |
1976 | RB_WARN_ON(cpu_buffer, 1); | 1977 | RB_WARN_ON(cpu_buffer, 1); |
1977 | return -1; | 1978 | return -1; |
1978 | } | 1979 | } |
1979 | 1980 | ||
1980 | /* | 1981 | /* |
1981 | * It is possible that an interrupt came in, | 1982 | * It is possible that an interrupt came in, |
1982 | * set the head up, then more interrupts came in | 1983 | * set the head up, then more interrupts came in |
1983 | * and moved it again. When we get back here, | 1984 | * and moved it again. When we get back here, |
1984 | * the page would have been set to NORMAL but we | 1985 | * the page would have been set to NORMAL but we |
1985 | * just set it back to HEAD. | 1986 | * just set it back to HEAD. |
1986 | * | 1987 | * |
1987 | * How do you detect this? Well, if that happened | 1988 | * How do you detect this? Well, if that happened |
1988 | * the tail page would have moved. | 1989 | * the tail page would have moved. |
1989 | */ | 1990 | */ |
1990 | if (ret == RB_PAGE_NORMAL) { | 1991 | if (ret == RB_PAGE_NORMAL) { |
1991 | /* | 1992 | /* |
1992 | * If the tail had moved past next, then we need | 1993 | * If the tail had moved past next, then we need |
1993 | * to reset the pointer. | 1994 | * to reset the pointer. |
1994 | */ | 1995 | */ |
1995 | if (cpu_buffer->tail_page != tail_page && | 1996 | if (cpu_buffer->tail_page != tail_page && |
1996 | cpu_buffer->tail_page != next_page) | 1997 | cpu_buffer->tail_page != next_page) |
1997 | rb_head_page_set_normal(cpu_buffer, new_head, | 1998 | rb_head_page_set_normal(cpu_buffer, new_head, |
1998 | next_page, | 1999 | next_page, |
1999 | RB_PAGE_HEAD); | 2000 | RB_PAGE_HEAD); |
2000 | } | 2001 | } |
2001 | 2002 | ||
2002 | /* | 2003 | /* |
2003 | * If this was the outer most commit (the one that | 2004 | * If this was the outer most commit (the one that |
2004 | * changed the original pointer from HEAD to UPDATE), | 2005 | * changed the original pointer from HEAD to UPDATE), |
2005 | * then it is up to us to reset it to NORMAL. | 2006 | * then it is up to us to reset it to NORMAL. |
2006 | */ | 2007 | */ |
2007 | if (type == RB_PAGE_HEAD) { | 2008 | if (type == RB_PAGE_HEAD) { |
2008 | ret = rb_head_page_set_normal(cpu_buffer, next_page, | 2009 | ret = rb_head_page_set_normal(cpu_buffer, next_page, |
2009 | tail_page, | 2010 | tail_page, |
2010 | RB_PAGE_UPDATE); | 2011 | RB_PAGE_UPDATE); |
2011 | if (RB_WARN_ON(cpu_buffer, | 2012 | if (RB_WARN_ON(cpu_buffer, |
2012 | ret != RB_PAGE_UPDATE)) | 2013 | ret != RB_PAGE_UPDATE)) |
2013 | return -1; | 2014 | return -1; |
2014 | } | 2015 | } |
2015 | 2016 | ||
2016 | return 0; | 2017 | return 0; |
2017 | } | 2018 | } |
2018 | 2019 | ||
2019 | static unsigned rb_calculate_event_length(unsigned length) | 2020 | static unsigned rb_calculate_event_length(unsigned length) |
2020 | { | 2021 | { |
2021 | struct ring_buffer_event event; /* Used only for sizeof array */ | 2022 | struct ring_buffer_event event; /* Used only for sizeof array */ |
2022 | 2023 | ||
2023 | /* zero length can cause confusions */ | 2024 | /* zero length can cause confusions */ |
2024 | if (!length) | 2025 | if (!length) |
2025 | length = 1; | 2026 | length = 1; |
2026 | 2027 | ||
2027 | if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) | 2028 | if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) |
2028 | length += sizeof(event.array[0]); | 2029 | length += sizeof(event.array[0]); |
2029 | 2030 | ||
2030 | length += RB_EVNT_HDR_SIZE; | 2031 | length += RB_EVNT_HDR_SIZE; |
2031 | length = ALIGN(length, RB_ARCH_ALIGNMENT); | 2032 | length = ALIGN(length, RB_ARCH_ALIGNMENT); |
2032 | 2033 | ||
2033 | return length; | 2034 | return length; |
2034 | } | 2035 | } |
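To make the reservation math above concrete, here is a stand-alone sketch with assumed constants (a 4-byte event header and 4-byte alignment); it models only the small-data branch and leaves out the extra array[0] word reserved for large or force-aligned events.

    #include <stdio.h>

    #define EX_HDR_SIZE  4u                        /* assumed RB_EVNT_HDR_SIZE  */
    #define EX_ALIGN(x)  (((x) + 3u) / 4u * 4u)    /* assumed RB_ARCH_ALIGNMENT */

    static unsigned ex_event_length(unsigned length)
    {
            if (!length)                 /* zero length can cause confusion */
                    length = 1;
            length += EX_HDR_SIZE;       /* room for the event header       */
            return EX_ALIGN(length);     /* round up to the alignment       */
    }

    int main(void)
    {
            /* 0-, 6- and 13-byte payloads reserve 8, 12 and 20 bytes */
            printf("%u %u %u\n", ex_event_length(0),
                   ex_event_length(6), ex_event_length(13));
            return 0;
    }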
2035 | 2036 | ||
2036 | static inline void | 2037 | static inline void |
2037 | rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, | 2038 | rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, |
2038 | struct buffer_page *tail_page, | 2039 | struct buffer_page *tail_page, |
2039 | unsigned long tail, unsigned long length) | 2040 | unsigned long tail, unsigned long length) |
2040 | { | 2041 | { |
2041 | struct ring_buffer_event *event; | 2042 | struct ring_buffer_event *event; |
2042 | 2043 | ||
2043 | /* | 2044 | /* |
2044 | * Only the event that crossed the page boundary | 2045 | * Only the event that crossed the page boundary |
2045 | * must fill the old tail_page with padding. | 2046 | * must fill the old tail_page with padding. |
2046 | */ | 2047 | */ |
2047 | if (tail >= BUF_PAGE_SIZE) { | 2048 | if (tail >= BUF_PAGE_SIZE) { |
2048 | /* | 2049 | /* |
2049 | * If the page was filled, then we still need | 2050 | * If the page was filled, then we still need |
2050 | * to update the real_end. Reset it to zero | 2051 | * to update the real_end. Reset it to zero |
2051 | * and the reader will ignore it. | 2052 | * and the reader will ignore it. |
2052 | */ | 2053 | */ |
2053 | if (tail == BUF_PAGE_SIZE) | 2054 | if (tail == BUF_PAGE_SIZE) |
2054 | tail_page->real_end = 0; | 2055 | tail_page->real_end = 0; |
2055 | 2056 | ||
2056 | local_sub(length, &tail_page->write); | 2057 | local_sub(length, &tail_page->write); |
2057 | return; | 2058 | return; |
2058 | } | 2059 | } |
2059 | 2060 | ||
2060 | event = __rb_page_index(tail_page, tail); | 2061 | event = __rb_page_index(tail_page, tail); |
2061 | kmemcheck_annotate_bitfield(event, bitfield); | 2062 | kmemcheck_annotate_bitfield(event, bitfield); |
2062 | 2063 | ||
2063 | /* account for padding bytes */ | 2064 | /* account for padding bytes */ |
2064 | local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes); | 2065 | local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes); |
2065 | 2066 | ||
2066 | /* | 2067 | /* |
2067 | * Save the original length to the meta data. | 2068 | * Save the original length to the meta data. |
2068 | * This will be used by the reader to add lost event | 2069 | * This will be used by the reader to add lost event |
2069 | * counter. | 2070 | * counter. |
2070 | */ | 2071 | */ |
2071 | tail_page->real_end = tail; | 2072 | tail_page->real_end = tail; |
2072 | 2073 | ||
2073 | /* | 2074 | /* |
2074 | * If this event is bigger than the minimum size, then | 2075 | * If this event is bigger than the minimum size, then |
2075 | * we need to be careful that we don't subtract the | 2076 | * we need to be careful that we don't subtract the |
2076 | * write counter enough to allow another writer to slip | 2077 | * write counter enough to allow another writer to slip |
2077 | * in on this page. | 2078 | * in on this page. |
2078 | * We put in a discarded commit instead, to make sure | 2079 | * We put in a discarded commit instead, to make sure |
2079 | * that this space is not used again. | 2080 | * that this space is not used again. |
2080 | * | 2081 | * |
2081 | * If we are less than the minimum size, we don't need to | 2082 | * If we are less than the minimum size, we don't need to |
2082 | * worry about it. | 2083 | * worry about it. |
2083 | */ | 2084 | */ |
2084 | if (tail > (BUF_PAGE_SIZE - RB_EVNT_MIN_SIZE)) { | 2085 | if (tail > (BUF_PAGE_SIZE - RB_EVNT_MIN_SIZE)) { |
2085 | /* No room for any events */ | 2086 | /* No room for any events */ |
2086 | 2087 | ||
2087 | /* Mark the rest of the page with padding */ | 2088 | /* Mark the rest of the page with padding */ |
2088 | rb_event_set_padding(event); | 2089 | rb_event_set_padding(event); |
2089 | 2090 | ||
2090 | /* Set the write back to the previous setting */ | 2091 | /* Set the write back to the previous setting */ |
2091 | local_sub(length, &tail_page->write); | 2092 | local_sub(length, &tail_page->write); |
2092 | return; | 2093 | return; |
2093 | } | 2094 | } |
2094 | 2095 | ||
2095 | /* Put in a discarded event */ | 2096 | /* Put in a discarded event */ |
2096 | event->array[0] = (BUF_PAGE_SIZE - tail) - RB_EVNT_HDR_SIZE; | 2097 | event->array[0] = (BUF_PAGE_SIZE - tail) - RB_EVNT_HDR_SIZE; |
2097 | event->type_len = RINGBUF_TYPE_PADDING; | 2098 | event->type_len = RINGBUF_TYPE_PADDING; |
2098 | /* time delta must be non zero */ | 2099 | /* time delta must be non zero */ |
2099 | event->time_delta = 1; | 2100 | event->time_delta = 1; |
2100 | 2101 | ||
2101 | /* Set write to end of buffer */ | 2102 | /* Set write to end of buffer */ |
2102 | length = (tail + length) - BUF_PAGE_SIZE; | 2103 | length = (tail + length) - BUF_PAGE_SIZE; |
2103 | local_sub(length, &tail_page->write); | 2104 | local_sub(length, &tail_page->write); |
2104 | } | 2105 | } |
2105 | 2106 | ||
2106 | /* | 2107 | /* |
2107 | * This is the slow path, force gcc not to inline it. | 2108 | * This is the slow path, force gcc not to inline it. |
2108 | */ | 2109 | */ |
2109 | static noinline struct ring_buffer_event * | 2110 | static noinline struct ring_buffer_event * |
2110 | rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, | 2111 | rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, |
2111 | unsigned long length, unsigned long tail, | 2112 | unsigned long length, unsigned long tail, |
2112 | struct buffer_page *tail_page, u64 ts) | 2113 | struct buffer_page *tail_page, u64 ts) |
2113 | { | 2114 | { |
2114 | struct buffer_page *commit_page = cpu_buffer->commit_page; | 2115 | struct buffer_page *commit_page = cpu_buffer->commit_page; |
2115 | struct ring_buffer *buffer = cpu_buffer->buffer; | 2116 | struct ring_buffer *buffer = cpu_buffer->buffer; |
2116 | struct buffer_page *next_page; | 2117 | struct buffer_page *next_page; |
2117 | int ret; | 2118 | int ret; |
2118 | 2119 | ||
2119 | next_page = tail_page; | 2120 | next_page = tail_page; |
2120 | 2121 | ||
2121 | rb_inc_page(cpu_buffer, &next_page); | 2122 | rb_inc_page(cpu_buffer, &next_page); |
2122 | 2123 | ||
2123 | /* | 2124 | /* |
2124 | * If for some reason, we had an interrupt storm that made | 2125 | * If for some reason, we had an interrupt storm that made |
2125 | * it all the way around the buffer, bail, and warn | 2126 | * it all the way around the buffer, bail, and warn |
2126 | * about it. | 2127 | * about it. |
2127 | */ | 2128 | */ |
2128 | if (unlikely(next_page == commit_page)) { | 2129 | if (unlikely(next_page == commit_page)) { |
2129 | local_inc(&cpu_buffer->commit_overrun); | 2130 | local_inc(&cpu_buffer->commit_overrun); |
2130 | goto out_reset; | 2131 | goto out_reset; |
2131 | } | 2132 | } |
2132 | 2133 | ||
2133 | /* | 2134 | /* |
2134 | * This is where the fun begins! | 2135 | * This is where the fun begins! |
2135 | * | 2136 | * |
2136 | * We are fighting against races between a reader that | 2137 | * We are fighting against races between a reader that |
2137 | * could be on another CPU trying to swap its reader | 2138 | * could be on another CPU trying to swap its reader |
2138 | * page with the buffer head. | 2139 | * page with the buffer head. |
2139 | * | 2140 | * |
2140 | * We are also fighting against interrupts coming in and | 2141 | * We are also fighting against interrupts coming in and |
2141 | * moving the head or tail on us as well. | 2142 | * moving the head or tail on us as well. |
2142 | * | 2143 | * |
2143 | * If the next page is the head page then we have filled | 2144 | * If the next page is the head page then we have filled |
2144 | * the buffer, unless the commit page is still on the | 2145 | * the buffer, unless the commit page is still on the |
2145 | * reader page. | 2146 | * reader page. |
2146 | */ | 2147 | */ |
2147 | if (rb_is_head_page(cpu_buffer, next_page, &tail_page->list)) { | 2148 | if (rb_is_head_page(cpu_buffer, next_page, &tail_page->list)) { |
2148 | 2149 | ||
2149 | /* | 2150 | /* |
2150 | * If the commit is not on the reader page, then | 2151 | * If the commit is not on the reader page, then |
2151 | * move the header page. | 2152 | * move the header page. |
2152 | */ | 2153 | */ |
2153 | if (!rb_is_reader_page(cpu_buffer->commit_page)) { | 2154 | if (!rb_is_reader_page(cpu_buffer->commit_page)) { |
2154 | /* | 2155 | /* |
2155 | * If we are not in overwrite mode, | 2156 | * If we are not in overwrite mode, |
2156 | * this is easy, just stop here. | 2157 | * this is easy, just stop here. |
2157 | */ | 2158 | */ |
2158 | if (!(buffer->flags & RB_FL_OVERWRITE)) | 2159 | if (!(buffer->flags & RB_FL_OVERWRITE)) { |
2160 | local_inc(&cpu_buffer->dropped_events); | ||
2159 | goto out_reset; | 2161 | goto out_reset; |
2162 | } | ||
2160 | 2163 | ||
2161 | ret = rb_handle_head_page(cpu_buffer, | 2164 | ret = rb_handle_head_page(cpu_buffer, |
2162 | tail_page, | 2165 | tail_page, |
2163 | next_page); | 2166 | next_page); |
2164 | if (ret < 0) | 2167 | if (ret < 0) |
2165 | goto out_reset; | 2168 | goto out_reset; |
2166 | if (ret) | 2169 | if (ret) |
2167 | goto out_again; | 2170 | goto out_again; |
2168 | } else { | 2171 | } else { |
2169 | /* | 2172 | /* |
2170 | * We need to be careful here too. The | 2173 | * We need to be careful here too. The |
2171 | * commit page could still be on the reader | 2174 | * commit page could still be on the reader |
2172 | * page. We could have a small buffer, and | 2175 | * page. We could have a small buffer, and |
2173 | * have filled up the buffer with events | 2176 | * have filled up the buffer with events |
2174 | * from interrupts and such, and wrapped. | 2177 | * from interrupts and such, and wrapped. |
2175 | * | 2178 | * |
2176 | * Note, if the tail page is also on the | 2179 | * Note, if the tail page is also on the |
2177 | * reader_page, we let it move out. | 2180 | * reader_page, we let it move out. |
2178 | */ | 2181 | */ |
2179 | if (unlikely((cpu_buffer->commit_page != | 2182 | if (unlikely((cpu_buffer->commit_page != |
2180 | cpu_buffer->tail_page) && | 2183 | cpu_buffer->tail_page) && |
2181 | (cpu_buffer->commit_page == | 2184 | (cpu_buffer->commit_page == |
2182 | cpu_buffer->reader_page))) { | 2185 | cpu_buffer->reader_page))) { |
2183 | local_inc(&cpu_buffer->commit_overrun); | 2186 | local_inc(&cpu_buffer->commit_overrun); |
2184 | goto out_reset; | 2187 | goto out_reset; |
2185 | } | 2188 | } |
2186 | } | 2189 | } |
2187 | } | 2190 | } |
2188 | 2191 | ||
2189 | ret = rb_tail_page_update(cpu_buffer, tail_page, next_page); | 2192 | ret = rb_tail_page_update(cpu_buffer, tail_page, next_page); |
2190 | if (ret) { | 2193 | if (ret) { |
2191 | /* | 2194 | /* |
2192 | * Nested commits always have zero deltas, so | 2195 | * Nested commits always have zero deltas, so |
2193 | * just reread the time stamp | 2196 | * just reread the time stamp |
2194 | */ | 2197 | */ |
2195 | ts = rb_time_stamp(buffer); | 2198 | ts = rb_time_stamp(buffer); |
2196 | next_page->page->time_stamp = ts; | 2199 | next_page->page->time_stamp = ts; |
2197 | } | 2200 | } |
2198 | 2201 | ||
2199 | out_again: | 2202 | out_again: |
2200 | 2203 | ||
2201 | rb_reset_tail(cpu_buffer, tail_page, tail, length); | 2204 | rb_reset_tail(cpu_buffer, tail_page, tail, length); |
2202 | 2205 | ||
2203 | /* fail and let the caller try again */ | 2206 | /* fail and let the caller try again */ |
2204 | return ERR_PTR(-EAGAIN); | 2207 | return ERR_PTR(-EAGAIN); |
2205 | 2208 | ||
2206 | out_reset: | 2209 | out_reset: |
2207 | /* reset write */ | 2210 | /* reset write */ |
2208 | rb_reset_tail(cpu_buffer, tail_page, tail, length); | 2211 | rb_reset_tail(cpu_buffer, tail_page, tail, length); |
2209 | 2212 | ||
2210 | return NULL; | 2213 | return NULL; |
2211 | } | 2214 | } |
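The overflow-off branch above is where the dropped_events counter is incremented. Below is a hedged user-space model of that policy, with all names made up for illustration; it only mirrors the decision, not the real page handling.

    #include <stdbool.h>

    struct ex_buf {
            unsigned long used, capacity;
            unsigned long dropped_events, overrun;
            bool overwrite;
    };

    /* returns true if the event may be written */
    static bool ex_reserve(struct ex_buf *b, unsigned long len)
    {
            if (b->used + len <= b->capacity) {
                    b->used += len;          /* normal fast path                  */
                    return true;
            }
            if (!b->overwrite) {
                    b->dropped_events++;     /* full with overflow off: refuse    */
                    return false;
            }
            b->overrun++;                    /* overflow on: oldest data is lost  */
            return true;
    }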
2212 | 2215 | ||
2213 | static struct ring_buffer_event * | 2216 | static struct ring_buffer_event * |
2214 | __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | 2217 | __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, |
2215 | unsigned long length, u64 ts, | 2218 | unsigned long length, u64 ts, |
2216 | u64 delta, int add_timestamp) | 2219 | u64 delta, int add_timestamp) |
2217 | { | 2220 | { |
2218 | struct buffer_page *tail_page; | 2221 | struct buffer_page *tail_page; |
2219 | struct ring_buffer_event *event; | 2222 | struct ring_buffer_event *event; |
2220 | unsigned long tail, write; | 2223 | unsigned long tail, write; |
2221 | 2224 | ||
2222 | /* | 2225 | /* |
2223 | * If the time delta since the last event is too big to | 2226 | * If the time delta since the last event is too big to |
2224 | * hold in the time field of the event, then we append a | 2227 | * hold in the time field of the event, then we append a |
2225 | * TIME EXTEND event ahead of the data event. | 2228 | * TIME EXTEND event ahead of the data event. |
2226 | */ | 2229 | */ |
2227 | if (unlikely(add_timestamp)) | 2230 | if (unlikely(add_timestamp)) |
2228 | length += RB_LEN_TIME_EXTEND; | 2231 | length += RB_LEN_TIME_EXTEND; |
2229 | 2232 | ||
2230 | tail_page = cpu_buffer->tail_page; | 2233 | tail_page = cpu_buffer->tail_page; |
2231 | write = local_add_return(length, &tail_page->write); | 2234 | write = local_add_return(length, &tail_page->write); |
2232 | 2235 | ||
2233 | /* set write to only the index of the write */ | 2236 | /* set write to only the index of the write */ |
2234 | write &= RB_WRITE_MASK; | 2237 | write &= RB_WRITE_MASK; |
2235 | tail = write - length; | 2238 | tail = write - length; |
2236 | 2239 | ||
2237 | /* See if we shot past the end of this buffer page */ | 2240 | /* See if we shot past the end of this buffer page */ |
2238 | if (unlikely(write > BUF_PAGE_SIZE)) | 2241 | if (unlikely(write > BUF_PAGE_SIZE)) |
2239 | return rb_move_tail(cpu_buffer, length, tail, | 2242 | return rb_move_tail(cpu_buffer, length, tail, |
2240 | tail_page, ts); | 2243 | tail_page, ts); |
2241 | 2244 | ||
2242 | /* We reserved something on the buffer */ | 2245 | /* We reserved something on the buffer */ |
2243 | 2246 | ||
2244 | event = __rb_page_index(tail_page, tail); | 2247 | event = __rb_page_index(tail_page, tail); |
2245 | kmemcheck_annotate_bitfield(event, bitfield); | 2248 | kmemcheck_annotate_bitfield(event, bitfield); |
2246 | rb_update_event(cpu_buffer, event, length, add_timestamp, delta); | 2249 | rb_update_event(cpu_buffer, event, length, add_timestamp, delta); |
2247 | 2250 | ||
2248 | local_inc(&tail_page->entries); | 2251 | local_inc(&tail_page->entries); |
2249 | 2252 | ||
2250 | /* | 2253 | /* |
2251 | * If this is the first commit on the page, then update | 2254 | * If this is the first commit on the page, then update |
2252 | * its timestamp. | 2255 | * its timestamp. |
2253 | */ | 2256 | */ |
2254 | if (!tail) | 2257 | if (!tail) |
2255 | tail_page->page->time_stamp = ts; | 2258 | tail_page->page->time_stamp = ts; |
2256 | 2259 | ||
2257 | /* account for these added bytes */ | 2260 | /* account for these added bytes */ |
2258 | local_add(length, &cpu_buffer->entries_bytes); | 2261 | local_add(length, &cpu_buffer->entries_bytes); |
2259 | 2262 | ||
2260 | return event; | 2263 | return event; |
2261 | } | 2264 | } |
2262 | 2265 | ||
2263 | static inline int | 2266 | static inline int |
2264 | rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, | 2267 | rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, |
2265 | struct ring_buffer_event *event) | 2268 | struct ring_buffer_event *event) |
2266 | { | 2269 | { |
2267 | unsigned long new_index, old_index; | 2270 | unsigned long new_index, old_index; |
2268 | struct buffer_page *bpage; | 2271 | struct buffer_page *bpage; |
2269 | unsigned long index; | 2272 | unsigned long index; |
2270 | unsigned long addr; | 2273 | unsigned long addr; |
2271 | 2274 | ||
2272 | new_index = rb_event_index(event); | 2275 | new_index = rb_event_index(event); |
2273 | old_index = new_index + rb_event_ts_length(event); | 2276 | old_index = new_index + rb_event_ts_length(event); |
2274 | addr = (unsigned long)event; | 2277 | addr = (unsigned long)event; |
2275 | addr &= PAGE_MASK; | 2278 | addr &= PAGE_MASK; |
2276 | 2279 | ||
2277 | bpage = cpu_buffer->tail_page; | 2280 | bpage = cpu_buffer->tail_page; |
2278 | 2281 | ||
2279 | if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) { | 2282 | if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) { |
2280 | unsigned long write_mask = | 2283 | unsigned long write_mask = |
2281 | local_read(&bpage->write) & ~RB_WRITE_MASK; | 2284 | local_read(&bpage->write) & ~RB_WRITE_MASK; |
2282 | unsigned long event_length = rb_event_length(event); | 2285 | unsigned long event_length = rb_event_length(event); |
2283 | /* | 2286 | /* |
2284 | * This is on the tail page. It is possible that | 2287 | * This is on the tail page. It is possible that |
2285 | * a write could come in and move the tail page | 2288 | * a write could come in and move the tail page |
2286 | * and write to the next page. That is fine | 2289 | * and write to the next page. That is fine |
2287 | * because we just shorten what is on this page. | 2290 | * because we just shorten what is on this page. |
2288 | */ | 2291 | */ |
2289 | old_index += write_mask; | 2292 | old_index += write_mask; |
2290 | new_index += write_mask; | 2293 | new_index += write_mask; |
2291 | index = local_cmpxchg(&bpage->write, old_index, new_index); | 2294 | index = local_cmpxchg(&bpage->write, old_index, new_index); |
2292 | if (index == old_index) { | 2295 | if (index == old_index) { |
2293 | /* update counters */ | 2296 | /* update counters */ |
2294 | local_sub(event_length, &cpu_buffer->entries_bytes); | 2297 | local_sub(event_length, &cpu_buffer->entries_bytes); |
2295 | return 1; | 2298 | return 1; |
2296 | } | 2299 | } |
2297 | } | 2300 | } |
2298 | 2301 | ||
2299 | /* could not discard */ | 2302 | /* could not discard */ |
2300 | return 0; | 2303 | return 0; |
2301 | } | 2304 | } |
2302 | 2305 | ||
2303 | static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer) | 2306 | static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer) |
2304 | { | 2307 | { |
2305 | local_inc(&cpu_buffer->committing); | 2308 | local_inc(&cpu_buffer->committing); |
2306 | local_inc(&cpu_buffer->commits); | 2309 | local_inc(&cpu_buffer->commits); |
2307 | } | 2310 | } |
2308 | 2311 | ||
2309 | static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer) | 2312 | static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer) |
2310 | { | 2313 | { |
2311 | unsigned long commits; | 2314 | unsigned long commits; |
2312 | 2315 | ||
2313 | if (RB_WARN_ON(cpu_buffer, | 2316 | if (RB_WARN_ON(cpu_buffer, |
2314 | !local_read(&cpu_buffer->committing))) | 2317 | !local_read(&cpu_buffer->committing))) |
2315 | return; | 2318 | return; |
2316 | 2319 | ||
2317 | again: | 2320 | again: |
2318 | commits = local_read(&cpu_buffer->commits); | 2321 | commits = local_read(&cpu_buffer->commits); |
2319 | /* synchronize with interrupts */ | 2322 | /* synchronize with interrupts */ |
2320 | barrier(); | 2323 | barrier(); |
2321 | if (local_read(&cpu_buffer->committing) == 1) | 2324 | if (local_read(&cpu_buffer->committing) == 1) |
2322 | rb_set_commit_to_write(cpu_buffer); | 2325 | rb_set_commit_to_write(cpu_buffer); |
2323 | 2326 | ||
2324 | local_dec(&cpu_buffer->committing); | 2327 | local_dec(&cpu_buffer->committing); |
2325 | 2328 | ||
2326 | /* synchronize with interrupts */ | 2329 | /* synchronize with interrupts */ |
2327 | barrier(); | 2330 | barrier(); |
2328 | 2331 | ||
2329 | /* | 2332 | /* |
2330 | * Need to account for interrupts coming in between the | 2333 | * Need to account for interrupts coming in between the |
2331 | * updating of the commit page and the clearing of the | 2334 | * updating of the commit page and the clearing of the |
2332 | * committing counter. | 2335 | * committing counter. |
2333 | */ | 2336 | */ |
2334 | if (unlikely(local_read(&cpu_buffer->commits) != commits) && | 2337 | if (unlikely(local_read(&cpu_buffer->commits) != commits) && |
2335 | !local_read(&cpu_buffer->committing)) { | 2338 | !local_read(&cpu_buffer->committing)) { |
2336 | local_inc(&cpu_buffer->committing); | 2339 | local_inc(&cpu_buffer->committing); |
2337 | goto again; | 2340 | goto again; |
2338 | } | 2341 | } |
2339 | } | 2342 | } |
2340 | 2343 | ||
2341 | static struct ring_buffer_event * | 2344 | static struct ring_buffer_event * |
2342 | rb_reserve_next_event(struct ring_buffer *buffer, | 2345 | rb_reserve_next_event(struct ring_buffer *buffer, |
2343 | struct ring_buffer_per_cpu *cpu_buffer, | 2346 | struct ring_buffer_per_cpu *cpu_buffer, |
2344 | unsigned long length) | 2347 | unsigned long length) |
2345 | { | 2348 | { |
2346 | struct ring_buffer_event *event; | 2349 | struct ring_buffer_event *event; |
2347 | u64 ts, delta; | 2350 | u64 ts, delta; |
2348 | int nr_loops = 0; | 2351 | int nr_loops = 0; |
2349 | int add_timestamp; | 2352 | int add_timestamp; |
2350 | u64 diff; | 2353 | u64 diff; |
2351 | 2354 | ||
2352 | rb_start_commit(cpu_buffer); | 2355 | rb_start_commit(cpu_buffer); |
2353 | 2356 | ||
2354 | #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP | 2357 | #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP |
2355 | /* | 2358 | /* |
2356 | * Due to the ability to swap a cpu buffer from a buffer | 2359 | * Due to the ability to swap a cpu buffer from a buffer |
2357 | * it is possible it was swapped before we committed. | 2360 | * it is possible it was swapped before we committed. |
2358 | * (committing stops a swap). We check for it here and | 2361 | * (committing stops a swap). We check for it here and |
2359 | * if it happened, we have to fail the write. | 2362 | * if it happened, we have to fail the write. |
2360 | */ | 2363 | */ |
2361 | barrier(); | 2364 | barrier(); |
2362 | if (unlikely(ACCESS_ONCE(cpu_buffer->buffer) != buffer)) { | 2365 | if (unlikely(ACCESS_ONCE(cpu_buffer->buffer) != buffer)) { |
2363 | local_dec(&cpu_buffer->committing); | 2366 | local_dec(&cpu_buffer->committing); |
2364 | local_dec(&cpu_buffer->commits); | 2367 | local_dec(&cpu_buffer->commits); |
2365 | return NULL; | 2368 | return NULL; |
2366 | } | 2369 | } |
2367 | #endif | 2370 | #endif |
2368 | 2371 | ||
2369 | length = rb_calculate_event_length(length); | 2372 | length = rb_calculate_event_length(length); |
2370 | again: | 2373 | again: |
2371 | add_timestamp = 0; | 2374 | add_timestamp = 0; |
2372 | delta = 0; | 2375 | delta = 0; |
2373 | 2376 | ||
2374 | /* | 2377 | /* |
2375 | * We allow for interrupts to reenter here and do a trace. | 2378 | * We allow for interrupts to reenter here and do a trace. |
2376 | * If one does, it will cause this original code to loop | 2379 | * If one does, it will cause this original code to loop |
2377 | * back here. Even with heavy interrupts happening, this | 2380 | * back here. Even with heavy interrupts happening, this |
2378 | * should only happen a few times in a row. If this happens | 2381 | * should only happen a few times in a row. If this happens |
2379 | * 1000 times in a row, there must be either an interrupt | 2382 | * 1000 times in a row, there must be either an interrupt |
2380 | * storm or we have something buggy. | 2383 | * storm or we have something buggy. |
2381 | * Bail! | 2384 | * Bail! |
2382 | */ | 2385 | */ |
2383 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000)) | 2386 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000)) |
2384 | goto out_fail; | 2387 | goto out_fail; |
2385 | 2388 | ||
2386 | ts = rb_time_stamp(cpu_buffer->buffer); | 2389 | ts = rb_time_stamp(cpu_buffer->buffer); |
2387 | diff = ts - cpu_buffer->write_stamp; | 2390 | diff = ts - cpu_buffer->write_stamp; |
2388 | 2391 | ||
2389 | /* make sure this diff is calculated here */ | 2392 | /* make sure this diff is calculated here */ |
2390 | barrier(); | 2393 | barrier(); |
2391 | 2394 | ||
2392 | /* Did the write stamp get updated already? */ | 2395 | /* Did the write stamp get updated already? */ |
2393 | if (likely(ts >= cpu_buffer->write_stamp)) { | 2396 | if (likely(ts >= cpu_buffer->write_stamp)) { |
2394 | delta = diff; | 2397 | delta = diff; |
2395 | if (unlikely(test_time_stamp(delta))) { | 2398 | if (unlikely(test_time_stamp(delta))) { |
2396 | int local_clock_stable = 1; | 2399 | int local_clock_stable = 1; |
2397 | #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK | 2400 | #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK |
2398 | local_clock_stable = sched_clock_stable; | 2401 | local_clock_stable = sched_clock_stable; |
2399 | #endif | 2402 | #endif |
2400 | WARN_ONCE(delta > (1ULL << 59), | 2403 | WARN_ONCE(delta > (1ULL << 59), |
2401 | KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s", | 2404 | KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s", |
2402 | (unsigned long long)delta, | 2405 | (unsigned long long)delta, |
2403 | (unsigned long long)ts, | 2406 | (unsigned long long)ts, |
2404 | (unsigned long long)cpu_buffer->write_stamp, | 2407 | (unsigned long long)cpu_buffer->write_stamp, |
2405 | local_clock_stable ? "" : | 2408 | local_clock_stable ? "" : |
2406 | "If you just came from a suspend/resume,\n" | 2409 | "If you just came from a suspend/resume,\n" |
2407 | "please switch to the trace global clock:\n" | 2410 | "please switch to the trace global clock:\n" |
2408 | " echo global > /sys/kernel/debug/tracing/trace_clock\n"); | 2411 | " echo global > /sys/kernel/debug/tracing/trace_clock\n"); |
2409 | add_timestamp = 1; | 2412 | add_timestamp = 1; |
2410 | } | 2413 | } |
2411 | } | 2414 | } |
2412 | 2415 | ||
2413 | event = __rb_reserve_next(cpu_buffer, length, ts, | 2416 | event = __rb_reserve_next(cpu_buffer, length, ts, |
2414 | delta, add_timestamp); | 2417 | delta, add_timestamp); |
2415 | if (unlikely(PTR_ERR(event) == -EAGAIN)) | 2418 | if (unlikely(PTR_ERR(event) == -EAGAIN)) |
2416 | goto again; | 2419 | goto again; |
2417 | 2420 | ||
2418 | if (!event) | 2421 | if (!event) |
2419 | goto out_fail; | 2422 | goto out_fail; |
2420 | 2423 | ||
2421 | return event; | 2424 | return event; |
2422 | 2425 | ||
2423 | out_fail: | 2426 | out_fail: |
2424 | rb_end_commit(cpu_buffer); | 2427 | rb_end_commit(cpu_buffer); |
2425 | return NULL; | 2428 | return NULL; |
2426 | } | 2429 | } |
2427 | 2430 | ||
2428 | #ifdef CONFIG_TRACING | 2431 | #ifdef CONFIG_TRACING |
2429 | 2432 | ||
2430 | #define TRACE_RECURSIVE_DEPTH 16 | 2433 | #define TRACE_RECURSIVE_DEPTH 16 |
2431 | 2434 | ||
2432 | /* Keep this code out of the fast path cache */ | 2435 | /* Keep this code out of the fast path cache */ |
2433 | static noinline void trace_recursive_fail(void) | 2436 | static noinline void trace_recursive_fail(void) |
2434 | { | 2437 | { |
2435 | /* Disable all tracing before we do anything else */ | 2438 | /* Disable all tracing before we do anything else */ |
2436 | tracing_off_permanent(); | 2439 | tracing_off_permanent(); |
2437 | 2440 | ||
2438 | printk_once(KERN_WARNING "Tracing recursion: depth[%ld]:" | 2441 | printk_once(KERN_WARNING "Tracing recursion: depth[%ld]:" |
2439 | "HC[%lu]:SC[%lu]:NMI[%lu]\n", | 2442 | "HC[%lu]:SC[%lu]:NMI[%lu]\n", |
2440 | trace_recursion_buffer(), | 2443 | trace_recursion_buffer(), |
2441 | hardirq_count() >> HARDIRQ_SHIFT, | 2444 | hardirq_count() >> HARDIRQ_SHIFT, |
2442 | softirq_count() >> SOFTIRQ_SHIFT, | 2445 | softirq_count() >> SOFTIRQ_SHIFT, |
2443 | in_nmi()); | 2446 | in_nmi()); |
2444 | 2447 | ||
2445 | WARN_ON_ONCE(1); | 2448 | WARN_ON_ONCE(1); |
2446 | } | 2449 | } |
2447 | 2450 | ||
2448 | static inline int trace_recursive_lock(void) | 2451 | static inline int trace_recursive_lock(void) |
2449 | { | 2452 | { |
2450 | trace_recursion_inc(); | 2453 | trace_recursion_inc(); |
2451 | 2454 | ||
2452 | if (likely(trace_recursion_buffer() < TRACE_RECURSIVE_DEPTH)) | 2455 | if (likely(trace_recursion_buffer() < TRACE_RECURSIVE_DEPTH)) |
2453 | return 0; | 2456 | return 0; |
2454 | 2457 | ||
2455 | trace_recursive_fail(); | 2458 | trace_recursive_fail(); |
2456 | 2459 | ||
2457 | return -1; | 2460 | return -1; |
2458 | } | 2461 | } |
2459 | 2462 | ||
2460 | static inline void trace_recursive_unlock(void) | 2463 | static inline void trace_recursive_unlock(void) |
2461 | { | 2464 | { |
2462 | WARN_ON_ONCE(!trace_recursion_buffer()); | 2465 | WARN_ON_ONCE(!trace_recursion_buffer()); |
2463 | 2466 | ||
2464 | trace_recursion_dec(); | 2467 | trace_recursion_dec(); |
2465 | } | 2468 | } |
2466 | 2469 | ||
2467 | #else | 2470 | #else |
2468 | 2471 | ||
2469 | #define trace_recursive_lock() (0) | 2472 | #define trace_recursive_lock() (0) |
2470 | #define trace_recursive_unlock() do { } while (0) | 2473 | #define trace_recursive_unlock() do { } while (0) |
2471 | 2474 | ||
2472 | #endif | 2475 | #endif |
2473 | 2476 | ||
2474 | /** | 2477 | /** |
2475 | * ring_buffer_lock_reserve - reserve a part of the buffer | 2478 | * ring_buffer_lock_reserve - reserve a part of the buffer |
2476 | * @buffer: the ring buffer to reserve from | 2479 | * @buffer: the ring buffer to reserve from |
2477 | * @length: the length of the data to reserve (excluding event header) | 2480 | * @length: the length of the data to reserve (excluding event header) |
2478 | * | 2481 | * |
2479 | * Returns a reserved event on the ring buffer to copy directly to. | 2482 | * Returns a reserved event on the ring buffer to copy directly to. |
2480 | * The user of this interface will need to get the body to write into | 2483 | * The user of this interface will need to get the body to write into |
2481 | * and can use the ring_buffer_event_data() interface. | 2484 | * and can use the ring_buffer_event_data() interface. |
2482 | * | 2485 | * |
2483 | * The length is the length of the data needed, not the event length | 2486 | * The length is the length of the data needed, not the event length |
2484 | * which also includes the event header. | 2487 | * which also includes the event header. |
2485 | * | 2488 | * |
2486 | * Must be paired with ring_buffer_unlock_commit, unless NULL is returned. | 2489 | * Must be paired with ring_buffer_unlock_commit, unless NULL is returned. |
2487 | * If NULL is returned, then nothing has been allocated or locked. | 2490 | * If NULL is returned, then nothing has been allocated or locked. |
2488 | */ | 2491 | */ |
2489 | struct ring_buffer_event * | 2492 | struct ring_buffer_event * |
2490 | ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length) | 2493 | ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length) |
2491 | { | 2494 | { |
2492 | struct ring_buffer_per_cpu *cpu_buffer; | 2495 | struct ring_buffer_per_cpu *cpu_buffer; |
2493 | struct ring_buffer_event *event; | 2496 | struct ring_buffer_event *event; |
2494 | int cpu; | 2497 | int cpu; |
2495 | 2498 | ||
2496 | if (ring_buffer_flags != RB_BUFFERS_ON) | 2499 | if (ring_buffer_flags != RB_BUFFERS_ON) |
2497 | return NULL; | 2500 | return NULL; |
2498 | 2501 | ||
2499 | /* If we are tracing schedule, we don't want to recurse */ | 2502 | /* If we are tracing schedule, we don't want to recurse */ |
2500 | preempt_disable_notrace(); | 2503 | preempt_disable_notrace(); |
2501 | 2504 | ||
2502 | if (atomic_read(&buffer->record_disabled)) | 2505 | if (atomic_read(&buffer->record_disabled)) |
2503 | goto out_nocheck; | 2506 | goto out_nocheck; |
2504 | 2507 | ||
2505 | if (trace_recursive_lock()) | 2508 | if (trace_recursive_lock()) |
2506 | goto out_nocheck; | 2509 | goto out_nocheck; |
2507 | 2510 | ||
2508 | cpu = raw_smp_processor_id(); | 2511 | cpu = raw_smp_processor_id(); |
2509 | 2512 | ||
2510 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | 2513 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) |
2511 | goto out; | 2514 | goto out; |
2512 | 2515 | ||
2513 | cpu_buffer = buffer->buffers[cpu]; | 2516 | cpu_buffer = buffer->buffers[cpu]; |
2514 | 2517 | ||
2515 | if (atomic_read(&cpu_buffer->record_disabled)) | 2518 | if (atomic_read(&cpu_buffer->record_disabled)) |
2516 | goto out; | 2519 | goto out; |
2517 | 2520 | ||
2518 | if (length > BUF_MAX_DATA_SIZE) | 2521 | if (length > BUF_MAX_DATA_SIZE) |
2519 | goto out; | 2522 | goto out; |
2520 | 2523 | ||
2521 | event = rb_reserve_next_event(buffer, cpu_buffer, length); | 2524 | event = rb_reserve_next_event(buffer, cpu_buffer, length); |
2522 | if (!event) | 2525 | if (!event) |
2523 | goto out; | 2526 | goto out; |
2524 | 2527 | ||
2525 | return event; | 2528 | return event; |
2526 | 2529 | ||
2527 | out: | 2530 | out: |
2528 | trace_recursive_unlock(); | 2531 | trace_recursive_unlock(); |
2529 | 2532 | ||
2530 | out_nocheck: | 2533 | out_nocheck: |
2531 | preempt_enable_notrace(); | 2534 | preempt_enable_notrace(); |
2532 | return NULL; | 2535 | return NULL; |
2533 | } | 2536 | } |
2534 | EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve); | 2537 | EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve); |
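For context, a minimal sketch of the reserve/commit pattern the kernel-doc above describes, using the ring_buffer_event_data() accessor it mentions. The my_write_sample() wrapper and the my_buffer pointer are hypothetical; only the ring_buffer_* calls come from this API.

#include <linux/ring_buffer.h>

/* Illustrative caller; my_buffer is assumed to be an already allocated ring buffer. */
static void my_write_sample(struct ring_buffer *my_buffer, u64 value)
{
	struct ring_buffer_event *event;
	u64 *body;

	/* reserve sizeof(*body) bytes of data space (the event header is added internally) */
	event = ring_buffer_lock_reserve(my_buffer, sizeof(*body));
	if (!event)
		return;				/* NULL: nothing reserved, nothing to commit */

	body = ring_buffer_event_data(event);	/* writable body of the reserved event */
	*body = value;

	ring_buffer_unlock_commit(my_buffer, event);	/* must pair with the reserve */
}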
2535 | 2538 | ||
2536 | static void | 2539 | static void |
2537 | rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer, | 2540 | rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer, |
2538 | struct ring_buffer_event *event) | 2541 | struct ring_buffer_event *event) |
2539 | { | 2542 | { |
2540 | u64 delta; | 2543 | u64 delta; |
2541 | 2544 | ||
2542 | /* | 2545 | /* |
2543 | * The first event in the commit queue updates the | 2546 | * The first event in the commit queue updates the |
2544 | * time stamp. | 2547 | * time stamp. |
2545 | */ | 2548 | */ |
2546 | if (rb_event_is_commit(cpu_buffer, event)) { | 2549 | if (rb_event_is_commit(cpu_buffer, event)) { |
2547 | /* | 2550 | /* |
2548 | * A commit event that is first on a page | 2551 | * A commit event that is first on a page |
2549 | * updates the write timestamp with the page stamp | 2552 | * updates the write timestamp with the page stamp |
2550 | */ | 2553 | */ |
2551 | if (!rb_event_index(event)) | 2554 | if (!rb_event_index(event)) |
2552 | cpu_buffer->write_stamp = | 2555 | cpu_buffer->write_stamp = |
2553 | cpu_buffer->commit_page->page->time_stamp; | 2556 | cpu_buffer->commit_page->page->time_stamp; |
2554 | else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) { | 2557 | else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) { |
2555 | delta = event->array[0]; | 2558 | delta = event->array[0]; |
2556 | delta <<= TS_SHIFT; | 2559 | delta <<= TS_SHIFT; |
2557 | delta += event->time_delta; | 2560 | delta += event->time_delta; |
2558 | cpu_buffer->write_stamp += delta; | 2561 | cpu_buffer->write_stamp += delta; |
2559 | } else | 2562 | } else |
2560 | cpu_buffer->write_stamp += event->time_delta; | 2563 | cpu_buffer->write_stamp += event->time_delta; |
2561 | } | 2564 | } |
2562 | } | 2565 | } |
2563 | 2566 | ||
2564 | static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, | 2567 | static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, |
2565 | struct ring_buffer_event *event) | 2568 | struct ring_buffer_event *event) |
2566 | { | 2569 | { |
2567 | local_inc(&cpu_buffer->entries); | 2570 | local_inc(&cpu_buffer->entries); |
2568 | rb_update_write_stamp(cpu_buffer, event); | 2571 | rb_update_write_stamp(cpu_buffer, event); |
2569 | rb_end_commit(cpu_buffer); | 2572 | rb_end_commit(cpu_buffer); |
2570 | } | 2573 | } |
2571 | 2574 | ||
2572 | /** | 2575 | /** |
2573 | * ring_buffer_unlock_commit - commit a reserved event | 2576 | * ring_buffer_unlock_commit - commit a reserved event |
2574 | * @buffer: The buffer to commit to | 2577 | * @buffer: The buffer to commit to |
2575 | * @event: The event pointer to commit. | 2578 | * @event: The event pointer to commit. |
2576 | * | 2579 | * |
2577 | * This commits the data to the ring buffer, and releases any locks held. | 2580 | * This commits the data to the ring buffer, and releases any locks held. |
2578 | * | 2581 | * |
2579 | * Must be paired with ring_buffer_lock_reserve. | 2582 | * Must be paired with ring_buffer_lock_reserve. |
2580 | */ | 2583 | */ |
2581 | int ring_buffer_unlock_commit(struct ring_buffer *buffer, | 2584 | int ring_buffer_unlock_commit(struct ring_buffer *buffer, |
2582 | struct ring_buffer_event *event) | 2585 | struct ring_buffer_event *event) |
2583 | { | 2586 | { |
2584 | struct ring_buffer_per_cpu *cpu_buffer; | 2587 | struct ring_buffer_per_cpu *cpu_buffer; |
2585 | int cpu = raw_smp_processor_id(); | 2588 | int cpu = raw_smp_processor_id(); |
2586 | 2589 | ||
2587 | cpu_buffer = buffer->buffers[cpu]; | 2590 | cpu_buffer = buffer->buffers[cpu]; |
2588 | 2591 | ||
2589 | rb_commit(cpu_buffer, event); | 2592 | rb_commit(cpu_buffer, event); |
2590 | 2593 | ||
2591 | trace_recursive_unlock(); | 2594 | trace_recursive_unlock(); |
2592 | 2595 | ||
2593 | preempt_enable_notrace(); | 2596 | preempt_enable_notrace(); |
2594 | 2597 | ||
2595 | return 0; | 2598 | return 0; |
2596 | } | 2599 | } |
2597 | EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit); | 2600 | EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit); |
2598 | 2601 | ||
2599 | static inline void rb_event_discard(struct ring_buffer_event *event) | 2602 | static inline void rb_event_discard(struct ring_buffer_event *event) |
2600 | { | 2603 | { |
2601 | if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) | 2604 | if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) |
2602 | event = skip_time_extend(event); | 2605 | event = skip_time_extend(event); |
2603 | 2606 | ||
2604 | /* array[0] holds the actual length for the discarded event */ | 2607 | /* array[0] holds the actual length for the discarded event */ |
2605 | event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE; | 2608 | event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE; |
2606 | event->type_len = RINGBUF_TYPE_PADDING; | 2609 | event->type_len = RINGBUF_TYPE_PADDING; |
2607 | /* time delta must be non zero */ | 2610 | /* time delta must be non zero */ |
2608 | if (!event->time_delta) | 2611 | if (!event->time_delta) |
2609 | event->time_delta = 1; | 2612 | event->time_delta = 1; |
2610 | } | 2613 | } |
2611 | 2614 | ||
2612 | /* | 2615 | /* |
2613 | * Decrement the entry count of the page that an event is on. | 2616 | * Decrement the entry count of the page that an event is on. |
2614 | * The event does not even need to exist, only the pointer | 2617 | * The event does not even need to exist, only the pointer |
2615 | * to the page it is on. This may only be called before the commit | 2618 | * to the page it is on. This may only be called before the commit |
2616 | * takes place. | 2619 | * takes place. |
2617 | */ | 2620 | */ |
2618 | static inline void | 2621 | static inline void |
2619 | rb_decrement_entry(struct ring_buffer_per_cpu *cpu_buffer, | 2622 | rb_decrement_entry(struct ring_buffer_per_cpu *cpu_buffer, |
2620 | struct ring_buffer_event *event) | 2623 | struct ring_buffer_event *event) |
2621 | { | 2624 | { |
2622 | unsigned long addr = (unsigned long)event; | 2625 | unsigned long addr = (unsigned long)event; |
2623 | struct buffer_page *bpage = cpu_buffer->commit_page; | 2626 | struct buffer_page *bpage = cpu_buffer->commit_page; |
2624 | struct buffer_page *start; | 2627 | struct buffer_page *start; |
2625 | 2628 | ||
2626 | addr &= PAGE_MASK; | 2629 | addr &= PAGE_MASK; |
2627 | 2630 | ||
2628 | /* Do the likely case first */ | 2631 | /* Do the likely case first */ |
2629 | if (likely(bpage->page == (void *)addr)) { | 2632 | if (likely(bpage->page == (void *)addr)) { |
2630 | local_dec(&bpage->entries); | 2633 | local_dec(&bpage->entries); |
2631 | return; | 2634 | return; |
2632 | } | 2635 | } |
2633 | 2636 | ||
2634 | /* | 2637 | /* |
2635 | * Because the commit page may be on the reader page we | 2638 | * Because the commit page may be on the reader page we |
2636 | * start with the next page and check the end of the loop there. | 2639 | * start with the next page and check the end of the loop there. |
2637 | */ | 2640 | */ |
2638 | rb_inc_page(cpu_buffer, &bpage); | 2641 | rb_inc_page(cpu_buffer, &bpage); |
2639 | start = bpage; | 2642 | start = bpage; |
2640 | do { | 2643 | do { |
2641 | if (bpage->page == (void *)addr) { | 2644 | if (bpage->page == (void *)addr) { |
2642 | local_dec(&bpage->entries); | 2645 | local_dec(&bpage->entries); |
2643 | return; | 2646 | return; |
2644 | } | 2647 | } |
2645 | rb_inc_page(cpu_buffer, &bpage); | 2648 | rb_inc_page(cpu_buffer, &bpage); |
2646 | } while (bpage != start); | 2649 | } while (bpage != start); |
2647 | 2650 | ||
2648 | /* commit not part of this buffer?? */ | 2651 | /* commit not part of this buffer?? */ |
2649 | RB_WARN_ON(cpu_buffer, 1); | 2652 | RB_WARN_ON(cpu_buffer, 1); |
2650 | } | 2653 | } |
2651 | 2654 | ||
2652 | /** | 2655 | /** |
2653 | * ring_buffer_commit_discard - discard an event that has not been committed | 2656 | * ring_buffer_commit_discard - discard an event that has not been committed |
2654 | * @buffer: the ring buffer | 2657 | * @buffer: the ring buffer |
2655 | * @event: non committed event to discard | 2658 | * @event: non committed event to discard |
2656 | * | 2659 | * |
2657 | * Sometimes an event that is in the ring buffer needs to be ignored. | 2660 | * Sometimes an event that is in the ring buffer needs to be ignored. |
2658 | * This function lets the user discard an event in the ring buffer | 2661 | * This function lets the user discard an event in the ring buffer |
2659 | * and then that event will not be read later. | 2662 | * and then that event will not be read later. |
2660 | * | 2663 | * |
2661 | * This function only works if it is called before the item has been | 2664 | * This function only works if it is called before the item has been |
2662 | * committed. It will try to free the event from the ring buffer | 2665 | * committed. It will try to free the event from the ring buffer |
2663 | * if another event has not been added behind it. | 2666 | * if another event has not been added behind it. |
2664 | * | 2667 | * |
2665 | * If another event has been added behind it, it will set the event | 2668 | * If another event has been added behind it, it will set the event |
2666 | * up as discarded, and perform the commit. | 2669 | * up as discarded, and perform the commit. |
2667 | * | 2670 | * |
2668 | * If this function is called, do not call ring_buffer_unlock_commit on | 2671 | * If this function is called, do not call ring_buffer_unlock_commit on |
2669 | * the event. | 2672 | * the event. |
2670 | */ | 2673 | */ |
2671 | void ring_buffer_discard_commit(struct ring_buffer *buffer, | 2674 | void ring_buffer_discard_commit(struct ring_buffer *buffer, |
2672 | struct ring_buffer_event *event) | 2675 | struct ring_buffer_event *event) |
2673 | { | 2676 | { |
2674 | struct ring_buffer_per_cpu *cpu_buffer; | 2677 | struct ring_buffer_per_cpu *cpu_buffer; |
2675 | int cpu; | 2678 | int cpu; |
2676 | 2679 | ||
2677 | /* The event is discarded regardless */ | 2680 | /* The event is discarded regardless */ |
2678 | rb_event_discard(event); | 2681 | rb_event_discard(event); |
2679 | 2682 | ||
2680 | cpu = smp_processor_id(); | 2683 | cpu = smp_processor_id(); |
2681 | cpu_buffer = buffer->buffers[cpu]; | 2684 | cpu_buffer = buffer->buffers[cpu]; |
2682 | 2685 | ||
2683 | /* | 2686 | /* |
2684 | * This must only be called if the event has not been | 2687 | * This must only be called if the event has not been |
2685 | * committed yet. Thus we can assume that preemption | 2688 | * committed yet. Thus we can assume that preemption |
2686 | * is still disabled. | 2689 | * is still disabled. |
2687 | */ | 2690 | */ |
2688 | RB_WARN_ON(buffer, !local_read(&cpu_buffer->committing)); | 2691 | RB_WARN_ON(buffer, !local_read(&cpu_buffer->committing)); |
2689 | 2692 | ||
2690 | rb_decrement_entry(cpu_buffer, event); | 2693 | rb_decrement_entry(cpu_buffer, event); |
2691 | if (rb_try_to_discard(cpu_buffer, event)) | 2694 | if (rb_try_to_discard(cpu_buffer, event)) |
2692 | goto out; | 2695 | goto out; |
2693 | 2696 | ||
2694 | /* | 2697 | /* |
2695 | * The commit is still visible by the reader, so we | 2698 | * The commit is still visible by the reader, so we |
2696 | * must still update the timestamp. | 2699 | * must still update the timestamp. |
2697 | */ | 2700 | */ |
2698 | rb_update_write_stamp(cpu_buffer, event); | 2701 | rb_update_write_stamp(cpu_buffer, event); |
2699 | out: | 2702 | out: |
2700 | rb_end_commit(cpu_buffer); | 2703 | rb_end_commit(cpu_buffer); |
2701 | 2704 | ||
2702 | trace_recursive_unlock(); | 2705 | trace_recursive_unlock(); |
2703 | 2706 | ||
2704 | preempt_enable_notrace(); | 2707 | preempt_enable_notrace(); |
2705 | 2708 | ||
2706 | } | 2709 | } |
2707 | EXPORT_SYMBOL_GPL(ring_buffer_discard_commit); | 2710 | EXPORT_SYMBOL_GPL(ring_buffer_discard_commit); |
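A hedged sketch of the discard path documented above: the event is reserved as usual, but when the caller decides to drop it, ring_buffer_discard_commit() takes the place of the unlock_commit. The drop_it flag and my_buffer are made up for illustration.

#include <linux/types.h>
#include <linux/ring_buffer.h>

static void my_write_maybe(struct ring_buffer *my_buffer, u64 value, bool drop_it)
{
	struct ring_buffer_event *event;
	u64 *body;

	event = ring_buffer_lock_reserve(my_buffer, sizeof(*body));
	if (!event)
		return;

	body = ring_buffer_event_data(event);
	*body = value;

	if (drop_it)
		/* discard instead of committing; do not also call ring_buffer_unlock_commit() */
		ring_buffer_discard_commit(my_buffer, event);
	else
		ring_buffer_unlock_commit(my_buffer, event);
}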
2708 | 2711 | ||
2709 | /** | 2712 | /** |
2710 | * ring_buffer_write - write data to the buffer without reserving | 2713 | * ring_buffer_write - write data to the buffer without reserving |
2711 | * @buffer: The ring buffer to write to. | 2714 | * @buffer: The ring buffer to write to. |
2712 | * @length: The length of the data being written (excluding the event header) | 2715 | * @length: The length of the data being written (excluding the event header) |
2713 | * @data: The data to write to the buffer. | 2716 | * @data: The data to write to the buffer. |
2714 | * | 2717 | * |
2715 | * This is like ring_buffer_lock_reserve and ring_buffer_unlock_commit as | 2718 | * This is like ring_buffer_lock_reserve and ring_buffer_unlock_commit as |
2716 | * one function. If you already have the data to write to the buffer, it | 2719 | * one function. If you already have the data to write to the buffer, it |
2717 | * may be easier to simply call this function. | 2720 | * may be easier to simply call this function. |
2718 | * | 2721 | * |
2719 | * Note, like ring_buffer_lock_reserve, the length is the length of the data | 2722 | * Note, like ring_buffer_lock_reserve, the length is the length of the data |
2720 | * and not the length of the event which would hold the header. | 2723 | * and not the length of the event which would hold the header. |
2721 | */ | 2724 | */ |
2722 | int ring_buffer_write(struct ring_buffer *buffer, | 2725 | int ring_buffer_write(struct ring_buffer *buffer, |
2723 | unsigned long length, | 2726 | unsigned long length, |
2724 | void *data) | 2727 | void *data) |
2725 | { | 2728 | { |
2726 | struct ring_buffer_per_cpu *cpu_buffer; | 2729 | struct ring_buffer_per_cpu *cpu_buffer; |
2727 | struct ring_buffer_event *event; | 2730 | struct ring_buffer_event *event; |
2728 | void *body; | 2731 | void *body; |
2729 | int ret = -EBUSY; | 2732 | int ret = -EBUSY; |
2730 | int cpu; | 2733 | int cpu; |
2731 | 2734 | ||
2732 | if (ring_buffer_flags != RB_BUFFERS_ON) | 2735 | if (ring_buffer_flags != RB_BUFFERS_ON) |
2733 | return -EBUSY; | 2736 | return -EBUSY; |
2734 | 2737 | ||
2735 | preempt_disable_notrace(); | 2738 | preempt_disable_notrace(); |
2736 | 2739 | ||
2737 | if (atomic_read(&buffer->record_disabled)) | 2740 | if (atomic_read(&buffer->record_disabled)) |
2738 | goto out; | 2741 | goto out; |
2739 | 2742 | ||
2740 | cpu = raw_smp_processor_id(); | 2743 | cpu = raw_smp_processor_id(); |
2741 | 2744 | ||
2742 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | 2745 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) |
2743 | goto out; | 2746 | goto out; |
2744 | 2747 | ||
2745 | cpu_buffer = buffer->buffers[cpu]; | 2748 | cpu_buffer = buffer->buffers[cpu]; |
2746 | 2749 | ||
2747 | if (atomic_read(&cpu_buffer->record_disabled)) | 2750 | if (atomic_read(&cpu_buffer->record_disabled)) |
2748 | goto out; | 2751 | goto out; |
2749 | 2752 | ||
2750 | if (length > BUF_MAX_DATA_SIZE) | 2753 | if (length > BUF_MAX_DATA_SIZE) |
2751 | goto out; | 2754 | goto out; |
2752 | 2755 | ||
2753 | event = rb_reserve_next_event(buffer, cpu_buffer, length); | 2756 | event = rb_reserve_next_event(buffer, cpu_buffer, length); |
2754 | if (!event) | 2757 | if (!event) |
2755 | goto out; | 2758 | goto out; |
2756 | 2759 | ||
2757 | body = rb_event_data(event); | 2760 | body = rb_event_data(event); |
2758 | 2761 | ||
2759 | memcpy(body, data, length); | 2762 | memcpy(body, data, length); |
2760 | 2763 | ||
2761 | rb_commit(cpu_buffer, event); | 2764 | rb_commit(cpu_buffer, event); |
2762 | 2765 | ||
2763 | ret = 0; | 2766 | ret = 0; |
2764 | out: | 2767 | out: |
2765 | preempt_enable_notrace(); | 2768 | preempt_enable_notrace(); |
2766 | 2769 | ||
2767 | return ret; | 2770 | return ret; |
2768 | } | 2771 | } |
2769 | EXPORT_SYMBOL_GPL(ring_buffer_write); | 2772 | EXPORT_SYMBOL_GPL(ring_buffer_write); |
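Since ring_buffer_write() folds the reserve and the commit into one call, a caller that already has the payload laid out can hand it over directly. A minimal sketch; my_log_blob() and my_buffer are hypothetical names.

#include <linux/ring_buffer.h>

static int my_log_blob(struct ring_buffer *my_buffer, void *blob, unsigned long len)
{
	/* 0 on success, -EBUSY if recording is disabled or no space could be reserved */
	return ring_buffer_write(my_buffer, len, blob);
}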
2770 | 2773 | ||
2771 | static int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer) | 2774 | static int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer) |
2772 | { | 2775 | { |
2773 | struct buffer_page *reader = cpu_buffer->reader_page; | 2776 | struct buffer_page *reader = cpu_buffer->reader_page; |
2774 | struct buffer_page *head = rb_set_head_page(cpu_buffer); | 2777 | struct buffer_page *head = rb_set_head_page(cpu_buffer); |
2775 | struct buffer_page *commit = cpu_buffer->commit_page; | 2778 | struct buffer_page *commit = cpu_buffer->commit_page; |
2776 | 2779 | ||
2777 | /* In case of error, head will be NULL */ | 2780 | /* In case of error, head will be NULL */ |
2778 | if (unlikely(!head)) | 2781 | if (unlikely(!head)) |
2779 | return 1; | 2782 | return 1; |
2780 | 2783 | ||
2781 | return reader->read == rb_page_commit(reader) && | 2784 | return reader->read == rb_page_commit(reader) && |
2782 | (commit == reader || | 2785 | (commit == reader || |
2783 | (commit == head && | 2786 | (commit == head && |
2784 | head->read == rb_page_commit(commit))); | 2787 | head->read == rb_page_commit(commit))); |
2785 | } | 2788 | } |
2786 | 2789 | ||
2787 | /** | 2790 | /** |
2788 | * ring_buffer_record_disable - stop all writes into the buffer | 2791 | * ring_buffer_record_disable - stop all writes into the buffer |
2789 | * @buffer: The ring buffer to stop writes to. | 2792 | * @buffer: The ring buffer to stop writes to. |
2790 | * | 2793 | * |
2791 | * This prevents all writes to the buffer. Any attempt to write | 2794 | * This prevents all writes to the buffer. Any attempt to write |
2792 | * to the buffer after this will fail and return NULL. | 2795 | * to the buffer after this will fail and return NULL. |
2793 | * | 2796 | * |
2794 | * The caller should call synchronize_sched() after this. | 2797 | * The caller should call synchronize_sched() after this. |
2795 | */ | 2798 | */ |
2796 | void ring_buffer_record_disable(struct ring_buffer *buffer) | 2799 | void ring_buffer_record_disable(struct ring_buffer *buffer) |
2797 | { | 2800 | { |
2798 | atomic_inc(&buffer->record_disabled); | 2801 | atomic_inc(&buffer->record_disabled); |
2799 | } | 2802 | } |
2800 | EXPORT_SYMBOL_GPL(ring_buffer_record_disable); | 2803 | EXPORT_SYMBOL_GPL(ring_buffer_record_disable); |
2801 | 2804 | ||
2802 | /** | 2805 | /** |
2803 | * ring_buffer_record_enable - enable writes to the buffer | 2806 | * ring_buffer_record_enable - enable writes to the buffer |
2804 | * @buffer: The ring buffer to enable writes | 2807 | * @buffer: The ring buffer to enable writes |
2805 | * | 2808 | * |
2806 | * Note, multiple disables will need the same number of enables | 2809 | * Note, multiple disables will need the same number of enables |
2807 | * to truly enable the writing (much like preempt_disable). | 2810 | * to truly enable the writing (much like preempt_disable). |
2808 | */ | 2811 | */ |
2809 | void ring_buffer_record_enable(struct ring_buffer *buffer) | 2812 | void ring_buffer_record_enable(struct ring_buffer *buffer) |
2810 | { | 2813 | { |
2811 | atomic_dec(&buffer->record_disabled); | 2814 | atomic_dec(&buffer->record_disabled); |
2812 | } | 2815 | } |
2813 | EXPORT_SYMBOL_GPL(ring_buffer_record_enable); | 2816 | EXPORT_SYMBOL_GPL(ring_buffer_record_enable); |
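A sketch of the counted disable/enable pairing described above, including the synchronize_sched() call the kernel-doc recommends after disabling. The my_quiesce_and_inspect() wrapper and my_buffer are assumptions of the example.

#include <linux/ring_buffer.h>
#include <linux/rcupdate.h>	/* synchronize_sched() */

static void my_quiesce_and_inspect(struct ring_buffer *my_buffer)
{
	ring_buffer_record_disable(my_buffer);
	synchronize_sched();		/* wait out writers already inside the buffer */

	/* ... inspect or drain the buffer here while new writes fail ... */

	ring_buffer_record_enable(my_buffer);	/* one enable per disable */
}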
2814 | 2817 | ||
2815 | /** | 2818 | /** |
2816 | * ring_buffer_record_off - stop all writes into the buffer | 2819 | * ring_buffer_record_off - stop all writes into the buffer |
2817 | * @buffer: The ring buffer to stop writes to. | 2820 | * @buffer: The ring buffer to stop writes to. |
2818 | * | 2821 | * |
2819 | * This prevents all writes to the buffer. Any attempt to write | 2822 | * This prevents all writes to the buffer. Any attempt to write |
2820 | * to the buffer after this will fail and return NULL. | 2823 | * to the buffer after this will fail and return NULL. |
2821 | * | 2824 | * |
2822 | * This is different from ring_buffer_record_disable() as | 2825 | * This is different from ring_buffer_record_disable() as |
2823 | * it works like an on/off switch, whereas the disable() version | 2826 | * it works like an on/off switch, whereas the disable() version |
2824 | * must be paired with an enable(). | 2827 | * must be paired with an enable(). |
2825 | */ | 2828 | */ |
2826 | void ring_buffer_record_off(struct ring_buffer *buffer) | 2829 | void ring_buffer_record_off(struct ring_buffer *buffer) |
2827 | { | 2830 | { |
2828 | unsigned int rd; | 2831 | unsigned int rd; |
2829 | unsigned int new_rd; | 2832 | unsigned int new_rd; |
2830 | 2833 | ||
2831 | do { | 2834 | do { |
2832 | rd = atomic_read(&buffer->record_disabled); | 2835 | rd = atomic_read(&buffer->record_disabled); |
2833 | new_rd = rd | RB_BUFFER_OFF; | 2836 | new_rd = rd | RB_BUFFER_OFF; |
2834 | } while (atomic_cmpxchg(&buffer->record_disabled, rd, new_rd) != rd); | 2837 | } while (atomic_cmpxchg(&buffer->record_disabled, rd, new_rd) != rd); |
2835 | } | 2838 | } |
2836 | EXPORT_SYMBOL_GPL(ring_buffer_record_off); | 2839 | EXPORT_SYMBOL_GPL(ring_buffer_record_off); |
2837 | 2840 | ||
2838 | /** | 2841 | /** |
2839 | * ring_buffer_record_on - restart writes into the buffer | 2842 | * ring_buffer_record_on - restart writes into the buffer |
2840 | * @buffer: The ring buffer to start writes to. | 2843 | * @buffer: The ring buffer to start writes to. |
2841 | * | 2844 | * |
2842 | * This enables all writes to the buffer that were disabled by | 2845 | * This enables all writes to the buffer that were disabled by |
2843 | * ring_buffer_record_off(). | 2846 | * ring_buffer_record_off(). |
2844 | * | 2847 | * |
2845 | * This is different from ring_buffer_record_enable() as | 2848 | * This is different from ring_buffer_record_enable() as |
2846 | * it works like an on/off switch, whereas the enable() version | 2849 | * it works like an on/off switch, whereas the enable() version |
2847 | * must be paired with a disable(). | 2850 | * must be paired with a disable(). |
2848 | */ | 2851 | */ |
2849 | void ring_buffer_record_on(struct ring_buffer *buffer) | 2852 | void ring_buffer_record_on(struct ring_buffer *buffer) |
2850 | { | 2853 | { |
2851 | unsigned int rd; | 2854 | unsigned int rd; |
2852 | unsigned int new_rd; | 2855 | unsigned int new_rd; |
2853 | 2856 | ||
2854 | do { | 2857 | do { |
2855 | rd = atomic_read(&buffer->record_disabled); | 2858 | rd = atomic_read(&buffer->record_disabled); |
2856 | new_rd = rd & ~RB_BUFFER_OFF; | 2859 | new_rd = rd & ~RB_BUFFER_OFF; |
2857 | } while (atomic_cmpxchg(&buffer->record_disabled, rd, new_rd) != rd); | 2860 | } while (atomic_cmpxchg(&buffer->record_disabled, rd, new_rd) != rd); |
2858 | } | 2861 | } |
2859 | EXPORT_SYMBOL_GPL(ring_buffer_record_on); | 2862 | EXPORT_SYMBOL_GPL(ring_buffer_record_on); |
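By contrast, the off/on pair is a plain switch rather than a counter: a single ring_buffer_record_on() undoes any number of ring_buffer_record_off() calls. A small sketch, assuming no nested ring_buffer_record_disable() is outstanding; my_toggle() and my_buffer are hypothetical.

#include <linux/bug.h>
#include <linux/ring_buffer.h>

static void my_toggle(struct ring_buffer *my_buffer)
{
	ring_buffer_record_off(my_buffer);
	WARN_ON(ring_buffer_record_is_on(my_buffer));	/* writes now fail and return NULL */

	ring_buffer_record_on(my_buffer);
	WARN_ON(!ring_buffer_record_is_on(my_buffer));	/* writes accepted again (no nested disables) */
}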
2860 | 2863 | ||
2861 | /** | 2864 | /** |
2862 | * ring_buffer_record_is_on - return true if the ring buffer can write | 2865 | * ring_buffer_record_is_on - return true if the ring buffer can write |
2863 | * @buffer: The ring buffer to see if write is enabled | 2866 | * @buffer: The ring buffer to see if write is enabled |
2864 | * | 2867 | * |
2865 | * Returns true if the ring buffer is in a state that it accepts writes. | 2868 | * Returns true if the ring buffer is in a state that it accepts writes. |
2866 | */ | 2869 | */ |
2867 | int ring_buffer_record_is_on(struct ring_buffer *buffer) | 2870 | int ring_buffer_record_is_on(struct ring_buffer *buffer) |
2868 | { | 2871 | { |
2869 | return !atomic_read(&buffer->record_disabled); | 2872 | return !atomic_read(&buffer->record_disabled); |
2870 | } | 2873 | } |
2871 | 2874 | ||
2872 | /** | 2875 | /** |
2873 | * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer | 2876 | * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer |
2874 | * @buffer: The ring buffer to stop writes to. | 2877 | * @buffer: The ring buffer to stop writes to. |
2875 | * @cpu: The CPU buffer to stop | 2878 | * @cpu: The CPU buffer to stop |
2876 | * | 2879 | * |
2877 | * This prevents all writes to the buffer. Any attempt to write | 2880 | * This prevents all writes to the buffer. Any attempt to write |
2878 | * to the buffer after this will fail and return NULL. | 2881 | * to the buffer after this will fail and return NULL. |
2879 | * | 2882 | * |
2880 | * The caller should call synchronize_sched() after this. | 2883 | * The caller should call synchronize_sched() after this. |
2881 | */ | 2884 | */ |
2882 | void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu) | 2885 | void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu) |
2883 | { | 2886 | { |
2884 | struct ring_buffer_per_cpu *cpu_buffer; | 2887 | struct ring_buffer_per_cpu *cpu_buffer; |
2885 | 2888 | ||
2886 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | 2889 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) |
2887 | return; | 2890 | return; |
2888 | 2891 | ||
2889 | cpu_buffer = buffer->buffers[cpu]; | 2892 | cpu_buffer = buffer->buffers[cpu]; |
2890 | atomic_inc(&cpu_buffer->record_disabled); | 2893 | atomic_inc(&cpu_buffer->record_disabled); |
2891 | } | 2894 | } |
2892 | EXPORT_SYMBOL_GPL(ring_buffer_record_disable_cpu); | 2895 | EXPORT_SYMBOL_GPL(ring_buffer_record_disable_cpu); |
2893 | 2896 | ||
2894 | /** | 2897 | /** |
2895 | * ring_buffer_record_enable_cpu - enable writes to the buffer | 2898 | * ring_buffer_record_enable_cpu - enable writes to the buffer |
2896 | * @buffer: The ring buffer to enable writes | 2899 | * @buffer: The ring buffer to enable writes |
2897 | * @cpu: The CPU to enable. | 2900 | * @cpu: The CPU to enable. |
2898 | * | 2901 | * |
2899 | * Note, multiple disables will need the same number of enables | 2902 | * Note, multiple disables will need the same number of enables |
2900 | * to truly enable the writing (much like preempt_disable). | 2903 | * to truly enable the writing (much like preempt_disable). |
2901 | */ | 2904 | */ |
2902 | void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu) | 2905 | void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu) |
2903 | { | 2906 | { |
2904 | struct ring_buffer_per_cpu *cpu_buffer; | 2907 | struct ring_buffer_per_cpu *cpu_buffer; |
2905 | 2908 | ||
2906 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | 2909 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) |
2907 | return; | 2910 | return; |
2908 | 2911 | ||
2909 | cpu_buffer = buffer->buffers[cpu]; | 2912 | cpu_buffer = buffer->buffers[cpu]; |
2910 | atomic_dec(&cpu_buffer->record_disabled); | 2913 | atomic_dec(&cpu_buffer->record_disabled); |
2911 | } | 2914 | } |
2912 | EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu); | 2915 | EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu); |
2913 | 2916 | ||
2914 | /* | 2917 | /* |
2915 | * The total number of entries in the ring buffer is the running counter | 2918 | * The total number of entries in the ring buffer is the running counter |
2916 | * of entries entered into the ring buffer, minus the sum of | 2919 | * of entries entered into the ring buffer, minus the sum of |
2917 | * the entries read from the ring buffer and the number of | 2920 | * the entries read from the ring buffer and the number of |
2918 | * entries that were overwritten. | 2921 | * entries that were overwritten. |
2919 | */ | 2922 | */ |
2920 | static inline unsigned long | 2923 | static inline unsigned long |
2921 | rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer) | 2924 | rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer) |
2922 | { | 2925 | { |
2923 | return local_read(&cpu_buffer->entries) - | 2926 | return local_read(&cpu_buffer->entries) - |
2924 | (local_read(&cpu_buffer->overrun) + cpu_buffer->read); | 2927 | (local_read(&cpu_buffer->overrun) + cpu_buffer->read); |
2925 | } | 2928 | } |
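For example, with 100 events written since the last reset, 10 of them overwritten by the writer wrapping around and 30 already consumed by the reader, rb_num_of_entries() reports 100 - (10 + 30) = 60 entries still waiting in the buffer.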
2926 | 2929 | ||
2927 | /** | 2930 | /** |
2928 | * ring_buffer_oldest_event_ts - get the oldest event timestamp from the buffer | 2931 | * ring_buffer_oldest_event_ts - get the oldest event timestamp from the buffer |
2929 | * @buffer: The ring buffer | 2932 | * @buffer: The ring buffer |
2930 | * @cpu: The per CPU buffer to read from. | 2933 | * @cpu: The per CPU buffer to read from. |
2931 | */ | 2934 | */ |
2932 | unsigned long ring_buffer_oldest_event_ts(struct ring_buffer *buffer, int cpu) | 2935 | unsigned long ring_buffer_oldest_event_ts(struct ring_buffer *buffer, int cpu) |
2933 | { | 2936 | { |
2934 | unsigned long flags; | 2937 | unsigned long flags; |
2935 | struct ring_buffer_per_cpu *cpu_buffer; | 2938 | struct ring_buffer_per_cpu *cpu_buffer; |
2936 | struct buffer_page *bpage; | 2939 | struct buffer_page *bpage; |
2937 | unsigned long ret; | 2940 | unsigned long ret; |
2938 | 2941 | ||
2939 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | 2942 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) |
2940 | return 0; | 2943 | return 0; |
2941 | 2944 | ||
2942 | cpu_buffer = buffer->buffers[cpu]; | 2945 | cpu_buffer = buffer->buffers[cpu]; |
2943 | raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | 2946 | raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); |
2944 | /* | 2947 | /* |
2945 | * if the tail is on reader_page, the oldest time stamp is on the reader | 2948 | * if the tail is on reader_page, the oldest time stamp is on the reader |
2946 | * page | 2949 | * page |
2947 | */ | 2950 | */ |
2948 | if (cpu_buffer->tail_page == cpu_buffer->reader_page) | 2951 | if (cpu_buffer->tail_page == cpu_buffer->reader_page) |
2949 | bpage = cpu_buffer->reader_page; | 2952 | bpage = cpu_buffer->reader_page; |
2950 | else | 2953 | else |
2951 | bpage = rb_set_head_page(cpu_buffer); | 2954 | bpage = rb_set_head_page(cpu_buffer); |
2952 | ret = bpage->page->time_stamp; | 2955 | ret = bpage->page->time_stamp; |
2953 | raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 2956 | raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); |
2954 | 2957 | ||
2955 | return ret; | 2958 | return ret; |
2956 | } | 2959 | } |
2957 | EXPORT_SYMBOL_GPL(ring_buffer_oldest_event_ts); | 2960 | EXPORT_SYMBOL_GPL(ring_buffer_oldest_event_ts); |
2958 | 2961 | ||
2959 | /** | 2962 | /** |
2960 | * ring_buffer_bytes_cpu - get the number of bytes consumed in a cpu buffer | 2963 | * ring_buffer_bytes_cpu - get the number of bytes consumed in a cpu buffer |
2961 | * @buffer: The ring buffer | 2964 | * @buffer: The ring buffer |
2962 | * @cpu: The per CPU buffer to read from. | 2965 | * @cpu: The per CPU buffer to read from. |
2963 | */ | 2966 | */ |
2964 | unsigned long ring_buffer_bytes_cpu(struct ring_buffer *buffer, int cpu) | 2967 | unsigned long ring_buffer_bytes_cpu(struct ring_buffer *buffer, int cpu) |
2965 | { | 2968 | { |
2966 | struct ring_buffer_per_cpu *cpu_buffer; | 2969 | struct ring_buffer_per_cpu *cpu_buffer; |
2967 | unsigned long ret; | 2970 | unsigned long ret; |
2968 | 2971 | ||
2969 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | 2972 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) |
2970 | return 0; | 2973 | return 0; |
2971 | 2974 | ||
2972 | cpu_buffer = buffer->buffers[cpu]; | 2975 | cpu_buffer = buffer->buffers[cpu]; |
2973 | ret = local_read(&cpu_buffer->entries_bytes) - cpu_buffer->read_bytes; | 2976 | ret = local_read(&cpu_buffer->entries_bytes) - cpu_buffer->read_bytes; |
2974 | 2977 | ||
2975 | return ret; | 2978 | return ret; |
2976 | } | 2979 | } |
2977 | EXPORT_SYMBOL_GPL(ring_buffer_bytes_cpu); | 2980 | EXPORT_SYMBOL_GPL(ring_buffer_bytes_cpu); |
2978 | 2981 | ||
2979 | /** | 2982 | /** |
2980 | * ring_buffer_entries_cpu - get the number of entries in a cpu buffer | 2983 | * ring_buffer_entries_cpu - get the number of entries in a cpu buffer |
2981 | * @buffer: The ring buffer | 2984 | * @buffer: The ring buffer |
2982 | * @cpu: The per CPU buffer to get the entries from. | 2985 | * @cpu: The per CPU buffer to get the entries from. |
2983 | */ | 2986 | */ |
2984 | unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu) | 2987 | unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu) |
2985 | { | 2988 | { |
2986 | struct ring_buffer_per_cpu *cpu_buffer; | 2989 | struct ring_buffer_per_cpu *cpu_buffer; |
2987 | 2990 | ||
2988 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | 2991 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) |
2989 | return 0; | 2992 | return 0; |
2990 | 2993 | ||
2991 | cpu_buffer = buffer->buffers[cpu]; | 2994 | cpu_buffer = buffer->buffers[cpu]; |
2992 | 2995 | ||
2993 | return rb_num_of_entries(cpu_buffer); | 2996 | return rb_num_of_entries(cpu_buffer); |
2994 | } | 2997 | } |
2995 | EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu); | 2998 | EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu); |
2996 | 2999 | ||
2997 | /** | 3000 | /** |
2998 | * ring_buffer_overrun_cpu - get the number of overruns in a cpu_buffer | 3001 | * ring_buffer_overrun_cpu - get the number of overruns caused by the ring |
3002 | * buffer wrapping around (only if RB_FL_OVERWRITE is on). | ||
2999 | * @buffer: The ring buffer | 3003 | * @buffer: The ring buffer |
3000 | * @cpu: The per CPU buffer to get the number of overruns from | 3004 | * @cpu: The per CPU buffer to get the number of overruns from |
3001 | */ | 3005 | */ |
3002 | unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu) | 3006 | unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu) |
3003 | { | 3007 | { |
3004 | struct ring_buffer_per_cpu *cpu_buffer; | 3008 | struct ring_buffer_per_cpu *cpu_buffer; |
3005 | unsigned long ret; | 3009 | unsigned long ret; |
3006 | 3010 | ||
3007 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | 3011 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) |
3008 | return 0; | 3012 | return 0; |
3009 | 3013 | ||
3010 | cpu_buffer = buffer->buffers[cpu]; | 3014 | cpu_buffer = buffer->buffers[cpu]; |
3011 | ret = local_read(&cpu_buffer->overrun); | 3015 | ret = local_read(&cpu_buffer->overrun); |
3012 | 3016 | ||
3013 | return ret; | 3017 | return ret; |
3014 | } | 3018 | } |
3015 | EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu); | 3019 | EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu); |
3016 | 3020 | ||
3017 | /** | 3021 | /** |
3018 | * ring_buffer_commit_overrun_cpu - get the number of overruns caused by commits | 3022 | * ring_buffer_commit_overrun_cpu - get the number of overruns caused by |
3023 | * commits failing due to the buffer wrapping around while there are uncommitted | ||
3024 | * events, such as during an interrupt storm. | ||
3019 | * @buffer: The ring buffer | 3025 | * @buffer: The ring buffer |
3020 | * @cpu: The per CPU buffer to get the number of overruns from | 3026 | * @cpu: The per CPU buffer to get the number of overruns from |
3021 | */ | 3027 | */ |
3022 | unsigned long | 3028 | unsigned long |
3023 | ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu) | 3029 | ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu) |
3024 | { | 3030 | { |
3025 | struct ring_buffer_per_cpu *cpu_buffer; | 3031 | struct ring_buffer_per_cpu *cpu_buffer; |
3026 | unsigned long ret; | 3032 | unsigned long ret; |
3027 | 3033 | ||
3028 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | 3034 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) |
3029 | return 0; | 3035 | return 0; |
3030 | 3036 | ||
3031 | cpu_buffer = buffer->buffers[cpu]; | 3037 | cpu_buffer = buffer->buffers[cpu]; |
3032 | ret = local_read(&cpu_buffer->commit_overrun); | 3038 | ret = local_read(&cpu_buffer->commit_overrun); |
3033 | 3039 | ||
3034 | return ret; | 3040 | return ret; |
3035 | } | 3041 | } |
3036 | EXPORT_SYMBOL_GPL(ring_buffer_commit_overrun_cpu); | 3042 | EXPORT_SYMBOL_GPL(ring_buffer_commit_overrun_cpu); |
3037 | 3043 | ||
3038 | /** | 3044 | /** |
3045 | * ring_buffer_dropped_events_cpu - get the number of dropped events caused by | ||
3046 | * the ring buffer filling up (only if RB_FL_OVERWRITE is off). | ||
3047 | * @buffer: The ring buffer | ||
3048 | * @cpu: The per CPU buffer to get the number of overruns from | ||
3049 | */ | ||
3050 | unsigned long | ||
3051 | ring_buffer_dropped_events_cpu(struct ring_buffer *buffer, int cpu) | ||
3052 | { | ||
3053 | struct ring_buffer_per_cpu *cpu_buffer; | ||
3054 | unsigned long ret; | ||
3055 | |||
3056 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | ||
3057 | return 0; | ||
3058 | |||
3059 | cpu_buffer = buffer->buffers[cpu]; | ||
3060 | ret = local_read(&cpu_buffer->dropped_events); | ||
3061 | |||
3062 | return ret; | ||
3063 | } | ||
3064 | EXPORT_SYMBOL_GPL(ring_buffer_dropped_events_cpu); | ||
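Putting the per-cpu counters side by side shows how they split the losses: overrun counts events overwritten by the buffer wrapping (overwrite on), commit_overrun counts events lost to wrapping while a write was still uncommitted, and dropped_events, added by this commit, counts writes refused because the buffer filled up with overwrite off. A hedged sketch of a reader summing them; the for_each_online_cpu() walk, my_dump_stats() and my_buffer are assumptions of the example, not part of this file.

#include <linux/kernel.h>
#include <linux/cpumask.h>
#include <linux/ring_buffer.h>

static void my_dump_stats(struct ring_buffer *my_buffer)
{
	unsigned long entries = 0, overruns = 0, commit_overruns = 0, dropped = 0;
	int cpu;

	for_each_online_cpu(cpu) {
		entries         += ring_buffer_entries_cpu(my_buffer, cpu);
		overruns        += ring_buffer_overrun_cpu(my_buffer, cpu);
		commit_overruns += ring_buffer_commit_overrun_cpu(my_buffer, cpu);
		dropped         += ring_buffer_dropped_events_cpu(my_buffer, cpu);
	}

	pr_info("entries=%lu overruns=%lu commit_overruns=%lu dropped=%lu\n",
		entries, overruns, commit_overruns, dropped);
}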
3065 | |||
3066 | /** | ||
3039 | * ring_buffer_entries - get the number of entries in a buffer | 3067 | * ring_buffer_entries - get the number of entries in a buffer |
3040 | * @buffer: The ring buffer | 3068 | * @buffer: The ring buffer |
3041 | * | 3069 | * |
3042 | * Returns the total number of entries in the ring buffer | 3070 | * Returns the total number of entries in the ring buffer |
3043 | * (all CPU entries) | 3071 | * (all CPU entries) |
3044 | */ | 3072 | */ |
3045 | unsigned long ring_buffer_entries(struct ring_buffer *buffer) | 3073 | unsigned long ring_buffer_entries(struct ring_buffer *buffer) |
3046 | { | 3074 | { |
3047 | struct ring_buffer_per_cpu *cpu_buffer; | 3075 | struct ring_buffer_per_cpu *cpu_buffer; |
3048 | unsigned long entries = 0; | 3076 | unsigned long entries = 0; |
3049 | int cpu; | 3077 | int cpu; |
3050 | 3078 | ||
3051 | /* if you care about this being correct, lock the buffer */ | 3079 | /* if you care about this being correct, lock the buffer */ |
3052 | for_each_buffer_cpu(buffer, cpu) { | 3080 | for_each_buffer_cpu(buffer, cpu) { |
3053 | cpu_buffer = buffer->buffers[cpu]; | 3081 | cpu_buffer = buffer->buffers[cpu]; |
3054 | entries += rb_num_of_entries(cpu_buffer); | 3082 | entries += rb_num_of_entries(cpu_buffer); |
3055 | } | 3083 | } |
3056 | 3084 | ||
3057 | return entries; | 3085 | return entries; |
3058 | } | 3086 | } |
3059 | EXPORT_SYMBOL_GPL(ring_buffer_entries); | 3087 | EXPORT_SYMBOL_GPL(ring_buffer_entries); |
3060 | 3088 | ||
3061 | /** | 3089 | /** |
3062 | * ring_buffer_overruns - get the number of overruns in buffer | 3090 | * ring_buffer_overruns - get the number of overruns in buffer |
3063 | * @buffer: The ring buffer | 3091 | * @buffer: The ring buffer |
3064 | * | 3092 | * |
3065 | * Returns the total number of overruns in the ring buffer | 3093 | * Returns the total number of overruns in the ring buffer |
3066 | * (all CPU entries) | 3094 | * (all CPU entries) |
3067 | */ | 3095 | */ |
3068 | unsigned long ring_buffer_overruns(struct ring_buffer *buffer) | 3096 | unsigned long ring_buffer_overruns(struct ring_buffer *buffer) |
3069 | { | 3097 | { |
3070 | struct ring_buffer_per_cpu *cpu_buffer; | 3098 | struct ring_buffer_per_cpu *cpu_buffer; |
3071 | unsigned long overruns = 0; | 3099 | unsigned long overruns = 0; |
3072 | int cpu; | 3100 | int cpu; |
3073 | 3101 | ||
3074 | /* if you care about this being correct, lock the buffer */ | 3102 | /* if you care about this being correct, lock the buffer */ |
3075 | for_each_buffer_cpu(buffer, cpu) { | 3103 | for_each_buffer_cpu(buffer, cpu) { |
3076 | cpu_buffer = buffer->buffers[cpu]; | 3104 | cpu_buffer = buffer->buffers[cpu]; |
3077 | overruns += local_read(&cpu_buffer->overrun); | 3105 | overruns += local_read(&cpu_buffer->overrun); |
3078 | } | 3106 | } |
3079 | 3107 | ||
3080 | return overruns; | 3108 | return overruns; |
3081 | } | 3109 | } |
3082 | EXPORT_SYMBOL_GPL(ring_buffer_overruns); | 3110 | EXPORT_SYMBOL_GPL(ring_buffer_overruns); |
3083 | 3111 | ||
3084 | static void rb_iter_reset(struct ring_buffer_iter *iter) | 3112 | static void rb_iter_reset(struct ring_buffer_iter *iter) |
3085 | { | 3113 | { |
3086 | struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; | 3114 | struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; |
3087 | 3115 | ||
3088 | /* Iterator usage is expected to have record disabled */ | 3116 | /* Iterator usage is expected to have record disabled */ |
3089 | if (list_empty(&cpu_buffer->reader_page->list)) { | 3117 | if (list_empty(&cpu_buffer->reader_page->list)) { |
3090 | iter->head_page = rb_set_head_page(cpu_buffer); | 3118 | iter->head_page = rb_set_head_page(cpu_buffer); |
3091 | if (unlikely(!iter->head_page)) | 3119 | if (unlikely(!iter->head_page)) |
3092 | return; | 3120 | return; |
3093 | iter->head = iter->head_page->read; | 3121 | iter->head = iter->head_page->read; |
3094 | } else { | 3122 | } else { |
3095 | iter->head_page = cpu_buffer->reader_page; | 3123 | iter->head_page = cpu_buffer->reader_page; |
3096 | iter->head = cpu_buffer->reader_page->read; | 3124 | iter->head = cpu_buffer->reader_page->read; |
3097 | } | 3125 | } |
3098 | if (iter->head) | 3126 | if (iter->head) |
3099 | iter->read_stamp = cpu_buffer->read_stamp; | 3127 | iter->read_stamp = cpu_buffer->read_stamp; |
3100 | else | 3128 | else |
3101 | iter->read_stamp = iter->head_page->page->time_stamp; | 3129 | iter->read_stamp = iter->head_page->page->time_stamp; |
3102 | iter->cache_reader_page = cpu_buffer->reader_page; | 3130 | iter->cache_reader_page = cpu_buffer->reader_page; |
3103 | iter->cache_read = cpu_buffer->read; | 3131 | iter->cache_read = cpu_buffer->read; |
3104 | } | 3132 | } |
3105 | 3133 | ||
3106 | /** | 3134 | /** |
3107 | * ring_buffer_iter_reset - reset an iterator | 3135 | * ring_buffer_iter_reset - reset an iterator |
3108 | * @iter: The iterator to reset | 3136 | * @iter: The iterator to reset |
3109 | * | 3137 | * |
3110 | * Resets the iterator, so that it will start from the beginning | 3138 | * Resets the iterator, so that it will start from the beginning |
3111 | * again. | 3139 | * again. |
3112 | */ | 3140 | */ |
3113 | void ring_buffer_iter_reset(struct ring_buffer_iter *iter) | 3141 | void ring_buffer_iter_reset(struct ring_buffer_iter *iter) |
3114 | { | 3142 | { |
3115 | struct ring_buffer_per_cpu *cpu_buffer; | 3143 | struct ring_buffer_per_cpu *cpu_buffer; |
3116 | unsigned long flags; | 3144 | unsigned long flags; |
3117 | 3145 | ||
3118 | if (!iter) | 3146 | if (!iter) |
3119 | return; | 3147 | return; |
3120 | 3148 | ||
3121 | cpu_buffer = iter->cpu_buffer; | 3149 | cpu_buffer = iter->cpu_buffer; |
3122 | 3150 | ||
3123 | raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | 3151 | raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); |
3124 | rb_iter_reset(iter); | 3152 | rb_iter_reset(iter); |
3125 | raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 3153 | raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); |
3126 | } | 3154 | } |
3127 | EXPORT_SYMBOL_GPL(ring_buffer_iter_reset); | 3155 | EXPORT_SYMBOL_GPL(ring_buffer_iter_reset); |
3128 | 3156 | ||
3129 | /** | 3157 | /** |
3130 | * ring_buffer_iter_empty - check if an iterator has no more to read | 3158 | * ring_buffer_iter_empty - check if an iterator has no more to read |
3131 | * @iter: The iterator to check | 3159 | * @iter: The iterator to check |
3132 | */ | 3160 | */ |
3133 | int ring_buffer_iter_empty(struct ring_buffer_iter *iter) | 3161 | int ring_buffer_iter_empty(struct ring_buffer_iter *iter) |
3134 | { | 3162 | { |
3135 | struct ring_buffer_per_cpu *cpu_buffer; | 3163 | struct ring_buffer_per_cpu *cpu_buffer; |
3136 | 3164 | ||
3137 | cpu_buffer = iter->cpu_buffer; | 3165 | cpu_buffer = iter->cpu_buffer; |
3138 | 3166 | ||
3139 | return iter->head_page == cpu_buffer->commit_page && | 3167 | return iter->head_page == cpu_buffer->commit_page && |
3140 | iter->head == rb_commit_index(cpu_buffer); | 3168 | iter->head == rb_commit_index(cpu_buffer); |
3141 | } | 3169 | } |
3142 | EXPORT_SYMBOL_GPL(ring_buffer_iter_empty); | 3170 | EXPORT_SYMBOL_GPL(ring_buffer_iter_empty); |
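For context on the iterator helpers, a hedged sketch of a read-side loop. ring_buffer_read_start(), ring_buffer_read() and ring_buffer_read_finish() are assumed to come from the same header (they are not part of this hunk), and my_walk_cpu() and my_buffer are hypothetical.

#include <linux/ring_buffer.h>

static void my_walk_cpu(struct ring_buffer *my_buffer, int cpu)
{
	struct ring_buffer_iter *iter;
	struct ring_buffer_event *event;
	u64 ts;

	iter = ring_buffer_read_start(my_buffer, cpu);	/* assumed API: set up the iterator */
	if (!iter)
		return;

	while (!ring_buffer_iter_empty(iter)) {
		event = ring_buffer_read(iter, &ts);	/* assumed API: next event, advances iterator */
		if (!event)
			break;
		/* ... inspect ring_buffer_event_data(event) at timestamp ts ... */
	}

	ring_buffer_iter_reset(iter);		/* rewind; the same events can be walked again */
	ring_buffer_read_finish(iter);		/* assumed API: tear down the iterator */
}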
3143 | 3171 | ||
3144 | static void | 3172 | static void |
3145 | rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer, | 3173 | rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer, |
3146 | struct ring_buffer_event *event) | 3174 | struct ring_buffer_event *event) |
3147 | { | 3175 | { |
3148 | u64 delta; | 3176 | u64 delta; |
3149 | 3177 | ||
3150 | switch (event->type_len) { | 3178 | switch (event->type_len) { |
3151 | case RINGBUF_TYPE_PADDING: | 3179 | case RINGBUF_TYPE_PADDING: |
3152 | return; | 3180 | return; |
3153 | 3181 | ||
3154 | case RINGBUF_TYPE_TIME_EXTEND: | 3182 | case RINGBUF_TYPE_TIME_EXTEND: |
3155 | delta = event->array[0]; | 3183 | delta = event->array[0]; |
3156 | delta <<= TS_SHIFT; | 3184 | delta <<= TS_SHIFT; |
3157 | delta += event->time_delta; | 3185 | delta += event->time_delta; |
3158 | cpu_buffer->read_stamp += delta; | 3186 | cpu_buffer->read_stamp += delta; |
3159 | return; | 3187 | return; |
3160 | 3188 | ||
3161 | case RINGBUF_TYPE_TIME_STAMP: | 3189 | case RINGBUF_TYPE_TIME_STAMP: |
3162 | /* FIXME: not implemented */ | 3190 | /* FIXME: not implemented */ |
3163 | return; | 3191 | return; |
3164 | 3192 | ||
3165 | case RINGBUF_TYPE_DATA: | 3193 | case RINGBUF_TYPE_DATA: |
3166 | cpu_buffer->read_stamp += event->time_delta; | 3194 | cpu_buffer->read_stamp += event->time_delta; |
3167 | return; | 3195 | return; |
3168 | 3196 | ||
3169 | default: | 3197 | default: |
3170 | BUG(); | 3198 | BUG(); |
3171 | } | 3199 | } |
3172 | return; | 3200 | return; |
3173 | } | 3201 | } |
3174 | 3202 | ||
3175 | static void | 3203 | static void |
3176 | rb_update_iter_read_stamp(struct ring_buffer_iter *iter, | 3204 | rb_update_iter_read_stamp(struct ring_buffer_iter *iter, |
3177 | struct ring_buffer_event *event) | 3205 | struct ring_buffer_event *event) |
3178 | { | 3206 | { |
3179 | u64 delta; | 3207 | u64 delta; |
3180 | 3208 | ||
3181 | switch (event->type_len) { | 3209 | switch (event->type_len) { |
3182 | case RINGBUF_TYPE_PADDING: | 3210 | case RINGBUF_TYPE_PADDING: |
3183 | return; | 3211 | return; |
3184 | 3212 | ||
3185 | case RINGBUF_TYPE_TIME_EXTEND: | 3213 | case RINGBUF_TYPE_TIME_EXTEND: |
3186 | delta = event->array[0]; | 3214 | delta = event->array[0]; |
3187 | delta <<= TS_SHIFT; | 3215 | delta <<= TS_SHIFT; |
3188 | delta += event->time_delta; | 3216 | delta += event->time_delta; |
3189 | iter->read_stamp += delta; | 3217 | iter->read_stamp += delta; |
3190 | return; | 3218 | return; |
3191 | 3219 | ||
3192 | case RINGBUF_TYPE_TIME_STAMP: | 3220 | case RINGBUF_TYPE_TIME_STAMP: |
3193 | /* FIXME: not implemented */ | 3221 | /* FIXME: not implemented */ |
3194 | return; | 3222 | return; |
3195 | 3223 | ||
3196 | case RINGBUF_TYPE_DATA: | 3224 | case RINGBUF_TYPE_DATA: |
3197 | iter->read_stamp += event->time_delta; | 3225 | iter->read_stamp += event->time_delta; |
3198 | return; | 3226 | return; |
3199 | 3227 | ||
3200 | default: | 3228 | default: |
3201 | BUG(); | 3229 | BUG(); |
3202 | } | 3230 | } |
3203 | return; | 3231 | return; |
3204 | } | 3232 | } |
3205 | 3233 | ||
3206 | static struct buffer_page * | 3234 | static struct buffer_page * |
3207 | rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) | 3235 | rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) |
3208 | { | 3236 | { |
3209 | struct buffer_page *reader = NULL; | 3237 | struct buffer_page *reader = NULL; |
3210 | unsigned long overwrite; | 3238 | unsigned long overwrite; |
3211 | unsigned long flags; | 3239 | unsigned long flags; |
3212 | int nr_loops = 0; | 3240 | int nr_loops = 0; |
3213 | int ret; | 3241 | int ret; |
3214 | 3242 | ||
3215 | local_irq_save(flags); | 3243 | local_irq_save(flags); |
3216 | arch_spin_lock(&cpu_buffer->lock); | 3244 | arch_spin_lock(&cpu_buffer->lock); |
3217 | 3245 | ||
3218 | again: | 3246 | again: |
3219 | /* | 3247 | /* |
3220 | * This should normally only loop twice. But because the | 3248 | * This should normally only loop twice. But because the |
3221 | * start of the reader inserts an empty page, it causes | 3249 | * start of the reader inserts an empty page, it causes |
3222 | * a case where we will loop three times. There should be no | 3250 | * a case where we will loop three times. There should be no |
3223 | * reason to loop four times (that I know of). | 3251 | * reason to loop four times (that I know of). |
3224 | */ | 3252 | */ |
3225 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3)) { | 3253 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3)) { |
3226 | reader = NULL; | 3254 | reader = NULL; |
3227 | goto out; | 3255 | goto out; |
3228 | } | 3256 | } |
3229 | 3257 | ||
3230 | reader = cpu_buffer->reader_page; | 3258 | reader = cpu_buffer->reader_page; |
3231 | 3259 | ||
3232 | /* If there's more to read, return this page */ | 3260 | /* If there's more to read, return this page */ |
3233 | if (cpu_buffer->reader_page->read < rb_page_size(reader)) | 3261 | if (cpu_buffer->reader_page->read < rb_page_size(reader)) |
3234 | goto out; | 3262 | goto out; |
3235 | 3263 | ||
3236 | /* Never should we have an index greater than the size */ | 3264 | /* Never should we have an index greater than the size */ |
3237 | if (RB_WARN_ON(cpu_buffer, | 3265 | if (RB_WARN_ON(cpu_buffer, |
3238 | cpu_buffer->reader_page->read > rb_page_size(reader))) | 3266 | cpu_buffer->reader_page->read > rb_page_size(reader))) |
3239 | goto out; | 3267 | goto out; |
3240 | 3268 | ||
3241 | /* check if we caught up to the tail */ | 3269 | /* check if we caught up to the tail */ |
3242 | reader = NULL; | 3270 | reader = NULL; |
3243 | if (cpu_buffer->commit_page == cpu_buffer->reader_page) | 3271 | if (cpu_buffer->commit_page == cpu_buffer->reader_page) |
3244 | goto out; | 3272 | goto out; |
3245 | 3273 | ||
3246 | /* Don't bother swapping if the ring buffer is empty */ | 3274 | /* Don't bother swapping if the ring buffer is empty */ |
3247 | if (rb_num_of_entries(cpu_buffer) == 0) | 3275 | if (rb_num_of_entries(cpu_buffer) == 0) |
3248 | goto out; | 3276 | goto out; |
3249 | 3277 | ||
3250 | /* | 3278 | /* |
3251 | * Reset the reader page to size zero. | 3279 | * Reset the reader page to size zero. |
3252 | */ | 3280 | */ |
3253 | local_set(&cpu_buffer->reader_page->write, 0); | 3281 | local_set(&cpu_buffer->reader_page->write, 0); |
3254 | local_set(&cpu_buffer->reader_page->entries, 0); | 3282 | local_set(&cpu_buffer->reader_page->entries, 0); |
3255 | local_set(&cpu_buffer->reader_page->page->commit, 0); | 3283 | local_set(&cpu_buffer->reader_page->page->commit, 0); |
3256 | cpu_buffer->reader_page->real_end = 0; | 3284 | cpu_buffer->reader_page->real_end = 0; |
3257 | 3285 | ||
3258 | spin: | 3286 | spin: |
3259 | /* | 3287 | /* |
3260 | * Splice the empty reader page into the list around the head. | 3288 | * Splice the empty reader page into the list around the head. |
3261 | */ | 3289 | */ |
3262 | reader = rb_set_head_page(cpu_buffer); | 3290 | reader = rb_set_head_page(cpu_buffer); |
3263 | cpu_buffer->reader_page->list.next = rb_list_head(reader->list.next); | 3291 | cpu_buffer->reader_page->list.next = rb_list_head(reader->list.next); |
3264 | cpu_buffer->reader_page->list.prev = reader->list.prev; | 3292 | cpu_buffer->reader_page->list.prev = reader->list.prev; |
3265 | 3293 | ||
3266 | /* | 3294 | /* |
3267 | * cpu_buffer->pages just needs to point to the buffer; it | 3295 | * cpu_buffer->pages just needs to point to the buffer; it |
3268 | * has no specific buffer page to point to. Let's move it out | 3296 | * has no specific buffer page to point to. Let's move it out |
3269 | * of our way so we don't accidentally swap it. | 3297 | * of our way so we don't accidentally swap it. |
3270 | */ | 3298 | */ |
3271 | cpu_buffer->pages = reader->list.prev; | 3299 | cpu_buffer->pages = reader->list.prev; |
3272 | 3300 | ||
3273 | /* The reader page will be pointing to the new head */ | 3301 | /* The reader page will be pointing to the new head */ |
3274 | rb_set_list_to_head(cpu_buffer, &cpu_buffer->reader_page->list); | 3302 | rb_set_list_to_head(cpu_buffer, &cpu_buffer->reader_page->list); |
3275 | 3303 | ||
3276 | /* | 3304 | /* |
3277 | * We want to make sure we read the overruns after we set up our | 3305 | * We want to make sure we read the overruns after we set up our |
3278 | * pointers to the next object. The writer side does a | 3306 | * pointers to the next object. The writer side does a |
3279 | * cmpxchg to cross pages which acts as the mb on the writer | 3307 | * cmpxchg to cross pages which acts as the mb on the writer |
3280 | * side. Note, the reader will constantly fail the swap | 3308 | * side. Note, the reader will constantly fail the swap |
3281 | * while the writer is updating the pointers, so this | 3309 | * while the writer is updating the pointers, so this |
3282 | * guarantees that the overwrite recorded here is the one we | 3310 | * guarantees that the overwrite recorded here is the one we |
3283 | * want to compare with the last_overrun. | 3311 | * want to compare with the last_overrun. |
3284 | */ | 3312 | */ |
3285 | smp_mb(); | 3313 | smp_mb(); |
3286 | overwrite = local_read(&(cpu_buffer->overrun)); | 3314 | overwrite = local_read(&(cpu_buffer->overrun)); |
3287 | 3315 | ||
3288 | /* | 3316 | /* |
3289 | * Here's the tricky part. | 3317 | * Here's the tricky part. |
3290 | * | 3318 | * |
3291 | * We need to move the pointer past the header page. | 3319 | * We need to move the pointer past the header page. |
3292 | * But we can only do that if a writer is not currently | 3320 | * But we can only do that if a writer is not currently |
3293 | * moving it. The page before the header page has the | 3321 | * moving it. The page before the header page has the |
3294 | * flag bit '1' set if it is pointing to the page we want. | 3322 | * flag bit '1' set if it is pointing to the page we want. |
3295 | * But if the writer is in the process of moving it | 3323 | * But if the writer is in the process of moving it |
3296 | * then it will be '2' or already moved '0'. | 3324 | * then it will be '2' or already moved '0'. |
3297 | */ | 3325 | */ |
3298 | 3326 | ||
3299 | ret = rb_head_page_replace(reader, cpu_buffer->reader_page); | 3327 | ret = rb_head_page_replace(reader, cpu_buffer->reader_page); |
3300 | 3328 | ||
3301 | /* | 3329 | /* |
3302 | * If we did not convert it, then we must try again. | 3330 | * If we did not convert it, then we must try again. |
3303 | */ | 3331 | */ |
3304 | if (!ret) | 3332 | if (!ret) |
3305 | goto spin; | 3333 | goto spin; |
3306 | 3334 | ||
3307 | /* | 3335 | /* |
3308 | * Yeah! We succeeded in replacing the page. | 3336 | * Yeah! We succeeded in replacing the page. |
3309 | * | 3337 | * |
3310 | * Now make the new head point back to the reader page. | 3338 | * Now make the new head point back to the reader page. |
3311 | */ | 3339 | */ |
3312 | rb_list_head(reader->list.next)->prev = &cpu_buffer->reader_page->list; | 3340 | rb_list_head(reader->list.next)->prev = &cpu_buffer->reader_page->list; |
3313 | rb_inc_page(cpu_buffer, &cpu_buffer->head_page); | 3341 | rb_inc_page(cpu_buffer, &cpu_buffer->head_page); |
3314 | 3342 | ||
3315 | /* Finally update the reader page to the new head */ | 3343 | /* Finally update the reader page to the new head */ |
3316 | cpu_buffer->reader_page = reader; | 3344 | cpu_buffer->reader_page = reader; |
3317 | rb_reset_reader_page(cpu_buffer); | 3345 | rb_reset_reader_page(cpu_buffer); |
3318 | 3346 | ||
3319 | if (overwrite != cpu_buffer->last_overrun) { | 3347 | if (overwrite != cpu_buffer->last_overrun) { |
3320 | cpu_buffer->lost_events = overwrite - cpu_buffer->last_overrun; | 3348 | cpu_buffer->lost_events = overwrite - cpu_buffer->last_overrun; |
3321 | cpu_buffer->last_overrun = overwrite; | 3349 | cpu_buffer->last_overrun = overwrite; |
3322 | } | 3350 | } |
3323 | 3351 | ||
3324 | goto again; | 3352 | goto again; |
3325 | 3353 | ||
3326 | out: | 3354 | out: |
3327 | arch_spin_unlock(&cpu_buffer->lock); | 3355 | arch_spin_unlock(&cpu_buffer->lock); |
3328 | local_irq_restore(flags); | 3356 | local_irq_restore(flags); |
3329 | 3357 | ||
3330 | return reader; | 3358 | return reader; |
3331 | } | 3359 | } |
3332 | 3360 | ||
3333 | static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer) | 3361 | static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer) |
3334 | { | 3362 | { |
3335 | struct ring_buffer_event *event; | 3363 | struct ring_buffer_event *event; |
3336 | struct buffer_page *reader; | 3364 | struct buffer_page *reader; |
3337 | unsigned length; | 3365 | unsigned length; |
3338 | 3366 | ||
3339 | reader = rb_get_reader_page(cpu_buffer); | 3367 | reader = rb_get_reader_page(cpu_buffer); |
3340 | 3368 | ||
3341 | /* This function should not be called when buffer is empty */ | 3369 | /* This function should not be called when buffer is empty */ |
3342 | if (RB_WARN_ON(cpu_buffer, !reader)) | 3370 | if (RB_WARN_ON(cpu_buffer, !reader)) |
3343 | return; | 3371 | return; |
3344 | 3372 | ||
3345 | event = rb_reader_event(cpu_buffer); | 3373 | event = rb_reader_event(cpu_buffer); |
3346 | 3374 | ||
3347 | if (event->type_len <= RINGBUF_TYPE_DATA_TYPE_LEN_MAX) | 3375 | if (event->type_len <= RINGBUF_TYPE_DATA_TYPE_LEN_MAX) |
3348 | cpu_buffer->read++; | 3376 | cpu_buffer->read++; |
3349 | 3377 | ||
3350 | rb_update_read_stamp(cpu_buffer, event); | 3378 | rb_update_read_stamp(cpu_buffer, event); |
3351 | 3379 | ||
3352 | length = rb_event_length(event); | 3380 | length = rb_event_length(event); |
3353 | cpu_buffer->reader_page->read += length; | 3381 | cpu_buffer->reader_page->read += length; |
3354 | } | 3382 | } |
3355 | 3383 | ||
3356 | static void rb_advance_iter(struct ring_buffer_iter *iter) | 3384 | static void rb_advance_iter(struct ring_buffer_iter *iter) |
3357 | { | 3385 | { |
3358 | struct ring_buffer_per_cpu *cpu_buffer; | 3386 | struct ring_buffer_per_cpu *cpu_buffer; |
3359 | struct ring_buffer_event *event; | 3387 | struct ring_buffer_event *event; |
3360 | unsigned length; | 3388 | unsigned length; |
3361 | 3389 | ||
3362 | cpu_buffer = iter->cpu_buffer; | 3390 | cpu_buffer = iter->cpu_buffer; |
3363 | 3391 | ||
3364 | /* | 3392 | /* |
3365 | * Check if we are at the end of the buffer. | 3393 | * Check if we are at the end of the buffer. |
3366 | */ | 3394 | */ |
3367 | if (iter->head >= rb_page_size(iter->head_page)) { | 3395 | if (iter->head >= rb_page_size(iter->head_page)) { |
3368 | /* discarded commits can make the page empty */ | 3396 | /* discarded commits can make the page empty */ |
3369 | if (iter->head_page == cpu_buffer->commit_page) | 3397 | if (iter->head_page == cpu_buffer->commit_page) |
3370 | return; | 3398 | return; |
3371 | rb_inc_iter(iter); | 3399 | rb_inc_iter(iter); |
3372 | return; | 3400 | return; |
3373 | } | 3401 | } |
3374 | 3402 | ||
3375 | event = rb_iter_head_event(iter); | 3403 | event = rb_iter_head_event(iter); |
3376 | 3404 | ||
3377 | length = rb_event_length(event); | 3405 | length = rb_event_length(event); |
3378 | 3406 | ||
3379 | /* | 3407 | /* |
3380 | * This should not be called to advance the header if we are | 3408 | * This should not be called to advance the header if we are |
3381 | * at the tail of the buffer. | 3409 | * at the tail of the buffer. |
3382 | */ | 3410 | */ |
3383 | if (RB_WARN_ON(cpu_buffer, | 3411 | if (RB_WARN_ON(cpu_buffer, |
3384 | (iter->head_page == cpu_buffer->commit_page) && | 3412 | (iter->head_page == cpu_buffer->commit_page) && |
3385 | (iter->head + length > rb_commit_index(cpu_buffer)))) | 3413 | (iter->head + length > rb_commit_index(cpu_buffer)))) |
3386 | return; | 3414 | return; |
3387 | 3415 | ||
3388 | rb_update_iter_read_stamp(iter, event); | 3416 | rb_update_iter_read_stamp(iter, event); |
3389 | 3417 | ||
3390 | iter->head += length; | 3418 | iter->head += length; |
3391 | 3419 | ||
3392 | /* check for end of page padding */ | 3420 | /* check for end of page padding */ |
3393 | if ((iter->head >= rb_page_size(iter->head_page)) && | 3421 | if ((iter->head >= rb_page_size(iter->head_page)) && |
3394 | (iter->head_page != cpu_buffer->commit_page)) | 3422 | (iter->head_page != cpu_buffer->commit_page)) |
3395 | rb_advance_iter(iter); | 3423 | rb_advance_iter(iter); |
3396 | } | 3424 | } |
3397 | 3425 | ||
3398 | static int rb_lost_events(struct ring_buffer_per_cpu *cpu_buffer) | 3426 | static int rb_lost_events(struct ring_buffer_per_cpu *cpu_buffer) |
3399 | { | 3427 | { |
3400 | return cpu_buffer->lost_events; | 3428 | return cpu_buffer->lost_events; |
3401 | } | 3429 | } |
3402 | 3430 | ||
3403 | static struct ring_buffer_event * | 3431 | static struct ring_buffer_event * |
3404 | rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts, | 3432 | rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts, |
3405 | unsigned long *lost_events) | 3433 | unsigned long *lost_events) |
3406 | { | 3434 | { |
3407 | struct ring_buffer_event *event; | 3435 | struct ring_buffer_event *event; |
3408 | struct buffer_page *reader; | 3436 | struct buffer_page *reader; |
3409 | int nr_loops = 0; | 3437 | int nr_loops = 0; |
3410 | 3438 | ||
3411 | again: | 3439 | again: |
3412 | /* | 3440 | /* |
3413 | * We repeat when a time extend is encountered. | 3441 | * We repeat when a time extend is encountered. |
3414 | * Since the time extend is always attached to a data event, | 3442 | * Since the time extend is always attached to a data event, |
3415 | * we should never loop more than once. | 3443 | * we should never loop more than once. |
3416 | * (We never hit the following condition more than twice). | 3444 | * (We never hit the following condition more than twice). |
3417 | */ | 3445 | */ |
3418 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2)) | 3446 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2)) |
3419 | return NULL; | 3447 | return NULL; |
3420 | 3448 | ||
3421 | reader = rb_get_reader_page(cpu_buffer); | 3449 | reader = rb_get_reader_page(cpu_buffer); |
3422 | if (!reader) | 3450 | if (!reader) |
3423 | return NULL; | 3451 | return NULL; |
3424 | 3452 | ||
3425 | event = rb_reader_event(cpu_buffer); | 3453 | event = rb_reader_event(cpu_buffer); |
3426 | 3454 | ||
3427 | switch (event->type_len) { | 3455 | switch (event->type_len) { |
3428 | case RINGBUF_TYPE_PADDING: | 3456 | case RINGBUF_TYPE_PADDING: |
3429 | if (rb_null_event(event)) | 3457 | if (rb_null_event(event)) |
3430 | RB_WARN_ON(cpu_buffer, 1); | 3458 | RB_WARN_ON(cpu_buffer, 1); |
3431 | /* | 3459 | /* |
3432 | * Because the writer could be discarding every | 3460 | * Because the writer could be discarding every |
3433 | * event it creates (which would probably be bad), | 3461 | * event it creates (which would probably be bad), |
3434 | * if we were to go back to "again" we might never | 3462 | * if we were to go back to "again" we might never |
3435 | * catch up and would trigger the warn on, or lock | 3463 | * catch up and would trigger the warn on, or lock |
3436 | * the box. Return the padding, and we will release | 3464 | * the box. Return the padding, and we will release |
3437 | * the current locks, and try again. | 3465 | * the current locks, and try again. |
3438 | */ | 3466 | */ |
3439 | return event; | 3467 | return event; |
3440 | 3468 | ||
3441 | case RINGBUF_TYPE_TIME_EXTEND: | 3469 | case RINGBUF_TYPE_TIME_EXTEND: |
3442 | /* Internal data, OK to advance */ | 3470 | /* Internal data, OK to advance */ |
3443 | rb_advance_reader(cpu_buffer); | 3471 | rb_advance_reader(cpu_buffer); |
3444 | goto again; | 3472 | goto again; |
3445 | 3473 | ||
3446 | case RINGBUF_TYPE_TIME_STAMP: | 3474 | case RINGBUF_TYPE_TIME_STAMP: |
3447 | /* FIXME: not implemented */ | 3475 | /* FIXME: not implemented */ |
3448 | rb_advance_reader(cpu_buffer); | 3476 | rb_advance_reader(cpu_buffer); |
3449 | goto again; | 3477 | goto again; |
3450 | 3478 | ||
3451 | case RINGBUF_TYPE_DATA: | 3479 | case RINGBUF_TYPE_DATA: |
3452 | if (ts) { | 3480 | if (ts) { |
3453 | *ts = cpu_buffer->read_stamp + event->time_delta; | 3481 | *ts = cpu_buffer->read_stamp + event->time_delta; |
3454 | ring_buffer_normalize_time_stamp(cpu_buffer->buffer, | 3482 | ring_buffer_normalize_time_stamp(cpu_buffer->buffer, |
3455 | cpu_buffer->cpu, ts); | 3483 | cpu_buffer->cpu, ts); |
3456 | } | 3484 | } |
3457 | if (lost_events) | 3485 | if (lost_events) |
3458 | *lost_events = rb_lost_events(cpu_buffer); | 3486 | *lost_events = rb_lost_events(cpu_buffer); |
3459 | return event; | 3487 | return event; |
3460 | 3488 | ||
3461 | default: | 3489 | default: |
3462 | BUG(); | 3490 | BUG(); |
3463 | } | 3491 | } |
3464 | 3492 | ||
3465 | return NULL; | 3493 | return NULL; |
3466 | } | 3494 | } |
3467 | EXPORT_SYMBOL_GPL(ring_buffer_peek); | 3495 | EXPORT_SYMBOL_GPL(ring_buffer_peek); |
3468 | 3496 | ||
3469 | static struct ring_buffer_event * | 3497 | static struct ring_buffer_event * |
3470 | rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) | 3498 | rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) |
3471 | { | 3499 | { |
3472 | struct ring_buffer *buffer; | 3500 | struct ring_buffer *buffer; |
3473 | struct ring_buffer_per_cpu *cpu_buffer; | 3501 | struct ring_buffer_per_cpu *cpu_buffer; |
3474 | struct ring_buffer_event *event; | 3502 | struct ring_buffer_event *event; |
3475 | int nr_loops = 0; | 3503 | int nr_loops = 0; |
3476 | 3504 | ||
3477 | cpu_buffer = iter->cpu_buffer; | 3505 | cpu_buffer = iter->cpu_buffer; |
3478 | buffer = cpu_buffer->buffer; | 3506 | buffer = cpu_buffer->buffer; |
3479 | 3507 | ||
3480 | /* | 3508 | /* |
3481 | * Check if someone performed a consuming read to | 3509 | * Check if someone performed a consuming read to |
3482 | * the buffer. A consuming read invalidates the iterator | 3510 | * the buffer. A consuming read invalidates the iterator |
3483 | * and we need to reset the iterator in this case. | 3511 | * and we need to reset the iterator in this case. |
3484 | */ | 3512 | */ |
3485 | if (unlikely(iter->cache_read != cpu_buffer->read || | 3513 | if (unlikely(iter->cache_read != cpu_buffer->read || |
3486 | iter->cache_reader_page != cpu_buffer->reader_page)) | 3514 | iter->cache_reader_page != cpu_buffer->reader_page)) |
3487 | rb_iter_reset(iter); | 3515 | rb_iter_reset(iter); |
3488 | 3516 | ||
3489 | again: | 3517 | again: |
3490 | if (ring_buffer_iter_empty(iter)) | 3518 | if (ring_buffer_iter_empty(iter)) |
3491 | return NULL; | 3519 | return NULL; |
3492 | 3520 | ||
3493 | /* | 3521 | /* |
3494 | * We repeat when a time extend is encountered. | 3522 | * We repeat when a time extend is encountered. |
3495 | * Since the time extend is always attached to a data event, | 3523 | * Since the time extend is always attached to a data event, |
3496 | * we should never loop more than once. | 3524 | * we should never loop more than once. |
3497 | * (We never hit the following condition more than twice). | 3525 | * (We never hit the following condition more than twice). |
3498 | */ | 3526 | */ |
3499 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2)) | 3527 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2)) |
3500 | return NULL; | 3528 | return NULL; |
3501 | 3529 | ||
3502 | if (rb_per_cpu_empty(cpu_buffer)) | 3530 | if (rb_per_cpu_empty(cpu_buffer)) |
3503 | return NULL; | 3531 | return NULL; |
3504 | 3532 | ||
3505 | if (iter->head >= local_read(&iter->head_page->page->commit)) { | 3533 | if (iter->head >= local_read(&iter->head_page->page->commit)) { |
3506 | rb_inc_iter(iter); | 3534 | rb_inc_iter(iter); |
3507 | goto again; | 3535 | goto again; |
3508 | } | 3536 | } |
3509 | 3537 | ||
3510 | event = rb_iter_head_event(iter); | 3538 | event = rb_iter_head_event(iter); |
3511 | 3539 | ||
3512 | switch (event->type_len) { | 3540 | switch (event->type_len) { |
3513 | case RINGBUF_TYPE_PADDING: | 3541 | case RINGBUF_TYPE_PADDING: |
3514 | if (rb_null_event(event)) { | 3542 | if (rb_null_event(event)) { |
3515 | rb_inc_iter(iter); | 3543 | rb_inc_iter(iter); |
3516 | goto again; | 3544 | goto again; |
3517 | } | 3545 | } |
3518 | rb_advance_iter(iter); | 3546 | rb_advance_iter(iter); |
3519 | return event; | 3547 | return event; |
3520 | 3548 | ||
3521 | case RINGBUF_TYPE_TIME_EXTEND: | 3549 | case RINGBUF_TYPE_TIME_EXTEND: |
3522 | /* Internal data, OK to advance */ | 3550 | /* Internal data, OK to advance */ |
3523 | rb_advance_iter(iter); | 3551 | rb_advance_iter(iter); |
3524 | goto again; | 3552 | goto again; |
3525 | 3553 | ||
3526 | case RINGBUF_TYPE_TIME_STAMP: | 3554 | case RINGBUF_TYPE_TIME_STAMP: |
3527 | /* FIXME: not implemented */ | 3555 | /* FIXME: not implemented */ |
3528 | rb_advance_iter(iter); | 3556 | rb_advance_iter(iter); |
3529 | goto again; | 3557 | goto again; |
3530 | 3558 | ||
3531 | case RINGBUF_TYPE_DATA: | 3559 | case RINGBUF_TYPE_DATA: |
3532 | if (ts) { | 3560 | if (ts) { |
3533 | *ts = iter->read_stamp + event->time_delta; | 3561 | *ts = iter->read_stamp + event->time_delta; |
3534 | ring_buffer_normalize_time_stamp(buffer, | 3562 | ring_buffer_normalize_time_stamp(buffer, |
3535 | cpu_buffer->cpu, ts); | 3563 | cpu_buffer->cpu, ts); |
3536 | } | 3564 | } |
3537 | return event; | 3565 | return event; |
3538 | 3566 | ||
3539 | default: | 3567 | default: |
3540 | BUG(); | 3568 | BUG(); |
3541 | } | 3569 | } |
3542 | 3570 | ||
3543 | return NULL; | 3571 | return NULL; |
3544 | } | 3572 | } |
3545 | EXPORT_SYMBOL_GPL(ring_buffer_iter_peek); | 3573 | EXPORT_SYMBOL_GPL(ring_buffer_iter_peek); |
3546 | 3574 | ||
3547 | static inline int rb_ok_to_lock(void) | 3575 | static inline int rb_ok_to_lock(void) |
3548 | { | 3576 | { |
3549 | /* | 3577 | /* |
3550 | * If an NMI die dumps out the content of the ring buffer, | 3578 | * If an NMI die dumps out the content of the ring buffer, |
3551 | * do not grab locks. We also permanently disable the ring | 3579 | * do not grab locks. We also permanently disable the ring |
3552 | * buffer. A one-time deal is all you get from reading | 3580 | * buffer. A one-time deal is all you get from reading |
3553 | * the ring buffer from an NMI. | 3581 | * the ring buffer from an NMI. |
3554 | */ | 3582 | */ |
3555 | if (likely(!in_nmi())) | 3583 | if (likely(!in_nmi())) |
3556 | return 1; | 3584 | return 1; |
3557 | 3585 | ||
3558 | tracing_off_permanent(); | 3586 | tracing_off_permanent(); |
3559 | return 0; | 3587 | return 0; |
3560 | } | 3588 | } |
3561 | 3589 | ||
3562 | /** | 3590 | /** |
3563 | * ring_buffer_peek - peek at the next event to be read | 3591 | * ring_buffer_peek - peek at the next event to be read |
3564 | * @buffer: The ring buffer to read | 3592 | * @buffer: The ring buffer to read |
3565 | * @cpu: The cpu to peek at | 3593 | * @cpu: The cpu to peek at |
3566 | * @ts: The timestamp counter of this event. | 3594 | * @ts: The timestamp counter of this event. |
3567 | * @lost_events: a variable to store if events were lost (may be NULL) | 3595 | * @lost_events: a variable to store if events were lost (may be NULL) |
3568 | * | 3596 | * |
3569 | * This will return the event that will be read next, but does | 3597 | * This will return the event that will be read next, but does |
3570 | * not consume the data. | 3598 | * not consume the data. |
3571 | */ | 3599 | */ |
3572 | struct ring_buffer_event * | 3600 | struct ring_buffer_event * |
3573 | ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts, | 3601 | ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts, |
3574 | unsigned long *lost_events) | 3602 | unsigned long *lost_events) |
3575 | { | 3603 | { |
3576 | struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; | 3604 | struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; |
3577 | struct ring_buffer_event *event; | 3605 | struct ring_buffer_event *event; |
3578 | unsigned long flags; | 3606 | unsigned long flags; |
3579 | int dolock; | 3607 | int dolock; |
3580 | 3608 | ||
3581 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | 3609 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) |
3582 | return NULL; | 3610 | return NULL; |
3583 | 3611 | ||
3584 | dolock = rb_ok_to_lock(); | 3612 | dolock = rb_ok_to_lock(); |
3585 | again: | 3613 | again: |
3586 | local_irq_save(flags); | 3614 | local_irq_save(flags); |
3587 | if (dolock) | 3615 | if (dolock) |
3588 | raw_spin_lock(&cpu_buffer->reader_lock); | 3616 | raw_spin_lock(&cpu_buffer->reader_lock); |
3589 | event = rb_buffer_peek(cpu_buffer, ts, lost_events); | 3617 | event = rb_buffer_peek(cpu_buffer, ts, lost_events); |
3590 | if (event && event->type_len == RINGBUF_TYPE_PADDING) | 3618 | if (event && event->type_len == RINGBUF_TYPE_PADDING) |
3591 | rb_advance_reader(cpu_buffer); | 3619 | rb_advance_reader(cpu_buffer); |
3592 | if (dolock) | 3620 | if (dolock) |
3593 | raw_spin_unlock(&cpu_buffer->reader_lock); | 3621 | raw_spin_unlock(&cpu_buffer->reader_lock); |
3594 | local_irq_restore(flags); | 3622 | local_irq_restore(flags); |
3595 | 3623 | ||
3596 | if (event && event->type_len == RINGBUF_TYPE_PADDING) | 3624 | if (event && event->type_len == RINGBUF_TYPE_PADDING) |
3597 | goto again; | 3625 | goto again; |
3598 | 3626 | ||
3599 | return event; | 3627 | return event; |
3600 | } | 3628 | } |
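A minimal usage sketch (illustration only, not part of this commit): peek at the next event on one CPU without consuming it. The buffer pointer and cpu are assumed to come from the surrounding tracer code; example_peek_cpu is a made-up name.

#include <linux/printk.h>
#include <linux/ring_buffer.h>

/* Look at the next event on @cpu without consuming it (sketch). */
static void example_peek_cpu(struct ring_buffer *buffer, int cpu)
{
	struct ring_buffer_event *event;
	unsigned long lost;
	u64 ts;

	event = ring_buffer_peek(buffer, cpu, &ts, &lost);
	if (!event)
		return;		/* nothing readable on this CPU right now */

	/* The event stays in the buffer; a later consume returns it again. */
	pr_info("next event at %llu, %lu events lost so far\n",
		(unsigned long long)ts, lost);
}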
3601 | 3629 | ||
3602 | /** | 3630 | /** |
3603 | * ring_buffer_iter_peek - peek at the next event to be read | 3631 | * ring_buffer_iter_peek - peek at the next event to be read |
3604 | * @iter: The ring buffer iterator | 3632 | * @iter: The ring buffer iterator |
3605 | * @ts: The timestamp counter of this event. | 3633 | * @ts: The timestamp counter of this event. |
3606 | * | 3634 | * |
3607 | * This will return the event that will be read next, but does | 3635 | * This will return the event that will be read next, but does |
3608 | * not increment the iterator. | 3636 | * not increment the iterator. |
3609 | */ | 3637 | */ |
3610 | struct ring_buffer_event * | 3638 | struct ring_buffer_event * |
3611 | ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts) | 3639 | ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts) |
3612 | { | 3640 | { |
3613 | struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; | 3641 | struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; |
3614 | struct ring_buffer_event *event; | 3642 | struct ring_buffer_event *event; |
3615 | unsigned long flags; | 3643 | unsigned long flags; |
3616 | 3644 | ||
3617 | again: | 3645 | again: |
3618 | raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | 3646 | raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); |
3619 | event = rb_iter_peek(iter, ts); | 3647 | event = rb_iter_peek(iter, ts); |
3620 | raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 3648 | raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); |
3621 | 3649 | ||
3622 | if (event && event->type_len == RINGBUF_TYPE_PADDING) | 3650 | if (event && event->type_len == RINGBUF_TYPE_PADDING) |
3623 | goto again; | 3651 | goto again; |
3624 | 3652 | ||
3625 | return event; | 3653 | return event; |
3626 | } | 3654 | } |
3627 | 3655 | ||
3628 | /** | 3656 | /** |
3629 | * ring_buffer_consume - return an event and consume it | 3657 | * ring_buffer_consume - return an event and consume it |
3630 | * @buffer: The ring buffer to get the next event from | 3658 | * @buffer: The ring buffer to get the next event from |
3631 | * @cpu: the cpu to read the buffer from | 3659 | * @cpu: the cpu to read the buffer from |
3632 | * @ts: a variable to store the timestamp (may be NULL) | 3660 | * @ts: a variable to store the timestamp (may be NULL) |
3633 | * @lost_events: a variable to store if events were lost (may be NULL) | 3661 | * @lost_events: a variable to store if events were lost (may be NULL) |
3634 | * | 3662 | * |
3635 | * Returns the next event in the ring buffer, and that event is consumed. | 3663 | * Returns the next event in the ring buffer, and that event is consumed. |
3636 | * Meaning that sequential reads will keep returning a different event, | 3664 | * Meaning that sequential reads will keep returning a different event, |
3637 | * and eventually empty the ring buffer if the producer is slower. | 3665 | * and eventually empty the ring buffer if the producer is slower. |
3638 | */ | 3666 | */ |
3639 | struct ring_buffer_event * | 3667 | struct ring_buffer_event * |
3640 | ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts, | 3668 | ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts, |
3641 | unsigned long *lost_events) | 3669 | unsigned long *lost_events) |
3642 | { | 3670 | { |
3643 | struct ring_buffer_per_cpu *cpu_buffer; | 3671 | struct ring_buffer_per_cpu *cpu_buffer; |
3644 | struct ring_buffer_event *event = NULL; | 3672 | struct ring_buffer_event *event = NULL; |
3645 | unsigned long flags; | 3673 | unsigned long flags; |
3646 | int dolock; | 3674 | int dolock; |
3647 | 3675 | ||
3648 | dolock = rb_ok_to_lock(); | 3676 | dolock = rb_ok_to_lock(); |
3649 | 3677 | ||
3650 | again: | 3678 | again: |
3651 | /* might be called in atomic */ | 3679 | /* might be called in atomic */ |
3652 | preempt_disable(); | 3680 | preempt_disable(); |
3653 | 3681 | ||
3654 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | 3682 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) |
3655 | goto out; | 3683 | goto out; |
3656 | 3684 | ||
3657 | cpu_buffer = buffer->buffers[cpu]; | 3685 | cpu_buffer = buffer->buffers[cpu]; |
3658 | local_irq_save(flags); | 3686 | local_irq_save(flags); |
3659 | if (dolock) | 3687 | if (dolock) |
3660 | raw_spin_lock(&cpu_buffer->reader_lock); | 3688 | raw_spin_lock(&cpu_buffer->reader_lock); |
3661 | 3689 | ||
3662 | event = rb_buffer_peek(cpu_buffer, ts, lost_events); | 3690 | event = rb_buffer_peek(cpu_buffer, ts, lost_events); |
3663 | if (event) { | 3691 | if (event) { |
3664 | cpu_buffer->lost_events = 0; | 3692 | cpu_buffer->lost_events = 0; |
3665 | rb_advance_reader(cpu_buffer); | 3693 | rb_advance_reader(cpu_buffer); |
3666 | } | 3694 | } |
3667 | 3695 | ||
3668 | if (dolock) | 3696 | if (dolock) |
3669 | raw_spin_unlock(&cpu_buffer->reader_lock); | 3697 | raw_spin_unlock(&cpu_buffer->reader_lock); |
3670 | local_irq_restore(flags); | 3698 | local_irq_restore(flags); |
3671 | 3699 | ||
3672 | out: | 3700 | out: |
3673 | preempt_enable(); | 3701 | preempt_enable(); |
3674 | 3702 | ||
3675 | if (event && event->type_len == RINGBUF_TYPE_PADDING) | 3703 | if (event && event->type_len == RINGBUF_TYPE_PADDING) |
3676 | goto again; | 3704 | goto again; |
3677 | 3705 | ||
3678 | return event; | 3706 | return event; |
3679 | } | 3707 | } |
3680 | EXPORT_SYMBOL_GPL(ring_buffer_consume); | 3708 | EXPORT_SYMBOL_GPL(ring_buffer_consume); |
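For illustration only (not code from this commit): a minimal consuming-read loop that drains one CPU buffer and reports the lost-event count that ring_buffer_consume() hands back between reads. The helpers ring_buffer_event_length() and ring_buffer_event_data() come from the same header; example_drain_cpu is a made-up name.

#include <linux/printk.h>
#include <linux/ring_buffer.h>

/* Consume every currently readable event on @cpu (sketch). */
static void example_drain_cpu(struct ring_buffer *buffer, int cpu)
{
	struct ring_buffer_event *event;
	unsigned long lost_events;
	u64 ts;

	while ((event = ring_buffer_consume(buffer, cpu, &ts, &lost_events))) {
		if (lost_events)
			pr_warn("lost %lu events before this one\n",
				lost_events);

		pr_info("event at %llu, payload of %u bytes at %p\n",
			(unsigned long long)ts,
			ring_buffer_event_length(event),
			ring_buffer_event_data(event));
	}
}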
3681 | 3709 | ||
3682 | /** | 3710 | /** |
3683 | * ring_buffer_read_prepare - Prepare for a non consuming read of the buffer | 3711 | * ring_buffer_read_prepare - Prepare for a non consuming read of the buffer |
3684 | * @buffer: The ring buffer to read from | 3712 | * @buffer: The ring buffer to read from |
3685 | * @cpu: The cpu buffer to iterate over | 3713 | * @cpu: The cpu buffer to iterate over |
3686 | * | 3714 | * |
3687 | * This performs the initial preparations necessary to iterate | 3715 | * This performs the initial preparations necessary to iterate |
3688 | * through the buffer. Memory is allocated, buffer recording | 3716 | * through the buffer. Memory is allocated, buffer recording |
3689 | * is disabled, and the iterator pointer is returned to the caller. | 3717 | * is disabled, and the iterator pointer is returned to the caller. |
3690 | * | 3718 | * |
3691 | * Disabling buffer recording prevents the reading from being | 3719 | * Disabling buffer recording prevents the reading from being |
3692 | * corrupted. This is not a consuming read, so a producer is not | 3720 | * corrupted. This is not a consuming read, so a producer is not |
3693 | * expected. | 3721 | * expected. |
3694 | * | 3722 | * |
3695 | * After a sequence of ring_buffer_read_prepare calls, the user is | 3723 | * After a sequence of ring_buffer_read_prepare calls, the user is |
3696 | * expected to make at least one call to ring_buffer_prepare_sync. | 3724 | * expected to make at least one call to ring_buffer_prepare_sync. |
3697 | * Afterwards, ring_buffer_read_start is invoked to get things going | 3725 | * Afterwards, ring_buffer_read_start is invoked to get things going |
3698 | * for real. | 3726 | * for real. |
3699 | * | 3727 | * |
3700 | * This overall must be paired with ring_buffer_read_finish. | 3728 | * This overall must be paired with ring_buffer_read_finish. |
3701 | */ | 3729 | */ |
3702 | struct ring_buffer_iter * | 3730 | struct ring_buffer_iter * |
3703 | ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu) | 3731 | ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu) |
3704 | { | 3732 | { |
3705 | struct ring_buffer_per_cpu *cpu_buffer; | 3733 | struct ring_buffer_per_cpu *cpu_buffer; |
3706 | struct ring_buffer_iter *iter; | 3734 | struct ring_buffer_iter *iter; |
3707 | 3735 | ||
3708 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | 3736 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) |
3709 | return NULL; | 3737 | return NULL; |
3710 | 3738 | ||
3711 | iter = kmalloc(sizeof(*iter), GFP_KERNEL); | 3739 | iter = kmalloc(sizeof(*iter), GFP_KERNEL); |
3712 | if (!iter) | 3740 | if (!iter) |
3713 | return NULL; | 3741 | return NULL; |
3714 | 3742 | ||
3715 | cpu_buffer = buffer->buffers[cpu]; | 3743 | cpu_buffer = buffer->buffers[cpu]; |
3716 | 3744 | ||
3717 | iter->cpu_buffer = cpu_buffer; | 3745 | iter->cpu_buffer = cpu_buffer; |
3718 | 3746 | ||
3719 | atomic_inc(&buffer->resize_disabled); | 3747 | atomic_inc(&buffer->resize_disabled); |
3720 | atomic_inc(&cpu_buffer->record_disabled); | 3748 | atomic_inc(&cpu_buffer->record_disabled); |
3721 | 3749 | ||
3722 | return iter; | 3750 | return iter; |
3723 | } | 3751 | } |
3724 | EXPORT_SYMBOL_GPL(ring_buffer_read_prepare); | 3752 | EXPORT_SYMBOL_GPL(ring_buffer_read_prepare); |
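The prepare/sync/start/read/finish sequence described above, as a hedged sketch for a single CPU (not code from this commit; example_iterate_cpu is a made-up name and error handling is reduced to the NULL check on the iterator):

#include <linux/printk.h>
#include <linux/ring_buffer.h>

/* Non-consuming walk of @cpu; events remain in the buffer afterwards. */
static void example_iterate_cpu(struct ring_buffer *buffer, int cpu)
{
	struct ring_buffer_iter *iter;
	struct ring_buffer_event *event;
	u64 ts;

	iter = ring_buffer_read_prepare(buffer, cpu);	/* disables recording */
	if (!iter)
		return;

	ring_buffer_read_prepare_sync();	/* wait out in-flight writers */
	ring_buffer_read_start(iter);		/* position at the first event */

	while ((event = ring_buffer_read(iter, &ts)))
		pr_info("event at %llu\n", (unsigned long long)ts);

	ring_buffer_read_finish(iter);		/* re-enable recording, free iter */
}

Reading several CPUs would repeat ring_buffer_read_prepare() per CPU, make the single ring_buffer_read_prepare_sync() call, and only then start and walk each iterator.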
3725 | 3753 | ||
3726 | /** | 3754 | /** |
3727 | * ring_buffer_read_prepare_sync - Synchronize a set of prepare calls | 3755 | * ring_buffer_read_prepare_sync - Synchronize a set of prepare calls |
3728 | * | 3756 | * |
3729 | * All previously invoked ring_buffer_read_prepare calls to prepare | 3757 | * All previously invoked ring_buffer_read_prepare calls to prepare |
3730 | * iterators will be synchronized. Afterwards, ring_buffer_read_start | 3758 | * iterators will be synchronized. Afterwards, ring_buffer_read_start |
3731 | * calls on those iterators are allowed. | 3759 | * calls on those iterators are allowed. |
3732 | */ | 3760 | */ |
3733 | void | 3761 | void |
3734 | ring_buffer_read_prepare_sync(void) | 3762 | ring_buffer_read_prepare_sync(void) |
3735 | { | 3763 | { |
3736 | synchronize_sched(); | 3764 | synchronize_sched(); |
3737 | } | 3765 | } |
3738 | EXPORT_SYMBOL_GPL(ring_buffer_read_prepare_sync); | 3766 | EXPORT_SYMBOL_GPL(ring_buffer_read_prepare_sync); |
3739 | 3767 | ||
3740 | /** | 3768 | /** |
3741 | * ring_buffer_read_start - start a non consuming read of the buffer | 3769 | * ring_buffer_read_start - start a non consuming read of the buffer |
3742 | * @iter: The iterator returned by ring_buffer_read_prepare | 3770 | * @iter: The iterator returned by ring_buffer_read_prepare |
3743 | * | 3771 | * |
3744 | * This finalizes the startup of an iteration through the buffer. | 3772 | * This finalizes the startup of an iteration through the buffer. |
3745 | * The iterator comes from a call to ring_buffer_read_prepare and | 3773 | * The iterator comes from a call to ring_buffer_read_prepare and |
3746 | * an intervening ring_buffer_read_prepare_sync must have been | 3774 | * an intervening ring_buffer_read_prepare_sync must have been |
3747 | * performed. | 3775 | * performed. |
3748 | * | 3776 | * |
3749 | * Must be paired with ring_buffer_read_finish. | 3777 | * Must be paired with ring_buffer_read_finish. |
3750 | */ | 3778 | */ |
3751 | void | 3779 | void |
3752 | ring_buffer_read_start(struct ring_buffer_iter *iter) | 3780 | ring_buffer_read_start(struct ring_buffer_iter *iter) |
3753 | { | 3781 | { |
3754 | struct ring_buffer_per_cpu *cpu_buffer; | 3782 | struct ring_buffer_per_cpu *cpu_buffer; |
3755 | unsigned long flags; | 3783 | unsigned long flags; |
3756 | 3784 | ||
3757 | if (!iter) | 3785 | if (!iter) |
3758 | return; | 3786 | return; |
3759 | 3787 | ||
3760 | cpu_buffer = iter->cpu_buffer; | 3788 | cpu_buffer = iter->cpu_buffer; |
3761 | 3789 | ||
3762 | raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | 3790 | raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); |
3763 | arch_spin_lock(&cpu_buffer->lock); | 3791 | arch_spin_lock(&cpu_buffer->lock); |
3764 | rb_iter_reset(iter); | 3792 | rb_iter_reset(iter); |
3765 | arch_spin_unlock(&cpu_buffer->lock); | 3793 | arch_spin_unlock(&cpu_buffer->lock); |
3766 | raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 3794 | raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); |
3767 | } | 3795 | } |
3768 | EXPORT_SYMBOL_GPL(ring_buffer_read_start); | 3796 | EXPORT_SYMBOL_GPL(ring_buffer_read_start); |
3769 | 3797 | ||
3770 | /** | 3798 | /** |
3771 | * ring_buffer_read_finish - finish reading the iterator of the buffer | 3799 | * ring_buffer_read_finish - finish reading the iterator of the buffer |
3772 | * @iter: The iterator retrieved by ring_buffer_read_prepare | 3800 | * @iter: The iterator retrieved by ring_buffer_read_prepare |
3773 | * | 3801 | * |
3774 | * This re-enables the recording to the buffer, and frees the | 3802 | * This re-enables the recording to the buffer, and frees the |
3775 | * iterator. | 3803 | * iterator. |
3776 | */ | 3804 | */ |
3777 | void | 3805 | void |
3778 | ring_buffer_read_finish(struct ring_buffer_iter *iter) | 3806 | ring_buffer_read_finish(struct ring_buffer_iter *iter) |
3779 | { | 3807 | { |
3780 | struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; | 3808 | struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; |
3781 | 3809 | ||
3782 | /* | 3810 | /* |
3783 | * Ring buffer is disabled from recording, here's a good place | 3811 | * Ring buffer is disabled from recording, here's a good place |
3784 | * to check the integrity of the ring buffer. | 3812 | * to check the integrity of the ring buffer. |
3785 | */ | 3813 | */ |
3786 | rb_check_pages(cpu_buffer); | 3814 | rb_check_pages(cpu_buffer); |
3787 | 3815 | ||
3788 | atomic_dec(&cpu_buffer->record_disabled); | 3816 | atomic_dec(&cpu_buffer->record_disabled); |
3789 | atomic_dec(&cpu_buffer->buffer->resize_disabled); | 3817 | atomic_dec(&cpu_buffer->buffer->resize_disabled); |
3790 | kfree(iter); | 3818 | kfree(iter); |
3791 | } | 3819 | } |
3792 | EXPORT_SYMBOL_GPL(ring_buffer_read_finish); | 3820 | EXPORT_SYMBOL_GPL(ring_buffer_read_finish); |
3793 | 3821 | ||
3794 | /** | 3822 | /** |
3795 | * ring_buffer_read - read the next item in the ring buffer by the iterator | 3823 | * ring_buffer_read - read the next item in the ring buffer by the iterator |
3796 | * @iter: The ring buffer iterator | 3824 | * @iter: The ring buffer iterator |
3797 | * @ts: The time stamp of the event read. | 3825 | * @ts: The time stamp of the event read. |
3798 | * | 3826 | * |
3799 | * This reads the next event in the ring buffer and increments the iterator. | 3827 | * This reads the next event in the ring buffer and increments the iterator. |
3800 | */ | 3828 | */ |
3801 | struct ring_buffer_event * | 3829 | struct ring_buffer_event * |
3802 | ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts) | 3830 | ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts) |
3803 | { | 3831 | { |
3804 | struct ring_buffer_event *event; | 3832 | struct ring_buffer_event *event; |
3805 | struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; | 3833 | struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; |
3806 | unsigned long flags; | 3834 | unsigned long flags; |
3807 | 3835 | ||
3808 | raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | 3836 | raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); |
3809 | again: | 3837 | again: |
3810 | event = rb_iter_peek(iter, ts); | 3838 | event = rb_iter_peek(iter, ts); |
3811 | if (!event) | 3839 | if (!event) |
3812 | goto out; | 3840 | goto out; |
3813 | 3841 | ||
3814 | if (event->type_len == RINGBUF_TYPE_PADDING) | 3842 | if (event->type_len == RINGBUF_TYPE_PADDING) |
3815 | goto again; | 3843 | goto again; |
3816 | 3844 | ||
3817 | rb_advance_iter(iter); | 3845 | rb_advance_iter(iter); |
3818 | out: | 3846 | out: |
3819 | raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 3847 | raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); |
3820 | 3848 | ||
3821 | return event; | 3849 | return event; |
3822 | } | 3850 | } |
3823 | EXPORT_SYMBOL_GPL(ring_buffer_read); | 3851 | EXPORT_SYMBOL_GPL(ring_buffer_read); |
3824 | 3852 | ||
3825 | /** | 3853 | /** |
3826 | * ring_buffer_size - return the size of the ring buffer (in bytes) | 3854 | * ring_buffer_size - return the size of the ring buffer (in bytes) |
3827 | * @buffer: The ring buffer. | 3855 | * @buffer: The ring buffer. |
3828 | */ | 3856 | */ |
3829 | unsigned long ring_buffer_size(struct ring_buffer *buffer, int cpu) | 3857 | unsigned long ring_buffer_size(struct ring_buffer *buffer, int cpu) |
3830 | { | 3858 | { |
3831 | /* | 3859 | /* |
3832 | * Earlier, this method returned | 3860 | * Earlier, this method returned |
3833 | * BUF_PAGE_SIZE * buffer->nr_pages | 3861 | * BUF_PAGE_SIZE * buffer->nr_pages |
3834 | * Since the nr_pages field is now removed, we have converted this to | 3862 | * Since the nr_pages field is now removed, we have converted this to |
3835 | * return the per cpu buffer value. | 3863 | * return the per cpu buffer value. |
3836 | */ | 3864 | */ |
3837 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | 3865 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) |
3838 | return 0; | 3866 | return 0; |
3839 | 3867 | ||
3840 | return BUF_PAGE_SIZE * buffer->buffers[cpu]->nr_pages; | 3868 | return BUF_PAGE_SIZE * buffer->buffers[cpu]->nr_pages; |
3841 | } | 3869 | } |
3842 | EXPORT_SYMBOL_GPL(ring_buffer_size); | 3870 | EXPORT_SYMBOL_GPL(ring_buffer_size); |
3843 | 3871 | ||
3844 | static void | 3872 | static void |
3845 | rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) | 3873 | rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) |
3846 | { | 3874 | { |
3847 | rb_head_page_deactivate(cpu_buffer); | 3875 | rb_head_page_deactivate(cpu_buffer); |
3848 | 3876 | ||
3849 | cpu_buffer->head_page | 3877 | cpu_buffer->head_page |
3850 | = list_entry(cpu_buffer->pages, struct buffer_page, list); | 3878 | = list_entry(cpu_buffer->pages, struct buffer_page, list); |
3851 | local_set(&cpu_buffer->head_page->write, 0); | 3879 | local_set(&cpu_buffer->head_page->write, 0); |
3852 | local_set(&cpu_buffer->head_page->entries, 0); | 3880 | local_set(&cpu_buffer->head_page->entries, 0); |
3853 | local_set(&cpu_buffer->head_page->page->commit, 0); | 3881 | local_set(&cpu_buffer->head_page->page->commit, 0); |
3854 | 3882 | ||
3855 | cpu_buffer->head_page->read = 0; | 3883 | cpu_buffer->head_page->read = 0; |
3856 | 3884 | ||
3857 | cpu_buffer->tail_page = cpu_buffer->head_page; | 3885 | cpu_buffer->tail_page = cpu_buffer->head_page; |
3858 | cpu_buffer->commit_page = cpu_buffer->head_page; | 3886 | cpu_buffer->commit_page = cpu_buffer->head_page; |
3859 | 3887 | ||
3860 | INIT_LIST_HEAD(&cpu_buffer->reader_page->list); | 3888 | INIT_LIST_HEAD(&cpu_buffer->reader_page->list); |
3861 | INIT_LIST_HEAD(&cpu_buffer->new_pages); | 3889 | INIT_LIST_HEAD(&cpu_buffer->new_pages); |
3862 | local_set(&cpu_buffer->reader_page->write, 0); | 3890 | local_set(&cpu_buffer->reader_page->write, 0); |
3863 | local_set(&cpu_buffer->reader_page->entries, 0); | 3891 | local_set(&cpu_buffer->reader_page->entries, 0); |
3864 | local_set(&cpu_buffer->reader_page->page->commit, 0); | 3892 | local_set(&cpu_buffer->reader_page->page->commit, 0); |
3865 | cpu_buffer->reader_page->read = 0; | 3893 | cpu_buffer->reader_page->read = 0; |
3866 | 3894 | ||
3867 | local_set(&cpu_buffer->commit_overrun, 0); | ||
3868 | local_set(&cpu_buffer->entries_bytes, 0); | 3895 | local_set(&cpu_buffer->entries_bytes, 0); |
3869 | local_set(&cpu_buffer->overrun, 0); | 3896 | local_set(&cpu_buffer->overrun, 0); |
3897 | local_set(&cpu_buffer->commit_overrun, 0); | ||
3898 | local_set(&cpu_buffer->dropped_events, 0); | ||
3870 | local_set(&cpu_buffer->entries, 0); | 3899 | local_set(&cpu_buffer->entries, 0); |
3871 | local_set(&cpu_buffer->committing, 0); | 3900 | local_set(&cpu_buffer->committing, 0); |
3872 | local_set(&cpu_buffer->commits, 0); | 3901 | local_set(&cpu_buffer->commits, 0); |
3873 | cpu_buffer->read = 0; | 3902 | cpu_buffer->read = 0; |
3874 | cpu_buffer->read_bytes = 0; | 3903 | cpu_buffer->read_bytes = 0; |
3875 | 3904 | ||
3876 | cpu_buffer->write_stamp = 0; | 3905 | cpu_buffer->write_stamp = 0; |
3877 | cpu_buffer->read_stamp = 0; | 3906 | cpu_buffer->read_stamp = 0; |
3878 | 3907 | ||
3879 | cpu_buffer->lost_events = 0; | 3908 | cpu_buffer->lost_events = 0; |
3880 | cpu_buffer->last_overrun = 0; | 3909 | cpu_buffer->last_overrun = 0; |
3881 | 3910 | ||
3882 | rb_head_page_activate(cpu_buffer); | 3911 | rb_head_page_activate(cpu_buffer); |
3883 | } | 3912 | } |
3884 | 3913 | ||
3885 | /** | 3914 | /** |
3886 | * ring_buffer_reset_cpu - reset a ring buffer per CPU buffer | 3915 | * ring_buffer_reset_cpu - reset a ring buffer per CPU buffer |
3887 | * @buffer: The ring buffer to reset a per cpu buffer of | 3916 | * @buffer: The ring buffer to reset a per cpu buffer of |
3888 | * @cpu: The CPU buffer to be reset | 3917 | * @cpu: The CPU buffer to be reset |
3889 | */ | 3918 | */ |
3890 | void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu) | 3919 | void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu) |
3891 | { | 3920 | { |
3892 | struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; | 3921 | struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; |
3893 | unsigned long flags; | 3922 | unsigned long flags; |
3894 | 3923 | ||
3895 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | 3924 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) |
3896 | return; | 3925 | return; |
3897 | 3926 | ||
3898 | atomic_inc(&buffer->resize_disabled); | 3927 | atomic_inc(&buffer->resize_disabled); |
3899 | atomic_inc(&cpu_buffer->record_disabled); | 3928 | atomic_inc(&cpu_buffer->record_disabled); |
3900 | 3929 | ||
3901 | /* Make sure all commits have finished */ | 3930 | /* Make sure all commits have finished */ |
3902 | synchronize_sched(); | 3931 | synchronize_sched(); |
3903 | 3932 | ||
3904 | raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | 3933 | raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); |
3905 | 3934 | ||
3906 | if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing))) | 3935 | if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing))) |
3907 | goto out; | 3936 | goto out; |
3908 | 3937 | ||
3909 | arch_spin_lock(&cpu_buffer->lock); | 3938 | arch_spin_lock(&cpu_buffer->lock); |
3910 | 3939 | ||
3911 | rb_reset_cpu(cpu_buffer); | 3940 | rb_reset_cpu(cpu_buffer); |
3912 | 3941 | ||
3913 | arch_spin_unlock(&cpu_buffer->lock); | 3942 | arch_spin_unlock(&cpu_buffer->lock); |
3914 | 3943 | ||
3915 | out: | 3944 | out: |
3916 | raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 3945 | raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); |
3917 | 3946 | ||
3918 | atomic_dec(&cpu_buffer->record_disabled); | 3947 | atomic_dec(&cpu_buffer->record_disabled); |
3919 | atomic_dec(&buffer->resize_disabled); | 3948 | atomic_dec(&buffer->resize_disabled); |
3920 | } | 3949 | } |
3921 | EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu); | 3950 | EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu); |
3922 | 3951 | ||
3923 | /** | 3952 | /** |
3924 | * ring_buffer_reset - reset a ring buffer | 3953 | * ring_buffer_reset - reset a ring buffer |
3925 | * @buffer: The ring buffer to reset all cpu buffers | 3954 | * @buffer: The ring buffer to reset all cpu buffers |
3926 | */ | 3955 | */ |
3927 | void ring_buffer_reset(struct ring_buffer *buffer) | 3956 | void ring_buffer_reset(struct ring_buffer *buffer) |
3928 | { | 3957 | { |
3929 | int cpu; | 3958 | int cpu; |
3930 | 3959 | ||
3931 | for_each_buffer_cpu(buffer, cpu) | 3960 | for_each_buffer_cpu(buffer, cpu) |
3932 | ring_buffer_reset_cpu(buffer, cpu); | 3961 | ring_buffer_reset_cpu(buffer, cpu); |
3933 | } | 3962 | } |
3934 | EXPORT_SYMBOL_GPL(ring_buffer_reset); | 3963 | EXPORT_SYMBOL_GPL(ring_buffer_reset); |
3935 | 3964 | ||
3936 | /** | 3965 | /** |
3937 | * ring_buffer_empty - is the ring buffer empty? | 3966 | * ring_buffer_empty - is the ring buffer empty? |
3938 | * @buffer: The ring buffer to test | 3967 | * @buffer: The ring buffer to test |
3939 | */ | 3968 | */ |
3940 | int ring_buffer_empty(struct ring_buffer *buffer) | 3969 | int ring_buffer_empty(struct ring_buffer *buffer) |
3941 | { | 3970 | { |
3942 | struct ring_buffer_per_cpu *cpu_buffer; | 3971 | struct ring_buffer_per_cpu *cpu_buffer; |
3943 | unsigned long flags; | 3972 | unsigned long flags; |
3944 | int dolock; | 3973 | int dolock; |
3945 | int cpu; | 3974 | int cpu; |
3946 | int ret; | 3975 | int ret; |
3947 | 3976 | ||
3948 | dolock = rb_ok_to_lock(); | 3977 | dolock = rb_ok_to_lock(); |
3949 | 3978 | ||
3950 | /* yes this is racy, but if you don't like the race, lock the buffer */ | 3979 | /* yes this is racy, but if you don't like the race, lock the buffer */ |
3951 | for_each_buffer_cpu(buffer, cpu) { | 3980 | for_each_buffer_cpu(buffer, cpu) { |
3952 | cpu_buffer = buffer->buffers[cpu]; | 3981 | cpu_buffer = buffer->buffers[cpu]; |
3953 | local_irq_save(flags); | 3982 | local_irq_save(flags); |
3954 | if (dolock) | 3983 | if (dolock) |
3955 | raw_spin_lock(&cpu_buffer->reader_lock); | 3984 | raw_spin_lock(&cpu_buffer->reader_lock); |
3956 | ret = rb_per_cpu_empty(cpu_buffer); | 3985 | ret = rb_per_cpu_empty(cpu_buffer); |
3957 | if (dolock) | 3986 | if (dolock) |
3958 | raw_spin_unlock(&cpu_buffer->reader_lock); | 3987 | raw_spin_unlock(&cpu_buffer->reader_lock); |
3959 | local_irq_restore(flags); | 3988 | local_irq_restore(flags); |
3960 | 3989 | ||
3961 | if (!ret) | 3990 | if (!ret) |
3962 | return 0; | 3991 | return 0; |
3963 | } | 3992 | } |
3964 | 3993 | ||
3965 | return 1; | 3994 | return 1; |
3966 | } | 3995 | } |
3967 | EXPORT_SYMBOL_GPL(ring_buffer_empty); | 3996 | EXPORT_SYMBOL_GPL(ring_buffer_empty); |
3968 | 3997 | ||
3969 | /** | 3998 | /** |
3970 | * ring_buffer_empty_cpu - is a cpu buffer of a ring buffer empty? | 3999 | * ring_buffer_empty_cpu - is a cpu buffer of a ring buffer empty? |
3971 | * @buffer: The ring buffer | 4000 | * @buffer: The ring buffer |
3972 | * @cpu: The CPU buffer to test | 4001 | * @cpu: The CPU buffer to test |
3973 | */ | 4002 | */ |
3974 | int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu) | 4003 | int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu) |
3975 | { | 4004 | { |
3976 | struct ring_buffer_per_cpu *cpu_buffer; | 4005 | struct ring_buffer_per_cpu *cpu_buffer; |
3977 | unsigned long flags; | 4006 | unsigned long flags; |
3978 | int dolock; | 4007 | int dolock; |
3979 | int ret; | 4008 | int ret; |
3980 | 4009 | ||
3981 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | 4010 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) |
3982 | return 1; | 4011 | return 1; |
3983 | 4012 | ||
3984 | dolock = rb_ok_to_lock(); | 4013 | dolock = rb_ok_to_lock(); |
3985 | 4014 | ||
3986 | cpu_buffer = buffer->buffers[cpu]; | 4015 | cpu_buffer = buffer->buffers[cpu]; |
3987 | local_irq_save(flags); | 4016 | local_irq_save(flags); |
3988 | if (dolock) | 4017 | if (dolock) |
3989 | raw_spin_lock(&cpu_buffer->reader_lock); | 4018 | raw_spin_lock(&cpu_buffer->reader_lock); |
3990 | ret = rb_per_cpu_empty(cpu_buffer); | 4019 | ret = rb_per_cpu_empty(cpu_buffer); |
3991 | if (dolock) | 4020 | if (dolock) |
3992 | raw_spin_unlock(&cpu_buffer->reader_lock); | 4021 | raw_spin_unlock(&cpu_buffer->reader_lock); |
3993 | local_irq_restore(flags); | 4022 | local_irq_restore(flags); |
3994 | 4023 | ||
3995 | return ret; | 4024 | return ret; |
3996 | } | 4025 | } |
3997 | EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu); | 4026 | EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu); |
3998 | 4027 | ||
3999 | #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP | 4028 | #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP |
4000 | /** | 4029 | /** |
4001 | * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers | 4030 | * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers |
4002 | * @buffer_a: One buffer to swap with | 4031 | * @buffer_a: One buffer to swap with |
4003 | * @buffer_b: The other buffer to swap with | 4032 | * @buffer_b: The other buffer to swap with |
4004 | * | 4033 | * |
4005 | * This function is useful for tracers that want to take a "snapshot" | 4034 | * This function is useful for tracers that want to take a "snapshot" |
4006 | * of a CPU buffer and have another backup buffer lying around. | 4035 | * of a CPU buffer and have another backup buffer lying around. |
4007 | * It is expected that the tracer handles the cpu buffer not being | 4036 | * It is expected that the tracer handles the cpu buffer not being |
4008 | * used at the moment. | 4037 | * used at the moment. |
4009 | */ | 4038 | */ |
4010 | int ring_buffer_swap_cpu(struct ring_buffer *buffer_a, | 4039 | int ring_buffer_swap_cpu(struct ring_buffer *buffer_a, |
4011 | struct ring_buffer *buffer_b, int cpu) | 4040 | struct ring_buffer *buffer_b, int cpu) |
4012 | { | 4041 | { |
4013 | struct ring_buffer_per_cpu *cpu_buffer_a; | 4042 | struct ring_buffer_per_cpu *cpu_buffer_a; |
4014 | struct ring_buffer_per_cpu *cpu_buffer_b; | 4043 | struct ring_buffer_per_cpu *cpu_buffer_b; |
4015 | int ret = -EINVAL; | 4044 | int ret = -EINVAL; |
4016 | 4045 | ||
4017 | if (!cpumask_test_cpu(cpu, buffer_a->cpumask) || | 4046 | if (!cpumask_test_cpu(cpu, buffer_a->cpumask) || |
4018 | !cpumask_test_cpu(cpu, buffer_b->cpumask)) | 4047 | !cpumask_test_cpu(cpu, buffer_b->cpumask)) |
4019 | goto out; | 4048 | goto out; |
4020 | 4049 | ||
4021 | cpu_buffer_a = buffer_a->buffers[cpu]; | 4050 | cpu_buffer_a = buffer_a->buffers[cpu]; |
4022 | cpu_buffer_b = buffer_b->buffers[cpu]; | 4051 | cpu_buffer_b = buffer_b->buffers[cpu]; |
4023 | 4052 | ||
4024 | /* At least make sure the two buffers are somewhat the same */ | 4053 | /* At least make sure the two buffers are somewhat the same */ |
4025 | if (cpu_buffer_a->nr_pages != cpu_buffer_b->nr_pages) | 4054 | if (cpu_buffer_a->nr_pages != cpu_buffer_b->nr_pages) |
4026 | goto out; | 4055 | goto out; |
4027 | 4056 | ||
4028 | ret = -EAGAIN; | 4057 | ret = -EAGAIN; |
4029 | 4058 | ||
4030 | if (ring_buffer_flags != RB_BUFFERS_ON) | 4059 | if (ring_buffer_flags != RB_BUFFERS_ON) |
4031 | goto out; | 4060 | goto out; |
4032 | 4061 | ||
4033 | if (atomic_read(&buffer_a->record_disabled)) | 4062 | if (atomic_read(&buffer_a->record_disabled)) |
4034 | goto out; | 4063 | goto out; |
4035 | 4064 | ||
4036 | if (atomic_read(&buffer_b->record_disabled)) | 4065 | if (atomic_read(&buffer_b->record_disabled)) |
4037 | goto out; | 4066 | goto out; |
4038 | 4067 | ||
4039 | if (atomic_read(&cpu_buffer_a->record_disabled)) | 4068 | if (atomic_read(&cpu_buffer_a->record_disabled)) |
4040 | goto out; | 4069 | goto out; |
4041 | 4070 | ||
4042 | if (atomic_read(&cpu_buffer_b->record_disabled)) | 4071 | if (atomic_read(&cpu_buffer_b->record_disabled)) |
4043 | goto out; | 4072 | goto out; |
4044 | 4073 | ||
4045 | /* | 4074 | /* |
4046 | * We can't do a synchronize_sched here because this | 4075 | * We can't do a synchronize_sched here because this |
4047 | * function can be called in atomic context. | 4076 | * function can be called in atomic context. |
4048 | * Normally this will be called from the same CPU as cpu. | 4077 | * Normally this will be called from the same CPU as cpu. |
4049 | * If not it's up to the caller to protect this. | 4078 | * If not it's up to the caller to protect this. |
4050 | */ | 4079 | */ |
4051 | atomic_inc(&cpu_buffer_a->record_disabled); | 4080 | atomic_inc(&cpu_buffer_a->record_disabled); |
4052 | atomic_inc(&cpu_buffer_b->record_disabled); | 4081 | atomic_inc(&cpu_buffer_b->record_disabled); |
4053 | 4082 | ||
4054 | ret = -EBUSY; | 4083 | ret = -EBUSY; |
4055 | if (local_read(&cpu_buffer_a->committing)) | 4084 | if (local_read(&cpu_buffer_a->committing)) |
4056 | goto out_dec; | 4085 | goto out_dec; |
4057 | if (local_read(&cpu_buffer_b->committing)) | 4086 | if (local_read(&cpu_buffer_b->committing)) |
4058 | goto out_dec; | 4087 | goto out_dec; |
4059 | 4088 | ||
4060 | buffer_a->buffers[cpu] = cpu_buffer_b; | 4089 | buffer_a->buffers[cpu] = cpu_buffer_b; |
4061 | buffer_b->buffers[cpu] = cpu_buffer_a; | 4090 | buffer_b->buffers[cpu] = cpu_buffer_a; |
4062 | 4091 | ||
4063 | cpu_buffer_b->buffer = buffer_a; | 4092 | cpu_buffer_b->buffer = buffer_a; |
4064 | cpu_buffer_a->buffer = buffer_b; | 4093 | cpu_buffer_a->buffer = buffer_b; |
4065 | 4094 | ||
4066 | ret = 0; | 4095 | ret = 0; |
4067 | 4096 | ||
4068 | out_dec: | 4097 | out_dec: |
4069 | atomic_dec(&cpu_buffer_a->record_disabled); | 4098 | atomic_dec(&cpu_buffer_a->record_disabled); |
4070 | atomic_dec(&cpu_buffer_b->record_disabled); | 4099 | atomic_dec(&cpu_buffer_b->record_disabled); |
4071 | out: | 4100 | out: |
4072 | return ret; | 4101 | return ret; |
4073 | } | 4102 | } |
4074 | EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu); | 4103 | EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu); |
4075 | #endif /* CONFIG_RING_BUFFER_ALLOW_SWAP */ | 4104 | #endif /* CONFIG_RING_BUFFER_ALLOW_SWAP */ |
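This helper is only built when CONFIG_RING_BUFFER_ALLOW_SWAP is set. As a rough illustration of the "snapshot" pattern its comment describes, a tracer holding a spare buffer with the same per-cpu size might wrap the swap like this; snapshot_cpu() and the surrounding policy are assumptions, while the ring_buffer_record_*_cpu() calls are existing exported helpers.

#include <linux/ring_buffer.h>

/* Sketch: capture one CPU's events into a spare buffer of equal size. */
static int snapshot_cpu(struct ring_buffer *live, struct ring_buffer *spare, int cpu)
{
	int ret;

	/* Quiesce writers on this CPU while the per-cpu buffers are exchanged */
	ring_buffer_record_disable_cpu(live, cpu);
	ret = ring_buffer_swap_cpu(live, spare, cpu);
	ring_buffer_record_enable_cpu(live, cpu);

	/* 0 on success; -EINVAL, -EAGAIN or -EBUSY as returned above */
	return ret;
}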
4076 | 4105 | ||
4077 | /** | 4106 | /** |
4078 | * ring_buffer_alloc_read_page - allocate a page to read from buffer | 4107 | * ring_buffer_alloc_read_page - allocate a page to read from buffer |
4079 | * @buffer: the buffer to allocate for. | 4108 | * @buffer: the buffer to allocate for. |
4080 | * | 4109 | * |
4081 | * This function is used in conjunction with ring_buffer_read_page. | 4110 | * This function is used in conjunction with ring_buffer_read_page. |
4082 | * When reading a full page from the ring buffer, these functions | 4111 | * When reading a full page from the ring buffer, these functions |
4083 | * can be used to speed up the process. The calling function should | 4112 | * can be used to speed up the process. The calling function should |
4084 | * allocate a few pages first with this function. Then when it | 4113 | * allocate a few pages first with this function. Then when it |
4085 | * needs to get pages from the ring buffer, it passes the result | 4114 | * needs to get pages from the ring buffer, it passes the result |
4086 | * of this function into ring_buffer_read_page, which will swap | 4115 | * of this function into ring_buffer_read_page, which will swap |
4087 | * the page that was allocated with the read page of the buffer. | 4116 | * the page that was allocated with the read page of the buffer. |
4088 | * | 4117 | * |
4089 | * Returns: | 4118 | * Returns: |
4090 | * The page allocated, or NULL on error. | 4119 | * The page allocated, or NULL on error. |
4091 | */ | 4120 | */ |
4092 | void *ring_buffer_alloc_read_page(struct ring_buffer *buffer, int cpu) | 4121 | void *ring_buffer_alloc_read_page(struct ring_buffer *buffer, int cpu) |
4093 | { | 4122 | { |
4094 | struct buffer_data_page *bpage; | 4123 | struct buffer_data_page *bpage; |
4095 | struct page *page; | 4124 | struct page *page; |
4096 | 4125 | ||
4097 | page = alloc_pages_node(cpu_to_node(cpu), | 4126 | page = alloc_pages_node(cpu_to_node(cpu), |
4098 | GFP_KERNEL | __GFP_NORETRY, 0); | 4127 | GFP_KERNEL | __GFP_NORETRY, 0); |
4099 | if (!page) | 4128 | if (!page) |
4100 | return NULL; | 4129 | return NULL; |
4101 | 4130 | ||
4102 | bpage = page_address(page); | 4131 | bpage = page_address(page); |
4103 | 4132 | ||
4104 | rb_init_page(bpage); | 4133 | rb_init_page(bpage); |
4105 | 4134 | ||
4106 | return bpage; | 4135 | return bpage; |
4107 | } | 4136 | } |
4108 | EXPORT_SYMBOL_GPL(ring_buffer_alloc_read_page); | 4137 | EXPORT_SYMBOL_GPL(ring_buffer_alloc_read_page); |
4109 | 4138 | ||
4110 | /** | 4139 | /** |
4111 | * ring_buffer_free_read_page - free an allocated read page | 4140 | * ring_buffer_free_read_page - free an allocated read page |
4112 | * @buffer: the buffer the page was allocated for | 4141 | * @buffer: the buffer the page was allocated for |
4113 | * @data: the page to free | 4142 | * @data: the page to free |
4114 | * | 4143 | * |
4115 | * Free a page allocated from ring_buffer_alloc_read_page. | 4144 | * Free a page allocated from ring_buffer_alloc_read_page. |
4116 | */ | 4145 | */ |
4117 | void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data) | 4146 | void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data) |
4118 | { | 4147 | { |
4119 | free_page((unsigned long)data); | 4148 | free_page((unsigned long)data); |
4120 | } | 4149 | } |
4121 | EXPORT_SYMBOL_GPL(ring_buffer_free_read_page); | 4150 | EXPORT_SYMBOL_GPL(ring_buffer_free_read_page); |
4122 | 4151 | ||
4123 | /** | 4152 | /** |
4124 | * ring_buffer_read_page - extract a page from the ring buffer | 4153 | * ring_buffer_read_page - extract a page from the ring buffer |
4125 | * @buffer: buffer to extract from | 4154 | * @buffer: buffer to extract from |
4126 | * @data_page: the page to use allocated from ring_buffer_alloc_read_page | 4155 | * @data_page: the page to use allocated from ring_buffer_alloc_read_page |
4127 | * @len: amount to extract | 4156 | * @len: amount to extract |
4128 | * @cpu: the cpu of the buffer to extract | 4157 | * @cpu: the cpu of the buffer to extract |
4129 | * @full: should the extraction only happen when the page is full. | 4158 | * @full: should the extraction only happen when the page is full. |
4130 | * | 4159 | * |
4131 | * This function will pull out a page from the ring buffer and consume it. | 4160 | * This function will pull out a page from the ring buffer and consume it. |
4132 | * @data_page must be the address of the variable that was returned | 4161 | * @data_page must be the address of the variable that was returned |
4133 | * from ring_buffer_alloc_read_page. This is because the page might be used | 4162 | * from ring_buffer_alloc_read_page. This is because the page might be used |
4134 | * to swap with a page in the ring buffer. | 4163 | * to swap with a page in the ring buffer. |
4135 | * | 4164 | * |
4136 | * for example: | 4165 | * for example: |
4137 | * rpage = ring_buffer_alloc_read_page(buffer, cpu); | 4166 | * rpage = ring_buffer_alloc_read_page(buffer, cpu); |
4138 | * if (!rpage) | 4167 | * if (!rpage) |
4139 | * return error; | 4168 | * return error; |
4140 | * ret = ring_buffer_read_page(buffer, &rpage, len, cpu, 0); | 4169 | * ret = ring_buffer_read_page(buffer, &rpage, len, cpu, 0); |
4141 | * if (ret >= 0) | 4170 | * if (ret >= 0) |
4142 | * process_page(rpage, ret); | 4171 | * process_page(rpage, ret); |
4143 | * | 4172 | * |
4144 | * When @full is set, the function will not return data unless | 4173 | * When @full is set, the function will not return data unless |
4145 | * the writer is off the reader page. | 4174 | * the writer is off the reader page. |
4146 | * | 4175 | * |
4147 | * Note: it is up to the calling functions to handle sleeps and wakeups. | 4176 | * Note: it is up to the calling functions to handle sleeps and wakeups. |
4148 | * The ring buffer can be used anywhere in the kernel and can not | 4177 | * The ring buffer can be used anywhere in the kernel and can not |
4149 | * blindly call wake_up. The layer that uses the ring buffer must be | 4178 | * blindly call wake_up. The layer that uses the ring buffer must be |
4150 | * responsible for that. | 4179 | * responsible for that. |
4151 | * | 4180 | * |
4152 | * Returns: | 4181 | * Returns: |
4153 | * >=0 if data has been transferred, returns the offset of consumed data. | 4182 | * >=0 if data has been transferred, returns the offset of consumed data. |
4154 | * <0 if no data has been transferred. | 4183 | * <0 if no data has been transferred. |
4155 | */ | 4184 | */ |
4156 | int ring_buffer_read_page(struct ring_buffer *buffer, | 4185 | int ring_buffer_read_page(struct ring_buffer *buffer, |
4157 | void **data_page, size_t len, int cpu, int full) | 4186 | void **data_page, size_t len, int cpu, int full) |
4158 | { | 4187 | { |
4159 | struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; | 4188 | struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; |
4160 | struct ring_buffer_event *event; | 4189 | struct ring_buffer_event *event; |
4161 | struct buffer_data_page *bpage; | 4190 | struct buffer_data_page *bpage; |
4162 | struct buffer_page *reader; | 4191 | struct buffer_page *reader; |
4163 | unsigned long missed_events; | 4192 | unsigned long missed_events; |
4164 | unsigned long flags; | 4193 | unsigned long flags; |
4165 | unsigned int commit; | 4194 | unsigned int commit; |
4166 | unsigned int read; | 4195 | unsigned int read; |
4167 | u64 save_timestamp; | 4196 | u64 save_timestamp; |
4168 | int ret = -1; | 4197 | int ret = -1; |
4169 | 4198 | ||
4170 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | 4199 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) |
4171 | goto out; | 4200 | goto out; |
4172 | 4201 | ||
4173 | /* | 4202 | /* |
4174 | * If len is not big enough to hold the page header, then | 4203 | * If len is not big enough to hold the page header, then |
4175 | * we can not copy anything. | 4204 | * we can not copy anything. |
4176 | */ | 4205 | */ |
4177 | if (len <= BUF_PAGE_HDR_SIZE) | 4206 | if (len <= BUF_PAGE_HDR_SIZE) |
4178 | goto out; | 4207 | goto out; |
4179 | 4208 | ||
4180 | len -= BUF_PAGE_HDR_SIZE; | 4209 | len -= BUF_PAGE_HDR_SIZE; |
4181 | 4210 | ||
4182 | if (!data_page) | 4211 | if (!data_page) |
4183 | goto out; | 4212 | goto out; |
4184 | 4213 | ||
4185 | bpage = *data_page; | 4214 | bpage = *data_page; |
4186 | if (!bpage) | 4215 | if (!bpage) |
4187 | goto out; | 4216 | goto out; |
4188 | 4217 | ||
4189 | raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | 4218 | raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); |
4190 | 4219 | ||
4191 | reader = rb_get_reader_page(cpu_buffer); | 4220 | reader = rb_get_reader_page(cpu_buffer); |
4192 | if (!reader) | 4221 | if (!reader) |
4193 | goto out_unlock; | 4222 | goto out_unlock; |
4194 | 4223 | ||
4195 | event = rb_reader_event(cpu_buffer); | 4224 | event = rb_reader_event(cpu_buffer); |
4196 | 4225 | ||
4197 | read = reader->read; | 4226 | read = reader->read; |
4198 | commit = rb_page_commit(reader); | 4227 | commit = rb_page_commit(reader); |
4199 | 4228 | ||
4200 | /* Check if any events were dropped */ | 4229 | /* Check if any events were dropped */ |
4201 | missed_events = cpu_buffer->lost_events; | 4230 | missed_events = cpu_buffer->lost_events; |
4202 | 4231 | ||
4203 | /* | 4232 | /* |
4204 | * If this page has been partially read or | 4233 | * If this page has been partially read or |
4205 | * if len is not big enough to read the rest of the page or | 4234 | * if len is not big enough to read the rest of the page or |
4206 | * a writer is still on the page, then | 4235 | * a writer is still on the page, then |
4207 | * we must copy the data from the page to the buffer. | 4236 | * we must copy the data from the page to the buffer. |
4208 | * Otherwise, we can simply swap the page with the one passed in. | 4237 | * Otherwise, we can simply swap the page with the one passed in. |
4209 | */ | 4238 | */ |
4210 | if (read || (len < (commit - read)) || | 4239 | if (read || (len < (commit - read)) || |
4211 | cpu_buffer->reader_page == cpu_buffer->commit_page) { | 4240 | cpu_buffer->reader_page == cpu_buffer->commit_page) { |
4212 | struct buffer_data_page *rpage = cpu_buffer->reader_page->page; | 4241 | struct buffer_data_page *rpage = cpu_buffer->reader_page->page; |
4213 | unsigned int rpos = read; | 4242 | unsigned int rpos = read; |
4214 | unsigned int pos = 0; | 4243 | unsigned int pos = 0; |
4215 | unsigned int size; | 4244 | unsigned int size; |
4216 | 4245 | ||
4217 | if (full) | 4246 | if (full) |
4218 | goto out_unlock; | 4247 | goto out_unlock; |
4219 | 4248 | ||
4220 | if (len > (commit - read)) | 4249 | if (len > (commit - read)) |
4221 | len = (commit - read); | 4250 | len = (commit - read); |
4222 | 4251 | ||
4223 | /* Always keep the time extend and data together */ | 4252 | /* Always keep the time extend and data together */ |
4224 | size = rb_event_ts_length(event); | 4253 | size = rb_event_ts_length(event); |
4225 | 4254 | ||
4226 | if (len < size) | 4255 | if (len < size) |
4227 | goto out_unlock; | 4256 | goto out_unlock; |
4228 | 4257 | ||
4229 | /* save the current timestamp, since the user will need it */ | 4258 | /* save the current timestamp, since the user will need it */ |
4230 | save_timestamp = cpu_buffer->read_stamp; | 4259 | save_timestamp = cpu_buffer->read_stamp; |
4231 | 4260 | ||
4232 | /* Need to copy one event at a time */ | 4261 | /* Need to copy one event at a time */ |
4233 | do { | 4262 | do { |
4234 | /* We need the size of one event, because | 4263 | /* We need the size of one event, because |
4235 | * rb_advance_reader only advances by one event, | 4264 | * rb_advance_reader only advances by one event, |
4236 | * whereas rb_event_ts_length may include the size of | 4265 | * whereas rb_event_ts_length may include the size of |
4237 | * one or two events. | 4266 | * one or two events. |
4238 | * We have already ensured there's enough space if this | 4267 | * We have already ensured there's enough space if this |
4239 | * is a time extend. */ | 4268 | * is a time extend. */ |
4240 | size = rb_event_length(event); | 4269 | size = rb_event_length(event); |
4241 | memcpy(bpage->data + pos, rpage->data + rpos, size); | 4270 | memcpy(bpage->data + pos, rpage->data + rpos, size); |
4242 | 4271 | ||
4243 | len -= size; | 4272 | len -= size; |
4244 | 4273 | ||
4245 | rb_advance_reader(cpu_buffer); | 4274 | rb_advance_reader(cpu_buffer); |
4246 | rpos = reader->read; | 4275 | rpos = reader->read; |
4247 | pos += size; | 4276 | pos += size; |
4248 | 4277 | ||
4249 | if (rpos >= commit) | 4278 | if (rpos >= commit) |
4250 | break; | 4279 | break; |
4251 | 4280 | ||
4252 | event = rb_reader_event(cpu_buffer); | 4281 | event = rb_reader_event(cpu_buffer); |
4253 | /* Always keep the time extend and data together */ | 4282 | /* Always keep the time extend and data together */ |
4254 | size = rb_event_ts_length(event); | 4283 | size = rb_event_ts_length(event); |
4255 | } while (len >= size); | 4284 | } while (len >= size); |
4256 | 4285 | ||
4257 | /* update bpage */ | 4286 | /* update bpage */ |
4258 | local_set(&bpage->commit, pos); | 4287 | local_set(&bpage->commit, pos); |
4259 | bpage->time_stamp = save_timestamp; | 4288 | bpage->time_stamp = save_timestamp; |
4260 | 4289 | ||
4261 | /* we copied everything to the beginning */ | 4290 | /* we copied everything to the beginning */ |
4262 | read = 0; | 4291 | read = 0; |
4263 | } else { | 4292 | } else { |
4264 | /* update the entry counter */ | 4293 | /* update the entry counter */ |
4265 | cpu_buffer->read += rb_page_entries(reader); | 4294 | cpu_buffer->read += rb_page_entries(reader); |
4266 | cpu_buffer->read_bytes += BUF_PAGE_SIZE; | 4295 | cpu_buffer->read_bytes += BUF_PAGE_SIZE; |
4267 | 4296 | ||
4268 | /* swap the pages */ | 4297 | /* swap the pages */ |
4269 | rb_init_page(bpage); | 4298 | rb_init_page(bpage); |
4270 | bpage = reader->page; | 4299 | bpage = reader->page; |
4271 | reader->page = *data_page; | 4300 | reader->page = *data_page; |
4272 | local_set(&reader->write, 0); | 4301 | local_set(&reader->write, 0); |
4273 | local_set(&reader->entries, 0); | 4302 | local_set(&reader->entries, 0); |
4274 | reader->read = 0; | 4303 | reader->read = 0; |
4275 | *data_page = bpage; | 4304 | *data_page = bpage; |
4276 | 4305 | ||
4277 | /* | 4306 | /* |
4278 | * Use the real_end for the data size. | 4307 | * Use the real_end for the data size. |
4279 | * This gives us a chance to store the lost events | 4308 | * This gives us a chance to store the lost events |
4280 | * on the page. | 4309 | * on the page. |
4281 | */ | 4310 | */ |
4282 | if (reader->real_end) | 4311 | if (reader->real_end) |
4283 | local_set(&bpage->commit, reader->real_end); | 4312 | local_set(&bpage->commit, reader->real_end); |
4284 | } | 4313 | } |
4285 | ret = read; | 4314 | ret = read; |
4286 | 4315 | ||
4287 | cpu_buffer->lost_events = 0; | 4316 | cpu_buffer->lost_events = 0; |
4288 | 4317 | ||
4289 | commit = local_read(&bpage->commit); | 4318 | commit = local_read(&bpage->commit); |
4290 | /* | 4319 | /* |
4291 | * Set a flag in the commit field if we lost events | 4320 | * Set a flag in the commit field if we lost events |
4292 | */ | 4321 | */ |
4293 | if (missed_events) { | 4322 | if (missed_events) { |
4294 | /* If there is room at the end of the page to save the | 4323 | /* If there is room at the end of the page to save the |
4295 | * missed events, then record them there. | 4324 | * missed events, then record them there. |
4296 | */ | 4325 | */ |
4297 | if (BUF_PAGE_SIZE - commit >= sizeof(missed_events)) { | 4326 | if (BUF_PAGE_SIZE - commit >= sizeof(missed_events)) { |
4298 | memcpy(&bpage->data[commit], &missed_events, | 4327 | memcpy(&bpage->data[commit], &missed_events, |
4299 | sizeof(missed_events)); | 4328 | sizeof(missed_events)); |
4300 | local_add(RB_MISSED_STORED, &bpage->commit); | 4329 | local_add(RB_MISSED_STORED, &bpage->commit); |
4301 | commit += sizeof(missed_events); | 4330 | commit += sizeof(missed_events); |
4302 | } | 4331 | } |
4303 | local_add(RB_MISSED_EVENTS, &bpage->commit); | 4332 | local_add(RB_MISSED_EVENTS, &bpage->commit); |
4304 | } | 4333 | } |
4305 | 4334 | ||
4306 | /* | 4335 | /* |
4307 | * This page may be off to user land. Zero it out here. | 4336 | * This page may be off to user land. Zero it out here. |
4308 | */ | 4337 | */ |
4309 | if (commit < BUF_PAGE_SIZE) | 4338 | if (commit < BUF_PAGE_SIZE) |
4310 | memset(&bpage->data[commit], 0, BUF_PAGE_SIZE - commit); | 4339 | memset(&bpage->data[commit], 0, BUF_PAGE_SIZE - commit); |
4311 | 4340 | ||
4312 | out_unlock: | 4341 | out_unlock: |
4313 | raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 4342 | raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); |
4314 | 4343 | ||
4315 | out: | 4344 | out: |
4316 | return ret; | 4345 | return ret; |
4317 | } | 4346 | } |
4318 | EXPORT_SYMBOL_GPL(ring_buffer_read_page); | 4347 | EXPORT_SYMBOL_GPL(ring_buffer_read_page); |
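Putting the three page-level helpers together, a splice-style reader could drive them roughly as follows. This is only a sketch: buf_process_page() is hypothetical, the error policy is arbitrary, and passing PAGE_SIZE for @len simply asks for as much of the reader page as will fit.

#include <linux/ring_buffer.h>
#include <linux/errno.h>
#include <linux/mm.h>

extern void buf_process_page(void *page, int offset);	/* hypothetical consumer */

/* Sketch: pull one (possibly partial) page from a CPU buffer and hand it on. */
static int read_one_page(struct ring_buffer *buffer, int cpu)
{
	void *page;
	int ret;

	page = ring_buffer_alloc_read_page(buffer, cpu);
	if (!page)
		return -ENOMEM;

	/* full == 0: accept a partially filled reader page as well */
	ret = ring_buffer_read_page(buffer, &page, PAGE_SIZE, cpu, 0);
	if (ret >= 0)
		buf_process_page(page, ret);

	/* after a swap, 'page' may be a different page than was allocated */
	ring_buffer_free_read_page(buffer, page);
	return ret < 0 ? -EAGAIN : 0;
}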
4319 | 4348 | ||
4320 | #ifdef CONFIG_HOTPLUG_CPU | 4349 | #ifdef CONFIG_HOTPLUG_CPU |
4321 | static int rb_cpu_notify(struct notifier_block *self, | 4350 | static int rb_cpu_notify(struct notifier_block *self, |
4322 | unsigned long action, void *hcpu) | 4351 | unsigned long action, void *hcpu) |
4323 | { | 4352 | { |
4324 | struct ring_buffer *buffer = | 4353 | struct ring_buffer *buffer = |
4325 | container_of(self, struct ring_buffer, cpu_notify); | 4354 | container_of(self, struct ring_buffer, cpu_notify); |
4326 | long cpu = (long)hcpu; | 4355 | long cpu = (long)hcpu; |
4327 | int cpu_i, nr_pages_same; | 4356 | int cpu_i, nr_pages_same; |
4328 | unsigned int nr_pages; | 4357 | unsigned int nr_pages; |
4329 | 4358 | ||
4330 | switch (action) { | 4359 | switch (action) { |
4331 | case CPU_UP_PREPARE: | 4360 | case CPU_UP_PREPARE: |
4332 | case CPU_UP_PREPARE_FROZEN: | 4361 | case CPU_UP_PREPARE_FROZEN: |
4333 | if (cpumask_test_cpu(cpu, buffer->cpumask)) | 4362 | if (cpumask_test_cpu(cpu, buffer->cpumask)) |
4334 | return NOTIFY_OK; | 4363 | return NOTIFY_OK; |
4335 | 4364 | ||
4336 | nr_pages = 0; | 4365 | nr_pages = 0; |
4337 | nr_pages_same = 1; | 4366 | nr_pages_same = 1; |
4338 | /* check if all cpu sizes are the same */ | 4367 | /* check if all cpu sizes are the same */ |
4339 | for_each_buffer_cpu(buffer, cpu_i) { | 4368 | for_each_buffer_cpu(buffer, cpu_i) { |
4340 | /* fill in the size from first enabled cpu */ | 4369 | /* fill in the size from first enabled cpu */ |
4341 | if (nr_pages == 0) | 4370 | if (nr_pages == 0) |
4342 | nr_pages = buffer->buffers[cpu_i]->nr_pages; | 4371 | nr_pages = buffer->buffers[cpu_i]->nr_pages; |
4343 | if (nr_pages != buffer->buffers[cpu_i]->nr_pages) { | 4372 | if (nr_pages != buffer->buffers[cpu_i]->nr_pages) { |
4344 | nr_pages_same = 0; | 4373 | nr_pages_same = 0; |
4345 | break; | 4374 | break; |
4346 | } | 4375 | } |
4347 | } | 4376 | } |
4348 | /* allocate minimum pages, user can later expand it */ | 4377 | /* allocate minimum pages, user can later expand it */ |
4349 | if (!nr_pages_same) | 4378 | if (!nr_pages_same) |
4350 | nr_pages = 2; | 4379 | nr_pages = 2; |
4351 | buffer->buffers[cpu] = | 4380 | buffer->buffers[cpu] = |
4352 | rb_allocate_cpu_buffer(buffer, nr_pages, cpu); | 4381 | rb_allocate_cpu_buffer(buffer, nr_pages, cpu); |
4353 | if (!buffer->buffers[cpu]) { | 4382 | if (!buffer->buffers[cpu]) { |
4354 | WARN(1, "failed to allocate ring buffer on CPU %ld\n", | 4383 | WARN(1, "failed to allocate ring buffer on CPU %ld\n", |
4355 | cpu); | 4384 | cpu); |
4356 | return NOTIFY_OK; | 4385 | return NOTIFY_OK; |
4357 | } | 4386 | } |
4358 | smp_wmb(); | 4387 | smp_wmb(); |
4359 | cpumask_set_cpu(cpu, buffer->cpumask); | 4388 | cpumask_set_cpu(cpu, buffer->cpumask); |
4360 | break; | 4389 | break; |
4361 | case CPU_DOWN_PREPARE: | 4390 | case CPU_DOWN_PREPARE: |
4362 | case CPU_DOWN_PREPARE_FROZEN: | 4391 | case CPU_DOWN_PREPARE_FROZEN: |
4363 | /* | 4392 | /* |
4364 | * Do nothing. | 4393 | * Do nothing. |
4365 | * If we were to free the buffer, then the user would | 4394 | * If we were to free the buffer, then the user would |
4366 | * lose any trace that was in the buffer. | 4395 | * lose any trace that was in the buffer. |
4367 | */ | 4396 | */ |
4368 | break; | 4397 | break; |
4369 | default: | 4398 | default: |
4370 | break; | 4399 | break; |
4371 | } | 4400 | } |
4372 | return NOTIFY_OK; | 4401 | return NOTIFY_OK; |
kernel/trace/trace.c
1 | /* | 1 | /* |
2 | * ring buffer based function tracer | 2 | * ring buffer based function tracer |
3 | * | 3 | * |
4 | * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com> | 4 | * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com> |
5 | * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com> | 5 | * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com> |
6 | * | 6 | * |
7 | * Originally taken from the RT patch by: | 7 | * Originally taken from the RT patch by: |
8 | * Arnaldo Carvalho de Melo <acme@redhat.com> | 8 | * Arnaldo Carvalho de Melo <acme@redhat.com> |
9 | * | 9 | * |
10 | * Based on code from the latency_tracer, that is: | 10 | * Based on code from the latency_tracer, that is: |
11 | * Copyright (C) 2004-2006 Ingo Molnar | 11 | * Copyright (C) 2004-2006 Ingo Molnar |
12 | * Copyright (C) 2004 William Lee Irwin III | 12 | * Copyright (C) 2004 William Lee Irwin III |
13 | */ | 13 | */ |
14 | #include <linux/ring_buffer.h> | 14 | #include <linux/ring_buffer.h> |
15 | #include <generated/utsrelease.h> | 15 | #include <generated/utsrelease.h> |
16 | #include <linux/stacktrace.h> | 16 | #include <linux/stacktrace.h> |
17 | #include <linux/writeback.h> | 17 | #include <linux/writeback.h> |
18 | #include <linux/kallsyms.h> | 18 | #include <linux/kallsyms.h> |
19 | #include <linux/seq_file.h> | 19 | #include <linux/seq_file.h> |
20 | #include <linux/notifier.h> | 20 | #include <linux/notifier.h> |
21 | #include <linux/irqflags.h> | 21 | #include <linux/irqflags.h> |
22 | #include <linux/debugfs.h> | 22 | #include <linux/debugfs.h> |
23 | #include <linux/pagemap.h> | 23 | #include <linux/pagemap.h> |
24 | #include <linux/hardirq.h> | 24 | #include <linux/hardirq.h> |
25 | #include <linux/linkage.h> | 25 | #include <linux/linkage.h> |
26 | #include <linux/uaccess.h> | 26 | #include <linux/uaccess.h> |
27 | #include <linux/kprobes.h> | 27 | #include <linux/kprobes.h> |
28 | #include <linux/ftrace.h> | 28 | #include <linux/ftrace.h> |
29 | #include <linux/module.h> | 29 | #include <linux/module.h> |
30 | #include <linux/percpu.h> | 30 | #include <linux/percpu.h> |
31 | #include <linux/splice.h> | 31 | #include <linux/splice.h> |
32 | #include <linux/kdebug.h> | 32 | #include <linux/kdebug.h> |
33 | #include <linux/string.h> | 33 | #include <linux/string.h> |
34 | #include <linux/rwsem.h> | 34 | #include <linux/rwsem.h> |
35 | #include <linux/slab.h> | 35 | #include <linux/slab.h> |
36 | #include <linux/ctype.h> | 36 | #include <linux/ctype.h> |
37 | #include <linux/init.h> | 37 | #include <linux/init.h> |
38 | #include <linux/poll.h> | 38 | #include <linux/poll.h> |
39 | #include <linux/nmi.h> | 39 | #include <linux/nmi.h> |
40 | #include <linux/fs.h> | 40 | #include <linux/fs.h> |
41 | 41 | ||
42 | #include "trace.h" | 42 | #include "trace.h" |
43 | #include "trace_output.h" | 43 | #include "trace_output.h" |
44 | 44 | ||
45 | /* | 45 | /* |
46 | * On boot up, the ring buffer is set to the minimum size, so that | 46 | * On boot up, the ring buffer is set to the minimum size, so that |
47 | * we do not waste memory on systems that are not using tracing. | 47 | * we do not waste memory on systems that are not using tracing. |
48 | */ | 48 | */ |
49 | int ring_buffer_expanded; | 49 | int ring_buffer_expanded; |
50 | 50 | ||
51 | /* | 51 | /* |
52 | * We need to change this state when a selftest is running. | 52 | * We need to change this state when a selftest is running. |
53 | * A selftest will look into the ring-buffer to count the | 53 | * A selftest will look into the ring-buffer to count the |
54 | * entries inserted during the selftest, although some concurrent | 54 | * entries inserted during the selftest, although some concurrent |
55 | * insertions into the ring-buffer, such as trace_printk, could occur | 55 | * insertions into the ring-buffer, such as trace_printk, could occur |
56 | * at the same time, giving false positive or negative results. | 56 | * at the same time, giving false positive or negative results. |
57 | */ | 57 | */ |
58 | static bool __read_mostly tracing_selftest_running; | 58 | static bool __read_mostly tracing_selftest_running; |
59 | 59 | ||
60 | /* | 60 | /* |
61 | * If a tracer is running, we do not want to run SELFTEST. | 61 | * If a tracer is running, we do not want to run SELFTEST. |
62 | */ | 62 | */ |
63 | bool __read_mostly tracing_selftest_disabled; | 63 | bool __read_mostly tracing_selftest_disabled; |
64 | 64 | ||
65 | /* For tracers that don't implement custom flags */ | 65 | /* For tracers that don't implement custom flags */ |
66 | static struct tracer_opt dummy_tracer_opt[] = { | 66 | static struct tracer_opt dummy_tracer_opt[] = { |
67 | { } | 67 | { } |
68 | }; | 68 | }; |
69 | 69 | ||
70 | static struct tracer_flags dummy_tracer_flags = { | 70 | static struct tracer_flags dummy_tracer_flags = { |
71 | .val = 0, | 71 | .val = 0, |
72 | .opts = dummy_tracer_opt | 72 | .opts = dummy_tracer_opt |
73 | }; | 73 | }; |
74 | 74 | ||
75 | static int dummy_set_flag(u32 old_flags, u32 bit, int set) | 75 | static int dummy_set_flag(u32 old_flags, u32 bit, int set) |
76 | { | 76 | { |
77 | return 0; | 77 | return 0; |
78 | } | 78 | } |
79 | 79 | ||
80 | /* | 80 | /* |
81 | * Kill all tracing for good (never come back). | 81 | * Kill all tracing for good (never come back). |
82 | * It is initialized to 1 but will turn to zero if the initialization | 82 | * It is initialized to 1 but will turn to zero if the initialization |
83 | * of the tracer is successful. But that is the only place that sets | 83 | * of the tracer is successful. But that is the only place that sets |
84 | * this back to zero. | 84 | * this back to zero. |
85 | */ | 85 | */ |
86 | static int tracing_disabled = 1; | 86 | static int tracing_disabled = 1; |
87 | 87 | ||
88 | DEFINE_PER_CPU(int, ftrace_cpu_disabled); | 88 | DEFINE_PER_CPU(int, ftrace_cpu_disabled); |
89 | 89 | ||
90 | cpumask_var_t __read_mostly tracing_buffer_mask; | 90 | cpumask_var_t __read_mostly tracing_buffer_mask; |
91 | 91 | ||
92 | /* | 92 | /* |
93 | * ftrace_dump_on_oops - variable to dump ftrace buffer on oops | 93 | * ftrace_dump_on_oops - variable to dump ftrace buffer on oops |
94 | * | 94 | * |
95 | * If there is an oops (or kernel panic) and the ftrace_dump_on_oops | 95 | * If there is an oops (or kernel panic) and the ftrace_dump_on_oops |
96 | * is set, then ftrace_dump is called. This will output the contents | 96 | * is set, then ftrace_dump is called. This will output the contents |
97 | * of the ftrace buffers to the console. This is very useful for | 97 | * of the ftrace buffers to the console. This is very useful for |
98 | * capturing traces that lead to crashes and outputting them to a | 98 | * capturing traces that lead to crashes and outputting them to a |
99 | * serial console. | 99 | * serial console. |
100 | * | 100 | * |
101 | * It is off by default, but you can enable it either by specifying | 101 | * It is off by default, but you can enable it either by specifying |
102 | * "ftrace_dump_on_oops" on the kernel command line, or by setting | 102 | * "ftrace_dump_on_oops" on the kernel command line, or by setting |
103 | * /proc/sys/kernel/ftrace_dump_on_oops | 103 | * /proc/sys/kernel/ftrace_dump_on_oops |
104 | * Set 1 if you want to dump buffers of all CPUs | 104 | * Set 1 if you want to dump buffers of all CPUs |
105 | * Set 2 if you want to dump the buffer of the CPU that triggered oops | 105 | * Set 2 if you want to dump the buffer of the CPU that triggered oops |
106 | */ | 106 | */ |
107 | 107 | ||
108 | enum ftrace_dump_mode ftrace_dump_on_oops; | 108 | enum ftrace_dump_mode ftrace_dump_on_oops; |
109 | 109 | ||
110 | static int tracing_set_tracer(const char *buf); | 110 | static int tracing_set_tracer(const char *buf); |
111 | 111 | ||
112 | #define MAX_TRACER_SIZE 100 | 112 | #define MAX_TRACER_SIZE 100 |
113 | static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata; | 113 | static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata; |
114 | static char *default_bootup_tracer; | 114 | static char *default_bootup_tracer; |
115 | 115 | ||
116 | static int __init set_cmdline_ftrace(char *str) | 116 | static int __init set_cmdline_ftrace(char *str) |
117 | { | 117 | { |
118 | strncpy(bootup_tracer_buf, str, MAX_TRACER_SIZE); | 118 | strncpy(bootup_tracer_buf, str, MAX_TRACER_SIZE); |
119 | default_bootup_tracer = bootup_tracer_buf; | 119 | default_bootup_tracer = bootup_tracer_buf; |
120 | /* We are using ftrace early, expand it */ | 120 | /* We are using ftrace early, expand it */ |
121 | ring_buffer_expanded = 1; | 121 | ring_buffer_expanded = 1; |
122 | return 1; | 122 | return 1; |
123 | } | 123 | } |
124 | __setup("ftrace=", set_cmdline_ftrace); | 124 | __setup("ftrace=", set_cmdline_ftrace); |
125 | 125 | ||
126 | static int __init set_ftrace_dump_on_oops(char *str) | 126 | static int __init set_ftrace_dump_on_oops(char *str) |
127 | { | 127 | { |
128 | if (*str++ != '=' || !*str) { | 128 | if (*str++ != '=' || !*str) { |
129 | ftrace_dump_on_oops = DUMP_ALL; | 129 | ftrace_dump_on_oops = DUMP_ALL; |
130 | return 1; | 130 | return 1; |
131 | } | 131 | } |
132 | 132 | ||
133 | if (!strcmp("orig_cpu", str)) { | 133 | if (!strcmp("orig_cpu", str)) { |
134 | ftrace_dump_on_oops = DUMP_ORIG; | 134 | ftrace_dump_on_oops = DUMP_ORIG; |
135 | return 1; | 135 | return 1; |
136 | } | 136 | } |
137 | 137 | ||
138 | return 0; | 138 | return 0; |
139 | } | 139 | } |
140 | __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops); | 140 | __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops); |
141 | 141 | ||
142 | unsigned long long ns2usecs(cycle_t nsec) | 142 | unsigned long long ns2usecs(cycle_t nsec) |
143 | { | 143 | { |
144 | nsec += 500; | 144 | nsec += 500; |
145 | do_div(nsec, 1000); | 145 | do_div(nsec, 1000); |
146 | return nsec; | 146 | return nsec; |
147 | } | 147 | } |
148 | 148 | ||
149 | /* | 149 | /* |
150 | * The global_trace is the descriptor that holds the tracing | 150 | * The global_trace is the descriptor that holds the tracing |
151 | * buffers for the live tracing. For each CPU, it contains | 151 | * buffers for the live tracing. For each CPU, it contains |
152 | * a linked list of pages that will store trace entries. The | 152 | * a linked list of pages that will store trace entries. The |
153 | * page descriptor of the pages in the memory is used to hold | 153 | * page descriptor of the pages in the memory is used to hold |
154 | * the linked list by linking the lru item in the page descriptor | 154 | * the linked list by linking the lru item in the page descriptor |
155 | * to each of the pages in the buffer per CPU. | 155 | * to each of the pages in the buffer per CPU. |
156 | * | 156 | * |
157 | * For each active CPU there is a data field that holds the | 157 | * For each active CPU there is a data field that holds the |
158 | * pages for the buffer for that CPU. Each CPU has the same number | 158 | * pages for the buffer for that CPU. Each CPU has the same number |
159 | * of pages allocated for its buffer. | 159 | * of pages allocated for its buffer. |
160 | */ | 160 | */ |
161 | static struct trace_array global_trace; | 161 | static struct trace_array global_trace; |
162 | 162 | ||
163 | static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu); | 163 | static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu); |
164 | 164 | ||
165 | int filter_current_check_discard(struct ring_buffer *buffer, | 165 | int filter_current_check_discard(struct ring_buffer *buffer, |
166 | struct ftrace_event_call *call, void *rec, | 166 | struct ftrace_event_call *call, void *rec, |
167 | struct ring_buffer_event *event) | 167 | struct ring_buffer_event *event) |
168 | { | 168 | { |
169 | return filter_check_discard(call, rec, buffer, event); | 169 | return filter_check_discard(call, rec, buffer, event); |
170 | } | 170 | } |
171 | EXPORT_SYMBOL_GPL(filter_current_check_discard); | 171 | EXPORT_SYMBOL_GPL(filter_current_check_discard); |
172 | 172 | ||
173 | cycle_t ftrace_now(int cpu) | 173 | cycle_t ftrace_now(int cpu) |
174 | { | 174 | { |
175 | u64 ts; | 175 | u64 ts; |
176 | 176 | ||
177 | /* Early boot up does not have a buffer yet */ | 177 | /* Early boot up does not have a buffer yet */ |
178 | if (!global_trace.buffer) | 178 | if (!global_trace.buffer) |
179 | return trace_clock_local(); | 179 | return trace_clock_local(); |
180 | 180 | ||
181 | ts = ring_buffer_time_stamp(global_trace.buffer, cpu); | 181 | ts = ring_buffer_time_stamp(global_trace.buffer, cpu); |
182 | ring_buffer_normalize_time_stamp(global_trace.buffer, cpu, &ts); | 182 | ring_buffer_normalize_time_stamp(global_trace.buffer, cpu, &ts); |
183 | 183 | ||
184 | return ts; | 184 | return ts; |
185 | } | 185 | } |
186 | 186 | ||
187 | /* | 187 | /* |
188 | * The max_tr is used to snapshot the global_trace when a maximum | 188 | * The max_tr is used to snapshot the global_trace when a maximum |
189 | * latency is reached. Some tracers will use this to store a maximum | 189 | * latency is reached. Some tracers will use this to store a maximum |
190 | * trace while it continues examining live traces. | 190 | * trace while it continues examining live traces. |
191 | * | 191 | * |
192 | * The buffers for the max_tr are set up the same as the global_trace. | 192 | * The buffers for the max_tr are set up the same as the global_trace. |
193 | * When a snapshot is taken, the linked list of the max_tr is swapped | 193 | * When a snapshot is taken, the linked list of the max_tr is swapped |
194 | * with the linked list of the global_trace and the buffers are reset for | 194 | * with the linked list of the global_trace and the buffers are reset for |
195 | * the global_trace so the tracing can continue. | 195 | * the global_trace so the tracing can continue. |
196 | */ | 196 | */ |
197 | static struct trace_array max_tr; | 197 | static struct trace_array max_tr; |
198 | 198 | ||
199 | static DEFINE_PER_CPU(struct trace_array_cpu, max_tr_data); | 199 | static DEFINE_PER_CPU(struct trace_array_cpu, max_tr_data); |
200 | 200 | ||
201 | /* tracer_enabled is used to toggle activation of a tracer */ | 201 | /* tracer_enabled is used to toggle activation of a tracer */ |
202 | static int tracer_enabled = 1; | 202 | static int tracer_enabled = 1; |
203 | 203 | ||
204 | /** | 204 | /** |
205 | * tracing_is_enabled - return tracer_enabled status | 205 | * tracing_is_enabled - return tracer_enabled status |
206 | * | 206 | * |
207 | * This function is used by other tracers to know the status | 207 | * This function is used by other tracers to know the status |
208 | * of the tracer_enabled flag. Tracers may use this function | 208 | * of the tracer_enabled flag. Tracers may use this function |
209 | * to know if they should enable their features when starting | 209 | * to know if they should enable their features when starting |
210 | * up. See irqsoff tracer for an example (start_irqsoff_tracer). | 210 | * up. See irqsoff tracer for an example (start_irqsoff_tracer). |
211 | */ | 211 | */ |
212 | int tracing_is_enabled(void) | 212 | int tracing_is_enabled(void) |
213 | { | 213 | { |
214 | return tracer_enabled; | 214 | return tracer_enabled; |
215 | } | 215 | } |
216 | 216 | ||
217 | /* | 217 | /* |
218 | * trace_buf_size is the size in bytes that is allocated | 218 | * trace_buf_size is the size in bytes that is allocated |
219 | * for a buffer. Note, the number of bytes is always rounded | 219 | * for a buffer. Note, the number of bytes is always rounded |
220 | * to page size. | 220 | * to page size. |
221 | * | 221 | * |
222 | * This number is purposely set to the low value of 16384. | 222 | * This number is purposely set to the low value of 16384. |
223 | * If a dump on oops happens, it is much appreciated | 223 | * If a dump on oops happens, it is much appreciated |
224 | * not to have to wait for all that output. Anyway, this is | 224 | * not to have to wait for all that output. Anyway, this is |
225 | * configurable at both boot time and run time. | 225 | * configurable at both boot time and run time. |
226 | */ | 226 | */ |
227 | #define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */ | 227 | #define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */ |
228 | 228 | ||
229 | static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT; | 229 | static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT; |
230 | 230 | ||
231 | /* trace_types holds a link list of available tracers. */ | 231 | /* trace_types holds a link list of available tracers. */ |
232 | static struct tracer *trace_types __read_mostly; | 232 | static struct tracer *trace_types __read_mostly; |
233 | 233 | ||
234 | /* current_trace points to the tracer that is currently active */ | 234 | /* current_trace points to the tracer that is currently active */ |
235 | static struct tracer *current_trace __read_mostly; | 235 | static struct tracer *current_trace __read_mostly; |
236 | 236 | ||
237 | /* | 237 | /* |
238 | * trace_types_lock is used to protect the trace_types list. | 238 | * trace_types_lock is used to protect the trace_types list. |
239 | */ | 239 | */ |
240 | static DEFINE_MUTEX(trace_types_lock); | 240 | static DEFINE_MUTEX(trace_types_lock); |
241 | 241 | ||
242 | /* | 242 | /* |
243 | * serialize the access of the ring buffer | 243 | * serialize the access of the ring buffer |
244 | * | 244 | * |
245 | * The ring buffer serializes readers, but that is only low-level protection. | 245 | * The ring buffer serializes readers, but that is only low-level protection. |
246 | * The validity of the events (returned by ring_buffer_peek(), etc.) | 246 | * The validity of the events (returned by ring_buffer_peek(), etc.) |
247 | * is not protected by the ring buffer. | 247 | * is not protected by the ring buffer. |
248 | * | 248 | * |
249 | * The content of events may become garbage if we allow another process to consume | 249 | * The content of events may become garbage if we allow another process to consume |
250 | * these events concurrently: | 250 | * these events concurrently: |
251 | * A) the page of the consumed events may become a normal page | 251 | * A) the page of the consumed events may become a normal page |
252 | * (not a reader page) in the ring buffer, and this page will be rewritten | 252 | * (not a reader page) in the ring buffer, and this page will be rewritten |
253 | * by the event producer. | 253 | * by the event producer. |
254 | * B) The page of the consumed events may become a page for splice_read, | 254 | * B) The page of the consumed events may become a page for splice_read, |
255 | * and this page will be returned to the system. | 255 | * and this page will be returned to the system. |
256 | * | 256 | * |
257 | * These primitives allow multiple processes to access different cpu ring buffers | 257 | * These primitives allow multiple processes to access different cpu ring buffers |
258 | * concurrently. | 258 | * concurrently. |
259 | * | 259 | * |
260 | * These primitives don't distinguish between read-only and read-consume access. | 260 | * These primitives don't distinguish between read-only and read-consume access. |
261 | * Multiple read-only accesses are also serialized. | 261 | * Multiple read-only accesses are also serialized. |
262 | */ | 262 | */ |
263 | 263 | ||
264 | #ifdef CONFIG_SMP | 264 | #ifdef CONFIG_SMP |
265 | static DECLARE_RWSEM(all_cpu_access_lock); | 265 | static DECLARE_RWSEM(all_cpu_access_lock); |
266 | static DEFINE_PER_CPU(struct mutex, cpu_access_lock); | 266 | static DEFINE_PER_CPU(struct mutex, cpu_access_lock); |
267 | 267 | ||
268 | static inline void trace_access_lock(int cpu) | 268 | static inline void trace_access_lock(int cpu) |
269 | { | 269 | { |
270 | if (cpu == TRACE_PIPE_ALL_CPU) { | 270 | if (cpu == TRACE_PIPE_ALL_CPU) { |
271 | /* gain it for accessing the whole ring buffer. */ | 271 | /* gain it for accessing the whole ring buffer. */ |
272 | down_write(&all_cpu_access_lock); | 272 | down_write(&all_cpu_access_lock); |
273 | } else { | 273 | } else { |
274 | /* gain it for accessing a cpu ring buffer. */ | 274 | /* gain it for accessing a cpu ring buffer. */ |
275 | 275 | ||
276 | /* Firstly block other trace_access_lock(TRACE_PIPE_ALL_CPU). */ | 276 | /* Firstly block other trace_access_lock(TRACE_PIPE_ALL_CPU). */ |
277 | down_read(&all_cpu_access_lock); | 277 | down_read(&all_cpu_access_lock); |
278 | 278 | ||
279 | /* Secondly block other access to this @cpu ring buffer. */ | 279 | /* Secondly block other access to this @cpu ring buffer. */ |
280 | mutex_lock(&per_cpu(cpu_access_lock, cpu)); | 280 | mutex_lock(&per_cpu(cpu_access_lock, cpu)); |
281 | } | 281 | } |
282 | } | 282 | } |
283 | 283 | ||
284 | static inline void trace_access_unlock(int cpu) | 284 | static inline void trace_access_unlock(int cpu) |
285 | { | 285 | { |
286 | if (cpu == TRACE_PIPE_ALL_CPU) { | 286 | if (cpu == TRACE_PIPE_ALL_CPU) { |
287 | up_write(&all_cpu_access_lock); | 287 | up_write(&all_cpu_access_lock); |
288 | } else { | 288 | } else { |
289 | mutex_unlock(&per_cpu(cpu_access_lock, cpu)); | 289 | mutex_unlock(&per_cpu(cpu_access_lock, cpu)); |
290 | up_read(&all_cpu_access_lock); | 290 | up_read(&all_cpu_access_lock); |
291 | } | 291 | } |
292 | } | 292 | } |
293 | 293 | ||
294 | static inline void trace_access_lock_init(void) | 294 | static inline void trace_access_lock_init(void) |
295 | { | 295 | { |
296 | int cpu; | 296 | int cpu; |
297 | 297 | ||
298 | for_each_possible_cpu(cpu) | 298 | for_each_possible_cpu(cpu) |
299 | mutex_init(&per_cpu(cpu_access_lock, cpu)); | 299 | mutex_init(&per_cpu(cpu_access_lock, cpu)); |
300 | } | 300 | } |
301 | 301 | ||
302 | #else | 302 | #else |
303 | 303 | ||
304 | static DEFINE_MUTEX(access_lock); | 304 | static DEFINE_MUTEX(access_lock); |
305 | 305 | ||
306 | static inline void trace_access_lock(int cpu) | 306 | static inline void trace_access_lock(int cpu) |
307 | { | 307 | { |
308 | (void)cpu; | 308 | (void)cpu; |
309 | mutex_lock(&access_lock); | 309 | mutex_lock(&access_lock); |
310 | } | 310 | } |
311 | 311 | ||
312 | static inline void trace_access_unlock(int cpu) | 312 | static inline void trace_access_unlock(int cpu) |
313 | { | 313 | { |
314 | (void)cpu; | 314 | (void)cpu; |
315 | mutex_unlock(&access_lock); | 315 | mutex_unlock(&access_lock); |
316 | } | 316 | } |
317 | 317 | ||
318 | static inline void trace_access_lock_init(void) | 318 | static inline void trace_access_lock_init(void) |
319 | { | 319 | { |
320 | } | 320 | } |
321 | 321 | ||
322 | #endif | 322 | #endif |
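The primitives above are static to this file, so the following is only a sketch of the calling pattern the read and splice paths below follow, not an external API; consume_cpu_events() stands in for the actual peek/consume loop.

/* hypothetical consume loop, assumed to live in this file */
static void consume_cpu_events(struct trace_iterator *iter, int cpu);

/* Sketch (within this file): consume one CPU's events under the access lock. */
static void read_one_cpu(struct trace_iterator *iter, int cpu)
{
	trace_access_lock(cpu);		/* excludes TRACE_PIPE_ALL_CPU readers */
	consume_cpu_events(iter, cpu);	/* hypothetical peek/consume loop */
	trace_access_unlock(cpu);
}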
323 | 323 | ||
324 | /* trace_wait is a waitqueue for tasks blocked on trace_poll */ | 324 | /* trace_wait is a waitqueue for tasks blocked on trace_poll */ |
325 | static DECLARE_WAIT_QUEUE_HEAD(trace_wait); | 325 | static DECLARE_WAIT_QUEUE_HEAD(trace_wait); |
326 | 326 | ||
327 | /* trace_flags holds trace_options default values */ | 327 | /* trace_flags holds trace_options default values */ |
328 | unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | | 328 | unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | |
329 | TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME | | 329 | TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME | |
330 | TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | | 330 | TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | |
331 | TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS; | 331 | TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS; |
332 | 332 | ||
333 | static int trace_stop_count; | 333 | static int trace_stop_count; |
334 | static DEFINE_RAW_SPINLOCK(tracing_start_lock); | 334 | static DEFINE_RAW_SPINLOCK(tracing_start_lock); |
335 | 335 | ||
336 | static void wakeup_work_handler(struct work_struct *work) | 336 | static void wakeup_work_handler(struct work_struct *work) |
337 | { | 337 | { |
338 | wake_up(&trace_wait); | 338 | wake_up(&trace_wait); |
339 | } | 339 | } |
340 | 340 | ||
341 | static DECLARE_DELAYED_WORK(wakeup_work, wakeup_work_handler); | 341 | static DECLARE_DELAYED_WORK(wakeup_work, wakeup_work_handler); |
342 | 342 | ||
343 | /** | 343 | /** |
344 | * tracing_on - enable tracing buffers | 344 | * tracing_on - enable tracing buffers |
345 | * | 345 | * |
346 | * This function enables tracing buffers that may have been | 346 | * This function enables tracing buffers that may have been |
347 | * disabled with tracing_off. | 347 | * disabled with tracing_off. |
348 | */ | 348 | */ |
349 | void tracing_on(void) | 349 | void tracing_on(void) |
350 | { | 350 | { |
351 | if (global_trace.buffer) | 351 | if (global_trace.buffer) |
352 | ring_buffer_record_on(global_trace.buffer); | 352 | ring_buffer_record_on(global_trace.buffer); |
353 | /* | 353 | /* |
354 | * This flag is only looked at when buffers haven't been | 354 | * This flag is only looked at when buffers haven't been |
355 | * allocated yet. We don't really care about the race | 355 | * allocated yet. We don't really care about the race |
356 | * between setting this flag and actually turning | 356 | * between setting this flag and actually turning |
357 | * on the buffer. | 357 | * on the buffer. |
358 | */ | 358 | */ |
359 | global_trace.buffer_disabled = 0; | 359 | global_trace.buffer_disabled = 0; |
360 | } | 360 | } |
361 | EXPORT_SYMBOL_GPL(tracing_on); | 361 | EXPORT_SYMBOL_GPL(tracing_on); |
362 | 362 | ||
363 | /** | 363 | /** |
364 | * tracing_off - turn off tracing buffers | 364 | * tracing_off - turn off tracing buffers |
365 | * | 365 | * |
366 | * This function stops the tracing buffers from recording data. | 366 | * This function stops the tracing buffers from recording data. |
367 | * It does not disable any overhead the tracers themselves may | 367 | * It does not disable any overhead the tracers themselves may |
368 | * be causing. This function simply causes all recording to | 368 | * be causing. This function simply causes all recording to |
369 | * the ring buffers to fail. | 369 | * the ring buffers to fail. |
370 | */ | 370 | */ |
371 | void tracing_off(void) | 371 | void tracing_off(void) |
372 | { | 372 | { |
373 | if (global_trace.buffer) | 373 | if (global_trace.buffer) |
374 | ring_buffer_record_off(global_trace.buffer); | 374 | ring_buffer_record_off(global_trace.buffer); |
375 | /* | 375 | /* |
376 | * This flag is only looked at when buffers haven't been | 376 | * This flag is only looked at when buffers haven't been |
377 | * allocated yet. We don't really care about the race | 377 | * allocated yet. We don't really care about the race |
378 | * between setting this flag and actually turning | 378 | * between setting this flag and actually turning |
379 | * on the buffer. | 379 | * on the buffer. |
380 | */ | 380 | */ |
381 | global_trace.buffer_disabled = 1; | 381 | global_trace.buffer_disabled = 1; |
382 | } | 382 | } |
383 | EXPORT_SYMBOL_GPL(tracing_off); | 383 | EXPORT_SYMBOL_GPL(tracing_off); |
384 | 384 | ||
385 | /** | 385 | /** |
386 | * tracing_is_on - show state of ring buffers enabled | 386 | * tracing_is_on - show state of ring buffers enabled |
387 | */ | 387 | */ |
388 | int tracing_is_on(void) | 388 | int tracing_is_on(void) |
389 | { | 389 | { |
390 | if (global_trace.buffer) | 390 | if (global_trace.buffer) |
391 | return ring_buffer_record_is_on(global_trace.buffer); | 391 | return ring_buffer_record_is_on(global_trace.buffer); |
392 | return !global_trace.buffer_disabled; | 392 | return !global_trace.buffer_disabled; |
393 | } | 393 | } |
394 | EXPORT_SYMBOL_GPL(tracing_is_on); | 394 | EXPORT_SYMBOL_GPL(tracing_is_on); |
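For reference, a hedged sketch of how kernel code elsewhere typically uses these exports to freeze the buffer around an interesting condition; struct my_device and BROKEN_STATE are made up for the example, while tracing_is_on()/tracing_off()/trace_printk() are the real interfaces.

#include <linux/kernel.h>

struct my_device {			/* hypothetical device state */
	const char *name;
	int state;
};
#define BROKEN_STATE	-1		/* hypothetical error value */

/* Sketch: stop recording the moment a suspected bug is detected. */
static void check_state(struct my_device *dev)
{
	if (!tracing_is_on())
		return;			/* already frozen by an earlier hit */

	if (dev->state == BROKEN_STATE) {
		trace_printk("freezing trace: bad state on %s\n", dev->name);
		tracing_off();		/* keep the events leading up to here */
	}
}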
395 | 395 | ||
396 | /** | 396 | /** |
397 | * trace_wake_up - wake up tasks waiting for trace input | 397 | * trace_wake_up - wake up tasks waiting for trace input |
398 | * | 398 | * |
399 | * Schedules a delayed work to wake up any task that is blocked on the | 399 | * Schedules a delayed work to wake up any task that is blocked on the |
400 | * trace_wait queue. This is used with trace_poll for tasks polling the | 400 | * trace_wait queue. This is used with trace_poll for tasks polling the |
401 | * trace. | 401 | * trace. |
402 | */ | 402 | */ |
403 | void trace_wake_up(void) | 403 | void trace_wake_up(void) |
404 | { | 404 | { |
405 | const unsigned long delay = msecs_to_jiffies(2); | 405 | const unsigned long delay = msecs_to_jiffies(2); |
406 | 406 | ||
407 | if (trace_flags & TRACE_ITER_BLOCK) | 407 | if (trace_flags & TRACE_ITER_BLOCK) |
408 | return; | 408 | return; |
409 | schedule_delayed_work(&wakeup_work, delay); | 409 | schedule_delayed_work(&wakeup_work, delay); |
410 | } | 410 | } |
411 | 411 | ||
412 | static int __init set_buf_size(char *str) | 412 | static int __init set_buf_size(char *str) |
413 | { | 413 | { |
414 | unsigned long buf_size; | 414 | unsigned long buf_size; |
415 | 415 | ||
416 | if (!str) | 416 | if (!str) |
417 | return 0; | 417 | return 0; |
418 | buf_size = memparse(str, &str); | 418 | buf_size = memparse(str, &str); |
419 | /* nr_entries can not be zero */ | 419 | /* nr_entries can not be zero */ |
420 | if (buf_size == 0) | 420 | if (buf_size == 0) |
421 | return 0; | 421 | return 0; |
422 | trace_buf_size = buf_size; | 422 | trace_buf_size = buf_size; |
423 | return 1; | 423 | return 1; |
424 | } | 424 | } |
425 | __setup("trace_buf_size=", set_buf_size); | 425 | __setup("trace_buf_size=", set_buf_size); |
426 | 426 | ||
427 | static int __init set_tracing_thresh(char *str) | 427 | static int __init set_tracing_thresh(char *str) |
428 | { | 428 | { |
429 | unsigned long threshold; | 429 | unsigned long threshold; |
430 | int ret; | 430 | int ret; |
431 | 431 | ||
432 | if (!str) | 432 | if (!str) |
433 | return 0; | 433 | return 0; |
434 | ret = kstrtoul(str, 0, &threshold); | 434 | ret = kstrtoul(str, 0, &threshold); |
435 | if (ret < 0) | 435 | if (ret < 0) |
436 | return 0; | 436 | return 0; |
437 | tracing_thresh = threshold * 1000; | 437 | tracing_thresh = threshold * 1000; |
438 | return 1; | 438 | return 1; |
439 | } | 439 | } |
440 | __setup("tracing_thresh=", set_tracing_thresh); | 440 | __setup("tracing_thresh=", set_tracing_thresh); |
441 | 441 | ||
442 | unsigned long nsecs_to_usecs(unsigned long nsecs) | 442 | unsigned long nsecs_to_usecs(unsigned long nsecs) |
443 | { | 443 | { |
444 | return nsecs / 1000; | 444 | return nsecs / 1000; |
445 | } | 445 | } |
446 | 446 | ||
447 | /* These must match the bit positions in trace_iterator_flags */ | 447 | /* These must match the bit positions in trace_iterator_flags */ |
448 | static const char *trace_options[] = { | 448 | static const char *trace_options[] = { |
449 | "print-parent", | 449 | "print-parent", |
450 | "sym-offset", | 450 | "sym-offset", |
451 | "sym-addr", | 451 | "sym-addr", |
452 | "verbose", | 452 | "verbose", |
453 | "raw", | 453 | "raw", |
454 | "hex", | 454 | "hex", |
455 | "bin", | 455 | "bin", |
456 | "block", | 456 | "block", |
457 | "stacktrace", | 457 | "stacktrace", |
458 | "trace_printk", | 458 | "trace_printk", |
459 | "ftrace_preempt", | 459 | "ftrace_preempt", |
460 | "branch", | 460 | "branch", |
461 | "annotate", | 461 | "annotate", |
462 | "userstacktrace", | 462 | "userstacktrace", |
463 | "sym-userobj", | 463 | "sym-userobj", |
464 | "printk-msg-only", | 464 | "printk-msg-only", |
465 | "context-info", | 465 | "context-info", |
466 | "latency-format", | 466 | "latency-format", |
467 | "sleep-time", | 467 | "sleep-time", |
468 | "graph-time", | 468 | "graph-time", |
469 | "record-cmd", | 469 | "record-cmd", |
470 | "overwrite", | 470 | "overwrite", |
471 | "disable_on_free", | 471 | "disable_on_free", |
472 | "irq-info", | 472 | "irq-info", |
473 | "markers", | 473 | "markers", |
474 | NULL | 474 | NULL |
475 | }; | 475 | }; |
476 | 476 | ||
477 | static struct { | 477 | static struct { |
478 | u64 (*func)(void); | 478 | u64 (*func)(void); |
479 | const char *name; | 479 | const char *name; |
480 | } trace_clocks[] = { | 480 | } trace_clocks[] = { |
481 | { trace_clock_local, "local" }, | 481 | { trace_clock_local, "local" }, |
482 | { trace_clock_global, "global" }, | 482 | { trace_clock_global, "global" }, |
483 | { trace_clock_counter, "counter" }, | 483 | { trace_clock_counter, "counter" }, |
484 | }; | 484 | }; |
485 | 485 | ||
486 | int trace_clock_id; | 486 | int trace_clock_id; |
487 | 487 | ||
488 | /* | 488 | /* |
489 | * trace_parser_get_init - gets the buffer for trace parser | 489 | * trace_parser_get_init - gets the buffer for trace parser |
490 | */ | 490 | */ |
491 | int trace_parser_get_init(struct trace_parser *parser, int size) | 491 | int trace_parser_get_init(struct trace_parser *parser, int size) |
492 | { | 492 | { |
493 | memset(parser, 0, sizeof(*parser)); | 493 | memset(parser, 0, sizeof(*parser)); |
494 | 494 | ||
495 | parser->buffer = kmalloc(size, GFP_KERNEL); | 495 | parser->buffer = kmalloc(size, GFP_KERNEL); |
496 | if (!parser->buffer) | 496 | if (!parser->buffer) |
497 | return 1; | 497 | return 1; |
498 | 498 | ||
499 | parser->size = size; | 499 | parser->size = size; |
500 | return 0; | 500 | return 0; |
501 | } | 501 | } |
502 | 502 | ||
503 | /* | 503 | /* |
504 | * trace_parser_put - frees the buffer for the trace parser | 504 | * trace_parser_put - frees the buffer for the trace parser |
505 | */ | 505 | */ |
506 | void trace_parser_put(struct trace_parser *parser) | 506 | void trace_parser_put(struct trace_parser *parser) |
507 | { | 507 | { |
508 | kfree(parser->buffer); | 508 | kfree(parser->buffer); |
509 | } | 509 | } |
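A minimal sketch of how these two helpers pair up in practice; the buffer size and error value are illustrative only, not taken from this commit:

    struct trace_parser parser;

    if (trace_parser_get_init(&parser, 64))    /* non-zero on allocation failure */
            return -ENOMEM;
    /* ... feed user writes through trace_get_user() ... */
    trace_parser_put(&parser);                 /* frees parser.buffer */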
510 | 510 | ||
511 | /* | 511 | /* |
512 | * trace_get_user - reads the user input string separated by space | 512 | * trace_get_user - reads the user input string separated by space |
513 | * (matched by isspace(ch)) | 513 | * (matched by isspace(ch)) |
514 | * | 514 | * |
515 | * For each string found the 'struct trace_parser' is updated, | 515 | * For each string found the 'struct trace_parser' is updated, |
516 | * and the function returns. | 516 | * and the function returns. |
517 | * | 517 | * |
518 | * Returns number of bytes read. | 518 | * Returns number of bytes read. |
519 | * | 519 | * |
520 | * See kernel/trace/trace.h for 'struct trace_parser' details. | 520 | * See kernel/trace/trace.h for 'struct trace_parser' details. |
521 | */ | 521 | */ |
522 | int trace_get_user(struct trace_parser *parser, const char __user *ubuf, | 522 | int trace_get_user(struct trace_parser *parser, const char __user *ubuf, |
523 | size_t cnt, loff_t *ppos) | 523 | size_t cnt, loff_t *ppos) |
524 | { | 524 | { |
525 | char ch; | 525 | char ch; |
526 | size_t read = 0; | 526 | size_t read = 0; |
527 | ssize_t ret; | 527 | ssize_t ret; |
528 | 528 | ||
529 | if (!*ppos) | 529 | if (!*ppos) |
530 | trace_parser_clear(parser); | 530 | trace_parser_clear(parser); |
531 | 531 | ||
532 | ret = get_user(ch, ubuf++); | 532 | ret = get_user(ch, ubuf++); |
533 | if (ret) | 533 | if (ret) |
534 | goto out; | 534 | goto out; |
535 | 535 | ||
536 | read++; | 536 | read++; |
537 | cnt--; | 537 | cnt--; |
538 | 538 | ||
539 | /* | 539 | /* |
540 | * If the parser is not finished with the last write, | 540 | * If the parser is not finished with the last write, |
541 | * continue reading the user input without skipping spaces. | 541 | * continue reading the user input without skipping spaces. |
542 | */ | 542 | */ |
543 | if (!parser->cont) { | 543 | if (!parser->cont) { |
544 | /* skip white space */ | 544 | /* skip white space */ |
545 | while (cnt && isspace(ch)) { | 545 | while (cnt && isspace(ch)) { |
546 | ret = get_user(ch, ubuf++); | 546 | ret = get_user(ch, ubuf++); |
547 | if (ret) | 547 | if (ret) |
548 | goto out; | 548 | goto out; |
549 | read++; | 549 | read++; |
550 | cnt--; | 550 | cnt--; |
551 | } | 551 | } |
552 | 552 | ||
553 | /* only spaces were written */ | 553 | /* only spaces were written */ |
554 | if (isspace(ch)) { | 554 | if (isspace(ch)) { |
555 | *ppos += read; | 555 | *ppos += read; |
556 | ret = read; | 556 | ret = read; |
557 | goto out; | 557 | goto out; |
558 | } | 558 | } |
559 | 559 | ||
560 | parser->idx = 0; | 560 | parser->idx = 0; |
561 | } | 561 | } |
562 | 562 | ||
563 | /* read the non-space input */ | 563 | /* read the non-space input */ |
564 | while (cnt && !isspace(ch)) { | 564 | while (cnt && !isspace(ch)) { |
565 | if (parser->idx < parser->size - 1) | 565 | if (parser->idx < parser->size - 1) |
566 | parser->buffer[parser->idx++] = ch; | 566 | parser->buffer[parser->idx++] = ch; |
567 | else { | 567 | else { |
568 | ret = -EINVAL; | 568 | ret = -EINVAL; |
569 | goto out; | 569 | goto out; |
570 | } | 570 | } |
571 | ret = get_user(ch, ubuf++); | 571 | ret = get_user(ch, ubuf++); |
572 | if (ret) | 572 | if (ret) |
573 | goto out; | 573 | goto out; |
574 | read++; | 574 | read++; |
575 | cnt--; | 575 | cnt--; |
576 | } | 576 | } |
577 | 577 | ||
578 | /* We either got finished input or we have to wait for another call. */ | 578 | /* We either got finished input or we have to wait for another call. */ |
579 | if (isspace(ch)) { | 579 | if (isspace(ch)) { |
580 | parser->buffer[parser->idx] = 0; | 580 | parser->buffer[parser->idx] = 0; |
581 | parser->cont = false; | 581 | parser->cont = false; |
582 | } else { | 582 | } else { |
583 | parser->cont = true; | 583 | parser->cont = true; |
584 | parser->buffer[parser->idx++] = ch; | 584 | parser->buffer[parser->idx++] = ch; |
585 | } | 585 | } |
586 | 586 | ||
587 | *ppos += read; | 587 | *ppos += read; |
588 | ret = read; | 588 | ret = read; |
589 | 589 | ||
590 | out: | 590 | out: |
591 | return ret; | 591 | return ret; |
592 | } | 592 | } |
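For orientation, a hedged sketch of how a debugfs write handler might drive trace_get_user(); the handler and consume_token() are hypothetical, but the return-value and parser.cont handling follow the function above:

    static ssize_t example_write(struct file *filp, const char __user *ubuf,
                                 size_t cnt, loff_t *ppos)
    {
            struct trace_parser parser;
            ssize_t read;

            if (trace_parser_get_init(&parser, 64))
                    return -ENOMEM;

            read = trace_get_user(&parser, ubuf, cnt, ppos);
            if (read > 0 && !parser.cont)
                    consume_token(parser.buffer);  /* hypothetical: buffer now holds a NUL-terminated word */

            trace_parser_put(&parser);
            return read;
    }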
593 | 593 | ||
594 | ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt) | 594 | ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt) |
595 | { | 595 | { |
596 | int len; | 596 | int len; |
597 | int ret; | 597 | int ret; |
598 | 598 | ||
599 | if (!cnt) | 599 | if (!cnt) |
600 | return 0; | 600 | return 0; |
601 | 601 | ||
602 | if (s->len <= s->readpos) | 602 | if (s->len <= s->readpos) |
603 | return -EBUSY; | 603 | return -EBUSY; |
604 | 604 | ||
605 | len = s->len - s->readpos; | 605 | len = s->len - s->readpos; |
606 | if (cnt > len) | 606 | if (cnt > len) |
607 | cnt = len; | 607 | cnt = len; |
608 | ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt); | 608 | ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt); |
609 | if (ret == cnt) | 609 | if (ret == cnt) |
610 | return -EFAULT; | 610 | return -EFAULT; |
611 | 611 | ||
612 | cnt -= ret; | 612 | cnt -= ret; |
613 | 613 | ||
614 | s->readpos += cnt; | 614 | s->readpos += cnt; |
615 | return cnt; | 615 | return cnt; |
616 | } | 616 | } |
617 | 617 | ||
618 | static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt) | 618 | static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt) |
619 | { | 619 | { |
620 | int len; | 620 | int len; |
621 | 621 | ||
622 | if (s->len <= s->readpos) | 622 | if (s->len <= s->readpos) |
623 | return -EBUSY; | 623 | return -EBUSY; |
624 | 624 | ||
625 | len = s->len - s->readpos; | 625 | len = s->len - s->readpos; |
626 | if (cnt > len) | 626 | if (cnt > len) |
627 | cnt = len; | 627 | cnt = len; |
628 | memcpy(buf, s->buffer + s->readpos, cnt); | 628 | memcpy(buf, s->buffer + s->readpos, cnt); |
629 | 629 | ||
630 | s->readpos += cnt; | 630 | s->readpos += cnt; |
631 | return cnt; | 631 | return cnt; |
632 | } | 632 | } |
633 | 633 | ||
634 | /* | 634 | /* |
635 | * ftrace_max_lock is used to protect the swapping of buffers | 635 | * ftrace_max_lock is used to protect the swapping of buffers |
636 | * when taking a max snapshot. The buffers themselves are | 636 | * when taking a max snapshot. The buffers themselves are |
637 | * protected by per_cpu spinlocks. But the action of the swap | 637 | * protected by per_cpu spinlocks. But the action of the swap |
638 | * needs its own lock. | 638 | * needs its own lock. |
639 | * | 639 | * |
640 | * This is defined as an arch_spinlock_t in order to help | 640 | * This is defined as an arch_spinlock_t in order to help |
641 | * with performance when lockdep debugging is enabled. | 641 | * with performance when lockdep debugging is enabled. |
642 | * | 642 | * |
643 | * It is also used in other places outside of update_max_tr(), | 643 | * It is also used in other places outside of update_max_tr(), |
644 | * so it needs to be defined outside of the | 644 | * so it needs to be defined outside of the |
645 | * CONFIG_TRACER_MAX_TRACE. | 645 | * CONFIG_TRACER_MAX_TRACE. |
646 | */ | 646 | */ |
647 | static arch_spinlock_t ftrace_max_lock = | 647 | static arch_spinlock_t ftrace_max_lock = |
648 | (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; | 648 | (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; |
649 | 649 | ||
650 | unsigned long __read_mostly tracing_thresh; | 650 | unsigned long __read_mostly tracing_thresh; |
651 | 651 | ||
652 | #ifdef CONFIG_TRACER_MAX_TRACE | 652 | #ifdef CONFIG_TRACER_MAX_TRACE |
653 | unsigned long __read_mostly tracing_max_latency; | 653 | unsigned long __read_mostly tracing_max_latency; |
654 | 654 | ||
655 | /* | 655 | /* |
656 | * Copy the new maximum trace into the separate maximum-trace | 656 | * Copy the new maximum trace into the separate maximum-trace |
657 | * structure. (this way the maximum trace is permanently saved, | 657 | * structure. (this way the maximum trace is permanently saved, |
658 | * for later retrieval via /sys/kernel/debug/tracing/latency_trace) | 658 | * for later retrieval via /sys/kernel/debug/tracing/latency_trace) |
659 | */ | 659 | */ |
660 | static void | 660 | static void |
661 | __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) | 661 | __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) |
662 | { | 662 | { |
663 | struct trace_array_cpu *data = tr->data[cpu]; | 663 | struct trace_array_cpu *data = tr->data[cpu]; |
664 | struct trace_array_cpu *max_data; | 664 | struct trace_array_cpu *max_data; |
665 | 665 | ||
666 | max_tr.cpu = cpu; | 666 | max_tr.cpu = cpu; |
667 | max_tr.time_start = data->preempt_timestamp; | 667 | max_tr.time_start = data->preempt_timestamp; |
668 | 668 | ||
669 | max_data = max_tr.data[cpu]; | 669 | max_data = max_tr.data[cpu]; |
670 | max_data->saved_latency = tracing_max_latency; | 670 | max_data->saved_latency = tracing_max_latency; |
671 | max_data->critical_start = data->critical_start; | 671 | max_data->critical_start = data->critical_start; |
672 | max_data->critical_end = data->critical_end; | 672 | max_data->critical_end = data->critical_end; |
673 | 673 | ||
674 | memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN); | 674 | memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN); |
675 | max_data->pid = tsk->pid; | 675 | max_data->pid = tsk->pid; |
676 | max_data->uid = task_uid(tsk); | 676 | max_data->uid = task_uid(tsk); |
677 | max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO; | 677 | max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO; |
678 | max_data->policy = tsk->policy; | 678 | max_data->policy = tsk->policy; |
679 | max_data->rt_priority = tsk->rt_priority; | 679 | max_data->rt_priority = tsk->rt_priority; |
680 | 680 | ||
681 | /* record this task's comm */ | 681 | /* record this task's comm */ |
682 | tracing_record_cmdline(tsk); | 682 | tracing_record_cmdline(tsk); |
683 | } | 683 | } |
684 | 684 | ||
685 | /** | 685 | /** |
686 | * update_max_tr - snapshot all trace buffers from global_trace to max_tr | 686 | * update_max_tr - snapshot all trace buffers from global_trace to max_tr |
687 | * @tr: tracer | 687 | * @tr: tracer |
688 | * @tsk: the task with the latency | 688 | * @tsk: the task with the latency |
689 | * @cpu: The cpu that initiated the trace. | 689 | * @cpu: The cpu that initiated the trace. |
690 | * | 690 | * |
691 | * Flip the buffers between the @tr and the max_tr and record information | 691 | * Flip the buffers between the @tr and the max_tr and record information |
692 | * about which task was the cause of this latency. | 692 | * about which task was the cause of this latency. |
693 | */ | 693 | */ |
694 | void | 694 | void |
695 | update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) | 695 | update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) |
696 | { | 696 | { |
697 | struct ring_buffer *buf = tr->buffer; | 697 | struct ring_buffer *buf = tr->buffer; |
698 | 698 | ||
699 | if (trace_stop_count) | 699 | if (trace_stop_count) |
700 | return; | 700 | return; |
701 | 701 | ||
702 | WARN_ON_ONCE(!irqs_disabled()); | 702 | WARN_ON_ONCE(!irqs_disabled()); |
703 | if (!current_trace->use_max_tr) { | 703 | if (!current_trace->use_max_tr) { |
704 | WARN_ON_ONCE(1); | 704 | WARN_ON_ONCE(1); |
705 | return; | 705 | return; |
706 | } | 706 | } |
707 | arch_spin_lock(&ftrace_max_lock); | 707 | arch_spin_lock(&ftrace_max_lock); |
708 | 708 | ||
709 | tr->buffer = max_tr.buffer; | 709 | tr->buffer = max_tr.buffer; |
710 | max_tr.buffer = buf; | 710 | max_tr.buffer = buf; |
711 | 711 | ||
712 | __update_max_tr(tr, tsk, cpu); | 712 | __update_max_tr(tr, tsk, cpu); |
713 | arch_spin_unlock(&ftrace_max_lock); | 713 | arch_spin_unlock(&ftrace_max_lock); |
714 | } | 714 | } |
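As a rough illustration, a latency tracer calls this from its "new maximum found" path with interrupts already disabled (per the WARN_ON_ONCE above); the delta computation and tr pointer are assumed, not part of this commit:

    /* inside the tracer's probe, IRQs off */
    if (delta > tracing_max_latency) {
            tracing_max_latency = delta;
            update_max_tr(tr, current, smp_processor_id());
    }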
715 | 715 | ||
716 | /** | 716 | /** |
717 | * update_max_tr_single - only copy one trace over, and reset the rest | 717 | * update_max_tr_single - only copy one trace over, and reset the rest |
718 | * @tr: tracer | 718 | * @tr: tracer |
719 | * @tsk: the task with the latency | 719 | * @tsk: the task with the latency |
720 | * @cpu: the cpu of the buffer to copy. | 720 | * @cpu: the cpu of the buffer to copy. |
721 | * | 721 | * |
722 | * Flip the trace of a single CPU buffer between the @tr and the max_tr. | 722 | * Flip the trace of a single CPU buffer between the @tr and the max_tr. |
723 | */ | 723 | */ |
724 | void | 724 | void |
725 | update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) | 725 | update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) |
726 | { | 726 | { |
727 | int ret; | 727 | int ret; |
728 | 728 | ||
729 | if (trace_stop_count) | 729 | if (trace_stop_count) |
730 | return; | 730 | return; |
731 | 731 | ||
732 | WARN_ON_ONCE(!irqs_disabled()); | 732 | WARN_ON_ONCE(!irqs_disabled()); |
733 | if (!current_trace->use_max_tr) { | 733 | if (!current_trace->use_max_tr) { |
734 | WARN_ON_ONCE(1); | 734 | WARN_ON_ONCE(1); |
735 | return; | 735 | return; |
736 | } | 736 | } |
737 | 737 | ||
738 | arch_spin_lock(&ftrace_max_lock); | 738 | arch_spin_lock(&ftrace_max_lock); |
739 | 739 | ||
740 | ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu); | 740 | ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu); |
741 | 741 | ||
742 | if (ret == -EBUSY) { | 742 | if (ret == -EBUSY) { |
743 | /* | 743 | /* |
744 | * We failed to swap the buffer due to a commit taking | 744 | * We failed to swap the buffer due to a commit taking |
745 | * place on this CPU. We fail to record, but we reset | 745 | * place on this CPU. We fail to record, but we reset |
746 | * the max trace buffer (no one writes directly to it) | 746 | * the max trace buffer (no one writes directly to it) |
747 | * and flag that it failed. | 747 | * and flag that it failed. |
748 | */ | 748 | */ |
749 | trace_array_printk(&max_tr, _THIS_IP_, | 749 | trace_array_printk(&max_tr, _THIS_IP_, |
750 | "Failed to swap buffers due to commit in progress\n"); | 750 | "Failed to swap buffers due to commit in progress\n"); |
751 | } | 751 | } |
752 | 752 | ||
753 | WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY); | 753 | WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY); |
754 | 754 | ||
755 | __update_max_tr(tr, tsk, cpu); | 755 | __update_max_tr(tr, tsk, cpu); |
756 | arch_spin_unlock(&ftrace_max_lock); | 756 | arch_spin_unlock(&ftrace_max_lock); |
757 | } | 757 | } |
758 | #endif /* CONFIG_TRACER_MAX_TRACE */ | 758 | #endif /* CONFIG_TRACER_MAX_TRACE */ |
759 | 759 | ||
760 | /** | 760 | /** |
761 | * register_tracer - register a tracer with the ftrace system. | 761 | * register_tracer - register a tracer with the ftrace system. |
762 | * @type: the plugin for the tracer | 762 | * @type: the plugin for the tracer |
763 | * | 763 | * |
764 | * Register a new plugin tracer. | 764 | * Register a new plugin tracer. |
765 | */ | 765 | */ |
766 | int register_tracer(struct tracer *type) | 766 | int register_tracer(struct tracer *type) |
767 | { | 767 | { |
768 | struct tracer *t; | 768 | struct tracer *t; |
769 | int ret = 0; | 769 | int ret = 0; |
770 | 770 | ||
771 | if (!type->name) { | 771 | if (!type->name) { |
772 | pr_info("Tracer must have a name\n"); | 772 | pr_info("Tracer must have a name\n"); |
773 | return -1; | 773 | return -1; |
774 | } | 774 | } |
775 | 775 | ||
776 | if (strlen(type->name) >= MAX_TRACER_SIZE) { | 776 | if (strlen(type->name) >= MAX_TRACER_SIZE) { |
777 | pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE); | 777 | pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE); |
778 | return -1; | 778 | return -1; |
779 | } | 779 | } |
780 | 780 | ||
781 | mutex_lock(&trace_types_lock); | 781 | mutex_lock(&trace_types_lock); |
782 | 782 | ||
783 | tracing_selftest_running = true; | 783 | tracing_selftest_running = true; |
784 | 784 | ||
785 | for (t = trace_types; t; t = t->next) { | 785 | for (t = trace_types; t; t = t->next) { |
786 | if (strcmp(type->name, t->name) == 0) { | 786 | if (strcmp(type->name, t->name) == 0) { |
787 | /* already found */ | 787 | /* already found */ |
788 | pr_info("Tracer %s already registered\n", | 788 | pr_info("Tracer %s already registered\n", |
789 | type->name); | 789 | type->name); |
790 | ret = -1; | 790 | ret = -1; |
791 | goto out; | 791 | goto out; |
792 | } | 792 | } |
793 | } | 793 | } |
794 | 794 | ||
795 | if (!type->set_flag) | 795 | if (!type->set_flag) |
796 | type->set_flag = &dummy_set_flag; | 796 | type->set_flag = &dummy_set_flag; |
797 | if (!type->flags) | 797 | if (!type->flags) |
798 | type->flags = &dummy_tracer_flags; | 798 | type->flags = &dummy_tracer_flags; |
799 | else | 799 | else |
800 | if (!type->flags->opts) | 800 | if (!type->flags->opts) |
801 | type->flags->opts = dummy_tracer_opt; | 801 | type->flags->opts = dummy_tracer_opt; |
802 | if (!type->wait_pipe) | 802 | if (!type->wait_pipe) |
803 | type->wait_pipe = default_wait_pipe; | 803 | type->wait_pipe = default_wait_pipe; |
804 | 804 | ||
805 | 805 | ||
806 | #ifdef CONFIG_FTRACE_STARTUP_TEST | 806 | #ifdef CONFIG_FTRACE_STARTUP_TEST |
807 | if (type->selftest && !tracing_selftest_disabled) { | 807 | if (type->selftest && !tracing_selftest_disabled) { |
808 | struct tracer *saved_tracer = current_trace; | 808 | struct tracer *saved_tracer = current_trace; |
809 | struct trace_array *tr = &global_trace; | 809 | struct trace_array *tr = &global_trace; |
810 | 810 | ||
811 | /* | 811 | /* |
812 | * Run a selftest on this tracer. | 812 | * Run a selftest on this tracer. |
813 | * Here we reset the trace buffer, and set the current | 813 | * Here we reset the trace buffer, and set the current |
814 | * tracer to be this tracer. The tracer can then run some | 814 | * tracer to be this tracer. The tracer can then run some |
815 | * internal tracing to verify that everything is in order. | 815 | * internal tracing to verify that everything is in order. |
816 | * If we fail, we do not register this tracer. | 816 | * If we fail, we do not register this tracer. |
817 | */ | 817 | */ |
818 | tracing_reset_online_cpus(tr); | 818 | tracing_reset_online_cpus(tr); |
819 | 819 | ||
820 | current_trace = type; | 820 | current_trace = type; |
821 | 821 | ||
822 | /* If we expanded the buffers, make sure the max is expanded too */ | 822 | /* If we expanded the buffers, make sure the max is expanded too */ |
823 | if (ring_buffer_expanded && type->use_max_tr) | 823 | if (ring_buffer_expanded && type->use_max_tr) |
824 | ring_buffer_resize(max_tr.buffer, trace_buf_size, | 824 | ring_buffer_resize(max_tr.buffer, trace_buf_size, |
825 | RING_BUFFER_ALL_CPUS); | 825 | RING_BUFFER_ALL_CPUS); |
826 | 826 | ||
827 | /* the test is responsible for initializing and enabling */ | 827 | /* the test is responsible for initializing and enabling */ |
828 | pr_info("Testing tracer %s: ", type->name); | 828 | pr_info("Testing tracer %s: ", type->name); |
829 | ret = type->selftest(type, tr); | 829 | ret = type->selftest(type, tr); |
830 | /* the test is responsible for resetting too */ | 830 | /* the test is responsible for resetting too */ |
831 | current_trace = saved_tracer; | 831 | current_trace = saved_tracer; |
832 | if (ret) { | 832 | if (ret) { |
833 | printk(KERN_CONT "FAILED!\n"); | 833 | printk(KERN_CONT "FAILED!\n"); |
834 | /* Add the warning after printing 'FAILED' */ | 834 | /* Add the warning after printing 'FAILED' */ |
835 | WARN_ON(1); | 835 | WARN_ON(1); |
836 | goto out; | 836 | goto out; |
837 | } | 837 | } |
838 | /* Only reset on passing, to avoid touching corrupted buffers */ | 838 | /* Only reset on passing, to avoid touching corrupted buffers */ |
839 | tracing_reset_online_cpus(tr); | 839 | tracing_reset_online_cpus(tr); |
840 | 840 | ||
841 | /* Shrink the max buffer again */ | 841 | /* Shrink the max buffer again */ |
842 | if (ring_buffer_expanded && type->use_max_tr) | 842 | if (ring_buffer_expanded && type->use_max_tr) |
843 | ring_buffer_resize(max_tr.buffer, 1, | 843 | ring_buffer_resize(max_tr.buffer, 1, |
844 | RING_BUFFER_ALL_CPUS); | 844 | RING_BUFFER_ALL_CPUS); |
845 | 845 | ||
846 | printk(KERN_CONT "PASSED\n"); | 846 | printk(KERN_CONT "PASSED\n"); |
847 | } | 847 | } |
848 | #endif | 848 | #endif |
849 | 849 | ||
850 | type->next = trace_types; | 850 | type->next = trace_types; |
851 | trace_types = type; | 851 | trace_types = type; |
852 | 852 | ||
853 | out: | 853 | out: |
854 | tracing_selftest_running = false; | 854 | tracing_selftest_running = false; |
855 | mutex_unlock(&trace_types_lock); | 855 | mutex_unlock(&trace_types_lock); |
856 | 856 | ||
857 | if (ret || !default_bootup_tracer) | 857 | if (ret || !default_bootup_tracer) |
858 | goto out_unlock; | 858 | goto out_unlock; |
859 | 859 | ||
860 | if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE)) | 860 | if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE)) |
861 | goto out_unlock; | 861 | goto out_unlock; |
862 | 862 | ||
863 | printk(KERN_INFO "Starting tracer '%s'\n", type->name); | 863 | printk(KERN_INFO "Starting tracer '%s'\n", type->name); |
864 | /* Do we want this tracer to start on bootup? */ | 864 | /* Do we want this tracer to start on bootup? */ |
865 | tracing_set_tracer(type->name); | 865 | tracing_set_tracer(type->name); |
866 | default_bootup_tracer = NULL; | 866 | default_bootup_tracer = NULL; |
867 | /* disable other selftests, since this will break it. */ | 867 | /* disable other selftests, since this will break it. */ |
868 | tracing_selftest_disabled = 1; | 868 | tracing_selftest_disabled = 1; |
869 | #ifdef CONFIG_FTRACE_STARTUP_TEST | 869 | #ifdef CONFIG_FTRACE_STARTUP_TEST |
870 | printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n", | 870 | printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n", |
871 | type->name); | 871 | type->name); |
872 | #endif | 872 | #endif |
873 | 873 | ||
874 | out_unlock: | 874 | out_unlock: |
875 | return ret; | 875 | return ret; |
876 | } | 876 | } |
877 | 877 | ||
878 | void unregister_tracer(struct tracer *type) | 878 | void unregister_tracer(struct tracer *type) |
879 | { | 879 | { |
880 | struct tracer **t; | 880 | struct tracer **t; |
881 | 881 | ||
882 | mutex_lock(&trace_types_lock); | 882 | mutex_lock(&trace_types_lock); |
883 | for (t = &trace_types; *t; t = &(*t)->next) { | 883 | for (t = &trace_types; *t; t = &(*t)->next) { |
884 | if (*t == type) | 884 | if (*t == type) |
885 | goto found; | 885 | goto found; |
886 | } | 886 | } |
887 | pr_info("Tracer %s not registered\n", type->name); | 887 | pr_info("Tracer %s not registered\n", type->name); |
888 | goto out; | 888 | goto out; |
889 | 889 | ||
890 | found: | 890 | found: |
891 | *t = (*t)->next; | 891 | *t = (*t)->next; |
892 | 892 | ||
893 | if (type == current_trace && tracer_enabled) { | 893 | if (type == current_trace && tracer_enabled) { |
894 | tracer_enabled = 0; | 894 | tracer_enabled = 0; |
895 | tracing_stop(); | 895 | tracing_stop(); |
896 | if (current_trace->stop) | 896 | if (current_trace->stop) |
897 | current_trace->stop(&global_trace); | 897 | current_trace->stop(&global_trace); |
898 | current_trace = &nop_trace; | 898 | current_trace = &nop_trace; |
899 | } | 899 | } |
900 | out: | 900 | out: |
901 | mutex_unlock(&trace_types_lock); | 901 | mutex_unlock(&trace_types_lock); |
902 | } | 902 | } |
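A minimal sketch of a plugin registering itself; the "example" tracer is hypothetical and only shows the .name field that register_tracer() checks above, while real tracers also fill in init/reset/selftest callbacks:

    static struct tracer example_tracer __read_mostly = {
            .name   = "example",
            /* .init, .reset, .selftest, ... omitted in this sketch */
    };

    static int __init example_tracer_init(void)
    {
            return register_tracer(&example_tracer);
    }
    device_initcall(example_tracer_init);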
903 | 903 | ||
904 | void tracing_reset(struct trace_array *tr, int cpu) | 904 | void tracing_reset(struct trace_array *tr, int cpu) |
905 | { | 905 | { |
906 | struct ring_buffer *buffer = tr->buffer; | 906 | struct ring_buffer *buffer = tr->buffer; |
907 | 907 | ||
908 | ring_buffer_record_disable(buffer); | 908 | ring_buffer_record_disable(buffer); |
909 | 909 | ||
910 | /* Make sure all commits have finished */ | 910 | /* Make sure all commits have finished */ |
911 | synchronize_sched(); | 911 | synchronize_sched(); |
912 | ring_buffer_reset_cpu(buffer, cpu); | 912 | ring_buffer_reset_cpu(buffer, cpu); |
913 | 913 | ||
914 | ring_buffer_record_enable(buffer); | 914 | ring_buffer_record_enable(buffer); |
915 | } | 915 | } |
916 | 916 | ||
917 | void tracing_reset_online_cpus(struct trace_array *tr) | 917 | void tracing_reset_online_cpus(struct trace_array *tr) |
918 | { | 918 | { |
919 | struct ring_buffer *buffer = tr->buffer; | 919 | struct ring_buffer *buffer = tr->buffer; |
920 | int cpu; | 920 | int cpu; |
921 | 921 | ||
922 | ring_buffer_record_disable(buffer); | 922 | ring_buffer_record_disable(buffer); |
923 | 923 | ||
924 | /* Make sure all commits have finished */ | 924 | /* Make sure all commits have finished */ |
925 | synchronize_sched(); | 925 | synchronize_sched(); |
926 | 926 | ||
927 | tr->time_start = ftrace_now(tr->cpu); | 927 | tr->time_start = ftrace_now(tr->cpu); |
928 | 928 | ||
929 | for_each_online_cpu(cpu) | 929 | for_each_online_cpu(cpu) |
930 | ring_buffer_reset_cpu(buffer, cpu); | 930 | ring_buffer_reset_cpu(buffer, cpu); |
931 | 931 | ||
932 | ring_buffer_record_enable(buffer); | 932 | ring_buffer_record_enable(buffer); |
933 | } | 933 | } |
934 | 934 | ||
935 | void tracing_reset_current(int cpu) | 935 | void tracing_reset_current(int cpu) |
936 | { | 936 | { |
937 | tracing_reset(&global_trace, cpu); | 937 | tracing_reset(&global_trace, cpu); |
938 | } | 938 | } |
939 | 939 | ||
940 | void tracing_reset_current_online_cpus(void) | 940 | void tracing_reset_current_online_cpus(void) |
941 | { | 941 | { |
942 | tracing_reset_online_cpus(&global_trace); | 942 | tracing_reset_online_cpus(&global_trace); |
943 | } | 943 | } |
944 | 944 | ||
945 | #define SAVED_CMDLINES 128 | 945 | #define SAVED_CMDLINES 128 |
946 | #define NO_CMDLINE_MAP UINT_MAX | 946 | #define NO_CMDLINE_MAP UINT_MAX |
947 | static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1]; | 947 | static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1]; |
948 | static unsigned map_cmdline_to_pid[SAVED_CMDLINES]; | 948 | static unsigned map_cmdline_to_pid[SAVED_CMDLINES]; |
949 | static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN]; | 949 | static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN]; |
950 | static int cmdline_idx; | 950 | static int cmdline_idx; |
951 | static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED; | 951 | static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED; |
952 | 952 | ||
953 | /* temporarily disable recording */ | 953 | /* temporarily disable recording */ |
954 | static atomic_t trace_record_cmdline_disabled __read_mostly; | 954 | static atomic_t trace_record_cmdline_disabled __read_mostly; |
955 | 955 | ||
956 | static void trace_init_cmdlines(void) | 956 | static void trace_init_cmdlines(void) |
957 | { | 957 | { |
958 | memset(&map_pid_to_cmdline, NO_CMDLINE_MAP, sizeof(map_pid_to_cmdline)); | 958 | memset(&map_pid_to_cmdline, NO_CMDLINE_MAP, sizeof(map_pid_to_cmdline)); |
959 | memset(&map_cmdline_to_pid, NO_CMDLINE_MAP, sizeof(map_cmdline_to_pid)); | 959 | memset(&map_cmdline_to_pid, NO_CMDLINE_MAP, sizeof(map_cmdline_to_pid)); |
960 | cmdline_idx = 0; | 960 | cmdline_idx = 0; |
961 | } | 961 | } |
962 | 962 | ||
963 | int is_tracing_stopped(void) | 963 | int is_tracing_stopped(void) |
964 | { | 964 | { |
965 | return trace_stop_count; | 965 | return trace_stop_count; |
966 | } | 966 | } |
967 | 967 | ||
968 | /** | 968 | /** |
969 | * ftrace_off_permanent - disable all ftrace code permanently | 969 | * ftrace_off_permanent - disable all ftrace code permanently |
970 | * | 970 | * |
971 | * This should only be called when a serious anomaly has | 971 | * This should only be called when a serious anomaly has |
972 | * been detected. This will turn off the function tracing, | 972 | * been detected. This will turn off the function tracing, |
973 | * ring buffers, and other tracing utilities. It takes no | 973 | * ring buffers, and other tracing utilities. It takes no |
974 | * locks and can be called from any context. | 974 | * locks and can be called from any context. |
975 | */ | 975 | */ |
976 | void ftrace_off_permanent(void) | 976 | void ftrace_off_permanent(void) |
977 | { | 977 | { |
978 | tracing_disabled = 1; | 978 | tracing_disabled = 1; |
979 | ftrace_stop(); | 979 | ftrace_stop(); |
980 | tracing_off_permanent(); | 980 | tracing_off_permanent(); |
981 | } | 981 | } |
982 | 982 | ||
983 | /** | 983 | /** |
984 | * tracing_start - quick start of the tracer | 984 | * tracing_start - quick start of the tracer |
985 | * | 985 | * |
986 | * If tracing is enabled but was stopped by tracing_stop, | 986 | * If tracing is enabled but was stopped by tracing_stop, |
987 | * this will start the tracer back up. | 987 | * this will start the tracer back up. |
988 | */ | 988 | */ |
989 | void tracing_start(void) | 989 | void tracing_start(void) |
990 | { | 990 | { |
991 | struct ring_buffer *buffer; | 991 | struct ring_buffer *buffer; |
992 | unsigned long flags; | 992 | unsigned long flags; |
993 | 993 | ||
994 | if (tracing_disabled) | 994 | if (tracing_disabled) |
995 | return; | 995 | return; |
996 | 996 | ||
997 | raw_spin_lock_irqsave(&tracing_start_lock, flags); | 997 | raw_spin_lock_irqsave(&tracing_start_lock, flags); |
998 | if (--trace_stop_count) { | 998 | if (--trace_stop_count) { |
999 | if (trace_stop_count < 0) { | 999 | if (trace_stop_count < 0) { |
1000 | /* Someone screwed up their debugging */ | 1000 | /* Someone screwed up their debugging */ |
1001 | WARN_ON_ONCE(1); | 1001 | WARN_ON_ONCE(1); |
1002 | trace_stop_count = 0; | 1002 | trace_stop_count = 0; |
1003 | } | 1003 | } |
1004 | goto out; | 1004 | goto out; |
1005 | } | 1005 | } |
1006 | 1006 | ||
1007 | /* Prevent the buffers from switching */ | 1007 | /* Prevent the buffers from switching */ |
1008 | arch_spin_lock(&ftrace_max_lock); | 1008 | arch_spin_lock(&ftrace_max_lock); |
1009 | 1009 | ||
1010 | buffer = global_trace.buffer; | 1010 | buffer = global_trace.buffer; |
1011 | if (buffer) | 1011 | if (buffer) |
1012 | ring_buffer_record_enable(buffer); | 1012 | ring_buffer_record_enable(buffer); |
1013 | 1013 | ||
1014 | buffer = max_tr.buffer; | 1014 | buffer = max_tr.buffer; |
1015 | if (buffer) | 1015 | if (buffer) |
1016 | ring_buffer_record_enable(buffer); | 1016 | ring_buffer_record_enable(buffer); |
1017 | 1017 | ||
1018 | arch_spin_unlock(&ftrace_max_lock); | 1018 | arch_spin_unlock(&ftrace_max_lock); |
1019 | 1019 | ||
1020 | ftrace_start(); | 1020 | ftrace_start(); |
1021 | out: | 1021 | out: |
1022 | raw_spin_unlock_irqrestore(&tracing_start_lock, flags); | 1022 | raw_spin_unlock_irqrestore(&tracing_start_lock, flags); |
1023 | } | 1023 | } |
1024 | 1024 | ||
1025 | /** | 1025 | /** |
1026 | * tracing_stop - quick stop of the tracer | 1026 | * tracing_stop - quick stop of the tracer |
1027 | * | 1027 | * |
1028 | * Lightweight way to stop tracing. Use in conjunction with | 1028 | * Lightweight way to stop tracing. Use in conjunction with |
1029 | * tracing_start. | 1029 | * tracing_start. |
1030 | */ | 1030 | */ |
1031 | void tracing_stop(void) | 1031 | void tracing_stop(void) |
1032 | { | 1032 | { |
1033 | struct ring_buffer *buffer; | 1033 | struct ring_buffer *buffer; |
1034 | unsigned long flags; | 1034 | unsigned long flags; |
1035 | 1035 | ||
1036 | ftrace_stop(); | 1036 | ftrace_stop(); |
1037 | raw_spin_lock_irqsave(&tracing_start_lock, flags); | 1037 | raw_spin_lock_irqsave(&tracing_start_lock, flags); |
1038 | if (trace_stop_count++) | 1038 | if (trace_stop_count++) |
1039 | goto out; | 1039 | goto out; |
1040 | 1040 | ||
1041 | /* Prevent the buffers from switching */ | 1041 | /* Prevent the buffers from switching */ |
1042 | arch_spin_lock(&ftrace_max_lock); | 1042 | arch_spin_lock(&ftrace_max_lock); |
1043 | 1043 | ||
1044 | buffer = global_trace.buffer; | 1044 | buffer = global_trace.buffer; |
1045 | if (buffer) | 1045 | if (buffer) |
1046 | ring_buffer_record_disable(buffer); | 1046 | ring_buffer_record_disable(buffer); |
1047 | 1047 | ||
1048 | buffer = max_tr.buffer; | 1048 | buffer = max_tr.buffer; |
1049 | if (buffer) | 1049 | if (buffer) |
1050 | ring_buffer_record_disable(buffer); | 1050 | ring_buffer_record_disable(buffer); |
1051 | 1051 | ||
1052 | arch_spin_unlock(&ftrace_max_lock); | 1052 | arch_spin_unlock(&ftrace_max_lock); |
1053 | 1053 | ||
1054 | out: | 1054 | out: |
1055 | raw_spin_unlock_irqrestore(&tracing_start_lock, flags); | 1055 | raw_spin_unlock_irqrestore(&tracing_start_lock, flags); |
1056 | } | 1056 | } |
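As the comments say, tracing_stop() and tracing_start() are meant to bracket a region; a hedged sketch of the pairing, where dump_state() is a hypothetical stand-in for whatever work needs a quiet buffer:

    tracing_stop();         /* recording off; nests via trace_stop_count */
    dump_state();           /* hypothetical: inspect buffers without new events racing in */
    tracing_start();        /* recording resumes once the count returns to zero */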
1057 | 1057 | ||
1058 | void trace_stop_cmdline_recording(void); | 1058 | void trace_stop_cmdline_recording(void); |
1059 | 1059 | ||
1060 | static void trace_save_cmdline(struct task_struct *tsk) | 1060 | static void trace_save_cmdline(struct task_struct *tsk) |
1061 | { | 1061 | { |
1062 | unsigned pid, idx; | 1062 | unsigned pid, idx; |
1063 | 1063 | ||
1064 | if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT)) | 1064 | if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT)) |
1065 | return; | 1065 | return; |
1066 | 1066 | ||
1067 | /* | 1067 | /* |
1068 | * It's not the end of the world if we don't get | 1068 | * It's not the end of the world if we don't get |
1069 | * the lock, but we also don't want to spin | 1069 | * the lock, but we also don't want to spin |
1070 | * nor do we want to disable interrupts, | 1070 | * nor do we want to disable interrupts, |
1071 | * so if we miss here, then better luck next time. | 1071 | * so if we miss here, then better luck next time. |
1072 | */ | 1072 | */ |
1073 | if (!arch_spin_trylock(&trace_cmdline_lock)) | 1073 | if (!arch_spin_trylock(&trace_cmdline_lock)) |
1074 | return; | 1074 | return; |
1075 | 1075 | ||
1076 | idx = map_pid_to_cmdline[tsk->pid]; | 1076 | idx = map_pid_to_cmdline[tsk->pid]; |
1077 | if (idx == NO_CMDLINE_MAP) { | 1077 | if (idx == NO_CMDLINE_MAP) { |
1078 | idx = (cmdline_idx + 1) % SAVED_CMDLINES; | 1078 | idx = (cmdline_idx + 1) % SAVED_CMDLINES; |
1079 | 1079 | ||
1080 | /* | 1080 | /* |
1081 | * Check whether the cmdline buffer at idx has a pid | 1081 | * Check whether the cmdline buffer at idx has a pid |
1082 | * mapped. We are going to overwrite that entry so we | 1082 | * mapped. We are going to overwrite that entry so we |
1083 | * need to clear the map_pid_to_cmdline. Otherwise we | 1083 | * need to clear the map_pid_to_cmdline. Otherwise we |
1084 | * would read the new comm for the old pid. | 1084 | * would read the new comm for the old pid. |
1085 | */ | 1085 | */ |
1086 | pid = map_cmdline_to_pid[idx]; | 1086 | pid = map_cmdline_to_pid[idx]; |
1087 | if (pid != NO_CMDLINE_MAP) | 1087 | if (pid != NO_CMDLINE_MAP) |
1088 | map_pid_to_cmdline[pid] = NO_CMDLINE_MAP; | 1088 | map_pid_to_cmdline[pid] = NO_CMDLINE_MAP; |
1089 | 1089 | ||
1090 | map_cmdline_to_pid[idx] = tsk->pid; | 1090 | map_cmdline_to_pid[idx] = tsk->pid; |
1091 | map_pid_to_cmdline[tsk->pid] = idx; | 1091 | map_pid_to_cmdline[tsk->pid] = idx; |
1092 | 1092 | ||
1093 | cmdline_idx = idx; | 1093 | cmdline_idx = idx; |
1094 | } | 1094 | } |
1095 | 1095 | ||
1096 | memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN); | 1096 | memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN); |
1097 | 1097 | ||
1098 | arch_spin_unlock(&trace_cmdline_lock); | 1098 | arch_spin_unlock(&trace_cmdline_lock); |
1099 | } | 1099 | } |
1100 | 1100 | ||
1101 | void trace_find_cmdline(int pid, char comm[]) | 1101 | void trace_find_cmdline(int pid, char comm[]) |
1102 | { | 1102 | { |
1103 | unsigned map; | 1103 | unsigned map; |
1104 | 1104 | ||
1105 | if (!pid) { | 1105 | if (!pid) { |
1106 | strcpy(comm, "<idle>"); | 1106 | strcpy(comm, "<idle>"); |
1107 | return; | 1107 | return; |
1108 | } | 1108 | } |
1109 | 1109 | ||
1110 | if (WARN_ON_ONCE(pid < 0)) { | 1110 | if (WARN_ON_ONCE(pid < 0)) { |
1111 | strcpy(comm, "<XXX>"); | 1111 | strcpy(comm, "<XXX>"); |
1112 | return; | 1112 | return; |
1113 | } | 1113 | } |
1114 | 1114 | ||
1115 | if (pid > PID_MAX_DEFAULT) { | 1115 | if (pid > PID_MAX_DEFAULT) { |
1116 | strcpy(comm, "<...>"); | 1116 | strcpy(comm, "<...>"); |
1117 | return; | 1117 | return; |
1118 | } | 1118 | } |
1119 | 1119 | ||
1120 | preempt_disable(); | 1120 | preempt_disable(); |
1121 | arch_spin_lock(&trace_cmdline_lock); | 1121 | arch_spin_lock(&trace_cmdline_lock); |
1122 | map = map_pid_to_cmdline[pid]; | 1122 | map = map_pid_to_cmdline[pid]; |
1123 | if (map != NO_CMDLINE_MAP) | 1123 | if (map != NO_CMDLINE_MAP) |
1124 | strcpy(comm, saved_cmdlines[map]); | 1124 | strcpy(comm, saved_cmdlines[map]); |
1125 | else | 1125 | else |
1126 | strcpy(comm, "<...>"); | 1126 | strcpy(comm, "<...>"); |
1127 | 1127 | ||
1128 | arch_spin_unlock(&trace_cmdline_lock); | 1128 | arch_spin_unlock(&trace_cmdline_lock); |
1129 | preempt_enable(); | 1129 | preempt_enable(); |
1130 | } | 1130 | } |
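A small usage sketch; TASK_COMM_LEN sizes the output buffer exactly as trace_save_cmdline() stores it, and the seq_file output line is illustrative only:

    char comm[TASK_COMM_LEN];

    trace_find_cmdline(pid, comm);          /* falls back to "<idle>"/"<...>" as above */
    seq_printf(m, "%16s-%-5d", comm, pid);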
1131 | 1131 | ||
1132 | void tracing_record_cmdline(struct task_struct *tsk) | 1132 | void tracing_record_cmdline(struct task_struct *tsk) |
1133 | { | 1133 | { |
1134 | if (atomic_read(&trace_record_cmdline_disabled) || !tracer_enabled || | 1134 | if (atomic_read(&trace_record_cmdline_disabled) || !tracer_enabled || |
1135 | !tracing_is_on()) | 1135 | !tracing_is_on()) |
1136 | return; | 1136 | return; |
1137 | 1137 | ||
1138 | trace_save_cmdline(tsk); | 1138 | trace_save_cmdline(tsk); |
1139 | } | 1139 | } |
1140 | 1140 | ||
1141 | void | 1141 | void |
1142 | tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, | 1142 | tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, |
1143 | int pc) | 1143 | int pc) |
1144 | { | 1144 | { |
1145 | struct task_struct *tsk = current; | 1145 | struct task_struct *tsk = current; |
1146 | 1146 | ||
1147 | entry->preempt_count = pc & 0xff; | 1147 | entry->preempt_count = pc & 0xff; |
1148 | entry->pid = (tsk) ? tsk->pid : 0; | 1148 | entry->pid = (tsk) ? tsk->pid : 0; |
1149 | entry->padding = 0; | 1149 | entry->padding = 0; |
1150 | entry->flags = | 1150 | entry->flags = |
1151 | #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT | 1151 | #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT |
1152 | (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | | 1152 | (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | |
1153 | #else | 1153 | #else |
1154 | TRACE_FLAG_IRQS_NOSUPPORT | | 1154 | TRACE_FLAG_IRQS_NOSUPPORT | |
1155 | #endif | 1155 | #endif |
1156 | ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) | | 1156 | ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) | |
1157 | ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) | | 1157 | ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) | |
1158 | (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0); | 1158 | (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0); |
1159 | } | 1159 | } |
1160 | EXPORT_SYMBOL_GPL(tracing_generic_entry_update); | 1160 | EXPORT_SYMBOL_GPL(tracing_generic_entry_update); |
1161 | 1161 | ||
1162 | struct ring_buffer_event * | 1162 | struct ring_buffer_event * |
1163 | trace_buffer_lock_reserve(struct ring_buffer *buffer, | 1163 | trace_buffer_lock_reserve(struct ring_buffer *buffer, |
1164 | int type, | 1164 | int type, |
1165 | unsigned long len, | 1165 | unsigned long len, |
1166 | unsigned long flags, int pc) | 1166 | unsigned long flags, int pc) |
1167 | { | 1167 | { |
1168 | struct ring_buffer_event *event; | 1168 | struct ring_buffer_event *event; |
1169 | 1169 | ||
1170 | event = ring_buffer_lock_reserve(buffer, len); | 1170 | event = ring_buffer_lock_reserve(buffer, len); |
1171 | if (event != NULL) { | 1171 | if (event != NULL) { |
1172 | struct trace_entry *ent = ring_buffer_event_data(event); | 1172 | struct trace_entry *ent = ring_buffer_event_data(event); |
1173 | 1173 | ||
1174 | tracing_generic_entry_update(ent, flags, pc); | 1174 | tracing_generic_entry_update(ent, flags, pc); |
1175 | ent->type = type; | 1175 | ent->type = type; |
1176 | } | 1176 | } |
1177 | 1177 | ||
1178 | return event; | 1178 | return event; |
1179 | } | 1179 | } |
1180 | 1180 | ||
1181 | static inline void | 1181 | static inline void |
1182 | __trace_buffer_unlock_commit(struct ring_buffer *buffer, | 1182 | __trace_buffer_unlock_commit(struct ring_buffer *buffer, |
1183 | struct ring_buffer_event *event, | 1183 | struct ring_buffer_event *event, |
1184 | unsigned long flags, int pc, | 1184 | unsigned long flags, int pc, |
1185 | int wake) | 1185 | int wake) |
1186 | { | 1186 | { |
1187 | ring_buffer_unlock_commit(buffer, event); | 1187 | ring_buffer_unlock_commit(buffer, event); |
1188 | 1188 | ||
1189 | ftrace_trace_stack(buffer, flags, 6, pc); | 1189 | ftrace_trace_stack(buffer, flags, 6, pc); |
1190 | ftrace_trace_userstack(buffer, flags, pc); | 1190 | ftrace_trace_userstack(buffer, flags, pc); |
1191 | 1191 | ||
1192 | if (wake) | 1192 | if (wake) |
1193 | trace_wake_up(); | 1193 | trace_wake_up(); |
1194 | } | 1194 | } |
1195 | 1195 | ||
1196 | void trace_buffer_unlock_commit(struct ring_buffer *buffer, | 1196 | void trace_buffer_unlock_commit(struct ring_buffer *buffer, |
1197 | struct ring_buffer_event *event, | 1197 | struct ring_buffer_event *event, |
1198 | unsigned long flags, int pc) | 1198 | unsigned long flags, int pc) |
1199 | { | 1199 | { |
1200 | __trace_buffer_unlock_commit(buffer, event, flags, pc, 1); | 1200 | __trace_buffer_unlock_commit(buffer, event, flags, pc, 1); |
1201 | } | 1201 | } |
1202 | 1202 | ||
1203 | struct ring_buffer_event * | 1203 | struct ring_buffer_event * |
1204 | trace_current_buffer_lock_reserve(struct ring_buffer **current_rb, | 1204 | trace_current_buffer_lock_reserve(struct ring_buffer **current_rb, |
1205 | int type, unsigned long len, | 1205 | int type, unsigned long len, |
1206 | unsigned long flags, int pc) | 1206 | unsigned long flags, int pc) |
1207 | { | 1207 | { |
1208 | *current_rb = global_trace.buffer; | 1208 | *current_rb = global_trace.buffer; |
1209 | return trace_buffer_lock_reserve(*current_rb, | 1209 | return trace_buffer_lock_reserve(*current_rb, |
1210 | type, len, flags, pc); | 1210 | type, len, flags, pc); |
1211 | } | 1211 | } |
1212 | EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve); | 1212 | EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve); |
1213 | 1213 | ||
1214 | void trace_current_buffer_unlock_commit(struct ring_buffer *buffer, | 1214 | void trace_current_buffer_unlock_commit(struct ring_buffer *buffer, |
1215 | struct ring_buffer_event *event, | 1215 | struct ring_buffer_event *event, |
1216 | unsigned long flags, int pc) | 1216 | unsigned long flags, int pc) |
1217 | { | 1217 | { |
1218 | __trace_buffer_unlock_commit(buffer, event, flags, pc, 1); | 1218 | __trace_buffer_unlock_commit(buffer, event, flags, pc, 1); |
1219 | } | 1219 | } |
1220 | EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit); | 1220 | EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit); |
1221 | 1221 | ||
1222 | void trace_nowake_buffer_unlock_commit(struct ring_buffer *buffer, | 1222 | void trace_nowake_buffer_unlock_commit(struct ring_buffer *buffer, |
1223 | struct ring_buffer_event *event, | 1223 | struct ring_buffer_event *event, |
1224 | unsigned long flags, int pc) | 1224 | unsigned long flags, int pc) |
1225 | { | 1225 | { |
1226 | __trace_buffer_unlock_commit(buffer, event, flags, pc, 0); | 1226 | __trace_buffer_unlock_commit(buffer, event, flags, pc, 0); |
1227 | } | 1227 | } |
1228 | EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit); | 1228 | EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit); |
1229 | 1229 | ||
1230 | void trace_nowake_buffer_unlock_commit_regs(struct ring_buffer *buffer, | 1230 | void trace_nowake_buffer_unlock_commit_regs(struct ring_buffer *buffer, |
1231 | struct ring_buffer_event *event, | 1231 | struct ring_buffer_event *event, |
1232 | unsigned long flags, int pc, | 1232 | unsigned long flags, int pc, |
1233 | struct pt_regs *regs) | 1233 | struct pt_regs *regs) |
1234 | { | 1234 | { |
1235 | ring_buffer_unlock_commit(buffer, event); | 1235 | ring_buffer_unlock_commit(buffer, event); |
1236 | 1236 | ||
1237 | ftrace_trace_stack_regs(buffer, flags, 0, pc, regs); | 1237 | ftrace_trace_stack_regs(buffer, flags, 0, pc, regs); |
1238 | ftrace_trace_userstack(buffer, flags, pc); | 1238 | ftrace_trace_userstack(buffer, flags, pc); |
1239 | } | 1239 | } |
1240 | EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit_regs); | 1240 | EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit_regs); |
1241 | 1241 | ||
1242 | void trace_current_buffer_discard_commit(struct ring_buffer *buffer, | 1242 | void trace_current_buffer_discard_commit(struct ring_buffer *buffer, |
1243 | struct ring_buffer_event *event) | 1243 | struct ring_buffer_event *event) |
1244 | { | 1244 | { |
1245 | ring_buffer_discard_commit(buffer, event); | 1245 | ring_buffer_discard_commit(buffer, event); |
1246 | } | 1246 | } |
1247 | EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit); | 1247 | EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit); |
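Taken together, these helpers give event writers a reserve/fill/commit pattern; a hedged sketch, where struct example_entry and TRACE_EXAMPLE stand in for a tracer-defined event type:

    struct ring_buffer *buffer;
    struct ring_buffer_event *event;
    struct example_entry *entry;            /* hypothetical entry layout */

    event = trace_current_buffer_lock_reserve(&buffer, TRACE_EXAMPLE,
                                               sizeof(*entry), flags, pc);
    if (!event)
            return;                         /* ring buffer full or recording disabled */
    entry = ring_buffer_event_data(event);
    entry->value = value;                   /* fill tracer-specific fields */
    trace_current_buffer_unlock_commit(buffer, event, flags, pc);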
1248 | 1248 | ||
1249 | void | 1249 | void |
1250 | trace_function(struct trace_array *tr, | 1250 | trace_function(struct trace_array *tr, |
1251 | unsigned long ip, unsigned long parent_ip, unsigned long flags, | 1251 | unsigned long ip, unsigned long parent_ip, unsigned long flags, |
1252 | int pc) | 1252 | int pc) |
1253 | { | 1253 | { |
1254 | struct ftrace_event_call *call = &event_function; | 1254 | struct ftrace_event_call *call = &event_function; |
1255 | struct ring_buffer *buffer = tr->buffer; | 1255 | struct ring_buffer *buffer = tr->buffer; |
1256 | struct ring_buffer_event *event; | 1256 | struct ring_buffer_event *event; |
1257 | struct ftrace_entry *entry; | 1257 | struct ftrace_entry *entry; |
1258 | 1258 | ||
1259 | /* If we are reading the ring buffer, don't trace */ | 1259 | /* If we are reading the ring buffer, don't trace */ |
1260 | if (unlikely(__this_cpu_read(ftrace_cpu_disabled))) | 1260 | if (unlikely(__this_cpu_read(ftrace_cpu_disabled))) |
1261 | return; | 1261 | return; |
1262 | 1262 | ||
1263 | event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry), | 1263 | event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry), |
1264 | flags, pc); | 1264 | flags, pc); |
1265 | if (!event) | 1265 | if (!event) |
1266 | return; | 1266 | return; |
1267 | entry = ring_buffer_event_data(event); | 1267 | entry = ring_buffer_event_data(event); |
1268 | entry->ip = ip; | 1268 | entry->ip = ip; |
1269 | entry->parent_ip = parent_ip; | 1269 | entry->parent_ip = parent_ip; |
1270 | 1270 | ||
1271 | if (!filter_check_discard(call, entry, buffer, event)) | 1271 | if (!filter_check_discard(call, entry, buffer, event)) |
1272 | ring_buffer_unlock_commit(buffer, event); | 1272 | ring_buffer_unlock_commit(buffer, event); |
1273 | } | 1273 | } |
1274 | 1274 | ||
1275 | void | 1275 | void |
1276 | ftrace(struct trace_array *tr, struct trace_array_cpu *data, | 1276 | ftrace(struct trace_array *tr, struct trace_array_cpu *data, |
1277 | unsigned long ip, unsigned long parent_ip, unsigned long flags, | 1277 | unsigned long ip, unsigned long parent_ip, unsigned long flags, |
1278 | int pc) | 1278 | int pc) |
1279 | { | 1279 | { |
1280 | if (likely(!atomic_read(&data->disabled))) | 1280 | if (likely(!atomic_read(&data->disabled))) |
1281 | trace_function(tr, ip, parent_ip, flags, pc); | 1281 | trace_function(tr, ip, parent_ip, flags, pc); |
1282 | } | 1282 | } |
1283 | 1283 | ||
1284 | #ifdef CONFIG_STACKTRACE | 1284 | #ifdef CONFIG_STACKTRACE |
1285 | 1285 | ||
1286 | #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long)) | 1286 | #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long)) |
1287 | struct ftrace_stack { | 1287 | struct ftrace_stack { |
1288 | unsigned long calls[FTRACE_STACK_MAX_ENTRIES]; | 1288 | unsigned long calls[FTRACE_STACK_MAX_ENTRIES]; |
1289 | }; | 1289 | }; |
1290 | 1290 | ||
1291 | static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack); | 1291 | static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack); |
1292 | static DEFINE_PER_CPU(int, ftrace_stack_reserve); | 1292 | static DEFINE_PER_CPU(int, ftrace_stack_reserve); |
1293 | 1293 | ||
1294 | static void __ftrace_trace_stack(struct ring_buffer *buffer, | 1294 | static void __ftrace_trace_stack(struct ring_buffer *buffer, |
1295 | unsigned long flags, | 1295 | unsigned long flags, |
1296 | int skip, int pc, struct pt_regs *regs) | 1296 | int skip, int pc, struct pt_regs *regs) |
1297 | { | 1297 | { |
1298 | struct ftrace_event_call *call = &event_kernel_stack; | 1298 | struct ftrace_event_call *call = &event_kernel_stack; |
1299 | struct ring_buffer_event *event; | 1299 | struct ring_buffer_event *event; |
1300 | struct stack_entry *entry; | 1300 | struct stack_entry *entry; |
1301 | struct stack_trace trace; | 1301 | struct stack_trace trace; |
1302 | int use_stack; | 1302 | int use_stack; |
1303 | int size = FTRACE_STACK_ENTRIES; | 1303 | int size = FTRACE_STACK_ENTRIES; |
1304 | 1304 | ||
1305 | trace.nr_entries = 0; | 1305 | trace.nr_entries = 0; |
1306 | trace.skip = skip; | 1306 | trace.skip = skip; |
1307 | 1307 | ||
1308 | /* | 1308 | /* |
1309 | * Since events can happen in NMIs, there's no safe way to | 1309 | * Since events can happen in NMIs, there's no safe way to |
1310 | * use the per-cpu ftrace_stacks. We reserve it, and if an interrupt | 1310 | * use the per-cpu ftrace_stacks. We reserve it, and if an interrupt |
1311 | * or NMI comes in, it will just have to use the default | 1311 | * or NMI comes in, it will just have to use the default |
1312 | * FTRACE_STACK_SIZE. | 1312 | * FTRACE_STACK_SIZE. |
1313 | */ | 1313 | */ |
1314 | preempt_disable_notrace(); | 1314 | preempt_disable_notrace(); |
1315 | 1315 | ||
1316 | use_stack = ++__get_cpu_var(ftrace_stack_reserve); | 1316 | use_stack = ++__get_cpu_var(ftrace_stack_reserve); |
1317 | /* | 1317 | /* |
1318 | * We don't need any atomic variables, just a barrier. | 1318 | * We don't need any atomic variables, just a barrier. |
1319 | * If an interrupt comes in, we don't care, because it would | 1319 | * If an interrupt comes in, we don't care, because it would |
1320 | * have exited and put the counter back to what we want. | 1320 | * have exited and put the counter back to what we want. |
1321 | * We just need a barrier to keep gcc from moving things | 1321 | * We just need a barrier to keep gcc from moving things |
1322 | * around. | 1322 | * around. |
1323 | */ | 1323 | */ |
1324 | barrier(); | 1324 | barrier(); |
1325 | if (use_stack == 1) { | 1325 | if (use_stack == 1) { |
1326 | trace.entries = &__get_cpu_var(ftrace_stack).calls[0]; | 1326 | trace.entries = &__get_cpu_var(ftrace_stack).calls[0]; |
1327 | trace.max_entries = FTRACE_STACK_MAX_ENTRIES; | 1327 | trace.max_entries = FTRACE_STACK_MAX_ENTRIES; |
1328 | 1328 | ||
1329 | if (regs) | 1329 | if (regs) |
1330 | save_stack_trace_regs(regs, &trace); | 1330 | save_stack_trace_regs(regs, &trace); |
1331 | else | 1331 | else |
1332 | save_stack_trace(&trace); | 1332 | save_stack_trace(&trace); |
1333 | 1333 | ||
1334 | if (trace.nr_entries > size) | 1334 | if (trace.nr_entries > size) |
1335 | size = trace.nr_entries; | 1335 | size = trace.nr_entries; |
1336 | } else | 1336 | } else |
1337 | /* From now on, use_stack is a boolean */ | 1337 | /* From now on, use_stack is a boolean */ |
1338 | use_stack = 0; | 1338 | use_stack = 0; |
1339 | 1339 | ||
1340 | size *= sizeof(unsigned long); | 1340 | size *= sizeof(unsigned long); |
1341 | 1341 | ||
1342 | event = trace_buffer_lock_reserve(buffer, TRACE_STACK, | 1342 | event = trace_buffer_lock_reserve(buffer, TRACE_STACK, |
1343 | sizeof(*entry) + size, flags, pc); | 1343 | sizeof(*entry) + size, flags, pc); |
1344 | if (!event) | 1344 | if (!event) |
1345 | goto out; | 1345 | goto out; |
1346 | entry = ring_buffer_event_data(event); | 1346 | entry = ring_buffer_event_data(event); |
1347 | 1347 | ||
1348 | memset(&entry->caller, 0, size); | 1348 | memset(&entry->caller, 0, size); |
1349 | 1349 | ||
1350 | if (use_stack) | 1350 | if (use_stack) |
1351 | memcpy(&entry->caller, trace.entries, | 1351 | memcpy(&entry->caller, trace.entries, |
1352 | trace.nr_entries * sizeof(unsigned long)); | 1352 | trace.nr_entries * sizeof(unsigned long)); |
1353 | else { | 1353 | else { |
1354 | trace.max_entries = FTRACE_STACK_ENTRIES; | 1354 | trace.max_entries = FTRACE_STACK_ENTRIES; |
1355 | trace.entries = entry->caller; | 1355 | trace.entries = entry->caller; |
1356 | if (regs) | 1356 | if (regs) |
1357 | save_stack_trace_regs(regs, &trace); | 1357 | save_stack_trace_regs(regs, &trace); |
1358 | else | 1358 | else |
1359 | save_stack_trace(&trace); | 1359 | save_stack_trace(&trace); |
1360 | } | 1360 | } |
1361 | 1361 | ||
1362 | entry->size = trace.nr_entries; | 1362 | entry->size = trace.nr_entries; |
1363 | 1363 | ||
1364 | if (!filter_check_discard(call, entry, buffer, event)) | 1364 | if (!filter_check_discard(call, entry, buffer, event)) |
1365 | ring_buffer_unlock_commit(buffer, event); | 1365 | ring_buffer_unlock_commit(buffer, event); |
1366 | 1366 | ||
1367 | out: | 1367 | out: |
1368 | /* Again, don't let gcc optimize things here */ | 1368 | /* Again, don't let gcc optimize things here */ |
1369 | barrier(); | 1369 | barrier(); |
1370 | __get_cpu_var(ftrace_stack_reserve)--; | 1370 | __get_cpu_var(ftrace_stack_reserve)--; |
1371 | preempt_enable_notrace(); | 1371 | preempt_enable_notrace(); |
1372 | 1372 | ||
1373 | } | 1373 | } |
1374 | 1374 | ||
1375 | void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags, | 1375 | void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags, |
1376 | int skip, int pc, struct pt_regs *regs) | 1376 | int skip, int pc, struct pt_regs *regs) |
1377 | { | 1377 | { |
1378 | if (!(trace_flags & TRACE_ITER_STACKTRACE)) | 1378 | if (!(trace_flags & TRACE_ITER_STACKTRACE)) |
1379 | return; | 1379 | return; |
1380 | 1380 | ||
1381 | __ftrace_trace_stack(buffer, flags, skip, pc, regs); | 1381 | __ftrace_trace_stack(buffer, flags, skip, pc, regs); |
1382 | } | 1382 | } |
1383 | 1383 | ||
1384 | void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags, | 1384 | void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags, |
1385 | int skip, int pc) | 1385 | int skip, int pc) |
1386 | { | 1386 | { |
1387 | if (!(trace_flags & TRACE_ITER_STACKTRACE)) | 1387 | if (!(trace_flags & TRACE_ITER_STACKTRACE)) |
1388 | return; | 1388 | return; |
1389 | 1389 | ||
1390 | __ftrace_trace_stack(buffer, flags, skip, pc, NULL); | 1390 | __ftrace_trace_stack(buffer, flags, skip, pc, NULL); |
1391 | } | 1391 | } |
1392 | 1392 | ||
1393 | void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, | 1393 | void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, |
1394 | int pc) | 1394 | int pc) |
1395 | { | 1395 | { |
1396 | __ftrace_trace_stack(tr->buffer, flags, skip, pc, NULL); | 1396 | __ftrace_trace_stack(tr->buffer, flags, skip, pc, NULL); |
1397 | } | 1397 | } |
1398 | 1398 | ||
1399 | /** | 1399 | /** |
1400 | * trace_dump_stack - record a stack back trace in the trace buffer | 1400 | * trace_dump_stack - record a stack back trace in the trace buffer |
1401 | */ | 1401 | */ |
1402 | void trace_dump_stack(void) | 1402 | void trace_dump_stack(void) |
1403 | { | 1403 | { |
1404 | unsigned long flags; | 1404 | unsigned long flags; |
1405 | 1405 | ||
1406 | if (tracing_disabled || tracing_selftest_running) | 1406 | if (tracing_disabled || tracing_selftest_running) |
1407 | return; | 1407 | return; |
1408 | 1408 | ||
1409 | local_save_flags(flags); | 1409 | local_save_flags(flags); |
1410 | 1410 | ||
1411 | /* skipping 3 traces seems to get us to the caller of this function */ | 1411 | /* skipping 3 traces seems to get us to the caller of this function */ |
1412 | __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count(), NULL); | 1412 | __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count(), NULL); |
1413 | } | 1413 | } |
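Call sites are intentionally trivial; dropping the helper into a suspicious path is enough (the condition here is hypothetical):

    if (unlikely(something_went_wrong))
            trace_dump_stack();     /* records the caller's backtrace in the ring buffer */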
1414 | 1414 | ||
1415 | static DEFINE_PER_CPU(int, user_stack_count); | 1415 | static DEFINE_PER_CPU(int, user_stack_count); |
1416 | 1416 | ||
1417 | void | 1417 | void |
1418 | ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) | 1418 | ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) |
1419 | { | 1419 | { |
1420 | struct ftrace_event_call *call = &event_user_stack; | 1420 | struct ftrace_event_call *call = &event_user_stack; |
1421 | struct ring_buffer_event *event; | 1421 | struct ring_buffer_event *event; |
1422 | struct userstack_entry *entry; | 1422 | struct userstack_entry *entry; |
1423 | struct stack_trace trace; | 1423 | struct stack_trace trace; |
1424 | 1424 | ||
1425 | if (!(trace_flags & TRACE_ITER_USERSTACKTRACE)) | 1425 | if (!(trace_flags & TRACE_ITER_USERSTACKTRACE)) |
1426 | return; | 1426 | return; |
1427 | 1427 | ||
1428 | /* | 1428 | /* |
1429 | * NMIs cannot handle page faults, even with fixups. | 1429 | * NMIs cannot handle page faults, even with fixups. |
1430 | * Saving the user stack can (and often does) fault. | 1430 | * Saving the user stack can (and often does) fault. |
1431 | */ | 1431 | */ |
1432 | if (unlikely(in_nmi())) | 1432 | if (unlikely(in_nmi())) |
1433 | return; | 1433 | return; |
1434 | 1434 | ||
1435 | /* | 1435 | /* |
1436 | * prevent recursion, since the user stack tracing may | 1436 | * prevent recursion, since the user stack tracing may |
1437 | * trigger other kernel events. | 1437 | * trigger other kernel events. |
1438 | */ | 1438 | */ |
1439 | preempt_disable(); | 1439 | preempt_disable(); |
1440 | if (__this_cpu_read(user_stack_count)) | 1440 | if (__this_cpu_read(user_stack_count)) |
1441 | goto out; | 1441 | goto out; |
1442 | 1442 | ||
1443 | __this_cpu_inc(user_stack_count); | 1443 | __this_cpu_inc(user_stack_count); |
1444 | 1444 | ||
1445 | event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, | 1445 | event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, |
1446 | sizeof(*entry), flags, pc); | 1446 | sizeof(*entry), flags, pc); |
1447 | if (!event) | 1447 | if (!event) |
1448 | goto out_drop_count; | 1448 | goto out_drop_count; |
1449 | entry = ring_buffer_event_data(event); | 1449 | entry = ring_buffer_event_data(event); |
1450 | 1450 | ||
1451 | entry->tgid = current->tgid; | 1451 | entry->tgid = current->tgid; |
1452 | memset(&entry->caller, 0, sizeof(entry->caller)); | 1452 | memset(&entry->caller, 0, sizeof(entry->caller)); |
1453 | 1453 | ||
1454 | trace.nr_entries = 0; | 1454 | trace.nr_entries = 0; |
1455 | trace.max_entries = FTRACE_STACK_ENTRIES; | 1455 | trace.max_entries = FTRACE_STACK_ENTRIES; |
1456 | trace.skip = 0; | 1456 | trace.skip = 0; |
1457 | trace.entries = entry->caller; | 1457 | trace.entries = entry->caller; |
1458 | 1458 | ||
1459 | save_stack_trace_user(&trace); | 1459 | save_stack_trace_user(&trace); |
1460 | if (!filter_check_discard(call, entry, buffer, event)) | 1460 | if (!filter_check_discard(call, entry, buffer, event)) |
1461 | ring_buffer_unlock_commit(buffer, event); | 1461 | ring_buffer_unlock_commit(buffer, event); |
1462 | 1462 | ||
1463 | out_drop_count: | 1463 | out_drop_count: |
1464 | __this_cpu_dec(user_stack_count); | 1464 | __this_cpu_dec(user_stack_count); |
1465 | out: | 1465 | out: |
1466 | preempt_enable(); | 1466 | preempt_enable(); |
1467 | } | 1467 | } |
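The per-cpu user_stack_count above acts as a recursion guard: capturing a user stack may itself generate trace events, and without the counter those nested events would re-enter this function. Below is a minimal user-space sketch of the same guard, assuming a thread-local flag as a stand-in for the per-cpu counter; record_user_stack() and its body are hypothetical.

/*
 * User-space sketch of the recursion guard used by
 * ftrace_trace_userstack(): a flag is set before doing work that might
 * itself trigger tracing, and any nested call bails out instead of
 * recursing.  A thread-local flag stands in for the per-cpu
 * user_stack_count; record_user_stack() is hypothetical.
 */
#include <stdio.h>

static __thread int in_progress;

static void record_user_stack(void)
{
	if (in_progress)
		return;		/* nested call: drop it rather than recurse */
	in_progress = 1;

	/* ... capture and commit the stack trace; this may re-enter ... */
	puts("captured one user stack");

	in_progress = 0;
}

int main(void)
{
	record_user_stack();
	return 0;
}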
1468 | 1468 | ||
1469 | #ifdef UNUSED | 1469 | #ifdef UNUSED |
1470 | static void __trace_userstack(struct trace_array *tr, unsigned long flags) | 1470 | static void __trace_userstack(struct trace_array *tr, unsigned long flags) |
1471 | { | 1471 | { |
1472 | ftrace_trace_userstack(tr, flags, preempt_count()); | 1472 | ftrace_trace_userstack(tr, flags, preempt_count()); |
1473 | } | 1473 | } |
1474 | #endif /* UNUSED */ | 1474 | #endif /* UNUSED */ |
1475 | 1475 | ||
1476 | #endif /* CONFIG_STACKTRACE */ | 1476 | #endif /* CONFIG_STACKTRACE */ |
1477 | 1477 | ||
1478 | /* created for use with alloc_percpu */ | 1478 | /* created for use with alloc_percpu */ |
1479 | struct trace_buffer_struct { | 1479 | struct trace_buffer_struct { |
1480 | char buffer[TRACE_BUF_SIZE]; | 1480 | char buffer[TRACE_BUF_SIZE]; |
1481 | }; | 1481 | }; |
1482 | 1482 | ||
1483 | static struct trace_buffer_struct *trace_percpu_buffer; | 1483 | static struct trace_buffer_struct *trace_percpu_buffer; |
1484 | static struct trace_buffer_struct *trace_percpu_sirq_buffer; | 1484 | static struct trace_buffer_struct *trace_percpu_sirq_buffer; |
1485 | static struct trace_buffer_struct *trace_percpu_irq_buffer; | 1485 | static struct trace_buffer_struct *trace_percpu_irq_buffer; |
1486 | static struct trace_buffer_struct *trace_percpu_nmi_buffer; | 1486 | static struct trace_buffer_struct *trace_percpu_nmi_buffer; |
1487 | 1487 | ||
1488 | /* | 1488 | /* |
1489 | * The buffer used is dependent on the context. There is a per cpu | 1489 | * The buffer used is dependent on the context. There is a per cpu |
1490 | * buffer for normal context, softirq context, hard irq context and | 1490 | * buffer for normal context, softirq context, hard irq context and |
1491 | * for NMI context. This allows for lockless recording. | 1491 | * for NMI context. This allows for lockless recording. |
1492 | * | 1492 | * |
1493 | * Note, if the buffers failed to be allocated, then this returns NULL | 1493 | * Note, if the buffers failed to be allocated, then this returns NULL |
1494 | */ | 1494 | */ |
1495 | static char *get_trace_buf(void) | 1495 | static char *get_trace_buf(void) |
1496 | { | 1496 | { |
1497 | struct trace_buffer_struct *percpu_buffer; | 1497 | struct trace_buffer_struct *percpu_buffer; |
1498 | struct trace_buffer_struct *buffer; | 1498 | struct trace_buffer_struct *buffer; |
1499 | 1499 | ||
1500 | /* | 1500 | /* |
1501 | * If we have allocated per cpu buffers, then we do not | 1501 | * If we have allocated per cpu buffers, then we do not |
1502 | * need to do any locking. | 1502 | * need to do any locking. |
1503 | */ | 1503 | */ |
1504 | if (in_nmi()) | 1504 | if (in_nmi()) |
1505 | percpu_buffer = trace_percpu_nmi_buffer; | 1505 | percpu_buffer = trace_percpu_nmi_buffer; |
1506 | else if (in_irq()) | 1506 | else if (in_irq()) |
1507 | percpu_buffer = trace_percpu_irq_buffer; | 1507 | percpu_buffer = trace_percpu_irq_buffer; |
1508 | else if (in_softirq()) | 1508 | else if (in_softirq()) |
1509 | percpu_buffer = trace_percpu_sirq_buffer; | 1509 | percpu_buffer = trace_percpu_sirq_buffer; |
1510 | else | 1510 | else |
1511 | percpu_buffer = trace_percpu_buffer; | 1511 | percpu_buffer = trace_percpu_buffer; |
1512 | 1512 | ||
1513 | if (!percpu_buffer) | 1513 | if (!percpu_buffer) |
1514 | return NULL; | 1514 | return NULL; |
1515 | 1515 | ||
1516 | buffer = per_cpu_ptr(percpu_buffer, smp_processor_id()); | 1516 | buffer = per_cpu_ptr(percpu_buffer, smp_processor_id()); |
1517 | 1517 | ||
1518 | return buffer->buffer; | 1518 | return buffer->buffer; |
1519 | } | 1519 | } |
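get_trace_buf() above avoids locking by keying the scratch buffer on the execution context: an NMI that interrupts an IRQ handler gets a different buffer than the code it interrupted, so nothing is ever shared. The sketch below shows the same idea in user space, under the assumption that current_ctx() is a hypothetical stand-in for the in_nmi()/in_irq()/in_softirq() tests and that a thread-local array stands in for the per-cpu allocations.

/*
 * Sketch of the buffer selection in get_trace_buf(): one scratch buffer
 * per execution context, so that a context which interrupts another
 * never reuses the buffer the interrupted code still holds.
 */
#include <stdio.h>

#define BUF_SIZE 1024

enum ctx { CTX_NORMAL, CTX_SOFTIRQ, CTX_IRQ, CTX_NMI, CTX_MAX };

static __thread char scratch[CTX_MAX][BUF_SIZE];

/* hypothetical: the kernel derives the context from preempt_count() */
static enum ctx current_ctx(void)
{
	return CTX_NORMAL;
}

static char *ctx_buf(void)
{
	return scratch[current_ctx()];
}

int main(void)
{
	char *buf = ctx_buf();

	snprintf(buf, BUF_SIZE, "formatted in context %d", (int)current_ctx());
	puts(buf);
	return 0;
}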
1520 | 1520 | ||
1521 | static int alloc_percpu_trace_buffer(void) | 1521 | static int alloc_percpu_trace_buffer(void) |
1522 | { | 1522 | { |
1523 | struct trace_buffer_struct *buffers; | 1523 | struct trace_buffer_struct *buffers; |
1524 | struct trace_buffer_struct *sirq_buffers; | 1524 | struct trace_buffer_struct *sirq_buffers; |
1525 | struct trace_buffer_struct *irq_buffers; | 1525 | struct trace_buffer_struct *irq_buffers; |
1526 | struct trace_buffer_struct *nmi_buffers; | 1526 | struct trace_buffer_struct *nmi_buffers; |
1527 | 1527 | ||
1528 | buffers = alloc_percpu(struct trace_buffer_struct); | 1528 | buffers = alloc_percpu(struct trace_buffer_struct); |
1529 | if (!buffers) | 1529 | if (!buffers) |
1530 | goto err_warn; | 1530 | goto err_warn; |
1531 | 1531 | ||
1532 | sirq_buffers = alloc_percpu(struct trace_buffer_struct); | 1532 | sirq_buffers = alloc_percpu(struct trace_buffer_struct); |
1533 | if (!sirq_buffers) | 1533 | if (!sirq_buffers) |
1534 | goto err_sirq; | 1534 | goto err_sirq; |
1535 | 1535 | ||
1536 | irq_buffers = alloc_percpu(struct trace_buffer_struct); | 1536 | irq_buffers = alloc_percpu(struct trace_buffer_struct); |
1537 | if (!irq_buffers) | 1537 | if (!irq_buffers) |
1538 | goto err_irq; | 1538 | goto err_irq; |
1539 | 1539 | ||
1540 | nmi_buffers = alloc_percpu(struct trace_buffer_struct); | 1540 | nmi_buffers = alloc_percpu(struct trace_buffer_struct); |
1541 | if (!nmi_buffers) | 1541 | if (!nmi_buffers) |
1542 | goto err_nmi; | 1542 | goto err_nmi; |
1543 | 1543 | ||
1544 | trace_percpu_buffer = buffers; | 1544 | trace_percpu_buffer = buffers; |
1545 | trace_percpu_sirq_buffer = sirq_buffers; | 1545 | trace_percpu_sirq_buffer = sirq_buffers; |
1546 | trace_percpu_irq_buffer = irq_buffers; | 1546 | trace_percpu_irq_buffer = irq_buffers; |
1547 | trace_percpu_nmi_buffer = nmi_buffers; | 1547 | trace_percpu_nmi_buffer = nmi_buffers; |
1548 | 1548 | ||
1549 | return 0; | 1549 | return 0; |
1550 | 1550 | ||
1551 | err_nmi: | 1551 | err_nmi: |
1552 | free_percpu(irq_buffers); | 1552 | free_percpu(irq_buffers); |
1553 | err_irq: | 1553 | err_irq: |
1554 | free_percpu(sirq_buffers); | 1554 | free_percpu(sirq_buffers); |
1555 | err_sirq: | 1555 | err_sirq: |
1556 | free_percpu(buffers); | 1556 | free_percpu(buffers); |
1557 | err_warn: | 1557 | err_warn: |
1558 | WARN(1, "Could not allocate percpu trace_printk buffer"); | 1558 | WARN(1, "Could not allocate percpu trace_printk buffer"); |
1559 | return -ENOMEM; | 1559 | return -ENOMEM; |
1560 | } | 1560 | } |
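alloc_percpu_trace_buffer() follows the common kernel error-unwinding pattern: allocate each resource in order and, on failure, jump to a label that frees only what was already allocated, in reverse order. A minimal sketch of the pattern, with plain malloc() standing in for alloc_percpu():

/*
 * Minimal sketch of the unwind pattern in alloc_percpu_trace_buffer():
 * allocate resources in order and, on any failure, free only what was
 * already allocated, in reverse order.
 */
#include <stdio.h>
#include <stdlib.h>

static char *a, *b, *c;

static int alloc_all(size_t sz)
{
	a = malloc(sz);
	if (!a)
		goto err_a;
	b = malloc(sz);
	if (!b)
		goto err_b;
	c = malloc(sz);
	if (!c)
		goto err_c;
	return 0;

err_c:
	free(b);
err_b:
	free(a);
err_a:
	fprintf(stderr, "allocation failed\n");
	return -1;
}

int main(void)
{
	return alloc_all(4096) ? 1 : 0;
}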
1561 | 1561 | ||
1562 | void trace_printk_init_buffers(void) | 1562 | void trace_printk_init_buffers(void) |
1563 | { | 1563 | { |
1564 | static int buffers_allocated; | 1564 | static int buffers_allocated; |
1565 | 1565 | ||
1566 | if (buffers_allocated) | 1566 | if (buffers_allocated) |
1567 | return; | 1567 | return; |
1568 | 1568 | ||
1569 | if (alloc_percpu_trace_buffer()) | 1569 | if (alloc_percpu_trace_buffer()) |
1570 | return; | 1570 | return; |
1571 | 1571 | ||
1572 | pr_info("ftrace: Allocated trace_printk buffers\n"); | 1572 | pr_info("ftrace: Allocated trace_printk buffers\n"); |
1573 | 1573 | ||
1574 | buffers_allocated = 1; | 1574 | buffers_allocated = 1; |
1575 | } | 1575 | } |
1576 | 1576 | ||
1577 | /** | 1577 | /** |
1578 | * trace_vbprintk - write binary msg to tracing buffer | 1578 | * trace_vbprintk - write binary msg to tracing buffer |
1579 | * | 1579 | * |
1580 | */ | 1580 | */ |
1581 | int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) | 1581 | int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) |
1582 | { | 1582 | { |
1583 | struct ftrace_event_call *call = &event_bprint; | 1583 | struct ftrace_event_call *call = &event_bprint; |
1584 | struct ring_buffer_event *event; | 1584 | struct ring_buffer_event *event; |
1585 | struct ring_buffer *buffer; | 1585 | struct ring_buffer *buffer; |
1586 | struct trace_array *tr = &global_trace; | 1586 | struct trace_array *tr = &global_trace; |
1587 | struct bprint_entry *entry; | 1587 | struct bprint_entry *entry; |
1588 | unsigned long flags; | 1588 | unsigned long flags; |
1589 | char *tbuffer; | 1589 | char *tbuffer; |
1590 | int len = 0, size, pc; | 1590 | int len = 0, size, pc; |
1591 | 1591 | ||
1592 | if (unlikely(tracing_selftest_running || tracing_disabled)) | 1592 | if (unlikely(tracing_selftest_running || tracing_disabled)) |
1593 | return 0; | 1593 | return 0; |
1594 | 1594 | ||
1595 | /* Don't pollute graph traces with trace_vprintk internals */ | 1595 | /* Don't pollute graph traces with trace_vprintk internals */ |
1596 | pause_graph_tracing(); | 1596 | pause_graph_tracing(); |
1597 | 1597 | ||
1598 | pc = preempt_count(); | 1598 | pc = preempt_count(); |
1599 | preempt_disable_notrace(); | 1599 | preempt_disable_notrace(); |
1600 | 1600 | ||
1601 | tbuffer = get_trace_buf(); | 1601 | tbuffer = get_trace_buf(); |
1602 | if (!tbuffer) { | 1602 | if (!tbuffer) { |
1603 | len = 0; | 1603 | len = 0; |
1604 | goto out; | 1604 | goto out; |
1605 | } | 1605 | } |
1606 | 1606 | ||
1607 | len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args); | 1607 | len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args); |
1608 | 1608 | ||
1609 | if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0) | 1609 | if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0) |
1610 | goto out; | 1610 | goto out; |
1611 | 1611 | ||
1612 | local_save_flags(flags); | 1612 | local_save_flags(flags); |
1613 | size = sizeof(*entry) + sizeof(u32) * len; | 1613 | size = sizeof(*entry) + sizeof(u32) * len; |
1614 | buffer = tr->buffer; | 1614 | buffer = tr->buffer; |
1615 | event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size, | 1615 | event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size, |
1616 | flags, pc); | 1616 | flags, pc); |
1617 | if (!event) | 1617 | if (!event) |
1618 | goto out; | 1618 | goto out; |
1619 | entry = ring_buffer_event_data(event); | 1619 | entry = ring_buffer_event_data(event); |
1620 | entry->ip = ip; | 1620 | entry->ip = ip; |
1621 | entry->fmt = fmt; | 1621 | entry->fmt = fmt; |
1622 | 1622 | ||
1623 | memcpy(entry->buf, tbuffer, sizeof(u32) * len); | 1623 | memcpy(entry->buf, tbuffer, sizeof(u32) * len); |
1624 | if (!filter_check_discard(call, entry, buffer, event)) { | 1624 | if (!filter_check_discard(call, entry, buffer, event)) { |
1625 | ring_buffer_unlock_commit(buffer, event); | 1625 | ring_buffer_unlock_commit(buffer, event); |
1626 | ftrace_trace_stack(buffer, flags, 6, pc); | 1626 | ftrace_trace_stack(buffer, flags, 6, pc); |
1627 | } | 1627 | } |
1628 | 1628 | ||
1629 | out: | 1629 | out: |
1630 | preempt_enable_notrace(); | 1630 | preempt_enable_notrace(); |
1631 | unpause_graph_tracing(); | 1631 | unpause_graph_tracing(); |
1632 | 1632 | ||
1633 | return len; | 1633 | return len; |
1634 | } | 1634 | } |
1635 | EXPORT_SYMBOL_GPL(trace_vbprintk); | 1635 | EXPORT_SYMBOL_GPL(trace_vbprintk); |
1636 | 1636 | ||
1637 | int trace_array_printk(struct trace_array *tr, | 1637 | int trace_array_printk(struct trace_array *tr, |
1638 | unsigned long ip, const char *fmt, ...) | 1638 | unsigned long ip, const char *fmt, ...) |
1639 | { | 1639 | { |
1640 | int ret; | 1640 | int ret; |
1641 | va_list ap; | 1641 | va_list ap; |
1642 | 1642 | ||
1643 | if (!(trace_flags & TRACE_ITER_PRINTK)) | 1643 | if (!(trace_flags & TRACE_ITER_PRINTK)) |
1644 | return 0; | 1644 | return 0; |
1645 | 1645 | ||
1646 | va_start(ap, fmt); | 1646 | va_start(ap, fmt); |
1647 | ret = trace_array_vprintk(tr, ip, fmt, ap); | 1647 | ret = trace_array_vprintk(tr, ip, fmt, ap); |
1648 | va_end(ap); | 1648 | va_end(ap); |
1649 | return ret; | 1649 | return ret; |
1650 | } | 1650 | } |
1651 | 1651 | ||
1652 | int trace_array_vprintk(struct trace_array *tr, | 1652 | int trace_array_vprintk(struct trace_array *tr, |
1653 | unsigned long ip, const char *fmt, va_list args) | 1653 | unsigned long ip, const char *fmt, va_list args) |
1654 | { | 1654 | { |
1655 | struct ftrace_event_call *call = &event_print; | 1655 | struct ftrace_event_call *call = &event_print; |
1656 | struct ring_buffer_event *event; | 1656 | struct ring_buffer_event *event; |
1657 | struct ring_buffer *buffer; | 1657 | struct ring_buffer *buffer; |
1658 | int len = 0, size, pc; | 1658 | int len = 0, size, pc; |
1659 | struct print_entry *entry; | 1659 | struct print_entry *entry; |
1660 | unsigned long flags; | 1660 | unsigned long flags; |
1661 | char *tbuffer; | 1661 | char *tbuffer; |
1662 | 1662 | ||
1663 | if (tracing_disabled || tracing_selftest_running) | 1663 | if (tracing_disabled || tracing_selftest_running) |
1664 | return 0; | 1664 | return 0; |
1665 | 1665 | ||
1666 | /* Don't pollute graph traces with trace_vprintk internals */ | 1666 | /* Don't pollute graph traces with trace_vprintk internals */ |
1667 | pause_graph_tracing(); | 1667 | pause_graph_tracing(); |
1668 | 1668 | ||
1669 | pc = preempt_count(); | 1669 | pc = preempt_count(); |
1670 | preempt_disable_notrace(); | 1670 | preempt_disable_notrace(); |
1671 | 1671 | ||
1672 | 1672 | ||
1673 | tbuffer = get_trace_buf(); | 1673 | tbuffer = get_trace_buf(); |
1674 | if (!tbuffer) { | 1674 | if (!tbuffer) { |
1675 | len = 0; | 1675 | len = 0; |
1676 | goto out; | 1676 | goto out; |
1677 | } | 1677 | } |
1678 | 1678 | ||
1679 | len = vsnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args); | 1679 | len = vsnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args); |
1680 | if (len > TRACE_BUF_SIZE) | 1680 | if (len > TRACE_BUF_SIZE) |
1681 | goto out; | 1681 | goto out; |
1682 | 1682 | ||
1683 | local_save_flags(flags); | 1683 | local_save_flags(flags); |
1684 | size = sizeof(*entry) + len + 1; | 1684 | size = sizeof(*entry) + len + 1; |
1685 | buffer = tr->buffer; | 1685 | buffer = tr->buffer; |
1686 | event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, | 1686 | event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, |
1687 | flags, pc); | 1687 | flags, pc); |
1688 | if (!event) | 1688 | if (!event) |
1689 | goto out; | 1689 | goto out; |
1690 | entry = ring_buffer_event_data(event); | 1690 | entry = ring_buffer_event_data(event); |
1691 | entry->ip = ip; | 1691 | entry->ip = ip; |
1692 | 1692 | ||
1693 | memcpy(&entry->buf, tbuffer, len); | 1693 | memcpy(&entry->buf, tbuffer, len); |
1694 | entry->buf[len] = '\0'; | 1694 | entry->buf[len] = '\0'; |
1695 | if (!filter_check_discard(call, entry, buffer, event)) { | 1695 | if (!filter_check_discard(call, entry, buffer, event)) { |
1696 | ring_buffer_unlock_commit(buffer, event); | 1696 | ring_buffer_unlock_commit(buffer, event); |
1697 | ftrace_trace_stack(buffer, flags, 6, pc); | 1697 | ftrace_trace_stack(buffer, flags, 6, pc); |
1698 | } | 1698 | } |
1699 | out: | 1699 | out: |
1700 | preempt_enable_notrace(); | 1700 | preempt_enable_notrace(); |
1701 | unpause_graph_tracing(); | 1701 | unpause_graph_tracing(); |
1702 | 1702 | ||
1703 | return len; | 1703 | return len; |
1704 | } | 1704 | } |
1705 | 1705 | ||
1706 | int trace_vprintk(unsigned long ip, const char *fmt, va_list args) | 1706 | int trace_vprintk(unsigned long ip, const char *fmt, va_list args) |
1707 | { | 1707 | { |
1708 | return trace_array_vprintk(&global_trace, ip, fmt, args); | 1708 | return trace_array_vprintk(&global_trace, ip, fmt, args); |
1709 | } | 1709 | } |
1710 | EXPORT_SYMBOL_GPL(trace_vprintk); | 1710 | EXPORT_SYMBOL_GPL(trace_vprintk); |
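Both trace_vbprintk() and trace_array_vprintk() share the same two-step shape: format into a per-context scratch buffer first, so the exact length is known before any space is reserved, then reserve a record of exactly the right size and copy the text in. A user-space sketch of that shape, where record_reserve() is a hypothetical stand-in for trace_buffer_lock_reserve() and struct record is illustrative only:

/*
 * Sketch of the format-then-reserve pattern: format into a fixed
 * scratch buffer to learn the length, then allocate a record of
 * exactly sizeof(header) + len and copy the text in.
 */
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define SCRATCH_SIZE 1024

struct record {
	unsigned long ip;	/* caller, as in struct print_entry */
	char buf[];		/* formatted text follows the header */
};

/* hypothetical: in the kernel this reserves space in the ring buffer */
static struct record *record_reserve(size_t size)
{
	return malloc(size);
}

static struct record *record_vprintf(unsigned long ip, const char *fmt, va_list ap)
{
	char scratch[SCRATCH_SIZE];
	struct record *rec;
	int len;

	len = vsnprintf(scratch, sizeof(scratch), fmt, ap);
	if (len < 0 || len >= (int)sizeof(scratch))
		return NULL;		/* too big: drop it, as the kernel does */

	rec = record_reserve(sizeof(*rec) + len + 1);
	if (!rec)
		return NULL;

	rec->ip = ip;
	memcpy(rec->buf, scratch, len + 1);
	return rec;
}

static struct record *record_printf(unsigned long ip, const char *fmt, ...)
{
	va_list ap;
	struct record *rec;

	va_start(ap, fmt);
	rec = record_vprintf(ip, fmt, ap);
	va_end(ap);
	return rec;
}

int main(void)
{
	struct record *rec = record_printf(0x1234, "hello %s", "ring buffer");

	if (rec) {
		printf("%lx: %s\n", rec->ip, rec->buf);
		free(rec);
	}
	return 0;
}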
1711 | 1711 | ||
1712 | static void trace_iterator_increment(struct trace_iterator *iter) | 1712 | static void trace_iterator_increment(struct trace_iterator *iter) |
1713 | { | 1713 | { |
1714 | struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu); | 1714 | struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu); |
1715 | 1715 | ||
1716 | iter->idx++; | 1716 | iter->idx++; |
1717 | if (buf_iter) | 1717 | if (buf_iter) |
1718 | ring_buffer_read(buf_iter, NULL); | 1718 | ring_buffer_read(buf_iter, NULL); |
1719 | } | 1719 | } |
1720 | 1720 | ||
1721 | static struct trace_entry * | 1721 | static struct trace_entry * |
1722 | peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts, | 1722 | peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts, |
1723 | unsigned long *lost_events) | 1723 | unsigned long *lost_events) |
1724 | { | 1724 | { |
1725 | struct ring_buffer_event *event; | 1725 | struct ring_buffer_event *event; |
1726 | struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu); | 1726 | struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu); |
1727 | 1727 | ||
1728 | if (buf_iter) | 1728 | if (buf_iter) |
1729 | event = ring_buffer_iter_peek(buf_iter, ts); | 1729 | event = ring_buffer_iter_peek(buf_iter, ts); |
1730 | else | 1730 | else |
1731 | event = ring_buffer_peek(iter->tr->buffer, cpu, ts, | 1731 | event = ring_buffer_peek(iter->tr->buffer, cpu, ts, |
1732 | lost_events); | 1732 | lost_events); |
1733 | 1733 | ||
1734 | if (event) { | 1734 | if (event) { |
1735 | iter->ent_size = ring_buffer_event_length(event); | 1735 | iter->ent_size = ring_buffer_event_length(event); |
1736 | return ring_buffer_event_data(event); | 1736 | return ring_buffer_event_data(event); |
1737 | } | 1737 | } |
1738 | iter->ent_size = 0; | 1738 | iter->ent_size = 0; |
1739 | return NULL; | 1739 | return NULL; |
1740 | } | 1740 | } |
1741 | 1741 | ||
1742 | static struct trace_entry * | 1742 | static struct trace_entry * |
1743 | __find_next_entry(struct trace_iterator *iter, int *ent_cpu, | 1743 | __find_next_entry(struct trace_iterator *iter, int *ent_cpu, |
1744 | unsigned long *missing_events, u64 *ent_ts) | 1744 | unsigned long *missing_events, u64 *ent_ts) |
1745 | { | 1745 | { |
1746 | struct ring_buffer *buffer = iter->tr->buffer; | 1746 | struct ring_buffer *buffer = iter->tr->buffer; |
1747 | struct trace_entry *ent, *next = NULL; | 1747 | struct trace_entry *ent, *next = NULL; |
1748 | unsigned long lost_events = 0, next_lost = 0; | 1748 | unsigned long lost_events = 0, next_lost = 0; |
1749 | int cpu_file = iter->cpu_file; | 1749 | int cpu_file = iter->cpu_file; |
1750 | u64 next_ts = 0, ts; | 1750 | u64 next_ts = 0, ts; |
1751 | int next_cpu = -1; | 1751 | int next_cpu = -1; |
1752 | int next_size = 0; | 1752 | int next_size = 0; |
1753 | int cpu; | 1753 | int cpu; |
1754 | 1754 | ||
1755 | /* | 1755 | /* |
1756 | * If we are in a per_cpu trace file, don't bother iterating over | 1756 | * If we are in a per_cpu trace file, don't bother iterating over |
1757 | * all CPUs; peek at that CPU directly. | 1757 | * all CPUs; peek at that CPU directly. |
1758 | */ | 1758 | */ |
1759 | if (cpu_file > TRACE_PIPE_ALL_CPU) { | 1759 | if (cpu_file > TRACE_PIPE_ALL_CPU) { |
1760 | if (ring_buffer_empty_cpu(buffer, cpu_file)) | 1760 | if (ring_buffer_empty_cpu(buffer, cpu_file)) |
1761 | return NULL; | 1761 | return NULL; |
1762 | ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events); | 1762 | ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events); |
1763 | if (ent_cpu) | 1763 | if (ent_cpu) |
1764 | *ent_cpu = cpu_file; | 1764 | *ent_cpu = cpu_file; |
1765 | 1765 | ||
1766 | return ent; | 1766 | return ent; |
1767 | } | 1767 | } |
1768 | 1768 | ||
1769 | for_each_tracing_cpu(cpu) { | 1769 | for_each_tracing_cpu(cpu) { |
1770 | 1770 | ||
1771 | if (ring_buffer_empty_cpu(buffer, cpu)) | 1771 | if (ring_buffer_empty_cpu(buffer, cpu)) |
1772 | continue; | 1772 | continue; |
1773 | 1773 | ||
1774 | ent = peek_next_entry(iter, cpu, &ts, &lost_events); | 1774 | ent = peek_next_entry(iter, cpu, &ts, &lost_events); |
1775 | 1775 | ||
1776 | /* | 1776 | /* |
1777 | * Pick the entry with the smallest timestamp: | 1777 | * Pick the entry with the smallest timestamp: |
1778 | */ | 1778 | */ |
1779 | if (ent && (!next || ts < next_ts)) { | 1779 | if (ent && (!next || ts < next_ts)) { |
1780 | next = ent; | 1780 | next = ent; |
1781 | next_cpu = cpu; | 1781 | next_cpu = cpu; |
1782 | next_ts = ts; | 1782 | next_ts = ts; |
1783 | next_lost = lost_events; | 1783 | next_lost = lost_events; |
1784 | next_size = iter->ent_size; | 1784 | next_size = iter->ent_size; |
1785 | } | 1785 | } |
1786 | } | 1786 | } |
1787 | 1787 | ||
1788 | iter->ent_size = next_size; | 1788 | iter->ent_size = next_size; |
1789 | 1789 | ||
1790 | if (ent_cpu) | 1790 | if (ent_cpu) |
1791 | *ent_cpu = next_cpu; | 1791 | *ent_cpu = next_cpu; |
1792 | 1792 | ||
1793 | if (ent_ts) | 1793 | if (ent_ts) |
1794 | *ent_ts = next_ts; | 1794 | *ent_ts = next_ts; |
1795 | 1795 | ||
1796 | if (missing_events) | 1796 | if (missing_events) |
1797 | *missing_events = next_lost; | 1797 | *missing_events = next_lost; |
1798 | 1798 | ||
1799 | return next; | 1799 | return next; |
1800 | } | 1800 | } |
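__find_next_entry() is effectively a k-way merge: each per-cpu buffer is already ordered by timestamp, so the globally next event is simply the smallest head timestamp across the non-empty buffers. The sketch below demonstrates that selection step on plain arrays standing in for the per-cpu ring buffers; the struct and helper names are illustrative only.

/*
 * Sketch of the merge step in __find_next_entry(): return the "cpu"
 * whose oldest pending event has the smallest timestamp, or -1 when
 * every buffer is drained.
 */
#include <stdio.h>

struct cpu_buf {
	const unsigned long long *ts;	/* timestamps, ascending */
	int len;
	int pos;
};

static int find_next_cpu(struct cpu_buf *bufs, int ncpus)
{
	unsigned long long next_ts = 0;
	int next_cpu = -1;
	int cpu;

	for (cpu = 0; cpu < ncpus; cpu++) {
		struct cpu_buf *b = &bufs[cpu];

		if (b->pos >= b->len)
			continue;	/* this buffer is drained */

		if (next_cpu < 0 || b->ts[b->pos] < next_ts) {
			next_cpu = cpu;
			next_ts = b->ts[b->pos];
		}
	}
	return next_cpu;
}

int main(void)
{
	static const unsigned long long c0[] = { 10, 40, 70 };
	static const unsigned long long c1[] = { 20, 30, 90 };
	struct cpu_buf bufs[] = {
		{ c0, 3, 0 },
		{ c1, 3, 0 },
	};
	int cpu;

	while ((cpu = find_next_cpu(bufs, 2)) >= 0) {
		printf("cpu%d ts=%llu\n", cpu, bufs[cpu].ts[bufs[cpu].pos]);
		bufs[cpu].pos++;	/* consume, like trace_iterator_increment() */
	}
	return 0;
}

The real function additionally carries along the winning CPU's lost-event count and entry size, as seen above.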
1801 | 1801 | ||
1802 | /* Find the next real entry, without updating the iterator itself */ | 1802 | /* Find the next real entry, without updating the iterator itself */ |
1803 | struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, | 1803 | struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, |
1804 | int *ent_cpu, u64 *ent_ts) | 1804 | int *ent_cpu, u64 *ent_ts) |
1805 | { | 1805 | { |
1806 | return __find_next_entry(iter, ent_cpu, NULL, ent_ts); | 1806 | return __find_next_entry(iter, ent_cpu, NULL, ent_ts); |
1807 | } | 1807 | } |
1808 | 1808 | ||
1809 | /* Find the next real entry, and increment the iterator to the next entry */ | 1809 | /* Find the next real entry, and increment the iterator to the next entry */ |
1810 | void *trace_find_next_entry_inc(struct trace_iterator *iter) | 1810 | void *trace_find_next_entry_inc(struct trace_iterator *iter) |
1811 | { | 1811 | { |
1812 | iter->ent = __find_next_entry(iter, &iter->cpu, | 1812 | iter->ent = __find_next_entry(iter, &iter->cpu, |
1813 | &iter->lost_events, &iter->ts); | 1813 | &iter->lost_events, &iter->ts); |
1814 | 1814 | ||
1815 | if (iter->ent) | 1815 | if (iter->ent) |
1816 | trace_iterator_increment(iter); | 1816 | trace_iterator_increment(iter); |
1817 | 1817 | ||
1818 | return iter->ent ? iter : NULL; | 1818 | return iter->ent ? iter : NULL; |
1819 | } | 1819 | } |
1820 | 1820 | ||
1821 | static void trace_consume(struct trace_iterator *iter) | 1821 | static void trace_consume(struct trace_iterator *iter) |
1822 | { | 1822 | { |
1823 | ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts, | 1823 | ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts, |
1824 | &iter->lost_events); | 1824 | &iter->lost_events); |
1825 | } | 1825 | } |
1826 | 1826 | ||
1827 | static void *s_next(struct seq_file *m, void *v, loff_t *pos) | 1827 | static void *s_next(struct seq_file *m, void *v, loff_t *pos) |
1828 | { | 1828 | { |
1829 | struct trace_iterator *iter = m->private; | 1829 | struct trace_iterator *iter = m->private; |
1830 | int i = (int)*pos; | 1830 | int i = (int)*pos; |
1831 | void *ent; | 1831 | void *ent; |
1832 | 1832 | ||
1833 | WARN_ON_ONCE(iter->leftover); | 1833 | WARN_ON_ONCE(iter->leftover); |
1834 | 1834 | ||
1835 | (*pos)++; | 1835 | (*pos)++; |
1836 | 1836 | ||
1837 | /* can't go backwards */ | 1837 | /* can't go backwards */ |
1838 | if (iter->idx > i) | 1838 | if (iter->idx > i) |
1839 | return NULL; | 1839 | return NULL; |
1840 | 1840 | ||
1841 | if (iter->idx < 0) | 1841 | if (iter->idx < 0) |
1842 | ent = trace_find_next_entry_inc(iter); | 1842 | ent = trace_find_next_entry_inc(iter); |
1843 | else | 1843 | else |
1844 | ent = iter; | 1844 | ent = iter; |
1845 | 1845 | ||
1846 | while (ent && iter->idx < i) | 1846 | while (ent && iter->idx < i) |
1847 | ent = trace_find_next_entry_inc(iter); | 1847 | ent = trace_find_next_entry_inc(iter); |
1848 | 1848 | ||
1849 | iter->pos = *pos; | 1849 | iter->pos = *pos; |
1850 | 1850 | ||
1851 | return ent; | 1851 | return ent; |
1852 | } | 1852 | } |
1853 | 1853 | ||
1854 | void tracing_iter_reset(struct trace_iterator *iter, int cpu) | 1854 | void tracing_iter_reset(struct trace_iterator *iter, int cpu) |
1855 | { | 1855 | { |
1856 | struct trace_array *tr = iter->tr; | 1856 | struct trace_array *tr = iter->tr; |
1857 | struct ring_buffer_event *event; | 1857 | struct ring_buffer_event *event; |
1858 | struct ring_buffer_iter *buf_iter; | 1858 | struct ring_buffer_iter *buf_iter; |
1859 | unsigned long entries = 0; | 1859 | unsigned long entries = 0; |
1860 | u64 ts; | 1860 | u64 ts; |
1861 | 1861 | ||
1862 | tr->data[cpu]->skipped_entries = 0; | 1862 | tr->data[cpu]->skipped_entries = 0; |
1863 | 1863 | ||
1864 | buf_iter = trace_buffer_iter(iter, cpu); | 1864 | buf_iter = trace_buffer_iter(iter, cpu); |
1865 | if (!buf_iter) | 1865 | if (!buf_iter) |
1866 | return; | 1866 | return; |
1867 | 1867 | ||
1868 | ring_buffer_iter_reset(buf_iter); | 1868 | ring_buffer_iter_reset(buf_iter); |
1869 | 1869 | ||
1870 | /* | 1870 | /* |
1871 | * We could have the case with the max latency tracers | 1871 | * We could have the case with the max latency tracers |
1872 | * that a reset never took place on a cpu. This is evidenced | 1872 | * that a reset never took place on a cpu. This is evidenced |
1873 | * by the timestamp being before the start of the buffer. | 1873 | * by the timestamp being before the start of the buffer. |
1874 | */ | 1874 | */ |
1875 | while ((event = ring_buffer_iter_peek(buf_iter, &ts))) { | 1875 | while ((event = ring_buffer_iter_peek(buf_iter, &ts))) { |
1876 | if (ts >= iter->tr->time_start) | 1876 | if (ts >= iter->tr->time_start) |
1877 | break; | 1877 | break; |
1878 | entries++; | 1878 | entries++; |
1879 | ring_buffer_read(buf_iter, NULL); | 1879 | ring_buffer_read(buf_iter, NULL); |
1880 | } | 1880 | } |
1881 | 1881 | ||
1882 | tr->data[cpu]->skipped_entries = entries; | 1882 | tr->data[cpu]->skipped_entries = entries; |
1883 | } | 1883 | } |
1884 | 1884 | ||
1885 | /* | 1885 | /* |
1886 | * The current tracer is copied to avoid global locking | 1886 | * The current tracer is copied to avoid global locking |
1887 | * all around. | 1887 | * all around. |
1888 | */ | 1888 | */ |
1889 | static void *s_start(struct seq_file *m, loff_t *pos) | 1889 | static void *s_start(struct seq_file *m, loff_t *pos) |
1890 | { | 1890 | { |
1891 | struct trace_iterator *iter = m->private; | 1891 | struct trace_iterator *iter = m->private; |
1892 | static struct tracer *old_tracer; | 1892 | static struct tracer *old_tracer; |
1893 | int cpu_file = iter->cpu_file; | 1893 | int cpu_file = iter->cpu_file; |
1894 | void *p = NULL; | 1894 | void *p = NULL; |
1895 | loff_t l = 0; | 1895 | loff_t l = 0; |
1896 | int cpu; | 1896 | int cpu; |
1897 | 1897 | ||
1898 | /* copy the tracer to avoid using a global lock all around */ | 1898 | /* copy the tracer to avoid using a global lock all around */ |
1899 | mutex_lock(&trace_types_lock); | 1899 | mutex_lock(&trace_types_lock); |
1900 | if (unlikely(old_tracer != current_trace && current_trace)) { | 1900 | if (unlikely(old_tracer != current_trace && current_trace)) { |
1901 | old_tracer = current_trace; | 1901 | old_tracer = current_trace; |
1902 | *iter->trace = *current_trace; | 1902 | *iter->trace = *current_trace; |
1903 | } | 1903 | } |
1904 | mutex_unlock(&trace_types_lock); | 1904 | mutex_unlock(&trace_types_lock); |
1905 | 1905 | ||
1906 | atomic_inc(&trace_record_cmdline_disabled); | 1906 | atomic_inc(&trace_record_cmdline_disabled); |
1907 | 1907 | ||
1908 | if (*pos != iter->pos) { | 1908 | if (*pos != iter->pos) { |
1909 | iter->ent = NULL; | 1909 | iter->ent = NULL; |
1910 | iter->cpu = 0; | 1910 | iter->cpu = 0; |
1911 | iter->idx = -1; | 1911 | iter->idx = -1; |
1912 | 1912 | ||
1913 | if (cpu_file == TRACE_PIPE_ALL_CPU) { | 1913 | if (cpu_file == TRACE_PIPE_ALL_CPU) { |
1914 | for_each_tracing_cpu(cpu) | 1914 | for_each_tracing_cpu(cpu) |
1915 | tracing_iter_reset(iter, cpu); | 1915 | tracing_iter_reset(iter, cpu); |
1916 | } else | 1916 | } else |
1917 | tracing_iter_reset(iter, cpu_file); | 1917 | tracing_iter_reset(iter, cpu_file); |
1918 | 1918 | ||
1919 | iter->leftover = 0; | 1919 | iter->leftover = 0; |
1920 | for (p = iter; p && l < *pos; p = s_next(m, p, &l)) | 1920 | for (p = iter; p && l < *pos; p = s_next(m, p, &l)) |
1921 | ; | 1921 | ; |
1922 | 1922 | ||
1923 | } else { | 1923 | } else { |
1924 | /* | 1924 | /* |
1925 | * If we overflowed the seq_file before, then we want | 1925 | * If we overflowed the seq_file before, then we want |
1926 | * to just reuse the trace_seq buffer again. | 1926 | * to just reuse the trace_seq buffer again. |
1927 | */ | 1927 | */ |
1928 | if (iter->leftover) | 1928 | if (iter->leftover) |
1929 | p = iter; | 1929 | p = iter; |
1930 | else { | 1930 | else { |
1931 | l = *pos - 1; | 1931 | l = *pos - 1; |
1932 | p = s_next(m, p, &l); | 1932 | p = s_next(m, p, &l); |
1933 | } | 1933 | } |
1934 | } | 1934 | } |
1935 | 1935 | ||
1936 | trace_event_read_lock(); | 1936 | trace_event_read_lock(); |
1937 | trace_access_lock(cpu_file); | 1937 | trace_access_lock(cpu_file); |
1938 | return p; | 1938 | return p; |
1939 | } | 1939 | } |
1940 | 1940 | ||
1941 | static void s_stop(struct seq_file *m, void *p) | 1941 | static void s_stop(struct seq_file *m, void *p) |
1942 | { | 1942 | { |
1943 | struct trace_iterator *iter = m->private; | 1943 | struct trace_iterator *iter = m->private; |
1944 | 1944 | ||
1945 | atomic_dec(&trace_record_cmdline_disabled); | 1945 | atomic_dec(&trace_record_cmdline_disabled); |
1946 | trace_access_unlock(iter->cpu_file); | 1946 | trace_access_unlock(iter->cpu_file); |
1947 | trace_event_read_unlock(); | 1947 | trace_event_read_unlock(); |
1948 | } | 1948 | } |
1949 | 1949 | ||
1950 | static void | 1950 | static void |
1951 | get_total_entries(struct trace_array *tr, unsigned long *total, unsigned long *entries) | 1951 | get_total_entries(struct trace_array *tr, unsigned long *total, unsigned long *entries) |
1952 | { | 1952 | { |
1953 | unsigned long count; | 1953 | unsigned long count; |
1954 | int cpu; | 1954 | int cpu; |
1955 | 1955 | ||
1956 | *total = 0; | 1956 | *total = 0; |
1957 | *entries = 0; | 1957 | *entries = 0; |
1958 | 1958 | ||
1959 | for_each_tracing_cpu(cpu) { | 1959 | for_each_tracing_cpu(cpu) { |
1960 | count = ring_buffer_entries_cpu(tr->buffer, cpu); | 1960 | count = ring_buffer_entries_cpu(tr->buffer, cpu); |
1961 | /* | 1961 | /* |
1962 | * If this buffer has skipped entries, then we hold all | 1962 | * If this buffer has skipped entries, then we hold all |
1963 | * entries for the trace and we need to ignore the | 1963 | * entries for the trace and we need to ignore the |
1964 | * ones before the time stamp. | 1964 | * ones before the time stamp. |
1965 | */ | 1965 | */ |
1966 | if (tr->data[cpu]->skipped_entries) { | 1966 | if (tr->data[cpu]->skipped_entries) { |
1967 | count -= tr->data[cpu]->skipped_entries; | 1967 | count -= tr->data[cpu]->skipped_entries; |
1968 | /* total is the same as the entries */ | 1968 | /* total is the same as the entries */ |
1969 | *total += count; | 1969 | *total += count; |
1970 | } else | 1970 | } else |
1971 | *total += count + | 1971 | *total += count + |
1972 | ring_buffer_overrun_cpu(tr->buffer, cpu); | 1972 | ring_buffer_overrun_cpu(tr->buffer, cpu); |
1973 | *entries += count; | 1973 | *entries += count; |
1974 | } | 1974 | } |
1975 | } | 1975 | } |
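get_total_entries() distinguishes events still present in the buffer ("entries") from everything that was ever written ("total"), where total also counts the per-cpu overrun. That pair is what the entries-in-buffer/entries-written header line below reports. A small sketch of the arithmetic, with plain counters standing in for the ring-buffer statistics:

/*
 * Sketch of the accounting in get_total_entries(): entries = what is
 * still in the buffer, total = entries plus whatever was overwritten
 * when the buffer wrapped (the overrun counter).
 */
#include <stdio.h>

struct cpu_stats {
	unsigned long entries;	/* events currently in the buffer */
	unsigned long overrun;	/* events overwritten on wrap */
};

static void get_totals(const struct cpu_stats *s, int ncpus,
		       unsigned long *total, unsigned long *entries)
{
	int cpu;

	*total = 0;
	*entries = 0;
	for (cpu = 0; cpu < ncpus; cpu++) {
		*entries += s[cpu].entries;
		*total += s[cpu].entries + s[cpu].overrun;
	}
}

int main(void)
{
	struct cpu_stats stats[2] = { { 100, 25 }, { 80, 0 } };
	unsigned long total, entries;

	get_totals(stats, 2, &total, &entries);
	printf("# entries-in-buffer/entries-written: %lu/%lu\n",
	       entries, total);
	return 0;
}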
1976 | 1976 | ||
1977 | static void print_lat_help_header(struct seq_file *m) | 1977 | static void print_lat_help_header(struct seq_file *m) |
1978 | { | 1978 | { |
1979 | seq_puts(m, "# _------=> CPU# \n"); | 1979 | seq_puts(m, "# _------=> CPU# \n"); |
1980 | seq_puts(m, "# / _-----=> irqs-off \n"); | 1980 | seq_puts(m, "# / _-----=> irqs-off \n"); |
1981 | seq_puts(m, "# | / _----=> need-resched \n"); | 1981 | seq_puts(m, "# | / _----=> need-resched \n"); |
1982 | seq_puts(m, "# || / _---=> hardirq/softirq \n"); | 1982 | seq_puts(m, "# || / _---=> hardirq/softirq \n"); |
1983 | seq_puts(m, "# ||| / _--=> preempt-depth \n"); | 1983 | seq_puts(m, "# ||| / _--=> preempt-depth \n"); |
1984 | seq_puts(m, "# |||| / delay \n"); | 1984 | seq_puts(m, "# |||| / delay \n"); |
1985 | seq_puts(m, "# cmd pid ||||| time | caller \n"); | 1985 | seq_puts(m, "# cmd pid ||||| time | caller \n"); |
1986 | seq_puts(m, "# \\ / ||||| \\ | / \n"); | 1986 | seq_puts(m, "# \\ / ||||| \\ | / \n"); |
1987 | } | 1987 | } |
1988 | 1988 | ||
1989 | static void print_event_info(struct trace_array *tr, struct seq_file *m) | 1989 | static void print_event_info(struct trace_array *tr, struct seq_file *m) |
1990 | { | 1990 | { |
1991 | unsigned long total; | 1991 | unsigned long total; |
1992 | unsigned long entries; | 1992 | unsigned long entries; |
1993 | 1993 | ||
1994 | get_total_entries(tr, &total, &entries); | 1994 | get_total_entries(tr, &total, &entries); |
1995 | seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n", | 1995 | seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n", |
1996 | entries, total, num_online_cpus()); | 1996 | entries, total, num_online_cpus()); |
1997 | seq_puts(m, "#\n"); | 1997 | seq_puts(m, "#\n"); |
1998 | } | 1998 | } |
1999 | 1999 | ||
2000 | static void print_func_help_header(struct trace_array *tr, struct seq_file *m) | 2000 | static void print_func_help_header(struct trace_array *tr, struct seq_file *m) |
2001 | { | 2001 | { |
2002 | print_event_info(tr, m); | 2002 | print_event_info(tr, m); |
2003 | seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n"); | 2003 | seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n"); |
2004 | seq_puts(m, "# | | | | |\n"); | 2004 | seq_puts(m, "# | | | | |\n"); |
2005 | } | 2005 | } |
2006 | 2006 | ||
2007 | static void print_func_help_header_irq(struct trace_array *tr, struct seq_file *m) | 2007 | static void print_func_help_header_irq(struct trace_array *tr, struct seq_file *m) |
2008 | { | 2008 | { |
2009 | print_event_info(tr, m); | 2009 | print_event_info(tr, m); |
2010 | seq_puts(m, "# _-----=> irqs-off\n"); | 2010 | seq_puts(m, "# _-----=> irqs-off\n"); |
2011 | seq_puts(m, "# / _----=> need-resched\n"); | 2011 | seq_puts(m, "# / _----=> need-resched\n"); |
2012 | seq_puts(m, "# | / _---=> hardirq/softirq\n"); | 2012 | seq_puts(m, "# | / _---=> hardirq/softirq\n"); |
2013 | seq_puts(m, "# || / _--=> preempt-depth\n"); | 2013 | seq_puts(m, "# || / _--=> preempt-depth\n"); |
2014 | seq_puts(m, "# ||| / delay\n"); | 2014 | seq_puts(m, "# ||| / delay\n"); |
2015 | seq_puts(m, "# TASK-PID CPU# |||| TIMESTAMP FUNCTION\n"); | 2015 | seq_puts(m, "# TASK-PID CPU# |||| TIMESTAMP FUNCTION\n"); |
2016 | seq_puts(m, "# | | | |||| | |\n"); | 2016 | seq_puts(m, "# | | | |||| | |\n"); |
2017 | } | 2017 | } |
2018 | 2018 | ||
2019 | void | 2019 | void |
2020 | print_trace_header(struct seq_file *m, struct trace_iterator *iter) | 2020 | print_trace_header(struct seq_file *m, struct trace_iterator *iter) |
2021 | { | 2021 | { |
2022 | unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); | 2022 | unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); |
2023 | struct trace_array *tr = iter->tr; | 2023 | struct trace_array *tr = iter->tr; |
2024 | struct trace_array_cpu *data = tr->data[tr->cpu]; | 2024 | struct trace_array_cpu *data = tr->data[tr->cpu]; |
2025 | struct tracer *type = current_trace; | 2025 | struct tracer *type = current_trace; |
2026 | unsigned long entries; | 2026 | unsigned long entries; |
2027 | unsigned long total; | 2027 | unsigned long total; |
2028 | const char *name = "preemption"; | 2028 | const char *name = "preemption"; |
2029 | 2029 | ||
2030 | if (type) | 2030 | if (type) |
2031 | name = type->name; | 2031 | name = type->name; |
2032 | 2032 | ||
2033 | get_total_entries(tr, &total, &entries); | 2033 | get_total_entries(tr, &total, &entries); |
2034 | 2034 | ||
2035 | seq_printf(m, "# %s latency trace v1.1.5 on %s\n", | 2035 | seq_printf(m, "# %s latency trace v1.1.5 on %s\n", |
2036 | name, UTS_RELEASE); | 2036 | name, UTS_RELEASE); |
2037 | seq_puts(m, "# -----------------------------------" | 2037 | seq_puts(m, "# -----------------------------------" |
2038 | "---------------------------------\n"); | 2038 | "---------------------------------\n"); |
2039 | seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |" | 2039 | seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |" |
2040 | " (M:%s VP:%d, KP:%d, SP:%d HP:%d", | 2040 | " (M:%s VP:%d, KP:%d, SP:%d HP:%d", |
2041 | nsecs_to_usecs(data->saved_latency), | 2041 | nsecs_to_usecs(data->saved_latency), |
2042 | entries, | 2042 | entries, |
2043 | total, | 2043 | total, |
2044 | tr->cpu, | 2044 | tr->cpu, |
2045 | #if defined(CONFIG_PREEMPT_NONE) | 2045 | #if defined(CONFIG_PREEMPT_NONE) |
2046 | "server", | 2046 | "server", |
2047 | #elif defined(CONFIG_PREEMPT_VOLUNTARY) | 2047 | #elif defined(CONFIG_PREEMPT_VOLUNTARY) |
2048 | "desktop", | 2048 | "desktop", |
2049 | #elif defined(CONFIG_PREEMPT) | 2049 | #elif defined(CONFIG_PREEMPT) |
2050 | "preempt", | 2050 | "preempt", |
2051 | #else | 2051 | #else |
2052 | "unknown", | 2052 | "unknown", |
2053 | #endif | 2053 | #endif |
2054 | /* These are reserved for later use */ | 2054 | /* These are reserved for later use */ |
2055 | 0, 0, 0, 0); | 2055 | 0, 0, 0, 0); |
2056 | #ifdef CONFIG_SMP | 2056 | #ifdef CONFIG_SMP |
2057 | seq_printf(m, " #P:%d)\n", num_online_cpus()); | 2057 | seq_printf(m, " #P:%d)\n", num_online_cpus()); |
2058 | #else | 2058 | #else |
2059 | seq_puts(m, ")\n"); | 2059 | seq_puts(m, ")\n"); |
2060 | #endif | 2060 | #endif |
2061 | seq_puts(m, "# -----------------\n"); | 2061 | seq_puts(m, "# -----------------\n"); |
2062 | seq_printf(m, "# | task: %.16s-%d " | 2062 | seq_printf(m, "# | task: %.16s-%d " |
2063 | "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n", | 2063 | "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n", |
2064 | data->comm, data->pid, | 2064 | data->comm, data->pid, |
2065 | from_kuid_munged(seq_user_ns(m), data->uid), data->nice, | 2065 | from_kuid_munged(seq_user_ns(m), data->uid), data->nice, |
2066 | data->policy, data->rt_priority); | 2066 | data->policy, data->rt_priority); |
2067 | seq_puts(m, "# -----------------\n"); | 2067 | seq_puts(m, "# -----------------\n"); |
2068 | 2068 | ||
2069 | if (data->critical_start) { | 2069 | if (data->critical_start) { |
2070 | seq_puts(m, "# => started at: "); | 2070 | seq_puts(m, "# => started at: "); |
2071 | seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags); | 2071 | seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags); |
2072 | trace_print_seq(m, &iter->seq); | 2072 | trace_print_seq(m, &iter->seq); |
2073 | seq_puts(m, "\n# => ended at: "); | 2073 | seq_puts(m, "\n# => ended at: "); |
2074 | seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags); | 2074 | seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags); |
2075 | trace_print_seq(m, &iter->seq); | 2075 | trace_print_seq(m, &iter->seq); |
2076 | seq_puts(m, "\n#\n"); | 2076 | seq_puts(m, "\n#\n"); |
2077 | } | 2077 | } |
2078 | 2078 | ||
2079 | seq_puts(m, "#\n"); | 2079 | seq_puts(m, "#\n"); |
2080 | } | 2080 | } |
2081 | 2081 | ||
2082 | static void test_cpu_buff_start(struct trace_iterator *iter) | 2082 | static void test_cpu_buff_start(struct trace_iterator *iter) |
2083 | { | 2083 | { |
2084 | struct trace_seq *s = &iter->seq; | 2084 | struct trace_seq *s = &iter->seq; |
2085 | 2085 | ||
2086 | if (!(trace_flags & TRACE_ITER_ANNOTATE)) | 2086 | if (!(trace_flags & TRACE_ITER_ANNOTATE)) |
2087 | return; | 2087 | return; |
2088 | 2088 | ||
2089 | if (!(iter->iter_flags & TRACE_FILE_ANNOTATE)) | 2089 | if (!(iter->iter_flags & TRACE_FILE_ANNOTATE)) |
2090 | return; | 2090 | return; |
2091 | 2091 | ||
2092 | if (cpumask_test_cpu(iter->cpu, iter->started)) | 2092 | if (cpumask_test_cpu(iter->cpu, iter->started)) |
2093 | return; | 2093 | return; |
2094 | 2094 | ||
2095 | if (iter->tr->data[iter->cpu]->skipped_entries) | 2095 | if (iter->tr->data[iter->cpu]->skipped_entries) |
2096 | return; | 2096 | return; |
2097 | 2097 | ||
2098 | cpumask_set_cpu(iter->cpu, iter->started); | 2098 | cpumask_set_cpu(iter->cpu, iter->started); |
2099 | 2099 | ||
2100 | /* Don't print started cpu buffer for the first entry of the trace */ | 2100 | /* Don't print started cpu buffer for the first entry of the trace */ |
2101 | if (iter->idx > 1) | 2101 | if (iter->idx > 1) |
2102 | trace_seq_printf(s, "##### CPU %u buffer started ####\n", | 2102 | trace_seq_printf(s, "##### CPU %u buffer started ####\n", |
2103 | iter->cpu); | 2103 | iter->cpu); |
2104 | } | 2104 | } |
2105 | 2105 | ||
2106 | static enum print_line_t print_trace_fmt(struct trace_iterator *iter) | 2106 | static enum print_line_t print_trace_fmt(struct trace_iterator *iter) |
2107 | { | 2107 | { |
2108 | struct trace_seq *s = &iter->seq; | 2108 | struct trace_seq *s = &iter->seq; |
2109 | unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); | 2109 | unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); |
2110 | struct trace_entry *entry; | 2110 | struct trace_entry *entry; |
2111 | struct trace_event *event; | 2111 | struct trace_event *event; |
2112 | 2112 | ||
2113 | entry = iter->ent; | 2113 | entry = iter->ent; |
2114 | 2114 | ||
2115 | test_cpu_buff_start(iter); | 2115 | test_cpu_buff_start(iter); |
2116 | 2116 | ||
2117 | event = ftrace_find_event(entry->type); | 2117 | event = ftrace_find_event(entry->type); |
2118 | 2118 | ||
2119 | if (trace_flags & TRACE_ITER_CONTEXT_INFO) { | 2119 | if (trace_flags & TRACE_ITER_CONTEXT_INFO) { |
2120 | if (iter->iter_flags & TRACE_FILE_LAT_FMT) { | 2120 | if (iter->iter_flags & TRACE_FILE_LAT_FMT) { |
2121 | if (!trace_print_lat_context(iter)) | 2121 | if (!trace_print_lat_context(iter)) |
2122 | goto partial; | 2122 | goto partial; |
2123 | } else { | 2123 | } else { |
2124 | if (!trace_print_context(iter)) | 2124 | if (!trace_print_context(iter)) |
2125 | goto partial; | 2125 | goto partial; |
2126 | } | 2126 | } |
2127 | } | 2127 | } |
2128 | 2128 | ||
2129 | if (event) | 2129 | if (event) |
2130 | return event->funcs->trace(iter, sym_flags, event); | 2130 | return event->funcs->trace(iter, sym_flags, event); |
2131 | 2131 | ||
2132 | if (!trace_seq_printf(s, "Unknown type %d\n", entry->type)) | 2132 | if (!trace_seq_printf(s, "Unknown type %d\n", entry->type)) |
2133 | goto partial; | 2133 | goto partial; |
2134 | 2134 | ||
2135 | return TRACE_TYPE_HANDLED; | 2135 | return TRACE_TYPE_HANDLED; |
2136 | partial: | 2136 | partial: |
2137 | return TRACE_TYPE_PARTIAL_LINE; | 2137 | return TRACE_TYPE_PARTIAL_LINE; |
2138 | } | 2138 | } |
2139 | 2139 | ||
2140 | static enum print_line_t print_raw_fmt(struct trace_iterator *iter) | 2140 | static enum print_line_t print_raw_fmt(struct trace_iterator *iter) |
2141 | { | 2141 | { |
2142 | struct trace_seq *s = &iter->seq; | 2142 | struct trace_seq *s = &iter->seq; |
2143 | struct trace_entry *entry; | 2143 | struct trace_entry *entry; |
2144 | struct trace_event *event; | 2144 | struct trace_event *event; |
2145 | 2145 | ||
2146 | entry = iter->ent; | 2146 | entry = iter->ent; |
2147 | 2147 | ||
2148 | if (trace_flags & TRACE_ITER_CONTEXT_INFO) { | 2148 | if (trace_flags & TRACE_ITER_CONTEXT_INFO) { |
2149 | if (!trace_seq_printf(s, "%d %d %llu ", | 2149 | if (!trace_seq_printf(s, "%d %d %llu ", |
2150 | entry->pid, iter->cpu, iter->ts)) | 2150 | entry->pid, iter->cpu, iter->ts)) |
2151 | goto partial; | 2151 | goto partial; |
2152 | } | 2152 | } |
2153 | 2153 | ||
2154 | event = ftrace_find_event(entry->type); | 2154 | event = ftrace_find_event(entry->type); |
2155 | if (event) | 2155 | if (event) |
2156 | return event->funcs->raw(iter, 0, event); | 2156 | return event->funcs->raw(iter, 0, event); |
2157 | 2157 | ||
2158 | if (!trace_seq_printf(s, "%d ?\n", entry->type)) | 2158 | if (!trace_seq_printf(s, "%d ?\n", entry->type)) |
2159 | goto partial; | 2159 | goto partial; |
2160 | 2160 | ||
2161 | return TRACE_TYPE_HANDLED; | 2161 | return TRACE_TYPE_HANDLED; |
2162 | partial: | 2162 | partial: |
2163 | return TRACE_TYPE_PARTIAL_LINE; | 2163 | return TRACE_TYPE_PARTIAL_LINE; |
2164 | } | 2164 | } |
2165 | 2165 | ||
2166 | static enum print_line_t print_hex_fmt(struct trace_iterator *iter) | 2166 | static enum print_line_t print_hex_fmt(struct trace_iterator *iter) |
2167 | { | 2167 | { |
2168 | struct trace_seq *s = &iter->seq; | 2168 | struct trace_seq *s = &iter->seq; |
2169 | unsigned char newline = '\n'; | 2169 | unsigned char newline = '\n'; |
2170 | struct trace_entry *entry; | 2170 | struct trace_entry *entry; |
2171 | struct trace_event *event; | 2171 | struct trace_event *event; |
2172 | 2172 | ||
2173 | entry = iter->ent; | 2173 | entry = iter->ent; |
2174 | 2174 | ||
2175 | if (trace_flags & TRACE_ITER_CONTEXT_INFO) { | 2175 | if (trace_flags & TRACE_ITER_CONTEXT_INFO) { |
2176 | SEQ_PUT_HEX_FIELD_RET(s, entry->pid); | 2176 | SEQ_PUT_HEX_FIELD_RET(s, entry->pid); |
2177 | SEQ_PUT_HEX_FIELD_RET(s, iter->cpu); | 2177 | SEQ_PUT_HEX_FIELD_RET(s, iter->cpu); |
2178 | SEQ_PUT_HEX_FIELD_RET(s, iter->ts); | 2178 | SEQ_PUT_HEX_FIELD_RET(s, iter->ts); |
2179 | } | 2179 | } |
2180 | 2180 | ||
2181 | event = ftrace_find_event(entry->type); | 2181 | event = ftrace_find_event(entry->type); |
2182 | if (event) { | 2182 | if (event) { |
2183 | enum print_line_t ret = event->funcs->hex(iter, 0, event); | 2183 | enum print_line_t ret = event->funcs->hex(iter, 0, event); |
2184 | if (ret != TRACE_TYPE_HANDLED) | 2184 | if (ret != TRACE_TYPE_HANDLED) |
2185 | return ret; | 2185 | return ret; |
2186 | } | 2186 | } |
2187 | 2187 | ||
2188 | SEQ_PUT_FIELD_RET(s, newline); | 2188 | SEQ_PUT_FIELD_RET(s, newline); |
2189 | 2189 | ||
2190 | return TRACE_TYPE_HANDLED; | 2190 | return TRACE_TYPE_HANDLED; |
2191 | } | 2191 | } |
2192 | 2192 | ||
2193 | static enum print_line_t print_bin_fmt(struct trace_iterator *iter) | 2193 | static enum print_line_t print_bin_fmt(struct trace_iterator *iter) |
2194 | { | 2194 | { |
2195 | struct trace_seq *s = &iter->seq; | 2195 | struct trace_seq *s = &iter->seq; |
2196 | struct trace_entry *entry; | 2196 | struct trace_entry *entry; |
2197 | struct trace_event *event; | 2197 | struct trace_event *event; |
2198 | 2198 | ||
2199 | entry = iter->ent; | 2199 | entry = iter->ent; |
2200 | 2200 | ||
2201 | if (trace_flags & TRACE_ITER_CONTEXT_INFO) { | 2201 | if (trace_flags & TRACE_ITER_CONTEXT_INFO) { |
2202 | SEQ_PUT_FIELD_RET(s, entry->pid); | 2202 | SEQ_PUT_FIELD_RET(s, entry->pid); |
2203 | SEQ_PUT_FIELD_RET(s, iter->cpu); | 2203 | SEQ_PUT_FIELD_RET(s, iter->cpu); |
2204 | SEQ_PUT_FIELD_RET(s, iter->ts); | 2204 | SEQ_PUT_FIELD_RET(s, iter->ts); |
2205 | } | 2205 | } |
2206 | 2206 | ||
2207 | event = ftrace_find_event(entry->type); | 2207 | event = ftrace_find_event(entry->type); |
2208 | return event ? event->funcs->binary(iter, 0, event) : | 2208 | return event ? event->funcs->binary(iter, 0, event) : |
2209 | TRACE_TYPE_HANDLED; | 2209 | TRACE_TYPE_HANDLED; |
2210 | } | 2210 | } |
2211 | 2211 | ||
2212 | int trace_empty(struct trace_iterator *iter) | 2212 | int trace_empty(struct trace_iterator *iter) |
2213 | { | 2213 | { |
2214 | struct ring_buffer_iter *buf_iter; | 2214 | struct ring_buffer_iter *buf_iter; |
2215 | int cpu; | 2215 | int cpu; |
2216 | 2216 | ||
2217 | /* If we are looking at one CPU buffer, only check that one */ | 2217 | /* If we are looking at one CPU buffer, only check that one */ |
2218 | if (iter->cpu_file != TRACE_PIPE_ALL_CPU) { | 2218 | if (iter->cpu_file != TRACE_PIPE_ALL_CPU) { |
2219 | cpu = iter->cpu_file; | 2219 | cpu = iter->cpu_file; |
2220 | buf_iter = trace_buffer_iter(iter, cpu); | 2220 | buf_iter = trace_buffer_iter(iter, cpu); |
2221 | if (buf_iter) { | 2221 | if (buf_iter) { |
2222 | if (!ring_buffer_iter_empty(buf_iter)) | 2222 | if (!ring_buffer_iter_empty(buf_iter)) |
2223 | return 0; | 2223 | return 0; |
2224 | } else { | 2224 | } else { |
2225 | if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu)) | 2225 | if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu)) |
2226 | return 0; | 2226 | return 0; |
2227 | } | 2227 | } |
2228 | return 1; | 2228 | return 1; |
2229 | } | 2229 | } |
2230 | 2230 | ||
2231 | for_each_tracing_cpu(cpu) { | 2231 | for_each_tracing_cpu(cpu) { |
2232 | buf_iter = trace_buffer_iter(iter, cpu); | 2232 | buf_iter = trace_buffer_iter(iter, cpu); |
2233 | if (buf_iter) { | 2233 | if (buf_iter) { |
2234 | if (!ring_buffer_iter_empty(buf_iter)) | 2234 | if (!ring_buffer_iter_empty(buf_iter)) |
2235 | return 0; | 2235 | return 0; |
2236 | } else { | 2236 | } else { |
2237 | if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu)) | 2237 | if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu)) |
2238 | return 0; | 2238 | return 0; |
2239 | } | 2239 | } |
2240 | } | 2240 | } |
2241 | 2241 | ||
2242 | return 1; | 2242 | return 1; |
2243 | } | 2243 | } |
2244 | 2244 | ||
2245 | /* Called with trace_event_read_lock() held. */ | 2245 | /* Called with trace_event_read_lock() held. */ |
2246 | enum print_line_t print_trace_line(struct trace_iterator *iter) | 2246 | enum print_line_t print_trace_line(struct trace_iterator *iter) |
2247 | { | 2247 | { |
2248 | enum print_line_t ret; | 2248 | enum print_line_t ret; |
2249 | 2249 | ||
2250 | if (iter->lost_events && | 2250 | if (iter->lost_events && |
2251 | !trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n", | 2251 | !trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n", |
2252 | iter->cpu, iter->lost_events)) | 2252 | iter->cpu, iter->lost_events)) |
2253 | return TRACE_TYPE_PARTIAL_LINE; | 2253 | return TRACE_TYPE_PARTIAL_LINE; |
2254 | 2254 | ||
2255 | if (iter->trace && iter->trace->print_line) { | 2255 | if (iter->trace && iter->trace->print_line) { |
2256 | ret = iter->trace->print_line(iter); | 2256 | ret = iter->trace->print_line(iter); |
2257 | if (ret != TRACE_TYPE_UNHANDLED) | 2257 | if (ret != TRACE_TYPE_UNHANDLED) |
2258 | return ret; | 2258 | return ret; |
2259 | } | 2259 | } |
2260 | 2260 | ||
2261 | if (iter->ent->type == TRACE_BPRINT && | 2261 | if (iter->ent->type == TRACE_BPRINT && |
2262 | trace_flags & TRACE_ITER_PRINTK && | 2262 | trace_flags & TRACE_ITER_PRINTK && |
2263 | trace_flags & TRACE_ITER_PRINTK_MSGONLY) | 2263 | trace_flags & TRACE_ITER_PRINTK_MSGONLY) |
2264 | return trace_print_bprintk_msg_only(iter); | 2264 | return trace_print_bprintk_msg_only(iter); |
2265 | 2265 | ||
2266 | if (iter->ent->type == TRACE_PRINT && | 2266 | if (iter->ent->type == TRACE_PRINT && |
2267 | trace_flags & TRACE_ITER_PRINTK && | 2267 | trace_flags & TRACE_ITER_PRINTK && |
2268 | trace_flags & TRACE_ITER_PRINTK_MSGONLY) | 2268 | trace_flags & TRACE_ITER_PRINTK_MSGONLY) |
2269 | return trace_print_printk_msg_only(iter); | 2269 | return trace_print_printk_msg_only(iter); |
2270 | 2270 | ||
2271 | if (trace_flags & TRACE_ITER_BIN) | 2271 | if (trace_flags & TRACE_ITER_BIN) |
2272 | return print_bin_fmt(iter); | 2272 | return print_bin_fmt(iter); |
2273 | 2273 | ||
2274 | if (trace_flags & TRACE_ITER_HEX) | 2274 | if (trace_flags & TRACE_ITER_HEX) |
2275 | return print_hex_fmt(iter); | 2275 | return print_hex_fmt(iter); |
2276 | 2276 | ||
2277 | if (trace_flags & TRACE_ITER_RAW) | 2277 | if (trace_flags & TRACE_ITER_RAW) |
2278 | return print_raw_fmt(iter); | 2278 | return print_raw_fmt(iter); |
2279 | 2279 | ||
2280 | return print_trace_fmt(iter); | 2280 | return print_trace_fmt(iter); |
2281 | } | 2281 | } |
2282 | 2282 | ||
2283 | void trace_latency_header(struct seq_file *m) | 2283 | void trace_latency_header(struct seq_file *m) |
2284 | { | 2284 | { |
2285 | struct trace_iterator *iter = m->private; | 2285 | struct trace_iterator *iter = m->private; |
2286 | 2286 | ||
2287 | /* print nothing if the buffers are empty */ | 2287 | /* print nothing if the buffers are empty */ |
2288 | if (trace_empty(iter)) | 2288 | if (trace_empty(iter)) |
2289 | return; | 2289 | return; |
2290 | 2290 | ||
2291 | if (iter->iter_flags & TRACE_FILE_LAT_FMT) | 2291 | if (iter->iter_flags & TRACE_FILE_LAT_FMT) |
2292 | print_trace_header(m, iter); | 2292 | print_trace_header(m, iter); |
2293 | 2293 | ||
2294 | if (!(trace_flags & TRACE_ITER_VERBOSE)) | 2294 | if (!(trace_flags & TRACE_ITER_VERBOSE)) |
2295 | print_lat_help_header(m); | 2295 | print_lat_help_header(m); |
2296 | } | 2296 | } |
2297 | 2297 | ||
2298 | void trace_default_header(struct seq_file *m) | 2298 | void trace_default_header(struct seq_file *m) |
2299 | { | 2299 | { |
2300 | struct trace_iterator *iter = m->private; | 2300 | struct trace_iterator *iter = m->private; |
2301 | 2301 | ||
2302 | if (!(trace_flags & TRACE_ITER_CONTEXT_INFO)) | 2302 | if (!(trace_flags & TRACE_ITER_CONTEXT_INFO)) |
2303 | return; | 2303 | return; |
2304 | 2304 | ||
2305 | if (iter->iter_flags & TRACE_FILE_LAT_FMT) { | 2305 | if (iter->iter_flags & TRACE_FILE_LAT_FMT) { |
2306 | /* print nothing if the buffers are empty */ | 2306 | /* print nothing if the buffers are empty */ |
2307 | if (trace_empty(iter)) | 2307 | if (trace_empty(iter)) |
2308 | return; | 2308 | return; |
2309 | print_trace_header(m, iter); | 2309 | print_trace_header(m, iter); |
2310 | if (!(trace_flags & TRACE_ITER_VERBOSE)) | 2310 | if (!(trace_flags & TRACE_ITER_VERBOSE)) |
2311 | print_lat_help_header(m); | 2311 | print_lat_help_header(m); |
2312 | } else { | 2312 | } else { |
2313 | if (!(trace_flags & TRACE_ITER_VERBOSE)) { | 2313 | if (!(trace_flags & TRACE_ITER_VERBOSE)) { |
2314 | if (trace_flags & TRACE_ITER_IRQ_INFO) | 2314 | if (trace_flags & TRACE_ITER_IRQ_INFO) |
2315 | print_func_help_header_irq(iter->tr, m); | 2315 | print_func_help_header_irq(iter->tr, m); |
2316 | else | 2316 | else |
2317 | print_func_help_header(iter->tr, m); | 2317 | print_func_help_header(iter->tr, m); |
2318 | } | 2318 | } |
2319 | } | 2319 | } |
2320 | } | 2320 | } |
2321 | 2321 | ||
2322 | static void test_ftrace_alive(struct seq_file *m) | 2322 | static void test_ftrace_alive(struct seq_file *m) |
2323 | { | 2323 | { |
2324 | if (!ftrace_is_dead()) | 2324 | if (!ftrace_is_dead()) |
2325 | return; | 2325 | return; |
2326 | seq_printf(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"); | 2326 | seq_printf(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"); |
2327 | seq_printf(m, "# MAY BE MISSING FUNCTION EVENTS\n"); | 2327 | seq_printf(m, "# MAY BE MISSING FUNCTION EVENTS\n"); |
2328 | } | 2328 | } |
2329 | 2329 | ||
2330 | static int s_show(struct seq_file *m, void *v) | 2330 | static int s_show(struct seq_file *m, void *v) |
2331 | { | 2331 | { |
2332 | struct trace_iterator *iter = v; | 2332 | struct trace_iterator *iter = v; |
2333 | int ret; | 2333 | int ret; |
2334 | 2334 | ||
2335 | if (iter->ent == NULL) { | 2335 | if (iter->ent == NULL) { |
2336 | if (iter->tr) { | 2336 | if (iter->tr) { |
2337 | seq_printf(m, "# tracer: %s\n", iter->trace->name); | 2337 | seq_printf(m, "# tracer: %s\n", iter->trace->name); |
2338 | seq_puts(m, "#\n"); | 2338 | seq_puts(m, "#\n"); |
2339 | test_ftrace_alive(m); | 2339 | test_ftrace_alive(m); |
2340 | } | 2340 | } |
2341 | if (iter->trace && iter->trace->print_header) | 2341 | if (iter->trace && iter->trace->print_header) |
2342 | iter->trace->print_header(m); | 2342 | iter->trace->print_header(m); |
2343 | else | 2343 | else |
2344 | trace_default_header(m); | 2344 | trace_default_header(m); |
2345 | 2345 | ||
2346 | } else if (iter->leftover) { | 2346 | } else if (iter->leftover) { |
2347 | /* | 2347 | /* |
2348 | * If we filled the seq_file buffer earlier, we | 2348 | * If we filled the seq_file buffer earlier, we |
2349 | * want to just show it now. | 2349 | * want to just show it now. |
2350 | */ | 2350 | */ |
2351 | ret = trace_print_seq(m, &iter->seq); | 2351 | ret = trace_print_seq(m, &iter->seq); |
2352 | 2352 | ||
2353 | /* ret should this time be zero, but you never know */ | 2353 | /* ret should this time be zero, but you never know */ |
2354 | iter->leftover = ret; | 2354 | iter->leftover = ret; |
2355 | 2355 | ||
2356 | } else { | 2356 | } else { |
2357 | print_trace_line(iter); | 2357 | print_trace_line(iter); |
2358 | ret = trace_print_seq(m, &iter->seq); | 2358 | ret = trace_print_seq(m, &iter->seq); |
2359 | /* | 2359 | /* |
2360 | * If we overflow the seq_file buffer, then it will | 2360 | * If we overflow the seq_file buffer, then it will |
2361 | * ask us for this data again at start up. | 2361 | * ask us for this data again at start up. |
2362 | * Use that instead. | 2362 | * Use that instead. |
2363 | * ret is 0 if seq_file write succeeded. | 2363 | * ret is 0 if seq_file write succeeded. |
2364 | * -1 otherwise. | 2364 | * -1 otherwise. |
2365 | */ | 2365 | */ |
2366 | iter->leftover = ret; | 2366 | iter->leftover = ret; |
2367 | } | 2367 | } |
2368 | 2368 | ||
2369 | return 0; | 2369 | return 0; |
2370 | } | 2370 | } |
2371 | 2371 | ||
2372 | static const struct seq_operations tracer_seq_ops = { | 2372 | static const struct seq_operations tracer_seq_ops = { |
2373 | .start = s_start, | 2373 | .start = s_start, |
2374 | .next = s_next, | 2374 | .next = s_next, |
2375 | .stop = s_stop, | 2375 | .stop = s_stop, |
2376 | .show = s_show, | 2376 | .show = s_show, |
2377 | }; | 2377 | }; |
2378 | 2378 | ||
2379 | static struct trace_iterator * | 2379 | static struct trace_iterator * |
2380 | __tracing_open(struct inode *inode, struct file *file) | 2380 | __tracing_open(struct inode *inode, struct file *file) |
2381 | { | 2381 | { |
2382 | long cpu_file = (long) inode->i_private; | 2382 | long cpu_file = (long) inode->i_private; |
2383 | struct trace_iterator *iter; | 2383 | struct trace_iterator *iter; |
2384 | int cpu; | 2384 | int cpu; |
2385 | 2385 | ||
2386 | if (tracing_disabled) | 2386 | if (tracing_disabled) |
2387 | return ERR_PTR(-ENODEV); | 2387 | return ERR_PTR(-ENODEV); |
2388 | 2388 | ||
2389 | iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter)); | 2389 | iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter)); |
2390 | if (!iter) | 2390 | if (!iter) |
2391 | return ERR_PTR(-ENOMEM); | 2391 | return ERR_PTR(-ENOMEM); |
2392 | 2392 | ||
2393 | iter->buffer_iter = kzalloc(sizeof(*iter->buffer_iter) * num_possible_cpus(), | 2393 | iter->buffer_iter = kzalloc(sizeof(*iter->buffer_iter) * num_possible_cpus(), |
2394 | GFP_KERNEL); | 2394 | GFP_KERNEL); |
2395 | if (!iter->buffer_iter) | 2395 | if (!iter->buffer_iter) |
2396 | goto release; | 2396 | goto release; |
2397 | 2397 | ||
2398 | /* | 2398 | /* |
2399 | * We make a copy of the current tracer to avoid concurrent | 2399 | * We make a copy of the current tracer to avoid concurrent |
2400 | * changes on it while we are reading. | 2400 | * changes on it while we are reading. |
2401 | */ | 2401 | */ |
2402 | mutex_lock(&trace_types_lock); | 2402 | mutex_lock(&trace_types_lock); |
2403 | iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL); | 2403 | iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL); |
2404 | if (!iter->trace) | 2404 | if (!iter->trace) |
2405 | goto fail; | 2405 | goto fail; |
2406 | 2406 | ||
2407 | if (current_trace) | 2407 | if (current_trace) |
2408 | *iter->trace = *current_trace; | 2408 | *iter->trace = *current_trace; |
2409 | 2409 | ||
2410 | if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL)) | 2410 | if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL)) |
2411 | goto fail; | 2411 | goto fail; |
2412 | 2412 | ||
2413 | if (current_trace && current_trace->print_max) | 2413 | if (current_trace && current_trace->print_max) |
2414 | iter->tr = &max_tr; | 2414 | iter->tr = &max_tr; |
2415 | else | 2415 | else |
2416 | iter->tr = &global_trace; | 2416 | iter->tr = &global_trace; |
2417 | iter->pos = -1; | 2417 | iter->pos = -1; |
2418 | mutex_init(&iter->mutex); | 2418 | mutex_init(&iter->mutex); |
2419 | iter->cpu_file = cpu_file; | 2419 | iter->cpu_file = cpu_file; |
2420 | 2420 | ||
2421 | /* Notify the tracer early; before we stop tracing. */ | 2421 | /* Notify the tracer early; before we stop tracing. */ |
2422 | if (iter->trace && iter->trace->open) | 2422 | if (iter->trace && iter->trace->open) |
2423 | iter->trace->open(iter); | 2423 | iter->trace->open(iter); |
2424 | 2424 | ||
2425 | /* Annotate start of buffers if we had overruns */ | 2425 | /* Annotate start of buffers if we had overruns */ |
2426 | if (ring_buffer_overruns(iter->tr->buffer)) | 2426 | if (ring_buffer_overruns(iter->tr->buffer)) |
2427 | iter->iter_flags |= TRACE_FILE_ANNOTATE; | 2427 | iter->iter_flags |= TRACE_FILE_ANNOTATE; |
2428 | 2428 | ||
2429 | /* stop the trace while dumping */ | 2429 | /* stop the trace while dumping */ |
2430 | tracing_stop(); | 2430 | tracing_stop(); |
2431 | 2431 | ||
2432 | if (iter->cpu_file == TRACE_PIPE_ALL_CPU) { | 2432 | if (iter->cpu_file == TRACE_PIPE_ALL_CPU) { |
2433 | for_each_tracing_cpu(cpu) { | 2433 | for_each_tracing_cpu(cpu) { |
2434 | iter->buffer_iter[cpu] = | 2434 | iter->buffer_iter[cpu] = |
2435 | ring_buffer_read_prepare(iter->tr->buffer, cpu); | 2435 | ring_buffer_read_prepare(iter->tr->buffer, cpu); |
2436 | } | 2436 | } |
2437 | ring_buffer_read_prepare_sync(); | 2437 | ring_buffer_read_prepare_sync(); |
2438 | for_each_tracing_cpu(cpu) { | 2438 | for_each_tracing_cpu(cpu) { |
2439 | ring_buffer_read_start(iter->buffer_iter[cpu]); | 2439 | ring_buffer_read_start(iter->buffer_iter[cpu]); |
2440 | tracing_iter_reset(iter, cpu); | 2440 | tracing_iter_reset(iter, cpu); |
2441 | } | 2441 | } |
2442 | } else { | 2442 | } else { |
2443 | cpu = iter->cpu_file; | 2443 | cpu = iter->cpu_file; |
2444 | iter->buffer_iter[cpu] = | 2444 | iter->buffer_iter[cpu] = |
2445 | ring_buffer_read_prepare(iter->tr->buffer, cpu); | 2445 | ring_buffer_read_prepare(iter->tr->buffer, cpu); |
2446 | ring_buffer_read_prepare_sync(); | 2446 | ring_buffer_read_prepare_sync(); |
2447 | ring_buffer_read_start(iter->buffer_iter[cpu]); | 2447 | ring_buffer_read_start(iter->buffer_iter[cpu]); |
2448 | tracing_iter_reset(iter, cpu); | 2448 | tracing_iter_reset(iter, cpu); |
2449 | } | 2449 | } |
2450 | 2450 | ||
2451 | mutex_unlock(&trace_types_lock); | 2451 | mutex_unlock(&trace_types_lock); |
2452 | 2452 | ||
2453 | return iter; | 2453 | return iter; |
2454 | 2454 | ||
2455 | fail: | 2455 | fail: |
2456 | mutex_unlock(&trace_types_lock); | 2456 | mutex_unlock(&trace_types_lock); |
2457 | kfree(iter->trace); | 2457 | kfree(iter->trace); |
2458 | kfree(iter->buffer_iter); | 2458 | kfree(iter->buffer_iter); |
2459 | release: | 2459 | release: |
2460 | seq_release_private(inode, file); | 2460 | seq_release_private(inode, file); |
2461 | return ERR_PTR(-ENOMEM); | 2461 | return ERR_PTR(-ENOMEM); |
2462 | } | 2462 | } |
2463 | 2463 | ||
2464 | int tracing_open_generic(struct inode *inode, struct file *filp) | 2464 | int tracing_open_generic(struct inode *inode, struct file *filp) |
2465 | { | 2465 | { |
2466 | if (tracing_disabled) | 2466 | if (tracing_disabled) |
2467 | return -ENODEV; | 2467 | return -ENODEV; |
2468 | 2468 | ||
2469 | filp->private_data = inode->i_private; | 2469 | filp->private_data = inode->i_private; |
2470 | return 0; | 2470 | return 0; |
2471 | } | 2471 | } |
2472 | 2472 | ||
2473 | static int tracing_release(struct inode *inode, struct file *file) | 2473 | static int tracing_release(struct inode *inode, struct file *file) |
2474 | { | 2474 | { |
2475 | struct seq_file *m = file->private_data; | 2475 | struct seq_file *m = file->private_data; |
2476 | struct trace_iterator *iter; | 2476 | struct trace_iterator *iter; |
2477 | int cpu; | 2477 | int cpu; |
2478 | 2478 | ||
2479 | if (!(file->f_mode & FMODE_READ)) | 2479 | if (!(file->f_mode & FMODE_READ)) |
2480 | return 0; | 2480 | return 0; |
2481 | 2481 | ||
2482 | iter = m->private; | 2482 | iter = m->private; |
2483 | 2483 | ||
2484 | mutex_lock(&trace_types_lock); | 2484 | mutex_lock(&trace_types_lock); |
2485 | for_each_tracing_cpu(cpu) { | 2485 | for_each_tracing_cpu(cpu) { |
2486 | if (iter->buffer_iter[cpu]) | 2486 | if (iter->buffer_iter[cpu]) |
2487 | ring_buffer_read_finish(iter->buffer_iter[cpu]); | 2487 | ring_buffer_read_finish(iter->buffer_iter[cpu]); |
2488 | } | 2488 | } |
2489 | 2489 | ||
2490 | if (iter->trace && iter->trace->close) | 2490 | if (iter->trace && iter->trace->close) |
2491 | iter->trace->close(iter); | 2491 | iter->trace->close(iter); |
2492 | 2492 | ||
2493 | /* reenable tracing if it was previously enabled */ | 2493 | /* reenable tracing if it was previously enabled */ |
2494 | tracing_start(); | 2494 | tracing_start(); |
2495 | mutex_unlock(&trace_types_lock); | 2495 | mutex_unlock(&trace_types_lock); |
2496 | 2496 | ||
2497 | mutex_destroy(&iter->mutex); | 2497 | mutex_destroy(&iter->mutex); |
2498 | free_cpumask_var(iter->started); | 2498 | free_cpumask_var(iter->started); |
2499 | kfree(iter->trace); | 2499 | kfree(iter->trace); |
2500 | kfree(iter->buffer_iter); | 2500 | kfree(iter->buffer_iter); |
2501 | seq_release_private(inode, file); | 2501 | seq_release_private(inode, file); |
2502 | return 0; | 2502 | return 0; |
2503 | } | 2503 | } |
2504 | 2504 | ||
2505 | static int tracing_open(struct inode *inode, struct file *file) | 2505 | static int tracing_open(struct inode *inode, struct file *file) |
2506 | { | 2506 | { |
2507 | struct trace_iterator *iter; | 2507 | struct trace_iterator *iter; |
2508 | int ret = 0; | 2508 | int ret = 0; |
2509 | 2509 | ||
2510 | /* If this file was open for write, then erase contents */ | 2510 | /* If this file was open for write, then erase contents */ |
2511 | if ((file->f_mode & FMODE_WRITE) && | 2511 | if ((file->f_mode & FMODE_WRITE) && |
2512 | (file->f_flags & O_TRUNC)) { | 2512 | (file->f_flags & O_TRUNC)) { |
2513 | long cpu = (long) inode->i_private; | 2513 | long cpu = (long) inode->i_private; |
2514 | 2514 | ||
2515 | if (cpu == TRACE_PIPE_ALL_CPU) | 2515 | if (cpu == TRACE_PIPE_ALL_CPU) |
2516 | tracing_reset_online_cpus(&global_trace); | 2516 | tracing_reset_online_cpus(&global_trace); |
2517 | else | 2517 | else |
2518 | tracing_reset(&global_trace, cpu); | 2518 | tracing_reset(&global_trace, cpu); |
2519 | } | 2519 | } |
2520 | 2520 | ||
2521 | if (file->f_mode & FMODE_READ) { | 2521 | if (file->f_mode & FMODE_READ) { |
2522 | iter = __tracing_open(inode, file); | 2522 | iter = __tracing_open(inode, file); |
2523 | if (IS_ERR(iter)) | 2523 | if (IS_ERR(iter)) |
2524 | ret = PTR_ERR(iter); | 2524 | ret = PTR_ERR(iter); |
2525 | else if (trace_flags & TRACE_ITER_LATENCY_FMT) | 2525 | else if (trace_flags & TRACE_ITER_LATENCY_FMT) |
2526 | iter->iter_flags |= TRACE_FILE_LAT_FMT; | 2526 | iter->iter_flags |= TRACE_FILE_LAT_FMT; |
2527 | } | 2527 | } |
2528 | return ret; | 2528 | return ret; |
2529 | } | 2529 | } |
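A note on the O_TRUNC branch in tracing_open() above: opening the trace file for writing with truncation resets the ring buffers, which is what a shell "echo > trace" ends up doing. Below is a minimal userspace sketch of that call; the /sys/kernel/debug mount point is an assumption, not something this hunk establishes.

/* Illustrative only: truncating the trace file triggers the
 * tracing_reset_online_cpus()/tracing_reset() branch in tracing_open().
 * Path assumes debugfs is mounted at /sys/kernel/debug. */
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/sys/kernel/debug/tracing/trace", O_WRONLY | O_TRUNC);

	if (fd < 0)
		return 1;
	close(fd);
	return 0;
}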
2530 | 2530 | ||
2531 | static void * | 2531 | static void * |
2532 | t_next(struct seq_file *m, void *v, loff_t *pos) | 2532 | t_next(struct seq_file *m, void *v, loff_t *pos) |
2533 | { | 2533 | { |
2534 | struct tracer *t = v; | 2534 | struct tracer *t = v; |
2535 | 2535 | ||
2536 | (*pos)++; | 2536 | (*pos)++; |
2537 | 2537 | ||
2538 | if (t) | 2538 | if (t) |
2539 | t = t->next; | 2539 | t = t->next; |
2540 | 2540 | ||
2541 | return t; | 2541 | return t; |
2542 | } | 2542 | } |
2543 | 2543 | ||
2544 | static void *t_start(struct seq_file *m, loff_t *pos) | 2544 | static void *t_start(struct seq_file *m, loff_t *pos) |
2545 | { | 2545 | { |
2546 | struct tracer *t; | 2546 | struct tracer *t; |
2547 | loff_t l = 0; | 2547 | loff_t l = 0; |
2548 | 2548 | ||
2549 | mutex_lock(&trace_types_lock); | 2549 | mutex_lock(&trace_types_lock); |
2550 | for (t = trace_types; t && l < *pos; t = t_next(m, t, &l)) | 2550 | for (t = trace_types; t && l < *pos; t = t_next(m, t, &l)) |
2551 | ; | 2551 | ; |
2552 | 2552 | ||
2553 | return t; | 2553 | return t; |
2554 | } | 2554 | } |
2555 | 2555 | ||
2556 | static void t_stop(struct seq_file *m, void *p) | 2556 | static void t_stop(struct seq_file *m, void *p) |
2557 | { | 2557 | { |
2558 | mutex_unlock(&trace_types_lock); | 2558 | mutex_unlock(&trace_types_lock); |
2559 | } | 2559 | } |
2560 | 2560 | ||
2561 | static int t_show(struct seq_file *m, void *v) | 2561 | static int t_show(struct seq_file *m, void *v) |
2562 | { | 2562 | { |
2563 | struct tracer *t = v; | 2563 | struct tracer *t = v; |
2564 | 2564 | ||
2565 | if (!t) | 2565 | if (!t) |
2566 | return 0; | 2566 | return 0; |
2567 | 2567 | ||
2568 | seq_printf(m, "%s", t->name); | 2568 | seq_printf(m, "%s", t->name); |
2569 | if (t->next) | 2569 | if (t->next) |
2570 | seq_putc(m, ' '); | 2570 | seq_putc(m, ' '); |
2571 | else | 2571 | else |
2572 | seq_putc(m, '\n'); | 2572 | seq_putc(m, '\n'); |
2573 | 2573 | ||
2574 | return 0; | 2574 | return 0; |
2575 | } | 2575 | } |
2576 | 2576 | ||
2577 | static const struct seq_operations show_traces_seq_ops = { | 2577 | static const struct seq_operations show_traces_seq_ops = { |
2578 | .start = t_start, | 2578 | .start = t_start, |
2579 | .next = t_next, | 2579 | .next = t_next, |
2580 | .stop = t_stop, | 2580 | .stop = t_stop, |
2581 | .show = t_show, | 2581 | .show = t_show, |
2582 | }; | 2582 | }; |
2583 | 2583 | ||
2584 | static int show_traces_open(struct inode *inode, struct file *file) | 2584 | static int show_traces_open(struct inode *inode, struct file *file) |
2585 | { | 2585 | { |
2586 | if (tracing_disabled) | 2586 | if (tracing_disabled) |
2587 | return -ENODEV; | 2587 | return -ENODEV; |
2588 | 2588 | ||
2589 | return seq_open(file, &show_traces_seq_ops); | 2589 | return seq_open(file, &show_traces_seq_ops); |
2590 | } | 2590 | } |
2591 | 2591 | ||
2592 | static ssize_t | 2592 | static ssize_t |
2593 | tracing_write_stub(struct file *filp, const char __user *ubuf, | 2593 | tracing_write_stub(struct file *filp, const char __user *ubuf, |
2594 | size_t count, loff_t *ppos) | 2594 | size_t count, loff_t *ppos) |
2595 | { | 2595 | { |
2596 | return count; | 2596 | return count; |
2597 | } | 2597 | } |
2598 | 2598 | ||
2599 | static loff_t tracing_seek(struct file *file, loff_t offset, int origin) | 2599 | static loff_t tracing_seek(struct file *file, loff_t offset, int origin) |
2600 | { | 2600 | { |
2601 | if (file->f_mode & FMODE_READ) | 2601 | if (file->f_mode & FMODE_READ) |
2602 | return seq_lseek(file, offset, origin); | 2602 | return seq_lseek(file, offset, origin); |
2603 | else | 2603 | else |
2604 | return 0; | 2604 | return 0; |
2605 | } | 2605 | } |
2606 | 2606 | ||
2607 | static const struct file_operations tracing_fops = { | 2607 | static const struct file_operations tracing_fops = { |
2608 | .open = tracing_open, | 2608 | .open = tracing_open, |
2609 | .read = seq_read, | 2609 | .read = seq_read, |
2610 | .write = tracing_write_stub, | 2610 | .write = tracing_write_stub, |
2611 | .llseek = tracing_seek, | 2611 | .llseek = tracing_seek, |
2612 | .release = tracing_release, | 2612 | .release = tracing_release, |
2613 | }; | 2613 | }; |
2614 | 2614 | ||
2615 | static const struct file_operations show_traces_fops = { | 2615 | static const struct file_operations show_traces_fops = { |
2616 | .open = show_traces_open, | 2616 | .open = show_traces_open, |
2617 | .read = seq_read, | 2617 | .read = seq_read, |
2618 | .release = seq_release, | 2618 | .release = seq_release, |
2619 | .llseek = seq_lseek, | 2619 | .llseek = seq_lseek, |
2620 | }; | 2620 | }; |
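show_traces_open() and t_show() above back the available_tracers file referenced in the mini-HOWTO further down: the registered tracer names are emitted space-separated, with a newline after the last one. A small sketch of reading that list from userspace, again assuming the usual debugfs mount point:

/* Illustrative only: parse the space-separated list emitted by t_show(). */
#include <stdio.h>
#include <string.h>

int main(void)
{
	FILE *f = fopen("/sys/kernel/debug/tracing/available_tracers", "r");
	char line[512];
	char *name;

	if (!f)
		return 1;
	if (fgets(line, sizeof(line), f)) {
		for (name = strtok(line, " \n"); name; name = strtok(NULL, " \n"))
			printf("tracer: %s\n", name);
	}
	fclose(f);
	return 0;
}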
2621 | 2621 | ||
2622 | /* | 2622 | /* |
2623 | * Only trace on a CPU if the bitmask is set: | 2623 | * Only trace on a CPU if the bitmask is set: |
2624 | */ | 2624 | */ |
2625 | static cpumask_var_t tracing_cpumask; | 2625 | static cpumask_var_t tracing_cpumask; |
2626 | 2626 | ||
2627 | /* | 2627 | /* |
2628 | * The tracer itself will not take this lock, but still we want | 2628 | * The tracer itself will not take this lock, but still we want |
2629 | * to provide a consistent cpumask to user-space: | 2629 | * to provide a consistent cpumask to user-space: |
2630 | */ | 2630 | */ |
2631 | static DEFINE_MUTEX(tracing_cpumask_update_lock); | 2631 | static DEFINE_MUTEX(tracing_cpumask_update_lock); |
2632 | 2632 | ||
2633 | /* | 2633 | /* |
2634 | * Temporary storage for the character representation of the | 2634 | * Temporary storage for the character representation of the |
2635 | * CPU bitmask (and one more byte for the newline): | 2635 | * CPU bitmask (and one more byte for the newline): |
2636 | */ | 2636 | */ |
2637 | static char mask_str[NR_CPUS + 1]; | 2637 | static char mask_str[NR_CPUS + 1]; |
2638 | 2638 | ||
2639 | static ssize_t | 2639 | static ssize_t |
2640 | tracing_cpumask_read(struct file *filp, char __user *ubuf, | 2640 | tracing_cpumask_read(struct file *filp, char __user *ubuf, |
2641 | size_t count, loff_t *ppos) | 2641 | size_t count, loff_t *ppos) |
2642 | { | 2642 | { |
2643 | int len; | 2643 | int len; |
2644 | 2644 | ||
2645 | mutex_lock(&tracing_cpumask_update_lock); | 2645 | mutex_lock(&tracing_cpumask_update_lock); |
2646 | 2646 | ||
2647 | len = cpumask_scnprintf(mask_str, count, tracing_cpumask); | 2647 | len = cpumask_scnprintf(mask_str, count, tracing_cpumask); |
2648 | if (count - len < 2) { | 2648 | if (count - len < 2) { |
2649 | count = -EINVAL; | 2649 | count = -EINVAL; |
2650 | goto out_err; | 2650 | goto out_err; |
2651 | } | 2651 | } |
2652 | len += sprintf(mask_str + len, "\n"); | 2652 | len += sprintf(mask_str + len, "\n"); |
2653 | count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1); | 2653 | count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1); |
2654 | 2654 | ||
2655 | out_err: | 2655 | out_err: |
2656 | mutex_unlock(&tracing_cpumask_update_lock); | 2656 | mutex_unlock(&tracing_cpumask_update_lock); |
2657 | 2657 | ||
2658 | return count; | 2658 | return count; |
2659 | } | 2659 | } |
2660 | 2660 | ||
2661 | static ssize_t | 2661 | static ssize_t |
2662 | tracing_cpumask_write(struct file *filp, const char __user *ubuf, | 2662 | tracing_cpumask_write(struct file *filp, const char __user *ubuf, |
2663 | size_t count, loff_t *ppos) | 2663 | size_t count, loff_t *ppos) |
2664 | { | 2664 | { |
2665 | int err, cpu; | 2665 | int err, cpu; |
2666 | cpumask_var_t tracing_cpumask_new; | 2666 | cpumask_var_t tracing_cpumask_new; |
2667 | 2667 | ||
2668 | if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL)) | 2668 | if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL)) |
2669 | return -ENOMEM; | 2669 | return -ENOMEM; |
2670 | 2670 | ||
2671 | err = cpumask_parse_user(ubuf, count, tracing_cpumask_new); | 2671 | err = cpumask_parse_user(ubuf, count, tracing_cpumask_new); |
2672 | if (err) | 2672 | if (err) |
2673 | goto err_unlock; | 2673 | goto err_unlock; |
2674 | 2674 | ||
2675 | mutex_lock(&tracing_cpumask_update_lock); | 2675 | mutex_lock(&tracing_cpumask_update_lock); |
2676 | 2676 | ||
2677 | local_irq_disable(); | 2677 | local_irq_disable(); |
2678 | arch_spin_lock(&ftrace_max_lock); | 2678 | arch_spin_lock(&ftrace_max_lock); |
2679 | for_each_tracing_cpu(cpu) { | 2679 | for_each_tracing_cpu(cpu) { |
2680 | /* | 2680 | /* |
2681 | * Increase/decrease the disabled counter if we are | 2681 | * Increase/decrease the disabled counter if we are |
2682 | * about to flip a bit in the cpumask: | 2682 | * about to flip a bit in the cpumask: |
2683 | */ | 2683 | */ |
2684 | if (cpumask_test_cpu(cpu, tracing_cpumask) && | 2684 | if (cpumask_test_cpu(cpu, tracing_cpumask) && |
2685 | !cpumask_test_cpu(cpu, tracing_cpumask_new)) { | 2685 | !cpumask_test_cpu(cpu, tracing_cpumask_new)) { |
2686 | atomic_inc(&global_trace.data[cpu]->disabled); | 2686 | atomic_inc(&global_trace.data[cpu]->disabled); |
2687 | ring_buffer_record_disable_cpu(global_trace.buffer, cpu); | 2687 | ring_buffer_record_disable_cpu(global_trace.buffer, cpu); |
2688 | } | 2688 | } |
2689 | if (!cpumask_test_cpu(cpu, tracing_cpumask) && | 2689 | if (!cpumask_test_cpu(cpu, tracing_cpumask) && |
2690 | cpumask_test_cpu(cpu, tracing_cpumask_new)) { | 2690 | cpumask_test_cpu(cpu, tracing_cpumask_new)) { |
2691 | atomic_dec(&global_trace.data[cpu]->disabled); | 2691 | atomic_dec(&global_trace.data[cpu]->disabled); |
2692 | ring_buffer_record_enable_cpu(global_trace.buffer, cpu); | 2692 | ring_buffer_record_enable_cpu(global_trace.buffer, cpu); |
2693 | } | 2693 | } |
2694 | } | 2694 | } |
2695 | arch_spin_unlock(&ftrace_max_lock); | 2695 | arch_spin_unlock(&ftrace_max_lock); |
2696 | local_irq_enable(); | 2696 | local_irq_enable(); |
2697 | 2697 | ||
2698 | cpumask_copy(tracing_cpumask, tracing_cpumask_new); | 2698 | cpumask_copy(tracing_cpumask, tracing_cpumask_new); |
2699 | 2699 | ||
2700 | mutex_unlock(&tracing_cpumask_update_lock); | 2700 | mutex_unlock(&tracing_cpumask_update_lock); |
2701 | free_cpumask_var(tracing_cpumask_new); | 2701 | free_cpumask_var(tracing_cpumask_new); |
2702 | 2702 | ||
2703 | return count; | 2703 | return count; |
2704 | 2704 | ||
2705 | err_unlock: | 2705 | err_unlock: |
2706 | free_cpumask_var(tracing_cpumask_new); | 2706 | free_cpumask_var(tracing_cpumask_new); |
2707 | 2707 | ||
2708 | return err; | 2708 | return err; |
2709 | } | 2709 | } |
2710 | 2710 | ||
2711 | static const struct file_operations tracing_cpumask_fops = { | 2711 | static const struct file_operations tracing_cpumask_fops = { |
2712 | .open = tracing_open_generic, | 2712 | .open = tracing_open_generic, |
2713 | .read = tracing_cpumask_read, | 2713 | .read = tracing_cpumask_read, |
2714 | .write = tracing_cpumask_write, | 2714 | .write = tracing_cpumask_write, |
2715 | .llseek = generic_file_llseek, | 2715 | .llseek = generic_file_llseek, |
2716 | }; | 2716 | }; |
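tracing_cpumask_write() parses the user buffer with cpumask_parse_user(), so the file expects a hex CPU bitmap; clearing a CPU's bit bumps that CPU's disabled counter and disables ring-buffer recording for it, while setting the bit undoes both. A hedged userspace sketch, assuming the file is exposed as tracing_cpumask under the usual debugfs mount:

/* Illustrative only: restrict tracing to CPUs 0 and 1 by writing the
 * hex bitmap "3" to tracing_cpumask (parsed by cpumask_parse_user()). */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/sys/kernel/debug/tracing/tracing_cpumask";
	const char *mask = "3\n";	/* bits 0 and 1 set */
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, mask, strlen(mask)) < 0)
		perror("write");
	close(fd);
	return 0;
}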
2717 | 2717 | ||
2718 | static int tracing_trace_options_show(struct seq_file *m, void *v) | 2718 | static int tracing_trace_options_show(struct seq_file *m, void *v) |
2719 | { | 2719 | { |
2720 | struct tracer_opt *trace_opts; | 2720 | struct tracer_opt *trace_opts; |
2721 | u32 tracer_flags; | 2721 | u32 tracer_flags; |
2722 | int i; | 2722 | int i; |
2723 | 2723 | ||
2724 | mutex_lock(&trace_types_lock); | 2724 | mutex_lock(&trace_types_lock); |
2725 | tracer_flags = current_trace->flags->val; | 2725 | tracer_flags = current_trace->flags->val; |
2726 | trace_opts = current_trace->flags->opts; | 2726 | trace_opts = current_trace->flags->opts; |
2727 | 2727 | ||
2728 | for (i = 0; trace_options[i]; i++) { | 2728 | for (i = 0; trace_options[i]; i++) { |
2729 | if (trace_flags & (1 << i)) | 2729 | if (trace_flags & (1 << i)) |
2730 | seq_printf(m, "%s\n", trace_options[i]); | 2730 | seq_printf(m, "%s\n", trace_options[i]); |
2731 | else | 2731 | else |
2732 | seq_printf(m, "no%s\n", trace_options[i]); | 2732 | seq_printf(m, "no%s\n", trace_options[i]); |
2733 | } | 2733 | } |
2734 | 2734 | ||
2735 | for (i = 0; trace_opts[i].name; i++) { | 2735 | for (i = 0; trace_opts[i].name; i++) { |
2736 | if (tracer_flags & trace_opts[i].bit) | 2736 | if (tracer_flags & trace_opts[i].bit) |
2737 | seq_printf(m, "%s\n", trace_opts[i].name); | 2737 | seq_printf(m, "%s\n", trace_opts[i].name); |
2738 | else | 2738 | else |
2739 | seq_printf(m, "no%s\n", trace_opts[i].name); | 2739 | seq_printf(m, "no%s\n", trace_opts[i].name); |
2740 | } | 2740 | } |
2741 | mutex_unlock(&trace_types_lock); | 2741 | mutex_unlock(&trace_types_lock); |
2742 | 2742 | ||
2743 | return 0; | 2743 | return 0; |
2744 | } | 2744 | } |
2745 | 2745 | ||
2746 | static int __set_tracer_option(struct tracer *trace, | 2746 | static int __set_tracer_option(struct tracer *trace, |
2747 | struct tracer_flags *tracer_flags, | 2747 | struct tracer_flags *tracer_flags, |
2748 | struct tracer_opt *opts, int neg) | 2748 | struct tracer_opt *opts, int neg) |
2749 | { | 2749 | { |
2750 | int ret; | 2750 | int ret; |
2751 | 2751 | ||
2752 | ret = trace->set_flag(tracer_flags->val, opts->bit, !neg); | 2752 | ret = trace->set_flag(tracer_flags->val, opts->bit, !neg); |
2753 | if (ret) | 2753 | if (ret) |
2754 | return ret; | 2754 | return ret; |
2755 | 2755 | ||
2756 | if (neg) | 2756 | if (neg) |
2757 | tracer_flags->val &= ~opts->bit; | 2757 | tracer_flags->val &= ~opts->bit; |
2758 | else | 2758 | else |
2759 | tracer_flags->val |= opts->bit; | 2759 | tracer_flags->val |= opts->bit; |
2760 | return 0; | 2760 | return 0; |
2761 | } | 2761 | } |
2762 | 2762 | ||
2763 | /* Try to assign a tracer specific option */ | 2763 | /* Try to assign a tracer specific option */ |
2764 | static int set_tracer_option(struct tracer *trace, char *cmp, int neg) | 2764 | static int set_tracer_option(struct tracer *trace, char *cmp, int neg) |
2765 | { | 2765 | { |
2766 | struct tracer_flags *tracer_flags = trace->flags; | 2766 | struct tracer_flags *tracer_flags = trace->flags; |
2767 | struct tracer_opt *opts = NULL; | 2767 | struct tracer_opt *opts = NULL; |
2768 | int i; | 2768 | int i; |
2769 | 2769 | ||
2770 | for (i = 0; tracer_flags->opts[i].name; i++) { | 2770 | for (i = 0; tracer_flags->opts[i].name; i++) { |
2771 | opts = &tracer_flags->opts[i]; | 2771 | opts = &tracer_flags->opts[i]; |
2772 | 2772 | ||
2773 | if (strcmp(cmp, opts->name) == 0) | 2773 | if (strcmp(cmp, opts->name) == 0) |
2774 | return __set_tracer_option(trace, trace->flags, | 2774 | return __set_tracer_option(trace, trace->flags, |
2775 | opts, neg); | 2775 | opts, neg); |
2776 | } | 2776 | } |
2777 | 2777 | ||
2778 | return -EINVAL; | 2778 | return -EINVAL; |
2779 | } | 2779 | } |
2780 | 2780 | ||
2781 | static void set_tracer_flags(unsigned int mask, int enabled) | 2781 | static void set_tracer_flags(unsigned int mask, int enabled) |
2782 | { | 2782 | { |
2783 | /* do nothing if flag is already set */ | 2783 | /* do nothing if flag is already set */ |
2784 | if (!!(trace_flags & mask) == !!enabled) | 2784 | if (!!(trace_flags & mask) == !!enabled) |
2785 | return; | 2785 | return; |
2786 | 2786 | ||
2787 | if (enabled) | 2787 | if (enabled) |
2788 | trace_flags |= mask; | 2788 | trace_flags |= mask; |
2789 | else | 2789 | else |
2790 | trace_flags &= ~mask; | 2790 | trace_flags &= ~mask; |
2791 | 2791 | ||
2792 | if (mask == TRACE_ITER_RECORD_CMD) | 2792 | if (mask == TRACE_ITER_RECORD_CMD) |
2793 | trace_event_enable_cmd_record(enabled); | 2793 | trace_event_enable_cmd_record(enabled); |
2794 | 2794 | ||
2795 | if (mask == TRACE_ITER_OVERWRITE) | 2795 | if (mask == TRACE_ITER_OVERWRITE) |
2796 | ring_buffer_change_overwrite(global_trace.buffer, enabled); | 2796 | ring_buffer_change_overwrite(global_trace.buffer, enabled); |
2797 | } | 2797 | } |
2798 | 2798 | ||
2799 | static ssize_t | 2799 | static ssize_t |
2800 | tracing_trace_options_write(struct file *filp, const char __user *ubuf, | 2800 | tracing_trace_options_write(struct file *filp, const char __user *ubuf, |
2801 | size_t cnt, loff_t *ppos) | 2801 | size_t cnt, loff_t *ppos) |
2802 | { | 2802 | { |
2803 | char buf[64]; | 2803 | char buf[64]; |
2804 | char *cmp; | 2804 | char *cmp; |
2805 | int neg = 0; | 2805 | int neg = 0; |
2806 | int ret; | 2806 | int ret; |
2807 | int i; | 2807 | int i; |
2808 | 2808 | ||
2809 | if (cnt >= sizeof(buf)) | 2809 | if (cnt >= sizeof(buf)) |
2810 | return -EINVAL; | 2810 | return -EINVAL; |
2811 | 2811 | ||
2812 | if (copy_from_user(&buf, ubuf, cnt)) | 2812 | if (copy_from_user(&buf, ubuf, cnt)) |
2813 | return -EFAULT; | 2813 | return -EFAULT; |
2814 | 2814 | ||
2815 | buf[cnt] = 0; | 2815 | buf[cnt] = 0; |
2816 | cmp = strstrip(buf); | 2816 | cmp = strstrip(buf); |
2817 | 2817 | ||
2818 | if (strncmp(cmp, "no", 2) == 0) { | 2818 | if (strncmp(cmp, "no", 2) == 0) { |
2819 | neg = 1; | 2819 | neg = 1; |
2820 | cmp += 2; | 2820 | cmp += 2; |
2821 | } | 2821 | } |
2822 | 2822 | ||
2823 | for (i = 0; trace_options[i]; i++) { | 2823 | for (i = 0; trace_options[i]; i++) { |
2824 | if (strcmp(cmp, trace_options[i]) == 0) { | 2824 | if (strcmp(cmp, trace_options[i]) == 0) { |
2825 | set_tracer_flags(1 << i, !neg); | 2825 | set_tracer_flags(1 << i, !neg); |
2826 | break; | 2826 | break; |
2827 | } | 2827 | } |
2828 | } | 2828 | } |
2829 | 2829 | ||
2830 | /* If no option could be set, test the specific tracer options */ | 2830 | /* If no option could be set, test the specific tracer options */ |
2831 | if (!trace_options[i]) { | 2831 | if (!trace_options[i]) { |
2832 | mutex_lock(&trace_types_lock); | 2832 | mutex_lock(&trace_types_lock); |
2833 | ret = set_tracer_option(current_trace, cmp, neg); | 2833 | ret = set_tracer_option(current_trace, cmp, neg); |
2834 | mutex_unlock(&trace_types_lock); | 2834 | mutex_unlock(&trace_types_lock); |
2835 | if (ret) | 2835 | if (ret) |
2836 | return ret; | 2836 | return ret; |
2837 | } | 2837 | } |
2838 | 2838 | ||
2839 | *ppos += cnt; | 2839 | *ppos += cnt; |
2840 | 2840 | ||
2841 | return cnt; | 2841 | return cnt; |
2842 | } | 2842 | } |
2843 | 2843 | ||
2844 | static int tracing_trace_options_open(struct inode *inode, struct file *file) | 2844 | static int tracing_trace_options_open(struct inode *inode, struct file *file) |
2845 | { | 2845 | { |
2846 | if (tracing_disabled) | 2846 | if (tracing_disabled) |
2847 | return -ENODEV; | 2847 | return -ENODEV; |
2848 | return single_open(file, tracing_trace_options_show, NULL); | 2848 | return single_open(file, tracing_trace_options_show, NULL); |
2849 | } | 2849 | } |
2850 | 2850 | ||
2851 | static const struct file_operations tracing_iter_fops = { | 2851 | static const struct file_operations tracing_iter_fops = { |
2852 | .open = tracing_trace_options_open, | 2852 | .open = tracing_trace_options_open, |
2853 | .read = seq_read, | 2853 | .read = seq_read, |
2854 | .llseek = seq_lseek, | 2854 | .llseek = seq_lseek, |
2855 | .release = single_release, | 2855 | .release = single_release, |
2856 | .write = tracing_trace_options_write, | 2856 | .write = tracing_trace_options_write, |
2857 | }; | 2857 | }; |
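tracing_trace_options_write() strips an optional "no" prefix before matching either a core trace_options flag or a tracer-specific option, so the same file both sets and clears a flag. A short sketch of that round trip, assuming the usual debugfs path:

/* Illustrative only: clear and then restore the sym-offset flag through
 * the trace_options file handled above. */
#include <stdio.h>

static void set_opt(const char *opt)
{
	FILE *f = fopen("/sys/kernel/debug/tracing/trace_options", "w");

	if (!f)
		return;
	fprintf(f, "%s\n", opt);	/* a "no" prefix clears the flag */
	fclose(f);
}

int main(void)
{
	set_opt("nosym-offset");
	set_opt("sym-offset");
	return 0;
}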
2858 | 2858 | ||
2859 | static const char readme_msg[] = | 2859 | static const char readme_msg[] = |
2860 | "tracing mini-HOWTO:\n\n" | 2860 | "tracing mini-HOWTO:\n\n" |
2861 | "# mount -t debugfs nodev /sys/kernel/debug\n\n" | 2861 | "# mount -t debugfs nodev /sys/kernel/debug\n\n" |
2862 | "# cat /sys/kernel/debug/tracing/available_tracers\n" | 2862 | "# cat /sys/kernel/debug/tracing/available_tracers\n" |
2863 | "wakeup wakeup_rt preemptirqsoff preemptoff irqsoff function nop\n\n" | 2863 | "wakeup wakeup_rt preemptirqsoff preemptoff irqsoff function nop\n\n" |
2864 | "# cat /sys/kernel/debug/tracing/current_tracer\n" | 2864 | "# cat /sys/kernel/debug/tracing/current_tracer\n" |
2865 | "nop\n" | 2865 | "nop\n" |
2866 | "# echo wakeup > /sys/kernel/debug/tracing/current_tracer\n" | 2866 | "# echo wakeup > /sys/kernel/debug/tracing/current_tracer\n" |
2867 | "# cat /sys/kernel/debug/tracing/current_tracer\n" | 2867 | "# cat /sys/kernel/debug/tracing/current_tracer\n" |
2868 | "wakeup\n" | 2868 | "wakeup\n" |
2869 | "# cat /sys/kernel/debug/tracing/trace_options\n" | 2869 | "# cat /sys/kernel/debug/tracing/trace_options\n" |
2870 | "noprint-parent nosym-offset nosym-addr noverbose\n" | 2870 | "noprint-parent nosym-offset nosym-addr noverbose\n" |
2871 | "# echo print-parent > /sys/kernel/debug/tracing/trace_options\n" | 2871 | "# echo print-parent > /sys/kernel/debug/tracing/trace_options\n" |
2872 | "# echo 1 > /sys/kernel/debug/tracing/tracing_on\n" | 2872 | "# echo 1 > /sys/kernel/debug/tracing/tracing_on\n" |
2873 | "# cat /sys/kernel/debug/tracing/trace > /tmp/trace.txt\n" | 2873 | "# cat /sys/kernel/debug/tracing/trace > /tmp/trace.txt\n" |
2874 | "# echo 0 > /sys/kernel/debug/tracing/tracing_on\n" | 2874 | "# echo 0 > /sys/kernel/debug/tracing/tracing_on\n" |
2875 | ; | 2875 | ; |
2876 | 2876 | ||
2877 | static ssize_t | 2877 | static ssize_t |
2878 | tracing_readme_read(struct file *filp, char __user *ubuf, | 2878 | tracing_readme_read(struct file *filp, char __user *ubuf, |
2879 | size_t cnt, loff_t *ppos) | 2879 | size_t cnt, loff_t *ppos) |
2880 | { | 2880 | { |
2881 | return simple_read_from_buffer(ubuf, cnt, ppos, | 2881 | return simple_read_from_buffer(ubuf, cnt, ppos, |
2882 | readme_msg, strlen(readme_msg)); | 2882 | readme_msg, strlen(readme_msg)); |
2883 | } | 2883 | } |
2884 | 2884 | ||
2885 | static const struct file_operations tracing_readme_fops = { | 2885 | static const struct file_operations tracing_readme_fops = { |
2886 | .open = tracing_open_generic, | 2886 | .open = tracing_open_generic, |
2887 | .read = tracing_readme_read, | 2887 | .read = tracing_readme_read, |
2888 | .llseek = generic_file_llseek, | 2888 | .llseek = generic_file_llseek, |
2889 | }; | 2889 | }; |
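The readme_msg string above is the tracing mini-HOWTO served from the readme file. The same sequence can be driven programmatically; a minimal sketch, assuming debugfs is mounted at /sys/kernel/debug and that the wakeup tracer listed in the HOWTO is built into the kernel:

/* Illustrative only: follow the mini-HOWTO steps with plain file I/O. */
#include <stdio.h>
#include <unistd.h>

static int write_str(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;
	fputs(val, f);
	return fclose(f);
}

int main(void)
{
	char line[256];
	FILE *trace;

	write_str("/sys/kernel/debug/tracing/current_tracer", "wakeup");
	write_str("/sys/kernel/debug/tracing/trace_options", "print-parent");
	write_str("/sys/kernel/debug/tracing/tracing_on", "1");
	sleep(1);			/* let some events accumulate */
	write_str("/sys/kernel/debug/tracing/tracing_on", "0");

	trace = fopen("/sys/kernel/debug/tracing/trace", "r");
	if (!trace)
		return 1;
	while (fgets(line, sizeof(line), trace))
		fputs(line, stdout);
	fclose(trace);
	return 0;
}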
2890 | 2890 | ||
2891 | static ssize_t | 2891 | static ssize_t |
2892 | tracing_saved_cmdlines_read(struct file *file, char __user *ubuf, | 2892 | tracing_saved_cmdlines_read(struct file *file, char __user *ubuf, |
2893 | size_t cnt, loff_t *ppos) | 2893 | size_t cnt, loff_t *ppos) |
2894 | { | 2894 | { |
2895 | char *buf_comm; | 2895 | char *buf_comm; |
2896 | char *file_buf; | 2896 | char *file_buf; |
2897 | char *buf; | 2897 | char *buf; |
2898 | int len = 0; | 2898 | int len = 0; |
2899 | int pid; | 2899 | int pid; |
2900 | int i; | 2900 | int i; |
2901 | 2901 | ||
2902 | file_buf = kmalloc(SAVED_CMDLINES*(16+TASK_COMM_LEN), GFP_KERNEL); | 2902 | file_buf = kmalloc(SAVED_CMDLINES*(16+TASK_COMM_LEN), GFP_KERNEL); |
2903 | if (!file_buf) | 2903 | if (!file_buf) |
2904 | return -ENOMEM; | 2904 | return -ENOMEM; |
2905 | 2905 | ||
2906 | buf_comm = kmalloc(TASK_COMM_LEN, GFP_KERNEL); | 2906 | buf_comm = kmalloc(TASK_COMM_LEN, GFP_KERNEL); |
2907 | if (!buf_comm) { | 2907 | if (!buf_comm) { |
2908 | kfree(file_buf); | 2908 | kfree(file_buf); |
2909 | return -ENOMEM; | 2909 | return -ENOMEM; |
2910 | } | 2910 | } |
2911 | 2911 | ||
2912 | buf = file_buf; | 2912 | buf = file_buf; |
2913 | 2913 | ||
2914 | for (i = 0; i < SAVED_CMDLINES; i++) { | 2914 | for (i = 0; i < SAVED_CMDLINES; i++) { |
2915 | int r; | 2915 | int r; |
2916 | 2916 | ||
2917 | pid = map_cmdline_to_pid[i]; | 2917 | pid = map_cmdline_to_pid[i]; |
2918 | if (pid == -1 || pid == NO_CMDLINE_MAP) | 2918 | if (pid == -1 || pid == NO_CMDLINE_MAP) |
2919 | continue; | 2919 | continue; |
2920 | 2920 | ||
2921 | trace_find_cmdline(pid, buf_comm); | 2921 | trace_find_cmdline(pid, buf_comm); |
2922 | r = sprintf(buf, "%d %s\n", pid, buf_comm); | 2922 | r = sprintf(buf, "%d %s\n", pid, buf_comm); |
2923 | buf += r; | 2923 | buf += r; |
2924 | len += r; | 2924 | len += r; |
2925 | } | 2925 | } |
2926 | 2926 | ||
2927 | len = simple_read_from_buffer(ubuf, cnt, ppos, | 2927 | len = simple_read_from_buffer(ubuf, cnt, ppos, |
2928 | file_buf, len); | 2928 | file_buf, len); |
2929 | 2929 | ||
2930 | kfree(file_buf); | 2930 | kfree(file_buf); |
2931 | kfree(buf_comm); | 2931 | kfree(buf_comm); |
2932 | 2932 | ||
2933 | return len; | 2933 | return len; |
2934 | } | 2934 | } |
2935 | 2935 | ||
2936 | static const struct file_operations tracing_saved_cmdlines_fops = { | 2936 | static const struct file_operations tracing_saved_cmdlines_fops = { |
2937 | .open = tracing_open_generic, | 2937 | .open = tracing_open_generic, |
2938 | .read = tracing_saved_cmdlines_read, | 2938 | .read = tracing_saved_cmdlines_read, |
2939 | .llseek = generic_file_llseek, | 2939 | .llseek = generic_file_llseek, |
2940 | }; | 2940 | }; |
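tracing_saved_cmdlines_read() formats one "<pid> <comm>" pair per line via sprintf(). A hedged sketch that reads the list back, assuming the file is exposed as saved_cmdlines under the usual debugfs mount and that comm strings contain no whitespace:

/* Illustrative only: parse the "<pid> <comm>" lines produced above. */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/kernel/debug/tracing/saved_cmdlines", "r");
	char comm[64];
	int pid;

	if (!f)
		return 1;
	while (fscanf(f, "%d %63s", &pid, comm) == 2)
		printf("pid %d last ran as %s\n", pid, comm);
	fclose(f);
	return 0;
}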
2941 | 2941 | ||
2942 | static ssize_t | 2942 | static ssize_t |
2943 | tracing_ctrl_read(struct file *filp, char __user *ubuf, | 2943 | tracing_ctrl_read(struct file *filp, char __user *ubuf, |
2944 | size_t cnt, loff_t *ppos) | 2944 | size_t cnt, loff_t *ppos) |
2945 | { | 2945 | { |
2946 | char buf[64]; | 2946 | char buf[64]; |
2947 | int r; | 2947 | int r; |
2948 | 2948 | ||
2949 | r = sprintf(buf, "%u\n", tracer_enabled); | 2949 | r = sprintf(buf, "%u\n", tracer_enabled); |
2950 | return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); | 2950 | return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); |
2951 | } | 2951 | } |
2952 | 2952 | ||
2953 | static ssize_t | 2953 | static ssize_t |
2954 | tracing_ctrl_write(struct file *filp, const char __user *ubuf, | 2954 | tracing_ctrl_write(struct file *filp, const char __user *ubuf, |
2955 | size_t cnt, loff_t *ppos) | 2955 | size_t cnt, loff_t *ppos) |
2956 | { | 2956 | { |
2957 | struct trace_array *tr = filp->private_data; | 2957 | struct trace_array *tr = filp->private_data; |
2958 | unsigned long val; | 2958 | unsigned long val; |
2959 | int ret; | 2959 | int ret; |
2960 | 2960 | ||
2961 | ret = kstrtoul_from_user(ubuf, cnt, 10, &val); | 2961 | ret = kstrtoul_from_user(ubuf, cnt, 10, &val); |
2962 | if (ret) | 2962 | if (ret) |
2963 | return ret; | 2963 | return ret; |
2964 | 2964 | ||
2965 | val = !!val; | 2965 | val = !!val; |
2966 | 2966 | ||
2967 | mutex_lock(&trace_types_lock); | 2967 | mutex_lock(&trace_types_lock); |
2968 | if (tracer_enabled ^ val) { | 2968 | if (tracer_enabled ^ val) { |
2969 | 2969 | ||
2970 | /* Only need to warn if this is used to change the state */ | 2970 | /* Only need to warn if this is used to change the state */ |
2971 | WARN_ONCE(1, "tracing_enabled is deprecated. Use tracing_on"); | 2971 | WARN_ONCE(1, "tracing_enabled is deprecated. Use tracing_on"); |
2972 | 2972 | ||
2973 | if (val) { | 2973 | if (val) { |
2974 | tracer_enabled = 1; | 2974 | tracer_enabled = 1; |
2975 | if (current_trace->start) | 2975 | if (current_trace->start) |
2976 | current_trace->start(tr); | 2976 | current_trace->start(tr); |
2977 | tracing_start(); | 2977 | tracing_start(); |
2978 | } else { | 2978 | } else { |
2979 | tracer_enabled = 0; | 2979 | tracer_enabled = 0; |
2980 | tracing_stop(); | 2980 | tracing_stop(); |
2981 | if (current_trace->stop) | 2981 | if (current_trace->stop) |
2982 | current_trace->stop(tr); | 2982 | current_trace->stop(tr); |
2983 | } | 2983 | } |
2984 | } | 2984 | } |
2985 | mutex_unlock(&trace_types_lock); | 2985 | mutex_unlock(&trace_types_lock); |
2986 | 2986 | ||
2987 | *ppos += cnt; | 2987 | *ppos += cnt; |
2988 | 2988 | ||
2989 | return cnt; | 2989 | return cnt; |
2990 | } | 2990 | } |
2991 | 2991 | ||
2992 | static ssize_t | 2992 | static ssize_t |
2993 | tracing_set_trace_read(struct file *filp, char __user *ubuf, | 2993 | tracing_set_trace_read(struct file *filp, char __user *ubuf, |
2994 | size_t cnt, loff_t *ppos) | 2994 | size_t cnt, loff_t *ppos) |
2995 | { | 2995 | { |
2996 | char buf[MAX_TRACER_SIZE+2]; | 2996 | char buf[MAX_TRACER_SIZE+2]; |
2997 | int r; | 2997 | int r; |
2998 | 2998 | ||
2999 | mutex_lock(&trace_types_lock); | 2999 | mutex_lock(&trace_types_lock); |
3000 | if (current_trace) | 3000 | if (current_trace) |
3001 | r = sprintf(buf, "%s\n", current_trace->name); | 3001 | r = sprintf(buf, "%s\n", current_trace->name); |
3002 | else | 3002 | else |
3003 | r = sprintf(buf, "\n"); | 3003 | r = sprintf(buf, "\n"); |
3004 | mutex_unlock(&trace_types_lock); | 3004 | mutex_unlock(&trace_types_lock); |
3005 | 3005 | ||
3006 | return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); | 3006 | return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); |
3007 | } | 3007 | } |
3008 | 3008 | ||
3009 | int tracer_init(struct tracer *t, struct trace_array *tr) | 3009 | int tracer_init(struct tracer *t, struct trace_array *tr) |
3010 | { | 3010 | { |
3011 | tracing_reset_online_cpus(tr); | 3011 | tracing_reset_online_cpus(tr); |
3012 | return t->init(tr); | 3012 | return t->init(tr); |
3013 | } | 3013 | } |
3014 | 3014 | ||
3015 | static void set_buffer_entries(struct trace_array *tr, unsigned long val) | 3015 | static void set_buffer_entries(struct trace_array *tr, unsigned long val) |
3016 | { | 3016 | { |
3017 | int cpu; | 3017 | int cpu; |
3018 | for_each_tracing_cpu(cpu) | 3018 | for_each_tracing_cpu(cpu) |
3019 | tr->data[cpu]->entries = val; | 3019 | tr->data[cpu]->entries = val; |
3020 | } | 3020 | } |
3021 | 3021 | ||
3022 | static int __tracing_resize_ring_buffer(unsigned long size, int cpu) | 3022 | static int __tracing_resize_ring_buffer(unsigned long size, int cpu) |
3023 | { | 3023 | { |
3024 | int ret; | 3024 | int ret; |
3025 | 3025 | ||
3026 | /* | 3026 | /* |
3027 | * If kernel or user changes the size of the ring buffer | 3027 | * If kernel or user changes the size of the ring buffer |
3028 | * we use the size that was given, and we can forget about | 3028 | * we use the size that was given, and we can forget about |
3029 | * expanding it later. | 3029 | * expanding it later. |
3030 | */ | 3030 | */ |
3031 | ring_buffer_expanded = 1; | 3031 | ring_buffer_expanded = 1; |
3032 | 3032 | ||
3033 | ret = ring_buffer_resize(global_trace.buffer, size, cpu); | 3033 | ret = ring_buffer_resize(global_trace.buffer, size, cpu); |
3034 | if (ret < 0) | 3034 | if (ret < 0) |
3035 | return ret; | 3035 | return ret; |
3036 | 3036 | ||
3037 | if (!current_trace->use_max_tr) | 3037 | if (!current_trace->use_max_tr) |
3038 | goto out; | 3038 | goto out; |
3039 | 3039 | ||
3040 | ret = ring_buffer_resize(max_tr.buffer, size, cpu); | 3040 | ret = ring_buffer_resize(max_tr.buffer, size, cpu); |
3041 | if (ret < 0) { | 3041 | if (ret < 0) { |
3042 | int r = 0; | 3042 | int r = 0; |
3043 | 3043 | ||
3044 | if (cpu == RING_BUFFER_ALL_CPUS) { | 3044 | if (cpu == RING_BUFFER_ALL_CPUS) { |
3045 | int i; | 3045 | int i; |
3046 | for_each_tracing_cpu(i) { | 3046 | for_each_tracing_cpu(i) { |
3047 | r = ring_buffer_resize(global_trace.buffer, | 3047 | r = ring_buffer_resize(global_trace.buffer, |
3048 | global_trace.data[i]->entries, | 3048 | global_trace.data[i]->entries, |
3049 | i); | 3049 | i); |
3050 | if (r < 0) | 3050 | if (r < 0) |
3051 | break; | 3051 | break; |
3052 | } | 3052 | } |
3053 | } else { | 3053 | } else { |
3054 | r = ring_buffer_resize(global_trace.buffer, | 3054 | r = ring_buffer_resize(global_trace.buffer, |
3055 | global_trace.data[cpu]->entries, | 3055 | global_trace.data[cpu]->entries, |
3056 | cpu); | 3056 | cpu); |
3057 | } | 3057 | } |
3058 | 3058 | ||
3059 | if (r < 0) { | 3059 | if (r < 0) { |
3060 | /* | 3060 | /* |
3061 | * AARGH! We are left with different | 3061 | * AARGH! We are left with different |
3062 | * size max buffer!!!! | 3062 | * size max buffer!!!! |
3063 | * The max buffer is our "snapshot" buffer. | 3063 | * The max buffer is our "snapshot" buffer. |
3064 | * When a tracer needs a snapshot (one of the | 3064 | * When a tracer needs a snapshot (one of the |
3065 | * latency tracers), it swaps the max buffer | 3065 | * latency tracers), it swaps the max buffer |
3066 | * with the saved snapshot. We succeeded in updating | 3066 | * with the saved snapshot. We succeeded in updating |
3067 | * the size of the main buffer, but failed to | 3067 | * the size of the main buffer, but failed to |
3068 | * update the size of the max buffer. But when we tried | 3068 | * update the size of the max buffer. But when we tried |
3069 | * to reset the main buffer to the original size, we | 3069 | * to reset the main buffer to the original size, we |
3070 | * failed there too. This is very unlikely to | 3070 | * failed there too. This is very unlikely to |
3071 | * happen, but if it does, warn and kill all | 3071 | * happen, but if it does, warn and kill all |
3072 | * tracing. | 3072 | * tracing. |
3073 | */ | 3073 | */ |
3074 | WARN_ON(1); | 3074 | WARN_ON(1); |
3075 | tracing_disabled = 1; | 3075 | tracing_disabled = 1; |
3076 | } | 3076 | } |
3077 | return ret; | 3077 | return ret; |
3078 | } | 3078 | } |
3079 | 3079 | ||
3080 | if (cpu == RING_BUFFER_ALL_CPUS) | 3080 | if (cpu == RING_BUFFER_ALL_CPUS) |
3081 | set_buffer_entries(&max_tr, size); | 3081 | set_buffer_entries(&max_tr, size); |
3082 | else | 3082 | else |
3083 | max_tr.data[cpu]->entries = size; | 3083 | max_tr.data[cpu]->entries = size; |
3084 | 3084 | ||
3085 | out: | 3085 | out: |
3086 | if (cpu == RING_BUFFER_ALL_CPUS) | 3086 | if (cpu == RING_BUFFER_ALL_CPUS) |
3087 | set_buffer_entries(&global_trace, size); | 3087 | set_buffer_entries(&global_trace, size); |
3088 | else | 3088 | else |
3089 | global_trace.data[cpu]->entries = size; | 3089 | global_trace.data[cpu]->entries = size; |
3090 | 3090 | ||
3091 | return ret; | 3091 | return ret; |
3092 | } | 3092 | } |
3093 | 3093 | ||
3094 | static ssize_t tracing_resize_ring_buffer(unsigned long size, int cpu_id) | 3094 | static ssize_t tracing_resize_ring_buffer(unsigned long size, int cpu_id) |
3095 | { | 3095 | { |
3096 | int ret = size; | 3096 | int ret = size; |
3097 | 3097 | ||
3098 | mutex_lock(&trace_types_lock); | 3098 | mutex_lock(&trace_types_lock); |
3099 | 3099 | ||
3100 | if (cpu_id != RING_BUFFER_ALL_CPUS) { | 3100 | if (cpu_id != RING_BUFFER_ALL_CPUS) { |
3101 | /* make sure this cpu is enabled in the mask */ | 3101 | /* make sure this cpu is enabled in the mask */ |
3102 | if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) { | 3102 | if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) { |
3103 | ret = -EINVAL; | 3103 | ret = -EINVAL; |
3104 | goto out; | 3104 | goto out; |
3105 | } | 3105 | } |
3106 | } | 3106 | } |
3107 | 3107 | ||
3108 | ret = __tracing_resize_ring_buffer(size, cpu_id); | 3108 | ret = __tracing_resize_ring_buffer(size, cpu_id); |
3109 | if (ret < 0) | 3109 | if (ret < 0) |
3110 | ret = -ENOMEM; | 3110 | ret = -ENOMEM; |
3111 | 3111 | ||
3112 | out: | 3112 | out: |
3113 | mutex_unlock(&trace_types_lock); | 3113 | mutex_unlock(&trace_types_lock); |
3114 | 3114 | ||
3115 | return ret; | 3115 | return ret; |
3116 | } | 3116 | } |
3117 | 3117 | ||
3118 | 3118 | ||
3119 | /** | 3119 | /** |
3120 | * tracing_update_buffers - used by tracing facility to expand ring buffers | 3120 | * tracing_update_buffers - used by tracing facility to expand ring buffers |
3121 | * | 3121 | * |
3122 | * To save memory when tracing is never used on a system where it is | 3122 | * To save memory when tracing is never used on a system where it is |
3123 | * configured in, the ring buffers are set to a minimum size. Once | 3123 | * configured in, the ring buffers are set to a minimum size. Once |
3124 | * a user starts to use the tracing facility, they need to grow | 3124 | * a user starts to use the tracing facility, they need to grow |
3125 | * to their default size. | 3125 | * to their default size. |
3126 | * | 3126 | * |
3127 | * This function is to be called when a tracer is about to be used. | 3127 | * This function is to be called when a tracer is about to be used. |
3128 | */ | 3128 | */ |
3129 | int tracing_update_buffers(void) | 3129 | int tracing_update_buffers(void) |
3130 | { | 3130 | { |
3131 | int ret = 0; | 3131 | int ret = 0; |
3132 | 3132 | ||
3133 | mutex_lock(&trace_types_lock); | 3133 | mutex_lock(&trace_types_lock); |
3134 | if (!ring_buffer_expanded) | 3134 | if (!ring_buffer_expanded) |
3135 | ret = __tracing_resize_ring_buffer(trace_buf_size, | 3135 | ret = __tracing_resize_ring_buffer(trace_buf_size, |
3136 | RING_BUFFER_ALL_CPUS); | 3136 | RING_BUFFER_ALL_CPUS); |
3137 | mutex_unlock(&trace_types_lock); | 3137 | mutex_unlock(&trace_types_lock); |
3138 | 3138 | ||
3139 | return ret; | 3139 | return ret; |
3140 | } | 3140 | } |
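The resize path above is what ultimately services user requests to grow the ring buffers; the write handler that calls tracing_resize_ring_buffer() sits outside this hunk, but the knob is commonly exposed as buffer_size_kb. A hedged sketch under those assumptions (both the file name and the debugfs mount point are not established by this diff):

/* Illustrative only: request 4096 KB per-cpu ring buffers, which lands in
 * __tracing_resize_ring_buffer() via the assumed buffer_size_kb handler. */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/kernel/debug/tracing/buffer_size_kb", "w");

	if (!f)
		return 1;
	fprintf(f, "4096\n");
	fclose(f);
	return 0;
}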
3141 | 3141 | ||
3142 | struct trace_option_dentry; | 3142 | struct trace_option_dentry; |
3143 | 3143 | ||
3144 | static struct trace_option_dentry * | 3144 | static struct trace_option_dentry * |
3145 | create_trace_option_files(struct tracer *tracer); | 3145 | create_trace_option_files(struct tracer *tracer); |
3146 | 3146 | ||
3147 | static void | 3147 | static void |
3148 | destroy_trace_option_files(struct trace_option_dentry *topts); | 3148 | destroy_trace_option_files(struct trace_option_dentry *topts); |
3149 | 3149 | ||
3150 | static int tracing_set_tracer(const char *buf) | 3150 | static int tracing_set_tracer(const char *buf) |
3151 | { | 3151 | { |
3152 | static struct trace_option_dentry *topts; | 3152 | static struct trace_option_dentry *topts; |
3153 | struct trace_array *tr = &global_trace; | 3153 | struct trace_array *tr = &global_trace; |
3154 | struct tracer *t; | 3154 | struct tracer *t; |
3155 | int ret = 0; | 3155 | int ret = 0; |
3156 | 3156 | ||
3157 | mutex_lock(&trace_types_lock); | 3157 | mutex_lock(&trace_types_lock); |
3158 | 3158 | ||
3159 | if (!ring_buffer_expanded) { | 3159 | if (!ring_buffer_expanded) { |
3160 | ret = __tracing_resize_ring_buffer(trace_buf_size, | 3160 | ret = __tracing_resize_ring_buffer(trace_buf_size, |
3161 | RING_BUFFER_ALL_CPUS); | 3161 | RING_BUFFER_ALL_CPUS); |
3162 | if (ret < 0) | 3162 | if (ret < 0) |
3163 | goto out; | 3163 | goto out; |
3164 | ret = 0; | 3164 | ret = 0; |
3165 | } | 3165 | } |
3166 | 3166 | ||
3167 | for (t = trace_types; t; t = t->next) { | 3167 | for (t = trace_types; t; t = t->next) { |
3168 | if (strcmp(t->name, buf) == 0) | 3168 | if (strcmp(t->name, buf) == 0) |
3169 | break; | 3169 | break; |
3170 | } | 3170 | } |
3171 | if (!t) { | 3171 | if (!t) { |
3172 | ret = -EINVAL; | 3172 | ret = -EINVAL; |
3173 | goto out; | 3173 | goto out; |
3174 | } | 3174 | } |
3175 | if (t == current_trace) | 3175 | if (t == current_trace) |
3176 | goto out; | 3176 | goto out; |
3177 | 3177 | ||
3178 | trace_branch_disable(); | 3178 | trace_branch_disable(); |
3179 | if (current_trace && current_trace->reset) | 3179 | if (current_trace && current_trace->reset) |
3180 | current_trace->reset(tr); | 3180 | current_trace->reset(tr); |
3181 | if (current_trace && current_trace->use_max_tr) { | 3181 | if (current_trace && current_trace->use_max_tr) { |
3182 | /* | 3182 | /* |
3183 | * We don't free the ring buffer. Instead, we resize it because | 3183 | * We don't free the ring buffer. Instead, we resize it because |
3184 | * the max_tr ring buffer has some state (e.g. ring->clock) and | 3184 | * the max_tr ring buffer has some state (e.g. ring->clock) and |
3185 | * we want to preserve it. | 3185 | * we want to preserve it. |
3186 | */ | 3186 | */ |
3187 | ring_buffer_resize(max_tr.buffer, 1, RING_BUFFER_ALL_CPUS); | 3187 | ring_buffer_resize(max_tr.buffer, 1, RING_BUFFER_ALL_CPUS); |
3188 | set_buffer_entries(&max_tr, 1); | 3188 | set_buffer_entries(&max_tr, 1); |
3189 | } | 3189 | } |
3190 | destroy_trace_option_files(topts); | 3190 | destroy_trace_option_files(topts); |
3191 | 3191 | ||
3192 | current_trace = &nop_trace; | 3192 | current_trace = &nop_trace; |
3193 | 3193 | ||
3194 | topts = create_trace_option_files(t); | 3194 | topts = create_trace_option_files(t); |
3195 | if (t->use_max_tr) { | 3195 | if (t->use_max_tr) { |
3196 | int cpu; | 3196 | int cpu; |
3197 | /* we need to make per cpu buffer sizes equivalent */ | 3197 | /* we need to make per cpu buffer sizes equivalent */ |
3198 | for_each_tracing_cpu(cpu) { | 3198 | for_each_tracing_cpu(cpu) { |
3199 | ret = ring_buffer_resize(max_tr.buffer, | 3199 | ret = ring_buffer_resize(max_tr.buffer, |
3200 | global_trace.data[cpu]->entries, | 3200 | global_trace.data[cpu]->entries, |
3201 | cpu); | 3201 | cpu); |
3202 | if (ret < 0) | 3202 | if (ret < 0) |
3203 | goto out; | 3203 | goto out; |
3204 | max_tr.data[cpu]->entries = | 3204 | max_tr.data[cpu]->entries = |
3205 | global_trace.data[cpu]->entries; | 3205 | global_trace.data[cpu]->entries; |
3206 | } | 3206 | } |
3207 | } | 3207 | } |
3208 | 3208 | ||
3209 | if (t->init) { | 3209 | if (t->init) { |
3210 | ret = tracer_init(t, tr); | 3210 | ret = tracer_init(t, tr); |
3211 | if (ret) | 3211 | if (ret) |
3212 | goto out; | 3212 | goto out; |
3213 | } | 3213 | } |
3214 | 3214 | ||
3215 | current_trace = t; | 3215 | current_trace = t; |
3216 | trace_branch_enable(tr); | 3216 | trace_branch_enable(tr); |
3217 | out: | 3217 | out: |
3218 | mutex_unlock(&trace_types_lock); | 3218 | mutex_unlock(&trace_types_lock); |
3219 | 3219 | ||
3220 | return ret; | 3220 | return ret; |
3221 | } | 3221 | } |
3222 | 3222 | ||
3223 | static ssize_t | 3223 | static ssize_t |
3224 | tracing_set_trace_write(struct file *filp, const char __user *ubuf, | 3224 | tracing_set_trace_write(struct file *filp, const char __user *ubuf, |
3225 | size_t cnt, loff_t *ppos) | 3225 | size_t cnt, loff_t *ppos) |
3226 | { | 3226 | { |
3227 | char buf[MAX_TRACER_SIZE+1]; | 3227 | char buf[MAX_TRACER_SIZE+1]; |
3228 | int i; | 3228 | int i; |
3229 | size_t ret; | 3229 | size_t ret; |
3230 | int err; | 3230 | int err; |
3231 | 3231 | ||
3232 | ret = cnt; | 3232 | ret = cnt; |
3233 | 3233 | ||
3234 | if (cnt > MAX_TRACER_SIZE) | 3234 | if (cnt > MAX_TRACER_SIZE) |
3235 | cnt = MAX_TRACER_SIZE; | 3235 | cnt = MAX_TRACER_SIZE; |
3236 | 3236 | ||
3237 | if (copy_from_user(&buf, ubuf, cnt)) | 3237 | if (copy_from_user(&buf, ubuf, cnt)) |
3238 | return -EFAULT; | 3238 | return -EFAULT; |
3239 | 3239 | ||
3240 | buf[cnt] = 0; | 3240 | buf[cnt] = 0; |
3241 | 3241 | ||
3242 | /* strip ending whitespace. */ | 3242 | /* strip ending whitespace. */ |
3243 | for (i = cnt - 1; i > 0 && isspace(buf[i]); i--) | 3243 | for (i = cnt - 1; i > 0 && isspace(buf[i]); i--) |
3244 | buf[i] = 0; | 3244 | buf[i] = 0; |
3245 | 3245 | ||
3246 | err = tracing_set_tracer(buf); | 3246 | err = tracing_set_tracer(buf); |
3247 | if (err) | 3247 | if (err) |
3248 | return err; | 3248 | return err; |
3249 | 3249 | ||
3250 | *ppos += ret; | 3250 | *ppos += ret; |
3251 | 3251 | ||
3252 | return ret; | 3252 | return ret; |
3253 | } | 3253 | } |
3254 | 3254 | ||
3255 | static ssize_t | 3255 | static ssize_t |
3256 | tracing_max_lat_read(struct file *filp, char __user *ubuf, | 3256 | tracing_max_lat_read(struct file *filp, char __user *ubuf, |
3257 | size_t cnt, loff_t *ppos) | 3257 | size_t cnt, loff_t *ppos) |
3258 | { | 3258 | { |
3259 | unsigned long *ptr = filp->private_data; | 3259 | unsigned long *ptr = filp->private_data; |
3260 | char buf[64]; | 3260 | char buf[64]; |
3261 | int r; | 3261 | int r; |
3262 | 3262 | ||
3263 | r = snprintf(buf, sizeof(buf), "%ld\n", | 3263 | r = snprintf(buf, sizeof(buf), "%ld\n", |
3264 | *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr)); | 3264 | *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr)); |
3265 | if (r > sizeof(buf)) | 3265 | if (r > sizeof(buf)) |
3266 | r = sizeof(buf); | 3266 | r = sizeof(buf); |
3267 | return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); | 3267 | return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); |
3268 | } | 3268 | } |
3269 | 3269 | ||
3270 | static ssize_t | 3270 | static ssize_t |
3271 | tracing_max_lat_write(struct file *filp, const char __user *ubuf, | 3271 | tracing_max_lat_write(struct file *filp, const char __user *ubuf, |
3272 | size_t cnt, loff_t *ppos) | 3272 | size_t cnt, loff_t *ppos) |
3273 | { | 3273 | { |
3274 | unsigned long *ptr = filp->private_data; | 3274 | unsigned long *ptr = filp->private_data; |
3275 | unsigned long val; | 3275 | unsigned long val; |
3276 | int ret; | 3276 | int ret; |
3277 | 3277 | ||
3278 | ret = kstrtoul_from_user(ubuf, cnt, 10, &val); | 3278 | ret = kstrtoul_from_user(ubuf, cnt, 10, &val); |
3279 | if (ret) | 3279 | if (ret) |
3280 | return ret; | 3280 | return ret; |
3281 | 3281 | ||
3282 | *ptr = val * 1000; | 3282 | *ptr = val * 1000; |
3283 | 3283 | ||
3284 | return cnt; | 3284 | return cnt; |
3285 | } | 3285 | } |
3286 | 3286 | ||
3287 | static int tracing_open_pipe(struct inode *inode, struct file *filp) | 3287 | static int tracing_open_pipe(struct inode *inode, struct file *filp) |
3288 | { | 3288 | { |
3289 | long cpu_file = (long) inode->i_private; | 3289 | long cpu_file = (long) inode->i_private; |
3290 | struct trace_iterator *iter; | 3290 | struct trace_iterator *iter; |
3291 | int ret = 0; | 3291 | int ret = 0; |
3292 | 3292 | ||
3293 | if (tracing_disabled) | 3293 | if (tracing_disabled) |
3294 | return -ENODEV; | 3294 | return -ENODEV; |
3295 | 3295 | ||
3296 | mutex_lock(&trace_types_lock); | 3296 | mutex_lock(&trace_types_lock); |
3297 | 3297 | ||
3298 | /* create a buffer to store the information to pass to userspace */ | 3298 | /* create a buffer to store the information to pass to userspace */ |
3299 | iter = kzalloc(sizeof(*iter), GFP_KERNEL); | 3299 | iter = kzalloc(sizeof(*iter), GFP_KERNEL); |
3300 | if (!iter) { | 3300 | if (!iter) { |
3301 | ret = -ENOMEM; | 3301 | ret = -ENOMEM; |
3302 | goto out; | 3302 | goto out; |
3303 | } | 3303 | } |
3304 | 3304 | ||
3305 | /* | 3305 | /* |
3306 | * We make a copy of the current tracer to avoid concurrent | 3306 | * We make a copy of the current tracer to avoid concurrent |
3307 | * changes on it while we are reading. | 3307 | * changes on it while we are reading. |
3308 | */ | 3308 | */ |
3309 | iter->trace = kmalloc(sizeof(*iter->trace), GFP_KERNEL); | 3309 | iter->trace = kmalloc(sizeof(*iter->trace), GFP_KERNEL); |
3310 | if (!iter->trace) { | 3310 | if (!iter->trace) { |
3311 | ret = -ENOMEM; | 3311 | ret = -ENOMEM; |
3312 | goto fail; | 3312 | goto fail; |
3313 | } | 3313 | } |
3314 | if (current_trace) | 3314 | if (current_trace) |
3315 | *iter->trace = *current_trace; | 3315 | *iter->trace = *current_trace; |
3316 | 3316 | ||
3317 | if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) { | 3317 | if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) { |
3318 | ret = -ENOMEM; | 3318 | ret = -ENOMEM; |
3319 | goto fail; | 3319 | goto fail; |
3320 | } | 3320 | } |
3321 | 3321 | ||
3322 | /* trace pipe does not show start of buffer */ | 3322 | /* trace pipe does not show start of buffer */ |
3323 | cpumask_setall(iter->started); | 3323 | cpumask_setall(iter->started); |
3324 | 3324 | ||
3325 | if (trace_flags & TRACE_ITER_LATENCY_FMT) | 3325 | if (trace_flags & TRACE_ITER_LATENCY_FMT) |
3326 | iter->iter_flags |= TRACE_FILE_LAT_FMT; | 3326 | iter->iter_flags |= TRACE_FILE_LAT_FMT; |
3327 | 3327 | ||
3328 | iter->cpu_file = cpu_file; | 3328 | iter->cpu_file = cpu_file; |
3329 | iter->tr = &global_trace; | 3329 | iter->tr = &global_trace; |
3330 | mutex_init(&iter->mutex); | 3330 | mutex_init(&iter->mutex); |
3331 | filp->private_data = iter; | 3331 | filp->private_data = iter; |
3332 | 3332 | ||
3333 | if (iter->trace->pipe_open) | 3333 | if (iter->trace->pipe_open) |
3334 | iter->trace->pipe_open(iter); | 3334 | iter->trace->pipe_open(iter); |
3335 | 3335 | ||
3336 | nonseekable_open(inode, filp); | 3336 | nonseekable_open(inode, filp); |
3337 | out: | 3337 | out: |
3338 | mutex_unlock(&trace_types_lock); | 3338 | mutex_unlock(&trace_types_lock); |
3339 | return ret; | 3339 | return ret; |
3340 | 3340 | ||
3341 | fail: | 3341 | fail: |
3342 | kfree(iter->trace); | 3342 | kfree(iter->trace); |
3343 | kfree(iter); | 3343 | kfree(iter); |
3344 | mutex_unlock(&trace_types_lock); | 3344 | mutex_unlock(&trace_types_lock); |
3345 | return ret; | 3345 | return ret; |
3346 | } | 3346 | } |
3347 | 3347 | ||
3348 | static int tracing_release_pipe(struct inode *inode, struct file *file) | 3348 | static int tracing_release_pipe(struct inode *inode, struct file *file) |
3349 | { | 3349 | { |
3350 | struct trace_iterator *iter = file->private_data; | 3350 | struct trace_iterator *iter = file->private_data; |
3351 | 3351 | ||
3352 | mutex_lock(&trace_types_lock); | 3352 | mutex_lock(&trace_types_lock); |
3353 | 3353 | ||
3354 | if (iter->trace->pipe_close) | 3354 | if (iter->trace->pipe_close) |
3355 | iter->trace->pipe_close(iter); | 3355 | iter->trace->pipe_close(iter); |
3356 | 3356 | ||
3357 | mutex_unlock(&trace_types_lock); | 3357 | mutex_unlock(&trace_types_lock); |
3358 | 3358 | ||
3359 | free_cpumask_var(iter->started); | 3359 | free_cpumask_var(iter->started); |
3360 | mutex_destroy(&iter->mutex); | 3360 | mutex_destroy(&iter->mutex); |
3361 | kfree(iter->trace); | 3361 | kfree(iter->trace); |
3362 | kfree(iter); | 3362 | kfree(iter); |
3363 | 3363 | ||
3364 | return 0; | 3364 | return 0; |
3365 | } | 3365 | } |
3366 | 3366 | ||
3367 | static unsigned int | 3367 | static unsigned int |
3368 | tracing_poll_pipe(struct file *filp, poll_table *poll_table) | 3368 | tracing_poll_pipe(struct file *filp, poll_table *poll_table) |
3369 | { | 3369 | { |
3370 | struct trace_iterator *iter = filp->private_data; | 3370 | struct trace_iterator *iter = filp->private_data; |
3371 | 3371 | ||
3372 | if (trace_flags & TRACE_ITER_BLOCK) { | 3372 | if (trace_flags & TRACE_ITER_BLOCK) { |
3373 | /* | 3373 | /* |
3374 | * Always select as readable when in blocking mode | 3374 | * Always select as readable when in blocking mode |
3375 | */ | 3375 | */ |
3376 | return POLLIN | POLLRDNORM; | 3376 | return POLLIN | POLLRDNORM; |
3377 | } else { | 3377 | } else { |
3378 | if (!trace_empty(iter)) | 3378 | if (!trace_empty(iter)) |
3379 | return POLLIN | POLLRDNORM; | 3379 | return POLLIN | POLLRDNORM; |
3380 | poll_wait(filp, &trace_wait, poll_table); | 3380 | poll_wait(filp, &trace_wait, poll_table); |
3381 | if (!trace_empty(iter)) | 3381 | if (!trace_empty(iter)) |
3382 | return POLLIN | POLLRDNORM; | 3382 | return POLLIN | POLLRDNORM; |
3383 | 3383 | ||
3384 | return 0; | 3384 | return 0; |
3385 | } | 3385 | } |
3386 | } | 3386 | } |
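A sketch of the poll path above, from the reader's side: with the default (non-blocking poll) behaviour, tracing_poll_pipe() reports the pipe readable as soon as the buffer is non-empty. The path and the 5-second timeout below are assumptions.

/* Hypothetical example; assumes debugfs at /sys/kernel/debug. */
#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	struct pollfd pfd;

	pfd.fd = open("/sys/kernel/debug/tracing/trace_pipe", O_RDONLY);
	pfd.events = POLLIN;
	if (pfd.fd < 0) {
		perror("open");
		return 1;
	}
	/* Becomes readable once !trace_empty() on the kernel side. */
	if (poll(&pfd, 1, 5000) > 0 && (pfd.revents & POLLIN)) {
		ssize_t n = read(pfd.fd, buf, sizeof(buf));
		if (n > 0)
			fwrite(buf, 1, n, stdout);
	}
	close(pfd.fd);
	return 0;
}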
3387 | 3387 | ||
3388 | 3388 | ||
3389 | void default_wait_pipe(struct trace_iterator *iter) | 3389 | void default_wait_pipe(struct trace_iterator *iter) |
3390 | { | 3390 | { |
3391 | DEFINE_WAIT(wait); | 3391 | DEFINE_WAIT(wait); |
3392 | 3392 | ||
3393 | prepare_to_wait(&trace_wait, &wait, TASK_INTERRUPTIBLE); | 3393 | prepare_to_wait(&trace_wait, &wait, TASK_INTERRUPTIBLE); |
3394 | 3394 | ||
3395 | if (trace_empty(iter)) | 3395 | if (trace_empty(iter)) |
3396 | schedule(); | 3396 | schedule(); |
3397 | 3397 | ||
3398 | finish_wait(&trace_wait, &wait); | 3398 | finish_wait(&trace_wait, &wait); |
3399 | } | 3399 | } |
3400 | 3400 | ||
3401 | /* | 3401 | /* |
3402 | * This is a make-shift waitqueue. | 3402 | * This is a make-shift waitqueue. |
3403 | * A tracer might use this callback in some rare cases: | 3403 | * A tracer might use this callback in some rare cases: |
3404 | * | 3404 | * |
3405 | * 1) the current tracer might hold the runqueue lock when it wakes up | 3405 | * 1) the current tracer might hold the runqueue lock when it wakes up |
3406 | * a reader, hence a deadlock (sched, function, and function graph tracers) | 3406 | * a reader, hence a deadlock (sched, function, and function graph tracers) |
3407 | * 2) the function tracers trace all functions; we don't want | 3407 | * 2) the function tracers trace all functions; we don't want |
3408 | * the overhead of calling wake_up and friends | 3408 | * the overhead of calling wake_up and friends |
3409 | * (and tracing them too) | 3409 | * (and tracing them too) |
3410 | * | 3410 | * |
3411 | * Anyway, this is a very primitive wakeup. | 3411 | * Anyway, this is a very primitive wakeup. |
3412 | */ | 3412 | */ |
3413 | void poll_wait_pipe(struct trace_iterator *iter) | 3413 | void poll_wait_pipe(struct trace_iterator *iter) |
3414 | { | 3414 | { |
3415 | set_current_state(TASK_INTERRUPTIBLE); | 3415 | set_current_state(TASK_INTERRUPTIBLE); |
3416 | /* sleep for 100 msecs, and try again. */ | 3416 | /* sleep for 100 msecs, and try again. */ |
3417 | schedule_timeout(HZ / 10); | 3417 | schedule_timeout(HZ / 10); |
3418 | } | 3418 | } |
3419 | 3419 | ||
3420 | /* Must be called with trace_types_lock mutex held. */ | 3420 | /* Must be called with trace_types_lock mutex held. */ |
3421 | static int tracing_wait_pipe(struct file *filp) | 3421 | static int tracing_wait_pipe(struct file *filp) |
3422 | { | 3422 | { |
3423 | struct trace_iterator *iter = filp->private_data; | 3423 | struct trace_iterator *iter = filp->private_data; |
3424 | 3424 | ||
3425 | while (trace_empty(iter)) { | 3425 | while (trace_empty(iter)) { |
3426 | 3426 | ||
3427 | if ((filp->f_flags & O_NONBLOCK)) { | 3427 | if ((filp->f_flags & O_NONBLOCK)) { |
3428 | return -EAGAIN; | 3428 | return -EAGAIN; |
3429 | } | 3429 | } |
3430 | 3430 | ||
3431 | mutex_unlock(&iter->mutex); | 3431 | mutex_unlock(&iter->mutex); |
3432 | 3432 | ||
3433 | iter->trace->wait_pipe(iter); | 3433 | iter->trace->wait_pipe(iter); |
3434 | 3434 | ||
3435 | mutex_lock(&iter->mutex); | 3435 | mutex_lock(&iter->mutex); |
3436 | 3436 | ||
3437 | if (signal_pending(current)) | 3437 | if (signal_pending(current)) |
3438 | return -EINTR; | 3438 | return -EINTR; |
3439 | 3439 | ||
3440 | /* | 3440 | /* |
3441 | * We keep blocking until we have read something and tracing is disabled. | 3441 | * We keep blocking until we have read something and tracing is disabled. |
3442 | * We still block if tracing is disabled, but we have never | 3442 | * We still block if tracing is disabled, but we have never |
3443 | * read anything. This allows a user to cat this file, and | 3443 | * read anything. This allows a user to cat this file, and |
3444 | * then enable tracing. But after we have read something, | 3444 | * then enable tracing. But after we have read something, |
3445 | * we give an EOF when tracing is again disabled. | 3445 | * we give an EOF when tracing is again disabled. |
3446 | * | 3446 | * |
3447 | * iter->pos will be 0 if we haven't read anything. | 3447 | * iter->pos will be 0 if we haven't read anything. |
3448 | */ | 3448 | */ |
3449 | if (!tracer_enabled && iter->pos) | 3449 | if (!tracer_enabled && iter->pos) |
3450 | break; | 3450 | break; |
3451 | } | 3451 | } |
3452 | 3452 | ||
3453 | return 1; | 3453 | return 1; |
3454 | } | 3454 | } |
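The O_NONBLOCK branch of tracing_wait_pipe() is easiest to see from userspace: a non-blocking reader gets -EAGAIN instead of sleeping when the buffer is empty. A minimal sketch, with the debugfs path as an assumption:

/* Hypothetical example; not part of the commit. */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	int fd = open("/sys/kernel/debug/tracing/trace_pipe",
		      O_RDONLY | O_NONBLOCK);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	ssize_t n = read(fd, buf, sizeof(buf));
	if (n < 0 && errno == EAGAIN)
		printf("buffer empty: tracing_wait_pipe() returned -EAGAIN\n");
	else if (n > 0)
		fwrite(buf, 1, n, stdout);	/* consumed trace text */
	close(fd);
	return 0;
}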
3455 | 3455 | ||
3456 | /* | 3456 | /* |
3457 | * Consumer reader. | 3457 | * Consumer reader. |
3458 | */ | 3458 | */ |
3459 | static ssize_t | 3459 | static ssize_t |
3460 | tracing_read_pipe(struct file *filp, char __user *ubuf, | 3460 | tracing_read_pipe(struct file *filp, char __user *ubuf, |
3461 | size_t cnt, loff_t *ppos) | 3461 | size_t cnt, loff_t *ppos) |
3462 | { | 3462 | { |
3463 | struct trace_iterator *iter = filp->private_data; | 3463 | struct trace_iterator *iter = filp->private_data; |
3464 | static struct tracer *old_tracer; | 3464 | static struct tracer *old_tracer; |
3465 | ssize_t sret; | 3465 | ssize_t sret; |
3466 | 3466 | ||
3467 | /* return any leftover data */ | 3467 | /* return any leftover data */ |
3468 | sret = trace_seq_to_user(&iter->seq, ubuf, cnt); | 3468 | sret = trace_seq_to_user(&iter->seq, ubuf, cnt); |
3469 | if (sret != -EBUSY) | 3469 | if (sret != -EBUSY) |
3470 | return sret; | 3470 | return sret; |
3471 | 3471 | ||
3472 | trace_seq_init(&iter->seq); | 3472 | trace_seq_init(&iter->seq); |
3473 | 3473 | ||
3474 | /* copy the tracer to avoid using a global lock all around */ | 3474 | /* copy the tracer to avoid using a global lock all around */ |
3475 | mutex_lock(&trace_types_lock); | 3475 | mutex_lock(&trace_types_lock); |
3476 | if (unlikely(old_tracer != current_trace && current_trace)) { | 3476 | if (unlikely(old_tracer != current_trace && current_trace)) { |
3477 | old_tracer = current_trace; | 3477 | old_tracer = current_trace; |
3478 | *iter->trace = *current_trace; | 3478 | *iter->trace = *current_trace; |
3479 | } | 3479 | } |
3480 | mutex_unlock(&trace_types_lock); | 3480 | mutex_unlock(&trace_types_lock); |
3481 | 3481 | ||
3482 | /* | 3482 | /* |
3483 | * Avoid more than one consumer on a single file descriptor | 3483 | * Avoid more than one consumer on a single file descriptor |
3484 | * This is just a matter of trace coherency; the ring buffer itself | 3484 | * This is just a matter of trace coherency; the ring buffer itself |
3485 | * is protected. | 3485 | * is protected. |
3486 | */ | 3486 | */ |
3487 | mutex_lock(&iter->mutex); | 3487 | mutex_lock(&iter->mutex); |
3488 | if (iter->trace->read) { | 3488 | if (iter->trace->read) { |
3489 | sret = iter->trace->read(iter, filp, ubuf, cnt, ppos); | 3489 | sret = iter->trace->read(iter, filp, ubuf, cnt, ppos); |
3490 | if (sret) | 3490 | if (sret) |
3491 | goto out; | 3491 | goto out; |
3492 | } | 3492 | } |
3493 | 3493 | ||
3494 | waitagain: | 3494 | waitagain: |
3495 | sret = tracing_wait_pipe(filp); | 3495 | sret = tracing_wait_pipe(filp); |
3496 | if (sret <= 0) | 3496 | if (sret <= 0) |
3497 | goto out; | 3497 | goto out; |
3498 | 3498 | ||
3499 | /* stop when tracing is finished */ | 3499 | /* stop when tracing is finished */ |
3500 | if (trace_empty(iter)) { | 3500 | if (trace_empty(iter)) { |
3501 | sret = 0; | 3501 | sret = 0; |
3502 | goto out; | 3502 | goto out; |
3503 | } | 3503 | } |
3504 | 3504 | ||
3505 | if (cnt >= PAGE_SIZE) | 3505 | if (cnt >= PAGE_SIZE) |
3506 | cnt = PAGE_SIZE - 1; | 3506 | cnt = PAGE_SIZE - 1; |
3507 | 3507 | ||
3508 | /* reset all but tr, trace, and overruns */ | 3508 | /* reset all but tr, trace, and overruns */ |
3509 | memset(&iter->seq, 0, | 3509 | memset(&iter->seq, 0, |
3510 | sizeof(struct trace_iterator) - | 3510 | sizeof(struct trace_iterator) - |
3511 | offsetof(struct trace_iterator, seq)); | 3511 | offsetof(struct trace_iterator, seq)); |
3512 | iter->pos = -1; | 3512 | iter->pos = -1; |
3513 | 3513 | ||
3514 | trace_event_read_lock(); | 3514 | trace_event_read_lock(); |
3515 | trace_access_lock(iter->cpu_file); | 3515 | trace_access_lock(iter->cpu_file); |
3516 | while (trace_find_next_entry_inc(iter) != NULL) { | 3516 | while (trace_find_next_entry_inc(iter) != NULL) { |
3517 | enum print_line_t ret; | 3517 | enum print_line_t ret; |
3518 | int len = iter->seq.len; | 3518 | int len = iter->seq.len; |
3519 | 3519 | ||
3520 | ret = print_trace_line(iter); | 3520 | ret = print_trace_line(iter); |
3521 | if (ret == TRACE_TYPE_PARTIAL_LINE) { | 3521 | if (ret == TRACE_TYPE_PARTIAL_LINE) { |
3522 | /* don't print partial lines */ | 3522 | /* don't print partial lines */ |
3523 | iter->seq.len = len; | 3523 | iter->seq.len = len; |
3524 | break; | 3524 | break; |
3525 | } | 3525 | } |
3526 | if (ret != TRACE_TYPE_NO_CONSUME) | 3526 | if (ret != TRACE_TYPE_NO_CONSUME) |
3527 | trace_consume(iter); | 3527 | trace_consume(iter); |
3528 | 3528 | ||
3529 | if (iter->seq.len >= cnt) | 3529 | if (iter->seq.len >= cnt) |
3530 | break; | 3530 | break; |
3531 | 3531 | ||
3532 | /* | 3532 | /* |
3533 | * Setting the full flag means we reached the trace_seq buffer | 3533 | * Setting the full flag means we reached the trace_seq buffer |
3534 | * size and we should have left through the partial-output check above. | 3534 | * size and we should have left through the partial-output check above. |
3535 | * One of the trace_seq_* functions is not used properly. | 3535 | * One of the trace_seq_* functions is not used properly. |
3536 | */ | 3536 | */ |
3537 | WARN_ONCE(iter->seq.full, "full flag set for trace type %d", | 3537 | WARN_ONCE(iter->seq.full, "full flag set for trace type %d", |
3538 | iter->ent->type); | 3538 | iter->ent->type); |
3539 | } | 3539 | } |
3540 | trace_access_unlock(iter->cpu_file); | 3540 | trace_access_unlock(iter->cpu_file); |
3541 | trace_event_read_unlock(); | 3541 | trace_event_read_unlock(); |
3542 | 3542 | ||
3543 | /* Now copy what we have to the user */ | 3543 | /* Now copy what we have to the user */ |
3544 | sret = trace_seq_to_user(&iter->seq, ubuf, cnt); | 3544 | sret = trace_seq_to_user(&iter->seq, ubuf, cnt); |
3545 | if (iter->seq.readpos >= iter->seq.len) | 3545 | if (iter->seq.readpos >= iter->seq.len) |
3546 | trace_seq_init(&iter->seq); | 3546 | trace_seq_init(&iter->seq); |
3547 | 3547 | ||
3548 | /* | 3548 | /* |
3549 | * If there was nothing to send to user, in spite of consuming trace | 3549 | * If there was nothing to send to user, in spite of consuming trace |
3550 | * entries, go back to wait for more entries. | 3550 | * entries, go back to wait for more entries. |
3551 | */ | 3551 | */ |
3552 | if (sret == -EBUSY) | 3552 | if (sret == -EBUSY) |
3553 | goto waitagain; | 3553 | goto waitagain; |
3554 | 3554 | ||
3555 | out: | 3555 | out: |
3556 | mutex_unlock(&iter->mutex); | 3556 | mutex_unlock(&iter->mutex); |
3557 | 3557 | ||
3558 | return sret; | 3558 | return sret; |
3559 | } | 3559 | } |
3560 | 3560 | ||
3561 | static void tracing_pipe_buf_release(struct pipe_inode_info *pipe, | 3561 | static void tracing_pipe_buf_release(struct pipe_inode_info *pipe, |
3562 | struct pipe_buffer *buf) | 3562 | struct pipe_buffer *buf) |
3563 | { | 3563 | { |
3564 | __free_page(buf->page); | 3564 | __free_page(buf->page); |
3565 | } | 3565 | } |
3566 | 3566 | ||
3567 | static void tracing_spd_release_pipe(struct splice_pipe_desc *spd, | 3567 | static void tracing_spd_release_pipe(struct splice_pipe_desc *spd, |
3568 | unsigned int idx) | 3568 | unsigned int idx) |
3569 | { | 3569 | { |
3570 | __free_page(spd->pages[idx]); | 3570 | __free_page(spd->pages[idx]); |
3571 | } | 3571 | } |
3572 | 3572 | ||
3573 | static const struct pipe_buf_operations tracing_pipe_buf_ops = { | 3573 | static const struct pipe_buf_operations tracing_pipe_buf_ops = { |
3574 | .can_merge = 0, | 3574 | .can_merge = 0, |
3575 | .map = generic_pipe_buf_map, | 3575 | .map = generic_pipe_buf_map, |
3576 | .unmap = generic_pipe_buf_unmap, | 3576 | .unmap = generic_pipe_buf_unmap, |
3577 | .confirm = generic_pipe_buf_confirm, | 3577 | .confirm = generic_pipe_buf_confirm, |
3578 | .release = tracing_pipe_buf_release, | 3578 | .release = tracing_pipe_buf_release, |
3579 | .steal = generic_pipe_buf_steal, | 3579 | .steal = generic_pipe_buf_steal, |
3580 | .get = generic_pipe_buf_get, | 3580 | .get = generic_pipe_buf_get, |
3581 | }; | 3581 | }; |
3582 | 3582 | ||
3583 | static size_t | 3583 | static size_t |
3584 | tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter) | 3584 | tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter) |
3585 | { | 3585 | { |
3586 | size_t count; | 3586 | size_t count; |
3587 | int ret; | 3587 | int ret; |
3588 | 3588 | ||
3589 | /* Seq buffer is page-sized, exactly what we need. */ | 3589 | /* Seq buffer is page-sized, exactly what we need. */ |
3590 | for (;;) { | 3590 | for (;;) { |
3591 | count = iter->seq.len; | 3591 | count = iter->seq.len; |
3592 | ret = print_trace_line(iter); | 3592 | ret = print_trace_line(iter); |
3593 | count = iter->seq.len - count; | 3593 | count = iter->seq.len - count; |
3594 | if (rem < count) { | 3594 | if (rem < count) { |
3595 | rem = 0; | 3595 | rem = 0; |
3596 | iter->seq.len -= count; | 3596 | iter->seq.len -= count; |
3597 | break; | 3597 | break; |
3598 | } | 3598 | } |
3599 | if (ret == TRACE_TYPE_PARTIAL_LINE) { | 3599 | if (ret == TRACE_TYPE_PARTIAL_LINE) { |
3600 | iter->seq.len -= count; | 3600 | iter->seq.len -= count; |
3601 | break; | 3601 | break; |
3602 | } | 3602 | } |
3603 | 3603 | ||
3604 | if (ret != TRACE_TYPE_NO_CONSUME) | 3604 | if (ret != TRACE_TYPE_NO_CONSUME) |
3605 | trace_consume(iter); | 3605 | trace_consume(iter); |
3606 | rem -= count; | 3606 | rem -= count; |
3607 | if (!trace_find_next_entry_inc(iter)) { | 3607 | if (!trace_find_next_entry_inc(iter)) { |
3608 | rem = 0; | 3608 | rem = 0; |
3609 | iter->ent = NULL; | 3609 | iter->ent = NULL; |
3610 | break; | 3610 | break; |
3611 | } | 3611 | } |
3612 | } | 3612 | } |
3613 | 3613 | ||
3614 | return rem; | 3614 | return rem; |
3615 | } | 3615 | } |
3616 | 3616 | ||
3617 | static ssize_t tracing_splice_read_pipe(struct file *filp, | 3617 | static ssize_t tracing_splice_read_pipe(struct file *filp, |
3618 | loff_t *ppos, | 3618 | loff_t *ppos, |
3619 | struct pipe_inode_info *pipe, | 3619 | struct pipe_inode_info *pipe, |
3620 | size_t len, | 3620 | size_t len, |
3621 | unsigned int flags) | 3621 | unsigned int flags) |
3622 | { | 3622 | { |
3623 | struct page *pages_def[PIPE_DEF_BUFFERS]; | 3623 | struct page *pages_def[PIPE_DEF_BUFFERS]; |
3624 | struct partial_page partial_def[PIPE_DEF_BUFFERS]; | 3624 | struct partial_page partial_def[PIPE_DEF_BUFFERS]; |
3625 | struct trace_iterator *iter = filp->private_data; | 3625 | struct trace_iterator *iter = filp->private_data; |
3626 | struct splice_pipe_desc spd = { | 3626 | struct splice_pipe_desc spd = { |
3627 | .pages = pages_def, | 3627 | .pages = pages_def, |
3628 | .partial = partial_def, | 3628 | .partial = partial_def, |
3629 | .nr_pages = 0, /* This gets updated below. */ | 3629 | .nr_pages = 0, /* This gets updated below. */ |
3630 | .nr_pages_max = PIPE_DEF_BUFFERS, | 3630 | .nr_pages_max = PIPE_DEF_BUFFERS, |
3631 | .flags = flags, | 3631 | .flags = flags, |
3632 | .ops = &tracing_pipe_buf_ops, | 3632 | .ops = &tracing_pipe_buf_ops, |
3633 | .spd_release = tracing_spd_release_pipe, | 3633 | .spd_release = tracing_spd_release_pipe, |
3634 | }; | 3634 | }; |
3635 | static struct tracer *old_tracer; | 3635 | static struct tracer *old_tracer; |
3636 | ssize_t ret; | 3636 | ssize_t ret; |
3637 | size_t rem; | 3637 | size_t rem; |
3638 | unsigned int i; | 3638 | unsigned int i; |
3639 | 3639 | ||
3640 | if (splice_grow_spd(pipe, &spd)) | 3640 | if (splice_grow_spd(pipe, &spd)) |
3641 | return -ENOMEM; | 3641 | return -ENOMEM; |
3642 | 3642 | ||
3643 | /* copy the tracer to avoid using a global lock all around */ | 3643 | /* copy the tracer to avoid using a global lock all around */ |
3644 | mutex_lock(&trace_types_lock); | 3644 | mutex_lock(&trace_types_lock); |
3645 | if (unlikely(old_tracer != current_trace && current_trace)) { | 3645 | if (unlikely(old_tracer != current_trace && current_trace)) { |
3646 | old_tracer = current_trace; | 3646 | old_tracer = current_trace; |
3647 | *iter->trace = *current_trace; | 3647 | *iter->trace = *current_trace; |
3648 | } | 3648 | } |
3649 | mutex_unlock(&trace_types_lock); | 3649 | mutex_unlock(&trace_types_lock); |
3650 | 3650 | ||
3651 | mutex_lock(&iter->mutex); | 3651 | mutex_lock(&iter->mutex); |
3652 | 3652 | ||
3653 | if (iter->trace->splice_read) { | 3653 | if (iter->trace->splice_read) { |
3654 | ret = iter->trace->splice_read(iter, filp, | 3654 | ret = iter->trace->splice_read(iter, filp, |
3655 | ppos, pipe, len, flags); | 3655 | ppos, pipe, len, flags); |
3656 | if (ret) | 3656 | if (ret) |
3657 | goto out_err; | 3657 | goto out_err; |
3658 | } | 3658 | } |
3659 | 3659 | ||
3660 | ret = tracing_wait_pipe(filp); | 3660 | ret = tracing_wait_pipe(filp); |
3661 | if (ret <= 0) | 3661 | if (ret <= 0) |
3662 | goto out_err; | 3662 | goto out_err; |
3663 | 3663 | ||
3664 | if (!iter->ent && !trace_find_next_entry_inc(iter)) { | 3664 | if (!iter->ent && !trace_find_next_entry_inc(iter)) { |
3665 | ret = -EFAULT; | 3665 | ret = -EFAULT; |
3666 | goto out_err; | 3666 | goto out_err; |
3667 | } | 3667 | } |
3668 | 3668 | ||
3669 | trace_event_read_lock(); | 3669 | trace_event_read_lock(); |
3670 | trace_access_lock(iter->cpu_file); | 3670 | trace_access_lock(iter->cpu_file); |
3671 | 3671 | ||
3672 | /* Fill as many pages as possible. */ | 3672 | /* Fill as many pages as possible. */ |
3673 | for (i = 0, rem = len; i < pipe->buffers && rem; i++) { | 3673 | for (i = 0, rem = len; i < pipe->buffers && rem; i++) { |
3674 | spd.pages[i] = alloc_page(GFP_KERNEL); | 3674 | spd.pages[i] = alloc_page(GFP_KERNEL); |
3675 | if (!spd.pages[i]) | 3675 | if (!spd.pages[i]) |
3676 | break; | 3676 | break; |
3677 | 3677 | ||
3678 | rem = tracing_fill_pipe_page(rem, iter); | 3678 | rem = tracing_fill_pipe_page(rem, iter); |
3679 | 3679 | ||
3680 | /* Copy the data into the page, so we can start over. */ | 3680 | /* Copy the data into the page, so we can start over. */ |
3681 | ret = trace_seq_to_buffer(&iter->seq, | 3681 | ret = trace_seq_to_buffer(&iter->seq, |
3682 | page_address(spd.pages[i]), | 3682 | page_address(spd.pages[i]), |
3683 | iter->seq.len); | 3683 | iter->seq.len); |
3684 | if (ret < 0) { | 3684 | if (ret < 0) { |
3685 | __free_page(spd.pages[i]); | 3685 | __free_page(spd.pages[i]); |
3686 | break; | 3686 | break; |
3687 | } | 3687 | } |
3688 | spd.partial[i].offset = 0; | 3688 | spd.partial[i].offset = 0; |
3689 | spd.partial[i].len = iter->seq.len; | 3689 | spd.partial[i].len = iter->seq.len; |
3690 | 3690 | ||
3691 | trace_seq_init(&iter->seq); | 3691 | trace_seq_init(&iter->seq); |
3692 | } | 3692 | } |
3693 | 3693 | ||
3694 | trace_access_unlock(iter->cpu_file); | 3694 | trace_access_unlock(iter->cpu_file); |
3695 | trace_event_read_unlock(); | 3695 | trace_event_read_unlock(); |
3696 | mutex_unlock(&iter->mutex); | 3696 | mutex_unlock(&iter->mutex); |
3697 | 3697 | ||
3698 | spd.nr_pages = i; | 3698 | spd.nr_pages = i; |
3699 | 3699 | ||
3700 | ret = splice_to_pipe(pipe, &spd); | 3700 | ret = splice_to_pipe(pipe, &spd); |
3701 | out: | 3701 | out: |
3702 | splice_shrink_spd(&spd); | 3702 | splice_shrink_spd(&spd); |
3703 | return ret; | 3703 | return ret; |
3704 | 3704 | ||
3705 | out_err: | 3705 | out_err: |
3706 | mutex_unlock(&iter->mutex); | 3706 | mutex_unlock(&iter->mutex); |
3707 | goto out; | 3707 | goto out; |
3708 | } | 3708 | } |
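A sketch of driving the splice_read path above from userspace: trace data is moved from trace_pipe into an anonymous pipe without an intermediate userspace copy, then drained for display. The path and the 64 KB length are assumptions.

/* Hypothetical example; assumes debugfs at /sys/kernel/debug. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int pfd[2];
	int trace = open("/sys/kernel/debug/tracing/trace_pipe", O_RDONLY);

	if (trace < 0 || pipe(pfd) < 0) {
		perror("setup");
		return 1;
	}
	/* Kernel side fills the pipe pages via tracing_splice_read_pipe(). */
	ssize_t n = splice(trace, NULL, pfd[1], NULL, 64 * 1024, 0);
	if (n < 0) {
		perror("splice");
		return 1;
	}
	/* Drain what was spliced, just to show the data made it across. */
	char buf[4096];
	ssize_t r;
	while (n > 0 && (r = read(pfd[0], buf, sizeof(buf))) > 0) {
		fwrite(buf, 1, r, stdout);
		n -= r;
	}
	close(trace);
	return 0;
}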
3709 | 3709 | ||
3710 | struct ftrace_entries_info { | 3710 | struct ftrace_entries_info { |
3711 | struct trace_array *tr; | 3711 | struct trace_array *tr; |
3712 | int cpu; | 3712 | int cpu; |
3713 | }; | 3713 | }; |
3714 | 3714 | ||
3715 | static int tracing_entries_open(struct inode *inode, struct file *filp) | 3715 | static int tracing_entries_open(struct inode *inode, struct file *filp) |
3716 | { | 3716 | { |
3717 | struct ftrace_entries_info *info; | 3717 | struct ftrace_entries_info *info; |
3718 | 3718 | ||
3719 | if (tracing_disabled) | 3719 | if (tracing_disabled) |
3720 | return -ENODEV; | 3720 | return -ENODEV; |
3721 | 3721 | ||
3722 | info = kzalloc(sizeof(*info), GFP_KERNEL); | 3722 | info = kzalloc(sizeof(*info), GFP_KERNEL); |
3723 | if (!info) | 3723 | if (!info) |
3724 | return -ENOMEM; | 3724 | return -ENOMEM; |
3725 | 3725 | ||
3726 | info->tr = &global_trace; | 3726 | info->tr = &global_trace; |
3727 | info->cpu = (unsigned long)inode->i_private; | 3727 | info->cpu = (unsigned long)inode->i_private; |
3728 | 3728 | ||
3729 | filp->private_data = info; | 3729 | filp->private_data = info; |
3730 | 3730 | ||
3731 | return 0; | 3731 | return 0; |
3732 | } | 3732 | } |
3733 | 3733 | ||
3734 | static ssize_t | 3734 | static ssize_t |
3735 | tracing_entries_read(struct file *filp, char __user *ubuf, | 3735 | tracing_entries_read(struct file *filp, char __user *ubuf, |
3736 | size_t cnt, loff_t *ppos) | 3736 | size_t cnt, loff_t *ppos) |
3737 | { | 3737 | { |
3738 | struct ftrace_entries_info *info = filp->private_data; | 3738 | struct ftrace_entries_info *info = filp->private_data; |
3739 | struct trace_array *tr = info->tr; | 3739 | struct trace_array *tr = info->tr; |
3740 | char buf[64]; | 3740 | char buf[64]; |
3741 | int r = 0; | 3741 | int r = 0; |
3742 | ssize_t ret; | 3742 | ssize_t ret; |
3743 | 3743 | ||
3744 | mutex_lock(&trace_types_lock); | 3744 | mutex_lock(&trace_types_lock); |
3745 | 3745 | ||
3746 | if (info->cpu == RING_BUFFER_ALL_CPUS) { | 3746 | if (info->cpu == RING_BUFFER_ALL_CPUS) { |
3747 | int cpu, buf_size_same; | 3747 | int cpu, buf_size_same; |
3748 | unsigned long size; | 3748 | unsigned long size; |
3749 | 3749 | ||
3750 | size = 0; | 3750 | size = 0; |
3751 | buf_size_same = 1; | 3751 | buf_size_same = 1; |
3752 | /* check if all cpu sizes are same */ | 3752 | /* check if all cpu sizes are same */ |
3753 | for_each_tracing_cpu(cpu) { | 3753 | for_each_tracing_cpu(cpu) { |
3754 | /* fill in the size from first enabled cpu */ | 3754 | /* fill in the size from first enabled cpu */ |
3755 | if (size == 0) | 3755 | if (size == 0) |
3756 | size = tr->data[cpu]->entries; | 3756 | size = tr->data[cpu]->entries; |
3757 | if (size != tr->data[cpu]->entries) { | 3757 | if (size != tr->data[cpu]->entries) { |
3758 | buf_size_same = 0; | 3758 | buf_size_same = 0; |
3759 | break; | 3759 | break; |
3760 | } | 3760 | } |
3761 | } | 3761 | } |
3762 | 3762 | ||
3763 | if (buf_size_same) { | 3763 | if (buf_size_same) { |
3764 | if (!ring_buffer_expanded) | 3764 | if (!ring_buffer_expanded) |
3765 | r = sprintf(buf, "%lu (expanded: %lu)\n", | 3765 | r = sprintf(buf, "%lu (expanded: %lu)\n", |
3766 | size >> 10, | 3766 | size >> 10, |
3767 | trace_buf_size >> 10); | 3767 | trace_buf_size >> 10); |
3768 | else | 3768 | else |
3769 | r = sprintf(buf, "%lu\n", size >> 10); | 3769 | r = sprintf(buf, "%lu\n", size >> 10); |
3770 | } else | 3770 | } else |
3771 | r = sprintf(buf, "X\n"); | 3771 | r = sprintf(buf, "X\n"); |
3772 | } else | 3772 | } else |
3773 | r = sprintf(buf, "%lu\n", tr->data[info->cpu]->entries >> 10); | 3773 | r = sprintf(buf, "%lu\n", tr->data[info->cpu]->entries >> 10); |
3774 | 3774 | ||
3775 | mutex_unlock(&trace_types_lock); | 3775 | mutex_unlock(&trace_types_lock); |
3776 | 3776 | ||
3777 | ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); | 3777 | ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); |
3778 | return ret; | 3778 | return ret; |
3779 | } | 3779 | } |
3780 | 3780 | ||
3781 | static ssize_t | 3781 | static ssize_t |
3782 | tracing_entries_write(struct file *filp, const char __user *ubuf, | 3782 | tracing_entries_write(struct file *filp, const char __user *ubuf, |
3783 | size_t cnt, loff_t *ppos) | 3783 | size_t cnt, loff_t *ppos) |
3784 | { | 3784 | { |
3785 | struct ftrace_entries_info *info = filp->private_data; | 3785 | struct ftrace_entries_info *info = filp->private_data; |
3786 | unsigned long val; | 3786 | unsigned long val; |
3787 | int ret; | 3787 | int ret; |
3788 | 3788 | ||
3789 | ret = kstrtoul_from_user(ubuf, cnt, 10, &val); | 3789 | ret = kstrtoul_from_user(ubuf, cnt, 10, &val); |
3790 | if (ret) | 3790 | if (ret) |
3791 | return ret; | 3791 | return ret; |
3792 | 3792 | ||
3793 | /* must have at least 1 entry */ | 3793 | /* must have at least 1 entry */ |
3794 | if (!val) | 3794 | if (!val) |
3795 | return -EINVAL; | 3795 | return -EINVAL; |
3796 | 3796 | ||
3797 | /* value is in KB */ | 3797 | /* value is in KB */ |
3798 | val <<= 10; | 3798 | val <<= 10; |
3799 | 3799 | ||
3800 | ret = tracing_resize_ring_buffer(val, info->cpu); | 3800 | ret = tracing_resize_ring_buffer(val, info->cpu); |
3801 | if (ret < 0) | 3801 | if (ret < 0) |
3802 | return ret; | 3802 | return ret; |
3803 | 3803 | ||
3804 | *ppos += cnt; | 3804 | *ppos += cnt; |
3805 | 3805 | ||
3806 | return cnt; | 3806 | return cnt; |
3807 | } | 3807 | } |
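A sketch of the entries handlers above from userspace: the value written is a size in kilobytes (the kernel shifts it left by 10 before resizing), and the read side prints "X" when per-CPU sizes differ. The path and the 4096 KB value are assumptions.

/* Hypothetical example; assumes debugfs at /sys/kernel/debug. */
#include <stdio.h>

int main(void)
{
	const char *path = "/sys/kernel/debug/tracing/buffer_size_kb";
	char buf[64];
	FILE *f = fopen(path, "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	fprintf(f, "4096\n");	/* interpreted as KB per CPU */
	fclose(f);

	f = fopen(path, "r");
	if (!f) {
		perror("fopen");
		return 1;
	}
	if (fgets(buf, sizeof(buf), f))
		printf("buffer_size_kb: %s", buf);	/* "X" if CPUs differ */
	fclose(f);
	return 0;
}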
3808 | 3808 | ||
3809 | static int | 3809 | static int |
3810 | tracing_entries_release(struct inode *inode, struct file *filp) | 3810 | tracing_entries_release(struct inode *inode, struct file *filp) |
3811 | { | 3811 | { |
3812 | struct ftrace_entries_info *info = filp->private_data; | 3812 | struct ftrace_entries_info *info = filp->private_data; |
3813 | 3813 | ||
3814 | kfree(info); | 3814 | kfree(info); |
3815 | 3815 | ||
3816 | return 0; | 3816 | return 0; |
3817 | } | 3817 | } |
3818 | 3818 | ||
3819 | static ssize_t | 3819 | static ssize_t |
3820 | tracing_total_entries_read(struct file *filp, char __user *ubuf, | 3820 | tracing_total_entries_read(struct file *filp, char __user *ubuf, |
3821 | size_t cnt, loff_t *ppos) | 3821 | size_t cnt, loff_t *ppos) |
3822 | { | 3822 | { |
3823 | struct trace_array *tr = filp->private_data; | 3823 | struct trace_array *tr = filp->private_data; |
3824 | char buf[64]; | 3824 | char buf[64]; |
3825 | int r, cpu; | 3825 | int r, cpu; |
3826 | unsigned long size = 0, expanded_size = 0; | 3826 | unsigned long size = 0, expanded_size = 0; |
3827 | 3827 | ||
3828 | mutex_lock(&trace_types_lock); | 3828 | mutex_lock(&trace_types_lock); |
3829 | for_each_tracing_cpu(cpu) { | 3829 | for_each_tracing_cpu(cpu) { |
3830 | size += tr->data[cpu]->entries >> 10; | 3830 | size += tr->data[cpu]->entries >> 10; |
3831 | if (!ring_buffer_expanded) | 3831 | if (!ring_buffer_expanded) |
3832 | expanded_size += trace_buf_size >> 10; | 3832 | expanded_size += trace_buf_size >> 10; |
3833 | } | 3833 | } |
3834 | if (ring_buffer_expanded) | 3834 | if (ring_buffer_expanded) |
3835 | r = sprintf(buf, "%lu\n", size); | 3835 | r = sprintf(buf, "%lu\n", size); |
3836 | else | 3836 | else |
3837 | r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size); | 3837 | r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size); |
3838 | mutex_unlock(&trace_types_lock); | 3838 | mutex_unlock(&trace_types_lock); |
3839 | 3839 | ||
3840 | return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); | 3840 | return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); |
3841 | } | 3841 | } |
3842 | 3842 | ||
3843 | static ssize_t | 3843 | static ssize_t |
3844 | tracing_free_buffer_write(struct file *filp, const char __user *ubuf, | 3844 | tracing_free_buffer_write(struct file *filp, const char __user *ubuf, |
3845 | size_t cnt, loff_t *ppos) | 3845 | size_t cnt, loff_t *ppos) |
3846 | { | 3846 | { |
3847 | /* | 3847 | /* |
3848 | * There is no need to read what the user has written; this function | 3848 | * There is no need to read what the user has written; this function |
3849 | * is just to make sure that there is no error when "echo" is used | 3849 | * is just to make sure that there is no error when "echo" is used |
3850 | */ | 3850 | */ |
3851 | 3851 | ||
3852 | *ppos += cnt; | 3852 | *ppos += cnt; |
3853 | 3853 | ||
3854 | return cnt; | 3854 | return cnt; |
3855 | } | 3855 | } |
3856 | 3856 | ||
3857 | static int | 3857 | static int |
3858 | tracing_free_buffer_release(struct inode *inode, struct file *filp) | 3858 | tracing_free_buffer_release(struct inode *inode, struct file *filp) |
3859 | { | 3859 | { |
3860 | /* disable tracing ? */ | 3860 | /* disable tracing ? */ |
3861 | if (trace_flags & TRACE_ITER_STOP_ON_FREE) | 3861 | if (trace_flags & TRACE_ITER_STOP_ON_FREE) |
3862 | tracing_off(); | 3862 | tracing_off(); |
3863 | /* resize the ring buffer to 0 */ | 3863 | /* resize the ring buffer to 0 */ |
3864 | tracing_resize_ring_buffer(0, RING_BUFFER_ALL_CPUS); | 3864 | tracing_resize_ring_buffer(0, RING_BUFFER_ALL_CPUS); |
3865 | 3865 | ||
3866 | return 0; | 3866 | return 0; |
3867 | } | 3867 | } |
3868 | 3868 | ||
3869 | static ssize_t | 3869 | static ssize_t |
3870 | tracing_mark_write(struct file *filp, const char __user *ubuf, | 3870 | tracing_mark_write(struct file *filp, const char __user *ubuf, |
3871 | size_t cnt, loff_t *fpos) | 3871 | size_t cnt, loff_t *fpos) |
3872 | { | 3872 | { |
3873 | unsigned long addr = (unsigned long)ubuf; | 3873 | unsigned long addr = (unsigned long)ubuf; |
3874 | struct ring_buffer_event *event; | 3874 | struct ring_buffer_event *event; |
3875 | struct ring_buffer *buffer; | 3875 | struct ring_buffer *buffer; |
3876 | struct print_entry *entry; | 3876 | struct print_entry *entry; |
3877 | unsigned long irq_flags; | 3877 | unsigned long irq_flags; |
3878 | struct page *pages[2]; | 3878 | struct page *pages[2]; |
3879 | void *map_page[2]; | 3879 | void *map_page[2]; |
3880 | int nr_pages = 1; | 3880 | int nr_pages = 1; |
3881 | ssize_t written; | 3881 | ssize_t written; |
3882 | int offset; | 3882 | int offset; |
3883 | int size; | 3883 | int size; |
3884 | int len; | 3884 | int len; |
3885 | int ret; | 3885 | int ret; |
3886 | int i; | 3886 | int i; |
3887 | 3887 | ||
3888 | if (tracing_disabled) | 3888 | if (tracing_disabled) |
3889 | return -EINVAL; | 3889 | return -EINVAL; |
3890 | 3890 | ||
3891 | if (!(trace_flags & TRACE_ITER_MARKERS)) | 3891 | if (!(trace_flags & TRACE_ITER_MARKERS)) |
3892 | return -EINVAL; | 3892 | return -EINVAL; |
3893 | 3893 | ||
3894 | if (cnt > TRACE_BUF_SIZE) | 3894 | if (cnt > TRACE_BUF_SIZE) |
3895 | cnt = TRACE_BUF_SIZE; | 3895 | cnt = TRACE_BUF_SIZE; |
3896 | 3896 | ||
3897 | /* | 3897 | /* |
3898 | * Userspace is injecting traces into the kernel trace buffer. | 3898 | * Userspace is injecting traces into the kernel trace buffer. |
3899 | * We want to be as non intrusive as possible. | 3899 | * We want to be as non intrusive as possible. |
3900 | * To do so, we do not want to allocate any special buffers | 3900 | * To do so, we do not want to allocate any special buffers |
3901 | * or take any locks, but instead write the userspace data | 3901 | * or take any locks, but instead write the userspace data |
3902 | * straight into the ring buffer. | 3902 | * straight into the ring buffer. |
3903 | * | 3903 | * |
3904 | * First we need to pin the userspace buffer into memory, | 3904 | * First we need to pin the userspace buffer into memory, |
3905 | * which it most likely already is, because the caller just referenced it. | 3905 | * which it most likely already is, because the caller just referenced it. |
3906 | * But there's no guarantee that it is. By using get_user_pages_fast() | 3906 | * But there's no guarantee that it is. By using get_user_pages_fast() |
3907 | * and kmap_atomic/kunmap_atomic() we can get access to the | 3907 | * and kmap_atomic/kunmap_atomic() we can get access to the |
3908 | * pages directly. We then write the data directly into the | 3908 | * pages directly. We then write the data directly into the |
3909 | * ring buffer. | 3909 | * ring buffer. |
3910 | */ | 3910 | */ |
3911 | BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE); | 3911 | BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE); |
3912 | 3912 | ||
3913 | /* check if we cross pages */ | 3913 | /* check if we cross pages */ |
3914 | if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK)) | 3914 | if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK)) |
3915 | nr_pages = 2; | 3915 | nr_pages = 2; |
3916 | 3916 | ||
3917 | offset = addr & (PAGE_SIZE - 1); | 3917 | offset = addr & (PAGE_SIZE - 1); |
3918 | addr &= PAGE_MASK; | 3918 | addr &= PAGE_MASK; |
3919 | 3919 | ||
3920 | ret = get_user_pages_fast(addr, nr_pages, 0, pages); | 3920 | ret = get_user_pages_fast(addr, nr_pages, 0, pages); |
3921 | if (ret < nr_pages) { | 3921 | if (ret < nr_pages) { |
3922 | while (--ret >= 0) | 3922 | while (--ret >= 0) |
3923 | put_page(pages[ret]); | 3923 | put_page(pages[ret]); |
3924 | written = -EFAULT; | 3924 | written = -EFAULT; |
3925 | goto out; | 3925 | goto out; |
3926 | } | 3926 | } |
3927 | 3927 | ||
3928 | for (i = 0; i < nr_pages; i++) | 3928 | for (i = 0; i < nr_pages; i++) |
3929 | map_page[i] = kmap_atomic(pages[i]); | 3929 | map_page[i] = kmap_atomic(pages[i]); |
3930 | 3930 | ||
3931 | local_save_flags(irq_flags); | 3931 | local_save_flags(irq_flags); |
3932 | size = sizeof(*entry) + cnt + 2; /* possible \n added */ | 3932 | size = sizeof(*entry) + cnt + 2; /* possible \n added */ |
3933 | buffer = global_trace.buffer; | 3933 | buffer = global_trace.buffer; |
3934 | event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, | 3934 | event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, |
3935 | irq_flags, preempt_count()); | 3935 | irq_flags, preempt_count()); |
3936 | if (!event) { | 3936 | if (!event) { |
3937 | /* Ring buffer disabled, return as if not open for write */ | 3937 | /* Ring buffer disabled, return as if not open for write */ |
3938 | written = -EBADF; | 3938 | written = -EBADF; |
3939 | goto out_unlock; | 3939 | goto out_unlock; |
3940 | } | 3940 | } |
3941 | 3941 | ||
3942 | entry = ring_buffer_event_data(event); | 3942 | entry = ring_buffer_event_data(event); |
3943 | entry->ip = _THIS_IP_; | 3943 | entry->ip = _THIS_IP_; |
3944 | 3944 | ||
3945 | if (nr_pages == 2) { | 3945 | if (nr_pages == 2) { |
3946 | len = PAGE_SIZE - offset; | 3946 | len = PAGE_SIZE - offset; |
3947 | memcpy(&entry->buf, map_page[0] + offset, len); | 3947 | memcpy(&entry->buf, map_page[0] + offset, len); |
3948 | memcpy(&entry->buf[len], map_page[1], cnt - len); | 3948 | memcpy(&entry->buf[len], map_page[1], cnt - len); |
3949 | } else | 3949 | } else |
3950 | memcpy(&entry->buf, map_page[0] + offset, cnt); | 3950 | memcpy(&entry->buf, map_page[0] + offset, cnt); |
3951 | 3951 | ||
3952 | if (entry->buf[cnt - 1] != '\n') { | 3952 | if (entry->buf[cnt - 1] != '\n') { |
3953 | entry->buf[cnt] = '\n'; | 3953 | entry->buf[cnt] = '\n'; |
3954 | entry->buf[cnt + 1] = '\0'; | 3954 | entry->buf[cnt + 1] = '\0'; |
3955 | } else | 3955 | } else |
3956 | entry->buf[cnt] = '\0'; | 3956 | entry->buf[cnt] = '\0'; |
3957 | 3957 | ||
3958 | ring_buffer_unlock_commit(buffer, event); | 3958 | ring_buffer_unlock_commit(buffer, event); |
3959 | 3959 | ||
3960 | written = cnt; | 3960 | written = cnt; |
3961 | 3961 | ||
3962 | *fpos += written; | 3962 | *fpos += written; |
3963 | 3963 | ||
3964 | out_unlock: | 3964 | out_unlock: |
3965 | for (i = 0; i < nr_pages; i++){ | 3965 | for (i = 0; i < nr_pages; i++){ |
3966 | kunmap_atomic(map_page[i]); | 3966 | kunmap_atomic(map_page[i]); |
3967 | put_page(pages[i]); | 3967 | put_page(pages[i]); |
3968 | } | 3968 | } |
3969 | out: | 3969 | out: |
3970 | return written; | 3970 | return written; |
3971 | } | 3971 | } |
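A sketch of the trace_marker write path above: one write() from userspace becomes one TRACE_PRINT event, copied straight out of the pinned user pages. Keeping the payload under a page keeps it on the single-page memcpy branch. The path below is an assumption.

/* Hypothetical example; not part of the commit. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/sys/kernel/debug/tracing/trace_marker", O_WRONLY);
	const char *msg = "hello from userspace\n";

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* The kernel appends a '\n' itself if the payload lacks one. */
	if (write(fd, msg, strlen(msg)) < 0)
		perror("write");
	close(fd);
	return 0;
}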
3972 | 3972 | ||
3973 | static int tracing_clock_show(struct seq_file *m, void *v) | 3973 | static int tracing_clock_show(struct seq_file *m, void *v) |
3974 | { | 3974 | { |
3975 | int i; | 3975 | int i; |
3976 | 3976 | ||
3977 | for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) | 3977 | for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) |
3978 | seq_printf(m, | 3978 | seq_printf(m, |
3979 | "%s%s%s%s", i ? " " : "", | 3979 | "%s%s%s%s", i ? " " : "", |
3980 | i == trace_clock_id ? "[" : "", trace_clocks[i].name, | 3980 | i == trace_clock_id ? "[" : "", trace_clocks[i].name, |
3981 | i == trace_clock_id ? "]" : ""); | 3981 | i == trace_clock_id ? "]" : ""); |
3982 | seq_putc(m, '\n'); | 3982 | seq_putc(m, '\n'); |
3983 | 3983 | ||
3984 | return 0; | 3984 | return 0; |
3985 | } | 3985 | } |
3986 | 3986 | ||
3987 | static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf, | 3987 | static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf, |
3988 | size_t cnt, loff_t *fpos) | 3988 | size_t cnt, loff_t *fpos) |
3989 | { | 3989 | { |
3990 | char buf[64]; | 3990 | char buf[64]; |
3991 | const char *clockstr; | 3991 | const char *clockstr; |
3992 | int i; | 3992 | int i; |
3993 | 3993 | ||
3994 | if (cnt >= sizeof(buf)) | 3994 | if (cnt >= sizeof(buf)) |
3995 | return -EINVAL; | 3995 | return -EINVAL; |
3996 | 3996 | ||
3997 | if (copy_from_user(&buf, ubuf, cnt)) | 3997 | if (copy_from_user(&buf, ubuf, cnt)) |
3998 | return -EFAULT; | 3998 | return -EFAULT; |
3999 | 3999 | ||
4000 | buf[cnt] = 0; | 4000 | buf[cnt] = 0; |
4001 | 4001 | ||
4002 | clockstr = strstrip(buf); | 4002 | clockstr = strstrip(buf); |
4003 | 4003 | ||
4004 | for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) { | 4004 | for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) { |
4005 | if (strcmp(trace_clocks[i].name, clockstr) == 0) | 4005 | if (strcmp(trace_clocks[i].name, clockstr) == 0) |
4006 | break; | 4006 | break; |
4007 | } | 4007 | } |
4008 | if (i == ARRAY_SIZE(trace_clocks)) | 4008 | if (i == ARRAY_SIZE(trace_clocks)) |
4009 | return -EINVAL; | 4009 | return -EINVAL; |
4010 | 4010 | ||
4011 | trace_clock_id = i; | 4011 | trace_clock_id = i; |
4012 | 4012 | ||
4013 | mutex_lock(&trace_types_lock); | 4013 | mutex_lock(&trace_types_lock); |
4014 | 4014 | ||
4015 | ring_buffer_set_clock(global_trace.buffer, trace_clocks[i].func); | 4015 | ring_buffer_set_clock(global_trace.buffer, trace_clocks[i].func); |
4016 | if (max_tr.buffer) | 4016 | if (max_tr.buffer) |
4017 | ring_buffer_set_clock(max_tr.buffer, trace_clocks[i].func); | 4017 | ring_buffer_set_clock(max_tr.buffer, trace_clocks[i].func); |
4018 | 4018 | ||
4019 | mutex_unlock(&trace_types_lock); | 4019 | mutex_unlock(&trace_types_lock); |
4020 | 4020 | ||
4021 | *fpos += cnt; | 4021 | *fpos += cnt; |
4022 | 4022 | ||
4023 | return cnt; | 4023 | return cnt; |
4024 | } | 4024 | } |
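A sketch of the clock handlers above: reading trace_clock lists the available clocks with the current one bracketed, and writing a name switches both the global and max_tr buffers; unknown names get -EINVAL. The path and the "global" clock name are assumptions.

/* Hypothetical example; assumes debugfs at /sys/kernel/debug. */
#include <stdio.h>

int main(void)
{
	const char *path = "/sys/kernel/debug/tracing/trace_clock";
	char line[256];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	if (fgets(line, sizeof(line), f))
		printf("available: %s", line);	/* e.g. "[local] global counter" */
	fclose(f);

	f = fopen(path, "w");
	if (!f) {
		perror("fopen");
		return 1;
	}
	fputs("global\n", f);	/* trailing whitespace is stripped by strstrip() */
	fclose(f);
	return 0;
}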
4025 | 4025 | ||
4026 | static int tracing_clock_open(struct inode *inode, struct file *file) | 4026 | static int tracing_clock_open(struct inode *inode, struct file *file) |
4027 | { | 4027 | { |
4028 | if (tracing_disabled) | 4028 | if (tracing_disabled) |
4029 | return -ENODEV; | 4029 | return -ENODEV; |
4030 | return single_open(file, tracing_clock_show, NULL); | 4030 | return single_open(file, tracing_clock_show, NULL); |
4031 | } | 4031 | } |
4032 | 4032 | ||
4033 | static const struct file_operations tracing_max_lat_fops = { | 4033 | static const struct file_operations tracing_max_lat_fops = { |
4034 | .open = tracing_open_generic, | 4034 | .open = tracing_open_generic, |
4035 | .read = tracing_max_lat_read, | 4035 | .read = tracing_max_lat_read, |
4036 | .write = tracing_max_lat_write, | 4036 | .write = tracing_max_lat_write, |
4037 | .llseek = generic_file_llseek, | 4037 | .llseek = generic_file_llseek, |
4038 | }; | 4038 | }; |
4039 | 4039 | ||
4040 | static const struct file_operations tracing_ctrl_fops = { | 4040 | static const struct file_operations tracing_ctrl_fops = { |
4041 | .open = tracing_open_generic, | 4041 | .open = tracing_open_generic, |
4042 | .read = tracing_ctrl_read, | 4042 | .read = tracing_ctrl_read, |
4043 | .write = tracing_ctrl_write, | 4043 | .write = tracing_ctrl_write, |
4044 | .llseek = generic_file_llseek, | 4044 | .llseek = generic_file_llseek, |
4045 | }; | 4045 | }; |
4046 | 4046 | ||
4047 | static const struct file_operations set_tracer_fops = { | 4047 | static const struct file_operations set_tracer_fops = { |
4048 | .open = tracing_open_generic, | 4048 | .open = tracing_open_generic, |
4049 | .read = tracing_set_trace_read, | 4049 | .read = tracing_set_trace_read, |
4050 | .write = tracing_set_trace_write, | 4050 | .write = tracing_set_trace_write, |
4051 | .llseek = generic_file_llseek, | 4051 | .llseek = generic_file_llseek, |
4052 | }; | 4052 | }; |
4053 | 4053 | ||
4054 | static const struct file_operations tracing_pipe_fops = { | 4054 | static const struct file_operations tracing_pipe_fops = { |
4055 | .open = tracing_open_pipe, | 4055 | .open = tracing_open_pipe, |
4056 | .poll = tracing_poll_pipe, | 4056 | .poll = tracing_poll_pipe, |
4057 | .read = tracing_read_pipe, | 4057 | .read = tracing_read_pipe, |
4058 | .splice_read = tracing_splice_read_pipe, | 4058 | .splice_read = tracing_splice_read_pipe, |
4059 | .release = tracing_release_pipe, | 4059 | .release = tracing_release_pipe, |
4060 | .llseek = no_llseek, | 4060 | .llseek = no_llseek, |
4061 | }; | 4061 | }; |
4062 | 4062 | ||
4063 | static const struct file_operations tracing_entries_fops = { | 4063 | static const struct file_operations tracing_entries_fops = { |
4064 | .open = tracing_entries_open, | 4064 | .open = tracing_entries_open, |
4065 | .read = tracing_entries_read, | 4065 | .read = tracing_entries_read, |
4066 | .write = tracing_entries_write, | 4066 | .write = tracing_entries_write, |
4067 | .release = tracing_entries_release, | 4067 | .release = tracing_entries_release, |
4068 | .llseek = generic_file_llseek, | 4068 | .llseek = generic_file_llseek, |
4069 | }; | 4069 | }; |
4070 | 4070 | ||
4071 | static const struct file_operations tracing_total_entries_fops = { | 4071 | static const struct file_operations tracing_total_entries_fops = { |
4072 | .open = tracing_open_generic, | 4072 | .open = tracing_open_generic, |
4073 | .read = tracing_total_entries_read, | 4073 | .read = tracing_total_entries_read, |
4074 | .llseek = generic_file_llseek, | 4074 | .llseek = generic_file_llseek, |
4075 | }; | 4075 | }; |
4076 | 4076 | ||
4077 | static const struct file_operations tracing_free_buffer_fops = { | 4077 | static const struct file_operations tracing_free_buffer_fops = { |
4078 | .write = tracing_free_buffer_write, | 4078 | .write = tracing_free_buffer_write, |
4079 | .release = tracing_free_buffer_release, | 4079 | .release = tracing_free_buffer_release, |
4080 | }; | 4080 | }; |
4081 | 4081 | ||
4082 | static const struct file_operations tracing_mark_fops = { | 4082 | static const struct file_operations tracing_mark_fops = { |
4083 | .open = tracing_open_generic, | 4083 | .open = tracing_open_generic, |
4084 | .write = tracing_mark_write, | 4084 | .write = tracing_mark_write, |
4085 | .llseek = generic_file_llseek, | 4085 | .llseek = generic_file_llseek, |
4086 | }; | 4086 | }; |
4087 | 4087 | ||
4088 | static const struct file_operations trace_clock_fops = { | 4088 | static const struct file_operations trace_clock_fops = { |
4089 | .open = tracing_clock_open, | 4089 | .open = tracing_clock_open, |
4090 | .read = seq_read, | 4090 | .read = seq_read, |
4091 | .llseek = seq_lseek, | 4091 | .llseek = seq_lseek, |
4092 | .release = single_release, | 4092 | .release = single_release, |
4093 | .write = tracing_clock_write, | 4093 | .write = tracing_clock_write, |
4094 | }; | 4094 | }; |
4095 | 4095 | ||
4096 | struct ftrace_buffer_info { | 4096 | struct ftrace_buffer_info { |
4097 | struct trace_array *tr; | 4097 | struct trace_array *tr; |
4098 | void *spare; | 4098 | void *spare; |
4099 | int cpu; | 4099 | int cpu; |
4100 | unsigned int read; | 4100 | unsigned int read; |
4101 | }; | 4101 | }; |
4102 | 4102 | ||
4103 | static int tracing_buffers_open(struct inode *inode, struct file *filp) | 4103 | static int tracing_buffers_open(struct inode *inode, struct file *filp) |
4104 | { | 4104 | { |
4105 | int cpu = (int)(long)inode->i_private; | 4105 | int cpu = (int)(long)inode->i_private; |
4106 | struct ftrace_buffer_info *info; | 4106 | struct ftrace_buffer_info *info; |
4107 | 4107 | ||
4108 | if (tracing_disabled) | 4108 | if (tracing_disabled) |
4109 | return -ENODEV; | 4109 | return -ENODEV; |
4110 | 4110 | ||
4111 | info = kzalloc(sizeof(*info), GFP_KERNEL); | 4111 | info = kzalloc(sizeof(*info), GFP_KERNEL); |
4112 | if (!info) | 4112 | if (!info) |
4113 | return -ENOMEM; | 4113 | return -ENOMEM; |
4114 | 4114 | ||
4115 | info->tr = &global_trace; | 4115 | info->tr = &global_trace; |
4116 | info->cpu = cpu; | 4116 | info->cpu = cpu; |
4117 | info->spare = NULL; | 4117 | info->spare = NULL; |
4118 | /* Force reading ring buffer for first read */ | 4118 | /* Force reading ring buffer for first read */ |
4119 | info->read = (unsigned int)-1; | 4119 | info->read = (unsigned int)-1; |
4120 | 4120 | ||
4121 | filp->private_data = info; | 4121 | filp->private_data = info; |
4122 | 4122 | ||
4123 | return nonseekable_open(inode, filp); | 4123 | return nonseekable_open(inode, filp); |
4124 | } | 4124 | } |
4125 | 4125 | ||
4126 | static ssize_t | 4126 | static ssize_t |
4127 | tracing_buffers_read(struct file *filp, char __user *ubuf, | 4127 | tracing_buffers_read(struct file *filp, char __user *ubuf, |
4128 | size_t count, loff_t *ppos) | 4128 | size_t count, loff_t *ppos) |
4129 | { | 4129 | { |
4130 | struct ftrace_buffer_info *info = filp->private_data; | 4130 | struct ftrace_buffer_info *info = filp->private_data; |
4131 | ssize_t ret; | 4131 | ssize_t ret; |
4132 | size_t size; | 4132 | size_t size; |
4133 | 4133 | ||
4134 | if (!count) | 4134 | if (!count) |
4135 | return 0; | 4135 | return 0; |
4136 | 4136 | ||
4137 | if (!info->spare) | 4137 | if (!info->spare) |
4138 | info->spare = ring_buffer_alloc_read_page(info->tr->buffer, info->cpu); | 4138 | info->spare = ring_buffer_alloc_read_page(info->tr->buffer, info->cpu); |
4139 | if (!info->spare) | 4139 | if (!info->spare) |
4140 | return -ENOMEM; | 4140 | return -ENOMEM; |
4141 | 4141 | ||
4142 | /* Do we have previous read data to read? */ | 4142 | /* Do we have previous read data to read? */ |
4143 | if (info->read < PAGE_SIZE) | 4143 | if (info->read < PAGE_SIZE) |
4144 | goto read; | 4144 | goto read; |
4145 | 4145 | ||
4146 | trace_access_lock(info->cpu); | 4146 | trace_access_lock(info->cpu); |
4147 | ret = ring_buffer_read_page(info->tr->buffer, | 4147 | ret = ring_buffer_read_page(info->tr->buffer, |
4148 | &info->spare, | 4148 | &info->spare, |
4149 | count, | 4149 | count, |
4150 | info->cpu, 0); | 4150 | info->cpu, 0); |
4151 | trace_access_unlock(info->cpu); | 4151 | trace_access_unlock(info->cpu); |
4152 | if (ret < 0) | 4152 | if (ret < 0) |
4153 | return 0; | 4153 | return 0; |
4154 | 4154 | ||
4155 | info->read = 0; | 4155 | info->read = 0; |
4156 | 4156 | ||
4157 | read: | 4157 | read: |
4158 | size = PAGE_SIZE - info->read; | 4158 | size = PAGE_SIZE - info->read; |
4159 | if (size > count) | 4159 | if (size > count) |
4160 | size = count; | 4160 | size = count; |
4161 | 4161 | ||
4162 | ret = copy_to_user(ubuf, info->spare + info->read, size); | 4162 | ret = copy_to_user(ubuf, info->spare + info->read, size); |
4163 | if (ret == size) | 4163 | if (ret == size) |
4164 | return -EFAULT; | 4164 | return -EFAULT; |
4165 | size -= ret; | 4165 | size -= ret; |
4166 | 4166 | ||
4167 | *ppos += size; | 4167 | *ppos += size; |
4168 | info->read += size; | 4168 | info->read += size; |
4169 | 4169 | ||
4170 | return size; | 4170 | return size; |
4171 | } | 4171 | } |
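A sketch of the binary read path above: each successful read hands back a ring-buffer page copied through the handler's spare page, so data arrives in page-sized chunks. The per_cpu/cpu0/trace_pipe_raw path is an assumption about where this file is exposed.

/* Hypothetical example; not part of the commit. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char page[4096];	/* handler returns at most one page per read */
	int fd = open("/sys/kernel/debug/tracing/per_cpu/cpu0/trace_pipe_raw",
		      O_RDONLY);
	ssize_t n;

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* 0 bytes means the CPU buffer had nothing to hand out. */
	n = read(fd, page, sizeof(page));
	if (n > 0)
		printf("got %zd bytes of binary ring-buffer data\n", n);
	close(fd);
	return 0;
}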
4172 | 4172 | ||
4173 | static int tracing_buffers_release(struct inode *inode, struct file *file) | 4173 | static int tracing_buffers_release(struct inode *inode, struct file *file) |
4174 | { | 4174 | { |
4175 | struct ftrace_buffer_info *info = file->private_data; | 4175 | struct ftrace_buffer_info *info = file->private_data; |
4176 | 4176 | ||
4177 | if (info->spare) | 4177 | if (info->spare) |
4178 | ring_buffer_free_read_page(info->tr->buffer, info->spare); | 4178 | ring_buffer_free_read_page(info->tr->buffer, info->spare); |
4179 | kfree(info); | 4179 | kfree(info); |
4180 | 4180 | ||
4181 | return 0; | 4181 | return 0; |
4182 | } | 4182 | } |
4183 | 4183 | ||
4184 | struct buffer_ref { | 4184 | struct buffer_ref { |
4185 | struct ring_buffer *buffer; | 4185 | struct ring_buffer *buffer; |
4186 | void *page; | 4186 | void *page; |
4187 | int ref; | 4187 | int ref; |
4188 | }; | 4188 | }; |
4189 | 4189 | ||
4190 | static void buffer_pipe_buf_release(struct pipe_inode_info *pipe, | 4190 | static void buffer_pipe_buf_release(struct pipe_inode_info *pipe, |
4191 | struct pipe_buffer *buf) | 4191 | struct pipe_buffer *buf) |
4192 | { | 4192 | { |
4193 | struct buffer_ref *ref = (struct buffer_ref *)buf->private; | 4193 | struct buffer_ref *ref = (struct buffer_ref *)buf->private; |
4194 | 4194 | ||
4195 | if (--ref->ref) | 4195 | if (--ref->ref) |
4196 | return; | 4196 | return; |
4197 | 4197 | ||
4198 | ring_buffer_free_read_page(ref->buffer, ref->page); | 4198 | ring_buffer_free_read_page(ref->buffer, ref->page); |
4199 | kfree(ref); | 4199 | kfree(ref); |
4200 | buf->private = 0; | 4200 | buf->private = 0; |
4201 | } | 4201 | } |
4202 | 4202 | ||
4203 | static void buffer_pipe_buf_get(struct pipe_inode_info *pipe, | 4203 | static void buffer_pipe_buf_get(struct pipe_inode_info *pipe, |
4204 | struct pipe_buffer *buf) | 4204 | struct pipe_buffer *buf) |
4205 | { | 4205 | { |
4206 | struct buffer_ref *ref = (struct buffer_ref *)buf->private; | 4206 | struct buffer_ref *ref = (struct buffer_ref *)buf->private; |
4207 | 4207 | ||
4208 | ref->ref++; | 4208 | ref->ref++; |
4209 | } | 4209 | } |
4210 | 4210 | ||
4211 | /* Pipe buffer operations for a buffer. */ | 4211 | /* Pipe buffer operations for a buffer. */ |
4212 | static const struct pipe_buf_operations buffer_pipe_buf_ops = { | 4212 | static const struct pipe_buf_operations buffer_pipe_buf_ops = { |
4213 | .can_merge = 0, | 4213 | .can_merge = 0, |
4214 | .map = generic_pipe_buf_map, | 4214 | .map = generic_pipe_buf_map, |
4215 | .unmap = generic_pipe_buf_unmap, | 4215 | .unmap = generic_pipe_buf_unmap, |
4216 | .confirm = generic_pipe_buf_confirm, | 4216 | .confirm = generic_pipe_buf_confirm, |
4217 | .release = buffer_pipe_buf_release, | 4217 | .release = buffer_pipe_buf_release, |
4218 | .steal = generic_pipe_buf_steal, | 4218 | .steal = generic_pipe_buf_steal, |
4219 | .get = buffer_pipe_buf_get, | 4219 | .get = buffer_pipe_buf_get, |
4220 | }; | 4220 | }; |
4221 | 4221 | ||
4222 | /* | 4222 | /* |
4223 | * Callback from splice_to_pipe(), if we need to release some pages | 4223 | * Callback from splice_to_pipe(), if we need to release some pages |
4224 | * at the end of the spd in case we errored out while filling the pipe. | 4224 | * at the end of the spd in case we errored out while filling the pipe. |
4225 | */ | 4225 | */ |
4226 | static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i) | 4226 | static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i) |
4227 | { | 4227 | { |
4228 | struct buffer_ref *ref = | 4228 | struct buffer_ref *ref = |
4229 | (struct buffer_ref *)spd->partial[i].private; | 4229 | (struct buffer_ref *)spd->partial[i].private; |
4230 | 4230 | ||
4231 | if (--ref->ref) | 4231 | if (--ref->ref) |
4232 | return; | 4232 | return; |
4233 | 4233 | ||
4234 | ring_buffer_free_read_page(ref->buffer, ref->page); | 4234 | ring_buffer_free_read_page(ref->buffer, ref->page); |
4235 | kfree(ref); | 4235 | kfree(ref); |
4236 | spd->partial[i].private = 0; | 4236 | spd->partial[i].private = 0; |
4237 | } | 4237 | } |
4238 | 4238 | ||
4239 | static ssize_t | 4239 | static ssize_t |
4240 | tracing_buffers_splice_read(struct file *file, loff_t *ppos, | 4240 | tracing_buffers_splice_read(struct file *file, loff_t *ppos, |
4241 | struct pipe_inode_info *pipe, size_t len, | 4241 | struct pipe_inode_info *pipe, size_t len, |
4242 | unsigned int flags) | 4242 | unsigned int flags) |
4243 | { | 4243 | { |
4244 | struct ftrace_buffer_info *info = file->private_data; | 4244 | struct ftrace_buffer_info *info = file->private_data; |
4245 | struct partial_page partial_def[PIPE_DEF_BUFFERS]; | 4245 | struct partial_page partial_def[PIPE_DEF_BUFFERS]; |
4246 | struct page *pages_def[PIPE_DEF_BUFFERS]; | 4246 | struct page *pages_def[PIPE_DEF_BUFFERS]; |
4247 | struct splice_pipe_desc spd = { | 4247 | struct splice_pipe_desc spd = { |
4248 | .pages = pages_def, | 4248 | .pages = pages_def, |
4249 | .partial = partial_def, | 4249 | .partial = partial_def, |
4250 | .nr_pages_max = PIPE_DEF_BUFFERS, | 4250 | .nr_pages_max = PIPE_DEF_BUFFERS, |
4251 | .flags = flags, | 4251 | .flags = flags, |
4252 | .ops = &buffer_pipe_buf_ops, | 4252 | .ops = &buffer_pipe_buf_ops, |
4253 | .spd_release = buffer_spd_release, | 4253 | .spd_release = buffer_spd_release, |
4254 | }; | 4254 | }; |
4255 | struct buffer_ref *ref; | 4255 | struct buffer_ref *ref; |
4256 | int entries, size, i; | 4256 | int entries, size, i; |
4257 | size_t ret; | 4257 | size_t ret; |
4258 | 4258 | ||
4259 | if (splice_grow_spd(pipe, &spd)) | 4259 | if (splice_grow_spd(pipe, &spd)) |
4260 | return -ENOMEM; | 4260 | return -ENOMEM; |
4261 | 4261 | ||
4262 | if (*ppos & (PAGE_SIZE - 1)) { | 4262 | if (*ppos & (PAGE_SIZE - 1)) { |
4263 | WARN_ONCE(1, "Ftrace: previous read must page-align\n"); | 4263 | WARN_ONCE(1, "Ftrace: previous read must page-align\n"); |
4264 | ret = -EINVAL; | 4264 | ret = -EINVAL; |
4265 | goto out; | 4265 | goto out; |
4266 | } | 4266 | } |
4267 | 4267 | ||
4268 | if (len & (PAGE_SIZE - 1)) { | 4268 | if (len & (PAGE_SIZE - 1)) { |
4269 | WARN_ONCE(1, "Ftrace: splice_read should page-align\n"); | 4269 | WARN_ONCE(1, "Ftrace: splice_read should page-align\n"); |
4270 | if (len < PAGE_SIZE) { | 4270 | if (len < PAGE_SIZE) { |
4271 | ret = -EINVAL; | 4271 | ret = -EINVAL; |
4272 | goto out; | 4272 | goto out; |
4273 | } | 4273 | } |
4274 | len &= PAGE_MASK; | 4274 | len &= PAGE_MASK; |
4275 | } | 4275 | } |
4276 | 4276 | ||
4277 | trace_access_lock(info->cpu); | 4277 | trace_access_lock(info->cpu); |
4278 | entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); | 4278 | entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); |
4279 | 4279 | ||
4280 | for (i = 0; i < pipe->buffers && len && entries; i++, len -= PAGE_SIZE) { | 4280 | for (i = 0; i < pipe->buffers && len && entries; i++, len -= PAGE_SIZE) { |
4281 | struct page *page; | 4281 | struct page *page; |
4282 | int r; | 4282 | int r; |
4283 | 4283 | ||
4284 | ref = kzalloc(sizeof(*ref), GFP_KERNEL); | 4284 | ref = kzalloc(sizeof(*ref), GFP_KERNEL); |
4285 | if (!ref) | 4285 | if (!ref) |
4286 | break; | 4286 | break; |
4287 | 4287 | ||
4288 | ref->ref = 1; | 4288 | ref->ref = 1; |
4289 | ref->buffer = info->tr->buffer; | 4289 | ref->buffer = info->tr->buffer; |
4290 | ref->page = ring_buffer_alloc_read_page(ref->buffer, info->cpu); | 4290 | ref->page = ring_buffer_alloc_read_page(ref->buffer, info->cpu); |
4291 | if (!ref->page) { | 4291 | if (!ref->page) { |
4292 | kfree(ref); | 4292 | kfree(ref); |
4293 | break; | 4293 | break; |
4294 | } | 4294 | } |
4295 | 4295 | ||
4296 | r = ring_buffer_read_page(ref->buffer, &ref->page, | 4296 | r = ring_buffer_read_page(ref->buffer, &ref->page, |
4297 | len, info->cpu, 1); | 4297 | len, info->cpu, 1); |
4298 | if (r < 0) { | 4298 | if (r < 0) { |
4299 | ring_buffer_free_read_page(ref->buffer, ref->page); | 4299 | ring_buffer_free_read_page(ref->buffer, ref->page); |
4300 | kfree(ref); | 4300 | kfree(ref); |
4301 | break; | 4301 | break; |
4302 | } | 4302 | } |
4303 | 4303 | ||
4304 | /* | 4304 | /* |
4305 | * zero out any left over data, this is going to | 4305 | * zero out any left over data, this is going to |
4306 | * user land. | 4306 | * user land. |
4307 | */ | 4307 | */ |
4308 | size = ring_buffer_page_len(ref->page); | 4308 | size = ring_buffer_page_len(ref->page); |
4309 | if (size < PAGE_SIZE) | 4309 | if (size < PAGE_SIZE) |
4310 | memset(ref->page + size, 0, PAGE_SIZE - size); | 4310 | memset(ref->page + size, 0, PAGE_SIZE - size); |
4311 | 4311 | ||
4312 | page = virt_to_page(ref->page); | 4312 | page = virt_to_page(ref->page); |
4313 | 4313 | ||
4314 | spd.pages[i] = page; | 4314 | spd.pages[i] = page; |
4315 | spd.partial[i].len = PAGE_SIZE; | 4315 | spd.partial[i].len = PAGE_SIZE; |
4316 | spd.partial[i].offset = 0; | 4316 | spd.partial[i].offset = 0; |
4317 | spd.partial[i].private = (unsigned long)ref; | 4317 | spd.partial[i].private = (unsigned long)ref; |
4318 | spd.nr_pages++; | 4318 | spd.nr_pages++; |
4319 | *ppos += PAGE_SIZE; | 4319 | *ppos += PAGE_SIZE; |
4320 | 4320 | ||
4321 | entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); | 4321 | entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); |
4322 | } | 4322 | } |
4323 | 4323 | ||
4324 | trace_access_unlock(info->cpu); | 4324 | trace_access_unlock(info->cpu); |
4325 | spd.nr_pages = i; | 4325 | spd.nr_pages = i; |
4326 | 4326 | ||
4327 | /* did we read anything? */ | 4327 | /* did we read anything? */ |
4328 | if (!spd.nr_pages) { | 4328 | if (!spd.nr_pages) { |
4329 | if (flags & SPLICE_F_NONBLOCK) | 4329 | if (flags & SPLICE_F_NONBLOCK) |
4330 | ret = -EAGAIN; | 4330 | ret = -EAGAIN; |
4331 | else | 4331 | else |
4332 | ret = 0; | 4332 | ret = 0; |
4333 | /* TODO: block */ | 4333 | /* TODO: block */ |
4334 | goto out; | 4334 | goto out; |
4335 | } | 4335 | } |
4336 | 4336 | ||
4337 | ret = splice_to_pipe(pipe, &spd); | 4337 | ret = splice_to_pipe(pipe, &spd); |
4338 | splice_shrink_spd(&spd); | 4338 | splice_shrink_spd(&spd); |
4339 | out: | 4339 | out: |
4340 | return ret; | 4340 | return ret; |
4341 | } | 4341 | } |
4342 | 4342 | ||
4343 | static const struct file_operations tracing_buffers_fops = { | 4343 | static const struct file_operations tracing_buffers_fops = { |
4344 | .open = tracing_buffers_open, | 4344 | .open = tracing_buffers_open, |
4345 | .read = tracing_buffers_read, | 4345 | .read = tracing_buffers_read, |
4346 | .release = tracing_buffers_release, | 4346 | .release = tracing_buffers_release, |
4347 | .splice_read = tracing_buffers_splice_read, | 4347 | .splice_read = tracing_buffers_splice_read, |
4348 | .llseek = no_llseek, | 4348 | .llseek = no_llseek, |
4349 | }; | 4349 | }; |
4350 | 4350 | ||
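A note on how this splice path is meant to be driven from user space: tracing_buffers_splice_read() hands whole ring-buffer pages to the pipe, which is why it insists on page-aligned offsets and lengths, and why every page carries a buffer_ref that is only freed once buffer_pipe_buf_release() (or buffer_spd_release() on an error path) drops the last reference. Below is a minimal user-space sketch of draining a per-CPU trace_pipe_raw file through splice(2); the file path, output file name, and error handling are illustrative assumptions, not something this commit defines.

#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	/* Hypothetical consumer: splice raw ring-buffer pages into a file. */
	int trace_fd = open("/sys/kernel/debug/tracing/per_cpu/cpu0/trace_pipe_raw",
			    O_RDONLY);
	int out_fd = open("cpu0.raw", O_WRONLY | O_CREAT | O_TRUNC, 0644);
	long page = sysconf(_SC_PAGESIZE);
	int pfd[2];

	if (trace_fd < 0 || out_fd < 0 || pipe(pfd) < 0)
		return 1;

	for (;;) {
		/* len must stay a multiple of PAGE_SIZE, per the checks above */
		ssize_t n = splice(trace_fd, NULL, pfd[1], NULL, page,
				   SPLICE_F_NONBLOCK);
		if (n <= 0)
			break;		/* no entries left, or EAGAIN */
		splice(pfd[0], NULL, out_fd, NULL, n, 0);
	}
	return 0;
}

With SPLICE_F_NONBLOCK, the kernel side returns -EAGAIN instead of blocking when no complete page is available, matching the "did we read anything?" branch above.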
4351 | static ssize_t | 4351 | static ssize_t |
4352 | tracing_stats_read(struct file *filp, char __user *ubuf, | 4352 | tracing_stats_read(struct file *filp, char __user *ubuf, |
4353 | size_t count, loff_t *ppos) | 4353 | size_t count, loff_t *ppos) |
4354 | { | 4354 | { |
4355 | unsigned long cpu = (unsigned long)filp->private_data; | 4355 | unsigned long cpu = (unsigned long)filp->private_data; |
4356 | struct trace_array *tr = &global_trace; | 4356 | struct trace_array *tr = &global_trace; |
4357 | struct trace_seq *s; | 4357 | struct trace_seq *s; |
4358 | unsigned long cnt; | 4358 | unsigned long cnt; |
4359 | unsigned long long t; | 4359 | unsigned long long t; |
4360 | unsigned long usec_rem; | 4360 | unsigned long usec_rem; |
4361 | 4361 | ||
4362 | s = kmalloc(sizeof(*s), GFP_KERNEL); | 4362 | s = kmalloc(sizeof(*s), GFP_KERNEL); |
4363 | if (!s) | 4363 | if (!s) |
4364 | return -ENOMEM; | 4364 | return -ENOMEM; |
4365 | 4365 | ||
4366 | trace_seq_init(s); | 4366 | trace_seq_init(s); |
4367 | 4367 | ||
4368 | cnt = ring_buffer_entries_cpu(tr->buffer, cpu); | 4368 | cnt = ring_buffer_entries_cpu(tr->buffer, cpu); |
4369 | trace_seq_printf(s, "entries: %ld\n", cnt); | 4369 | trace_seq_printf(s, "entries: %ld\n", cnt); |
4370 | 4370 | ||
4371 | cnt = ring_buffer_overrun_cpu(tr->buffer, cpu); | 4371 | cnt = ring_buffer_overrun_cpu(tr->buffer, cpu); |
4372 | trace_seq_printf(s, "overrun: %ld\n", cnt); | 4372 | trace_seq_printf(s, "overrun: %ld\n", cnt); |
4373 | 4373 | ||
4374 | cnt = ring_buffer_commit_overrun_cpu(tr->buffer, cpu); | 4374 | cnt = ring_buffer_commit_overrun_cpu(tr->buffer, cpu); |
4375 | trace_seq_printf(s, "commit overrun: %ld\n", cnt); | 4375 | trace_seq_printf(s, "commit overrun: %ld\n", cnt); |
4376 | 4376 | ||
4377 | cnt = ring_buffer_bytes_cpu(tr->buffer, cpu); | 4377 | cnt = ring_buffer_bytes_cpu(tr->buffer, cpu); |
4378 | trace_seq_printf(s, "bytes: %ld\n", cnt); | 4378 | trace_seq_printf(s, "bytes: %ld\n", cnt); |
4379 | 4379 | ||
4380 | t = ns2usecs(ring_buffer_oldest_event_ts(tr->buffer, cpu)); | 4380 | t = ns2usecs(ring_buffer_oldest_event_ts(tr->buffer, cpu)); |
4381 | usec_rem = do_div(t, USEC_PER_SEC); | 4381 | usec_rem = do_div(t, USEC_PER_SEC); |
4382 | trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n", t, usec_rem); | 4382 | trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n", t, usec_rem); |
4383 | 4383 | ||
4384 | t = ns2usecs(ring_buffer_time_stamp(tr->buffer, cpu)); | 4384 | t = ns2usecs(ring_buffer_time_stamp(tr->buffer, cpu)); |
4385 | usec_rem = do_div(t, USEC_PER_SEC); | 4385 | usec_rem = do_div(t, USEC_PER_SEC); |
4386 | trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem); | 4386 | trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem); |
4387 | 4387 | ||
4388 | cnt = ring_buffer_dropped_events_cpu(tr->buffer, cpu); | ||
4389 | trace_seq_printf(s, "dropped events: %ld\n", cnt); | ||
4390 | |||
4388 | count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len); | 4391 | count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len); |
4389 | 4392 | ||
4390 | kfree(s); | 4393 | kfree(s); |
4391 | 4394 | ||
4392 | return count; | 4395 | return count; |
4393 | } | 4396 | } |
4394 | 4397 | ||
4395 | static const struct file_operations tracing_stats_fops = { | 4398 | static const struct file_operations tracing_stats_fops = { |
4396 | .open = tracing_open_generic, | 4399 | .open = tracing_open_generic, |
4397 | .read = tracing_stats_read, | 4400 | .read = tracing_stats_read, |
4398 | .llseek = generic_file_llseek, | 4401 | .llseek = generic_file_llseek, |
4399 | }; | 4402 | }; |
4400 | 4403 | ||
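The two added lines above are where the new counter becomes visible: ring_buffer_dropped_events_cpu() is queried in tracing_stats_read(), so each per_cpu/cpuN/stats read now reports a per-CPU "dropped events" count alongside the existing statistics. Going by the trace_seq_printf() format strings, a read of that file should look roughly like this (the numbers are made up purely for illustration):

	entries: 129
	overrun: 0
	commit overrun: 0
	bytes: 4940
	oldest event ts:  2409.748658
	now ts:  2409.857662
	dropped events: 3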
4401 | #ifdef CONFIG_DYNAMIC_FTRACE | 4404 | #ifdef CONFIG_DYNAMIC_FTRACE |
4402 | 4405 | ||
4403 | int __weak ftrace_arch_read_dyn_info(char *buf, int size) | 4406 | int __weak ftrace_arch_read_dyn_info(char *buf, int size) |
4404 | { | 4407 | { |
4405 | return 0; | 4408 | return 0; |
4406 | } | 4409 | } |
4407 | 4410 | ||
4408 | static ssize_t | 4411 | static ssize_t |
4409 | tracing_read_dyn_info(struct file *filp, char __user *ubuf, | 4412 | tracing_read_dyn_info(struct file *filp, char __user *ubuf, |
4410 | size_t cnt, loff_t *ppos) | 4413 | size_t cnt, loff_t *ppos) |
4411 | { | 4414 | { |
4412 | static char ftrace_dyn_info_buffer[1024]; | 4415 | static char ftrace_dyn_info_buffer[1024]; |
4413 | static DEFINE_MUTEX(dyn_info_mutex); | 4416 | static DEFINE_MUTEX(dyn_info_mutex); |
4414 | unsigned long *p = filp->private_data; | 4417 | unsigned long *p = filp->private_data; |
4415 | char *buf = ftrace_dyn_info_buffer; | 4418 | char *buf = ftrace_dyn_info_buffer; |
4416 | int size = ARRAY_SIZE(ftrace_dyn_info_buffer); | 4419 | int size = ARRAY_SIZE(ftrace_dyn_info_buffer); |
4417 | int r; | 4420 | int r; |
4418 | 4421 | ||
4419 | mutex_lock(&dyn_info_mutex); | 4422 | mutex_lock(&dyn_info_mutex); |
4420 | r = sprintf(buf, "%ld ", *p); | 4423 | r = sprintf(buf, "%ld ", *p); |
4421 | 4424 | ||
4422 | r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r); | 4425 | r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r); |
4423 | buf[r++] = '\n'; | 4426 | buf[r++] = '\n'; |
4424 | 4427 | ||
4425 | r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); | 4428 | r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); |
4426 | 4429 | ||
4427 | mutex_unlock(&dyn_info_mutex); | 4430 | mutex_unlock(&dyn_info_mutex); |
4428 | 4431 | ||
4429 | return r; | 4432 | return r; |
4430 | } | 4433 | } |
4431 | 4434 | ||
4432 | static const struct file_operations tracing_dyn_info_fops = { | 4435 | static const struct file_operations tracing_dyn_info_fops = { |
4433 | .open = tracing_open_generic, | 4436 | .open = tracing_open_generic, |
4434 | .read = tracing_read_dyn_info, | 4437 | .read = tracing_read_dyn_info, |
4435 | .llseek = generic_file_llseek, | 4438 | .llseek = generic_file_llseek, |
4436 | }; | 4439 | }; |
4437 | #endif | 4440 | #endif |
4438 | 4441 | ||
4439 | static struct dentry *d_tracer; | 4442 | static struct dentry *d_tracer; |
4440 | 4443 | ||
4441 | struct dentry *tracing_init_dentry(void) | 4444 | struct dentry *tracing_init_dentry(void) |
4442 | { | 4445 | { |
4443 | static int once; | 4446 | static int once; |
4444 | 4447 | ||
4445 | if (d_tracer) | 4448 | if (d_tracer) |
4446 | return d_tracer; | 4449 | return d_tracer; |
4447 | 4450 | ||
4448 | if (!debugfs_initialized()) | 4451 | if (!debugfs_initialized()) |
4449 | return NULL; | 4452 | return NULL; |
4450 | 4453 | ||
4451 | d_tracer = debugfs_create_dir("tracing", NULL); | 4454 | d_tracer = debugfs_create_dir("tracing", NULL); |
4452 | 4455 | ||
4453 | if (!d_tracer && !once) { | 4456 | if (!d_tracer && !once) { |
4454 | once = 1; | 4457 | once = 1; |
4455 | pr_warning("Could not create debugfs directory 'tracing'\n"); | 4458 | pr_warning("Could not create debugfs directory 'tracing'\n"); |
4456 | return NULL; | 4459 | return NULL; |
4457 | } | 4460 | } |
4458 | 4461 | ||
4459 | return d_tracer; | 4462 | return d_tracer; |
4460 | } | 4463 | } |
4461 | 4464 | ||
4462 | static struct dentry *d_percpu; | 4465 | static struct dentry *d_percpu; |
4463 | 4466 | ||
4464 | struct dentry *tracing_dentry_percpu(void) | 4467 | struct dentry *tracing_dentry_percpu(void) |
4465 | { | 4468 | { |
4466 | static int once; | 4469 | static int once; |
4467 | struct dentry *d_tracer; | 4470 | struct dentry *d_tracer; |
4468 | 4471 | ||
4469 | if (d_percpu) | 4472 | if (d_percpu) |
4470 | return d_percpu; | 4473 | return d_percpu; |
4471 | 4474 | ||
4472 | d_tracer = tracing_init_dentry(); | 4475 | d_tracer = tracing_init_dentry(); |
4473 | 4476 | ||
4474 | if (!d_tracer) | 4477 | if (!d_tracer) |
4475 | return NULL; | 4478 | return NULL; |
4476 | 4479 | ||
4477 | d_percpu = debugfs_create_dir("per_cpu", d_tracer); | 4480 | d_percpu = debugfs_create_dir("per_cpu", d_tracer); |
4478 | 4481 | ||
4479 | if (!d_percpu && !once) { | 4482 | if (!d_percpu && !once) { |
4480 | once = 1; | 4483 | once = 1; |
4481 | pr_warning("Could not create debugfs directory 'per_cpu'\n"); | 4484 | pr_warning("Could not create debugfs directory 'per_cpu'\n"); |
4482 | return NULL; | 4485 | return NULL; |
4483 | } | 4486 | } |
4484 | 4487 | ||
4485 | return d_percpu; | 4488 | return d_percpu; |
4486 | } | 4489 | } |
4487 | 4490 | ||
4488 | static void tracing_init_debugfs_percpu(long cpu) | 4491 | static void tracing_init_debugfs_percpu(long cpu) |
4489 | { | 4492 | { |
4490 | struct dentry *d_percpu = tracing_dentry_percpu(); | 4493 | struct dentry *d_percpu = tracing_dentry_percpu(); |
4491 | struct dentry *d_cpu; | 4494 | struct dentry *d_cpu; |
4492 | char cpu_dir[30]; /* 30 characters should be more than enough */ | 4495 | char cpu_dir[30]; /* 30 characters should be more than enough */ |
4493 | 4496 | ||
4494 | if (!d_percpu) | 4497 | if (!d_percpu) |
4495 | return; | 4498 | return; |
4496 | 4499 | ||
4497 | snprintf(cpu_dir, 30, "cpu%ld", cpu); | 4500 | snprintf(cpu_dir, 30, "cpu%ld", cpu); |
4498 | d_cpu = debugfs_create_dir(cpu_dir, d_percpu); | 4501 | d_cpu = debugfs_create_dir(cpu_dir, d_percpu); |
4499 | if (!d_cpu) { | 4502 | if (!d_cpu) { |
4500 | pr_warning("Could not create debugfs '%s' entry\n", cpu_dir); | 4503 | pr_warning("Could not create debugfs '%s' entry\n", cpu_dir); |
4501 | return; | 4504 | return; |
4502 | } | 4505 | } |
4503 | 4506 | ||
4504 | /* per cpu trace_pipe */ | 4507 | /* per cpu trace_pipe */ |
4505 | trace_create_file("trace_pipe", 0444, d_cpu, | 4508 | trace_create_file("trace_pipe", 0444, d_cpu, |
4506 | (void *) cpu, &tracing_pipe_fops); | 4509 | (void *) cpu, &tracing_pipe_fops); |
4507 | 4510 | ||
4508 | /* per cpu trace */ | 4511 | /* per cpu trace */ |
4509 | trace_create_file("trace", 0644, d_cpu, | 4512 | trace_create_file("trace", 0644, d_cpu, |
4510 | (void *) cpu, &tracing_fops); | 4513 | (void *) cpu, &tracing_fops); |
4511 | 4514 | ||
4512 | trace_create_file("trace_pipe_raw", 0444, d_cpu, | 4515 | trace_create_file("trace_pipe_raw", 0444, d_cpu, |
4513 | (void *) cpu, &tracing_buffers_fops); | 4516 | (void *) cpu, &tracing_buffers_fops); |
4514 | 4517 | ||
4515 | trace_create_file("stats", 0444, d_cpu, | 4518 | trace_create_file("stats", 0444, d_cpu, |
4516 | (void *) cpu, &tracing_stats_fops); | 4519 | (void *) cpu, &tracing_stats_fops); |
4517 | 4520 | ||
4518 | trace_create_file("buffer_size_kb", 0444, d_cpu, | 4521 | trace_create_file("buffer_size_kb", 0444, d_cpu, |
4519 | (void *) cpu, &tracing_entries_fops); | 4522 | (void *) cpu, &tracing_entries_fops); |
4520 | } | 4523 | } |
4521 | 4524 | ||
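For reference, tracing_init_debugfs_percpu() gives each CPU its own directory, so with debugfs mounted at the conventional location the per-CPU control files created above end up laid out like this (cpu0 shown as an example):

	/sys/kernel/debug/tracing/per_cpu/cpu0/
		trace_pipe        (0444)
		trace             (0644)
		trace_pipe_raw    (0444)
		stats             (0444)
		buffer_size_kb    (0444)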
4522 | #ifdef CONFIG_FTRACE_SELFTEST | 4525 | #ifdef CONFIG_FTRACE_SELFTEST |
4523 | /* Let selftest have access to static functions in this file */ | 4526 | /* Let selftest have access to static functions in this file */ |
4524 | #include "trace_selftest.c" | 4527 | #include "trace_selftest.c" |
4525 | #endif | 4528 | #endif |
4526 | 4529 | ||
4527 | struct trace_option_dentry { | 4530 | struct trace_option_dentry { |
4528 | struct tracer_opt *opt; | 4531 | struct tracer_opt *opt; |
4529 | struct tracer_flags *flags; | 4532 | struct tracer_flags *flags; |
4530 | struct dentry *entry; | 4533 | struct dentry *entry; |
4531 | }; | 4534 | }; |
4532 | 4535 | ||
4533 | static ssize_t | 4536 | static ssize_t |
4534 | trace_options_read(struct file *filp, char __user *ubuf, size_t cnt, | 4537 | trace_options_read(struct file *filp, char __user *ubuf, size_t cnt, |
4535 | loff_t *ppos) | 4538 | loff_t *ppos) |
4536 | { | 4539 | { |
4537 | struct trace_option_dentry *topt = filp->private_data; | 4540 | struct trace_option_dentry *topt = filp->private_data; |
4538 | char *buf; | 4541 | char *buf; |
4539 | 4542 | ||
4540 | if (topt->flags->val & topt->opt->bit) | 4543 | if (topt->flags->val & topt->opt->bit) |
4541 | buf = "1\n"; | 4544 | buf = "1\n"; |
4542 | else | 4545 | else |
4543 | buf = "0\n"; | 4546 | buf = "0\n"; |
4544 | 4547 | ||
4545 | return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2); | 4548 | return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2); |
4546 | } | 4549 | } |
4547 | 4550 | ||
4548 | static ssize_t | 4551 | static ssize_t |
4549 | trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt, | 4552 | trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt, |
4550 | loff_t *ppos) | 4553 | loff_t *ppos) |
4551 | { | 4554 | { |
4552 | struct trace_option_dentry *topt = filp->private_data; | 4555 | struct trace_option_dentry *topt = filp->private_data; |
4553 | unsigned long val; | 4556 | unsigned long val; |
4554 | int ret; | 4557 | int ret; |
4555 | 4558 | ||
4556 | ret = kstrtoul_from_user(ubuf, cnt, 10, &val); | 4559 | ret = kstrtoul_from_user(ubuf, cnt, 10, &val); |
4557 | if (ret) | 4560 | if (ret) |
4558 | return ret; | 4561 | return ret; |
4559 | 4562 | ||
4560 | if (val != 0 && val != 1) | 4563 | if (val != 0 && val != 1) |
4561 | return -EINVAL; | 4564 | return -EINVAL; |
4562 | 4565 | ||
4563 | if (!!(topt->flags->val & topt->opt->bit) != val) { | 4566 | if (!!(topt->flags->val & topt->opt->bit) != val) { |
4564 | mutex_lock(&trace_types_lock); | 4567 | mutex_lock(&trace_types_lock); |
4565 | ret = __set_tracer_option(current_trace, topt->flags, | 4568 | ret = __set_tracer_option(current_trace, topt->flags, |
4566 | topt->opt, !val); | 4569 | topt->opt, !val); |
4567 | mutex_unlock(&trace_types_lock); | 4570 | mutex_unlock(&trace_types_lock); |
4568 | if (ret) | 4571 | if (ret) |
4569 | return ret; | 4572 | return ret; |
4570 | } | 4573 | } |
4571 | 4574 | ||
4572 | *ppos += cnt; | 4575 | *ppos += cnt; |
4573 | 4576 | ||
4574 | return cnt; | 4577 | return cnt; |
4575 | } | 4578 | } |
4576 | 4579 | ||
4577 | 4580 | ||
4578 | static const struct file_operations trace_options_fops = { | 4581 | static const struct file_operations trace_options_fops = { |
4579 | .open = tracing_open_generic, | 4582 | .open = tracing_open_generic, |
4580 | .read = trace_options_read, | 4583 | .read = trace_options_read, |
4581 | .write = trace_options_write, | 4584 | .write = trace_options_write, |
4582 | .llseek = generic_file_llseek, | 4585 | .llseek = generic_file_llseek, |
4583 | }; | 4586 | }; |
4584 | 4587 | ||
4585 | static ssize_t | 4588 | static ssize_t |
4586 | trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt, | 4589 | trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt, |
4587 | loff_t *ppos) | 4590 | loff_t *ppos) |
4588 | { | 4591 | { |
4589 | long index = (long)filp->private_data; | 4592 | long index = (long)filp->private_data; |
4590 | char *buf; | 4593 | char *buf; |
4591 | 4594 | ||
4592 | if (trace_flags & (1 << index)) | 4595 | if (trace_flags & (1 << index)) |
4593 | buf = "1\n"; | 4596 | buf = "1\n"; |
4594 | else | 4597 | else |
4595 | buf = "0\n"; | 4598 | buf = "0\n"; |
4596 | 4599 | ||
4597 | return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2); | 4600 | return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2); |
4598 | } | 4601 | } |
4599 | 4602 | ||
4600 | static ssize_t | 4603 | static ssize_t |
4601 | trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt, | 4604 | trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt, |
4602 | loff_t *ppos) | 4605 | loff_t *ppos) |
4603 | { | 4606 | { |
4604 | long index = (long)filp->private_data; | 4607 | long index = (long)filp->private_data; |
4605 | unsigned long val; | 4608 | unsigned long val; |
4606 | int ret; | 4609 | int ret; |
4607 | 4610 | ||
4608 | ret = kstrtoul_from_user(ubuf, cnt, 10, &val); | 4611 | ret = kstrtoul_from_user(ubuf, cnt, 10, &val); |
4609 | if (ret) | 4612 | if (ret) |
4610 | return ret; | 4613 | return ret; |
4611 | 4614 | ||
4612 | if (val != 0 && val != 1) | 4615 | if (val != 0 && val != 1) |
4613 | return -EINVAL; | 4616 | return -EINVAL; |
4614 | set_tracer_flags(1 << index, val); | 4617 | set_tracer_flags(1 << index, val); |
4615 | 4618 | ||
4616 | *ppos += cnt; | 4619 | *ppos += cnt; |
4617 | 4620 | ||
4618 | return cnt; | 4621 | return cnt; |
4619 | } | 4622 | } |
4620 | 4623 | ||
4621 | static const struct file_operations trace_options_core_fops = { | 4624 | static const struct file_operations trace_options_core_fops = { |
4622 | .open = tracing_open_generic, | 4625 | .open = tracing_open_generic, |
4623 | .read = trace_options_core_read, | 4626 | .read = trace_options_core_read, |
4624 | .write = trace_options_core_write, | 4627 | .write = trace_options_core_write, |
4625 | .llseek = generic_file_llseek, | 4628 | .llseek = generic_file_llseek, |
4626 | }; | 4629 | }; |
4627 | 4630 | ||
4628 | struct dentry *trace_create_file(const char *name, | 4631 | struct dentry *trace_create_file(const char *name, |
4629 | umode_t mode, | 4632 | umode_t mode, |
4630 | struct dentry *parent, | 4633 | struct dentry *parent, |
4631 | void *data, | 4634 | void *data, |
4632 | const struct file_operations *fops) | 4635 | const struct file_operations *fops) |
4633 | { | 4636 | { |
4634 | struct dentry *ret; | 4637 | struct dentry *ret; |
4635 | 4638 | ||
4636 | ret = debugfs_create_file(name, mode, parent, data, fops); | 4639 | ret = debugfs_create_file(name, mode, parent, data, fops); |
4637 | if (!ret) | 4640 | if (!ret) |
4638 | pr_warning("Could not create debugfs '%s' entry\n", name); | 4641 | pr_warning("Could not create debugfs '%s' entry\n", name); |
4639 | 4642 | ||
4640 | return ret; | 4643 | return ret; |
4641 | } | 4644 | } |
4642 | 4645 | ||
4643 | 4646 | ||
4644 | static struct dentry *trace_options_init_dentry(void) | 4647 | static struct dentry *trace_options_init_dentry(void) |
4645 | { | 4648 | { |
4646 | struct dentry *d_tracer; | 4649 | struct dentry *d_tracer; |
4647 | static struct dentry *t_options; | 4650 | static struct dentry *t_options; |
4648 | 4651 | ||
4649 | if (t_options) | 4652 | if (t_options) |
4650 | return t_options; | 4653 | return t_options; |
4651 | 4654 | ||
4652 | d_tracer = tracing_init_dentry(); | 4655 | d_tracer = tracing_init_dentry(); |
4653 | if (!d_tracer) | 4656 | if (!d_tracer) |
4654 | return NULL; | 4657 | return NULL; |
4655 | 4658 | ||
4656 | t_options = debugfs_create_dir("options", d_tracer); | 4659 | t_options = debugfs_create_dir("options", d_tracer); |
4657 | if (!t_options) { | 4660 | if (!t_options) { |
4658 | pr_warning("Could not create debugfs directory 'options'\n"); | 4661 | pr_warning("Could not create debugfs directory 'options'\n"); |
4659 | return NULL; | 4662 | return NULL; |
4660 | } | 4663 | } |
4661 | 4664 | ||
4662 | return t_options; | 4665 | return t_options; |
4663 | } | 4666 | } |
4664 | 4667 | ||
4665 | static void | 4668 | static void |
4666 | create_trace_option_file(struct trace_option_dentry *topt, | 4669 | create_trace_option_file(struct trace_option_dentry *topt, |
4667 | struct tracer_flags *flags, | 4670 | struct tracer_flags *flags, |
4668 | struct tracer_opt *opt) | 4671 | struct tracer_opt *opt) |
4669 | { | 4672 | { |
4670 | struct dentry *t_options; | 4673 | struct dentry *t_options; |
4671 | 4674 | ||
4672 | t_options = trace_options_init_dentry(); | 4675 | t_options = trace_options_init_dentry(); |
4673 | if (!t_options) | 4676 | if (!t_options) |
4674 | return; | 4677 | return; |
4675 | 4678 | ||
4676 | topt->flags = flags; | 4679 | topt->flags = flags; |
4677 | topt->opt = opt; | 4680 | topt->opt = opt; |
4678 | 4681 | ||
4679 | topt->entry = trace_create_file(opt->name, 0644, t_options, topt, | 4682 | topt->entry = trace_create_file(opt->name, 0644, t_options, topt, |
4680 | &trace_options_fops); | 4683 | &trace_options_fops); |
4681 | 4684 | ||
4682 | } | 4685 | } |
4683 | 4686 | ||
4684 | static struct trace_option_dentry * | 4687 | static struct trace_option_dentry * |
4685 | create_trace_option_files(struct tracer *tracer) | 4688 | create_trace_option_files(struct tracer *tracer) |
4686 | { | 4689 | { |
4687 | struct trace_option_dentry *topts; | 4690 | struct trace_option_dentry *topts; |
4688 | struct tracer_flags *flags; | 4691 | struct tracer_flags *flags; |
4689 | struct tracer_opt *opts; | 4692 | struct tracer_opt *opts; |
4690 | int cnt; | 4693 | int cnt; |
4691 | 4694 | ||
4692 | if (!tracer) | 4695 | if (!tracer) |
4693 | return NULL; | 4696 | return NULL; |
4694 | 4697 | ||
4695 | flags = tracer->flags; | 4698 | flags = tracer->flags; |
4696 | 4699 | ||
4697 | if (!flags || !flags->opts) | 4700 | if (!flags || !flags->opts) |
4698 | return NULL; | 4701 | return NULL; |
4699 | 4702 | ||
4700 | opts = flags->opts; | 4703 | opts = flags->opts; |
4701 | 4704 | ||
4702 | for (cnt = 0; opts[cnt].name; cnt++) | 4705 | for (cnt = 0; opts[cnt].name; cnt++) |
4703 | ; | 4706 | ; |
4704 | 4707 | ||
4705 | topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL); | 4708 | topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL); |
4706 | if (!topts) | 4709 | if (!topts) |
4707 | return NULL; | 4710 | return NULL; |
4708 | 4711 | ||
4709 | for (cnt = 0; opts[cnt].name; cnt++) | 4712 | for (cnt = 0; opts[cnt].name; cnt++) |
4710 | create_trace_option_file(&topts[cnt], flags, | 4713 | create_trace_option_file(&topts[cnt], flags, |
4711 | &opts[cnt]); | 4714 | &opts[cnt]); |
4712 | 4715 | ||
4713 | return topts; | 4716 | return topts; |
4714 | } | 4717 | } |
4715 | 4718 | ||
4716 | static void | 4719 | static void |
4717 | destroy_trace_option_files(struct trace_option_dentry *topts) | 4720 | destroy_trace_option_files(struct trace_option_dentry *topts) |
4718 | { | 4721 | { |
4719 | int cnt; | 4722 | int cnt; |
4720 | 4723 | ||
4721 | if (!topts) | 4724 | if (!topts) |
4722 | return; | 4725 | return; |
4723 | 4726 | ||
4724 | for (cnt = 0; topts[cnt].opt; cnt++) { | 4727 | for (cnt = 0; topts[cnt].opt; cnt++) { |
4725 | if (topts[cnt].entry) | 4728 | if (topts[cnt].entry) |
4726 | debugfs_remove(topts[cnt].entry); | 4729 | debugfs_remove(topts[cnt].entry); |
4727 | } | 4730 | } |
4728 | 4731 | ||
4729 | kfree(topts); | 4732 | kfree(topts); |
4730 | } | 4733 | } |
4731 | 4734 | ||
4732 | static struct dentry * | 4735 | static struct dentry * |
4733 | create_trace_option_core_file(const char *option, long index) | 4736 | create_trace_option_core_file(const char *option, long index) |
4734 | { | 4737 | { |
4735 | struct dentry *t_options; | 4738 | struct dentry *t_options; |
4736 | 4739 | ||
4737 | t_options = trace_options_init_dentry(); | 4740 | t_options = trace_options_init_dentry(); |
4738 | if (!t_options) | 4741 | if (!t_options) |
4739 | return NULL; | 4742 | return NULL; |
4740 | 4743 | ||
4741 | return trace_create_file(option, 0644, t_options, (void *)index, | 4744 | return trace_create_file(option, 0644, t_options, (void *)index, |
4742 | &trace_options_core_fops); | 4745 | &trace_options_core_fops); |
4743 | } | 4746 | } |
4744 | 4747 | ||
4745 | static __init void create_trace_options_dir(void) | 4748 | static __init void create_trace_options_dir(void) |
4746 | { | 4749 | { |
4747 | struct dentry *t_options; | 4750 | struct dentry *t_options; |
4748 | int i; | 4751 | int i; |
4749 | 4752 | ||
4750 | t_options = trace_options_init_dentry(); | 4753 | t_options = trace_options_init_dentry(); |
4751 | if (!t_options) | 4754 | if (!t_options) |
4752 | return; | 4755 | return; |
4753 | 4756 | ||
4754 | for (i = 0; trace_options[i]; i++) | 4757 | for (i = 0; trace_options[i]; i++) |
4755 | create_trace_option_core_file(trace_options[i], i); | 4758 | create_trace_option_core_file(trace_options[i], i); |
4756 | } | 4759 | } |
4757 | 4760 | ||
4758 | static ssize_t | 4761 | static ssize_t |
4759 | rb_simple_read(struct file *filp, char __user *ubuf, | 4762 | rb_simple_read(struct file *filp, char __user *ubuf, |
4760 | size_t cnt, loff_t *ppos) | 4763 | size_t cnt, loff_t *ppos) |
4761 | { | 4764 | { |
4762 | struct trace_array *tr = filp->private_data; | 4765 | struct trace_array *tr = filp->private_data; |
4763 | struct ring_buffer *buffer = tr->buffer; | 4766 | struct ring_buffer *buffer = tr->buffer; |
4764 | char buf[64]; | 4767 | char buf[64]; |
4765 | int r; | 4768 | int r; |
4766 | 4769 | ||
4767 | if (buffer) | 4770 | if (buffer) |
4768 | r = ring_buffer_record_is_on(buffer); | 4771 | r = ring_buffer_record_is_on(buffer); |
4769 | else | 4772 | else |
4770 | r = 0; | 4773 | r = 0; |
4771 | 4774 | ||
4772 | r = sprintf(buf, "%d\n", r); | 4775 | r = sprintf(buf, "%d\n", r); |
4773 | 4776 | ||
4774 | return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); | 4777 | return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); |
4775 | } | 4778 | } |
4776 | 4779 | ||
4777 | static ssize_t | 4780 | static ssize_t |
4778 | rb_simple_write(struct file *filp, const char __user *ubuf, | 4781 | rb_simple_write(struct file *filp, const char __user *ubuf, |
4779 | size_t cnt, loff_t *ppos) | 4782 | size_t cnt, loff_t *ppos) |
4780 | { | 4783 | { |
4781 | struct trace_array *tr = filp->private_data; | 4784 | struct trace_array *tr = filp->private_data; |
4782 | struct ring_buffer *buffer = tr->buffer; | 4785 | struct ring_buffer *buffer = tr->buffer; |
4783 | unsigned long val; | 4786 | unsigned long val; |
4784 | int ret; | 4787 | int ret; |
4785 | 4788 | ||
4786 | ret = kstrtoul_from_user(ubuf, cnt, 10, &val); | 4789 | ret = kstrtoul_from_user(ubuf, cnt, 10, &val); |
4787 | if (ret) | 4790 | if (ret) |
4788 | return ret; | 4791 | return ret; |
4789 | 4792 | ||
4790 | if (buffer) { | 4793 | if (buffer) { |
4791 | if (val) | 4794 | if (val) |
4792 | ring_buffer_record_on(buffer); | 4795 | ring_buffer_record_on(buffer); |
4793 | else | 4796 | else |
4794 | ring_buffer_record_off(buffer); | 4797 | ring_buffer_record_off(buffer); |
4795 | } | 4798 | } |
4796 | 4799 | ||
4797 | (*ppos)++; | 4800 | (*ppos)++; |
4798 | 4801 | ||
4799 | return cnt; | 4802 | return cnt; |
4800 | } | 4803 | } |
4801 | 4804 | ||
4802 | static const struct file_operations rb_simple_fops = { | 4805 | static const struct file_operations rb_simple_fops = { |
4803 | .open = tracing_open_generic, | 4806 | .open = tracing_open_generic, |
4804 | .read = rb_simple_read, | 4807 | .read = rb_simple_read, |
4805 | .write = rb_simple_write, | 4808 | .write = rb_simple_write, |
4806 | .llseek = default_llseek, | 4809 | .llseek = default_llseek, |
4807 | }; | 4810 | }; |
4808 | 4811 | ||
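rb_simple_read() and rb_simple_write() back the tracing_on file registered in tracer_init_debugfs() below: a read reports whether the ring buffer is currently recording, and a write parsed by kstrtoul_from_user() switches recording on (non-zero) or off (zero) without discarding buffered data. A minimal sketch of toggling it from user space, assuming the usual debugfs mount point:

	#include <fcntl.h>
	#include <unistd.h>

	/* Illustrative helper: write "1" or "0" to tracing_on. */
	static void tracing_record(int on)
	{
		int fd = open("/sys/kernel/debug/tracing/tracing_on", O_WRONLY);

		if (fd < 0)
			return;
		write(fd, on ? "1" : "0", 1);
		close(fd);
	}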
4809 | static __init int tracer_init_debugfs(void) | 4812 | static __init int tracer_init_debugfs(void) |
4810 | { | 4813 | { |
4811 | struct dentry *d_tracer; | 4814 | struct dentry *d_tracer; |
4812 | int cpu; | 4815 | int cpu; |
4813 | 4816 | ||
4814 | trace_access_lock_init(); | 4817 | trace_access_lock_init(); |
4815 | 4818 | ||
4816 | d_tracer = tracing_init_dentry(); | 4819 | d_tracer = tracing_init_dentry(); |
4817 | 4820 | ||
4818 | trace_create_file("tracing_enabled", 0644, d_tracer, | 4821 | trace_create_file("tracing_enabled", 0644, d_tracer, |
4819 | &global_trace, &tracing_ctrl_fops); | 4822 | &global_trace, &tracing_ctrl_fops); |
4820 | 4823 | ||
4821 | trace_create_file("trace_options", 0644, d_tracer, | 4824 | trace_create_file("trace_options", 0644, d_tracer, |
4822 | NULL, &tracing_iter_fops); | 4825 | NULL, &tracing_iter_fops); |
4823 | 4826 | ||
4824 | trace_create_file("tracing_cpumask", 0644, d_tracer, | 4827 | trace_create_file("tracing_cpumask", 0644, d_tracer, |
4825 | NULL, &tracing_cpumask_fops); | 4828 | NULL, &tracing_cpumask_fops); |
4826 | 4829 | ||
4827 | trace_create_file("trace", 0644, d_tracer, | 4830 | trace_create_file("trace", 0644, d_tracer, |
4828 | (void *) TRACE_PIPE_ALL_CPU, &tracing_fops); | 4831 | (void *) TRACE_PIPE_ALL_CPU, &tracing_fops); |
4829 | 4832 | ||
4830 | trace_create_file("available_tracers", 0444, d_tracer, | 4833 | trace_create_file("available_tracers", 0444, d_tracer, |
4831 | &global_trace, &show_traces_fops); | 4834 | &global_trace, &show_traces_fops); |
4832 | 4835 | ||
4833 | trace_create_file("current_tracer", 0644, d_tracer, | 4836 | trace_create_file("current_tracer", 0644, d_tracer, |
4834 | &global_trace, &set_tracer_fops); | 4837 | &global_trace, &set_tracer_fops); |
4835 | 4838 | ||
4836 | #ifdef CONFIG_TRACER_MAX_TRACE | 4839 | #ifdef CONFIG_TRACER_MAX_TRACE |
4837 | trace_create_file("tracing_max_latency", 0644, d_tracer, | 4840 | trace_create_file("tracing_max_latency", 0644, d_tracer, |
4838 | &tracing_max_latency, &tracing_max_lat_fops); | 4841 | &tracing_max_latency, &tracing_max_lat_fops); |
4839 | #endif | 4842 | #endif |
4840 | 4843 | ||
4841 | trace_create_file("tracing_thresh", 0644, d_tracer, | 4844 | trace_create_file("tracing_thresh", 0644, d_tracer, |
4842 | &tracing_thresh, &tracing_max_lat_fops); | 4845 | &tracing_thresh, &tracing_max_lat_fops); |
4843 | 4846 | ||
4844 | trace_create_file("README", 0444, d_tracer, | 4847 | trace_create_file("README", 0444, d_tracer, |
4845 | NULL, &tracing_readme_fops); | 4848 | NULL, &tracing_readme_fops); |
4846 | 4849 | ||
4847 | trace_create_file("trace_pipe", 0444, d_tracer, | 4850 | trace_create_file("trace_pipe", 0444, d_tracer, |
4848 | (void *) TRACE_PIPE_ALL_CPU, &tracing_pipe_fops); | 4851 | (void *) TRACE_PIPE_ALL_CPU, &tracing_pipe_fops); |
4849 | 4852 | ||
4850 | trace_create_file("buffer_size_kb", 0644, d_tracer, | 4853 | trace_create_file("buffer_size_kb", 0644, d_tracer, |
4851 | (void *) RING_BUFFER_ALL_CPUS, &tracing_entries_fops); | 4854 | (void *) RING_BUFFER_ALL_CPUS, &tracing_entries_fops); |
4852 | 4855 | ||
4853 | trace_create_file("buffer_total_size_kb", 0444, d_tracer, | 4856 | trace_create_file("buffer_total_size_kb", 0444, d_tracer, |
4854 | &global_trace, &tracing_total_entries_fops); | 4857 | &global_trace, &tracing_total_entries_fops); |
4855 | 4858 | ||
4856 | trace_create_file("free_buffer", 0644, d_tracer, | 4859 | trace_create_file("free_buffer", 0644, d_tracer, |
4857 | &global_trace, &tracing_free_buffer_fops); | 4860 | &global_trace, &tracing_free_buffer_fops); |
4858 | 4861 | ||
4859 | trace_create_file("trace_marker", 0220, d_tracer, | 4862 | trace_create_file("trace_marker", 0220, d_tracer, |
4860 | NULL, &tracing_mark_fops); | 4863 | NULL, &tracing_mark_fops); |
4861 | 4864 | ||
4862 | trace_create_file("saved_cmdlines", 0444, d_tracer, | 4865 | trace_create_file("saved_cmdlines", 0444, d_tracer, |
4863 | NULL, &tracing_saved_cmdlines_fops); | 4866 | NULL, &tracing_saved_cmdlines_fops); |
4864 | 4867 | ||
4865 | trace_create_file("trace_clock", 0644, d_tracer, NULL, | 4868 | trace_create_file("trace_clock", 0644, d_tracer, NULL, |
4866 | &trace_clock_fops); | 4869 | &trace_clock_fops); |
4867 | 4870 | ||
4868 | trace_create_file("tracing_on", 0644, d_tracer, | 4871 | trace_create_file("tracing_on", 0644, d_tracer, |
4869 | &global_trace, &rb_simple_fops); | 4872 | &global_trace, &rb_simple_fops); |
4870 | 4873 | ||
4871 | #ifdef CONFIG_DYNAMIC_FTRACE | 4874 | #ifdef CONFIG_DYNAMIC_FTRACE |
4872 | trace_create_file("dyn_ftrace_total_info", 0444, d_tracer, | 4875 | trace_create_file("dyn_ftrace_total_info", 0444, d_tracer, |
4873 | &ftrace_update_tot_cnt, &tracing_dyn_info_fops); | 4876 | &ftrace_update_tot_cnt, &tracing_dyn_info_fops); |
4874 | #endif | 4877 | #endif |
4875 | 4878 | ||
4876 | create_trace_options_dir(); | 4879 | create_trace_options_dir(); |
4877 | 4880 | ||
4878 | for_each_tracing_cpu(cpu) | 4881 | for_each_tracing_cpu(cpu) |
4879 | tracing_init_debugfs_percpu(cpu); | 4882 | tracing_init_debugfs_percpu(cpu); |
4880 | 4883 | ||
4881 | return 0; | 4884 | return 0; |
4882 | } | 4885 | } |
4883 | 4886 | ||
4884 | static int trace_panic_handler(struct notifier_block *this, | 4887 | static int trace_panic_handler(struct notifier_block *this, |
4885 | unsigned long event, void *unused) | 4888 | unsigned long event, void *unused) |
4886 | { | 4889 | { |
4887 | if (ftrace_dump_on_oops) | 4890 | if (ftrace_dump_on_oops) |
4888 | ftrace_dump(ftrace_dump_on_oops); | 4891 | ftrace_dump(ftrace_dump_on_oops); |
4889 | return NOTIFY_OK; | 4892 | return NOTIFY_OK; |
4890 | } | 4893 | } |
4891 | 4894 | ||
4892 | static struct notifier_block trace_panic_notifier = { | 4895 | static struct notifier_block trace_panic_notifier = { |
4893 | .notifier_call = trace_panic_handler, | 4896 | .notifier_call = trace_panic_handler, |
4894 | .next = NULL, | 4897 | .next = NULL, |
4895 | .priority = 150 /* priority: INT_MAX >= x >= 0 */ | 4898 | .priority = 150 /* priority: INT_MAX >= x >= 0 */ |
4896 | }; | 4899 | }; |
4897 | 4900 | ||
4898 | static int trace_die_handler(struct notifier_block *self, | 4901 | static int trace_die_handler(struct notifier_block *self, |
4899 | unsigned long val, | 4902 | unsigned long val, |
4900 | void *data) | 4903 | void *data) |
4901 | { | 4904 | { |
4902 | switch (val) { | 4905 | switch (val) { |
4903 | case DIE_OOPS: | 4906 | case DIE_OOPS: |
4904 | if (ftrace_dump_on_oops) | 4907 | if (ftrace_dump_on_oops) |
4905 | ftrace_dump(ftrace_dump_on_oops); | 4908 | ftrace_dump(ftrace_dump_on_oops); |
4906 | break; | 4909 | break; |
4907 | default: | 4910 | default: |
4908 | break; | 4911 | break; |
4909 | } | 4912 | } |
4910 | return NOTIFY_OK; | 4913 | return NOTIFY_OK; |
4911 | } | 4914 | } |
4912 | 4915 | ||
4913 | static struct notifier_block trace_die_notifier = { | 4916 | static struct notifier_block trace_die_notifier = { |
4914 | .notifier_call = trace_die_handler, | 4917 | .notifier_call = trace_die_handler, |
4915 | .priority = 200 | 4918 | .priority = 200 |
4916 | }; | 4919 | }; |
4917 | 4920 | ||
4918 | /* | 4921 | /* |
4919 | * printk is set to max of 1024, we really don't need it that big. | 4922 | * printk is set to max of 1024, we really don't need it that big. |
4920 | * Nothing should be printing 1000 characters anyway. | 4923 | * Nothing should be printing 1000 characters anyway. |
4921 | */ | 4924 | */ |
4922 | #define TRACE_MAX_PRINT 1000 | 4925 | #define TRACE_MAX_PRINT 1000 |
4923 | 4926 | ||
4924 | /* | 4927 | /* |
4925 | * Define here KERN_TRACE so that we have one place to modify | 4928 | * Define here KERN_TRACE so that we have one place to modify |
4926 | * it if we decide to change what log level the ftrace dump | 4929 | * it if we decide to change what log level the ftrace dump |
4927 | * should be at. | 4930 | * should be at. |
4928 | */ | 4931 | */ |
4929 | #define KERN_TRACE KERN_EMERG | 4932 | #define KERN_TRACE KERN_EMERG |
4930 | 4933 | ||
4931 | void | 4934 | void |
4932 | trace_printk_seq(struct trace_seq *s) | 4935 | trace_printk_seq(struct trace_seq *s) |
4933 | { | 4936 | { |
4934 | /* Probably should print a warning here. */ | 4937 | /* Probably should print a warning here. */ |
4935 | if (s->len >= 1000) | 4938 | if (s->len >= 1000) |
4936 | s->len = 1000; | 4939 | s->len = 1000; |
4937 | 4940 | ||
4938 | /* should be zero ended, but we are paranoid. */ | 4941 | /* should be zero ended, but we are paranoid. */ |
4939 | s->buffer[s->len] = 0; | 4942 | s->buffer[s->len] = 0; |
4940 | 4943 | ||
4941 | printk(KERN_TRACE "%s", s->buffer); | 4944 | printk(KERN_TRACE "%s", s->buffer); |
4942 | 4945 | ||
4943 | trace_seq_init(s); | 4946 | trace_seq_init(s); |
4944 | } | 4947 | } |
4945 | 4948 | ||
4946 | void trace_init_global_iter(struct trace_iterator *iter) | 4949 | void trace_init_global_iter(struct trace_iterator *iter) |
4947 | { | 4950 | { |
4948 | iter->tr = &global_trace; | 4951 | iter->tr = &global_trace; |
4949 | iter->trace = current_trace; | 4952 | iter->trace = current_trace; |
4950 | iter->cpu_file = TRACE_PIPE_ALL_CPU; | 4953 | iter->cpu_file = TRACE_PIPE_ALL_CPU; |
4951 | } | 4954 | } |
4952 | 4955 | ||
4953 | static void | 4956 | static void |
4954 | __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode) | 4957 | __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode) |
4955 | { | 4958 | { |
4956 | static arch_spinlock_t ftrace_dump_lock = | 4959 | static arch_spinlock_t ftrace_dump_lock = |
4957 | (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; | 4960 | (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; |
4958 | /* use static because iter can be a bit big for the stack */ | 4961 | /* use static because iter can be a bit big for the stack */ |
4959 | static struct trace_iterator iter; | 4962 | static struct trace_iterator iter; |
4960 | unsigned int old_userobj; | 4963 | unsigned int old_userobj; |
4961 | static int dump_ran; | 4964 | static int dump_ran; |
4962 | unsigned long flags; | 4965 | unsigned long flags; |
4963 | int cnt = 0, cpu; | 4966 | int cnt = 0, cpu; |
4964 | 4967 | ||
4965 | /* only one dump */ | 4968 | /* only one dump */ |
4966 | local_irq_save(flags); | 4969 | local_irq_save(flags); |
4967 | arch_spin_lock(&ftrace_dump_lock); | 4970 | arch_spin_lock(&ftrace_dump_lock); |
4968 | if (dump_ran) | 4971 | if (dump_ran) |
4969 | goto out; | 4972 | goto out; |
4970 | 4973 | ||
4971 | dump_ran = 1; | 4974 | dump_ran = 1; |
4972 | 4975 | ||
4973 | tracing_off(); | 4976 | tracing_off(); |
4974 | 4977 | ||
4975 | /* Did function tracer already get disabled? */ | 4978 | /* Did function tracer already get disabled? */ |
4976 | if (ftrace_is_dead()) { | 4979 | if (ftrace_is_dead()) { |
4977 | printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n"); | 4980 | printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n"); |
4978 | printk("# MAY BE MISSING FUNCTION EVENTS\n"); | 4981 | printk("# MAY BE MISSING FUNCTION EVENTS\n"); |
4979 | } | 4982 | } |
4980 | 4983 | ||
4981 | if (disable_tracing) | 4984 | if (disable_tracing) |
4982 | ftrace_kill(); | 4985 | ftrace_kill(); |
4983 | 4986 | ||
4984 | trace_init_global_iter(&iter); | 4987 | trace_init_global_iter(&iter); |
4985 | 4988 | ||
4986 | for_each_tracing_cpu(cpu) { | 4989 | for_each_tracing_cpu(cpu) { |
4987 | atomic_inc(&iter.tr->data[cpu]->disabled); | 4990 | atomic_inc(&iter.tr->data[cpu]->disabled); |
4988 | } | 4991 | } |
4989 | 4992 | ||
4990 | old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ; | 4993 | old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ; |
4991 | 4994 | ||
4992 | /* don't look at user memory in panic mode */ | 4995 | /* don't look at user memory in panic mode */ |
4993 | trace_flags &= ~TRACE_ITER_SYM_USEROBJ; | 4996 | trace_flags &= ~TRACE_ITER_SYM_USEROBJ; |
4994 | 4997 | ||
4995 | /* Simulate the iterator */ | 4998 | /* Simulate the iterator */ |
4996 | iter.tr = &global_trace; | 4999 | iter.tr = &global_trace; |
4997 | iter.trace = current_trace; | 5000 | iter.trace = current_trace; |
4998 | 5001 | ||
4999 | switch (oops_dump_mode) { | 5002 | switch (oops_dump_mode) { |
5000 | case DUMP_ALL: | 5003 | case DUMP_ALL: |
5001 | iter.cpu_file = TRACE_PIPE_ALL_CPU; | 5004 | iter.cpu_file = TRACE_PIPE_ALL_CPU; |
5002 | break; | 5005 | break; |
5003 | case DUMP_ORIG: | 5006 | case DUMP_ORIG: |
5004 | iter.cpu_file = raw_smp_processor_id(); | 5007 | iter.cpu_file = raw_smp_processor_id(); |
5005 | break; | 5008 | break; |
5006 | case DUMP_NONE: | 5009 | case DUMP_NONE: |
5007 | goto out_enable; | 5010 | goto out_enable; |
5008 | default: | 5011 | default: |
5009 | printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n"); | 5012 | printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n"); |
5010 | iter.cpu_file = TRACE_PIPE_ALL_CPU; | 5013 | iter.cpu_file = TRACE_PIPE_ALL_CPU; |
5011 | } | 5014 | } |
5012 | 5015 | ||
5013 | printk(KERN_TRACE "Dumping ftrace buffer:\n"); | 5016 | printk(KERN_TRACE "Dumping ftrace buffer:\n"); |
5014 | 5017 | ||
5015 | /* | 5018 | /* |
5016 | * We need to stop all tracing on all CPUs to read | 5019 | * We need to stop all tracing on all CPUs to read |
5017 | * the next buffer. This is a bit expensive, but is | 5020 | * the next buffer. This is a bit expensive, but is |
5018 | * not done often. We read everything we can, | 5021 | * not done often. We read everything we can, |
5019 | * and then release the locks again. | 5022 | * and then release the locks again. |
5020 | */ | 5023 | */ |
5021 | 5024 | ||
5022 | while (!trace_empty(&iter)) { | 5025 | while (!trace_empty(&iter)) { |
5023 | 5026 | ||
5024 | if (!cnt) | 5027 | if (!cnt) |
5025 | printk(KERN_TRACE "---------------------------------\n"); | 5028 | printk(KERN_TRACE "---------------------------------\n"); |
5026 | 5029 | ||
5027 | cnt++; | 5030 | cnt++; |
5028 | 5031 | ||
5029 | /* reset all but tr, trace, and overruns */ | 5032 | /* reset all but tr, trace, and overruns */ |
5030 | memset(&iter.seq, 0, | 5033 | memset(&iter.seq, 0, |
5031 | sizeof(struct trace_iterator) - | 5034 | sizeof(struct trace_iterator) - |
5032 | offsetof(struct trace_iterator, seq)); | 5035 | offsetof(struct trace_iterator, seq)); |
5033 | iter.iter_flags |= TRACE_FILE_LAT_FMT; | 5036 | iter.iter_flags |= TRACE_FILE_LAT_FMT; |
5034 | iter.pos = -1; | 5037 | iter.pos = -1; |
5035 | 5038 | ||
5036 | if (trace_find_next_entry_inc(&iter) != NULL) { | 5039 | if (trace_find_next_entry_inc(&iter) != NULL) { |
5037 | int ret; | 5040 | int ret; |
5038 | 5041 | ||
5039 | ret = print_trace_line(&iter); | 5042 | ret = print_trace_line(&iter); |
5040 | if (ret != TRACE_TYPE_NO_CONSUME) | 5043 | if (ret != TRACE_TYPE_NO_CONSUME) |
5041 | trace_consume(&iter); | 5044 | trace_consume(&iter); |
5042 | } | 5045 | } |
5043 | touch_nmi_watchdog(); | 5046 | touch_nmi_watchdog(); |
5044 | 5047 | ||
5045 | trace_printk_seq(&iter.seq); | 5048 | trace_printk_seq(&iter.seq); |
5046 | } | 5049 | } |
5047 | 5050 | ||
5048 | if (!cnt) | 5051 | if (!cnt) |
5049 | printk(KERN_TRACE " (ftrace buffer empty)\n"); | 5052 | printk(KERN_TRACE " (ftrace buffer empty)\n"); |
5050 | else | 5053 | else |
5051 | printk(KERN_TRACE "---------------------------------\n"); | 5054 | printk(KERN_TRACE "---------------------------------\n"); |
5052 | 5055 | ||
5053 | out_enable: | 5056 | out_enable: |
5054 | /* Re-enable tracing if requested */ | 5057 | /* Re-enable tracing if requested */ |
5055 | if (!disable_tracing) { | 5058 | if (!disable_tracing) { |
5056 | trace_flags |= old_userobj; | 5059 | trace_flags |= old_userobj; |
5057 | 5060 | ||
5058 | for_each_tracing_cpu(cpu) { | 5061 | for_each_tracing_cpu(cpu) { |
5059 | atomic_dec(&iter.tr->data[cpu]->disabled); | 5062 | atomic_dec(&iter.tr->data[cpu]->disabled); |
5060 | } | 5063 | } |
5061 | tracing_on(); | 5064 | tracing_on(); |
5062 | } | 5065 | } |
5063 | 5066 | ||
5064 | out: | 5067 | out: |
5065 | arch_spin_unlock(&ftrace_dump_lock); | 5068 | arch_spin_unlock(&ftrace_dump_lock); |
5066 | local_irq_restore(flags); | 5069 | local_irq_restore(flags); |
5067 | } | 5070 | } |
5068 | 5071 | ||
5069 | /* By default: disable tracing after the dump */ | 5072 | /* By default: disable tracing after the dump */ |
5070 | void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) | 5073 | void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) |
5071 | { | 5074 | { |
5072 | __ftrace_dump(true, oops_dump_mode); | 5075 | __ftrace_dump(true, oops_dump_mode); |
5073 | } | 5076 | } |
5074 | EXPORT_SYMBOL_GPL(ftrace_dump); | 5077 | EXPORT_SYMBOL_GPL(ftrace_dump); |
5075 | 5078 | ||
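Because ftrace_dump() is exported (GPL-only), other kernel code can trigger the same one-shot dump that the panic and die notifiers use; per the switch in __ftrace_dump(), DUMP_ALL walks every CPU's buffer while DUMP_ORIG limits the dump to the calling CPU. A hedged sketch of a module error path using it (assuming the declarations live in <linux/kernel.h> for this kernel generation):

	#include <linux/kernel.h>

	/* Illustrative only: dump the ftrace buffer once on a fatal error.
	 * Tracing is left disabled afterwards, as noted above ftrace_dump(). */
	static void my_driver_fatal_error(void)
	{
		pr_err("my_driver: fatal state, dumping ftrace buffer\n");
		ftrace_dump(DUMP_ALL);
	}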
5076 | __init static int tracer_alloc_buffers(void) | 5079 | __init static int tracer_alloc_buffers(void) |
5077 | { | 5080 | { |
5078 | int ring_buf_size; | 5081 | int ring_buf_size; |
5079 | enum ring_buffer_flags rb_flags; | 5082 | enum ring_buffer_flags rb_flags; |
5080 | int i; | 5083 | int i; |
5081 | int ret = -ENOMEM; | 5084 | int ret = -ENOMEM; |
5082 | 5085 | ||
5083 | 5086 | ||
5084 | if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL)) | 5087 | if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL)) |
5085 | goto out; | 5088 | goto out; |
5086 | 5089 | ||
5087 | if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL)) | 5090 | if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL)) |
5088 | goto out_free_buffer_mask; | 5091 | goto out_free_buffer_mask; |
5089 | 5092 | ||
5090 | /* Only allocate trace_printk buffers if a trace_printk exists */ | 5093 | /* Only allocate trace_printk buffers if a trace_printk exists */ |
5091 | if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt) | 5094 | if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt) |
5092 | trace_printk_init_buffers(); | 5095 | trace_printk_init_buffers(); |
5093 | 5096 | ||
5094 | /* To save memory, keep the ring buffer size to its minimum */ | 5097 | /* To save memory, keep the ring buffer size to its minimum */ |
5095 | if (ring_buffer_expanded) | 5098 | if (ring_buffer_expanded) |
5096 | ring_buf_size = trace_buf_size; | 5099 | ring_buf_size = trace_buf_size; |
5097 | else | 5100 | else |
5098 | ring_buf_size = 1; | 5101 | ring_buf_size = 1; |
5099 | 5102 | ||
5100 | rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0; | 5103 | rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0; |
5101 | 5104 | ||
5102 | cpumask_copy(tracing_buffer_mask, cpu_possible_mask); | 5105 | cpumask_copy(tracing_buffer_mask, cpu_possible_mask); |
5103 | cpumask_copy(tracing_cpumask, cpu_all_mask); | 5106 | cpumask_copy(tracing_cpumask, cpu_all_mask); |
5104 | 5107 | ||
5105 | /* TODO: make the number of buffers hot pluggable with CPUS */ | 5108 | /* TODO: make the number of buffers hot pluggable with CPUS */ |
5106 | global_trace.buffer = ring_buffer_alloc(ring_buf_size, rb_flags); | 5109 | global_trace.buffer = ring_buffer_alloc(ring_buf_size, rb_flags); |
5107 | if (!global_trace.buffer) { | 5110 | if (!global_trace.buffer) { |
5108 | printk(KERN_ERR "tracer: failed to allocate ring buffer!\n"); | 5111 | printk(KERN_ERR "tracer: failed to allocate ring buffer!\n"); |
5109 | WARN_ON(1); | 5112 | WARN_ON(1); |
5110 | goto out_free_cpumask; | 5113 | goto out_free_cpumask; |
5111 | } | 5114 | } |
5112 | if (global_trace.buffer_disabled) | 5115 | if (global_trace.buffer_disabled) |
5113 | tracing_off(); | 5116 | tracing_off(); |
5114 | 5117 | ||
5115 | 5118 | ||
5116 | #ifdef CONFIG_TRACER_MAX_TRACE | 5119 | #ifdef CONFIG_TRACER_MAX_TRACE |
5117 | max_tr.buffer = ring_buffer_alloc(1, rb_flags); | 5120 | max_tr.buffer = ring_buffer_alloc(1, rb_flags); |
5118 | if (!max_tr.buffer) { | 5121 | if (!max_tr.buffer) { |
5119 | printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n"); | 5122 | printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n"); |
5120 | WARN_ON(1); | 5123 | WARN_ON(1); |
5121 | ring_buffer_free(global_trace.buffer); | 5124 | ring_buffer_free(global_trace.buffer); |
5122 | goto out_free_cpumask; | 5125 | goto out_free_cpumask; |
5123 | } | 5126 | } |
5124 | #endif | 5127 | #endif |
5125 | 5128 | ||
5126 | /* Allocate the first page for all buffers */ | 5129 | /* Allocate the first page for all buffers */ |
5127 | for_each_tracing_cpu(i) { | 5130 | for_each_tracing_cpu(i) { |
5128 | global_trace.data[i] = &per_cpu(global_trace_cpu, i); | 5131 | global_trace.data[i] = &per_cpu(global_trace_cpu, i); |
5129 | max_tr.data[i] = &per_cpu(max_tr_data, i); | 5132 | max_tr.data[i] = &per_cpu(max_tr_data, i); |
5130 | } | 5133 | } |
5131 | 5134 | ||
5132 | set_buffer_entries(&global_trace, | 5135 | set_buffer_entries(&global_trace, |
5133 | ring_buffer_size(global_trace.buffer, 0)); | 5136 | ring_buffer_size(global_trace.buffer, 0)); |
5134 | #ifdef CONFIG_TRACER_MAX_TRACE | 5137 | #ifdef CONFIG_TRACER_MAX_TRACE |
5135 | set_buffer_entries(&max_tr, 1); | 5138 | set_buffer_entries(&max_tr, 1); |
5136 | #endif | 5139 | #endif |
5137 | 5140 | ||
5138 | trace_init_cmdlines(); | 5141 | trace_init_cmdlines(); |
5139 | 5142 | ||
5140 | register_tracer(&nop_trace); | 5143 | register_tracer(&nop_trace); |
5141 | current_trace = &nop_trace; | 5144 | current_trace = &nop_trace; |
5142 | /* All seems OK, enable tracing */ | 5145 | /* All seems OK, enable tracing */ |
5143 | tracing_disabled = 0; | 5146 | tracing_disabled = 0; |
5144 | 5147 | ||
5145 | atomic_notifier_chain_register(&panic_notifier_list, | 5148 | atomic_notifier_chain_register(&panic_notifier_list, |
5146 | &trace_panic_notifier); | 5149 | &trace_panic_notifier); |
5147 | 5150 | ||
5148 | register_die_notifier(&trace_die_notifier); | 5151 | register_die_notifier(&trace_die_notifier); |
5149 | 5152 | ||
5150 | return 0; | 5153 | return 0; |
5151 | 5154 | ||
5152 | out_free_cpumask: | 5155 | out_free_cpumask: |
5153 | free_cpumask_var(tracing_cpumask); | 5156 | free_cpumask_var(tracing_cpumask); |
5154 | out_free_buffer_mask: | 5157 | out_free_buffer_mask: |
5155 | free_cpumask_var(tracing_buffer_mask); | 5158 | free_cpumask_var(tracing_buffer_mask); |
5156 | out: | 5159 | out: |
5157 | return ret; | 5160 | return ret; |
5158 | } | 5161 | } |
5159 | 5162 | ||
5160 | __init static int clear_boot_tracer(void) | 5163 | __init static int clear_boot_tracer(void) |
5161 | { | 5164 | { |
5162 | /* | 5165 | /* |
5163 | * The default tracer at boot buffer is an init section. | 5166 | * The default tracer at boot buffer is an init section. |
5164 | * This function is called in lateinit. If we did not | 5167 | * This function is called in lateinit. If we did not |
5165 | * find the boot tracer, then clear it out, to prevent | 5168 | * find the boot tracer, then clear it out, to prevent |
5166 | * later registration from accessing the buffer that is | 5169 | * later registration from accessing the buffer that is |
5167 | * about to be freed. | 5170 | * about to be freed. |
5168 | */ | 5171 | */ |
5169 | if (!default_bootup_tracer) | 5172 | if (!default_bootup_tracer) |
5170 | return 0; | 5173 | return 0; |
5171 | 5174 | ||
5172 | printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n", | 5175 | printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n", |
5173 | default_bootup_tracer); | 5176 | default_bootup_tracer); |
5174 | default_bootup_tracer = NULL; | 5177 | default_bootup_tracer = NULL; |
5175 | 5178 | ||
5176 | return 0; | 5179 | return 0; |
5177 | } | 5180 | } |
5178 | 5181 | ||
5179 | early_initcall(tracer_alloc_buffers); | 5182 | early_initcall(tracer_alloc_buffers); |
5180 | fs_initcall(tracer_init_debugfs); | 5183 | fs_initcall(tracer_init_debugfs); |
5181 | late_initcall(clear_boot_tracer); | 5184 | late_initcall(clear_boot_tracer); |
5182 | 5185 |