Commit a358324466b171e145df20bdb74fe81759906de6

Authored by Steven Rostedt
1 parent 4143c5cb36

ring-buffer: buffer record on/off switch

Impact: enable/disable ring buffer recording API added

Several kernel developers have requested that there be a way to stop
recording into the ring buffers with a simple switch that can also
be enabled from userspace. This patch addes a new kernel API to the
ring buffers called:

 tracing_on()
 tracing_off()

When tracing_off() is called, all ring buffers will not be able to record
into their buffers.

tracing_on() will enable the ring buffers again.

These two act like an on/off switch. That is, there is no counting of the
number of times tracing_off or tracing_on has been called.

A new file is added to the debugfs/tracing directory called

  tracing_on

This allows for userspace applications to also flip the switch.

  echo 0 > debugfs/tracing/tracing_on

disables the tracing.

  echo 1 > /debugfs/tracing/tracing_on

enables it.

Note, this does not disable or enable any tracers. It only sets or clears
a flag that needs to be set in order for the ring buffers to write to
their buffers. It is a global flag, and affects all ring buffers.

The buffers start out with tracing_on enabled.

There are now three flags that control recording into the buffers:

 tracing_on: which affects all ring buffer tracers.

 buffer->record_disabled: which affects an allocated buffer, which may be set
     if an anomaly is detected, and tracing is disabled.

 cpu_buffer->record_disabled: which is set by tracing_stop() or if an
     anomaly is detected. tracing_start can not reenable this if
     an anomaly occurred.

The userspace debugfs/tracing/tracing_enabled is implemented with
tracing_stop() but the user space code can not enable it if the kernel
called tracing_stop().

Userspace can enable the tracing_on even if the kernel disabled it.
It is just a switch used to stop tracing if a condition was hit.
tracing_on is not for protecting critical areas in the kernel nor is
it for stopping tracing if an anomaly occurred. This is because userspace
can reenable it at any time.

Side effect: With this patch, I discovered a dead variable in ftrace.c
  called tracing_on. This patch removes it.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>

Showing 3 changed files with 106 additions and 6 deletions Inline Diff

include/linux/ring_buffer.h
1 #ifndef _LINUX_RING_BUFFER_H 1 #ifndef _LINUX_RING_BUFFER_H
2 #define _LINUX_RING_BUFFER_H 2 #define _LINUX_RING_BUFFER_H
3 3
4 #include <linux/mm.h> 4 #include <linux/mm.h>
5 #include <linux/seq_file.h> 5 #include <linux/seq_file.h>
6 6
7 struct ring_buffer; 7 struct ring_buffer;
8 struct ring_buffer_iter; 8 struct ring_buffer_iter;
9 9
10 /* 10 /*
11 * Don't reference this struct directly, use functions below. 11 * Don't reference this struct directly, use functions below.
12 */ 12 */
13 struct ring_buffer_event { 13 struct ring_buffer_event {
14 u32 type:2, len:3, time_delta:27; 14 u32 type:2, len:3, time_delta:27;
15 u32 array[]; 15 u32 array[];
16 }; 16 };
17 17
18 /** 18 /**
19 * enum ring_buffer_type - internal ring buffer types 19 * enum ring_buffer_type - internal ring buffer types
20 * 20 *
21 * @RINGBUF_TYPE_PADDING: Left over page padding 21 * @RINGBUF_TYPE_PADDING: Left over page padding
22 * array is ignored 22 * array is ignored
23 * size is variable depending on how much 23 * size is variable depending on how much
24 * padding is needed 24 * padding is needed
25 * 25 *
26 * @RINGBUF_TYPE_TIME_EXTEND: Extend the time delta 26 * @RINGBUF_TYPE_TIME_EXTEND: Extend the time delta
27 * array[0] = time delta (28 .. 59) 27 * array[0] = time delta (28 .. 59)
28 * size = 8 bytes 28 * size = 8 bytes
29 * 29 *
30 * @RINGBUF_TYPE_TIME_STAMP: Sync time stamp with external clock 30 * @RINGBUF_TYPE_TIME_STAMP: Sync time stamp with external clock
31 * array[0] = tv_nsec 31 * array[0] = tv_nsec
32 * array[1] = tv_sec 32 * array[1] = tv_sec
33 * size = 16 bytes 33 * size = 16 bytes
34 * 34 *
35 * @RINGBUF_TYPE_DATA: Data record 35 * @RINGBUF_TYPE_DATA: Data record
36 * If len is zero: 36 * If len is zero:
37 * array[0] holds the actual length 37 * array[0] holds the actual length
38 * array[1..(length+3)/4-1] holds data 38 * array[1..(length+3)/4-1] holds data
39 * else 39 * else
40 * length = len << 2 40 * length = len << 2
41 * array[0..(length+3)/4] holds data 41 * array[0..(length+3)/4] holds data
42 */ 42 */
43 enum ring_buffer_type { 43 enum ring_buffer_type {
44 RINGBUF_TYPE_PADDING, 44 RINGBUF_TYPE_PADDING,
45 RINGBUF_TYPE_TIME_EXTEND, 45 RINGBUF_TYPE_TIME_EXTEND,
46 /* FIXME: RINGBUF_TYPE_TIME_STAMP not implemented */ 46 /* FIXME: RINGBUF_TYPE_TIME_STAMP not implemented */
47 RINGBUF_TYPE_TIME_STAMP, 47 RINGBUF_TYPE_TIME_STAMP,
48 RINGBUF_TYPE_DATA, 48 RINGBUF_TYPE_DATA,
49 }; 49 };
50 50
51 unsigned ring_buffer_event_length(struct ring_buffer_event *event); 51 unsigned ring_buffer_event_length(struct ring_buffer_event *event);
52 void *ring_buffer_event_data(struct ring_buffer_event *event); 52 void *ring_buffer_event_data(struct ring_buffer_event *event);
53 53
54 /** 54 /**
55 * ring_buffer_event_time_delta - return the delta timestamp of the event 55 * ring_buffer_event_time_delta - return the delta timestamp of the event
56 * @event: the event to get the delta timestamp of 56 * @event: the event to get the delta timestamp of
57 * 57 *
58 * The delta timestamp is the 27 bit timestamp since the last event. 58 * The delta timestamp is the 27 bit timestamp since the last event.
59 */ 59 */
60 static inline unsigned 60 static inline unsigned
61 ring_buffer_event_time_delta(struct ring_buffer_event *event) 61 ring_buffer_event_time_delta(struct ring_buffer_event *event)
62 { 62 {
63 return event->time_delta; 63 return event->time_delta;
64 } 64 }
65 65
66 /* 66 /*
67 * size is in bytes for each per CPU buffer. 67 * size is in bytes for each per CPU buffer.
68 */ 68 */
69 struct ring_buffer * 69 struct ring_buffer *
70 ring_buffer_alloc(unsigned long size, unsigned flags); 70 ring_buffer_alloc(unsigned long size, unsigned flags);
71 void ring_buffer_free(struct ring_buffer *buffer); 71 void ring_buffer_free(struct ring_buffer *buffer);
72 72
73 int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size); 73 int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size);
74 74
75 struct ring_buffer_event * 75 struct ring_buffer_event *
76 ring_buffer_lock_reserve(struct ring_buffer *buffer, 76 ring_buffer_lock_reserve(struct ring_buffer *buffer,
77 unsigned long length, 77 unsigned long length,
78 unsigned long *flags); 78 unsigned long *flags);
79 int ring_buffer_unlock_commit(struct ring_buffer *buffer, 79 int ring_buffer_unlock_commit(struct ring_buffer *buffer,
80 struct ring_buffer_event *event, 80 struct ring_buffer_event *event,
81 unsigned long flags); 81 unsigned long flags);
82 int ring_buffer_write(struct ring_buffer *buffer, 82 int ring_buffer_write(struct ring_buffer *buffer,
83 unsigned long length, void *data); 83 unsigned long length, void *data);
84 84
85 struct ring_buffer_event * 85 struct ring_buffer_event *
86 ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts); 86 ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts);
87 struct ring_buffer_event * 87 struct ring_buffer_event *
88 ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts); 88 ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts);
89 89
90 struct ring_buffer_iter * 90 struct ring_buffer_iter *
91 ring_buffer_read_start(struct ring_buffer *buffer, int cpu); 91 ring_buffer_read_start(struct ring_buffer *buffer, int cpu);
92 void ring_buffer_read_finish(struct ring_buffer_iter *iter); 92 void ring_buffer_read_finish(struct ring_buffer_iter *iter);
93 93
94 struct ring_buffer_event * 94 struct ring_buffer_event *
95 ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts); 95 ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts);
96 struct ring_buffer_event * 96 struct ring_buffer_event *
97 ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts); 97 ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts);
98 void ring_buffer_iter_reset(struct ring_buffer_iter *iter); 98 void ring_buffer_iter_reset(struct ring_buffer_iter *iter);
99 int ring_buffer_iter_empty(struct ring_buffer_iter *iter); 99 int ring_buffer_iter_empty(struct ring_buffer_iter *iter);
100 100
101 unsigned long ring_buffer_size(struct ring_buffer *buffer); 101 unsigned long ring_buffer_size(struct ring_buffer *buffer);
102 102
103 void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu); 103 void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu);
104 void ring_buffer_reset(struct ring_buffer *buffer); 104 void ring_buffer_reset(struct ring_buffer *buffer);
105 105
106 int ring_buffer_swap_cpu(struct ring_buffer *buffer_a, 106 int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
107 struct ring_buffer *buffer_b, int cpu); 107 struct ring_buffer *buffer_b, int cpu);
108 108
109 int ring_buffer_empty(struct ring_buffer *buffer); 109 int ring_buffer_empty(struct ring_buffer *buffer);
110 int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu); 110 int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu);
111 111
112 void ring_buffer_record_disable(struct ring_buffer *buffer); 112 void ring_buffer_record_disable(struct ring_buffer *buffer);
113 void ring_buffer_record_enable(struct ring_buffer *buffer); 113 void ring_buffer_record_enable(struct ring_buffer *buffer);
114 void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu); 114 void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu);
115 void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu); 115 void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu);
116 116
117 unsigned long ring_buffer_entries(struct ring_buffer *buffer); 117 unsigned long ring_buffer_entries(struct ring_buffer *buffer);
118 unsigned long ring_buffer_overruns(struct ring_buffer *buffer); 118 unsigned long ring_buffer_overruns(struct ring_buffer *buffer);
119 119
120 u64 ring_buffer_time_stamp(int cpu); 120 u64 ring_buffer_time_stamp(int cpu);
121 void ring_buffer_normalize_time_stamp(int cpu, u64 *ts); 121 void ring_buffer_normalize_time_stamp(int cpu, u64 *ts);
122 122
123 void tracing_on(void);
124 void tracing_off(void);
125
123 enum ring_buffer_flags { 126 enum ring_buffer_flags {
124 RB_FL_OVERWRITE = 1 << 0, 127 RB_FL_OVERWRITE = 1 << 0,
125 }; 128 };
126 129
127 #endif /* _LINUX_RING_BUFFER_H */ 130 #endif /* _LINUX_RING_BUFFER_H */
128 131
kernel/trace/ftrace.c
1 /* 1 /*
2 * Infrastructure for profiling code inserted by 'gcc -pg'. 2 * Infrastructure for profiling code inserted by 'gcc -pg'.
3 * 3 *
4 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com> 4 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5 * Copyright (C) 2004-2008 Ingo Molnar <mingo@redhat.com> 5 * Copyright (C) 2004-2008 Ingo Molnar <mingo@redhat.com>
6 * 6 *
7 * Originally ported from the -rt patch by: 7 * Originally ported from the -rt patch by:
8 * Copyright (C) 2007 Arnaldo Carvalho de Melo <acme@redhat.com> 8 * Copyright (C) 2007 Arnaldo Carvalho de Melo <acme@redhat.com>
9 * 9 *
10 * Based on code in the latency_tracer, that is: 10 * Based on code in the latency_tracer, that is:
11 * 11 *
12 * Copyright (C) 2004-2006 Ingo Molnar 12 * Copyright (C) 2004-2006 Ingo Molnar
13 * Copyright (C) 2004 William Lee Irwin III 13 * Copyright (C) 2004 William Lee Irwin III
14 */ 14 */
15 15
16 #include <linux/stop_machine.h> 16 #include <linux/stop_machine.h>
17 #include <linux/clocksource.h> 17 #include <linux/clocksource.h>
18 #include <linux/kallsyms.h> 18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h> 19 #include <linux/seq_file.h>
20 #include <linux/debugfs.h> 20 #include <linux/debugfs.h>
21 #include <linux/hardirq.h> 21 #include <linux/hardirq.h>
22 #include <linux/kthread.h> 22 #include <linux/kthread.h>
23 #include <linux/uaccess.h> 23 #include <linux/uaccess.h>
24 #include <linux/kprobes.h> 24 #include <linux/kprobes.h>
25 #include <linux/ftrace.h> 25 #include <linux/ftrace.h>
26 #include <linux/sysctl.h> 26 #include <linux/sysctl.h>
27 #include <linux/ctype.h> 27 #include <linux/ctype.h>
28 #include <linux/list.h> 28 #include <linux/list.h>
29 29
30 #include <asm/ftrace.h> 30 #include <asm/ftrace.h>
31 31
32 #include "trace.h" 32 #include "trace.h"
33 33
34 #define FTRACE_WARN_ON(cond) \ 34 #define FTRACE_WARN_ON(cond) \
35 do { \ 35 do { \
36 if (WARN_ON(cond)) \ 36 if (WARN_ON(cond)) \
37 ftrace_kill(); \ 37 ftrace_kill(); \
38 } while (0) 38 } while (0)
39 39
40 #define FTRACE_WARN_ON_ONCE(cond) \ 40 #define FTRACE_WARN_ON_ONCE(cond) \
41 do { \ 41 do { \
42 if (WARN_ON_ONCE(cond)) \ 42 if (WARN_ON_ONCE(cond)) \
43 ftrace_kill(); \ 43 ftrace_kill(); \
44 } while (0) 44 } while (0)
45 45
46 /* ftrace_enabled is a method to turn ftrace on or off */ 46 /* ftrace_enabled is a method to turn ftrace on or off */
47 int ftrace_enabled __read_mostly; 47 int ftrace_enabled __read_mostly;
48 static int last_ftrace_enabled; 48 static int last_ftrace_enabled;
49 49
50 /* 50 /*
51 * ftrace_disabled is set when an anomaly is discovered. 51 * ftrace_disabled is set when an anomaly is discovered.
52 * ftrace_disabled is much stronger than ftrace_enabled. 52 * ftrace_disabled is much stronger than ftrace_enabled.
53 */ 53 */
54 static int ftrace_disabled __read_mostly; 54 static int ftrace_disabled __read_mostly;
55 55
56 static DEFINE_SPINLOCK(ftrace_lock); 56 static DEFINE_SPINLOCK(ftrace_lock);
57 static DEFINE_MUTEX(ftrace_sysctl_lock); 57 static DEFINE_MUTEX(ftrace_sysctl_lock);
58 58
59 static struct ftrace_ops ftrace_list_end __read_mostly = 59 static struct ftrace_ops ftrace_list_end __read_mostly =
60 { 60 {
61 .func = ftrace_stub, 61 .func = ftrace_stub,
62 }; 62 };
63 63
64 static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end; 64 static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end;
65 ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; 65 ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
66 66
67 static void ftrace_list_func(unsigned long ip, unsigned long parent_ip) 67 static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
68 { 68 {
69 struct ftrace_ops *op = ftrace_list; 69 struct ftrace_ops *op = ftrace_list;
70 70
71 /* in case someone actually ports this to alpha! */ 71 /* in case someone actually ports this to alpha! */
72 read_barrier_depends(); 72 read_barrier_depends();
73 73
74 while (op != &ftrace_list_end) { 74 while (op != &ftrace_list_end) {
75 /* silly alpha */ 75 /* silly alpha */
76 read_barrier_depends(); 76 read_barrier_depends();
77 op->func(ip, parent_ip); 77 op->func(ip, parent_ip);
78 op = op->next; 78 op = op->next;
79 }; 79 };
80 } 80 }
81 81
82 /** 82 /**
83 * clear_ftrace_function - reset the ftrace function 83 * clear_ftrace_function - reset the ftrace function
84 * 84 *
85 * This NULLs the ftrace function and in essence stops 85 * This NULLs the ftrace function and in essence stops
86 * tracing. There may be lag 86 * tracing. There may be lag
87 */ 87 */
88 void clear_ftrace_function(void) 88 void clear_ftrace_function(void)
89 { 89 {
90 ftrace_trace_function = ftrace_stub; 90 ftrace_trace_function = ftrace_stub;
91 } 91 }
92 92
93 static int __register_ftrace_function(struct ftrace_ops *ops) 93 static int __register_ftrace_function(struct ftrace_ops *ops)
94 { 94 {
95 /* should not be called from interrupt context */ 95 /* should not be called from interrupt context */
96 spin_lock(&ftrace_lock); 96 spin_lock(&ftrace_lock);
97 97
98 ops->next = ftrace_list; 98 ops->next = ftrace_list;
99 /* 99 /*
100 * We are entering ops into the ftrace_list but another 100 * We are entering ops into the ftrace_list but another
101 * CPU might be walking that list. We need to make sure 101 * CPU might be walking that list. We need to make sure
102 * the ops->next pointer is valid before another CPU sees 102 * the ops->next pointer is valid before another CPU sees
103 * the ops pointer included into the ftrace_list. 103 * the ops pointer included into the ftrace_list.
104 */ 104 */
105 smp_wmb(); 105 smp_wmb();
106 ftrace_list = ops; 106 ftrace_list = ops;
107 107
108 if (ftrace_enabled) { 108 if (ftrace_enabled) {
109 /* 109 /*
110 * For one func, simply call it directly. 110 * For one func, simply call it directly.
111 * For more than one func, call the chain. 111 * For more than one func, call the chain.
112 */ 112 */
113 if (ops->next == &ftrace_list_end) 113 if (ops->next == &ftrace_list_end)
114 ftrace_trace_function = ops->func; 114 ftrace_trace_function = ops->func;
115 else 115 else
116 ftrace_trace_function = ftrace_list_func; 116 ftrace_trace_function = ftrace_list_func;
117 } 117 }
118 118
119 spin_unlock(&ftrace_lock); 119 spin_unlock(&ftrace_lock);
120 120
121 return 0; 121 return 0;
122 } 122 }
123 123
124 static int __unregister_ftrace_function(struct ftrace_ops *ops) 124 static int __unregister_ftrace_function(struct ftrace_ops *ops)
125 { 125 {
126 struct ftrace_ops **p; 126 struct ftrace_ops **p;
127 int ret = 0; 127 int ret = 0;
128 128
129 /* should not be called from interrupt context */ 129 /* should not be called from interrupt context */
130 spin_lock(&ftrace_lock); 130 spin_lock(&ftrace_lock);
131 131
132 /* 132 /*
133 * If we are removing the last function, then simply point 133 * If we are removing the last function, then simply point
134 * to the ftrace_stub. 134 * to the ftrace_stub.
135 */ 135 */
136 if (ftrace_list == ops && ops->next == &ftrace_list_end) { 136 if (ftrace_list == ops && ops->next == &ftrace_list_end) {
137 ftrace_trace_function = ftrace_stub; 137 ftrace_trace_function = ftrace_stub;
138 ftrace_list = &ftrace_list_end; 138 ftrace_list = &ftrace_list_end;
139 goto out; 139 goto out;
140 } 140 }
141 141
142 for (p = &ftrace_list; *p != &ftrace_list_end; p = &(*p)->next) 142 for (p = &ftrace_list; *p != &ftrace_list_end; p = &(*p)->next)
143 if (*p == ops) 143 if (*p == ops)
144 break; 144 break;
145 145
146 if (*p != ops) { 146 if (*p != ops) {
147 ret = -1; 147 ret = -1;
148 goto out; 148 goto out;
149 } 149 }
150 150
151 *p = (*p)->next; 151 *p = (*p)->next;
152 152
153 if (ftrace_enabled) { 153 if (ftrace_enabled) {
154 /* If we only have one func left, then call that directly */ 154 /* If we only have one func left, then call that directly */
155 if (ftrace_list == &ftrace_list_end || 155 if (ftrace_list == &ftrace_list_end ||
156 ftrace_list->next == &ftrace_list_end) 156 ftrace_list->next == &ftrace_list_end)
157 ftrace_trace_function = ftrace_list->func; 157 ftrace_trace_function = ftrace_list->func;
158 } 158 }
159 159
160 out: 160 out:
161 spin_unlock(&ftrace_lock); 161 spin_unlock(&ftrace_lock);
162 162
163 return ret; 163 return ret;
164 } 164 }
165 165
166 #ifdef CONFIG_DYNAMIC_FTRACE 166 #ifdef CONFIG_DYNAMIC_FTRACE
167 #ifndef CONFIG_FTRACE_MCOUNT_RECORD 167 #ifndef CONFIG_FTRACE_MCOUNT_RECORD
168 # error Dynamic ftrace depends on MCOUNT_RECORD 168 # error Dynamic ftrace depends on MCOUNT_RECORD
169 #endif 169 #endif
170 170
171 /* 171 /*
172 * Since MCOUNT_ADDR may point to mcount itself, we do not want 172 * Since MCOUNT_ADDR may point to mcount itself, we do not want
173 * to get it confused by reading a reference in the code as we 173 * to get it confused by reading a reference in the code as we
174 * are parsing on objcopy output of text. Use a variable for 174 * are parsing on objcopy output of text. Use a variable for
175 * it instead. 175 * it instead.
176 */ 176 */
177 static unsigned long mcount_addr = MCOUNT_ADDR; 177 static unsigned long mcount_addr = MCOUNT_ADDR;
178 178
179 enum { 179 enum {
180 FTRACE_ENABLE_CALLS = (1 << 0), 180 FTRACE_ENABLE_CALLS = (1 << 0),
181 FTRACE_DISABLE_CALLS = (1 << 1), 181 FTRACE_DISABLE_CALLS = (1 << 1),
182 FTRACE_UPDATE_TRACE_FUNC = (1 << 2), 182 FTRACE_UPDATE_TRACE_FUNC = (1 << 2),
183 FTRACE_ENABLE_MCOUNT = (1 << 3), 183 FTRACE_ENABLE_MCOUNT = (1 << 3),
184 FTRACE_DISABLE_MCOUNT = (1 << 4), 184 FTRACE_DISABLE_MCOUNT = (1 << 4),
185 }; 185 };
186 186
187 static int ftrace_filtered; 187 static int ftrace_filtered;
188 static int tracing_on;
189 188
190 static LIST_HEAD(ftrace_new_addrs); 189 static LIST_HEAD(ftrace_new_addrs);
191 190
192 static DEFINE_MUTEX(ftrace_regex_lock); 191 static DEFINE_MUTEX(ftrace_regex_lock);
193 192
194 struct ftrace_page { 193 struct ftrace_page {
195 struct ftrace_page *next; 194 struct ftrace_page *next;
196 unsigned long index; 195 unsigned long index;
197 struct dyn_ftrace records[]; 196 struct dyn_ftrace records[];
198 }; 197 };
199 198
200 #define ENTRIES_PER_PAGE \ 199 #define ENTRIES_PER_PAGE \
201 ((PAGE_SIZE - sizeof(struct ftrace_page)) / sizeof(struct dyn_ftrace)) 200 ((PAGE_SIZE - sizeof(struct ftrace_page)) / sizeof(struct dyn_ftrace))
202 201
203 /* estimate from running different kernels */ 202 /* estimate from running different kernels */
204 #define NR_TO_INIT 10000 203 #define NR_TO_INIT 10000
205 204
206 static struct ftrace_page *ftrace_pages_start; 205 static struct ftrace_page *ftrace_pages_start;
207 static struct ftrace_page *ftrace_pages; 206 static struct ftrace_page *ftrace_pages;
208 207
209 static struct dyn_ftrace *ftrace_free_records; 208 static struct dyn_ftrace *ftrace_free_records;
210 209
211 210
212 #ifdef CONFIG_KPROBES 211 #ifdef CONFIG_KPROBES
213 212
214 static int frozen_record_count; 213 static int frozen_record_count;
215 214
216 static inline void freeze_record(struct dyn_ftrace *rec) 215 static inline void freeze_record(struct dyn_ftrace *rec)
217 { 216 {
218 if (!(rec->flags & FTRACE_FL_FROZEN)) { 217 if (!(rec->flags & FTRACE_FL_FROZEN)) {
219 rec->flags |= FTRACE_FL_FROZEN; 218 rec->flags |= FTRACE_FL_FROZEN;
220 frozen_record_count++; 219 frozen_record_count++;
221 } 220 }
222 } 221 }
223 222
224 static inline void unfreeze_record(struct dyn_ftrace *rec) 223 static inline void unfreeze_record(struct dyn_ftrace *rec)
225 { 224 {
226 if (rec->flags & FTRACE_FL_FROZEN) { 225 if (rec->flags & FTRACE_FL_FROZEN) {
227 rec->flags &= ~FTRACE_FL_FROZEN; 226 rec->flags &= ~FTRACE_FL_FROZEN;
228 frozen_record_count--; 227 frozen_record_count--;
229 } 228 }
230 } 229 }
231 230
232 static inline int record_frozen(struct dyn_ftrace *rec) 231 static inline int record_frozen(struct dyn_ftrace *rec)
233 { 232 {
234 return rec->flags & FTRACE_FL_FROZEN; 233 return rec->flags & FTRACE_FL_FROZEN;
235 } 234 }
236 #else 235 #else
237 # define freeze_record(rec) ({ 0; }) 236 # define freeze_record(rec) ({ 0; })
238 # define unfreeze_record(rec) ({ 0; }) 237 # define unfreeze_record(rec) ({ 0; })
239 # define record_frozen(rec) ({ 0; }) 238 # define record_frozen(rec) ({ 0; })
240 #endif /* CONFIG_KPROBES */ 239 #endif /* CONFIG_KPROBES */
241 240
242 static void ftrace_free_rec(struct dyn_ftrace *rec) 241 static void ftrace_free_rec(struct dyn_ftrace *rec)
243 { 242 {
244 rec->ip = (unsigned long)ftrace_free_records; 243 rec->ip = (unsigned long)ftrace_free_records;
245 ftrace_free_records = rec; 244 ftrace_free_records = rec;
246 rec->flags |= FTRACE_FL_FREE; 245 rec->flags |= FTRACE_FL_FREE;
247 } 246 }
248 247
249 void ftrace_release(void *start, unsigned long size) 248 void ftrace_release(void *start, unsigned long size)
250 { 249 {
251 struct dyn_ftrace *rec; 250 struct dyn_ftrace *rec;
252 struct ftrace_page *pg; 251 struct ftrace_page *pg;
253 unsigned long s = (unsigned long)start; 252 unsigned long s = (unsigned long)start;
254 unsigned long e = s + size; 253 unsigned long e = s + size;
255 int i; 254 int i;
256 255
257 if (ftrace_disabled || !start) 256 if (ftrace_disabled || !start)
258 return; 257 return;
259 258
260 /* should not be called from interrupt context */ 259 /* should not be called from interrupt context */
261 spin_lock(&ftrace_lock); 260 spin_lock(&ftrace_lock);
262 261
263 for (pg = ftrace_pages_start; pg; pg = pg->next) { 262 for (pg = ftrace_pages_start; pg; pg = pg->next) {
264 for (i = 0; i < pg->index; i++) { 263 for (i = 0; i < pg->index; i++) {
265 rec = &pg->records[i]; 264 rec = &pg->records[i];
266 265
267 if ((rec->ip >= s) && (rec->ip < e)) 266 if ((rec->ip >= s) && (rec->ip < e))
268 ftrace_free_rec(rec); 267 ftrace_free_rec(rec);
269 } 268 }
270 } 269 }
271 spin_unlock(&ftrace_lock); 270 spin_unlock(&ftrace_lock);
272 } 271 }
273 272
274 static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) 273 static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
275 { 274 {
276 struct dyn_ftrace *rec; 275 struct dyn_ftrace *rec;
277 276
278 /* First check for freed records */ 277 /* First check for freed records */
279 if (ftrace_free_records) { 278 if (ftrace_free_records) {
280 rec = ftrace_free_records; 279 rec = ftrace_free_records;
281 280
282 if (unlikely(!(rec->flags & FTRACE_FL_FREE))) { 281 if (unlikely(!(rec->flags & FTRACE_FL_FREE))) {
283 FTRACE_WARN_ON_ONCE(1); 282 FTRACE_WARN_ON_ONCE(1);
284 ftrace_free_records = NULL; 283 ftrace_free_records = NULL;
285 return NULL; 284 return NULL;
286 } 285 }
287 286
288 ftrace_free_records = (void *)rec->ip; 287 ftrace_free_records = (void *)rec->ip;
289 memset(rec, 0, sizeof(*rec)); 288 memset(rec, 0, sizeof(*rec));
290 return rec; 289 return rec;
291 } 290 }
292 291
293 if (ftrace_pages->index == ENTRIES_PER_PAGE) { 292 if (ftrace_pages->index == ENTRIES_PER_PAGE) {
294 if (!ftrace_pages->next) { 293 if (!ftrace_pages->next) {
295 /* allocate another page */ 294 /* allocate another page */
296 ftrace_pages->next = 295 ftrace_pages->next =
297 (void *)get_zeroed_page(GFP_KERNEL); 296 (void *)get_zeroed_page(GFP_KERNEL);
298 if (!ftrace_pages->next) 297 if (!ftrace_pages->next)
299 return NULL; 298 return NULL;
300 } 299 }
301 ftrace_pages = ftrace_pages->next; 300 ftrace_pages = ftrace_pages->next;
302 } 301 }
303 302
304 return &ftrace_pages->records[ftrace_pages->index++]; 303 return &ftrace_pages->records[ftrace_pages->index++];
305 } 304 }
306 305
307 static struct dyn_ftrace * 306 static struct dyn_ftrace *
308 ftrace_record_ip(unsigned long ip) 307 ftrace_record_ip(unsigned long ip)
309 { 308 {
310 struct dyn_ftrace *rec; 309 struct dyn_ftrace *rec;
311 310
312 if (!ftrace_enabled || ftrace_disabled) 311 if (!ftrace_enabled || ftrace_disabled)
313 return NULL; 312 return NULL;
314 313
315 rec = ftrace_alloc_dyn_node(ip); 314 rec = ftrace_alloc_dyn_node(ip);
316 if (!rec) 315 if (!rec)
317 return NULL; 316 return NULL;
318 317
319 rec->ip = ip; 318 rec->ip = ip;
320 319
321 list_add(&rec->list, &ftrace_new_addrs); 320 list_add(&rec->list, &ftrace_new_addrs);
322 321
323 return rec; 322 return rec;
324 } 323 }
325 324
326 #define FTRACE_ADDR ((long)(ftrace_caller)) 325 #define FTRACE_ADDR ((long)(ftrace_caller))
327 326
328 static int 327 static int
329 __ftrace_replace_code(struct dyn_ftrace *rec, 328 __ftrace_replace_code(struct dyn_ftrace *rec,
330 unsigned char *old, unsigned char *new, int enable) 329 unsigned char *old, unsigned char *new, int enable)
331 { 330 {
332 unsigned long ip, fl; 331 unsigned long ip, fl;
333 332
334 ip = rec->ip; 333 ip = rec->ip;
335 334
336 if (ftrace_filtered && enable) { 335 if (ftrace_filtered && enable) {
337 /* 336 /*
338 * If filtering is on: 337 * If filtering is on:
339 * 338 *
340 * If this record is set to be filtered and 339 * If this record is set to be filtered and
341 * is enabled then do nothing. 340 * is enabled then do nothing.
342 * 341 *
343 * If this record is set to be filtered and 342 * If this record is set to be filtered and
344 * it is not enabled, enable it. 343 * it is not enabled, enable it.
345 * 344 *
346 * If this record is not set to be filtered 345 * If this record is not set to be filtered
347 * and it is not enabled do nothing. 346 * and it is not enabled do nothing.
348 * 347 *
349 * If this record is set not to trace then 348 * If this record is set not to trace then
350 * do nothing. 349 * do nothing.
351 * 350 *
352 * If this record is set not to trace and 351 * If this record is set not to trace and
353 * it is enabled then disable it. 352 * it is enabled then disable it.
354 * 353 *
355 * If this record is not set to be filtered and 354 * If this record is not set to be filtered and
356 * it is enabled, disable it. 355 * it is enabled, disable it.
357 */ 356 */
358 357
359 fl = rec->flags & (FTRACE_FL_FILTER | FTRACE_FL_NOTRACE | 358 fl = rec->flags & (FTRACE_FL_FILTER | FTRACE_FL_NOTRACE |
360 FTRACE_FL_ENABLED); 359 FTRACE_FL_ENABLED);
361 360
362 if ((fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED)) || 361 if ((fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED)) ||
363 (fl == (FTRACE_FL_FILTER | FTRACE_FL_NOTRACE)) || 362 (fl == (FTRACE_FL_FILTER | FTRACE_FL_NOTRACE)) ||
364 !fl || (fl == FTRACE_FL_NOTRACE)) 363 !fl || (fl == FTRACE_FL_NOTRACE))
365 return 0; 364 return 0;
366 365
367 /* 366 /*
368 * If it is enabled disable it, 367 * If it is enabled disable it,
369 * otherwise enable it! 368 * otherwise enable it!
370 */ 369 */
371 if (fl & FTRACE_FL_ENABLED) { 370 if (fl & FTRACE_FL_ENABLED) {
372 /* swap new and old */ 371 /* swap new and old */
373 new = old; 372 new = old;
374 old = ftrace_call_replace(ip, FTRACE_ADDR); 373 old = ftrace_call_replace(ip, FTRACE_ADDR);
375 rec->flags &= ~FTRACE_FL_ENABLED; 374 rec->flags &= ~FTRACE_FL_ENABLED;
376 } else { 375 } else {
377 new = ftrace_call_replace(ip, FTRACE_ADDR); 376 new = ftrace_call_replace(ip, FTRACE_ADDR);
378 rec->flags |= FTRACE_FL_ENABLED; 377 rec->flags |= FTRACE_FL_ENABLED;
379 } 378 }
380 } else { 379 } else {
381 380
382 if (enable) { 381 if (enable) {
383 /* 382 /*
384 * If this record is set not to trace and is 383 * If this record is set not to trace and is
385 * not enabled, do nothing. 384 * not enabled, do nothing.
386 */ 385 */
387 fl = rec->flags & (FTRACE_FL_NOTRACE | FTRACE_FL_ENABLED); 386 fl = rec->flags & (FTRACE_FL_NOTRACE | FTRACE_FL_ENABLED);
388 if (fl == FTRACE_FL_NOTRACE) 387 if (fl == FTRACE_FL_NOTRACE)
389 return 0; 388 return 0;
390 389
391 new = ftrace_call_replace(ip, FTRACE_ADDR); 390 new = ftrace_call_replace(ip, FTRACE_ADDR);
392 } else 391 } else
393 old = ftrace_call_replace(ip, FTRACE_ADDR); 392 old = ftrace_call_replace(ip, FTRACE_ADDR);
394 393
395 if (enable) { 394 if (enable) {
396 if (rec->flags & FTRACE_FL_ENABLED) 395 if (rec->flags & FTRACE_FL_ENABLED)
397 return 0; 396 return 0;
398 rec->flags |= FTRACE_FL_ENABLED; 397 rec->flags |= FTRACE_FL_ENABLED;
399 } else { 398 } else {
400 if (!(rec->flags & FTRACE_FL_ENABLED)) 399 if (!(rec->flags & FTRACE_FL_ENABLED))
401 return 0; 400 return 0;
402 rec->flags &= ~FTRACE_FL_ENABLED; 401 rec->flags &= ~FTRACE_FL_ENABLED;
403 } 402 }
404 } 403 }
405 404
406 return ftrace_modify_code(ip, old, new); 405 return ftrace_modify_code(ip, old, new);
407 } 406 }
408 407
409 static void ftrace_replace_code(int enable) 408 static void ftrace_replace_code(int enable)
410 { 409 {
411 int i, failed; 410 int i, failed;
412 unsigned char *new = NULL, *old = NULL; 411 unsigned char *new = NULL, *old = NULL;
413 struct dyn_ftrace *rec; 412 struct dyn_ftrace *rec;
414 struct ftrace_page *pg; 413 struct ftrace_page *pg;
415 414
416 if (enable) 415 if (enable)
417 old = ftrace_nop_replace(); 416 old = ftrace_nop_replace();
418 else 417 else
419 new = ftrace_nop_replace(); 418 new = ftrace_nop_replace();
420 419
421 for (pg = ftrace_pages_start; pg; pg = pg->next) { 420 for (pg = ftrace_pages_start; pg; pg = pg->next) {
422 for (i = 0; i < pg->index; i++) { 421 for (i = 0; i < pg->index; i++) {
423 rec = &pg->records[i]; 422 rec = &pg->records[i];
424 423
425 /* don't modify code that has already faulted */ 424 /* don't modify code that has already faulted */
426 if (rec->flags & FTRACE_FL_FAILED) 425 if (rec->flags & FTRACE_FL_FAILED)
427 continue; 426 continue;
428 427
429 /* ignore updates to this record's mcount site */ 428 /* ignore updates to this record's mcount site */
430 if (get_kprobe((void *)rec->ip)) { 429 if (get_kprobe((void *)rec->ip)) {
431 freeze_record(rec); 430 freeze_record(rec);
432 continue; 431 continue;
433 } else { 432 } else {
434 unfreeze_record(rec); 433 unfreeze_record(rec);
435 } 434 }
436 435
437 failed = __ftrace_replace_code(rec, old, new, enable); 436 failed = __ftrace_replace_code(rec, old, new, enable);
438 if (failed && (rec->flags & FTRACE_FL_CONVERTED)) { 437 if (failed && (rec->flags & FTRACE_FL_CONVERTED)) {
439 rec->flags |= FTRACE_FL_FAILED; 438 rec->flags |= FTRACE_FL_FAILED;
440 if ((system_state == SYSTEM_BOOTING) || 439 if ((system_state == SYSTEM_BOOTING) ||
441 !core_kernel_text(rec->ip)) { 440 !core_kernel_text(rec->ip)) {
442 ftrace_free_rec(rec); 441 ftrace_free_rec(rec);
443 } 442 }
444 } 443 }
445 } 444 }
446 } 445 }
447 } 446 }
448 447
449 static void print_ip_ins(const char *fmt, unsigned char *p) 448 static void print_ip_ins(const char *fmt, unsigned char *p)
450 { 449 {
451 int i; 450 int i;
452 451
453 printk(KERN_CONT "%s", fmt); 452 printk(KERN_CONT "%s", fmt);
454 453
455 for (i = 0; i < MCOUNT_INSN_SIZE; i++) 454 for (i = 0; i < MCOUNT_INSN_SIZE; i++)
456 printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]); 455 printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]);
457 } 456 }
458 457
459 static int 458 static int
460 ftrace_code_disable(struct dyn_ftrace *rec) 459 ftrace_code_disable(struct dyn_ftrace *rec)
461 { 460 {
462 unsigned long ip; 461 unsigned long ip;
463 unsigned char *nop, *call; 462 unsigned char *nop, *call;
464 int ret; 463 int ret;
465 464
466 ip = rec->ip; 465 ip = rec->ip;
467 466
468 nop = ftrace_nop_replace(); 467 nop = ftrace_nop_replace();
469 call = ftrace_call_replace(ip, mcount_addr); 468 call = ftrace_call_replace(ip, mcount_addr);
470 469
471 ret = ftrace_modify_code(ip, call, nop); 470 ret = ftrace_modify_code(ip, call, nop);
472 if (ret) { 471 if (ret) {
473 switch (ret) { 472 switch (ret) {
474 case -EFAULT: 473 case -EFAULT:
475 FTRACE_WARN_ON_ONCE(1); 474 FTRACE_WARN_ON_ONCE(1);
476 pr_info("ftrace faulted on modifying "); 475 pr_info("ftrace faulted on modifying ");
477 print_ip_sym(ip); 476 print_ip_sym(ip);
478 break; 477 break;
479 case -EINVAL: 478 case -EINVAL:
480 FTRACE_WARN_ON_ONCE(1); 479 FTRACE_WARN_ON_ONCE(1);
481 pr_info("ftrace failed to modify "); 480 pr_info("ftrace failed to modify ");
482 print_ip_sym(ip); 481 print_ip_sym(ip);
483 print_ip_ins(" expected: ", call); 482 print_ip_ins(" expected: ", call);
484 print_ip_ins(" actual: ", (unsigned char *)ip); 483 print_ip_ins(" actual: ", (unsigned char *)ip);
485 print_ip_ins(" replace: ", nop); 484 print_ip_ins(" replace: ", nop);
486 printk(KERN_CONT "\n"); 485 printk(KERN_CONT "\n");
487 break; 486 break;
488 case -EPERM: 487 case -EPERM:
489 FTRACE_WARN_ON_ONCE(1); 488 FTRACE_WARN_ON_ONCE(1);
490 pr_info("ftrace faulted on writing "); 489 pr_info("ftrace faulted on writing ");
491 print_ip_sym(ip); 490 print_ip_sym(ip);
492 break; 491 break;
493 default: 492 default:
494 FTRACE_WARN_ON_ONCE(1); 493 FTRACE_WARN_ON_ONCE(1);
495 pr_info("ftrace faulted on unknown error "); 494 pr_info("ftrace faulted on unknown error ");
496 print_ip_sym(ip); 495 print_ip_sym(ip);
497 } 496 }
498 497
499 rec->flags |= FTRACE_FL_FAILED; 498 rec->flags |= FTRACE_FL_FAILED;
500 return 0; 499 return 0;
501 } 500 }
502 return 1; 501 return 1;
503 } 502 }
504 503
505 static int __ftrace_modify_code(void *data) 504 static int __ftrace_modify_code(void *data)
506 { 505 {
507 int *command = data; 506 int *command = data;
508 507
509 if (*command & FTRACE_ENABLE_CALLS) { 508 if (*command & FTRACE_ENABLE_CALLS)
510 ftrace_replace_code(1); 509 ftrace_replace_code(1);
511 tracing_on = 1; 510 else if (*command & FTRACE_DISABLE_CALLS)
512 } else if (*command & FTRACE_DISABLE_CALLS) {
513 ftrace_replace_code(0); 511 ftrace_replace_code(0);
514 tracing_on = 0;
515 }
516 512
517 if (*command & FTRACE_UPDATE_TRACE_FUNC) 513 if (*command & FTRACE_UPDATE_TRACE_FUNC)
518 ftrace_update_ftrace_func(ftrace_trace_function); 514 ftrace_update_ftrace_func(ftrace_trace_function);
519 515
520 return 0; 516 return 0;
521 } 517 }
522 518
523 static void ftrace_run_update_code(int command) 519 static void ftrace_run_update_code(int command)
524 { 520 {
525 stop_machine(__ftrace_modify_code, &command, NULL); 521 stop_machine(__ftrace_modify_code, &command, NULL);
526 } 522 }
527 523
528 static ftrace_func_t saved_ftrace_func; 524 static ftrace_func_t saved_ftrace_func;
529 static int ftrace_start; 525 static int ftrace_start;
530 static DEFINE_MUTEX(ftrace_start_lock); 526 static DEFINE_MUTEX(ftrace_start_lock);
531 527
532 static void ftrace_startup(void) 528 static void ftrace_startup(void)
533 { 529 {
534 int command = 0; 530 int command = 0;
535 531
536 if (unlikely(ftrace_disabled)) 532 if (unlikely(ftrace_disabled))
537 return; 533 return;
538 534
539 mutex_lock(&ftrace_start_lock); 535 mutex_lock(&ftrace_start_lock);
540 ftrace_start++; 536 ftrace_start++;
541 if (ftrace_start == 1) 537 if (ftrace_start == 1)
542 command |= FTRACE_ENABLE_CALLS; 538 command |= FTRACE_ENABLE_CALLS;
543 539
544 if (saved_ftrace_func != ftrace_trace_function) { 540 if (saved_ftrace_func != ftrace_trace_function) {
545 saved_ftrace_func = ftrace_trace_function; 541 saved_ftrace_func = ftrace_trace_function;
546 command |= FTRACE_UPDATE_TRACE_FUNC; 542 command |= FTRACE_UPDATE_TRACE_FUNC;
547 } 543 }
548 544
549 if (!command || !ftrace_enabled) 545 if (!command || !ftrace_enabled)
550 goto out; 546 goto out;
551 547
552 ftrace_run_update_code(command); 548 ftrace_run_update_code(command);
553 out: 549 out:
554 mutex_unlock(&ftrace_start_lock); 550 mutex_unlock(&ftrace_start_lock);
555 } 551 }
556 552
557 static void ftrace_shutdown(void) 553 static void ftrace_shutdown(void)
558 { 554 {
559 int command = 0; 555 int command = 0;
560 556
561 if (unlikely(ftrace_disabled)) 557 if (unlikely(ftrace_disabled))
562 return; 558 return;
563 559
564 mutex_lock(&ftrace_start_lock); 560 mutex_lock(&ftrace_start_lock);
565 ftrace_start--; 561 ftrace_start--;
566 if (!ftrace_start) 562 if (!ftrace_start)
567 command |= FTRACE_DISABLE_CALLS; 563 command |= FTRACE_DISABLE_CALLS;
568 564
569 if (saved_ftrace_func != ftrace_trace_function) { 565 if (saved_ftrace_func != ftrace_trace_function) {
570 saved_ftrace_func = ftrace_trace_function; 566 saved_ftrace_func = ftrace_trace_function;
571 command |= FTRACE_UPDATE_TRACE_FUNC; 567 command |= FTRACE_UPDATE_TRACE_FUNC;
572 } 568 }
573 569
574 if (!command || !ftrace_enabled) 570 if (!command || !ftrace_enabled)
575 goto out; 571 goto out;
576 572
577 ftrace_run_update_code(command); 573 ftrace_run_update_code(command);
578 out: 574 out:
579 mutex_unlock(&ftrace_start_lock); 575 mutex_unlock(&ftrace_start_lock);
580 } 576 }
581 577
582 static void ftrace_startup_sysctl(void) 578 static void ftrace_startup_sysctl(void)
583 { 579 {
584 int command = FTRACE_ENABLE_MCOUNT; 580 int command = FTRACE_ENABLE_MCOUNT;
585 581
586 if (unlikely(ftrace_disabled)) 582 if (unlikely(ftrace_disabled))
587 return; 583 return;
588 584
589 mutex_lock(&ftrace_start_lock); 585 mutex_lock(&ftrace_start_lock);
590 /* Force update next time */ 586 /* Force update next time */
591 saved_ftrace_func = NULL; 587 saved_ftrace_func = NULL;
592 /* ftrace_start is true if we want ftrace running */ 588 /* ftrace_start is true if we want ftrace running */
593 if (ftrace_start) 589 if (ftrace_start)
594 command |= FTRACE_ENABLE_CALLS; 590 command |= FTRACE_ENABLE_CALLS;
595 591
596 ftrace_run_update_code(command); 592 ftrace_run_update_code(command);
597 mutex_unlock(&ftrace_start_lock); 593 mutex_unlock(&ftrace_start_lock);
598 } 594 }
599 595
600 static void ftrace_shutdown_sysctl(void) 596 static void ftrace_shutdown_sysctl(void)
601 { 597 {
602 int command = FTRACE_DISABLE_MCOUNT; 598 int command = FTRACE_DISABLE_MCOUNT;
603 599
604 if (unlikely(ftrace_disabled)) 600 if (unlikely(ftrace_disabled))
605 return; 601 return;
606 602
607 mutex_lock(&ftrace_start_lock); 603 mutex_lock(&ftrace_start_lock);
608 /* ftrace_start is true if ftrace is running */ 604 /* ftrace_start is true if ftrace is running */
609 if (ftrace_start) 605 if (ftrace_start)
610 command |= FTRACE_DISABLE_CALLS; 606 command |= FTRACE_DISABLE_CALLS;
611 607
612 ftrace_run_update_code(command); 608 ftrace_run_update_code(command);
613 mutex_unlock(&ftrace_start_lock); 609 mutex_unlock(&ftrace_start_lock);
614 } 610 }
615 611
616 static cycle_t ftrace_update_time; 612 static cycle_t ftrace_update_time;
617 static unsigned long ftrace_update_cnt; 613 static unsigned long ftrace_update_cnt;
618 unsigned long ftrace_update_tot_cnt; 614 unsigned long ftrace_update_tot_cnt;
619 615
620 static int ftrace_update_code(void) 616 static int ftrace_update_code(void)
621 { 617 {
622 struct dyn_ftrace *p, *t; 618 struct dyn_ftrace *p, *t;
623 cycle_t start, stop; 619 cycle_t start, stop;
624 620
625 start = ftrace_now(raw_smp_processor_id()); 621 start = ftrace_now(raw_smp_processor_id());
626 ftrace_update_cnt = 0; 622 ftrace_update_cnt = 0;
627 623
628 list_for_each_entry_safe(p, t, &ftrace_new_addrs, list) { 624 list_for_each_entry_safe(p, t, &ftrace_new_addrs, list) {
629 625
630 /* If something went wrong, bail without enabling anything */ 626 /* If something went wrong, bail without enabling anything */
631 if (unlikely(ftrace_disabled)) 627 if (unlikely(ftrace_disabled))
632 return -1; 628 return -1;
633 629
634 list_del_init(&p->list); 630 list_del_init(&p->list);
635 631
636 /* convert record (i.e, patch mcount-call with NOP) */ 632 /* convert record (i.e, patch mcount-call with NOP) */
637 if (ftrace_code_disable(p)) { 633 if (ftrace_code_disable(p)) {
638 p->flags |= FTRACE_FL_CONVERTED; 634 p->flags |= FTRACE_FL_CONVERTED;
639 ftrace_update_cnt++; 635 ftrace_update_cnt++;
640 } else 636 } else
641 ftrace_free_rec(p); 637 ftrace_free_rec(p);
642 } 638 }
643 639
644 stop = ftrace_now(raw_smp_processor_id()); 640 stop = ftrace_now(raw_smp_processor_id());
645 ftrace_update_time = stop - start; 641 ftrace_update_time = stop - start;
646 ftrace_update_tot_cnt += ftrace_update_cnt; 642 ftrace_update_tot_cnt += ftrace_update_cnt;
647 643
648 return 0; 644 return 0;
649 } 645 }
650 646
651 static int __init ftrace_dyn_table_alloc(unsigned long num_to_init) 647 static int __init ftrace_dyn_table_alloc(unsigned long num_to_init)
652 { 648 {
653 struct ftrace_page *pg; 649 struct ftrace_page *pg;
654 int cnt; 650 int cnt;
655 int i; 651 int i;
656 652
657 /* allocate a few pages */ 653 /* allocate a few pages */
658 ftrace_pages_start = (void *)get_zeroed_page(GFP_KERNEL); 654 ftrace_pages_start = (void *)get_zeroed_page(GFP_KERNEL);
659 if (!ftrace_pages_start) 655 if (!ftrace_pages_start)
660 return -1; 656 return -1;
661 657
662 /* 658 /*
663 * Allocate a few more pages. 659 * Allocate a few more pages.
664 * 660 *
665 * TODO: have some parser search vmlinux before 661 * TODO: have some parser search vmlinux before
666 * final linking to find all calls to ftrace. 662 * final linking to find all calls to ftrace.
667 * Then we can: 663 * Then we can:
668 * a) know how many pages to allocate. 664 * a) know how many pages to allocate.
669 * and/or 665 * and/or
670 * b) set up the table then. 666 * b) set up the table then.
671 * 667 *
672 * The dynamic code is still necessary for 668 * The dynamic code is still necessary for
673 * modules. 669 * modules.
674 */ 670 */
675 671
676 pg = ftrace_pages = ftrace_pages_start; 672 pg = ftrace_pages = ftrace_pages_start;
677 673
678 cnt = num_to_init / ENTRIES_PER_PAGE; 674 cnt = num_to_init / ENTRIES_PER_PAGE;
679 pr_info("ftrace: allocating %ld entries in %d pages\n", 675 pr_info("ftrace: allocating %ld entries in %d pages\n",
680 num_to_init, cnt); 676 num_to_init, cnt);
681 677
682 for (i = 0; i < cnt; i++) { 678 for (i = 0; i < cnt; i++) {
683 pg->next = (void *)get_zeroed_page(GFP_KERNEL); 679 pg->next = (void *)get_zeroed_page(GFP_KERNEL);
684 680
685 /* If we fail, we'll try later anyway */ 681 /* If we fail, we'll try later anyway */
686 if (!pg->next) 682 if (!pg->next)
687 break; 683 break;
688 684
689 pg = pg->next; 685 pg = pg->next;
690 } 686 }
691 687
692 return 0; 688 return 0;
693 } 689 }
694 690
695 enum { 691 enum {
696 FTRACE_ITER_FILTER = (1 << 0), 692 FTRACE_ITER_FILTER = (1 << 0),
697 FTRACE_ITER_CONT = (1 << 1), 693 FTRACE_ITER_CONT = (1 << 1),
698 FTRACE_ITER_NOTRACE = (1 << 2), 694 FTRACE_ITER_NOTRACE = (1 << 2),
699 FTRACE_ITER_FAILURES = (1 << 3), 695 FTRACE_ITER_FAILURES = (1 << 3),
700 }; 696 };
701 697
702 #define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ 698 #define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */
703 699
704 struct ftrace_iterator { 700 struct ftrace_iterator {
705 loff_t pos; 701 loff_t pos;
706 struct ftrace_page *pg; 702 struct ftrace_page *pg;
707 unsigned idx; 703 unsigned idx;
708 unsigned flags; 704 unsigned flags;
709 unsigned char buffer[FTRACE_BUFF_MAX+1]; 705 unsigned char buffer[FTRACE_BUFF_MAX+1];
710 unsigned buffer_idx; 706 unsigned buffer_idx;
711 unsigned filtered; 707 unsigned filtered;
712 }; 708 };
713 709
714 static void * 710 static void *
715 t_next(struct seq_file *m, void *v, loff_t *pos) 711 t_next(struct seq_file *m, void *v, loff_t *pos)
716 { 712 {
717 struct ftrace_iterator *iter = m->private; 713 struct ftrace_iterator *iter = m->private;
718 struct dyn_ftrace *rec = NULL; 714 struct dyn_ftrace *rec = NULL;
719 715
720 (*pos)++; 716 (*pos)++;
721 717
722 /* should not be called from interrupt context */ 718 /* should not be called from interrupt context */
723 spin_lock(&ftrace_lock); 719 spin_lock(&ftrace_lock);
724 retry: 720 retry:
725 if (iter->idx >= iter->pg->index) { 721 if (iter->idx >= iter->pg->index) {
726 if (iter->pg->next) { 722 if (iter->pg->next) {
727 iter->pg = iter->pg->next; 723 iter->pg = iter->pg->next;
728 iter->idx = 0; 724 iter->idx = 0;
729 goto retry; 725 goto retry;
730 } 726 }
731 } else { 727 } else {
732 rec = &iter->pg->records[iter->idx++]; 728 rec = &iter->pg->records[iter->idx++];
733 if ((rec->flags & FTRACE_FL_FREE) || 729 if ((rec->flags & FTRACE_FL_FREE) ||
734 730
735 (!(iter->flags & FTRACE_ITER_FAILURES) && 731 (!(iter->flags & FTRACE_ITER_FAILURES) &&
736 (rec->flags & FTRACE_FL_FAILED)) || 732 (rec->flags & FTRACE_FL_FAILED)) ||
737 733
738 ((iter->flags & FTRACE_ITER_FAILURES) && 734 ((iter->flags & FTRACE_ITER_FAILURES) &&
739 !(rec->flags & FTRACE_FL_FAILED)) || 735 !(rec->flags & FTRACE_FL_FAILED)) ||
740 736
741 ((iter->flags & FTRACE_ITER_NOTRACE) && 737 ((iter->flags & FTRACE_ITER_NOTRACE) &&
742 !(rec->flags & FTRACE_FL_NOTRACE))) { 738 !(rec->flags & FTRACE_FL_NOTRACE))) {
743 rec = NULL; 739 rec = NULL;
744 goto retry; 740 goto retry;
745 } 741 }
746 } 742 }
747 spin_unlock(&ftrace_lock); 743 spin_unlock(&ftrace_lock);
748 744
749 iter->pos = *pos; 745 iter->pos = *pos;
750 746
751 return rec; 747 return rec;
752 } 748 }
753 749
754 static void *t_start(struct seq_file *m, loff_t *pos) 750 static void *t_start(struct seq_file *m, loff_t *pos)
755 { 751 {
756 struct ftrace_iterator *iter = m->private; 752 struct ftrace_iterator *iter = m->private;
757 void *p = NULL; 753 void *p = NULL;
758 loff_t l = -1; 754 loff_t l = -1;
759 755
760 if (*pos != iter->pos) { 756 if (*pos != iter->pos) {
761 for (p = t_next(m, p, &l); p && l < *pos; p = t_next(m, p, &l)) 757 for (p = t_next(m, p, &l); p && l < *pos; p = t_next(m, p, &l))
762 ; 758 ;
763 } else { 759 } else {
764 l = *pos; 760 l = *pos;
765 p = t_next(m, p, &l); 761 p = t_next(m, p, &l);
766 } 762 }
767 763
768 return p; 764 return p;
769 } 765 }
770 766
771 static void t_stop(struct seq_file *m, void *p) 767 static void t_stop(struct seq_file *m, void *p)
772 { 768 {
773 } 769 }
774 770
775 static int t_show(struct seq_file *m, void *v) 771 static int t_show(struct seq_file *m, void *v)
776 { 772 {
777 struct dyn_ftrace *rec = v; 773 struct dyn_ftrace *rec = v;
778 char str[KSYM_SYMBOL_LEN]; 774 char str[KSYM_SYMBOL_LEN];
779 775
780 if (!rec) 776 if (!rec)
781 return 0; 777 return 0;
782 778
783 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); 779 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
784 780
785 seq_printf(m, "%s\n", str); 781 seq_printf(m, "%s\n", str);
786 782
787 return 0; 783 return 0;
788 } 784 }
789 785
790 static struct seq_operations show_ftrace_seq_ops = { 786 static struct seq_operations show_ftrace_seq_ops = {
791 .start = t_start, 787 .start = t_start,
792 .next = t_next, 788 .next = t_next,
793 .stop = t_stop, 789 .stop = t_stop,
794 .show = t_show, 790 .show = t_show,
795 }; 791 };
796 792
797 static int 793 static int
798 ftrace_avail_open(struct inode *inode, struct file *file) 794 ftrace_avail_open(struct inode *inode, struct file *file)
799 { 795 {
800 struct ftrace_iterator *iter; 796 struct ftrace_iterator *iter;
801 int ret; 797 int ret;
802 798
803 if (unlikely(ftrace_disabled)) 799 if (unlikely(ftrace_disabled))
804 return -ENODEV; 800 return -ENODEV;
805 801
806 iter = kzalloc(sizeof(*iter), GFP_KERNEL); 802 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
807 if (!iter) 803 if (!iter)
808 return -ENOMEM; 804 return -ENOMEM;
809 805
810 iter->pg = ftrace_pages_start; 806 iter->pg = ftrace_pages_start;
811 iter->pos = -1; 807 iter->pos = -1;
812 808
813 ret = seq_open(file, &show_ftrace_seq_ops); 809 ret = seq_open(file, &show_ftrace_seq_ops);
814 if (!ret) { 810 if (!ret) {
815 struct seq_file *m = file->private_data; 811 struct seq_file *m = file->private_data;
816 812
817 m->private = iter; 813 m->private = iter;
818 } else { 814 } else {
819 kfree(iter); 815 kfree(iter);
820 } 816 }
821 817
822 return ret; 818 return ret;
823 } 819 }
824 820
825 int ftrace_avail_release(struct inode *inode, struct file *file) 821 int ftrace_avail_release(struct inode *inode, struct file *file)
826 { 822 {
827 struct seq_file *m = (struct seq_file *)file->private_data; 823 struct seq_file *m = (struct seq_file *)file->private_data;
828 struct ftrace_iterator *iter = m->private; 824 struct ftrace_iterator *iter = m->private;
829 825
830 seq_release(inode, file); 826 seq_release(inode, file);
831 kfree(iter); 827 kfree(iter);
832 828
833 return 0; 829 return 0;
834 } 830 }
835 831
836 static int 832 static int
837 ftrace_failures_open(struct inode *inode, struct file *file) 833 ftrace_failures_open(struct inode *inode, struct file *file)
838 { 834 {
839 int ret; 835 int ret;
840 struct seq_file *m; 836 struct seq_file *m;
841 struct ftrace_iterator *iter; 837 struct ftrace_iterator *iter;
842 838
843 ret = ftrace_avail_open(inode, file); 839 ret = ftrace_avail_open(inode, file);
844 if (!ret) { 840 if (!ret) {
845 m = (struct seq_file *)file->private_data; 841 m = (struct seq_file *)file->private_data;
846 iter = (struct ftrace_iterator *)m->private; 842 iter = (struct ftrace_iterator *)m->private;
847 iter->flags = FTRACE_ITER_FAILURES; 843 iter->flags = FTRACE_ITER_FAILURES;
848 } 844 }
849 845
850 return ret; 846 return ret;
851 } 847 }
852 848
853 849
854 static void ftrace_filter_reset(int enable) 850 static void ftrace_filter_reset(int enable)
855 { 851 {
856 struct ftrace_page *pg; 852 struct ftrace_page *pg;
857 struct dyn_ftrace *rec; 853 struct dyn_ftrace *rec;
858 unsigned long type = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; 854 unsigned long type = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
859 unsigned i; 855 unsigned i;
860 856
861 /* should not be called from interrupt context */ 857 /* should not be called from interrupt context */
862 spin_lock(&ftrace_lock); 858 spin_lock(&ftrace_lock);
863 if (enable) 859 if (enable)
864 ftrace_filtered = 0; 860 ftrace_filtered = 0;
865 pg = ftrace_pages_start; 861 pg = ftrace_pages_start;
866 while (pg) { 862 while (pg) {
867 for (i = 0; i < pg->index; i++) { 863 for (i = 0; i < pg->index; i++) {
868 rec = &pg->records[i]; 864 rec = &pg->records[i];
869 if (rec->flags & FTRACE_FL_FAILED) 865 if (rec->flags & FTRACE_FL_FAILED)
870 continue; 866 continue;
871 rec->flags &= ~type; 867 rec->flags &= ~type;
872 } 868 }
873 pg = pg->next; 869 pg = pg->next;
874 } 870 }
875 spin_unlock(&ftrace_lock); 871 spin_unlock(&ftrace_lock);
876 } 872 }
877 873
878 static int 874 static int
879 ftrace_regex_open(struct inode *inode, struct file *file, int enable) 875 ftrace_regex_open(struct inode *inode, struct file *file, int enable)
880 { 876 {
881 struct ftrace_iterator *iter; 877 struct ftrace_iterator *iter;
882 int ret = 0; 878 int ret = 0;
883 879
884 if (unlikely(ftrace_disabled)) 880 if (unlikely(ftrace_disabled))
885 return -ENODEV; 881 return -ENODEV;
886 882
887 iter = kzalloc(sizeof(*iter), GFP_KERNEL); 883 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
888 if (!iter) 884 if (!iter)
889 return -ENOMEM; 885 return -ENOMEM;
890 886
891 mutex_lock(&ftrace_regex_lock); 887 mutex_lock(&ftrace_regex_lock);
892 if ((file->f_mode & FMODE_WRITE) && 888 if ((file->f_mode & FMODE_WRITE) &&
893 !(file->f_flags & O_APPEND)) 889 !(file->f_flags & O_APPEND))
894 ftrace_filter_reset(enable); 890 ftrace_filter_reset(enable);
895 891
896 if (file->f_mode & FMODE_READ) { 892 if (file->f_mode & FMODE_READ) {
897 iter->pg = ftrace_pages_start; 893 iter->pg = ftrace_pages_start;
898 iter->pos = -1; 894 iter->pos = -1;
899 iter->flags = enable ? FTRACE_ITER_FILTER : 895 iter->flags = enable ? FTRACE_ITER_FILTER :
900 FTRACE_ITER_NOTRACE; 896 FTRACE_ITER_NOTRACE;
901 897
902 ret = seq_open(file, &show_ftrace_seq_ops); 898 ret = seq_open(file, &show_ftrace_seq_ops);
903 if (!ret) { 899 if (!ret) {
904 struct seq_file *m = file->private_data; 900 struct seq_file *m = file->private_data;
905 m->private = iter; 901 m->private = iter;
906 } else 902 } else
907 kfree(iter); 903 kfree(iter);
908 } else 904 } else
909 file->private_data = iter; 905 file->private_data = iter;
910 mutex_unlock(&ftrace_regex_lock); 906 mutex_unlock(&ftrace_regex_lock);
911 907
912 return ret; 908 return ret;
913 } 909 }
914 910
915 static int 911 static int
916 ftrace_filter_open(struct inode *inode, struct file *file) 912 ftrace_filter_open(struct inode *inode, struct file *file)
917 { 913 {
918 return ftrace_regex_open(inode, file, 1); 914 return ftrace_regex_open(inode, file, 1);
919 } 915 }
920 916
921 static int 917 static int
922 ftrace_notrace_open(struct inode *inode, struct file *file) 918 ftrace_notrace_open(struct inode *inode, struct file *file)
923 { 919 {
924 return ftrace_regex_open(inode, file, 0); 920 return ftrace_regex_open(inode, file, 0);
925 } 921 }
926 922
927 static ssize_t 923 static ssize_t
928 ftrace_regex_read(struct file *file, char __user *ubuf, 924 ftrace_regex_read(struct file *file, char __user *ubuf,
929 size_t cnt, loff_t *ppos) 925 size_t cnt, loff_t *ppos)
930 { 926 {
931 if (file->f_mode & FMODE_READ) 927 if (file->f_mode & FMODE_READ)
932 return seq_read(file, ubuf, cnt, ppos); 928 return seq_read(file, ubuf, cnt, ppos);
933 else 929 else
934 return -EPERM; 930 return -EPERM;
935 } 931 }
936 932
937 static loff_t 933 static loff_t
938 ftrace_regex_lseek(struct file *file, loff_t offset, int origin) 934 ftrace_regex_lseek(struct file *file, loff_t offset, int origin)
939 { 935 {
940 loff_t ret; 936 loff_t ret;
941 937
942 if (file->f_mode & FMODE_READ) 938 if (file->f_mode & FMODE_READ)
943 ret = seq_lseek(file, offset, origin); 939 ret = seq_lseek(file, offset, origin);
944 else 940 else
945 file->f_pos = ret = 1; 941 file->f_pos = ret = 1;
946 942
947 return ret; 943 return ret;
948 } 944 }
949 945
950 enum { 946 enum {
951 MATCH_FULL, 947 MATCH_FULL,
952 MATCH_FRONT_ONLY, 948 MATCH_FRONT_ONLY,
953 MATCH_MIDDLE_ONLY, 949 MATCH_MIDDLE_ONLY,
954 MATCH_END_ONLY, 950 MATCH_END_ONLY,
955 }; 951 };
956 952
957 static void 953 static void
958 ftrace_match(unsigned char *buff, int len, int enable) 954 ftrace_match(unsigned char *buff, int len, int enable)
959 { 955 {
960 char str[KSYM_SYMBOL_LEN]; 956 char str[KSYM_SYMBOL_LEN];
961 char *search = NULL; 957 char *search = NULL;
962 struct ftrace_page *pg; 958 struct ftrace_page *pg;
963 struct dyn_ftrace *rec; 959 struct dyn_ftrace *rec;
964 int type = MATCH_FULL; 960 int type = MATCH_FULL;
965 unsigned long flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; 961 unsigned long flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
966 unsigned i, match = 0, search_len = 0; 962 unsigned i, match = 0, search_len = 0;
967 963
968 for (i = 0; i < len; i++) { 964 for (i = 0; i < len; i++) {
969 if (buff[i] == '*') { 965 if (buff[i] == '*') {
970 if (!i) { 966 if (!i) {
971 search = buff + i + 1; 967 search = buff + i + 1;
972 type = MATCH_END_ONLY; 968 type = MATCH_END_ONLY;
973 search_len = len - (i + 1); 969 search_len = len - (i + 1);
974 } else { 970 } else {
975 if (type == MATCH_END_ONLY) { 971 if (type == MATCH_END_ONLY) {
976 type = MATCH_MIDDLE_ONLY; 972 type = MATCH_MIDDLE_ONLY;
977 } else { 973 } else {
978 match = i; 974 match = i;
979 type = MATCH_FRONT_ONLY; 975 type = MATCH_FRONT_ONLY;
980 } 976 }
981 buff[i] = 0; 977 buff[i] = 0;
982 break; 978 break;
983 } 979 }
984 } 980 }
985 } 981 }
986 982
987 /* should not be called from interrupt context */ 983 /* should not be called from interrupt context */
988 spin_lock(&ftrace_lock); 984 spin_lock(&ftrace_lock);
989 if (enable) 985 if (enable)
990 ftrace_filtered = 1; 986 ftrace_filtered = 1;
991 pg = ftrace_pages_start; 987 pg = ftrace_pages_start;
992 while (pg) { 988 while (pg) {
993 for (i = 0; i < pg->index; i++) { 989 for (i = 0; i < pg->index; i++) {
994 int matched = 0; 990 int matched = 0;
995 char *ptr; 991 char *ptr;
996 992
997 rec = &pg->records[i]; 993 rec = &pg->records[i];
998 if (rec->flags & FTRACE_FL_FAILED) 994 if (rec->flags & FTRACE_FL_FAILED)
999 continue; 995 continue;
1000 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); 996 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
1001 switch (type) { 997 switch (type) {
1002 case MATCH_FULL: 998 case MATCH_FULL:
1003 if (strcmp(str, buff) == 0) 999 if (strcmp(str, buff) == 0)
1004 matched = 1; 1000 matched = 1;
1005 break; 1001 break;
1006 case MATCH_FRONT_ONLY: 1002 case MATCH_FRONT_ONLY:
1007 if (memcmp(str, buff, match) == 0) 1003 if (memcmp(str, buff, match) == 0)
1008 matched = 1; 1004 matched = 1;
1009 break; 1005 break;
1010 case MATCH_MIDDLE_ONLY: 1006 case MATCH_MIDDLE_ONLY:
1011 if (strstr(str, search)) 1007 if (strstr(str, search))
1012 matched = 1; 1008 matched = 1;
1013 break; 1009 break;
1014 case MATCH_END_ONLY: 1010 case MATCH_END_ONLY:
1015 ptr = strstr(str, search); 1011 ptr = strstr(str, search);
1016 if (ptr && (ptr[search_len] == 0)) 1012 if (ptr && (ptr[search_len] == 0))
1017 matched = 1; 1013 matched = 1;
1018 break; 1014 break;
1019 } 1015 }
1020 if (matched) 1016 if (matched)
1021 rec->flags |= flag; 1017 rec->flags |= flag;
1022 } 1018 }
1023 pg = pg->next; 1019 pg = pg->next;
1024 } 1020 }
1025 spin_unlock(&ftrace_lock); 1021 spin_unlock(&ftrace_lock);
1026 } 1022 }
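
The loop above turns a single '*' in the written pattern into one of four match modes, and the switch then compares every recorded function name with strcmp(), memcmp() or strstr() depending on the mode. As a standalone illustration, here is a userspace-only sketch of that classification and comparison; it assumes nothing from the kernel, and the sample pattern and symbol names are made up.

#include <stdio.h>
#include <string.h>

enum { MATCH_FULL, MATCH_FRONT_ONLY, MATCH_MIDDLE_ONLY, MATCH_END_ONLY };

/* Same classification as the first loop of ftrace_match() above. */
static int classify(char *buff, char **search, int *search_len, int *match)
{
	int len = strlen(buff);
	int type = MATCH_FULL;
	int i;

	for (i = 0; i < len; i++) {
		if (buff[i] != '*')
			continue;
		if (!i) {
			*search = buff + 1;		/* "*foo" or "*foo*" */
			*search_len = len - 1;
			type = MATCH_END_ONLY;
		} else {
			if (type == MATCH_END_ONLY)
				type = MATCH_MIDDLE_ONLY;	/* "*foo*" */
			else {
				*match = i;			/* "foo*"  */
				type = MATCH_FRONT_ONLY;
			}
			buff[i] = 0;
			break;
		}
	}
	return type;
}

/* Same comparison as the switch in ftrace_match() above. */
static int matches(const char *str, const char *buff, int type,
		   const char *search, int search_len, int match)
{
	const char *ptr;

	switch (type) {
	case MATCH_FULL:	return strcmp(str, buff) == 0;
	case MATCH_FRONT_ONLY:	return memcmp(str, buff, match) == 0;
	case MATCH_MIDDLE_ONLY:	return strstr(str, search) != NULL;
	case MATCH_END_ONLY:
		ptr = strstr(str, search);
		return ptr && ptr[search_len] == 0;
	}
	return 0;
}

int main(void)
{
	const char *syms[] = { "schedule", "schedule_timeout", "do_schedule_work" };
	char pattern[] = "schedule*";	/* try "*timeout", "*sched*", "schedule" too */
	char *search = NULL;
	int search_len = 0, match = 0, i;
	int type = classify(pattern, &search, &search_len, &match);

	for (i = 0; i < 3; i++)
		printf("%-20s %s\n", syms[i],
		       matches(syms[i], pattern, type, search, search_len, match) ?
		       "matched" : "-");
	return 0;
}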
1027 1023
1028 static ssize_t 1024 static ssize_t
1029 ftrace_regex_write(struct file *file, const char __user *ubuf, 1025 ftrace_regex_write(struct file *file, const char __user *ubuf,
1030 size_t cnt, loff_t *ppos, int enable) 1026 size_t cnt, loff_t *ppos, int enable)
1031 { 1027 {
1032 struct ftrace_iterator *iter; 1028 struct ftrace_iterator *iter;
1033 char ch; 1029 char ch;
1034 size_t read = 0; 1030 size_t read = 0;
1035 ssize_t ret; 1031 ssize_t ret;
1036 1032
1037 if (!cnt || cnt < 0) 1033 if (!cnt || cnt < 0)
1038 return 0; 1034 return 0;
1039 1035
1040 mutex_lock(&ftrace_regex_lock); 1036 mutex_lock(&ftrace_regex_lock);
1041 1037
1042 if (file->f_mode & FMODE_READ) { 1038 if (file->f_mode & FMODE_READ) {
1043 struct seq_file *m = file->private_data; 1039 struct seq_file *m = file->private_data;
1044 iter = m->private; 1040 iter = m->private;
1045 } else 1041 } else
1046 iter = file->private_data; 1042 iter = file->private_data;
1047 1043
1048 if (!*ppos) { 1044 if (!*ppos) {
1049 iter->flags &= ~FTRACE_ITER_CONT; 1045 iter->flags &= ~FTRACE_ITER_CONT;
1050 iter->buffer_idx = 0; 1046 iter->buffer_idx = 0;
1051 } 1047 }
1052 1048
1053 ret = get_user(ch, ubuf++); 1049 ret = get_user(ch, ubuf++);
1054 if (ret) 1050 if (ret)
1055 goto out; 1051 goto out;
1056 read++; 1052 read++;
1057 cnt--; 1053 cnt--;
1058 1054
1059 if (!(iter->flags & ~FTRACE_ITER_CONT)) { 1055 if (!(iter->flags & ~FTRACE_ITER_CONT)) {
1060 /* skip white space */ 1056 /* skip white space */
1061 while (cnt && isspace(ch)) { 1057 while (cnt && isspace(ch)) {
1062 ret = get_user(ch, ubuf++); 1058 ret = get_user(ch, ubuf++);
1063 if (ret) 1059 if (ret)
1064 goto out; 1060 goto out;
1065 read++; 1061 read++;
1066 cnt--; 1062 cnt--;
1067 } 1063 }
1068 1064
1069 if (isspace(ch)) { 1065 if (isspace(ch)) {
1070 file->f_pos += read; 1066 file->f_pos += read;
1071 ret = read; 1067 ret = read;
1072 goto out; 1068 goto out;
1073 } 1069 }
1074 1070
1075 iter->buffer_idx = 0; 1071 iter->buffer_idx = 0;
1076 } 1072 }
1077 1073
1078 while (cnt && !isspace(ch)) { 1074 while (cnt && !isspace(ch)) {
1079 if (iter->buffer_idx < FTRACE_BUFF_MAX) 1075 if (iter->buffer_idx < FTRACE_BUFF_MAX)
1080 iter->buffer[iter->buffer_idx++] = ch; 1076 iter->buffer[iter->buffer_idx++] = ch;
1081 else { 1077 else {
1082 ret = -EINVAL; 1078 ret = -EINVAL;
1083 goto out; 1079 goto out;
1084 } 1080 }
1085 ret = get_user(ch, ubuf++); 1081 ret = get_user(ch, ubuf++);
1086 if (ret) 1082 if (ret)
1087 goto out; 1083 goto out;
1088 read++; 1084 read++;
1089 cnt--; 1085 cnt--;
1090 } 1086 }
1091 1087
1092 if (isspace(ch)) { 1088 if (isspace(ch)) {
1093 iter->filtered++; 1089 iter->filtered++;
1094 iter->buffer[iter->buffer_idx] = 0; 1090 iter->buffer[iter->buffer_idx] = 0;
1095 ftrace_match(iter->buffer, iter->buffer_idx, enable); 1091 ftrace_match(iter->buffer, iter->buffer_idx, enable);
1096 iter->buffer_idx = 0; 1092 iter->buffer_idx = 0;
1097 } else 1093 } else
1098 iter->flags |= FTRACE_ITER_CONT; 1094 iter->flags |= FTRACE_ITER_CONT;
1099 1095
1100 1096
1101 file->f_pos += read; 1097 file->f_pos += read;
1102 1098
1103 ret = read; 1099 ret = read;
1104 out: 1100 out:
1105 mutex_unlock(&ftrace_regex_lock); 1101 mutex_unlock(&ftrace_regex_lock);
1106 1102
1107 return ret; 1103 return ret;
1108 } 1104 }
1109 1105
1110 static ssize_t 1106 static ssize_t
1111 ftrace_filter_write(struct file *file, const char __user *ubuf, 1107 ftrace_filter_write(struct file *file, const char __user *ubuf,
1112 size_t cnt, loff_t *ppos) 1108 size_t cnt, loff_t *ppos)
1113 { 1109 {
1114 return ftrace_regex_write(file, ubuf, cnt, ppos, 1); 1110 return ftrace_regex_write(file, ubuf, cnt, ppos, 1);
1115 } 1111 }
1116 1112
1117 static ssize_t 1113 static ssize_t
1118 ftrace_notrace_write(struct file *file, const char __user *ubuf, 1114 ftrace_notrace_write(struct file *file, const char __user *ubuf,
1119 size_t cnt, loff_t *ppos) 1115 size_t cnt, loff_t *ppos)
1120 { 1116 {
1121 return ftrace_regex_write(file, ubuf, cnt, ppos, 0); 1117 return ftrace_regex_write(file, ubuf, cnt, ppos, 0);
1122 } 1118 }
1123 1119
1124 static void 1120 static void
1125 ftrace_set_regex(unsigned char *buf, int len, int reset, int enable) 1121 ftrace_set_regex(unsigned char *buf, int len, int reset, int enable)
1126 { 1122 {
1127 if (unlikely(ftrace_disabled)) 1123 if (unlikely(ftrace_disabled))
1128 return; 1124 return;
1129 1125
1130 mutex_lock(&ftrace_regex_lock); 1126 mutex_lock(&ftrace_regex_lock);
1131 if (reset) 1127 if (reset)
1132 ftrace_filter_reset(enable); 1128 ftrace_filter_reset(enable);
1133 if (buf) 1129 if (buf)
1134 ftrace_match(buf, len, enable); 1130 ftrace_match(buf, len, enable);
1135 mutex_unlock(&ftrace_regex_lock); 1131 mutex_unlock(&ftrace_regex_lock);
1136 } 1132 }
1137 1133
1138 /** 1134 /**
1139 * ftrace_set_filter - set a function to filter on in ftrace 1135 * ftrace_set_filter - set a function to filter on in ftrace
1140 * @buf - the string that holds the function filter text. 1136 * @buf - the string that holds the function filter text.
1141 * @len - the length of the string. 1137 * @len - the length of the string.
1142 * @reset - non zero to reset all filters before applying this filter. 1138 * @reset - non zero to reset all filters before applying this filter.
1143 * 1139 *
1144 * Filters denote which functions should be enabled when tracing is enabled. 1140 * Filters denote which functions should be enabled when tracing is enabled.
1145 * If @buf is NULL and reset is set, all functions will be enabled for tracing. 1141 * If @buf is NULL and reset is set, all functions will be enabled for tracing.
1146 */ 1142 */
1147 void ftrace_set_filter(unsigned char *buf, int len, int reset) 1143 void ftrace_set_filter(unsigned char *buf, int len, int reset)
1148 { 1144 {
1149 ftrace_set_regex(buf, len, reset, 1); 1145 ftrace_set_regex(buf, len, reset, 1);
1150 } 1146 }
1151 1147
1152 /** 1148 /**
1153 * ftrace_set_notrace - set a function to not trace in ftrace 1149 * ftrace_set_notrace - set a function to not trace in ftrace
1154 * @buf - the string that holds the function notrace text. 1150 * @buf - the string that holds the function notrace text.
1155 * @len - the length of the string. 1151 * @len - the length of the string.
1156 * @reset - non zero to reset all filters before applying this filter. 1152 * @reset - non zero to reset all filters before applying this filter.
1157 * 1153 *
1158 * Notrace Filters denote which functions should not be enabled when tracing 1154 * Notrace Filters denote which functions should not be enabled when tracing
1159 * is enabled. If @buf is NULL and reset is set, all functions will be enabled 1155 * is enabled. If @buf is NULL and reset is set, all functions will be enabled
1160 * for tracing. 1156 * for tracing.
1161 */ 1157 */
1162 void ftrace_set_notrace(unsigned char *buf, int len, int reset) 1158 void ftrace_set_notrace(unsigned char *buf, int len, int reset)
1163 { 1159 {
1164 ftrace_set_regex(buf, len, reset, 0); 1160 ftrace_set_regex(buf, len, reset, 0);
1165 } 1161 }
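
A short sketch of how a caller inside the kernel might use the two helpers documented above; the function name and the patterns are illustrative, not part of this patch. Note that the buffer passed in must be writable, since ftrace_match() truncates it at the '*' while parsing, and that @reset controls whether existing entries are cleared first.

/* Hypothetical caller: filter on "sched*" and mark "*_debug" as notrace. */
static void my_setup_function_filters(void)
{
	unsigned char filter[]  = "sched*";
	unsigned char notrace[] = "*_debug";

	/* reset = 1: drop any existing filter entries before adding this one */
	ftrace_set_filter(filter, sizeof(filter) - 1, 1);

	/* reset = 0: keep existing notrace entries and add this one */
	ftrace_set_notrace(notrace, sizeof(notrace) - 1, 0);
}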
1166 1162
1167 static int 1163 static int
1168 ftrace_regex_release(struct inode *inode, struct file *file, int enable) 1164 ftrace_regex_release(struct inode *inode, struct file *file, int enable)
1169 { 1165 {
1170 struct seq_file *m = (struct seq_file *)file->private_data; 1166 struct seq_file *m = (struct seq_file *)file->private_data;
1171 struct ftrace_iterator *iter; 1167 struct ftrace_iterator *iter;
1172 1168
1173 mutex_lock(&ftrace_regex_lock); 1169 mutex_lock(&ftrace_regex_lock);
1174 if (file->f_mode & FMODE_READ) { 1170 if (file->f_mode & FMODE_READ) {
1175 iter = m->private; 1171 iter = m->private;
1176 1172
1177 seq_release(inode, file); 1173 seq_release(inode, file);
1178 } else 1174 } else
1179 iter = file->private_data; 1175 iter = file->private_data;
1180 1176
1181 if (iter->buffer_idx) { 1177 if (iter->buffer_idx) {
1182 iter->filtered++; 1178 iter->filtered++;
1183 iter->buffer[iter->buffer_idx] = 0; 1179 iter->buffer[iter->buffer_idx] = 0;
1184 ftrace_match(iter->buffer, iter->buffer_idx, enable); 1180 ftrace_match(iter->buffer, iter->buffer_idx, enable);
1185 } 1181 }
1186 1182
1187 mutex_lock(&ftrace_sysctl_lock); 1183 mutex_lock(&ftrace_sysctl_lock);
1188 mutex_lock(&ftrace_start_lock); 1184 mutex_lock(&ftrace_start_lock);
1189 if (iter->filtered && ftrace_start && ftrace_enabled) 1185 if (iter->filtered && ftrace_start && ftrace_enabled)
1190 ftrace_run_update_code(FTRACE_ENABLE_CALLS); 1186 ftrace_run_update_code(FTRACE_ENABLE_CALLS);
1191 mutex_unlock(&ftrace_start_lock); 1187 mutex_unlock(&ftrace_start_lock);
1192 mutex_unlock(&ftrace_sysctl_lock); 1188 mutex_unlock(&ftrace_sysctl_lock);
1193 1189
1194 kfree(iter); 1190 kfree(iter);
1195 mutex_unlock(&ftrace_regex_lock); 1191 mutex_unlock(&ftrace_regex_lock);
1196 return 0; 1192 return 0;
1197 } 1193 }
1198 1194
1199 static int 1195 static int
1200 ftrace_filter_release(struct inode *inode, struct file *file) 1196 ftrace_filter_release(struct inode *inode, struct file *file)
1201 { 1197 {
1202 return ftrace_regex_release(inode, file, 1); 1198 return ftrace_regex_release(inode, file, 1);
1203 } 1199 }
1204 1200
1205 static int 1201 static int
1206 ftrace_notrace_release(struct inode *inode, struct file *file) 1202 ftrace_notrace_release(struct inode *inode, struct file *file)
1207 { 1203 {
1208 return ftrace_regex_release(inode, file, 0); 1204 return ftrace_regex_release(inode, file, 0);
1209 } 1205 }
1210 1206
1211 static struct file_operations ftrace_avail_fops = { 1207 static struct file_operations ftrace_avail_fops = {
1212 .open = ftrace_avail_open, 1208 .open = ftrace_avail_open,
1213 .read = seq_read, 1209 .read = seq_read,
1214 .llseek = seq_lseek, 1210 .llseek = seq_lseek,
1215 .release = ftrace_avail_release, 1211 .release = ftrace_avail_release,
1216 }; 1212 };
1217 1213
1218 static struct file_operations ftrace_failures_fops = { 1214 static struct file_operations ftrace_failures_fops = {
1219 .open = ftrace_failures_open, 1215 .open = ftrace_failures_open,
1220 .read = seq_read, 1216 .read = seq_read,
1221 .llseek = seq_lseek, 1217 .llseek = seq_lseek,
1222 .release = ftrace_avail_release, 1218 .release = ftrace_avail_release,
1223 }; 1219 };
1224 1220
1225 static struct file_operations ftrace_filter_fops = { 1221 static struct file_operations ftrace_filter_fops = {
1226 .open = ftrace_filter_open, 1222 .open = ftrace_filter_open,
1227 .read = ftrace_regex_read, 1223 .read = ftrace_regex_read,
1228 .write = ftrace_filter_write, 1224 .write = ftrace_filter_write,
1229 .llseek = ftrace_regex_lseek, 1225 .llseek = ftrace_regex_lseek,
1230 .release = ftrace_filter_release, 1226 .release = ftrace_filter_release,
1231 }; 1227 };
1232 1228
1233 static struct file_operations ftrace_notrace_fops = { 1229 static struct file_operations ftrace_notrace_fops = {
1234 .open = ftrace_notrace_open, 1230 .open = ftrace_notrace_open,
1235 .read = ftrace_regex_read, 1231 .read = ftrace_regex_read,
1236 .write = ftrace_notrace_write, 1232 .write = ftrace_notrace_write,
1237 .llseek = ftrace_regex_lseek, 1233 .llseek = ftrace_regex_lseek,
1238 .release = ftrace_notrace_release, 1234 .release = ftrace_notrace_release,
1239 }; 1235 };
1240 1236
1241 static __init int ftrace_init_debugfs(void) 1237 static __init int ftrace_init_debugfs(void)
1242 { 1238 {
1243 struct dentry *d_tracer; 1239 struct dentry *d_tracer;
1244 struct dentry *entry; 1240 struct dentry *entry;
1245 1241
1246 d_tracer = tracing_init_dentry(); 1242 d_tracer = tracing_init_dentry();
1247 1243
1248 entry = debugfs_create_file("available_filter_functions", 0444, 1244 entry = debugfs_create_file("available_filter_functions", 0444,
1249 d_tracer, NULL, &ftrace_avail_fops); 1245 d_tracer, NULL, &ftrace_avail_fops);
1250 if (!entry) 1246 if (!entry)
1251 pr_warning("Could not create debugfs " 1247 pr_warning("Could not create debugfs "
1252 "'available_filter_functions' entry\n"); 1248 "'available_filter_functions' entry\n");
1253 1249
1254 entry = debugfs_create_file("failures", 0444, 1250 entry = debugfs_create_file("failures", 0444,
1255 d_tracer, NULL, &ftrace_failures_fops); 1251 d_tracer, NULL, &ftrace_failures_fops);
1256 if (!entry) 1252 if (!entry)
1257 pr_warning("Could not create debugfs 'failures' entry\n"); 1253 pr_warning("Could not create debugfs 'failures' entry\n");
1258 1254
1259 entry = debugfs_create_file("set_ftrace_filter", 0644, d_tracer, 1255 entry = debugfs_create_file("set_ftrace_filter", 0644, d_tracer,
1260 NULL, &ftrace_filter_fops); 1256 NULL, &ftrace_filter_fops);
1261 if (!entry) 1257 if (!entry)
1262 pr_warning("Could not create debugfs " 1258 pr_warning("Could not create debugfs "
1263 "'set_ftrace_filter' entry\n"); 1259 "'set_ftrace_filter' entry\n");
1264 1260
1265 entry = debugfs_create_file("set_ftrace_notrace", 0644, d_tracer, 1261 entry = debugfs_create_file("set_ftrace_notrace", 0644, d_tracer,
1266 NULL, &ftrace_notrace_fops); 1262 NULL, &ftrace_notrace_fops);
1267 if (!entry) 1263 if (!entry)
1268 pr_warning("Could not create debugfs " 1264 pr_warning("Could not create debugfs "
1269 "'set_ftrace_notrace' entry\n"); 1265 "'set_ftrace_notrace' entry\n");
1270 1266
1271 return 0; 1267 return 0;
1272 } 1268 }
1273 1269
1274 fs_initcall(ftrace_init_debugfs); 1270 fs_initcall(ftrace_init_debugfs);
1275 1271
1276 static int ftrace_convert_nops(unsigned long *start, 1272 static int ftrace_convert_nops(unsigned long *start,
1277 unsigned long *end) 1273 unsigned long *end)
1278 { 1274 {
1279 unsigned long *p; 1275 unsigned long *p;
1280 unsigned long addr; 1276 unsigned long addr;
1281 unsigned long flags; 1277 unsigned long flags;
1282 1278
1283 mutex_lock(&ftrace_start_lock); 1279 mutex_lock(&ftrace_start_lock);
1284 p = start; 1280 p = start;
1285 while (p < end) { 1281 while (p < end) {
1286 addr = ftrace_call_adjust(*p++); 1282 addr = ftrace_call_adjust(*p++);
1287 ftrace_record_ip(addr); 1283 ftrace_record_ip(addr);
1288 } 1284 }
1289 1285
1290 /* disable interrupts to prevent kstop machine */ 1286 /* disable interrupts to prevent kstop machine */
1291 local_irq_save(flags); 1287 local_irq_save(flags);
1292 ftrace_update_code(); 1288 ftrace_update_code();
1293 local_irq_restore(flags); 1289 local_irq_restore(flags);
1294 mutex_unlock(&ftrace_start_lock); 1290 mutex_unlock(&ftrace_start_lock);
1295 1291
1296 return 0; 1292 return 0;
1297 } 1293 }
1298 1294
1299 void ftrace_init_module(unsigned long *start, unsigned long *end) 1295 void ftrace_init_module(unsigned long *start, unsigned long *end)
1300 { 1296 {
1301 if (ftrace_disabled || start == end) 1297 if (ftrace_disabled || start == end)
1302 return; 1298 return;
1303 ftrace_convert_nops(start, end); 1299 ftrace_convert_nops(start, end);
1304 } 1300 }
1305 1301
1306 extern unsigned long __start_mcount_loc[]; 1302 extern unsigned long __start_mcount_loc[];
1307 extern unsigned long __stop_mcount_loc[]; 1303 extern unsigned long __stop_mcount_loc[];
1308 1304
1309 void __init ftrace_init(void) 1305 void __init ftrace_init(void)
1310 { 1306 {
1311 unsigned long count, addr, flags; 1307 unsigned long count, addr, flags;
1312 int ret; 1308 int ret;
1313 1309
1314 /* Keep the ftrace pointer to the stub */ 1310 /* Keep the ftrace pointer to the stub */
1315 addr = (unsigned long)ftrace_stub; 1311 addr = (unsigned long)ftrace_stub;
1316 1312
1317 local_irq_save(flags); 1313 local_irq_save(flags);
1318 ftrace_dyn_arch_init(&addr); 1314 ftrace_dyn_arch_init(&addr);
1319 local_irq_restore(flags); 1315 local_irq_restore(flags);
1320 1316
1321 /* ftrace_dyn_arch_init places the return code in addr */ 1317 /* ftrace_dyn_arch_init places the return code in addr */
1322 if (addr) 1318 if (addr)
1323 goto failed; 1319 goto failed;
1324 1320
1325 count = __stop_mcount_loc - __start_mcount_loc; 1321 count = __stop_mcount_loc - __start_mcount_loc;
1326 1322
1327 ret = ftrace_dyn_table_alloc(count); 1323 ret = ftrace_dyn_table_alloc(count);
1328 if (ret) 1324 if (ret)
1329 goto failed; 1325 goto failed;
1330 1326
1331 last_ftrace_enabled = ftrace_enabled = 1; 1327 last_ftrace_enabled = ftrace_enabled = 1;
1332 1328
1333 ret = ftrace_convert_nops(__start_mcount_loc, 1329 ret = ftrace_convert_nops(__start_mcount_loc,
1334 __stop_mcount_loc); 1330 __stop_mcount_loc);
1335 1331
1336 return; 1332 return;
1337 failed: 1333 failed:
1338 ftrace_disabled = 1; 1334 ftrace_disabled = 1;
1339 } 1335 }
1340 1336
1341 #else 1337 #else
1342 1338
1343 static int __init ftrace_nodyn_init(void) 1339 static int __init ftrace_nodyn_init(void)
1344 { 1340 {
1345 ftrace_enabled = 1; 1341 ftrace_enabled = 1;
1346 return 0; 1342 return 0;
1347 } 1343 }
1348 device_initcall(ftrace_nodyn_init); 1344 device_initcall(ftrace_nodyn_init);
1349 1345
1350 # define ftrace_startup() do { } while (0) 1346 # define ftrace_startup() do { } while (0)
1351 # define ftrace_shutdown() do { } while (0) 1347 # define ftrace_shutdown() do { } while (0)
1352 # define ftrace_startup_sysctl() do { } while (0) 1348 # define ftrace_startup_sysctl() do { } while (0)
1353 # define ftrace_shutdown_sysctl() do { } while (0) 1349 # define ftrace_shutdown_sysctl() do { } while (0)
1354 #endif /* CONFIG_DYNAMIC_FTRACE */ 1350 #endif /* CONFIG_DYNAMIC_FTRACE */
1355 1351
1356 /** 1352 /**
1357 * ftrace_kill - kill ftrace 1353 * ftrace_kill - kill ftrace
1358 * 1354 *
1359 * This function should be used by panic code. It stops ftrace 1355 * This function should be used by panic code. It stops ftrace
1360 * but in a not so nice way. If you need to simply kill ftrace 1356 * but in a not so nice way. If you need to simply kill ftrace
1361 * from a non-atomic section, use ftrace_kill. 1357 * from a non-atomic section, use ftrace_kill.
1362 */ 1358 */
1363 void ftrace_kill(void) 1359 void ftrace_kill(void)
1364 { 1360 {
1365 ftrace_disabled = 1; 1361 ftrace_disabled = 1;
1366 ftrace_enabled = 0; 1362 ftrace_enabled = 0;
1367 clear_ftrace_function(); 1363 clear_ftrace_function();
1368 } 1364 }
1369 1365
1370 /** 1366 /**
1371 * register_ftrace_function - register a function for profiling 1367 * register_ftrace_function - register a function for profiling
1372 * @ops - ops structure that holds the function for profiling. 1368 * @ops - ops structure that holds the function for profiling.
1373 * 1369 *
1374 * Register a function to be called by all functions in the 1370 * Register a function to be called by all functions in the
1375 * kernel. 1371 * kernel.
1376 * 1372 *
1377 * Note: @ops->func and all the functions it calls must be labeled 1373 * Note: @ops->func and all the functions it calls must be labeled
1378 * with "notrace", otherwise it will go into a 1374 * with "notrace", otherwise it will go into a
1379 * recursive loop. 1375 * recursive loop.
1380 */ 1376 */
1381 int register_ftrace_function(struct ftrace_ops *ops) 1377 int register_ftrace_function(struct ftrace_ops *ops)
1382 { 1378 {
1383 int ret; 1379 int ret;
1384 1380
1385 if (unlikely(ftrace_disabled)) 1381 if (unlikely(ftrace_disabled))
1386 return -1; 1382 return -1;
1387 1383
1388 mutex_lock(&ftrace_sysctl_lock); 1384 mutex_lock(&ftrace_sysctl_lock);
1389 ret = __register_ftrace_function(ops); 1385 ret = __register_ftrace_function(ops);
1390 ftrace_startup(); 1386 ftrace_startup();
1391 mutex_unlock(&ftrace_sysctl_lock); 1387 mutex_unlock(&ftrace_sysctl_lock);
1392 1388
1393 return ret; 1389 return ret;
1394 } 1390 }
1395 1391
1396 /** 1392 /**
1397 * unregister_ftrace_function - unregister a function for profiling. 1393 * unregister_ftrace_function - unregister a function for profiling.
1398 * @ops - ops structure that holds the function to unregister 1394 * @ops - ops structure that holds the function to unregister
1399 * 1395 *
1400 * Unregister a function that was added to be called by ftrace profiling. 1396 * Unregister a function that was added to be called by ftrace profiling.
1401 */ 1397 */
1402 int unregister_ftrace_function(struct ftrace_ops *ops) 1398 int unregister_ftrace_function(struct ftrace_ops *ops)
1403 { 1399 {
1404 int ret; 1400 int ret;
1405 1401
1406 mutex_lock(&ftrace_sysctl_lock); 1402 mutex_lock(&ftrace_sysctl_lock);
1407 ret = __unregister_ftrace_function(ops); 1403 ret = __unregister_ftrace_function(ops);
1408 ftrace_shutdown(); 1404 ftrace_shutdown();
1409 mutex_unlock(&ftrace_sysctl_lock); 1405 mutex_unlock(&ftrace_sysctl_lock);
1410 1406
1411 return ret; 1407 return ret;
1412 } 1408 }
1413 1409
1414 int 1410 int
1415 ftrace_enable_sysctl(struct ctl_table *table, int write, 1411 ftrace_enable_sysctl(struct ctl_table *table, int write,
1416 struct file *file, void __user *buffer, size_t *lenp, 1412 struct file *file, void __user *buffer, size_t *lenp,
1417 loff_t *ppos) 1413 loff_t *ppos)
1418 { 1414 {
1419 int ret; 1415 int ret;
1420 1416
1421 if (unlikely(ftrace_disabled)) 1417 if (unlikely(ftrace_disabled))
1422 return -ENODEV; 1418 return -ENODEV;
1423 1419
1424 mutex_lock(&ftrace_sysctl_lock); 1420 mutex_lock(&ftrace_sysctl_lock);
1425 1421
1426 ret = proc_dointvec(table, write, file, buffer, lenp, ppos); 1422 ret = proc_dointvec(table, write, file, buffer, lenp, ppos);
1427 1423
1428 if (ret || !write || (last_ftrace_enabled == ftrace_enabled)) 1424 if (ret || !write || (last_ftrace_enabled == ftrace_enabled))
1429 goto out; 1425 goto out;
1430 1426
1431 last_ftrace_enabled = ftrace_enabled; 1427 last_ftrace_enabled = ftrace_enabled;
1432 1428
1433 if (ftrace_enabled) { 1429 if (ftrace_enabled) {
1434 1430
1435 ftrace_startup_sysctl(); 1431 ftrace_startup_sysctl();
1436 1432
1437 /* we are starting ftrace again */ 1433 /* we are starting ftrace again */
1438 if (ftrace_list != &ftrace_list_end) { 1434 if (ftrace_list != &ftrace_list_end) {
1439 if (ftrace_list->next == &ftrace_list_end) 1435 if (ftrace_list->next == &ftrace_list_end)
1440 ftrace_trace_function = ftrace_list->func; 1436 ftrace_trace_function = ftrace_list->func;
1441 else 1437 else
1442 ftrace_trace_function = ftrace_list_func; 1438 ftrace_trace_function = ftrace_list_func;
1443 } 1439 }
1444 1440
1445 } else { 1441 } else {
1446 /* stopping ftrace calls (just send to ftrace_stub) */ 1442 /* stopping ftrace calls (just send to ftrace_stub) */
1447 ftrace_trace_function = ftrace_stub; 1443 ftrace_trace_function = ftrace_stub;
1448 1444
1449 ftrace_shutdown_sysctl(); 1445 ftrace_shutdown_sysctl();
1450 } 1446 }
1451 1447
1452 out: 1448 out:
1453 mutex_unlock(&ftrace_sysctl_lock); 1449 mutex_unlock(&ftrace_sysctl_lock);
1454 return ret; 1450 return ret;
1455 } 1451 }
1456 1452
1457 1453
kernel/trace/ring_buffer.c
1 /* 1 /*
2 * Generic ring buffer 2 * Generic ring buffer
3 * 3 *
4 * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com> 4 * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
5 */ 5 */
6 #include <linux/ring_buffer.h> 6 #include <linux/ring_buffer.h>
7 #include <linux/spinlock.h> 7 #include <linux/spinlock.h>
8 #include <linux/debugfs.h> 8 #include <linux/debugfs.h>
9 #include <linux/uaccess.h> 9 #include <linux/uaccess.h>
10 #include <linux/module.h> 10 #include <linux/module.h>
11 #include <linux/percpu.h> 11 #include <linux/percpu.h>
12 #include <linux/mutex.h> 12 #include <linux/mutex.h>
13 #include <linux/sched.h> /* used for sched_clock() (for now) */ 13 #include <linux/sched.h> /* used for sched_clock() (for now) */
14 #include <linux/init.h> 14 #include <linux/init.h>
15 #include <linux/hash.h> 15 #include <linux/hash.h>
16 #include <linux/list.h> 16 #include <linux/list.h>
17 #include <linux/fs.h> 17 #include <linux/fs.h>
18 18
19 #include "trace.h"
20
21 /* Global flag to disable all recording to ring buffers */
22 static int ring_buffers_off __read_mostly;
23
24 /**
25 * tracing_on - enable all tracing buffers
26 *
27 * This function enables all tracing buffers that may have been
28 * disabled with tracing_off.
29 */
30 void tracing_on(void)
31 {
32 ring_buffers_off = 0;
33 }
34
35 /**
36 * tracing_off - turn off all tracing buffers
37 *
38 * This function stops all tracing buffers from recording data.
39 * It does not disable any overhead the tracers themselves may
40 * be causing. This function simply causes all recording to
41 * the ring buffers to fail.
42 */
43 void tracing_off(void)
44 {
45 ring_buffers_off = 1;
46 }
47
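
For context, a minimal sketch of how the new switch might be used from kernel code, assuming the tracing_on()/tracing_off() prototypes are visible to the caller. The watchdog function, the MY_LIMIT constant and the message are hypothetical; the point is that hitting a condition of interest freezes every ring buffer in place so the events leading up to it are preserved.

#include <linux/kernel.h>
#include <linux/ring_buffer.h>

#define MY_LIMIT 128	/* hypothetical threshold */

/* Hypothetical check: stop all ring buffer recording when a suspect value is seen. */
static void my_watchdog_check(unsigned long value)
{
	if (value > MY_LIMIT) {
		tracing_off();	/* global switch: every ring buffer stops recording */
		printk(KERN_INFO "my_watchdog: limit exceeded, ring buffer recording stopped\n");
	}
}
/*
 * tracing_on() flips the switch back. Both calls just set or clear the
 * ring_buffers_off flag above, so they do not nest.
 */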
19 /* Up this if you want to test the TIME_EXTENTS and normalization */ 48 /* Up this if you want to test the TIME_EXTENTS and normalization */
20 #define DEBUG_SHIFT 0 49 #define DEBUG_SHIFT 0
21 50
22 /* FIXME!!! */ 51 /* FIXME!!! */
23 u64 ring_buffer_time_stamp(int cpu) 52 u64 ring_buffer_time_stamp(int cpu)
24 { 53 {
25 /* shift to debug/test normalization and TIME_EXTENTS */ 54 /* shift to debug/test normalization and TIME_EXTENTS */
26 return sched_clock() << DEBUG_SHIFT; 55 return sched_clock() << DEBUG_SHIFT;
27 } 56 }
28 57
29 void ring_buffer_normalize_time_stamp(int cpu, u64 *ts) 58 void ring_buffer_normalize_time_stamp(int cpu, u64 *ts)
30 { 59 {
31 /* Just stupid testing the normalize function and deltas */ 60 /* Just stupid testing the normalize function and deltas */
32 *ts >>= DEBUG_SHIFT; 61 *ts >>= DEBUG_SHIFT;
33 } 62 }
34 63
35 #define RB_EVNT_HDR_SIZE (sizeof(struct ring_buffer_event)) 64 #define RB_EVNT_HDR_SIZE (sizeof(struct ring_buffer_event))
36 #define RB_ALIGNMENT_SHIFT 2 65 #define RB_ALIGNMENT_SHIFT 2
37 #define RB_ALIGNMENT (1 << RB_ALIGNMENT_SHIFT) 66 #define RB_ALIGNMENT (1 << RB_ALIGNMENT_SHIFT)
38 #define RB_MAX_SMALL_DATA 28 67 #define RB_MAX_SMALL_DATA 28
39 68
40 enum { 69 enum {
41 RB_LEN_TIME_EXTEND = 8, 70 RB_LEN_TIME_EXTEND = 8,
42 RB_LEN_TIME_STAMP = 16, 71 RB_LEN_TIME_STAMP = 16,
43 }; 72 };
44 73
45 /* inline for ring buffer fast paths */ 74 /* inline for ring buffer fast paths */
46 static inline unsigned 75 static inline unsigned
47 rb_event_length(struct ring_buffer_event *event) 76 rb_event_length(struct ring_buffer_event *event)
48 { 77 {
49 unsigned length; 78 unsigned length;
50 79
51 switch (event->type) { 80 switch (event->type) {
52 case RINGBUF_TYPE_PADDING: 81 case RINGBUF_TYPE_PADDING:
53 /* undefined */ 82 /* undefined */
54 return -1; 83 return -1;
55 84
56 case RINGBUF_TYPE_TIME_EXTEND: 85 case RINGBUF_TYPE_TIME_EXTEND:
57 return RB_LEN_TIME_EXTEND; 86 return RB_LEN_TIME_EXTEND;
58 87
59 case RINGBUF_TYPE_TIME_STAMP: 88 case RINGBUF_TYPE_TIME_STAMP:
60 return RB_LEN_TIME_STAMP; 89 return RB_LEN_TIME_STAMP;
61 90
62 case RINGBUF_TYPE_DATA: 91 case RINGBUF_TYPE_DATA:
63 if (event->len) 92 if (event->len)
64 length = event->len << RB_ALIGNMENT_SHIFT; 93 length = event->len << RB_ALIGNMENT_SHIFT;
65 else 94 else
66 length = event->array[0]; 95 length = event->array[0];
67 return length + RB_EVNT_HDR_SIZE; 96 return length + RB_EVNT_HDR_SIZE;
68 default: 97 default:
69 BUG(); 98 BUG();
70 } 99 }
71 /* not hit */ 100 /* not hit */
72 return 0; 101 return 0;
73 } 102 }
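
For the RINGBUF_TYPE_DATA case above, a tiny userspace sketch of the arithmetic may help: a non-zero len field gives the payload size in 4-byte units, while len == 0 means the size is read from array[0] (with the payload then starting at array[1], as rb_event_data() further down shows). Plain integers stand in for the event struct here, and the header size constant is a stand-in for RB_EVNT_HDR_SIZE.

#include <stdio.h>

#define ALIGNMENT_SHIFT	2	/* mirrors RB_ALIGNMENT_SHIFT above */
#define HDR_SIZE	4	/* stand-in for RB_EVNT_HDR_SIZE    */

/* Same branch as the RINGBUF_TYPE_DATA case of rb_event_length(). */
static unsigned data_event_length(unsigned len_field, unsigned array0)
{
	unsigned length;

	if (len_field)
		length = len_field << ALIGNMENT_SHIFT;	/* small event: 4-byte units */
	else
		length = array0;			/* big event: length in array[0] */
	return length + HDR_SIZE;
}

int main(void)
{
	printf("%u\n", data_event_length(5, 0));	/* 5 * 4 + 4 = 24  */
	printf("%u\n", data_event_length(0, 100));	/* 100 + 4   = 104 */
	return 0;
}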
74 103
75 /** 104 /**
76 * ring_buffer_event_length - return the length of the event 105 * ring_buffer_event_length - return the length of the event
77 * @event: the event to get the length of 106 * @event: the event to get the length of
78 */ 107 */
79 unsigned ring_buffer_event_length(struct ring_buffer_event *event) 108 unsigned ring_buffer_event_length(struct ring_buffer_event *event)
80 { 109 {
81 return rb_event_length(event); 110 return rb_event_length(event);
82 } 111 }
83 112
84 /* inline for ring buffer fast paths */ 113 /* inline for ring buffer fast paths */
85 static inline void * 114 static inline void *
86 rb_event_data(struct ring_buffer_event *event) 115 rb_event_data(struct ring_buffer_event *event)
87 { 116 {
88 BUG_ON(event->type != RINGBUF_TYPE_DATA); 117 BUG_ON(event->type != RINGBUF_TYPE_DATA);
89 /* If length is in len field, then array[0] has the data */ 118 /* If length is in len field, then array[0] has the data */
90 if (event->len) 119 if (event->len)
91 return (void *)&event->array[0]; 120 return (void *)&event->array[0];
92 /* Otherwise length is in array[0] and array[1] has the data */ 121 /* Otherwise length is in array[0] and array[1] has the data */
93 return (void *)&event->array[1]; 122 return (void *)&event->array[1];
94 } 123 }
95 124
96 /** 125 /**
97 * ring_buffer_event_data - return the data of the event 126 * ring_buffer_event_data - return the data of the event
98 * @event: the event to get the data from 127 * @event: the event to get the data from
99 */ 128 */
100 void *ring_buffer_event_data(struct ring_buffer_event *event) 129 void *ring_buffer_event_data(struct ring_buffer_event *event)
101 { 130 {
102 return rb_event_data(event); 131 return rb_event_data(event);
103 } 132 }
104 133
105 #define for_each_buffer_cpu(buffer, cpu) \ 134 #define for_each_buffer_cpu(buffer, cpu) \
106 for_each_cpu_mask(cpu, buffer->cpumask) 135 for_each_cpu_mask(cpu, buffer->cpumask)
107 136
108 #define TS_SHIFT 27 137 #define TS_SHIFT 27
109 #define TS_MASK ((1ULL << TS_SHIFT) - 1) 138 #define TS_MASK ((1ULL << TS_SHIFT) - 1)
110 #define TS_DELTA_TEST (~TS_MASK) 139 #define TS_DELTA_TEST (~TS_MASK)
111 140
112 /* 141 /*
113 * This hack stolen from mm/slob.c. 142 * This hack stolen from mm/slob.c.
114 * We can store per page timing information in the page frame of the page. 143 * We can store per page timing information in the page frame of the page.
115 * Thanks to Peter Zijlstra for suggesting this idea. 144 * Thanks to Peter Zijlstra for suggesting this idea.
116 */ 145 */
117 struct buffer_page { 146 struct buffer_page {
118 u64 time_stamp; /* page time stamp */ 147 u64 time_stamp; /* page time stamp */
119 local_t write; /* index for next write */ 148 local_t write; /* index for next write */
120 local_t commit; /* write committed index */ 149 local_t commit; /* write committed index */
121 unsigned read; /* index for next read */ 150 unsigned read; /* index for next read */
122 struct list_head list; /* list of free pages */ 151 struct list_head list; /* list of free pages */
123 void *page; /* Actual data page */ 152 void *page; /* Actual data page */
124 }; 153 };
125 154
126 /* 155 /*
127 * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing 156 * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing
128 * this issue out. 157 * this issue out.
129 */ 158 */
130 static inline void free_buffer_page(struct buffer_page *bpage) 159 static inline void free_buffer_page(struct buffer_page *bpage)
131 { 160 {
132 if (bpage->page) 161 if (bpage->page)
133 free_page((unsigned long)bpage->page); 162 free_page((unsigned long)bpage->page);
134 kfree(bpage); 163 kfree(bpage);
135 } 164 }
136 165
137 /* 166 /*
138 * We need to fit the time_stamp delta into 27 bits. 167 * We need to fit the time_stamp delta into 27 bits.
139 */ 168 */
140 static inline int test_time_stamp(u64 delta) 169 static inline int test_time_stamp(u64 delta)
141 { 170 {
142 if (delta & TS_DELTA_TEST) 171 if (delta & TS_DELTA_TEST)
143 return 1; 172 return 1;
144 return 0; 173 return 0;
145 } 174 }
146 175
147 #define BUF_PAGE_SIZE PAGE_SIZE 176 #define BUF_PAGE_SIZE PAGE_SIZE
148 177
149 /* 178 /*
150 * head_page == tail_page && head == tail then buffer is empty. 179 * head_page == tail_page && head == tail then buffer is empty.
151 */ 180 */
152 struct ring_buffer_per_cpu { 181 struct ring_buffer_per_cpu {
153 int cpu; 182 int cpu;
154 struct ring_buffer *buffer; 183 struct ring_buffer *buffer;
155 spinlock_t lock; 184 spinlock_t lock;
156 struct lock_class_key lock_key; 185 struct lock_class_key lock_key;
157 struct list_head pages; 186 struct list_head pages;
158 struct buffer_page *head_page; /* read from head */ 187 struct buffer_page *head_page; /* read from head */
159 struct buffer_page *tail_page; /* write to tail */ 188 struct buffer_page *tail_page; /* write to tail */
160 struct buffer_page *commit_page; /* committed pages */ 189 struct buffer_page *commit_page; /* committed pages */
161 struct buffer_page *reader_page; 190 struct buffer_page *reader_page;
162 unsigned long overrun; 191 unsigned long overrun;
163 unsigned long entries; 192 unsigned long entries;
164 u64 write_stamp; 193 u64 write_stamp;
165 u64 read_stamp; 194 u64 read_stamp;
166 atomic_t record_disabled; 195 atomic_t record_disabled;
167 }; 196 };
168 197
169 struct ring_buffer { 198 struct ring_buffer {
170 unsigned long size; 199 unsigned long size;
171 unsigned pages; 200 unsigned pages;
172 unsigned flags; 201 unsigned flags;
173 int cpus; 202 int cpus;
174 cpumask_t cpumask; 203 cpumask_t cpumask;
175 atomic_t record_disabled; 204 atomic_t record_disabled;
176 205
177 struct mutex mutex; 206 struct mutex mutex;
178 207
179 struct ring_buffer_per_cpu **buffers; 208 struct ring_buffer_per_cpu **buffers;
180 }; 209 };
181 210
182 struct ring_buffer_iter { 211 struct ring_buffer_iter {
183 struct ring_buffer_per_cpu *cpu_buffer; 212 struct ring_buffer_per_cpu *cpu_buffer;
184 unsigned long head; 213 unsigned long head;
185 struct buffer_page *head_page; 214 struct buffer_page *head_page;
186 u64 read_stamp; 215 u64 read_stamp;
187 }; 216 };
188 217
189 #define RB_WARN_ON(buffer, cond) \ 218 #define RB_WARN_ON(buffer, cond) \
190 do { \ 219 do { \
191 if (unlikely(cond)) { \ 220 if (unlikely(cond)) { \
192 atomic_inc(&buffer->record_disabled); \ 221 atomic_inc(&buffer->record_disabled); \
193 WARN_ON(1); \ 222 WARN_ON(1); \
194 } \ 223 } \
195 } while (0) 224 } while (0)
196 225
197 #define RB_WARN_ON_RET(buffer, cond) \ 226 #define RB_WARN_ON_RET(buffer, cond) \
198 do { \ 227 do { \
199 if (unlikely(cond)) { \ 228 if (unlikely(cond)) { \
200 atomic_inc(&buffer->record_disabled); \ 229 atomic_inc(&buffer->record_disabled); \
201 WARN_ON(1); \ 230 WARN_ON(1); \
202 return -1; \ 231 return -1; \
203 } \ 232 } \
204 } while (0) 233 } while (0)
205 234
206 #define RB_WARN_ON_ONCE(buffer, cond) \ 235 #define RB_WARN_ON_ONCE(buffer, cond) \
207 do { \ 236 do { \
208 static int once; \ 237 static int once; \
209 if (unlikely(cond) && !once) { \ 238 if (unlikely(cond) && !once) { \
210 once++; \ 239 once++; \
211 atomic_inc(&buffer->record_disabled); \ 240 atomic_inc(&buffer->record_disabled); \
212 WARN_ON(1); \ 241 WARN_ON(1); \
213 } \ 242 } \
214 } while (0) 243 } while (0)
215 244
216 /** 245 /**
217 * check_pages - integrity check of buffer pages 246 * check_pages - integrity check of buffer pages
218 * @cpu_buffer: CPU buffer with pages to test 247 * @cpu_buffer: CPU buffer with pages to test
219 * 248 *
220 * As a safety measure we check to make sure the data pages have not 249 * As a safety measure we check to make sure the data pages have not
221 * been corrupted. 250 * been corrupted.
222 */ 251 */
223 static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) 252 static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
224 { 253 {
225 struct list_head *head = &cpu_buffer->pages; 254 struct list_head *head = &cpu_buffer->pages;
226 struct buffer_page *page, *tmp; 255 struct buffer_page *page, *tmp;
227 256
228 RB_WARN_ON_RET(cpu_buffer, head->next->prev != head); 257 RB_WARN_ON_RET(cpu_buffer, head->next->prev != head);
229 RB_WARN_ON_RET(cpu_buffer, head->prev->next != head); 258 RB_WARN_ON_RET(cpu_buffer, head->prev->next != head);
230 259
231 list_for_each_entry_safe(page, tmp, head, list) { 260 list_for_each_entry_safe(page, tmp, head, list) {
232 RB_WARN_ON_RET(cpu_buffer, 261 RB_WARN_ON_RET(cpu_buffer,
233 page->list.next->prev != &page->list); 262 page->list.next->prev != &page->list);
234 RB_WARN_ON_RET(cpu_buffer, 263 RB_WARN_ON_RET(cpu_buffer,
235 page->list.prev->next != &page->list); 264 page->list.prev->next != &page->list);
236 } 265 }
237 266
238 return 0; 267 return 0;
239 } 268 }
240 269
241 static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, 270 static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
242 unsigned nr_pages) 271 unsigned nr_pages)
243 { 272 {
244 struct list_head *head = &cpu_buffer->pages; 273 struct list_head *head = &cpu_buffer->pages;
245 struct buffer_page *page, *tmp; 274 struct buffer_page *page, *tmp;
246 unsigned long addr; 275 unsigned long addr;
247 LIST_HEAD(pages); 276 LIST_HEAD(pages);
248 unsigned i; 277 unsigned i;
249 278
250 for (i = 0; i < nr_pages; i++) { 279 for (i = 0; i < nr_pages; i++) {
251 page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()), 280 page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()),
252 GFP_KERNEL, cpu_to_node(cpu_buffer->cpu)); 281 GFP_KERNEL, cpu_to_node(cpu_buffer->cpu));
253 if (!page) 282 if (!page)
254 goto free_pages; 283 goto free_pages;
255 list_add(&page->list, &pages); 284 list_add(&page->list, &pages);
256 285
257 addr = __get_free_page(GFP_KERNEL); 286 addr = __get_free_page(GFP_KERNEL);
258 if (!addr) 287 if (!addr)
259 goto free_pages; 288 goto free_pages;
260 page->page = (void *)addr; 289 page->page = (void *)addr;
261 } 290 }
262 291
263 list_splice(&pages, head); 292 list_splice(&pages, head);
264 293
265 rb_check_pages(cpu_buffer); 294 rb_check_pages(cpu_buffer);
266 295
267 return 0; 296 return 0;
268 297
269 free_pages: 298 free_pages:
270 list_for_each_entry_safe(page, tmp, &pages, list) { 299 list_for_each_entry_safe(page, tmp, &pages, list) {
271 list_del_init(&page->list); 300 list_del_init(&page->list);
272 free_buffer_page(page); 301 free_buffer_page(page);
273 } 302 }
274 return -ENOMEM; 303 return -ENOMEM;
275 } 304 }
276 305
277 static struct ring_buffer_per_cpu * 306 static struct ring_buffer_per_cpu *
278 rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) 307 rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
279 { 308 {
280 struct ring_buffer_per_cpu *cpu_buffer; 309 struct ring_buffer_per_cpu *cpu_buffer;
281 struct buffer_page *page; 310 struct buffer_page *page;
282 unsigned long addr; 311 unsigned long addr;
283 int ret; 312 int ret;
284 313
285 cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()), 314 cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()),
286 GFP_KERNEL, cpu_to_node(cpu)); 315 GFP_KERNEL, cpu_to_node(cpu));
287 if (!cpu_buffer) 316 if (!cpu_buffer)
288 return NULL; 317 return NULL;
289 318
290 cpu_buffer->cpu = cpu; 319 cpu_buffer->cpu = cpu;
291 cpu_buffer->buffer = buffer; 320 cpu_buffer->buffer = buffer;
292 spin_lock_init(&cpu_buffer->lock); 321 spin_lock_init(&cpu_buffer->lock);
293 INIT_LIST_HEAD(&cpu_buffer->pages); 322 INIT_LIST_HEAD(&cpu_buffer->pages);
294 323
295 page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()), 324 page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()),
296 GFP_KERNEL, cpu_to_node(cpu)); 325 GFP_KERNEL, cpu_to_node(cpu));
297 if (!page) 326 if (!page)
298 goto fail_free_buffer; 327 goto fail_free_buffer;
299 328
300 cpu_buffer->reader_page = page; 329 cpu_buffer->reader_page = page;
301 addr = __get_free_page(GFP_KERNEL); 330 addr = __get_free_page(GFP_KERNEL);
302 if (!addr) 331 if (!addr)
303 goto fail_free_reader; 332 goto fail_free_reader;
304 page->page = (void *)addr; 333 page->page = (void *)addr;
305 334
306 INIT_LIST_HEAD(&cpu_buffer->reader_page->list); 335 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
307 336
308 ret = rb_allocate_pages(cpu_buffer, buffer->pages); 337 ret = rb_allocate_pages(cpu_buffer, buffer->pages);
309 if (ret < 0) 338 if (ret < 0)
310 goto fail_free_reader; 339 goto fail_free_reader;
311 340
312 cpu_buffer->head_page 341 cpu_buffer->head_page
313 = list_entry(cpu_buffer->pages.next, struct buffer_page, list); 342 = list_entry(cpu_buffer->pages.next, struct buffer_page, list);
314 cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page; 343 cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page;
315 344
316 return cpu_buffer; 345 return cpu_buffer;
317 346
318 fail_free_reader: 347 fail_free_reader:
319 free_buffer_page(cpu_buffer->reader_page); 348 free_buffer_page(cpu_buffer->reader_page);
320 349
321 fail_free_buffer: 350 fail_free_buffer:
322 kfree(cpu_buffer); 351 kfree(cpu_buffer);
323 return NULL; 352 return NULL;
324 } 353 }
325 354
326 static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) 355 static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
327 { 356 {
328 struct list_head *head = &cpu_buffer->pages; 357 struct list_head *head = &cpu_buffer->pages;
329 struct buffer_page *page, *tmp; 358 struct buffer_page *page, *tmp;
330 359
331 list_del_init(&cpu_buffer->reader_page->list); 360 list_del_init(&cpu_buffer->reader_page->list);
332 free_buffer_page(cpu_buffer->reader_page); 361 free_buffer_page(cpu_buffer->reader_page);
333 362
334 list_for_each_entry_safe(page, tmp, head, list) { 363 list_for_each_entry_safe(page, tmp, head, list) {
335 list_del_init(&page->list); 364 list_del_init(&page->list);
336 free_buffer_page(page); 365 free_buffer_page(page);
337 } 366 }
338 kfree(cpu_buffer); 367 kfree(cpu_buffer);
339 } 368 }
340 369
341 /* 370 /*
342 * Causes compile errors if the struct buffer_page gets bigger 371 * Causes compile errors if the struct buffer_page gets bigger
343 * than the struct page. 372 * than the struct page.
344 */ 373 */
345 extern int ring_buffer_page_too_big(void); 374 extern int ring_buffer_page_too_big(void);
346 375
347 /** 376 /**
348 * ring_buffer_alloc - allocate a new ring_buffer 377 * ring_buffer_alloc - allocate a new ring_buffer
349 * @size: the size in bytes that is needed. 378 * @size: the size in bytes that is needed.
350 * @flags: attributes to set for the ring buffer. 379 * @flags: attributes to set for the ring buffer.
351 * 380 *
352 * Currently the only flag that is available is the RB_FL_OVERWRITE 381 * Currently the only flag that is available is the RB_FL_OVERWRITE
353 * flag. This flag means that the buffer will overwrite old data 382 * flag. This flag means that the buffer will overwrite old data
354 * when the buffer wraps. If this flag is not set, the buffer will 383 * when the buffer wraps. If this flag is not set, the buffer will
355 * drop data when the tail hits the head. 384 * drop data when the tail hits the head.
356 */ 385 */
357 struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags) 386 struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
358 { 387 {
359 struct ring_buffer *buffer; 388 struct ring_buffer *buffer;
360 int bsize; 389 int bsize;
361 int cpu; 390 int cpu;
362 391
363 /* Paranoid! Optimizes out when all is well */ 392 /* Paranoid! Optimizes out when all is well */
364 if (sizeof(struct buffer_page) > sizeof(struct page)) 393 if (sizeof(struct buffer_page) > sizeof(struct page))
365 ring_buffer_page_too_big(); 394 ring_buffer_page_too_big();
366 395
367 396
368 /* keep it in its own cache line */ 397 /* keep it in its own cache line */
369 buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()), 398 buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()),
370 GFP_KERNEL); 399 GFP_KERNEL);
371 if (!buffer) 400 if (!buffer)
372 return NULL; 401 return NULL;
373 402
374 buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); 403 buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
375 buffer->flags = flags; 404 buffer->flags = flags;
376 405
377 /* need at least two pages */ 406 /* need at least two pages */
378 if (buffer->pages == 1) 407 if (buffer->pages == 1)
379 buffer->pages++; 408 buffer->pages++;
380 409
381 buffer->cpumask = cpu_possible_map; 410 buffer->cpumask = cpu_possible_map;
382 buffer->cpus = nr_cpu_ids; 411 buffer->cpus = nr_cpu_ids;
383 412
384 bsize = sizeof(void *) * nr_cpu_ids; 413 bsize = sizeof(void *) * nr_cpu_ids;
385 buffer->buffers = kzalloc(ALIGN(bsize, cache_line_size()), 414 buffer->buffers = kzalloc(ALIGN(bsize, cache_line_size()),
386 GFP_KERNEL); 415 GFP_KERNEL);
387 if (!buffer->buffers) 416 if (!buffer->buffers)
388 goto fail_free_buffer; 417 goto fail_free_buffer;
389 418
390 for_each_buffer_cpu(buffer, cpu) { 419 for_each_buffer_cpu(buffer, cpu) {
391 buffer->buffers[cpu] = 420 buffer->buffers[cpu] =
392 rb_allocate_cpu_buffer(buffer, cpu); 421 rb_allocate_cpu_buffer(buffer, cpu);
393 if (!buffer->buffers[cpu]) 422 if (!buffer->buffers[cpu])
394 goto fail_free_buffers; 423 goto fail_free_buffers;
395 } 424 }
396 425
397 mutex_init(&buffer->mutex); 426 mutex_init(&buffer->mutex);
398 427
399 return buffer; 428 return buffer;
400 429
401 fail_free_buffers: 430 fail_free_buffers:
402 for_each_buffer_cpu(buffer, cpu) { 431 for_each_buffer_cpu(buffer, cpu) {
403 if (buffer->buffers[cpu]) 432 if (buffer->buffers[cpu])
404 rb_free_cpu_buffer(buffer->buffers[cpu]); 433 rb_free_cpu_buffer(buffer->buffers[cpu]);
405 } 434 }
406 kfree(buffer->buffers); 435 kfree(buffer->buffers);
407 436
408 fail_free_buffer: 437 fail_free_buffer:
409 kfree(buffer); 438 kfree(buffer);
410 return NULL; 439 return NULL;
411 } 440 }
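
The size handling at the top of ring_buffer_alloc() is easy to check in isolation: the requested byte count is rounded up to whole buffer pages, and a single-page result is bumped to two, per the "need at least two pages" comment. A small userspace sketch of that arithmetic, with 4096 standing in for BUF_PAGE_SIZE:

#include <stdio.h>

#define BUF_PAGE_SIZE 4096u
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

/* Mirrors the page calculation in ring_buffer_alloc() above. */
static unsigned pages_for(unsigned long size)
{
	unsigned pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);

	if (pages == 1)		/* need at least two pages */
		pages++;
	return pages;
}

int main(void)
{
	printf("%u %u %u\n", pages_for(1), pages_for(4096), pages_for(10000));
	/* prints: 2 2 3 */
	return 0;
}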
412 441
413 /** 442 /**
414 * ring_buffer_free - free a ring buffer. 443 * ring_buffer_free - free a ring buffer.
415 * @buffer: the buffer to free. 444 * @buffer: the buffer to free.
416 */ 445 */
417 void 446 void
418 ring_buffer_free(struct ring_buffer *buffer) 447 ring_buffer_free(struct ring_buffer *buffer)
419 { 448 {
420 int cpu; 449 int cpu;
421 450
422 for_each_buffer_cpu(buffer, cpu) 451 for_each_buffer_cpu(buffer, cpu)
423 rb_free_cpu_buffer(buffer->buffers[cpu]); 452 rb_free_cpu_buffer(buffer->buffers[cpu]);
424 453
425 kfree(buffer); 454 kfree(buffer);
426 } 455 }
427 456
428 static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer); 457 static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);
429 458
430 static void 459 static void
431 rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages) 460 rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
432 { 461 {
433 struct buffer_page *page; 462 struct buffer_page *page;
434 struct list_head *p; 463 struct list_head *p;
435 unsigned i; 464 unsigned i;
436 465
437 atomic_inc(&cpu_buffer->record_disabled); 466 atomic_inc(&cpu_buffer->record_disabled);
438 synchronize_sched(); 467 synchronize_sched();
439 468
440 for (i = 0; i < nr_pages; i++) { 469 for (i = 0; i < nr_pages; i++) {
441 BUG_ON(list_empty(&cpu_buffer->pages)); 470 BUG_ON(list_empty(&cpu_buffer->pages));
442 p = cpu_buffer->pages.next; 471 p = cpu_buffer->pages.next;
443 page = list_entry(p, struct buffer_page, list); 472 page = list_entry(p, struct buffer_page, list);
444 list_del_init(&page->list); 473 list_del_init(&page->list);
445 free_buffer_page(page); 474 free_buffer_page(page);
446 } 475 }
447 BUG_ON(list_empty(&cpu_buffer->pages)); 476 BUG_ON(list_empty(&cpu_buffer->pages));
448 477
449 rb_reset_cpu(cpu_buffer); 478 rb_reset_cpu(cpu_buffer);
450 479
451 rb_check_pages(cpu_buffer); 480 rb_check_pages(cpu_buffer);
452 481
453 atomic_dec(&cpu_buffer->record_disabled); 482 atomic_dec(&cpu_buffer->record_disabled);
454 483
455 } 484 }
456 485
457 static void 486 static void
458 rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer, 487 rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
459 struct list_head *pages, unsigned nr_pages) 488 struct list_head *pages, unsigned nr_pages)
460 { 489 {
461 struct buffer_page *page; 490 struct buffer_page *page;
462 struct list_head *p; 491 struct list_head *p;
463 unsigned i; 492 unsigned i;
464 493
465 atomic_inc(&cpu_buffer->record_disabled); 494 atomic_inc(&cpu_buffer->record_disabled);
466 synchronize_sched(); 495 synchronize_sched();
467 496
468 for (i = 0; i < nr_pages; i++) { 497 for (i = 0; i < nr_pages; i++) {
469 BUG_ON(list_empty(pages)); 498 BUG_ON(list_empty(pages));
470 p = pages->next; 499 p = pages->next;
471 page = list_entry(p, struct buffer_page, list); 500 page = list_entry(p, struct buffer_page, list);
472 list_del_init(&page->list); 501 list_del_init(&page->list);
473 list_add_tail(&page->list, &cpu_buffer->pages); 502 list_add_tail(&page->list, &cpu_buffer->pages);
474 } 503 }
475 rb_reset_cpu(cpu_buffer); 504 rb_reset_cpu(cpu_buffer);
476 505
477 rb_check_pages(cpu_buffer); 506 rb_check_pages(cpu_buffer);
478 507
479 atomic_dec(&cpu_buffer->record_disabled); 508 atomic_dec(&cpu_buffer->record_disabled);
480 } 509 }
481 510
482 /** 511 /**
483 * ring_buffer_resize - resize the ring buffer 512 * ring_buffer_resize - resize the ring buffer
484 * @buffer: the buffer to resize. 513 * @buffer: the buffer to resize.
485 * @size: the new size. 514 * @size: the new size.
486 * 515 *
487 * The tracer is responsible for making sure that the buffer is 516 * The tracer is responsible for making sure that the buffer is
488 * not being used while changing the size. 517 * not being used while changing the size.
489 * Note: We may be able to change the above requirement by using 518 * Note: We may be able to change the above requirement by using
490 * RCU synchronizations. 519 * RCU synchronizations.
491 * 520 *
492 * Minimum size is 2 * BUF_PAGE_SIZE. 521 * Minimum size is 2 * BUF_PAGE_SIZE.
493 * 522 *
494 * Returns -1 on failure. 523 * Returns -1 on failure.
495 */ 524 */
496 int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) 525 int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
497 { 526 {
498 struct ring_buffer_per_cpu *cpu_buffer; 527 struct ring_buffer_per_cpu *cpu_buffer;
499 unsigned nr_pages, rm_pages, new_pages; 528 unsigned nr_pages, rm_pages, new_pages;
500 struct buffer_page *page, *tmp; 529 struct buffer_page *page, *tmp;
501 unsigned long buffer_size; 530 unsigned long buffer_size;
502 unsigned long addr; 531 unsigned long addr;
503 LIST_HEAD(pages); 532 LIST_HEAD(pages);
504 int i, cpu; 533 int i, cpu;
505 534
506 size = DIV_ROUND_UP(size, BUF_PAGE_SIZE); 535 size = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
507 size *= BUF_PAGE_SIZE; 536 size *= BUF_PAGE_SIZE;
508 buffer_size = buffer->pages * BUF_PAGE_SIZE; 537 buffer_size = buffer->pages * BUF_PAGE_SIZE;
509 538
510 /* we need a minimum of two pages */ 539 /* we need a minimum of two pages */
511 if (size < BUF_PAGE_SIZE * 2) 540 if (size < BUF_PAGE_SIZE * 2)
512 size = BUF_PAGE_SIZE * 2; 541 size = BUF_PAGE_SIZE * 2;
513 542
514 if (size == buffer_size) 543 if (size == buffer_size)
515 return size; 544 return size;
516 545
517 mutex_lock(&buffer->mutex); 546 mutex_lock(&buffer->mutex);
518 547
519 nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); 548 nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
520 549
521 if (size < buffer_size) { 550 if (size < buffer_size) {
522 551
523 /* easy case, just free pages */ 552 /* easy case, just free pages */
524 BUG_ON(nr_pages >= buffer->pages); 553 BUG_ON(nr_pages >= buffer->pages);
525 554
526 rm_pages = buffer->pages - nr_pages; 555 rm_pages = buffer->pages - nr_pages;
527 556
528 for_each_buffer_cpu(buffer, cpu) { 557 for_each_buffer_cpu(buffer, cpu) {
529 cpu_buffer = buffer->buffers[cpu]; 558 cpu_buffer = buffer->buffers[cpu];
530 rb_remove_pages(cpu_buffer, rm_pages); 559 rb_remove_pages(cpu_buffer, rm_pages);
531 } 560 }
532 goto out; 561 goto out;
533 } 562 }
534 563
535 /* 564 /*
536 * This is a bit more difficult. We only want to add pages 565 * This is a bit more difficult. We only want to add pages
537 * when we can allocate enough for all CPUs. We do this 566 * when we can allocate enough for all CPUs. We do this
538 * by allocating all the pages and storing them on a local 567 * by allocating all the pages and storing them on a local
539 * link list. If we succeed in our allocation, then we 568 * link list. If we succeed in our allocation, then we
540 * add these pages to the cpu_buffers. Otherwise we just free 569 * add these pages to the cpu_buffers. Otherwise we just free
541 * them all and return -ENOMEM; 570 * them all and return -ENOMEM;
542 */ 571 */
543 BUG_ON(nr_pages <= buffer->pages); 572 BUG_ON(nr_pages <= buffer->pages);
544 new_pages = nr_pages - buffer->pages; 573 new_pages = nr_pages - buffer->pages;
545 574
546 for_each_buffer_cpu(buffer, cpu) { 575 for_each_buffer_cpu(buffer, cpu) {
547 for (i = 0; i < new_pages; i++) { 576 for (i = 0; i < new_pages; i++) {
548 page = kzalloc_node(ALIGN(sizeof(*page), 577 page = kzalloc_node(ALIGN(sizeof(*page),
549 cache_line_size()), 578 cache_line_size()),
550 GFP_KERNEL, cpu_to_node(cpu)); 579 GFP_KERNEL, cpu_to_node(cpu));
551 if (!page) 580 if (!page)
552 goto free_pages; 581 goto free_pages;
553 list_add(&page->list, &pages); 582 list_add(&page->list, &pages);
554 addr = __get_free_page(GFP_KERNEL); 583 addr = __get_free_page(GFP_KERNEL);
555 if (!addr) 584 if (!addr)
556 goto free_pages; 585 goto free_pages;
557 page->page = (void *)addr; 586 page->page = (void *)addr;
558 } 587 }
559 } 588 }
560 589
561 for_each_buffer_cpu(buffer, cpu) { 590 for_each_buffer_cpu(buffer, cpu) {
562 cpu_buffer = buffer->buffers[cpu]; 591 cpu_buffer = buffer->buffers[cpu];
563 rb_insert_pages(cpu_buffer, &pages, new_pages); 592 rb_insert_pages(cpu_buffer, &pages, new_pages);
564 } 593 }
565 594
566 BUG_ON(!list_empty(&pages)); 595 BUG_ON(!list_empty(&pages));
567 596
568 out: 597 out:
569 buffer->pages = nr_pages; 598 buffer->pages = nr_pages;
570 mutex_unlock(&buffer->mutex); 599 mutex_unlock(&buffer->mutex);
571 600
572 return size; 601 return size;
573 602
574 free_pages: 603 free_pages:
575 list_for_each_entry_safe(page, tmp, &pages, list) { 604 list_for_each_entry_safe(page, tmp, &pages, list) {
576 list_del_init(&page->list); 605 list_del_init(&page->list);
577 free_buffer_page(page); 606 free_buffer_page(page);
578 } 607 }
579 return -ENOMEM; 608 return -ENOMEM;
580 } 609 }
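
The grow path's comment above spells out the strategy: allocate every new page onto a private list first, and only when all allocations succeed splice them into the per-cpu buffers; otherwise free the partial set and return -ENOMEM. Here is a userspace sketch of the same all-or-nothing pattern, using an ordinary singly linked list rather than the kernel's list_head.

#include <stdlib.h>
#include <errno.h>

struct node {
	struct node *next;
	void *payload;
};

/*
 * Allocate "count" nodes (plus a page-sized payload each) onto a private
 * list; hand the whole batch over only if every allocation succeeded.
 */
static int alloc_all_or_nothing(struct node **out, unsigned count)
{
	struct node *pending = NULL;
	struct node *n;
	unsigned i;

	for (i = 0; i < count; i++) {
		n = malloc(sizeof(*n));
		if (!n)
			goto free_pending;
		n->payload = malloc(4096);	/* stand-in for __get_free_page() */
		if (!n->payload) {
			free(n);
			goto free_pending;
		}
		n->next = pending;		/* keep on the private list */
		pending = n;
	}

	*out = pending;		/* commit: caller now owns the whole batch */
	return 0;

free_pending:			/* roll back everything allocated so far */
	while (pending) {
		n = pending;
		pending = pending->next;
		free(n->payload);
		free(n);
	}
	return -ENOMEM;
}

int main(void)
{
	struct node *pages = NULL;

	return alloc_all_or_nothing(&pages, 8) ? 1 : 0;	/* freeing omitted */
}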
581 610
582 static inline int rb_null_event(struct ring_buffer_event *event) 611 static inline int rb_null_event(struct ring_buffer_event *event)
583 { 612 {
584 return event->type == RINGBUF_TYPE_PADDING; 613 return event->type == RINGBUF_TYPE_PADDING;
585 } 614 }
586 615
587 static inline void *__rb_page_index(struct buffer_page *page, unsigned index) 616 static inline void *__rb_page_index(struct buffer_page *page, unsigned index)
588 { 617 {
589 return page->page + index; 618 return page->page + index;
590 } 619 }
591 620
592 static inline struct ring_buffer_event * 621 static inline struct ring_buffer_event *
593 rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer) 622 rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer)
594 { 623 {
595 return __rb_page_index(cpu_buffer->reader_page, 624 return __rb_page_index(cpu_buffer->reader_page,
596 cpu_buffer->reader_page->read); 625 cpu_buffer->reader_page->read);
597 } 626 }
598 627
599 static inline struct ring_buffer_event * 628 static inline struct ring_buffer_event *
600 rb_head_event(struct ring_buffer_per_cpu *cpu_buffer) 629 rb_head_event(struct ring_buffer_per_cpu *cpu_buffer)
601 { 630 {
602 return __rb_page_index(cpu_buffer->head_page, 631 return __rb_page_index(cpu_buffer->head_page,
603 cpu_buffer->head_page->read); 632 cpu_buffer->head_page->read);
604 } 633 }
605 634
606 static inline struct ring_buffer_event * 635 static inline struct ring_buffer_event *
607 rb_iter_head_event(struct ring_buffer_iter *iter) 636 rb_iter_head_event(struct ring_buffer_iter *iter)
608 { 637 {
609 return __rb_page_index(iter->head_page, iter->head); 638 return __rb_page_index(iter->head_page, iter->head);
610 } 639 }
611 640
612 static inline unsigned rb_page_write(struct buffer_page *bpage) 641 static inline unsigned rb_page_write(struct buffer_page *bpage)
613 { 642 {
614 return local_read(&bpage->write); 643 return local_read(&bpage->write);
615 } 644 }
616 645
617 static inline unsigned rb_page_commit(struct buffer_page *bpage) 646 static inline unsigned rb_page_commit(struct buffer_page *bpage)
618 { 647 {
619 return local_read(&bpage->commit); 648 return local_read(&bpage->commit);
620 } 649 }
621 650
622 /* Size is determined by what has been committed */ 651 /* Size is determined by what has been committed */
623 static inline unsigned rb_page_size(struct buffer_page *bpage) 652 static inline unsigned rb_page_size(struct buffer_page *bpage)
624 { 653 {
625 return rb_page_commit(bpage); 654 return rb_page_commit(bpage);
626 } 655 }
627 656
628 static inline unsigned 657 static inline unsigned
629 rb_commit_index(struct ring_buffer_per_cpu *cpu_buffer) 658 rb_commit_index(struct ring_buffer_per_cpu *cpu_buffer)
630 { 659 {
631 return rb_page_commit(cpu_buffer->commit_page); 660 return rb_page_commit(cpu_buffer->commit_page);
632 } 661 }
633 662
634 static inline unsigned rb_head_size(struct ring_buffer_per_cpu *cpu_buffer) 663 static inline unsigned rb_head_size(struct ring_buffer_per_cpu *cpu_buffer)
635 { 664 {
636 return rb_page_commit(cpu_buffer->head_page); 665 return rb_page_commit(cpu_buffer->head_page);
637 } 666 }
638 667
639 /* 668 /*
640 * When the tail hits the head and the buffer is in overwrite mode, 669 * When the tail hits the head and the buffer is in overwrite mode,
641 * the head jumps to the next page and all content on the previous 670 * the head jumps to the next page and all content on the previous
642 * page is discarded. But before doing so, we update the overrun 671 * page is discarded. But before doing so, we update the overrun
643 * variable of the buffer. 672 * variable of the buffer.
644 */ 673 */
645 static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer) 674 static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer)
646 { 675 {
647 struct ring_buffer_event *event; 676 struct ring_buffer_event *event;
648 unsigned long head; 677 unsigned long head;
649 678
650 for (head = 0; head < rb_head_size(cpu_buffer); 679 for (head = 0; head < rb_head_size(cpu_buffer);
651 head += rb_event_length(event)) { 680 head += rb_event_length(event)) {
652 681
653 event = __rb_page_index(cpu_buffer->head_page, head); 682 event = __rb_page_index(cpu_buffer->head_page, head);
654 BUG_ON(rb_null_event(event)); 683 BUG_ON(rb_null_event(event));
655 /* Only count data entries */ 684 /* Only count data entries */
656 if (event->type != RINGBUF_TYPE_DATA) 685 if (event->type != RINGBUF_TYPE_DATA)
657 continue; 686 continue;
658 cpu_buffer->overrun++; 687 cpu_buffer->overrun++;
659 cpu_buffer->entries--; 688 cpu_buffer->entries--;
660 } 689 }
661 } 690 }
662 691
663 static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer, 692 static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
664 struct buffer_page **page) 693 struct buffer_page **page)
665 { 694 {
666 struct list_head *p = (*page)->list.next; 695 struct list_head *p = (*page)->list.next;
667 696
668 if (p == &cpu_buffer->pages) 697 if (p == &cpu_buffer->pages)
669 p = p->next; 698 p = p->next;
670 699
671 *page = list_entry(p, struct buffer_page, list); 700 *page = list_entry(p, struct buffer_page, list);
672 } 701 }
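
rb_inc_page() advances around a circular list whose anchor (&cpu_buffer->pages) holds no page, so a step that lands on the anchor immediately moves on to the next real page. A small userspace model of that sentinel skip; all names are hypothetical:

#include <stdio.h>

struct node {
	struct node *next;
	int page_id;		/* -1 marks the anchor/sentinel */
};

static struct node *advance(struct node *anchor, struct node *cur)
{
	struct node *p = cur->next;

	if (p == anchor)	/* the anchor carries no data: skip it */
		p = p->next;
	return p;
}

int main(void)
{
	struct node anchor = { .page_id = -1 };
	struct node a = { .page_id = 0 }, b = { .page_id = 1 };
	struct node *cur = &a;
	int i;

	/* circular ring: anchor -> a -> b -> anchor -> ... */
	anchor.next = &a;
	a.next = &b;
	b.next = &anchor;

	for (i = 0; i < 4; i++) {
		cur = advance(&anchor, cur);
		printf("now on page %d\n", cur->page_id);
	}
	return 0;
}
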
673 702
674 static inline unsigned 703 static inline unsigned
675 rb_event_index(struct ring_buffer_event *event) 704 rb_event_index(struct ring_buffer_event *event)
676 { 705 {
677 unsigned long addr = (unsigned long)event; 706 unsigned long addr = (unsigned long)event;
678 707
679 return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE); 708 return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE);
680 } 709 }
681 710
682 static inline int 711 static inline int
683 rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer, 712 rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
684 struct ring_buffer_event *event) 713 struct ring_buffer_event *event)
685 { 714 {
686 unsigned long addr = (unsigned long)event; 715 unsigned long addr = (unsigned long)event;
687 unsigned long index; 716 unsigned long index;
688 717
689 index = rb_event_index(event); 718 index = rb_event_index(event);
690 addr &= PAGE_MASK; 719 addr &= PAGE_MASK;
691 720
692 return cpu_buffer->commit_page->page == (void *)addr && 721 return cpu_buffer->commit_page->page == (void *)addr &&
693 rb_commit_index(cpu_buffer) == index; 722 rb_commit_index(cpu_buffer) == index;
694 } 723 }
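
rb_event_index() recovers an event's offset inside its page from the low bits of the event pointer, and rb_is_commit() then compares that page and offset against the commit page and commit index. A userspace sketch of the pointer arithmetic, using assumed page and data sizes rather than the kernel's own constants:

#include <stdio.h>

#define SK_PAGE_SIZE	4096UL
#define SK_PAGE_MASK	(~(SK_PAGE_SIZE - 1))
#define SK_DATA_SIZE	4096UL	/* bytes of the page that hold event data */

static unsigned long sk_event_index(const void *event)
{
	unsigned long addr = (unsigned long)event;

	/* low bits give the offset in the page, minus any non-data area */
	return (addr & ~SK_PAGE_MASK) - (SK_PAGE_SIZE - SK_DATA_SIZE);
}

int main(void)
{
	static unsigned char page[SK_PAGE_SIZE]
		__attribute__((aligned(4096)));
	const void *event = page + 256;	/* pretend an event starts at +256 */

	printf("event index = %lu\n", sk_event_index(event));
	return 0;
}
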
695 724
696 static inline void 725 static inline void
697 rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer, 726 rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer,
698 struct ring_buffer_event *event) 727 struct ring_buffer_event *event)
699 { 728 {
700 unsigned long addr = (unsigned long)event; 729 unsigned long addr = (unsigned long)event;
701 unsigned long index; 730 unsigned long index;
702 731
703 index = rb_event_index(event); 732 index = rb_event_index(event);
704 addr &= PAGE_MASK; 733 addr &= PAGE_MASK;
705 734
706 while (cpu_buffer->commit_page->page != (void *)addr) { 735 while (cpu_buffer->commit_page->page != (void *)addr) {
707 RB_WARN_ON(cpu_buffer, 736 RB_WARN_ON(cpu_buffer,
708 cpu_buffer->commit_page == cpu_buffer->tail_page); 737 cpu_buffer->commit_page == cpu_buffer->tail_page);
709 cpu_buffer->commit_page->commit = 738 cpu_buffer->commit_page->commit =
710 cpu_buffer->commit_page->write; 739 cpu_buffer->commit_page->write;
711 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); 740 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
712 cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp; 741 cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp;
713 } 742 }
714 743
715 /* Now set the commit to the event's index */ 744 /* Now set the commit to the event's index */
716 local_set(&cpu_buffer->commit_page->commit, index); 745 local_set(&cpu_buffer->commit_page->commit, index);
717 } 746 }
718 747
719 static inline void 748 static inline void
720 rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) 749 rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
721 { 750 {
722 /* 751 /*
723 * We only race with interrupts and NMIs on this CPU. 752 * We only race with interrupts and NMIs on this CPU.
724 * If we own the commit event, then we can commit 753 * If we own the commit event, then we can commit
725 * all others that interrupted us, since the interruptions 754 * all others that interrupted us, since the interruptions
726 * are in stack format (they finish before they come 755 * are in stack format (they finish before they come
727 * back to us). This allows us to do a simple loop to 756 * back to us). This allows us to do a simple loop to
728 * assign the commit to the tail. 757 * assign the commit to the tail.
729 */ 758 */
730 while (cpu_buffer->commit_page != cpu_buffer->tail_page) { 759 while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
731 cpu_buffer->commit_page->commit = 760 cpu_buffer->commit_page->commit =
732 cpu_buffer->commit_page->write; 761 cpu_buffer->commit_page->write;
733 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); 762 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
734 cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp; 763 cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp;
735 /* add barrier to keep gcc from optimizing too much */ 764 /* add barrier to keep gcc from optimizing too much */
736 barrier(); 765 barrier();
737 } 766 }
738 while (rb_commit_index(cpu_buffer) != 767 while (rb_commit_index(cpu_buffer) !=
739 rb_page_write(cpu_buffer->commit_page)) { 768 rb_page_write(cpu_buffer->commit_page)) {
740 cpu_buffer->commit_page->commit = 769 cpu_buffer->commit_page->commit =
741 cpu_buffer->commit_page->write; 770 cpu_buffer->commit_page->write;
742 barrier(); 771 barrier();
743 } 772 }
744 } 773 }
745 774
746 static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer) 775 static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
747 { 776 {
748 cpu_buffer->read_stamp = cpu_buffer->reader_page->time_stamp; 777 cpu_buffer->read_stamp = cpu_buffer->reader_page->time_stamp;
749 cpu_buffer->reader_page->read = 0; 778 cpu_buffer->reader_page->read = 0;
750 } 779 }
751 780
752 static inline void rb_inc_iter(struct ring_buffer_iter *iter) 781 static inline void rb_inc_iter(struct ring_buffer_iter *iter)
753 { 782 {
754 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; 783 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
755 784
756 /* 785 /*
757 * The iterator could be on the reader page (it starts there). 786 * The iterator could be on the reader page (it starts there).
758 * But the head could have moved, since the reader was 787 * But the head could have moved, since the reader was
759 * found. Check for this case and assign the iterator 788 * found. Check for this case and assign the iterator
760 * to the head page instead of next. 789 * to the head page instead of next.
761 */ 790 */
762 if (iter->head_page == cpu_buffer->reader_page) 791 if (iter->head_page == cpu_buffer->reader_page)
763 iter->head_page = cpu_buffer->head_page; 792 iter->head_page = cpu_buffer->head_page;
764 else 793 else
765 rb_inc_page(cpu_buffer, &iter->head_page); 794 rb_inc_page(cpu_buffer, &iter->head_page);
766 795
767 iter->read_stamp = iter->head_page->time_stamp; 796 iter->read_stamp = iter->head_page->time_stamp;
768 iter->head = 0; 797 iter->head = 0;
769 } 798 }
770 799
771 /** 800 /**
772 * ring_buffer_update_event - update event type and data 801 * ring_buffer_update_event - update event type and data
773 * @event: the event to update 802 * @event: the event to update
774 * @type: the type of event 803 * @type: the type of event
775 * @length: the size of the event field in the ring buffer 804 * @length: the size of the event field in the ring buffer
776 * 805 *
777 * Update the type and data fields of the event. The length 806 * Update the type and data fields of the event. The length
778 * is the actual size that is written to the ring buffer, 807 * is the actual size that is written to the ring buffer,
779 * and with this, we can determine what to place into the 808 * and with this, we can determine what to place into the
780 * data field. 809 * data field.
781 */ 810 */
782 static inline void 811 static inline void
783 rb_update_event(struct ring_buffer_event *event, 812 rb_update_event(struct ring_buffer_event *event,
784 unsigned type, unsigned length) 813 unsigned type, unsigned length)
785 { 814 {
786 event->type = type; 815 event->type = type;
787 816
788 switch (type) { 817 switch (type) {
789 818
790 case RINGBUF_TYPE_PADDING: 819 case RINGBUF_TYPE_PADDING:
791 break; 820 break;
792 821
793 case RINGBUF_TYPE_TIME_EXTEND: 822 case RINGBUF_TYPE_TIME_EXTEND:
794 event->len = 823 event->len =
795 (RB_LEN_TIME_EXTEND + (RB_ALIGNMENT-1)) 824 (RB_LEN_TIME_EXTEND + (RB_ALIGNMENT-1))
796 >> RB_ALIGNMENT_SHIFT; 825 >> RB_ALIGNMENT_SHIFT;
797 break; 826 break;
798 827
799 case RINGBUF_TYPE_TIME_STAMP: 828 case RINGBUF_TYPE_TIME_STAMP:
800 event->len = 829 event->len =
801 (RB_LEN_TIME_STAMP + (RB_ALIGNMENT-1)) 830 (RB_LEN_TIME_STAMP + (RB_ALIGNMENT-1))
802 >> RB_ALIGNMENT_SHIFT; 831 >> RB_ALIGNMENT_SHIFT;
803 break; 832 break;
804 833
805 case RINGBUF_TYPE_DATA: 834 case RINGBUF_TYPE_DATA:
806 length -= RB_EVNT_HDR_SIZE; 835 length -= RB_EVNT_HDR_SIZE;
807 if (length > RB_MAX_SMALL_DATA) { 836 if (length > RB_MAX_SMALL_DATA) {
808 event->len = 0; 837 event->len = 0;
809 event->array[0] = length; 838 event->array[0] = length;
810 } else 839 } else
811 event->len = 840 event->len =
812 (length + (RB_ALIGNMENT-1)) 841 (length + (RB_ALIGNMENT-1))
813 >> RB_ALIGNMENT_SHIFT; 842 >> RB_ALIGNMENT_SHIFT;
814 break; 843 break;
815 default: 844 default:
816 BUG(); 845 BUG();
817 } 846 }
818 } 847 }
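
For data events the small len field counts alignment-sized chunks; a payload too large for it sets len to 0 and stores the byte count in array[0]. A sketch of the two encodings and the matching decode, with assumed alignment and threshold values (the struct layout here is only an illustration):

#include <stdio.h>
#include <stdint.h>

#define SK_ALIGNMENT		4U
#define SK_ALIGNMENT_SHIFT	2U
#define SK_MAX_SMALL_DATA	28U	/* largest payload the len field covers */

struct sk_event {
	uint32_t type:2, len:3, time_delta:27;
	uint32_t array[1];
};

static void sk_encode_len(struct sk_event *e, unsigned int length)
{
	if (length > SK_MAX_SMALL_DATA) {
		e->len = 0;		/* too big: spill into array[0] */
		e->array[0] = length;
	} else {
		e->len = (length + (SK_ALIGNMENT - 1)) >> SK_ALIGNMENT_SHIFT;
	}
}

static unsigned int sk_decode_len(const struct sk_event *e)
{
	return e->len ? e->len << SK_ALIGNMENT_SHIFT : e->array[0];
}

int main(void)
{
	struct sk_event e = { 0 };

	sk_encode_len(&e, 12);
	printf("12-byte payload  -> len=%u, decoded=%u\n",
	       (unsigned)e.len, sk_decode_len(&e));

	sk_encode_len(&e, 100);
	printf("100-byte payload -> len=%u, decoded=%u\n",
	       (unsigned)e.len, sk_decode_len(&e));
	return 0;
}
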
819 848
820 static inline unsigned rb_calculate_event_length(unsigned length) 849 static inline unsigned rb_calculate_event_length(unsigned length)
821 { 850 {
822 struct ring_buffer_event event; /* Used only for sizeof array */ 851 struct ring_buffer_event event; /* Used only for sizeof array */
823 852
824 /* zero length can cause confusion */ 853 /* zero length can cause confusion */
825 if (!length) 854 if (!length)
826 length = 1; 855 length = 1;
827 856
828 if (length > RB_MAX_SMALL_DATA) 857 if (length > RB_MAX_SMALL_DATA)
829 length += sizeof(event.array[0]); 858 length += sizeof(event.array[0]);
830 859
831 length += RB_EVNT_HDR_SIZE; 860 length += RB_EVNT_HDR_SIZE;
832 length = ALIGN(length, RB_ALIGNMENT); 861 length = ALIGN(length, RB_ALIGNMENT);
833 862
834 return length; 863 return length;
835 } 864 }
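
The reserved size is the payload rounded up to the alignment, plus the event header, plus one extra word when the length has to be carried in array[0]. A worked sketch with assumed sizes (4-byte header, 4-byte alignment, 28-byte small-data limit): a 12-byte payload reserves 16 bytes, a 100-byte payload 108.

#include <stdio.h>

#define SK_HDR_SIZE		4U
#define SK_ALIGNMENT		4U
#define SK_MAX_SMALL_DATA	28U
#define SK_ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((a) - 1))

static unsigned int sk_event_length(unsigned int length)
{
	if (!length)
		length = 1;			/* avoid zero-length events */
	if (length > SK_MAX_SMALL_DATA)
		length += sizeof(unsigned int);	/* room for the length word */
	length += SK_HDR_SIZE;
	return SK_ALIGN_UP(length, SK_ALIGNMENT);
}

int main(void)
{
	printf("payload 12  -> reserve %u bytes\n", sk_event_length(12));
	printf("payload 100 -> reserve %u bytes\n", sk_event_length(100));
	return 0;
}
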
836 865
837 static struct ring_buffer_event * 866 static struct ring_buffer_event *
838 __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, 867 __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
839 unsigned type, unsigned long length, u64 *ts) 868 unsigned type, unsigned long length, u64 *ts)
840 { 869 {
841 struct buffer_page *tail_page, *head_page, *reader_page; 870 struct buffer_page *tail_page, *head_page, *reader_page;
842 unsigned long tail, write; 871 unsigned long tail, write;
843 struct ring_buffer *buffer = cpu_buffer->buffer; 872 struct ring_buffer *buffer = cpu_buffer->buffer;
844 struct ring_buffer_event *event; 873 struct ring_buffer_event *event;
845 unsigned long flags; 874 unsigned long flags;
846 875
847 tail_page = cpu_buffer->tail_page; 876 tail_page = cpu_buffer->tail_page;
848 write = local_add_return(length, &tail_page->write); 877 write = local_add_return(length, &tail_page->write);
849 tail = write - length; 878 tail = write - length;
850 879
851 /* See if we shot past the end of this buffer page */ 880 /* See if we shot past the end of this buffer page */
852 if (write > BUF_PAGE_SIZE) { 881 if (write > BUF_PAGE_SIZE) {
853 struct buffer_page *next_page = tail_page; 882 struct buffer_page *next_page = tail_page;
854 883
855 spin_lock_irqsave(&cpu_buffer->lock, flags); 884 spin_lock_irqsave(&cpu_buffer->lock, flags);
856 885
857 rb_inc_page(cpu_buffer, &next_page); 886 rb_inc_page(cpu_buffer, &next_page);
858 887
859 head_page = cpu_buffer->head_page; 888 head_page = cpu_buffer->head_page;
860 reader_page = cpu_buffer->reader_page; 889 reader_page = cpu_buffer->reader_page;
861 890
862 /* we grabbed the lock before incrementing */ 891 /* we grabbed the lock before incrementing */
863 RB_WARN_ON(cpu_buffer, next_page == reader_page); 892 RB_WARN_ON(cpu_buffer, next_page == reader_page);
864 893
865 /* 894 /*
866 * If for some reason, we had an interrupt storm that made 895 * If for some reason, we had an interrupt storm that made
867 * it all the way around the buffer, bail, and warn 896 * it all the way around the buffer, bail, and warn
868 * about it. 897 * about it.
869 */ 898 */
870 if (unlikely(next_page == cpu_buffer->commit_page)) { 899 if (unlikely(next_page == cpu_buffer->commit_page)) {
871 WARN_ON_ONCE(1); 900 WARN_ON_ONCE(1);
872 goto out_unlock; 901 goto out_unlock;
873 } 902 }
874 903
875 if (next_page == head_page) { 904 if (next_page == head_page) {
876 if (!(buffer->flags & RB_FL_OVERWRITE)) { 905 if (!(buffer->flags & RB_FL_OVERWRITE)) {
877 /* reset write */ 906 /* reset write */
878 if (tail <= BUF_PAGE_SIZE) 907 if (tail <= BUF_PAGE_SIZE)
879 local_set(&tail_page->write, tail); 908 local_set(&tail_page->write, tail);
880 goto out_unlock; 909 goto out_unlock;
881 } 910 }
882 911
883 /* tail_page has not moved yet? */ 912 /* tail_page has not moved yet? */
884 if (tail_page == cpu_buffer->tail_page) { 913 if (tail_page == cpu_buffer->tail_page) {
885 /* count overflows */ 914 /* count overflows */
886 rb_update_overflow(cpu_buffer); 915 rb_update_overflow(cpu_buffer);
887 916
888 rb_inc_page(cpu_buffer, &head_page); 917 rb_inc_page(cpu_buffer, &head_page);
889 cpu_buffer->head_page = head_page; 918 cpu_buffer->head_page = head_page;
890 cpu_buffer->head_page->read = 0; 919 cpu_buffer->head_page->read = 0;
891 } 920 }
892 } 921 }
893 922
894 /* 923 /*
895 * If the tail page is still the same as what we think 924 * If the tail page is still the same as what we think
896 * it is, then it is up to us to update the tail 925 * it is, then it is up to us to update the tail
897 * pointer. 926 * pointer.
898 */ 927 */
899 if (tail_page == cpu_buffer->tail_page) { 928 if (tail_page == cpu_buffer->tail_page) {
900 local_set(&next_page->write, 0); 929 local_set(&next_page->write, 0);
901 local_set(&next_page->commit, 0); 930 local_set(&next_page->commit, 0);
902 cpu_buffer->tail_page = next_page; 931 cpu_buffer->tail_page = next_page;
903 932
904 /* reread the time stamp */ 933 /* reread the time stamp */
905 *ts = ring_buffer_time_stamp(cpu_buffer->cpu); 934 *ts = ring_buffer_time_stamp(cpu_buffer->cpu);
906 cpu_buffer->tail_page->time_stamp = *ts; 935 cpu_buffer->tail_page->time_stamp = *ts;
907 } 936 }
908 937
909 /* 938 /*
910 * The actual tail page has moved forward. 939 * The actual tail page has moved forward.
911 */ 940 */
912 if (tail < BUF_PAGE_SIZE) { 941 if (tail < BUF_PAGE_SIZE) {
913 /* Mark the rest of the page with padding */ 942 /* Mark the rest of the page with padding */
914 event = __rb_page_index(tail_page, tail); 943 event = __rb_page_index(tail_page, tail);
915 event->type = RINGBUF_TYPE_PADDING; 944 event->type = RINGBUF_TYPE_PADDING;
916 } 945 }
917 946
918 if (tail <= BUF_PAGE_SIZE) 947 if (tail <= BUF_PAGE_SIZE)
919 /* Set the write back to the previous setting */ 948 /* Set the write back to the previous setting */
920 local_set(&tail_page->write, tail); 949 local_set(&tail_page->write, tail);
921 950
922 /* 951 /*
923 * If this was a commit entry that failed, 952 * If this was a commit entry that failed,
924 * increment that too 953 * increment that too
925 */ 954 */
926 if (tail_page == cpu_buffer->commit_page && 955 if (tail_page == cpu_buffer->commit_page &&
927 tail == rb_commit_index(cpu_buffer)) { 956 tail == rb_commit_index(cpu_buffer)) {
928 rb_set_commit_to_write(cpu_buffer); 957 rb_set_commit_to_write(cpu_buffer);
929 } 958 }
930 959
931 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 960 spin_unlock_irqrestore(&cpu_buffer->lock, flags);
932 961
933 /* fail and let the caller try again */ 962 /* fail and let the caller try again */
934 return ERR_PTR(-EAGAIN); 963 return ERR_PTR(-EAGAIN);
935 } 964 }
936 965
937 /* We reserved something on the buffer */ 966 /* We reserved something on the buffer */
938 967
939 BUG_ON(write > BUF_PAGE_SIZE); 968 BUG_ON(write > BUF_PAGE_SIZE);
940 969
941 event = __rb_page_index(tail_page, tail); 970 event = __rb_page_index(tail_page, tail);
942 rb_update_event(event, type, length); 971 rb_update_event(event, type, length);
943 972
944 /* 973 /*
945 * If this is a commit and the tail is zero, then update 974 * If this is a commit and the tail is zero, then update
946 * this page's time stamp. 975 * this page's time stamp.
947 */ 976 */
948 if (!tail && rb_is_commit(cpu_buffer, event)) 977 if (!tail && rb_is_commit(cpu_buffer, event))
949 cpu_buffer->commit_page->time_stamp = *ts; 978 cpu_buffer->commit_page->time_stamp = *ts;
950 979
951 return event; 980 return event;
952 981
953 out_unlock: 982 out_unlock:
954 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 983 spin_unlock_irqrestore(&cpu_buffer->lock, flags);
955 return NULL; 984 return NULL;
956 } 985 }
957 986
958 static int 987 static int
959 rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer, 988 rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
960 u64 *ts, u64 *delta) 989 u64 *ts, u64 *delta)
961 { 990 {
962 struct ring_buffer_event *event; 991 struct ring_buffer_event *event;
963 static int once; 992 static int once;
964 int ret; 993 int ret;
965 994
966 if (unlikely(*delta > (1ULL << 59) && !once++)) { 995 if (unlikely(*delta > (1ULL << 59) && !once++)) {
967 printk(KERN_WARNING "Delta way too big! %llu" 996 printk(KERN_WARNING "Delta way too big! %llu"
968 " ts=%llu write stamp = %llu\n", 997 " ts=%llu write stamp = %llu\n",
969 (unsigned long long)*delta, 998 (unsigned long long)*delta,
970 (unsigned long long)*ts, 999 (unsigned long long)*ts,
971 (unsigned long long)cpu_buffer->write_stamp); 1000 (unsigned long long)cpu_buffer->write_stamp);
972 WARN_ON(1); 1001 WARN_ON(1);
973 } 1002 }
974 1003
975 /* 1004 /*
976 * The delta is too big, we need to add a 1005 * The delta is too big, we need to add a
977 * new timestamp. 1006 * new timestamp.
978 */ 1007 */
979 event = __rb_reserve_next(cpu_buffer, 1008 event = __rb_reserve_next(cpu_buffer,
980 RINGBUF_TYPE_TIME_EXTEND, 1009 RINGBUF_TYPE_TIME_EXTEND,
981 RB_LEN_TIME_EXTEND, 1010 RB_LEN_TIME_EXTEND,
982 ts); 1011 ts);
983 if (!event) 1012 if (!event)
984 return -EBUSY; 1013 return -EBUSY;
985 1014
986 if (PTR_ERR(event) == -EAGAIN) 1015 if (PTR_ERR(event) == -EAGAIN)
987 return -EAGAIN; 1016 return -EAGAIN;
988 1017
989 /* Only a committed time event can update the write stamp */ 1018 /* Only a committed time event can update the write stamp */
990 if (rb_is_commit(cpu_buffer, event)) { 1019 if (rb_is_commit(cpu_buffer, event)) {
991 /* 1020 /*
992 * If this is the first on the page, then we need to 1021 * If this is the first on the page, then we need to
993 * update the page itself, and just put in a zero. 1022 * update the page itself, and just put in a zero.
994 */ 1023 */
995 if (rb_event_index(event)) { 1024 if (rb_event_index(event)) {
996 event->time_delta = *delta & TS_MASK; 1025 event->time_delta = *delta & TS_MASK;
997 event->array[0] = *delta >> TS_SHIFT; 1026 event->array[0] = *delta >> TS_SHIFT;
998 } else { 1027 } else {
999 cpu_buffer->commit_page->time_stamp = *ts; 1028 cpu_buffer->commit_page->time_stamp = *ts;
1000 event->time_delta = 0; 1029 event->time_delta = 0;
1001 event->array[0] = 0; 1030 event->array[0] = 0;
1002 } 1031 }
1003 cpu_buffer->write_stamp = *ts; 1032 cpu_buffer->write_stamp = *ts;
1004 /* let the caller know this was the commit */ 1033 /* let the caller know this was the commit */
1005 ret = 1; 1034 ret = 1;
1006 } else { 1035 } else {
1007 /* Darn, this is just wasted space */ 1036 /* Darn, this is just wasted space */
1008 event->time_delta = 0; 1037 event->time_delta = 0;
1009 event->array[0] = 0; 1038 event->array[0] = 0;
1010 ret = 0; 1039 ret = 0;
1011 } 1040 }
1012 1041
1013 *delta = 0; 1042 *delta = 0;
1014 1043
1015 return ret; 1044 return ret;
1016 } 1045 }
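
An oversized delta is split across the TIME_EXTEND event: the low bits travel in time_delta and the high bits in array[0], and the read side (rb_update_read_stamp() further down) shifts them back together. A round-trip sketch with an assumed shift width:

#include <stdio.h>
#include <stdint.h>

#define SK_TS_SHIFT	27
#define SK_TS_MASK	((1ULL << SK_TS_SHIFT) - 1)

struct sk_time_extend {
	uint32_t time_delta;	/* low SK_TS_SHIFT bits of the delta */
	uint32_t high;		/* what would live in array[0] */
};

static struct sk_time_extend sk_split(uint64_t delta)
{
	struct sk_time_extend te = {
		.time_delta = delta & SK_TS_MASK,
		.high       = delta >> SK_TS_SHIFT,
	};
	return te;
}

static uint64_t sk_join(struct sk_time_extend te)
{
	return ((uint64_t)te.high << SK_TS_SHIFT) + te.time_delta;
}

int main(void)
{
	uint64_t delta = 123456789012ULL;	/* too big for the low field */
	struct sk_time_extend te = sk_split(delta);

	printf("round trip ok: %d\n", sk_join(te) == delta);
	return 0;
}
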
1017 1046
1018 static struct ring_buffer_event * 1047 static struct ring_buffer_event *
1019 rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, 1048 rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
1020 unsigned type, unsigned long length) 1049 unsigned type, unsigned long length)
1021 { 1050 {
1022 struct ring_buffer_event *event; 1051 struct ring_buffer_event *event;
1023 u64 ts, delta; 1052 u64 ts, delta;
1024 int commit = 0; 1053 int commit = 0;
1025 int nr_loops = 0; 1054 int nr_loops = 0;
1026 1055
1027 again: 1056 again:
1028 /* 1057 /*
1029 * We allow for interrupts to reenter here and do a trace. 1058 * We allow for interrupts to reenter here and do a trace.
1030 * If one does, it will cause this original code to loop 1059 * If one does, it will cause this original code to loop
1031 * back here. Even with heavy interrupts happening, this 1060 * back here. Even with heavy interrupts happening, this
1032 * should only happen a few times in a row. If this happens 1061 * should only happen a few times in a row. If this happens
1033 * 1000 times in a row, there must be either an interrupt 1062 * 1000 times in a row, there must be either an interrupt
1034 * storm or we have something buggy. 1063 * storm or we have something buggy.
1035 * Bail! 1064 * Bail!
1036 */ 1065 */
1037 if (unlikely(++nr_loops > 1000)) { 1066 if (unlikely(++nr_loops > 1000)) {
1038 RB_WARN_ON(cpu_buffer, 1); 1067 RB_WARN_ON(cpu_buffer, 1);
1039 return NULL; 1068 return NULL;
1040 } 1069 }
1041 1070
1042 ts = ring_buffer_time_stamp(cpu_buffer->cpu); 1071 ts = ring_buffer_time_stamp(cpu_buffer->cpu);
1043 1072
1044 /* 1073 /*
1045 * Only the first commit can update the timestamp. 1074 * Only the first commit can update the timestamp.
1046 * Yes there is a race here. If an interrupt comes in 1075 * Yes there is a race here. If an interrupt comes in
1047 * just after the conditional and it traces too, then it 1076 * just after the conditional and it traces too, then it
1048 * will also check the deltas. More than one timestamp may 1077 * will also check the deltas. More than one timestamp may
1049 * also be made. But only the entry that did the actual 1078 * also be made. But only the entry that did the actual
1050 * commit will be something other than zero. 1079 * commit will be something other than zero.
1051 */ 1080 */
1052 if (cpu_buffer->tail_page == cpu_buffer->commit_page && 1081 if (cpu_buffer->tail_page == cpu_buffer->commit_page &&
1053 rb_page_write(cpu_buffer->tail_page) == 1082 rb_page_write(cpu_buffer->tail_page) ==
1054 rb_commit_index(cpu_buffer)) { 1083 rb_commit_index(cpu_buffer)) {
1055 1084
1056 delta = ts - cpu_buffer->write_stamp; 1085 delta = ts - cpu_buffer->write_stamp;
1057 1086
1058 /* make sure this delta is calculated here */ 1087 /* make sure this delta is calculated here */
1059 barrier(); 1088 barrier();
1060 1089
1061 /* Did the write stamp get updated already? */ 1090 /* Did the write stamp get updated already? */
1062 if (unlikely(ts < cpu_buffer->write_stamp)) 1091 if (unlikely(ts < cpu_buffer->write_stamp))
1063 delta = 0; 1092 delta = 0;
1064 1093
1065 if (test_time_stamp(delta)) { 1094 if (test_time_stamp(delta)) {
1066 1095
1067 commit = rb_add_time_stamp(cpu_buffer, &ts, &delta); 1096 commit = rb_add_time_stamp(cpu_buffer, &ts, &delta);
1068 1097
1069 if (commit == -EBUSY) 1098 if (commit == -EBUSY)
1070 return NULL; 1099 return NULL;
1071 1100
1072 if (commit == -EAGAIN) 1101 if (commit == -EAGAIN)
1073 goto again; 1102 goto again;
1074 1103
1075 RB_WARN_ON(cpu_buffer, commit < 0); 1104 RB_WARN_ON(cpu_buffer, commit < 0);
1076 } 1105 }
1077 } else 1106 } else
1078 /* Non commits have zero deltas */ 1107 /* Non commits have zero deltas */
1079 delta = 0; 1108 delta = 0;
1080 1109
1081 event = __rb_reserve_next(cpu_buffer, type, length, &ts); 1110 event = __rb_reserve_next(cpu_buffer, type, length, &ts);
1082 if (PTR_ERR(event) == -EAGAIN) 1111 if (PTR_ERR(event) == -EAGAIN)
1083 goto again; 1112 goto again;
1084 1113
1085 if (!event) { 1114 if (!event) {
1086 if (unlikely(commit)) 1115 if (unlikely(commit))
1087 /* 1116 /*
1088 * Ouch! We needed a timestamp and it was committed. But 1117 * Ouch! We needed a timestamp and it was committed. But
1089 * we didn't get our event reserved. 1118 * we didn't get our event reserved.
1090 */ 1119 */
1091 rb_set_commit_to_write(cpu_buffer); 1120 rb_set_commit_to_write(cpu_buffer);
1092 return NULL; 1121 return NULL;
1093 } 1122 }
1094 1123
1095 /* 1124 /*
1096 * If the timestamp was committed, make the commit our entry 1125 * If the timestamp was committed, make the commit our entry
1097 * now so that we will update it when needed. 1126 * now so that we will update it when needed.
1098 */ 1127 */
1099 if (commit) 1128 if (commit)
1100 rb_set_commit_event(cpu_buffer, event); 1129 rb_set_commit_event(cpu_buffer, event);
1101 else if (!rb_is_commit(cpu_buffer, event)) 1130 else if (!rb_is_commit(cpu_buffer, event))
1102 delta = 0; 1131 delta = 0;
1103 1132
1104 event->time_delta = delta; 1133 event->time_delta = delta;
1105 1134
1106 return event; 1135 return event;
1107 } 1136 }
1108 1137
1109 static DEFINE_PER_CPU(int, rb_need_resched); 1138 static DEFINE_PER_CPU(int, rb_need_resched);
1110 1139
1111 /** 1140 /**
1112 * ring_buffer_lock_reserve - reserve a part of the buffer 1141 * ring_buffer_lock_reserve - reserve a part of the buffer
1113 * @buffer: the ring buffer to reserve from 1142 * @buffer: the ring buffer to reserve from
1114 * @length: the length of the data to reserve (excluding event header) 1143 * @length: the length of the data to reserve (excluding event header)
1115 * @flags: a pointer to save the interrupt flags 1144 * @flags: a pointer to save the interrupt flags
1116 * 1145 *
1117 * Returns a reserved event on the ring buffer to copy directly to. 1146 * Returns a reserved event on the ring buffer to copy directly to.
1118 * The user of this interface will need to get the body to write into 1147 * The user of this interface will need to get the body to write into
1119 * and can use the ring_buffer_event_data() interface. 1148 * and can use the ring_buffer_event_data() interface.
1120 * 1149 *
1121 * The length is the length of the data needed, not the event length 1150 * The length is the length of the data needed, not the event length
1122 * which also includes the event header. 1151 * which also includes the event header.
1123 * 1152 *
1124 * Must be paired with ring_buffer_unlock_commit, unless NULL is returned. 1153 * Must be paired with ring_buffer_unlock_commit, unless NULL is returned.
1125 * If NULL is returned, then nothing has been allocated or locked. 1154 * If NULL is returned, then nothing has been allocated or locked.
1126 */ 1155 */
1127 struct ring_buffer_event * 1156 struct ring_buffer_event *
1128 ring_buffer_lock_reserve(struct ring_buffer *buffer, 1157 ring_buffer_lock_reserve(struct ring_buffer *buffer,
1129 unsigned long length, 1158 unsigned long length,
1130 unsigned long *flags) 1159 unsigned long *flags)
1131 { 1160 {
1132 struct ring_buffer_per_cpu *cpu_buffer; 1161 struct ring_buffer_per_cpu *cpu_buffer;
1133 struct ring_buffer_event *event; 1162 struct ring_buffer_event *event;
1134 int cpu, resched; 1163 int cpu, resched;
1135 1164
1165 if (ring_buffers_off)
1166 return NULL;
1167
1136 if (atomic_read(&buffer->record_disabled)) 1168 if (atomic_read(&buffer->record_disabled))
1137 return NULL; 1169 return NULL;
1138 1170
1139 /* If we are tracing schedule, we don't want to recurse */ 1171 /* If we are tracing schedule, we don't want to recurse */
1140 resched = need_resched(); 1172 resched = need_resched();
1141 preempt_disable_notrace(); 1173 preempt_disable_notrace();
1142 1174
1143 cpu = raw_smp_processor_id(); 1175 cpu = raw_smp_processor_id();
1144 1176
1145 if (!cpu_isset(cpu, buffer->cpumask)) 1177 if (!cpu_isset(cpu, buffer->cpumask))
1146 goto out; 1178 goto out;
1147 1179
1148 cpu_buffer = buffer->buffers[cpu]; 1180 cpu_buffer = buffer->buffers[cpu];
1149 1181
1150 if (atomic_read(&cpu_buffer->record_disabled)) 1182 if (atomic_read(&cpu_buffer->record_disabled))
1151 goto out; 1183 goto out;
1152 1184
1153 length = rb_calculate_event_length(length); 1185 length = rb_calculate_event_length(length);
1154 if (length > BUF_PAGE_SIZE) 1186 if (length > BUF_PAGE_SIZE)
1155 goto out; 1187 goto out;
1156 1188
1157 event = rb_reserve_next_event(cpu_buffer, RINGBUF_TYPE_DATA, length); 1189 event = rb_reserve_next_event(cpu_buffer, RINGBUF_TYPE_DATA, length);
1158 if (!event) 1190 if (!event)
1159 goto out; 1191 goto out;
1160 1192
1161 /* 1193 /*
1162 * Need to store resched state on this cpu. 1194 * Need to store resched state on this cpu.
1163 * Only the first needs to. 1195 * Only the first needs to.
1164 */ 1196 */
1165 1197
1166 if (preempt_count() == 1) 1198 if (preempt_count() == 1)
1167 per_cpu(rb_need_resched, cpu) = resched; 1199 per_cpu(rb_need_resched, cpu) = resched;
1168 1200
1169 return event; 1201 return event;
1170 1202
1171 out: 1203 out:
1172 if (resched) 1204 if (resched)
1173 preempt_enable_no_resched_notrace(); 1205 preempt_enable_no_resched_notrace();
1174 else 1206 else
1175 preempt_enable_notrace(); 1207 preempt_enable_notrace();
1176 return NULL; 1208 return NULL;
1177 } 1209 }
1178 1210
1179 static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, 1211 static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
1180 struct ring_buffer_event *event) 1212 struct ring_buffer_event *event)
1181 { 1213 {
1182 cpu_buffer->entries++; 1214 cpu_buffer->entries++;
1183 1215
1184 /* Only process further if we own the commit */ 1216 /* Only process further if we own the commit */
1185 if (!rb_is_commit(cpu_buffer, event)) 1217 if (!rb_is_commit(cpu_buffer, event))
1186 return; 1218 return;
1187 1219
1188 cpu_buffer->write_stamp += event->time_delta; 1220 cpu_buffer->write_stamp += event->time_delta;
1189 1221
1190 rb_set_commit_to_write(cpu_buffer); 1222 rb_set_commit_to_write(cpu_buffer);
1191 } 1223 }
1192 1224
1193 /** 1225 /**
1194 * ring_buffer_unlock_commit - commit a reserved event 1226 * ring_buffer_unlock_commit - commit a reserved event
1195 * @buffer: The buffer to commit to 1227 * @buffer: The buffer to commit to
1196 * @event: The event pointer to commit. 1228 * @event: The event pointer to commit.
1197 * @flags: the interrupt flags received from ring_buffer_lock_reserve. 1229 * @flags: the interrupt flags received from ring_buffer_lock_reserve.
1198 * 1230 *
1199 * This commits the data to the ring buffer, and releases any locks held. 1231 * This commits the data to the ring buffer, and releases any locks held.
1200 * 1232 *
1201 * Must be paired with ring_buffer_lock_reserve. 1233 * Must be paired with ring_buffer_lock_reserve.
1202 */ 1234 */
1203 int ring_buffer_unlock_commit(struct ring_buffer *buffer, 1235 int ring_buffer_unlock_commit(struct ring_buffer *buffer,
1204 struct ring_buffer_event *event, 1236 struct ring_buffer_event *event,
1205 unsigned long flags) 1237 unsigned long flags)
1206 { 1238 {
1207 struct ring_buffer_per_cpu *cpu_buffer; 1239 struct ring_buffer_per_cpu *cpu_buffer;
1208 int cpu = raw_smp_processor_id(); 1240 int cpu = raw_smp_processor_id();
1209 1241
1210 cpu_buffer = buffer->buffers[cpu]; 1242 cpu_buffer = buffer->buffers[cpu];
1211 1243
1212 rb_commit(cpu_buffer, event); 1244 rb_commit(cpu_buffer, event);
1213 1245
1214 /* 1246 /*
1215 * Only the last preempt count needs to restore preemption. 1247 * Only the last preempt count needs to restore preemption.
1216 */ 1248 */
1217 if (preempt_count() == 1) { 1249 if (preempt_count() == 1) {
1218 if (per_cpu(rb_need_resched, cpu)) 1250 if (per_cpu(rb_need_resched, cpu))
1219 preempt_enable_no_resched_notrace(); 1251 preempt_enable_no_resched_notrace();
1220 else 1252 else
1221 preempt_enable_notrace(); 1253 preempt_enable_notrace();
1222 } else 1254 } else
1223 preempt_enable_no_resched_notrace(); 1255 preempt_enable_no_resched_notrace();
1224 1256
1225 return 0; 1257 return 0;
1226 } 1258 }
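
Taken together, the two calls above form the usual write sequence: reserve space, fill the returned event's data, then commit; a NULL reserve means nothing was locked and nothing needs committing. A hedged usage sketch; the payload struct and function name are made up, while the ring buffer calls are the ones documented here:

#include <linux/ring_buffer.h>
#include <linux/errno.h>

struct my_entry {			/* hypothetical payload */
	unsigned long ip;
	unsigned long parent_ip;
};

static int my_trace_write(struct ring_buffer *buffer,
			  unsigned long ip, unsigned long parent_ip)
{
	struct ring_buffer_event *event;
	struct my_entry *entry;
	unsigned long flags;

	event = ring_buffer_lock_reserve(buffer, sizeof(*entry), &flags);
	if (!event)
		return -EBUSY;		/* nothing reserved, nothing to commit */

	entry = ring_buffer_event_data(event);
	entry->ip = ip;
	entry->parent_ip = parent_ip;

	return ring_buffer_unlock_commit(buffer, event, flags);
}
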
1227 1259
1228 /** 1260 /**
1229 * ring_buffer_write - write data to the buffer without reserving 1261 * ring_buffer_write - write data to the buffer without reserving
1230 * @buffer: The ring buffer to write to. 1262 * @buffer: The ring buffer to write to.
1231 * @length: The length of the data being written (excluding the event header) 1263 * @length: The length of the data being written (excluding the event header)
1232 * @data: The data to write to the buffer. 1264 * @data: The data to write to the buffer.
1233 * 1265 *
1234 * This is like ring_buffer_lock_reserve and ring_buffer_unlock_commit as 1266 * This is like ring_buffer_lock_reserve and ring_buffer_unlock_commit as
1235 * one function. If you already have the data to write to the buffer, it 1267 * one function. If you already have the data to write to the buffer, it
1236 * may be easier to simply call this function. 1268 * may be easier to simply call this function.
1237 * 1269 *
1238 * Note, like ring_buffer_lock_reserve, the length is the length of the data 1270 * Note, like ring_buffer_lock_reserve, the length is the length of the data
1239 * and not the length of the event which would hold the header. 1271 * and not the length of the event which would hold the header.
1240 */ 1272 */
1241 int ring_buffer_write(struct ring_buffer *buffer, 1273 int ring_buffer_write(struct ring_buffer *buffer,
1242 unsigned long length, 1274 unsigned long length,
1243 void *data) 1275 void *data)
1244 { 1276 {
1245 struct ring_buffer_per_cpu *cpu_buffer; 1277 struct ring_buffer_per_cpu *cpu_buffer;
1246 struct ring_buffer_event *event; 1278 struct ring_buffer_event *event;
1247 unsigned long event_length; 1279 unsigned long event_length;
1248 void *body; 1280 void *body;
1249 int ret = -EBUSY; 1281 int ret = -EBUSY;
1250 int cpu, resched; 1282 int cpu, resched;
1251 1283
1284 if (ring_buffers_off)
1285 return -EBUSY;
1286
1252 if (atomic_read(&buffer->record_disabled)) 1287 if (atomic_read(&buffer->record_disabled))
1253 return -EBUSY; 1288 return -EBUSY;
1254 1289
1255 resched = need_resched(); 1290 resched = need_resched();
1256 preempt_disable_notrace(); 1291 preempt_disable_notrace();
1257 1292
1258 cpu = raw_smp_processor_id(); 1293 cpu = raw_smp_processor_id();
1259 1294
1260 if (!cpu_isset(cpu, buffer->cpumask)) 1295 if (!cpu_isset(cpu, buffer->cpumask))
1261 goto out; 1296 goto out;
1262 1297
1263 cpu_buffer = buffer->buffers[cpu]; 1298 cpu_buffer = buffer->buffers[cpu];
1264 1299
1265 if (atomic_read(&cpu_buffer->record_disabled)) 1300 if (atomic_read(&cpu_buffer->record_disabled))
1266 goto out; 1301 goto out;
1267 1302
1268 event_length = rb_calculate_event_length(length); 1303 event_length = rb_calculate_event_length(length);
1269 event = rb_reserve_next_event(cpu_buffer, 1304 event = rb_reserve_next_event(cpu_buffer,
1270 RINGBUF_TYPE_DATA, event_length); 1305 RINGBUF_TYPE_DATA, event_length);
1271 if (!event) 1306 if (!event)
1272 goto out; 1307 goto out;
1273 1308
1274 body = rb_event_data(event); 1309 body = rb_event_data(event);
1275 1310
1276 memcpy(body, data, length); 1311 memcpy(body, data, length);
1277 1312
1278 rb_commit(cpu_buffer, event); 1313 rb_commit(cpu_buffer, event);
1279 1314
1280 ret = 0; 1315 ret = 0;
1281 out: 1316 out:
1282 if (resched) 1317 if (resched)
1283 preempt_enable_no_resched_notrace(); 1318 preempt_enable_no_resched_notrace();
1284 else 1319 else
1285 preempt_enable_notrace(); 1320 preempt_enable_notrace();
1286 1321
1287 return ret; 1322 return ret;
1288 } 1323 }
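
When the record already exists in memory, this single call stands in for the reserve/copy/commit sequence. A short, hypothetical caller:

#include <linux/ring_buffer.h>
#include <linux/types.h>

static int my_log_value(struct ring_buffer *buffer, u64 value)
{
	/* 0 on success, -EBUSY if recording is off or no event fits */
	return ring_buffer_write(buffer, sizeof(value), &value);
}
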
1289 1324
1290 static inline int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer) 1325 static inline int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
1291 { 1326 {
1292 struct buffer_page *reader = cpu_buffer->reader_page; 1327 struct buffer_page *reader = cpu_buffer->reader_page;
1293 struct buffer_page *head = cpu_buffer->head_page; 1328 struct buffer_page *head = cpu_buffer->head_page;
1294 struct buffer_page *commit = cpu_buffer->commit_page; 1329 struct buffer_page *commit = cpu_buffer->commit_page;
1295 1330
1296 return reader->read == rb_page_commit(reader) && 1331 return reader->read == rb_page_commit(reader) &&
1297 (commit == reader || 1332 (commit == reader ||
1298 (commit == head && 1333 (commit == head &&
1299 head->read == rb_page_commit(commit))); 1334 head->read == rb_page_commit(commit)));
1300 } 1335 }
1301 1336
1302 /** 1337 /**
1303 * ring_buffer_record_disable - stop all writes into the buffer 1338 * ring_buffer_record_disable - stop all writes into the buffer
1304 * @buffer: The ring buffer to stop writes to. 1339 * @buffer: The ring buffer to stop writes to.
1305 * 1340 *
1306 * This prevents all writes to the buffer. Any attempt to write 1341 * This prevents all writes to the buffer. Any attempt to write
1307 * to the buffer after this will fail and return NULL. 1342 * to the buffer after this will fail and return NULL.
1308 * 1343 *
1309 * The caller should call synchronize_sched() after this. 1344 * The caller should call synchronize_sched() after this.
1310 */ 1345 */
1311 void ring_buffer_record_disable(struct ring_buffer *buffer) 1346 void ring_buffer_record_disable(struct ring_buffer *buffer)
1312 { 1347 {
1313 atomic_inc(&buffer->record_disabled); 1348 atomic_inc(&buffer->record_disabled);
1314 } 1349 }
1315 1350
1316 /** 1351 /**
1317 * ring_buffer_record_enable - enable writes to the buffer 1352 * ring_buffer_record_enable - enable writes to the buffer
1318 * @buffer: The ring buffer to enable writes 1353 * @buffer: The ring buffer to enable writes
1319 * 1354 *
1320 * Note, multiple disables will need the same number of enables 1355 * Note, multiple disables will need the same number of enables
1321 * to truly enable the writing (much like preempt_disable). 1356 * to truly enable the writing (much like preempt_disable).
1322 */ 1357 */
1323 void ring_buffer_record_enable(struct ring_buffer *buffer) 1358 void ring_buffer_record_enable(struct ring_buffer *buffer)
1324 { 1359 {
1325 atomic_dec(&buffer->record_disabled); 1360 atomic_dec(&buffer->record_disabled);
1326 } 1361 }
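
Disables nest like a counter, and the comments above ask for synchronize_sched() after disabling so writers already past the check can drain before the buffer is inspected. A hedged sketch of that pattern; the examine callback is hypothetical:

#include <linux/ring_buffer.h>
#include <linux/rcupdate.h>

static void my_snapshot(struct ring_buffer *buffer,
			void (*my_examine_buffer)(struct ring_buffer *))
{
	ring_buffer_record_disable(buffer);
	synchronize_sched();		/* wait for writers already in flight */

	my_examine_buffer(buffer);	/* buffer is quiescent here */

	ring_buffer_record_enable(buffer);
}
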
1327 1362
1328 /** 1363 /**
1329 * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer 1364 * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer
1330 * @buffer: The ring buffer to stop writes to. 1365 * @buffer: The ring buffer to stop writes to.
1331 * @cpu: The CPU buffer to stop 1366 * @cpu: The CPU buffer to stop
1332 * 1367 *
1333 * This prevents all writes to the buffer. Any attempt to write 1368 * This prevents all writes to the buffer. Any attempt to write
1334 * to the buffer after this will fail and return NULL. 1369 * to the buffer after this will fail and return NULL.
1335 * 1370 *
1336 * The caller should call synchronize_sched() after this. 1371 * The caller should call synchronize_sched() after this.
1337 */ 1372 */
1338 void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu) 1373 void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu)
1339 { 1374 {
1340 struct ring_buffer_per_cpu *cpu_buffer; 1375 struct ring_buffer_per_cpu *cpu_buffer;
1341 1376
1342 if (!cpu_isset(cpu, buffer->cpumask)) 1377 if (!cpu_isset(cpu, buffer->cpumask))
1343 return; 1378 return;
1344 1379
1345 cpu_buffer = buffer->buffers[cpu]; 1380 cpu_buffer = buffer->buffers[cpu];
1346 atomic_inc(&cpu_buffer->record_disabled); 1381 atomic_inc(&cpu_buffer->record_disabled);
1347 } 1382 }
1348 1383
1349 /** 1384 /**
1350 * ring_buffer_record_enable_cpu - enable writes to the buffer 1385 * ring_buffer_record_enable_cpu - enable writes to the buffer
1351 * @buffer: The ring buffer to enable writes 1386 * @buffer: The ring buffer to enable writes
1352 * @cpu: The CPU to enable. 1387 * @cpu: The CPU to enable.
1353 * 1388 *
1354 * Note, multiple disables will need the same number of enables 1389 * Note, multiple disables will need the same number of enables
1355 * to truly enable the writing (much like preempt_disable). 1390 * to truly enable the writing (much like preempt_disable).
1356 */ 1391 */
1357 void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu) 1392 void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
1358 { 1393 {
1359 struct ring_buffer_per_cpu *cpu_buffer; 1394 struct ring_buffer_per_cpu *cpu_buffer;
1360 1395
1361 if (!cpu_isset(cpu, buffer->cpumask)) 1396 if (!cpu_isset(cpu, buffer->cpumask))
1362 return; 1397 return;
1363 1398
1364 cpu_buffer = buffer->buffers[cpu]; 1399 cpu_buffer = buffer->buffers[cpu];
1365 atomic_dec(&cpu_buffer->record_disabled); 1400 atomic_dec(&cpu_buffer->record_disabled);
1366 } 1401 }
1367 1402
1368 /** 1403 /**
1369 * ring_buffer_entries_cpu - get the number of entries in a cpu buffer 1404 * ring_buffer_entries_cpu - get the number of entries in a cpu buffer
1370 * @buffer: The ring buffer 1405 * @buffer: The ring buffer
1371 * @cpu: The per CPU buffer to get the entries from. 1406 * @cpu: The per CPU buffer to get the entries from.
1372 */ 1407 */
1373 unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu) 1408 unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
1374 { 1409 {
1375 struct ring_buffer_per_cpu *cpu_buffer; 1410 struct ring_buffer_per_cpu *cpu_buffer;
1376 1411
1377 if (!cpu_isset(cpu, buffer->cpumask)) 1412 if (!cpu_isset(cpu, buffer->cpumask))
1378 return 0; 1413 return 0;
1379 1414
1380 cpu_buffer = buffer->buffers[cpu]; 1415 cpu_buffer = buffer->buffers[cpu];
1381 return cpu_buffer->entries; 1416 return cpu_buffer->entries;
1382 } 1417 }
1383 1418
1384 /** 1419 /**
1385 * ring_buffer_overrun_cpu - get the number of overruns in a cpu_buffer 1420 * ring_buffer_overrun_cpu - get the number of overruns in a cpu_buffer
1386 * @buffer: The ring buffer 1421 * @buffer: The ring buffer
1387 * @cpu: The per CPU buffer to get the number of overruns from 1422 * @cpu: The per CPU buffer to get the number of overruns from
1388 */ 1423 */
1389 unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu) 1424 unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu)
1390 { 1425 {
1391 struct ring_buffer_per_cpu *cpu_buffer; 1426 struct ring_buffer_per_cpu *cpu_buffer;
1392 1427
1393 if (!cpu_isset(cpu, buffer->cpumask)) 1428 if (!cpu_isset(cpu, buffer->cpumask))
1394 return 0; 1429 return 0;
1395 1430
1396 cpu_buffer = buffer->buffers[cpu]; 1431 cpu_buffer = buffer->buffers[cpu];
1397 return cpu_buffer->overrun; 1432 return cpu_buffer->overrun;
1398 } 1433 }
1399 1434
1400 /** 1435 /**
1401 * ring_buffer_entries - get the number of entries in a buffer 1436 * ring_buffer_entries - get the number of entries in a buffer
1402 * @buffer: The ring buffer 1437 * @buffer: The ring buffer
1403 * 1438 *
1404 * Returns the total number of entries in the ring buffer 1439 * Returns the total number of entries in the ring buffer
1405 * (all CPU entries) 1440 * (all CPU entries)
1406 */ 1441 */
1407 unsigned long ring_buffer_entries(struct ring_buffer *buffer) 1442 unsigned long ring_buffer_entries(struct ring_buffer *buffer)
1408 { 1443 {
1409 struct ring_buffer_per_cpu *cpu_buffer; 1444 struct ring_buffer_per_cpu *cpu_buffer;
1410 unsigned long entries = 0; 1445 unsigned long entries = 0;
1411 int cpu; 1446 int cpu;
1412 1447
1413 /* if you care about this being correct, lock the buffer */ 1448 /* if you care about this being correct, lock the buffer */
1414 for_each_buffer_cpu(buffer, cpu) { 1449 for_each_buffer_cpu(buffer, cpu) {
1415 cpu_buffer = buffer->buffers[cpu]; 1450 cpu_buffer = buffer->buffers[cpu];
1416 entries += cpu_buffer->entries; 1451 entries += cpu_buffer->entries;
1417 } 1452 }
1418 1453
1419 return entries; 1454 return entries;
1420 } 1455 }
1421 1456
1422 /** 1457 /**
1423 * ring_buffer_overruns - get the number of overruns in the buffer 1458 * ring_buffer_overruns - get the number of overruns in the buffer
1424 * @buffer: The ring buffer 1459 * @buffer: The ring buffer
1425 * 1460 *
1426 * Returns the total number of overruns in the ring buffer 1461 * Returns the total number of overruns in the ring buffer
1427 * (all CPU entries) 1462 * (all CPU entries)
1428 */ 1463 */
1429 unsigned long ring_buffer_overruns(struct ring_buffer *buffer) 1464 unsigned long ring_buffer_overruns(struct ring_buffer *buffer)
1430 { 1465 {
1431 struct ring_buffer_per_cpu *cpu_buffer; 1466 struct ring_buffer_per_cpu *cpu_buffer;
1432 unsigned long overruns = 0; 1467 unsigned long overruns = 0;
1433 int cpu; 1468 int cpu;
1434 1469
1435 /* if you care about this being correct, lock the buffer */ 1470 /* if you care about this being correct, lock the buffer */
1436 for_each_buffer_cpu(buffer, cpu) { 1471 for_each_buffer_cpu(buffer, cpu) {
1437 cpu_buffer = buffer->buffers[cpu]; 1472 cpu_buffer = buffer->buffers[cpu];
1438 overruns += cpu_buffer->overrun; 1473 overruns += cpu_buffer->overrun;
1439 } 1474 }
1440 1475
1441 return overruns; 1476 return overruns;
1442 } 1477 }
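
The two aggregate helpers simply sum the per-CPU counters without locking, so the totals are only approximate unless recording is disabled first. A small, hypothetical reporting helper:

#include <linux/ring_buffer.h>
#include <linux/kernel.h>

static void my_report_stats(struct ring_buffer *buffer)
{
	unsigned long entries = ring_buffer_entries(buffer);
	unsigned long overruns = ring_buffer_overruns(buffer);

	printk(KERN_INFO "ring buffer: %lu entries, %lu overruns\n",
	       entries, overruns);
}
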
1443 1478
1444 /** 1479 /**
1445 * ring_buffer_iter_reset - reset an iterator 1480 * ring_buffer_iter_reset - reset an iterator
1446 * @iter: The iterator to reset 1481 * @iter: The iterator to reset
1447 * 1482 *
1448 * Resets the iterator, so that it will start from the beginning 1483 * Resets the iterator, so that it will start from the beginning
1449 * again. 1484 * again.
1450 */ 1485 */
1451 void ring_buffer_iter_reset(struct ring_buffer_iter *iter) 1486 void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
1452 { 1487 {
1453 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; 1488 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1454 1489
1455 /* Iterator usage is expected to have record disabled */ 1490 /* Iterator usage is expected to have record disabled */
1456 if (list_empty(&cpu_buffer->reader_page->list)) { 1491 if (list_empty(&cpu_buffer->reader_page->list)) {
1457 iter->head_page = cpu_buffer->head_page; 1492 iter->head_page = cpu_buffer->head_page;
1458 iter->head = cpu_buffer->head_page->read; 1493 iter->head = cpu_buffer->head_page->read;
1459 } else { 1494 } else {
1460 iter->head_page = cpu_buffer->reader_page; 1495 iter->head_page = cpu_buffer->reader_page;
1461 iter->head = cpu_buffer->reader_page->read; 1496 iter->head = cpu_buffer->reader_page->read;
1462 } 1497 }
1463 if (iter->head) 1498 if (iter->head)
1464 iter->read_stamp = cpu_buffer->read_stamp; 1499 iter->read_stamp = cpu_buffer->read_stamp;
1465 else 1500 else
1466 iter->read_stamp = iter->head_page->time_stamp; 1501 iter->read_stamp = iter->head_page->time_stamp;
1467 } 1502 }
1468 1503
1469 /** 1504 /**
1470 * ring_buffer_iter_empty - check if an iterator has no more to read 1505 * ring_buffer_iter_empty - check if an iterator has no more to read
1471 * @iter: The iterator to check 1506 * @iter: The iterator to check
1472 */ 1507 */
1473 int ring_buffer_iter_empty(struct ring_buffer_iter *iter) 1508 int ring_buffer_iter_empty(struct ring_buffer_iter *iter)
1474 { 1509 {
1475 struct ring_buffer_per_cpu *cpu_buffer; 1510 struct ring_buffer_per_cpu *cpu_buffer;
1476 1511
1477 cpu_buffer = iter->cpu_buffer; 1512 cpu_buffer = iter->cpu_buffer;
1478 1513
1479 return iter->head_page == cpu_buffer->commit_page && 1514 return iter->head_page == cpu_buffer->commit_page &&
1480 iter->head == rb_commit_index(cpu_buffer); 1515 iter->head == rb_commit_index(cpu_buffer);
1481 } 1516 }
1482 1517
1483 static void 1518 static void
1484 rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer, 1519 rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer,
1485 struct ring_buffer_event *event) 1520 struct ring_buffer_event *event)
1486 { 1521 {
1487 u64 delta; 1522 u64 delta;
1488 1523
1489 switch (event->type) { 1524 switch (event->type) {
1490 case RINGBUF_TYPE_PADDING: 1525 case RINGBUF_TYPE_PADDING:
1491 return; 1526 return;
1492 1527
1493 case RINGBUF_TYPE_TIME_EXTEND: 1528 case RINGBUF_TYPE_TIME_EXTEND:
1494 delta = event->array[0]; 1529 delta = event->array[0];
1495 delta <<= TS_SHIFT; 1530 delta <<= TS_SHIFT;
1496 delta += event->time_delta; 1531 delta += event->time_delta;
1497 cpu_buffer->read_stamp += delta; 1532 cpu_buffer->read_stamp += delta;
1498 return; 1533 return;
1499 1534
1500 case RINGBUF_TYPE_TIME_STAMP: 1535 case RINGBUF_TYPE_TIME_STAMP:
1501 /* FIXME: not implemented */ 1536 /* FIXME: not implemented */
1502 return; 1537 return;
1503 1538
1504 case RINGBUF_TYPE_DATA: 1539 case RINGBUF_TYPE_DATA:
1505 cpu_buffer->read_stamp += event->time_delta; 1540 cpu_buffer->read_stamp += event->time_delta;
1506 return; 1541 return;
1507 1542
1508 default: 1543 default:
1509 BUG(); 1544 BUG();
1510 } 1545 }
1511 return; 1546 return;
1512 } 1547 }
1513 1548
1514 static void 1549 static void
1515 rb_update_iter_read_stamp(struct ring_buffer_iter *iter, 1550 rb_update_iter_read_stamp(struct ring_buffer_iter *iter,
1516 struct ring_buffer_event *event) 1551 struct ring_buffer_event *event)
1517 { 1552 {
1518 u64 delta; 1553 u64 delta;
1519 1554
1520 switch (event->type) { 1555 switch (event->type) {
1521 case RINGBUF_TYPE_PADDING: 1556 case RINGBUF_TYPE_PADDING:
1522 return; 1557 return;
1523 1558
1524 case RINGBUF_TYPE_TIME_EXTEND: 1559 case RINGBUF_TYPE_TIME_EXTEND:
1525 delta = event->array[0]; 1560 delta = event->array[0];
1526 delta <<= TS_SHIFT; 1561 delta <<= TS_SHIFT;
1527 delta += event->time_delta; 1562 delta += event->time_delta;
1528 iter->read_stamp += delta; 1563 iter->read_stamp += delta;
1529 return; 1564 return;
1530 1565
1531 case RINGBUF_TYPE_TIME_STAMP: 1566 case RINGBUF_TYPE_TIME_STAMP:
1532 /* FIXME: not implemented */ 1567 /* FIXME: not implemented */
1533 return; 1568 return;
1534 1569
1535 case RINGBUF_TYPE_DATA: 1570 case RINGBUF_TYPE_DATA:
1536 iter->read_stamp += event->time_delta; 1571 iter->read_stamp += event->time_delta;
1537 return; 1572 return;
1538 1573
1539 default: 1574 default:
1540 BUG(); 1575 BUG();
1541 } 1576 }
1542 return; 1577 return;
1543 } 1578 }
1544 1579
1545 static struct buffer_page * 1580 static struct buffer_page *
1546 rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) 1581 rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1547 { 1582 {
1548 struct buffer_page *reader = NULL; 1583 struct buffer_page *reader = NULL;
1549 unsigned long flags; 1584 unsigned long flags;
1550 int nr_loops = 0; 1585 int nr_loops = 0;
1551 1586
1552 spin_lock_irqsave(&cpu_buffer->lock, flags); 1587 spin_lock_irqsave(&cpu_buffer->lock, flags);
1553 1588
1554 again: 1589 again:
1555 /* 1590 /*
1556 * This should normally only loop twice. But because the 1591 * This should normally only loop twice. But because the
1557 * start of the reader inserts an empty page, it causes 1592 * start of the reader inserts an empty page, it causes
1558 * a case where we will loop three times. There should be no 1593 * a case where we will loop three times. There should be no
1559 * reason to loop four times (that I know of). 1594 * reason to loop four times (that I know of).
1560 */ 1595 */
1561 if (unlikely(++nr_loops > 3)) { 1596 if (unlikely(++nr_loops > 3)) {
1562 RB_WARN_ON(cpu_buffer, 1); 1597 RB_WARN_ON(cpu_buffer, 1);
1563 reader = NULL; 1598 reader = NULL;
1564 goto out; 1599 goto out;
1565 } 1600 }
1566 1601
1567 reader = cpu_buffer->reader_page; 1602 reader = cpu_buffer->reader_page;
1568 1603
1569 /* If there's more to read, return this page */ 1604 /* If there's more to read, return this page */
1570 if (cpu_buffer->reader_page->read < rb_page_size(reader)) 1605 if (cpu_buffer->reader_page->read < rb_page_size(reader))
1571 goto out; 1606 goto out;
1572 1607
1573 /* Never should we have an index greater than the size */ 1608 /* Never should we have an index greater than the size */
1574 RB_WARN_ON(cpu_buffer, 1609 RB_WARN_ON(cpu_buffer,
1575 cpu_buffer->reader_page->read > rb_page_size(reader)); 1610 cpu_buffer->reader_page->read > rb_page_size(reader));
1576 1611
1577 /* check if we caught up to the tail */ 1612 /* check if we caught up to the tail */
1578 reader = NULL; 1613 reader = NULL;
1579 if (cpu_buffer->commit_page == cpu_buffer->reader_page) 1614 if (cpu_buffer->commit_page == cpu_buffer->reader_page)
1580 goto out; 1615 goto out;
1581 1616
1582 /* 1617 /*
1583 * Splice the empty reader page into the list around the head. 1618 * Splice the empty reader page into the list around the head.
1584 * Reset the reader page to size zero. 1619 * Reset the reader page to size zero.
1585 */ 1620 */
1586 1621
1587 reader = cpu_buffer->head_page; 1622 reader = cpu_buffer->head_page;
1588 cpu_buffer->reader_page->list.next = reader->list.next; 1623 cpu_buffer->reader_page->list.next = reader->list.next;
1589 cpu_buffer->reader_page->list.prev = reader->list.prev; 1624 cpu_buffer->reader_page->list.prev = reader->list.prev;
1590 1625
1591 local_set(&cpu_buffer->reader_page->write, 0); 1626 local_set(&cpu_buffer->reader_page->write, 0);
1592 local_set(&cpu_buffer->reader_page->commit, 0); 1627 local_set(&cpu_buffer->reader_page->commit, 0);
1593 1628
1594 /* Make the reader page now replace the head */ 1629 /* Make the reader page now replace the head */
1595 reader->list.prev->next = &cpu_buffer->reader_page->list; 1630 reader->list.prev->next = &cpu_buffer->reader_page->list;
1596 reader->list.next->prev = &cpu_buffer->reader_page->list; 1631 reader->list.next->prev = &cpu_buffer->reader_page->list;
1597 1632
1598 /* 1633 /*
1599 * If the tail is on the reader, then we must set the head 1634 * If the tail is on the reader, then we must set the head
1600 * to the inserted page, otherwise we set it one before. 1635 * to the inserted page, otherwise we set it one before.
1601 */ 1636 */
1602 cpu_buffer->head_page = cpu_buffer->reader_page; 1637 cpu_buffer->head_page = cpu_buffer->reader_page;
1603 1638
1604 if (cpu_buffer->commit_page != reader) 1639 if (cpu_buffer->commit_page != reader)
1605 rb_inc_page(cpu_buffer, &cpu_buffer->head_page); 1640 rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
1606 1641
1607 /* Finally update the reader page to the new head */ 1642 /* Finally update the reader page to the new head */
1608 cpu_buffer->reader_page = reader; 1643 cpu_buffer->reader_page = reader;
1609 rb_reset_reader_page(cpu_buffer); 1644 rb_reset_reader_page(cpu_buffer);
1610 1645
1611 goto again; 1646 goto again;
1612 1647
1613 out: 1648 out:
1614 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 1649 spin_unlock_irqrestore(&cpu_buffer->lock, flags);
1615 1650
1616 return reader; 1651 return reader;
1617 } 1652 }
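The splice in rb_get_reader_page() above is easier to follow in isolation. Below is a minimal, self-contained userspace C sketch of the same pointer moves, not part of this patch; struct page_node and swap_reader_with_head are invented names. The spare page takes the head page's place in the ring, and the old head is handed back to become the new reader page.

#include <stdio.h>

/* Toy stand-in for buffer_page and its list_head, for illustration only. */
struct page_node {
	struct page_node *next, *prev;
	const char *name;
};

/*
 * Swap a detached spare page into the ring in place of *headp and
 * return the page that was removed, mirroring how rb_get_reader_page()
 * exchanges the reader page with the head page.
 */
static struct page_node *swap_reader_with_head(struct page_node *spare,
					       struct page_node **headp)
{
	struct page_node *head = *headp;

	/* Point the spare at the head's neighbors... */
	spare->next = head->next;
	spare->prev = head->prev;

	/* ...and make the neighbors point back at the spare. */
	head->prev->next = spare;
	head->next->prev = spare;

	/* The spare is now in the ring; the old head is handed back. */
	*headp = spare;
	return head;
}

int main(void)
{
	struct page_node a = { .name = "A" }, b = { .name = "B" }, c = { .name = "C" };
	struct page_node reader = { .name = "reader" };
	struct page_node *head = &a;
	struct page_node *old_head;

	/* Build a three page ring: A <-> B <-> C <-> A */
	a.next = &b; b.next = &c; c.next = &a;
	a.prev = &c; b.prev = &a; c.prev = &b;

	old_head = swap_reader_with_head(&reader, &head);

	printf("new head: %s, new reader page: %s, ring: %s -> %s -> %s\n",
	       head->name, old_head->name,
	       head->name, head->next->name, head->next->next->name);
	return 0;
}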
1618 1653
1619 static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer) 1654 static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
1620 { 1655 {
1621 struct ring_buffer_event *event; 1656 struct ring_buffer_event *event;
1622 struct buffer_page *reader; 1657 struct buffer_page *reader;
1623 unsigned length; 1658 unsigned length;
1624 1659
1625 reader = rb_get_reader_page(cpu_buffer); 1660 reader = rb_get_reader_page(cpu_buffer);
1626 1661
1627 /* This function should not be called when buffer is empty */ 1662 /* This function should not be called when buffer is empty */
1628 BUG_ON(!reader); 1663 BUG_ON(!reader);
1629 1664
1630 event = rb_reader_event(cpu_buffer); 1665 event = rb_reader_event(cpu_buffer);
1631 1666
1632 if (event->type == RINGBUF_TYPE_DATA) 1667 if (event->type == RINGBUF_TYPE_DATA)
1633 cpu_buffer->entries--; 1668 cpu_buffer->entries--;
1634 1669
1635 rb_update_read_stamp(cpu_buffer, event); 1670 rb_update_read_stamp(cpu_buffer, event);
1636 1671
1637 length = rb_event_length(event); 1672 length = rb_event_length(event);
1638 cpu_buffer->reader_page->read += length; 1673 cpu_buffer->reader_page->read += length;
1639 } 1674 }
1640 1675
1641 static void rb_advance_iter(struct ring_buffer_iter *iter) 1676 static void rb_advance_iter(struct ring_buffer_iter *iter)
1642 { 1677 {
1643 struct ring_buffer *buffer; 1678 struct ring_buffer *buffer;
1644 struct ring_buffer_per_cpu *cpu_buffer; 1679 struct ring_buffer_per_cpu *cpu_buffer;
1645 struct ring_buffer_event *event; 1680 struct ring_buffer_event *event;
1646 unsigned length; 1681 unsigned length;
1647 1682
1648 cpu_buffer = iter->cpu_buffer; 1683 cpu_buffer = iter->cpu_buffer;
1649 buffer = cpu_buffer->buffer; 1684 buffer = cpu_buffer->buffer;
1650 1685
1651 /* 1686 /*
1652 * Check if we are at the end of the buffer. 1687 * Check if we are at the end of the buffer.
1653 */ 1688 */
1654 if (iter->head >= rb_page_size(iter->head_page)) { 1689 if (iter->head >= rb_page_size(iter->head_page)) {
1655 BUG_ON(iter->head_page == cpu_buffer->commit_page); 1690 BUG_ON(iter->head_page == cpu_buffer->commit_page);
1656 rb_inc_iter(iter); 1691 rb_inc_iter(iter);
1657 return; 1692 return;
1658 } 1693 }
1659 1694
1660 event = rb_iter_head_event(iter); 1695 event = rb_iter_head_event(iter);
1661 1696
1662 length = rb_event_length(event); 1697 length = rb_event_length(event);
1663 1698
1664 /* 1699 /*
1665 * This should not be called to advance the header if we are 1700 * This should not be called to advance the header if we are
1666 * at the tail of the buffer. 1701 * at the tail of the buffer.
1667 */ 1702 */
1668 BUG_ON((iter->head_page == cpu_buffer->commit_page) && 1703 BUG_ON((iter->head_page == cpu_buffer->commit_page) &&
1669 (iter->head + length > rb_commit_index(cpu_buffer))); 1704 (iter->head + length > rb_commit_index(cpu_buffer)));
1670 1705
1671 rb_update_iter_read_stamp(iter, event); 1706 rb_update_iter_read_stamp(iter, event);
1672 1707
1673 iter->head += length; 1708 iter->head += length;
1674 1709
1675 /* check for end of page padding */ 1710 /* check for end of page padding */
1676 if ((iter->head >= rb_page_size(iter->head_page)) && 1711 if ((iter->head >= rb_page_size(iter->head_page)) &&
1677 (iter->head_page != cpu_buffer->commit_page)) 1712 (iter->head_page != cpu_buffer->commit_page))
1678 rb_advance_iter(iter); 1713 rb_advance_iter(iter);
1679 } 1714 }
1680 1715
1681 /** 1716 /**
1682 * ring_buffer_peek - peek at the next event to be read 1717 * ring_buffer_peek - peek at the next event to be read
1683 * @buffer: The ring buffer to read 1718 * @buffer: The ring buffer to read
1684 * @cpu: The cpu to peek at 1719 * @cpu: The cpu to peek at
1685 * @ts: The timestamp counter of this event. 1720 * @ts: The timestamp counter of this event.
1686 * 1721 *
1687 * This will return the event that will be read next, but does 1722 * This will return the event that will be read next, but does
1688 * not consume the data. 1723 * not consume the data.
1689 */ 1724 */
1690 struct ring_buffer_event * 1725 struct ring_buffer_event *
1691 ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) 1726 ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1692 { 1727 {
1693 struct ring_buffer_per_cpu *cpu_buffer; 1728 struct ring_buffer_per_cpu *cpu_buffer;
1694 struct ring_buffer_event *event; 1729 struct ring_buffer_event *event;
1695 struct buffer_page *reader; 1730 struct buffer_page *reader;
1696 int nr_loops = 0; 1731 int nr_loops = 0;
1697 1732
1698 if (!cpu_isset(cpu, buffer->cpumask)) 1733 if (!cpu_isset(cpu, buffer->cpumask))
1699 return NULL; 1734 return NULL;
1700 1735
1701 cpu_buffer = buffer->buffers[cpu]; 1736 cpu_buffer = buffer->buffers[cpu];
1702 1737
1703 again: 1738 again:
1704 /* 1739 /*
1705 * We repeat when a timestamp is encountered. It is possible 1740 * We repeat when a timestamp is encountered. It is possible
1706 * to get multiple timestamps from an interrupt entering just 1741 * to get multiple timestamps from an interrupt entering just
1707 * as one timestamp is about to be written. The max times 1742 * as one timestamp is about to be written. The max times
1708 * that this can happen is the number of nested interrupts we 1743 * that this can happen is the number of nested interrupts we
1709 * can have. Nesting 10 deep of interrupts is clearly 1744 * can have. Nesting 10 deep of interrupts is clearly
1710 * an anomaly. 1745 * an anomaly.
1711 */ 1746 */
1712 if (unlikely(++nr_loops > 10)) { 1747 if (unlikely(++nr_loops > 10)) {
1713 RB_WARN_ON(cpu_buffer, 1); 1748 RB_WARN_ON(cpu_buffer, 1);
1714 return NULL; 1749 return NULL;
1715 } 1750 }
1716 1751
1717 reader = rb_get_reader_page(cpu_buffer); 1752 reader = rb_get_reader_page(cpu_buffer);
1718 if (!reader) 1753 if (!reader)
1719 return NULL; 1754 return NULL;
1720 1755
1721 event = rb_reader_event(cpu_buffer); 1756 event = rb_reader_event(cpu_buffer);
1722 1757
1723 switch (event->type) { 1758 switch (event->type) {
1724 case RINGBUF_TYPE_PADDING: 1759 case RINGBUF_TYPE_PADDING:
1725 RB_WARN_ON(cpu_buffer, 1); 1760 RB_WARN_ON(cpu_buffer, 1);
1726 rb_advance_reader(cpu_buffer); 1761 rb_advance_reader(cpu_buffer);
1727 return NULL; 1762 return NULL;
1728 1763
1729 case RINGBUF_TYPE_TIME_EXTEND: 1764 case RINGBUF_TYPE_TIME_EXTEND:
1730 /* Internal data, OK to advance */ 1765 /* Internal data, OK to advance */
1731 rb_advance_reader(cpu_buffer); 1766 rb_advance_reader(cpu_buffer);
1732 goto again; 1767 goto again;
1733 1768
1734 case RINGBUF_TYPE_TIME_STAMP: 1769 case RINGBUF_TYPE_TIME_STAMP:
1735 /* FIXME: not implemented */ 1770 /* FIXME: not implemented */
1736 rb_advance_reader(cpu_buffer); 1771 rb_advance_reader(cpu_buffer);
1737 goto again; 1772 goto again;
1738 1773
1739 case RINGBUF_TYPE_DATA: 1774 case RINGBUF_TYPE_DATA:
1740 if (ts) { 1775 if (ts) {
1741 *ts = cpu_buffer->read_stamp + event->time_delta; 1776 *ts = cpu_buffer->read_stamp + event->time_delta;
1742 ring_buffer_normalize_time_stamp(cpu_buffer->cpu, ts); 1777 ring_buffer_normalize_time_stamp(cpu_buffer->cpu, ts);
1743 } 1778 }
1744 return event; 1779 return event;
1745 1780
1746 default: 1781 default:
1747 BUG(); 1782 BUG();
1748 } 1783 }
1749 1784
1750 return NULL; 1785 return NULL;
1751 } 1786 }
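A usage sketch for the peek API documented above, not part of this patch: peeking returns the next event without consuming it, so repeated calls keep returning the same event until the reader is advanced. The function name peek_next_event is invented; ring_buffer_event_length() is assumed to be the length accessor declared in ring_buffer.h.

#include <linux/kernel.h>
#include <linux/ring_buffer.h>

/*
 * Look at the next event on @cpu without consuming it. Assumes a valid
 * @buffer and a context where reading this cpu buffer is safe.
 */
static void peek_next_event(struct ring_buffer *buffer, int cpu)
{
	struct ring_buffer_event *event;
	u64 ts;

	event = ring_buffer_peek(buffer, cpu, &ts);
	if (!event)
		return;		/* nothing to read on this cpu */

	/* The event stays in the buffer; a later peek returns it again. */
	pr_info("next event on cpu %d: len=%u ts=%llu\n",
		cpu, ring_buffer_event_length(event),
		(unsigned long long)ts);
}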
1752 1787
1753 /** 1788 /**
1754 * ring_buffer_iter_peek - peek at the next event to be read 1789 * ring_buffer_iter_peek - peek at the next event to be read
1755 * @iter: The ring buffer iterator 1790 * @iter: The ring buffer iterator
1756 * @ts: The timestamp counter of this event. 1791 * @ts: The timestamp counter of this event.
1757 * 1792 *
1758 * This will return the event that will be read next, but does 1793 * This will return the event that will be read next, but does
1759 * not increment the iterator. 1794 * not increment the iterator.
1760 */ 1795 */
1761 struct ring_buffer_event * 1796 struct ring_buffer_event *
1762 ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts) 1797 ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1763 { 1798 {
1764 struct ring_buffer *buffer; 1799 struct ring_buffer *buffer;
1765 struct ring_buffer_per_cpu *cpu_buffer; 1800 struct ring_buffer_per_cpu *cpu_buffer;
1766 struct ring_buffer_event *event; 1801 struct ring_buffer_event *event;
1767 int nr_loops = 0; 1802 int nr_loops = 0;
1768 1803
1769 if (ring_buffer_iter_empty(iter)) 1804 if (ring_buffer_iter_empty(iter))
1770 return NULL; 1805 return NULL;
1771 1806
1772 cpu_buffer = iter->cpu_buffer; 1807 cpu_buffer = iter->cpu_buffer;
1773 buffer = cpu_buffer->buffer; 1808 buffer = cpu_buffer->buffer;
1774 1809
1775 again: 1810 again:
1776 /* 1811 /*
1777 * We repeat when a timestamp is encountered. It is possible 1812 * We repeat when a timestamp is encountered. It is possible
1778 * to get multiple timestamps from an interrupt entering just 1813 * to get multiple timestamps from an interrupt entering just
1779 * as one timestamp is about to be written. The max times 1814 * as one timestamp is about to be written. The max times
1780 * that this can happen is the number of nested interrupts we 1815 * that this can happen is the number of nested interrupts we
1781 * can have. Nesting 10 deep of interrupts is clearly 1816 * can have. Nesting 10 deep of interrupts is clearly
1782 * an anomaly. 1817 * an anomaly.
1783 */ 1818 */
1784 if (unlikely(++nr_loops > 10)) { 1819 if (unlikely(++nr_loops > 10)) {
1785 RB_WARN_ON(cpu_buffer, 1); 1820 RB_WARN_ON(cpu_buffer, 1);
1786 return NULL; 1821 return NULL;
1787 } 1822 }
1788 1823
1789 if (rb_per_cpu_empty(cpu_buffer)) 1824 if (rb_per_cpu_empty(cpu_buffer))
1790 return NULL; 1825 return NULL;
1791 1826
1792 event = rb_iter_head_event(iter); 1827 event = rb_iter_head_event(iter);
1793 1828
1794 switch (event->type) { 1829 switch (event->type) {
1795 case RINGBUF_TYPE_PADDING: 1830 case RINGBUF_TYPE_PADDING:
1796 rb_inc_iter(iter); 1831 rb_inc_iter(iter);
1797 goto again; 1832 goto again;
1798 1833
1799 case RINGBUF_TYPE_TIME_EXTEND: 1834 case RINGBUF_TYPE_TIME_EXTEND:
1800 /* Internal data, OK to advance */ 1835 /* Internal data, OK to advance */
1801 rb_advance_iter(iter); 1836 rb_advance_iter(iter);
1802 goto again; 1837 goto again;
1803 1838
1804 case RINGBUF_TYPE_TIME_STAMP: 1839 case RINGBUF_TYPE_TIME_STAMP:
1805 /* FIXME: not implemented */ 1840 /* FIXME: not implemented */
1806 rb_advance_iter(iter); 1841 rb_advance_iter(iter);
1807 goto again; 1842 goto again;
1808 1843
1809 case RINGBUF_TYPE_DATA: 1844 case RINGBUF_TYPE_DATA:
1810 if (ts) { 1845 if (ts) {
1811 *ts = iter->read_stamp + event->time_delta; 1846 *ts = iter->read_stamp + event->time_delta;
1812 ring_buffer_normalize_time_stamp(cpu_buffer->cpu, ts); 1847 ring_buffer_normalize_time_stamp(cpu_buffer->cpu, ts);
1813 } 1848 }
1814 return event; 1849 return event;
1815 1850
1816 default: 1851 default:
1817 BUG(); 1852 BUG();
1818 } 1853 }
1819 1854
1820 return NULL; 1855 return NULL;
1821 } 1856 }
1822 1857
1823 /** 1858 /**
1824 * ring_buffer_consume - return an event and consume it 1859 * ring_buffer_consume - return an event and consume it
1825 * @buffer: The ring buffer to get the next event from 1860 * @buffer: The ring buffer to get the next event from
1826 * 1861 *
1827 * Returns the next event in the ring buffer, and that event is consumed. 1862 * Returns the next event in the ring buffer, and that event is consumed.
1828 * Meaning that sequential reads will keep returning a different event, 1863 * Meaning that sequential reads will keep returning a different event,
1829 * and eventually empty the ring buffer if the producer is slower. 1864 * and eventually empty the ring buffer if the producer is slower.
1830 */ 1865 */
1831 struct ring_buffer_event * 1866 struct ring_buffer_event *
1832 ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) 1867 ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
1833 { 1868 {
1834 struct ring_buffer_per_cpu *cpu_buffer; 1869 struct ring_buffer_per_cpu *cpu_buffer;
1835 struct ring_buffer_event *event; 1870 struct ring_buffer_event *event;
1836 1871
1837 if (!cpu_isset(cpu, buffer->cpumask)) 1872 if (!cpu_isset(cpu, buffer->cpumask))
1838 return NULL; 1873 return NULL;
1839 1874
1840 event = ring_buffer_peek(buffer, cpu, ts); 1875 event = ring_buffer_peek(buffer, cpu, ts);
1841 if (!event) 1876 if (!event)
1842 return NULL; 1877 return NULL;
1843 1878
1844 cpu_buffer = buffer->buffers[cpu]; 1879 cpu_buffer = buffer->buffers[cpu];
1845 rb_advance_reader(cpu_buffer); 1880 rb_advance_reader(cpu_buffer);
1846 1881
1847 return event; 1882 return event;
1848 } 1883 }
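A hedged sketch of the consuming read, not part of this patch: each successful call removes the returned event, so a simple loop drains one cpu buffer. drain_cpu is an invented name; ring_buffer_event_data() is assumed to be the payload accessor from ring_buffer.h.

#include <linux/ring_buffer.h>

/* Drain every event currently pending on @cpu. */
static void drain_cpu(struct ring_buffer *buffer, int cpu)
{
	struct ring_buffer_event *event;
	u64 ts;

	while ((event = ring_buffer_consume(buffer, cpu, &ts)) != NULL) {
		void *data = ring_buffer_event_data(event);

		/*
		 * Process data and ts here. Copy out anything that must
		 * outlive the loop; once consumed, the page holding the
		 * event may be handed back to the writer and reused.
		 */
		(void)data;
	}
}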
1849 1884
1850 /** 1885 /**
1851 * ring_buffer_read_start - start a non consuming read of the buffer 1886 * ring_buffer_read_start - start a non consuming read of the buffer
1852 * @buffer: The ring buffer to read from 1887 * @buffer: The ring buffer to read from
1853 * @cpu: The cpu buffer to iterate over 1888 * @cpu: The cpu buffer to iterate over
1854 * 1889 *
1855 * This starts up an iteration through the buffer. It also disables 1890 * This starts up an iteration through the buffer. It also disables
1856 * the recording to the buffer until the reading is finished. 1891 * the recording to the buffer until the reading is finished.
1857 * This prevents the reading from being corrupted. This is not 1892 * This prevents the reading from being corrupted. This is not
1858 * a consuming read, so a producer is not expected. 1893 * a consuming read, so a producer is not expected.
1859 * 1894 *
1860 * Must be paired with ring_buffer_read_finish. 1895 * Must be paired with ring_buffer_read_finish.
1861 */ 1896 */
1862 struct ring_buffer_iter * 1897 struct ring_buffer_iter *
1863 ring_buffer_read_start(struct ring_buffer *buffer, int cpu) 1898 ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
1864 { 1899 {
1865 struct ring_buffer_per_cpu *cpu_buffer; 1900 struct ring_buffer_per_cpu *cpu_buffer;
1866 struct ring_buffer_iter *iter; 1901 struct ring_buffer_iter *iter;
1867 unsigned long flags; 1902 unsigned long flags;
1868 1903
1869 if (!cpu_isset(cpu, buffer->cpumask)) 1904 if (!cpu_isset(cpu, buffer->cpumask))
1870 return NULL; 1905 return NULL;
1871 1906
1872 iter = kmalloc(sizeof(*iter), GFP_KERNEL); 1907 iter = kmalloc(sizeof(*iter), GFP_KERNEL);
1873 if (!iter) 1908 if (!iter)
1874 return NULL; 1909 return NULL;
1875 1910
1876 cpu_buffer = buffer->buffers[cpu]; 1911 cpu_buffer = buffer->buffers[cpu];
1877 1912
1878 iter->cpu_buffer = cpu_buffer; 1913 iter->cpu_buffer = cpu_buffer;
1879 1914
1880 atomic_inc(&cpu_buffer->record_disabled); 1915 atomic_inc(&cpu_buffer->record_disabled);
1881 synchronize_sched(); 1916 synchronize_sched();
1882 1917
1883 spin_lock_irqsave(&cpu_buffer->lock, flags); 1918 spin_lock_irqsave(&cpu_buffer->lock, flags);
1884 ring_buffer_iter_reset(iter); 1919 ring_buffer_iter_reset(iter);
1885 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 1920 spin_unlock_irqrestore(&cpu_buffer->lock, flags);
1886 1921
1887 return iter; 1922 return iter;
1888 } 1923 }
1889 1924
1890 /** 1925 /**
1891 * ring_buffer_finish - finish reading the iterator of the buffer 1926 * ring_buffer_finish - finish reading the iterator of the buffer
1892 * @iter: The iterator retrieved by ring_buffer_read_start 1927 * @iter: The iterator retrieved by ring_buffer_read_start
1893 * 1928 *
1894 * This re-enables the recording to the buffer, and frees the 1929 * This re-enables the recording to the buffer, and frees the
1895 * iterator. 1930 * iterator.
1896 */ 1931 */
1897 void 1932 void
1898 ring_buffer_read_finish(struct ring_buffer_iter *iter) 1933 ring_buffer_read_finish(struct ring_buffer_iter *iter)
1899 { 1934 {
1900 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; 1935 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1901 1936
1902 atomic_dec(&cpu_buffer->record_disabled); 1937 atomic_dec(&cpu_buffer->record_disabled);
1903 kfree(iter); 1938 kfree(iter);
1904 } 1939 }
1905 1940
1906 /** 1941 /**
1907 * ring_buffer_read - read the next item in the ring buffer by the iterator 1942 * ring_buffer_read - read the next item in the ring buffer by the iterator
1908 * @iter: The ring buffer iterator 1943 * @iter: The ring buffer iterator
1909 * @ts: The time stamp of the event read. 1944 * @ts: The time stamp of the event read.
1910 * 1945 *
1911 * This reads the next event in the ring buffer and increments the iterator. 1946 * This reads the next event in the ring buffer and increments the iterator.
1912 */ 1947 */
1913 struct ring_buffer_event * 1948 struct ring_buffer_event *
1914 ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts) 1949 ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
1915 { 1950 {
1916 struct ring_buffer_event *event; 1951 struct ring_buffer_event *event;
1917 1952
1918 event = ring_buffer_iter_peek(iter, ts); 1953 event = ring_buffer_iter_peek(iter, ts);
1919 if (!event) 1954 if (!event)
1920 return NULL; 1955 return NULL;
1921 1956
1922 rb_advance_iter(iter); 1957 rb_advance_iter(iter);
1923 1958
1924 return event; 1959 return event;
1925 } 1960 }
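Putting the three iterator calls together, a usage sketch that is not part of this patch (walk_cpu is an invented name): recording into the chosen cpu buffer stays disabled between ring_buffer_read_start() and ring_buffer_read_finish(), so the walk should be kept short.

#include <linux/kernel.h>
#include <linux/ring_buffer.h>

/* Non-consuming walk of one cpu buffer, oldest event first. */
static void walk_cpu(struct ring_buffer *buffer, int cpu)
{
	struct ring_buffer_iter *iter;
	struct ring_buffer_event *event;
	u64 ts;

	iter = ring_buffer_read_start(buffer, cpu);
	if (!iter)
		return;

	while ((event = ring_buffer_read(iter, &ts)) != NULL) {
		/* Nothing is consumed; only the iterator advances. */
		pr_info("cpu %d: event at ts %llu\n",
			cpu, (unsigned long long)ts);
	}

	ring_buffer_read_finish(iter);
}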
1926 1961
1927 /** 1962 /**
1928 * ring_buffer_size - return the size of the ring buffer (in bytes) 1963 * ring_buffer_size - return the size of the ring buffer (in bytes)
1929 * @buffer: The ring buffer. 1964 * @buffer: The ring buffer.
1930 */ 1965 */
1931 unsigned long ring_buffer_size(struct ring_buffer *buffer) 1966 unsigned long ring_buffer_size(struct ring_buffer *buffer)
1932 { 1967 {
1933 return BUF_PAGE_SIZE * buffer->pages; 1968 return BUF_PAGE_SIZE * buffer->pages;
1934 } 1969 }
1935 1970
1936 static void 1971 static void
1937 rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) 1972 rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
1938 { 1973 {
1939 cpu_buffer->head_page 1974 cpu_buffer->head_page
1940 = list_entry(cpu_buffer->pages.next, struct buffer_page, list); 1975 = list_entry(cpu_buffer->pages.next, struct buffer_page, list);
1941 local_set(&cpu_buffer->head_page->write, 0); 1976 local_set(&cpu_buffer->head_page->write, 0);
1942 local_set(&cpu_buffer->head_page->commit, 0); 1977 local_set(&cpu_buffer->head_page->commit, 0);
1943 1978
1944 cpu_buffer->head_page->read = 0; 1979 cpu_buffer->head_page->read = 0;
1945 1980
1946 cpu_buffer->tail_page = cpu_buffer->head_page; 1981 cpu_buffer->tail_page = cpu_buffer->head_page;
1947 cpu_buffer->commit_page = cpu_buffer->head_page; 1982 cpu_buffer->commit_page = cpu_buffer->head_page;
1948 1983
1949 INIT_LIST_HEAD(&cpu_buffer->reader_page->list); 1984 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
1950 local_set(&cpu_buffer->reader_page->write, 0); 1985 local_set(&cpu_buffer->reader_page->write, 0);
1951 local_set(&cpu_buffer->reader_page->commit, 0); 1986 local_set(&cpu_buffer->reader_page->commit, 0);
1952 cpu_buffer->reader_page->read = 0; 1987 cpu_buffer->reader_page->read = 0;
1953 1988
1954 cpu_buffer->overrun = 0; 1989 cpu_buffer->overrun = 0;
1955 cpu_buffer->entries = 0; 1990 cpu_buffer->entries = 0;
1956 } 1991 }
1957 1992
1958 /** 1993 /**
1959 * ring_buffer_reset_cpu - reset a ring buffer per CPU buffer 1994 * ring_buffer_reset_cpu - reset a ring buffer per CPU buffer
1960 * @buffer: The ring buffer to reset a per cpu buffer of 1995 * @buffer: The ring buffer to reset a per cpu buffer of
1961 * @cpu: The CPU buffer to be reset 1996 * @cpu: The CPU buffer to be reset
1962 */ 1997 */
1963 void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu) 1998 void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
1964 { 1999 {
1965 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; 2000 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
1966 unsigned long flags; 2001 unsigned long flags;
1967 2002
1968 if (!cpu_isset(cpu, buffer->cpumask)) 2003 if (!cpu_isset(cpu, buffer->cpumask))
1969 return; 2004 return;
1970 2005
1971 spin_lock_irqsave(&cpu_buffer->lock, flags); 2006 spin_lock_irqsave(&cpu_buffer->lock, flags);
1972 2007
1973 rb_reset_cpu(cpu_buffer); 2008 rb_reset_cpu(cpu_buffer);
1974 2009
1975 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 2010 spin_unlock_irqrestore(&cpu_buffer->lock, flags);
1976 } 2011 }
1977 2012
1978 /** 2013 /**
1979 * ring_buffer_reset - reset a ring buffer 2014 * ring_buffer_reset - reset a ring buffer
1980 * @buffer: The ring buffer to reset all cpu buffers 2015 * @buffer: The ring buffer to reset all cpu buffers
1981 */ 2016 */
1982 void ring_buffer_reset(struct ring_buffer *buffer) 2017 void ring_buffer_reset(struct ring_buffer *buffer)
1983 { 2018 {
1984 int cpu; 2019 int cpu;
1985 2020
1986 for_each_buffer_cpu(buffer, cpu) 2021 for_each_buffer_cpu(buffer, cpu)
1987 ring_buffer_reset_cpu(buffer, cpu); 2022 ring_buffer_reset_cpu(buffer, cpu);
1988 } 2023 }
1989 2024
1990 /** 2025 /**
1991 * ring_buffer_empty - is the ring buffer empty? 2026 * ring_buffer_empty - is the ring buffer empty?
1992 * @buffer: The ring buffer to test 2027 * @buffer: The ring buffer to test
1993 */ 2028 */
1994 int ring_buffer_empty(struct ring_buffer *buffer) 2029 int ring_buffer_empty(struct ring_buffer *buffer)
1995 { 2030 {
1996 struct ring_buffer_per_cpu *cpu_buffer; 2031 struct ring_buffer_per_cpu *cpu_buffer;
1997 int cpu; 2032 int cpu;
1998 2033
1999 /* yes this is racy, but if you don't like the race, lock the buffer */ 2034 /* yes this is racy, but if you don't like the race, lock the buffer */
2000 for_each_buffer_cpu(buffer, cpu) { 2035 for_each_buffer_cpu(buffer, cpu) {
2001 cpu_buffer = buffer->buffers[cpu]; 2036 cpu_buffer = buffer->buffers[cpu];
2002 if (!rb_per_cpu_empty(cpu_buffer)) 2037 if (!rb_per_cpu_empty(cpu_buffer))
2003 return 0; 2038 return 0;
2004 } 2039 }
2005 return 1; 2040 return 1;
2006 } 2041 }
2007 2042
2008 /** 2043 /**
2009 * ring_buffer_empty_cpu - is a cpu buffer of a ring buffer empty? 2044 * ring_buffer_empty_cpu - is a cpu buffer of a ring buffer empty?
2010 * @buffer: The ring buffer 2045 * @buffer: The ring buffer
2011 * @cpu: The CPU buffer to test 2046 * @cpu: The CPU buffer to test
2012 */ 2047 */
2013 int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu) 2048 int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu)
2014 { 2049 {
2015 struct ring_buffer_per_cpu *cpu_buffer; 2050 struct ring_buffer_per_cpu *cpu_buffer;
2016 2051
2017 if (!cpu_isset(cpu, buffer->cpumask)) 2052 if (!cpu_isset(cpu, buffer->cpumask))
2018 return 1; 2053 return 1;
2019 2054
2020 cpu_buffer = buffer->buffers[cpu]; 2055 cpu_buffer = buffer->buffers[cpu];
2021 return rb_per_cpu_empty(cpu_buffer); 2056 return rb_per_cpu_empty(cpu_buffer);
2022 } 2057 }
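A small sketch of how a reader might use the emptiness check, not part of this patch (first_busy_cpu is an invented name). As the comment above notes, the answer is racy against writers, so treat it only as a hint.

#include <linux/cpumask.h>
#include <linux/ring_buffer.h>

/* Return the first online cpu with data pending, or -1 if none. */
static int first_busy_cpu(struct ring_buffer *buffer)
{
	int cpu;

	for_each_online_cpu(cpu) {
		if (!ring_buffer_empty_cpu(buffer, cpu))
			return cpu;
	}
	return -1;
}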
2023 2058
2024 /** 2059 /**
2025 * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers 2060 * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers
2026 * @buffer_a: One buffer to swap with 2061 * @buffer_a: One buffer to swap with
2027 * @buffer_b: The other buffer to swap with 2062 * @buffer_b: The other buffer to swap with
2028 * 2063 *
2029 * This function is useful for tracers that want to take a "snapshot" 2064 * This function is useful for tracers that want to take a "snapshot"
2030 * of a CPU buffer and has another back up buffer lying around. 2065 * of a CPU buffer and has another back up buffer lying around.
2031 * it is expected that the tracer handles the cpu buffer not being 2066 * it is expected that the tracer handles the cpu buffer not being
2032 * used at the moment. 2067 * used at the moment.
2033 */ 2068 */
2034 int ring_buffer_swap_cpu(struct ring_buffer *buffer_a, 2069 int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
2035 struct ring_buffer *buffer_b, int cpu) 2070 struct ring_buffer *buffer_b, int cpu)
2036 { 2071 {
2037 struct ring_buffer_per_cpu *cpu_buffer_a; 2072 struct ring_buffer_per_cpu *cpu_buffer_a;
2038 struct ring_buffer_per_cpu *cpu_buffer_b; 2073 struct ring_buffer_per_cpu *cpu_buffer_b;
2039 2074
2040 if (!cpu_isset(cpu, buffer_a->cpumask) || 2075 if (!cpu_isset(cpu, buffer_a->cpumask) ||
2041 !cpu_isset(cpu, buffer_b->cpumask)) 2076 !cpu_isset(cpu, buffer_b->cpumask))
2042 return -EINVAL; 2077 return -EINVAL;
2043 2078
2044 /* At least make sure the two buffers are somewhat the same */ 2079 /* At least make sure the two buffers are somewhat the same */
2045 if (buffer_a->size != buffer_b->size || 2080 if (buffer_a->size != buffer_b->size ||
2046 buffer_a->pages != buffer_b->pages) 2081 buffer_a->pages != buffer_b->pages)
2047 return -EINVAL; 2082 return -EINVAL;
2048 2083
2049 cpu_buffer_a = buffer_a->buffers[cpu]; 2084 cpu_buffer_a = buffer_a->buffers[cpu];
2050 cpu_buffer_b = buffer_b->buffers[cpu]; 2085 cpu_buffer_b = buffer_b->buffers[cpu];
2051 2086
2052 /* 2087 /*
2053 * We can't do a synchronize_sched here because this 2088 * We can't do a synchronize_sched here because this
2054 * function can be called in atomic context. 2089 * function can be called in atomic context.
2055 * Normally this will be called from the same CPU as cpu. 2090 * Normally this will be called from the same CPU as cpu.
2056 * If not it's up to the caller to protect this. 2091 * If not it's up to the caller to protect this.
2057 */ 2092 */
2058 atomic_inc(&cpu_buffer_a->record_disabled); 2093 atomic_inc(&cpu_buffer_a->record_disabled);
2059 atomic_inc(&cpu_buffer_b->record_disabled); 2094 atomic_inc(&cpu_buffer_b->record_disabled);
2060 2095
2061 buffer_a->buffers[cpu] = cpu_buffer_b; 2096 buffer_a->buffers[cpu] = cpu_buffer_b;
2062 buffer_b->buffers[cpu] = cpu_buffer_a; 2097 buffer_b->buffers[cpu] = cpu_buffer_a;
2063 2098
2064 cpu_buffer_b->buffer = buffer_a; 2099 cpu_buffer_b->buffer = buffer_a;
2065 cpu_buffer_a->buffer = buffer_b; 2100 cpu_buffer_a->buffer = buffer_b;
2066 2101
2067 atomic_dec(&cpu_buffer_a->record_disabled); 2102 atomic_dec(&cpu_buffer_a->record_disabled);
2068 atomic_dec(&cpu_buffer_b->record_disabled); 2103 atomic_dec(&cpu_buffer_b->record_disabled);
2069 2104
2070 return 0; 2105 return 0;
2071 } 2106 }
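A usage sketch of the snapshot pattern the comment above describes, not part of this patch: snapshot_cpu is an invented name, and it assumes @spare was allocated with the same size and page count as @live so the size check passes.

#include <linux/ring_buffer.h>

/*
 * "Snapshot" one cpu of a live buffer by swapping in a spare buffer,
 * then read the swapped-out data at leisure while @live keeps recording.
 */
static int snapshot_cpu(struct ring_buffer *live, struct ring_buffer *spare,
			int cpu)
{
	int ret;

	ret = ring_buffer_swap_cpu(live, spare, cpu);
	if (ret < 0)
		return ret;	/* mismatched sizes or cpu not present */

	/* The old data for @cpu now lives in @spare. */
	return 0;
}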
2107
2108 static ssize_t
2109 rb_simple_read(struct file *filp, char __user *ubuf,
2110 size_t cnt, loff_t *ppos)
2111 {
2112 int *p = filp->private_data;
2113 char buf[64];
2114 int r;
2115
2116 /* !ring_buffers_off == tracing_on */
2117 r = sprintf(buf, "%d\n", !*p);
2118
2119 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2120 }
2121
2122 static ssize_t
2123 rb_simple_write(struct file *filp, const char __user *ubuf,
2124 size_t cnt, loff_t *ppos)
2125 {
2126 int *p = filp->private_data;
2127 char buf[64];
2128 long val;
2129 int ret;
2130
2131 if (cnt >= sizeof(buf))
2132 return -EINVAL;
2133
2134 if (copy_from_user(&buf, ubuf, cnt))
2135 return -EFAULT;
2136
2137 buf[cnt] = 0;
2138
2139 ret = strict_strtoul(buf, 10, &val);
2140 if (ret < 0)
2141 return ret;
2142
2143 /* !ring_buffers_off == tracing_on */
2144 *p = !val;
2145
2146 (*ppos)++;
2147
2148 return cnt;
2149 }
2150
2151 static struct file_operations rb_simple_fops = {
2152 .open = tracing_open_generic,
2153 .read = rb_simple_read,
2154 .write = rb_simple_write,
2155 };
2156
2157
2158 static __init int rb_init_debugfs(void)
2159 {
2160 struct dentry *d_tracer;
2161 struct dentry *entry;
2162
2163 d_tracer = tracing_init_dentry();
2164
2165 entry = debugfs_create_file("tracing_on", 0644, d_tracer,
2166 &ring_buffers_off, &rb_simple_fops);
2167 if (!entry)
2168 pr_warning("Could not create debugfs 'tracing_on' entry\n");
2169
2170 return 0;
2171 }
2172
2173 fs_initcall(rb_init_debugfs);
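For completeness, a userspace sketch of driving the new debugfs file from C rather than the shell, not part of this patch. The path assumes debugfs is mounted at /sys/kernel/debug; set_tracing_on is an invented name.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static int set_tracing_on(int on)
{
	int fd = open("/sys/kernel/debug/tracing/tracing_on", O_WRONLY);
	ssize_t n;

	if (fd < 0) {
		perror("open tracing_on");
		return -1;
	}
	/*
	 * rb_simple_write() above stores !val in ring_buffers_off, so
	 * writing "1" enables recording and "0" disables it.
	 */
	n = write(fd, on ? "1" : "0", 1);
	close(fd);
	return n == 1 ? 0 : -1;
}

int main(int argc, char **argv)
{
	int on = (argc > 1 && argv[1][0] == '1');

	return set_tracing_on(on) == 0 ? 0 : 1;
}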
2072 2174