Blame view
net/ipv4/tcp_probe.c
7.39 KB
a42e9d6ce
|
1 2 3 4 5 6 7 8 |
/* * tcpprobe - Observe the TCP flow with kprobes. * * The idea for this came from Werner Almesberger's umlsim * Copyright (C) 2004, Stephen Hemminger <shemminger@osdl.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by |
662ad4f8e
|
9 |
* the Free Software Foundation; either version 2 of the License. |
a42e9d6ce
|
10 11 12 13 14 15 16 17 18 19 |
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ |
afd465030
|
20 |
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
a42e9d6ce
|
21 22 23 24 |
#include <linux/kernel.h> #include <linux/kprobes.h> #include <linux/socket.h> #include <linux/tcp.h> |
5a0e3ad6a
|
25 |
#include <linux/slab.h> |
a42e9d6ce
|
26 27 |
#include <linux/proc_fs.h> #include <linux/module.h> |
85795d64e
|
28 29 |
#include <linux/ktime.h> #include <linux/time.h> |
457c4cbc5
|
30 |
#include <net/net_namespace.h> |
a42e9d6ce
|
31 32 |
#include <net/tcp.h> |
65ebe6342
|
33 |
/* Module metadata. */
MODULE_LICENSE("GPL");
MODULE_VERSION("1.1");
MODULE_AUTHOR("Stephen Hemminger <shemminger@linux-foundation.org>");
MODULE_DESCRIPTION("TCP cwnd snooper");
a42e9d6ce
|
37 |
|
47d18a9be
|
38 |
/*
 * Load-time parameters.  Each one is registered with a permission
 * mask of 0.
 */

/* Port to compare against both the source and destination port; 0 = all. */
static int port __read_mostly;
module_param(port, int, 0);
MODULE_PARM_DESC(port, "Port to match (0=all)");

/* Number of slots in the log ring. */
static unsigned int bufsize __read_mostly = 4096;
module_param(bufsize, uint, 0);
MODULE_PARM_DESC(bufsize, "Log buffer size in packets (4096)");

/* skb mark to compare against; 0 disables mark matching. */
static unsigned int fwmark __read_mostly;
module_param(fwmark, uint, 0);
MODULE_PARM_DESC(fwmark, "skb mark to match (0=no mark)");

/* Non-zero: log every matching packet rather than only cwnd changes. */
static int full __read_mostly;
module_param(full, int, 0);
MODULE_PARM_DESC(full, "Full log (1=every ack packet received,  0=only cwnd changes)");

/* Name of the proc entry created under init_net.proc_net. */
static const char procname[] = "tcpprobe";
662ad4f8e
|
52 53 |
struct tcp_log { ktime_t tstamp; |
f925d0a62
|
54 55 56 57 58 |
union { struct sockaddr raw; struct sockaddr_in v4; struct sockaddr_in6 v6; } src, dst; |
662ad4f8e
|
59 60 61 62 |
u16 length; u32 snd_nxt; u32 snd_una; u32 snd_wnd; |
b4c1c1d03
|
63 |
u32 rcv_wnd; |
662ad4f8e
|
64 65 66 67 68 69 |
u32 snd_cwnd; u32 ssthresh; u32 srtt; }; static struct { |
85795d64e
|
70 |
spinlock_t lock; |
a42e9d6ce
|
71 |
wait_queue_head_t wait; |
85795d64e
|
72 73 |
ktime_t start; u32 lastcwnd; |
a42e9d6ce
|
74 |
|
662ad4f8e
|
75 76 77 |
unsigned long head, tail; struct tcp_log *log; } tcp_probe; |
662ad4f8e
|
78 |
static inline int tcp_probe_used(void) |
a42e9d6ce
|
79 |
{ |
f81074f86
|
80 |
return (tcp_probe.head - tcp_probe.tail) & (bufsize - 1); |
662ad4f8e
|
81 82 83 84 |
} static inline int tcp_probe_avail(void) { |
f81074f86
|
85 |
return bufsize - tcp_probe_used() - 1; |
14a49e1fd
|
86 |
} |
a42e9d6ce
|
87 |
|
f925d0a62
|
88 89 90 91 92 93 |
/*
 * Fill the sockaddr_in lvalue @si4 from @inet's inet_<mem>port and
 * inet_<mem>addr fields; @mem is the token s or d.
 *
 * No line continuation follows "while (0)", so the macro ends there and
 * cannot swallow the next source line.
 */
#define tcp_probe_copy_fl_to_si4(inet, si4, mem)			\
	do {								\
		(si4).sin_family = AF_INET;				\
		(si4).sin_port = (inet)->inet_##mem##port;		\
		(si4).sin_addr.s_addr = (inet)->inet_##mem##addr;	\
	} while (0)
85795d64e
|
94 95 96 97 |
/* * Hook inserted to be called before each receive packet. * Note: arguments must match tcp_rcv_established()! */ |
c995ae225
|
98 99 |
static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th, unsigned int len) |
a42e9d6ce
|
100 101 102 |
{ const struct tcp_sock *tp = tcp_sk(sk); const struct inet_sock *inet = inet_sk(sk); |
b1dcdc68b
|
103 104 105 106 107 |
/* Only update if port or skb mark matches */ if (((port == 0 && fwmark == 0) || ntohs(inet->inet_dport) == port || ntohs(inet->inet_sport) == port || (fwmark > 0 && skb->mark == fwmark)) && |
9d4fb27db
|
108 |
(full || tp->snd_cwnd != tcp_probe.lastcwnd)) { |
662ad4f8e
|
109 110 111 112 113 114 115 |
spin_lock(&tcp_probe.lock); /* If log fills, just silently drop */ if (tcp_probe_avail() > 1) { struct tcp_log *p = tcp_probe.log + tcp_probe.head; p->tstamp = ktime_get(); |
f925d0a62
|
116 117 118 119 120 121 |
switch (sk->sk_family) { case AF_INET: tcp_probe_copy_fl_to_si4(inet, p->src.v4, s); tcp_probe_copy_fl_to_si4(inet, p->dst.v4, d); break; case AF_INET6: |
efe4208f4
|
122 123 124 125 126 127 128 129 130 131 132 |
memset(&p->src.v6, 0, sizeof(p->src.v6)); memset(&p->dst.v6, 0, sizeof(p->dst.v6)); #if IS_ENABLED(CONFIG_IPV6) p->src.v6.sin6_family = AF_INET6; p->src.v6.sin6_port = inet->inet_sport; p->src.v6.sin6_addr = inet6_sk(sk)->saddr; p->dst.v6.sin6_family = AF_INET6; p->dst.v6.sin6_port = inet->inet_dport; p->dst.v6.sin6_addr = sk->sk_v6_daddr; #endif |
f925d0a62
|
133 134 135 136 |
break; default: BUG(); } |
662ad4f8e
|
137 138 139 140 141 |
p->length = skb->len; p->snd_nxt = tp->snd_nxt; p->snd_una = tp->snd_una; p->snd_cwnd = tp->snd_cwnd; p->snd_wnd = tp->snd_wnd; |
b4c1c1d03
|
142 |
p->rcv_wnd = tp->rcv_wnd; |
b3b0b681b
|
143 |
p->ssthresh = tcp_current_ssthresh(sk); |
740b0f184
|
144 |
p->srtt = tp->srtt_us >> 3; |
662ad4f8e
|
145 |
|
f81074f86
|
146 |
tcp_probe.head = (tcp_probe.head + 1) & (bufsize - 1); |
662ad4f8e
|
147 148 149 150 151 |
} tcp_probe.lastcwnd = tp->snd_cwnd; spin_unlock(&tcp_probe.lock); wake_up(&tcp_probe.wait); |
a42e9d6ce
|
152 153 154 |
} jprobe_return(); |
a42e9d6ce
|
155 |
} |
662ad4f8e
|
156 |
static struct jprobe tcp_jprobe = { |
3a872d89b
|
157 |
.kp = { |
85795d64e
|
158 |
.symbol_name = "tcp_rcv_established", |
3a872d89b
|
159 |
}, |
9e367d859
|
160 |
.entry = jtcp_rcv_established, |
a42e9d6ce
|
161 |
}; |
5e73ea1a3
|
162 |
static int tcpprobe_open(struct inode *inode, struct file *file) |
a42e9d6ce
|
163 |
{ |
662ad4f8e
|
164 165 166 167 168 |
/* Reset (empty) log */ spin_lock_bh(&tcp_probe.lock); tcp_probe.head = tcp_probe.tail = 0; tcp_probe.start = ktime_get(); spin_unlock_bh(&tcp_probe.lock); |
a42e9d6ce
|
169 170 |
return 0; } |
662ad4f8e
|
171 172 173 |
static int tcpprobe_sprint(char *tbuf, int n) { const struct tcp_log *p |
f81074f86
|
174 |
= tcp_probe.log + tcp_probe.tail; |
662ad4f8e
|
175 176 |
struct timespec tv = ktime_to_timespec(ktime_sub(p->tstamp, tcp_probe.start)); |
dda0b3869
|
177 |
return scnprintf(tbuf, n, |
f925d0a62
|
178 179 |
"%lu.%09lu %pISpc %pISpc %d %#x %#x %u %u %u %u %u ", |
688d1945b
|
180 181 |
(unsigned long)tv.tv_sec, (unsigned long)tv.tv_nsec, |
f925d0a62
|
182 |
&p->src, &p->dst, p->length, p->snd_nxt, p->snd_una, |
b4c1c1d03
|
183 |
p->snd_cwnd, p->ssthresh, p->snd_wnd, p->srtt, p->rcv_wnd); |
662ad4f8e
|
184 |
} |
a42e9d6ce
|
185 186 187 |
/*
 * read() handler for the proc entry.
 *
 * Sleeps until at least one record is queued, then copies formatted
 * records to @buf while each one still fits strictly inside @len.  A
 * record that does not fit is left in the ring for the next read.
 *
 * Returns the number of bytes copied; if nothing was copied, the result
 * of the interrupted wait (or 0).  Returns -EINVAL for a NULL @buf and
 * -EFAULT when copy_to_user() fails.
 */
static ssize_t tcpprobe_read(struct file *file, char __user *buf,
			     size_t len, loff_t *ppos)
{
	size_t copied = 0;
	int err = 0;

	if (!buf)
		return -EINVAL;

	while (copied < len) {
		char line[256];
		bool fits;
		int n;

		/* Wait for data in buffer */
		err = wait_event_interruptible(tcp_probe.wait,
					       tcp_probe_used() > 0);
		if (err)
			break;

		spin_lock_bh(&tcp_probe.lock);
		if (tcp_probe.head == tcp_probe.tail) {
			/* multiple readers race? */
			spin_unlock_bh(&tcp_probe.lock);
			continue;
		}

		n = tcpprobe_sprint(line, sizeof(line));

		/* Consume the record only if it is going to be delivered */
		fits = copied + n < len;
		if (fits)
			tcp_probe.tail = (tcp_probe.tail + 1) & (bufsize - 1);

		spin_unlock_bh(&tcp_probe.lock);

		/* if record greater than space available
		   return partial buffer (so far) */
		if (!fits)
			break;

		if (copy_to_user(buf + copied, line, n))
			return -EFAULT;
		copied += n;
	}

	if (copied == 0)
		return err;

	return copied;
}
9a32144e9
|
228 |
static const struct file_operations tcpprobe_fops = { |
a42e9d6ce
|
229 230 231 |
.owner = THIS_MODULE, .open = tcpprobe_open, .read = tcpprobe_read, |
6038f373a
|
232 |
.llseek = noop_llseek, |
a42e9d6ce
|
233 234 235 236 237 |
}; static __init int tcpprobe_init(void) { int ret = -ENOMEM; |
d8cdeda6d
|
238 239 240 241 242 243 |
/* Warning: if the function signature of tcp_rcv_established, * has been changed, you also have to change the signature of * jtcp_rcv_established, otherwise you end up right here! */ BUILD_BUG_ON(__same_type(tcp_rcv_established, jtcp_rcv_established) == 0); |
662ad4f8e
|
244 245 |
init_waitqueue_head(&tcp_probe.wait); spin_lock_init(&tcp_probe.lock); |
f81074f86
|
246 |
if (bufsize == 0) |
662ad4f8e
|
247 |
return -EINVAL; |
f81074f86
|
248 |
bufsize = roundup_pow_of_two(bufsize); |
3d8ea1fd7
|
249 |
tcp_probe.log = kcalloc(bufsize, sizeof(struct tcp_log), GFP_KERNEL); |
662ad4f8e
|
250 251 |
if (!tcp_probe.log) goto err0; |
a42e9d6ce
|
252 |
|
d4beaa66a
|
253 |
if (!proc_create(procname, S_IRUSR, init_net.proc_net, &tcpprobe_fops)) |
a42e9d6ce
|
254 |
goto err0; |
662ad4f8e
|
255 |
ret = register_jprobe(&tcp_jprobe); |
a42e9d6ce
|
256 257 |
if (ret) goto err1; |
b1dcdc68b
|
258 259 260 |
pr_info("probe registered (port=%d/fwmark=%u) bufsize=%u ", port, fwmark, bufsize); |
a42e9d6ce
|
261 262 |
return 0; err1: |
ece31ffd5
|
263 |
remove_proc_entry(procname, init_net.proc_net); |
a42e9d6ce
|
264 |
err0: |
662ad4f8e
|
265 |
kfree(tcp_probe.log); |
a42e9d6ce
|
266 267 268 269 270 271 |
return ret; } module_init(tcpprobe_init); static __exit void tcpprobe_exit(void) { |
ece31ffd5
|
272 |
remove_proc_entry(procname, init_net.proc_net); |
662ad4f8e
|
273 274 |
unregister_jprobe(&tcp_jprobe); kfree(tcp_probe.log); |
a42e9d6ce
|
275 276 |
} module_exit(tcpprobe_exit); |