Blame view

net/psample/psample.c 11.9 KB
d2912cb15   Thomas Gleixner   treewide: Replace...
1
  // SPDX-License-Identifier: GPL-2.0-only
6ae0a6286   Yotam Gigi   net: Introduce ps...
2
3
4
  /*
   * net/psample/psample.c - Netlink channel for packet sampling
   * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
6ae0a6286   Yotam Gigi   net: Introduce ps...
5
6
7
8
9
10
11
12
13
14
15
16
   */
  
  #include <linux/types.h>
  #include <linux/kernel.h>
  #include <linux/skbuff.h>
  #include <linux/module.h>
  #include <net/net_namespace.h>
  #include <net/sock.h>
  #include <net/netlink.h>
  #include <net/genetlink.h>
  #include <net/psample.h>
  #include <linux/spinlock.h>
d8bed686a   Chris Mi   net: psample: Add...
17
18
  #include <net/ip_tunnels.h>
  #include <net/dst_metadata.h>
6ae0a6286   Yotam Gigi   net: Introduce ps...
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
  
  /*
   * Ceiling, in bytes, that psample_sample_packet() enforces on the sum of
   * the attribute metadata and the copied packet data of one sample message.
   */
  #define PSAMPLE_MAX_PACKET_SIZE 0xffff
  
  /*
   * List of sampling groups and the spinlock that the code visible here takes
   * (with bottom halves disabled) around list walks, list modifications and
   * group refcount updates.
   */
  static LIST_HEAD(psample_groups_list);
  static DEFINE_SPINLOCK(psample_groups_lock);
  
  /* multicast groups */
  enum psample_nl_multicast_groups {
  	PSAMPLE_NL_MCGRP_CONFIG,
  	PSAMPLE_NL_MCGRP_SAMPLE,
  };
  
  /*
   * Multicast group table, indexed by enum psample_nl_multicast_groups.
   * CONFIG is the target of psample_group_notify(); SAMPLE is the target of
   * psample_sample_packet().
   */
  static const struct genl_multicast_group psample_nl_mcgrps[] = {
  	[PSAMPLE_NL_MCGRP_CONFIG] = { .name = PSAMPLE_NL_MCGRP_CONFIG_NAME },
  	[PSAMPLE_NL_MCGRP_SAMPLE] = { .name = PSAMPLE_NL_MCGRP_SAMPLE_NAME },
  };
  
  /* Forward declaration: the message builders below reference the family. */
  static struct genl_family psample_nl_family __ro_after_init;
  
  /*
   * Build one generic netlink message describing @group in @msg.
   *
   * The message carries the group number, its reference count and its
   * sequence counter, in that order.  @cmd, @portid, @seq and @flags are
   * passed straight to genlmsg_put().
   *
   * Returns 0 on success.  Returns -EMSGSIZE if the header or any attribute
   * does not fit; in the attribute case the partially built message is
   * cancelled first.
   */
  static int psample_group_nl_fill(struct sk_buff *msg,
  				 struct psample_group *group,
  				 enum psample_command cmd, u32 portid, u32 seq,
  				 int flags)
  {
  	void *hdr = genlmsg_put(msg, portid, seq, &psample_nl_family, flags,
  				cmd);
  
  	if (!hdr)
  		return -EMSGSIZE;
  
  	/* Short-circuit keeps the original attribute order. */
  	if (nla_put_u32(msg, PSAMPLE_ATTR_SAMPLE_GROUP, group->group_num) < 0 ||
  	    nla_put_u32(msg, PSAMPLE_ATTR_GROUP_REFCOUNT, group->refcount) < 0 ||
  	    nla_put_u32(msg, PSAMPLE_ATTR_GROUP_SEQ, group->seq) < 0) {
  		genlmsg_cancel(msg, hdr);
  		return -EMSGSIZE;
  	}
  
  	genlmsg_end(msg, hdr);
  	return 0;
  }
  
  static int psample_nl_cmd_get_group_dumpit(struct sk_buff *msg,
  					   struct netlink_callback *cb)
  {
  	struct psample_group *group;
  	int start = cb->args[0];
  	int idx = 0;
  	int err;
4a5da47d5   Vlad Buslov   net: sched: take ...
77
  	spin_lock_bh(&psample_groups_lock);
6ae0a6286   Yotam Gigi   net: Introduce ps...
78
79
80
81
82
83
84
85
86
87
88
89
90
91
  	list_for_each_entry(group, &psample_groups_list, list) {
  		if (!net_eq(group->net, sock_net(msg->sk)))
  			continue;
  		if (idx < start) {
  			idx++;
  			continue;
  		}
  		err = psample_group_nl_fill(msg, group, PSAMPLE_CMD_NEW_GROUP,
  					    NETLINK_CB(cb->skb).portid,
  					    cb->nlh->nlmsg_seq, NLM_F_MULTI);
  		if (err)
  			break;
  		idx++;
  	}
4a5da47d5   Vlad Buslov   net: sched: take ...
92
  	spin_unlock_bh(&psample_groups_lock);
6ae0a6286   Yotam Gigi   net: Introduce ps...
93
94
95
  	cb->args[0] = idx;
  	return msg->len;
  }
66a9b9287   Jakub Kicinski   genetlink: move t...
96
  static const struct genl_small_ops psample_nl_ops[] = {
6ae0a6286   Yotam Gigi   net: Introduce ps...
97
98
  	{
  		.cmd = PSAMPLE_CMD_GET_GROUP,
ef6243acb   Johannes Berg   genetlink: option...
99
  		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
6ae0a6286   Yotam Gigi   net: Introduce ps...
100
101
102
103
104
105
106
107
108
109
110
111
  		.dumpit = psample_nl_cmd_get_group_dumpit,
  		/* can be retrieved by unprivileged users */
  	}
  };
  
  /*
   * The psample generic netlink family: one dump operation and two multicast
   * groups, usable from any network namespace (.netnsok).
   */
  static struct genl_family psample_nl_family __ro_after_init = {
  	.module		= THIS_MODULE,
  	.name		= PSAMPLE_GENL_NAME,
  	.version	= PSAMPLE_GENL_VERSION,
  	.maxattr	= PSAMPLE_ATTR_MAX,
  	.netnsok	= true,
  	.small_ops	= psample_nl_ops,
  	.n_small_ops	= ARRAY_SIZE(psample_nl_ops),
  	.mcgrps		= psample_nl_mcgrps,
  	.n_mcgrps	= ARRAY_SIZE(psample_nl_mcgrps),
  };
  
  /*
   * Multicast a @cmd message describing @group to the CONFIG multicast group
   * in the group's network namespace.
   *
   * Best effort: allocation or fill failures are dropped silently.  Both the
   * allocation and the multicast use GFP_ATOMIC.
   */
  static void psample_group_notify(struct psample_group *group,
  				 enum psample_command cmd)
  {
  	struct sk_buff *msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
  
  	if (!msg)
  		return;
  
  	if (psample_group_nl_fill(msg, group, cmd, 0, 0, NLM_F_MULTI)) {
  		/* Nothing was sent, so the buffer is still ours to free. */
  		nlmsg_free(msg);
  		return;
  	}
  
  	genlmsg_multicast_netns(&psample_nl_family, group->net, msg, 0,
  				PSAMPLE_NL_MCGRP_CONFIG, GFP_ATOMIC);
  }
  
  /*
   * Allocate a zeroed group for (@net, @group_num), append it to the global
   * group list and announce it with PSAMPLE_CMD_NEW_GROUP.
   *
   * The new group's refcount is left at zero.  The only caller visible here,
   * psample_group_get(), invokes this with psample_groups_lock held and then
   * takes the first reference.
   *
   * Returns the new group, or NULL if the allocation fails.
   */
  static struct psample_group *psample_group_create(struct net *net,
  						  u32 group_num)
  {
  	struct psample_group *grp = kzalloc(sizeof(*grp), GFP_ATOMIC);
  
  	if (!grp)
  		return NULL;
  
  	grp->group_num = group_num;
  	grp->net = net;
  	list_add_tail(&grp->list, &psample_groups_list);
  
  	psample_group_notify(grp, PSAMPLE_CMD_NEW_GROUP);
  
  	return grp;
  }
  
  static void psample_group_destroy(struct psample_group *group)
  {
  	psample_group_notify(group, PSAMPLE_CMD_DEL_GROUP);
  	list_del(&group->list);
dbf47a2a0   Vlad Buslov   net: sched: act_s...
156
  	kfree_rcu(group, rcu);
6ae0a6286   Yotam Gigi   net: Introduce ps...
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
  }
  
  /*
   * Find the group numbered @group_num in namespace @net.
   *
   * Performs a linear walk of the global group list and takes no lock of its
   * own.  Returns the matching group or NULL.
   */
  static struct psample_group *
  psample_group_lookup(struct net *net, u32 group_num)
  {
  	struct psample_group *group;
  
  	list_for_each_entry(group, &psample_groups_list, list) {
  		if (group->net != net)
  			continue;
  		if (group->group_num == group_num)
  			return group;
  	}
  
  	return NULL;
  }
  
  /*
   * Look up the group (@net, @group_num), creating it if it does not exist,
   * and take a reference on it.
   *
   * The lookup, the optional creation and the refcount increment all happen
   * under psample_groups_lock.
   *
   * Returns the group with its refcount incremented, or NULL if a new group
   * could not be allocated.
   */
  struct psample_group *psample_group_get(struct net *net, u32 group_num)
  {
  	struct psample_group *group;
  
  	spin_lock_bh(&psample_groups_lock);
  
  	group = psample_group_lookup(net, group_num);
  	if (!group)
  		group = psample_group_create(net, group_num);
  	if (group)
  		group->refcount++;
  
  	spin_unlock_bh(&psample_groups_lock);
  
  	return group;
  }
  EXPORT_SYMBOL_GPL(psample_group_get);
4a5da47d5   Vlad Buslov   net: sched: take ...
188
189
190
191
192
193
194
  /*
   * Take an additional reference on @group.
   *
   * The refcount is a plain integer, so the increment is done under
   * psample_groups_lock, like the other refcount updates in this file.
   */
  void psample_group_take(struct psample_group *group)
  {
  	spin_lock_bh(&psample_groups_lock);
  	group->refcount++;
  	spin_unlock_bh(&psample_groups_lock);
  }
  EXPORT_SYMBOL_GPL(psample_group_take);
6ae0a6286   Yotam Gigi   net: Introduce ps...
195
196
  void psample_group_put(struct psample_group *group)
  {
4a5da47d5   Vlad Buslov   net: sched: take ...
197
  	spin_lock_bh(&psample_groups_lock);
6ae0a6286   Yotam Gigi   net: Introduce ps...
198
199
200
  
  	if (--group->refcount == 0)
  		psample_group_destroy(group);
4a5da47d5   Vlad Buslov   net: sched: take ...
201
  	spin_unlock_bh(&psample_groups_lock);
6ae0a6286   Yotam Gigi   net: Introduce ps...
202
203
  }
  EXPORT_SYMBOL_GPL(psample_group_put);
07a7f3081   Randy Dunlap   net: psample: fix...
204
  #ifdef CONFIG_INET
d8bed686a   Chris Mi   net: psample: Add...
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
  /*
   * Emit the fields of @tun_info as PSAMPLE_TUNNEL_KEY_ATTR_* attributes
   * into @skb.
   *
   * This helper does not open or close a nest; psample_ip_tun_to_nlattr()
   * wraps the call in a PSAMPLE_ATTR_TUNNEL nest.  TTL is always emitted.
   * The other fields are emitted only when the matching flag is set or the
   * value is non-zero.  psample_tunnel_meta_len() mirrors these conditions
   * to size the message.
   *
   * Returns 0 on success, or -EMSGSIZE as soon as one attribute does not fit.
   */
  static int __psample_ip_tun_to_nlattr(struct sk_buff *skb,
  			      struct ip_tunnel_info *tun_info)
  {
  	unsigned short tun_proto = ip_tunnel_info_af(tun_info);
  	const void *tun_opts = ip_tunnel_info_opts(tun_info);
  	const struct ip_tunnel_key *tun_key = &tun_info->key;
  	int tun_opts_len = tun_info->options_len;
  
  	/* 64-bit tunnel id; PSAMPLE_TUNNEL_KEY_ATTR_PAD is its pad attribute */
  	if (tun_key->tun_flags & TUNNEL_KEY &&
  	    nla_put_be64(skb, PSAMPLE_TUNNEL_KEY_ATTR_ID, tun_key->tun_id,
  			 PSAMPLE_TUNNEL_KEY_ATTR_PAD))
  		return -EMSGSIZE;
  
  	if (tun_info->mode & IP_TUNNEL_INFO_BRIDGE &&
  	    nla_put_flag(skb, PSAMPLE_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE))
  		return -EMSGSIZE;
  
  	/* Outer addresses; any other address family emits no address */
  	switch (tun_proto) {
  	case AF_INET:
  		if (tun_key->u.ipv4.src &&
  		    nla_put_in_addr(skb, PSAMPLE_TUNNEL_KEY_ATTR_IPV4_SRC,
  				    tun_key->u.ipv4.src))
  			return -EMSGSIZE;
  		if (tun_key->u.ipv4.dst &&
  		    nla_put_in_addr(skb, PSAMPLE_TUNNEL_KEY_ATTR_IPV4_DST,
  				    tun_key->u.ipv4.dst))
  			return -EMSGSIZE;
  		break;
  	case AF_INET6:
  		if (!ipv6_addr_any(&tun_key->u.ipv6.src) &&
  		    nla_put_in6_addr(skb, PSAMPLE_TUNNEL_KEY_ATTR_IPV6_SRC,
  				     &tun_key->u.ipv6.src))
  			return -EMSGSIZE;
  		if (!ipv6_addr_any(&tun_key->u.ipv6.dst) &&
  		    nla_put_in6_addr(skb, PSAMPLE_TUNNEL_KEY_ATTR_IPV6_DST,
  				     &tun_key->u.ipv6.dst))
  			return -EMSGSIZE;
  		break;
  	}
  	if (tun_key->tos &&
  	    nla_put_u8(skb, PSAMPLE_TUNNEL_KEY_ATTR_TOS, tun_key->tos))
  		return -EMSGSIZE;
  	/* TTL is unconditional, unlike the neighbouring fields */
  	if (nla_put_u8(skb, PSAMPLE_TUNNEL_KEY_ATTR_TTL, tun_key->ttl))
  		return -EMSGSIZE;
  	if ((tun_key->tun_flags & TUNNEL_DONT_FRAGMENT) &&
  	    nla_put_flag(skb, PSAMPLE_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
  		return -EMSGSIZE;
  	if ((tun_key->tun_flags & TUNNEL_CSUM) &&
  	    nla_put_flag(skb, PSAMPLE_TUNNEL_KEY_ATTR_CSUM))
  		return -EMSGSIZE;
  	if (tun_key->tp_src &&
  	    nla_put_be16(skb, PSAMPLE_TUNNEL_KEY_ATTR_TP_SRC, tun_key->tp_src))
  		return -EMSGSIZE;
  	if (tun_key->tp_dst &&
  	    nla_put_be16(skb, PSAMPLE_TUNNEL_KEY_ATTR_TP_DST, tun_key->tp_dst))
  		return -EMSGSIZE;
  	if ((tun_key->tun_flags & TUNNEL_OAM) &&
  	    nla_put_flag(skb, PSAMPLE_TUNNEL_KEY_ATTR_OAM))
  		return -EMSGSIZE;
  	/* Raw option bytes, emitted only for GENEVE- or ERSPAN-flagged keys */
  	if (tun_opts_len) {
  		if (tun_key->tun_flags & TUNNEL_GENEVE_OPT &&
  		    nla_put(skb, PSAMPLE_TUNNEL_KEY_ATTR_GENEVE_OPTS,
  			    tun_opts_len, tun_opts))
  			return -EMSGSIZE;
  		else if (tun_key->tun_flags & TUNNEL_ERSPAN_OPT &&
  			 nla_put(skb, PSAMPLE_TUNNEL_KEY_ATTR_ERSPAN_OPTS,
  				 tun_opts_len, tun_opts))
  			return -EMSGSIZE;
  	}
  
  	return 0;
  }
  
  /*
   * Append the tunnel metadata in @tun_info to @skb as one nested
   * PSAMPLE_ATTR_TUNNEL attribute.
   *
   * Returns 0 on success.  Returns -EMSGSIZE if the nest cannot be opened.
   * If filling the nest fails, the nest is cancelled and the helper's error
   * code is returned.
   */
  static int psample_ip_tun_to_nlattr(struct sk_buff *skb,
  			    struct ip_tunnel_info *tun_info)
  {
  	struct nlattr *nest = nla_nest_start_noflag(skb, PSAMPLE_ATTR_TUNNEL);
  	int err;
  
  	if (!nest)
  		return -EMSGSIZE;
  
  	err = __psample_ip_tun_to_nlattr(skb, tun_info);
  	if (!err) {
  		nla_nest_end(skb, nest);
  		return 0;
  	}
  
  	nla_nest_cancel(skb, nest);
  	return err;
  }
  
  /*
   * Compute how many bytes psample_ip_tun_to_nlattr() needs in the netlink
   * message for @tun_info.
   *
   * The conditions mirror __psample_ip_tun_to_nlattr() one for one, so both
   * functions must be kept in sync.  Two contributions are easy to miss:
   *
   *  - the header of the enclosing PSAMPLE_ATTR_TUNNEL nest, which is
   *    emitted even though it has no payload of its own;
   *  - the tunnel id, which is written with nla_put_be64() and a pad
   *    attribute, so it must be sized with nla_total_size_64bit() to
   *    leave room for that padding.
   *
   * Under-estimating here makes the caller allocate too small a buffer, and
   * the later attribute puts then fail with -EMSGSIZE.
   *
   * Returns the total attribute length in bytes.
   */
  static int psample_tunnel_meta_len(struct ip_tunnel_info *tun_info)
  {
  	unsigned short tun_proto = ip_tunnel_info_af(tun_info);
  	const struct ip_tunnel_key *tun_key = &tun_info->key;
  	int tun_opts_len = tun_info->options_len;
  	int sum = nla_total_size(0);	/* PSAMPLE_ATTR_TUNNEL nest header */
  
  	if (tun_key->tun_flags & TUNNEL_KEY)
  		sum += nla_total_size_64bit(sizeof(u64));
  
  	if (tun_info->mode & IP_TUNNEL_INFO_BRIDGE)
  		sum += nla_total_size(0);
  
  	switch (tun_proto) {
  	case AF_INET:
  		if (tun_key->u.ipv4.src)
  			sum += nla_total_size(sizeof(u32));
  		if (tun_key->u.ipv4.dst)
  			sum += nla_total_size(sizeof(u32));
  		break;
  	case AF_INET6:
  		if (!ipv6_addr_any(&tun_key->u.ipv6.src))
  			sum += nla_total_size(sizeof(struct in6_addr));
  		if (!ipv6_addr_any(&tun_key->u.ipv6.dst))
  			sum += nla_total_size(sizeof(struct in6_addr));
  		break;
  	}
  	if (tun_key->tos)
  		sum += nla_total_size(sizeof(u8));
  	sum += nla_total_size(sizeof(u8));	/* TTL */
  	if (tun_key->tun_flags & TUNNEL_DONT_FRAGMENT)
  		sum += nla_total_size(0);
  	if (tun_key->tun_flags & TUNNEL_CSUM)
  		sum += nla_total_size(0);
  	if (tun_key->tp_src)
  		sum += nla_total_size(sizeof(u16));
  	if (tun_key->tp_dst)
  		sum += nla_total_size(sizeof(u16));
  	if (tun_key->tun_flags & TUNNEL_OAM)
  		sum += nla_total_size(0);
  	if (tun_opts_len) {
  		if (tun_key->tun_flags & TUNNEL_GENEVE_OPT)
  			sum += nla_total_size(tun_opts_len);
  		else if (tun_key->tun_flags & TUNNEL_ERSPAN_OPT)
  			sum += nla_total_size(tun_opts_len);
  	}
  
  	return sum;
  }
07a7f3081   Randy Dunlap   net: psample: fix...
348
  #endif
d8bed686a   Chris Mi   net: psample: Add...
349

6ae0a6286   Yotam Gigi   net: Introduce ps...
350
351
352
353
  /*
   * Build a PSAMPLE_CMD_SAMPLE message for @skb and multicast it to the
   * SAMPLE multicast group in the network namespace of @group.
   *
   * @group:       sampling group; supplies the group number, the namespace
   *               and the per-group sequence counter, which is
   *               post-incremented here
   * @skb:         the sampled packet
   * @trunc_size:  maximum number of packet bytes to copy into the message
   * @in_ifindex:  input interface index; the attribute is omitted when 0
   * @out_ifindex: output interface index; the attribute is omitted when 0
   * @sample_rate: sampling rate, reported as given
   *
   * The copied data is limited to min(skb->len, trunc_size) and shortened
   * further if metadata plus data would exceed PSAMPLE_MAX_PACKET_SIZE.
   * With CONFIG_INET, tunnel metadata attached to @skb is appended as a
   * nested PSAMPLE_ATTR_TUNNEL attribute.
   *
   * All allocations use GFP_ATOMIC.  Nothing is returned: an allocation
   * failure is silent, and any later failure is reported with a
   * rate-limited error message and the message is dropped.
   */
  void psample_sample_packet(struct psample_group *group, struct sk_buff *skb,
  			   u32 trunc_size, int in_ifindex, int out_ifindex,
  			   u32 sample_rate)
  {
  #ifdef CONFIG_INET
  	struct ip_tunnel_info *tun_info;
  #endif
  	struct sk_buff *nl_skb;
  	int data_len;
  	int meta_len;
  	void *data;
  	int ret;
  
  	/* The interface indexes are emitted as 16-bit attributes. */
  	meta_len = (in_ifindex ? nla_total_size(sizeof(u16)) : 0) +
  		   (out_ifindex ? nla_total_size(sizeof(u16)) : 0) +
  		   nla_total_size(sizeof(u32)) +	/* sample_rate */
  		   nla_total_size(sizeof(u32)) +	/* orig_size */
  		   nla_total_size(sizeof(u32)) +	/* group_num */
  		   nla_total_size(sizeof(u32));		/* seq */
  
  #ifdef CONFIG_INET
  	tun_info = skb_tunnel_info(skb);
  	if (tun_info)
  		meta_len += psample_tunnel_meta_len(tun_info);
  #endif
  
  	/* Shrink the data so that metadata + data stay within the limit. */
  	data_len = min(skb->len, trunc_size);
  	if (meta_len + nla_total_size(data_len) > PSAMPLE_MAX_PACKET_SIZE)
  		data_len = PSAMPLE_MAX_PACKET_SIZE - meta_len - NLA_HDRLEN
  			    - NLA_ALIGNTO;
  
  	nl_skb = genlmsg_new(meta_len + nla_total_size(data_len), GFP_ATOMIC);
  	if (unlikely(!nl_skb))
  		return;
  
  	data = genlmsg_put(nl_skb, 0, 0, &psample_nl_family, 0,
  			   PSAMPLE_CMD_SAMPLE);
  	if (unlikely(!data))
  		goto error;
  
  	if (in_ifindex) {
  		ret = nla_put_u16(nl_skb, PSAMPLE_ATTR_IIFINDEX, in_ifindex);
  		if (unlikely(ret < 0))
  			goto error;
  	}
  
  	if (out_ifindex) {
  		ret = nla_put_u16(nl_skb, PSAMPLE_ATTR_OIFINDEX, out_ifindex);
  		if (unlikely(ret < 0))
  			goto error;
  	}
  
  	ret = nla_put_u32(nl_skb, PSAMPLE_ATTR_SAMPLE_RATE, sample_rate);
  	if (unlikely(ret < 0))
  		goto error;
  
  	/* ORIGSIZE is the untruncated packet length. */
  	ret = nla_put_u32(nl_skb, PSAMPLE_ATTR_ORIGSIZE, skb->len);
  	if (unlikely(ret < 0))
  		goto error;
  
  	ret = nla_put_u32(nl_skb, PSAMPLE_ATTR_SAMPLE_GROUP, group->group_num);
  	if (unlikely(ret < 0))
  		goto error;
  
  	ret = nla_put_u32(nl_skb, PSAMPLE_ATTR_GROUP_SEQ, group->seq++);
  	if (unlikely(ret < 0))
  		goto error;
  
  	if (data_len) {
  		int nla_len = nla_total_size(data_len);
  		struct nlattr *nla;
  
  		/*
  		 * The data attribute is built by hand so the packet bytes
  		 * are copied from @skb straight into the message.
  		 */
  		nla = skb_put(nl_skb, nla_len);
  		nla->nla_type = PSAMPLE_ATTR_DATA;
  		nla->nla_len = nla_attr_size(data_len);
  
  		if (skb_copy_bits(skb, 0, nla_data(nla), data_len))
  			goto error;
  	}
  
  #ifdef CONFIG_INET
  	if (tun_info) {
  		ret = psample_ip_tun_to_nlattr(nl_skb, tun_info);
  		if (unlikely(ret < 0))
  			goto error;
  	}
  #endif
  
  	genlmsg_end(nl_skb, data);
  	genlmsg_multicast_netns(&psample_nl_family, group->net, nl_skb, 0,
  				PSAMPLE_NL_MCGRP_SAMPLE, GFP_ATOMIC);
  
  	return;
  error:
  	pr_err_ratelimited("Could not create psample log message\n");
  	nlmsg_free(nl_skb);
  }
  EXPORT_SYMBOL_GPL(psample_sample_packet);
  
  /*
   * Module entry point: register the psample generic netlink family.
   * Returns the result of genl_register_family().
   */
  static int __init psample_module_init(void)
  {
  	return genl_register_family(&psample_nl_family);
  }
  
  /* Module exit point: unregister the psample generic netlink family. */
  static void __exit psample_module_exit(void)
  {
  	genl_unregister_family(&psample_nl_family);
  }
  
  module_init(psample_module_init);
  module_exit(psample_module_exit);
f1fd20c36   Yotam Gigi   MAINTAINERS: Upda...
458
  MODULE_AUTHOR("Yotam Gigi <yotam.gi@gmail.com>");
6ae0a6286   Yotam Gigi   net: Introduce ps...
459
460
  MODULE_DESCRIPTION("netlink channel for packet sampling");
  MODULE_LICENSE("GPL v2");