Blame view

samples/bpf/xdp_adjust_tail_kern.c 3.72 KB
c6ffd1ff7   Nikita V. Shirokov   bpf: add bpf_xdp_...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
  /* SPDX-License-Identifier: GPL-2.0
   * Copyright (c) 2018 Facebook
   *
   * This program is free software; you can redistribute it and/or
   * modify it under the terms of version 2 of the GNU General Public
   * License as published by the Free Software Foundation.
   *
   * This program shows how to use bpf_xdp_adjust_tail() by
   * generating ICMPv4 "packet too big" messages (more precisely, for v4:
   * destination unreachable / fragmentation needed with the DF bit set)
   * when receiving packets bigger than 600 bytes.
   */
  #define KBUILD_MODNAME "foo"
  #include <uapi/linux/bpf.h>
  #include <linux/in.h>
  #include <linux/if_ether.h>
  #include <linux/if_packet.h>
  #include <linux/if_vlan.h>
  #include <linux/ip.h>
  #include <linux/icmp.h>
  #include "bpf_helpers.h"
  
  /* TTL written into the IPv4 header of the generated ICMP reply. */
  #define DEFAULT_TTL 64
  /*
   * Frames longer than this many bytes (data_end - data, so the Ethernet
   * header is included) trigger the ICMP reply; the MTU advertised in the
   * reply is this value minus sizeof(struct ethhdr).
   */
  #define MAX_PCKT_SIZE 600
  /* Length the offending frame is trimmed to before the head is grown. */
  #define ICMP_TOOBIG_SIZE 98
  /*
   * Number of bytes checksummed starting at the new ICMP header.
   * NOTE(review): presumably ICMP_TOOBIG_SIZE - sizeof(struct ethhdr) +
   * sizeof(struct icmphdr) (98 - 14 + 8) -- confirm if either size changes.
   */
  #define ICMP_TOOBIG_PAYLOAD_SIZE 92
  
  /*
   * Single-slot array map holding one 64-bit counter, indexed by a 32-bit
   * key. count_icmp() increments slot 0 each time an ICMP reply is built.
   */
  struct bpf_map_def SEC("maps") icmpcnt = {
  	.type = BPF_MAP_TYPE_ARRAY,
  	.key_size = sizeof(__u32),
  	.value_size = sizeof(__u64),
  	.max_entries = 1,
  };
  
  static __always_inline void count_icmp(void)
  {
  	u64 key = 0;
  	u64 *icmp_count;
  
  	icmp_count = bpf_map_lookup_elem(&icmpcnt, &key);
  	if (icmp_count)
  		*icmp_count += 1;
  }
  
  /*
   * Write an Ethernet header at @data that mirrors @orig_eth: source and
   * destination MAC addresses are exchanged and the EtherType is copied
   * unchanged.
   */
  static __always_inline void swap_mac(void *data, struct ethhdr *orig_eth)
  {
  	struct ethhdr *out = data;
  
  	memcpy(out->h_source, orig_eth->h_dest, ETH_ALEN);
  	memcpy(out->h_dest, orig_eth->h_source, ETH_ALEN);
  	out->h_proto = orig_eth->h_proto;
  }
  
  /*
   * Fold a 32-bit one's-complement partial sum into the final 16-bit
   * checksum (the complement of the folded sum).
   *
   * Adding the upper and lower halves can itself carry out of bit 15
   * (e.g. 0x0001ffff -> 0x10000), so the fold is applied twice; after the
   * second pass the value is guaranteed to fit in 16 bits.
   */
  static __always_inline __u16 csum_fold_helper(__u32 csum)
  {
  	csum = (csum & 0xffff) + (csum >> 16);
  	csum = (csum & 0xffff) + (csum >> 16);
  	return ~csum;
  }
  
  /*
   * Checksum @data_size bytes starting at @data_start.
   *
   * *@csum is passed to bpf_csum_diff() as the seed; on return it holds the
   * result of running that helper's output through csum_fold_helper().
   */
  static __always_inline void ipv4_csum(void *data_start, int data_size,
  				      __u32 *csum)
  {
  	__u32 partial = bpf_csum_diff(0, 0, data_start, data_size, *csum);
  
  	*csum = csum_fold_helper(partial);
  }
  
  /*
   * Rewrite the frame in @xdp into an ICMPv4 "destination unreachable /
   * fragmentation needed" reply addressed to the original sender.
   *
   * The head is grown by sizeof(struct iphdr) + sizeof(struct icmphdr);
   * a new Ethernet header (MACs swapped), IPv4 header and ICMP header are
   * then written at the front, and the original IPv4 header plus whatever
   * follows it in the frame becomes the ICMP payload.
   *
   * Every field of the new IPv4 and ICMP headers is written explicitly:
   * the bytes under them are either fresh headroom or leftovers of the
   * original Ethernet header, so anything left untouched would go out on
   * the wire with stale contents.
   *
   * Returns XDP_TX on success, XDP_DROP if the head cannot be grown or the
   * resulting frame is shorter than ICMP_TOOBIG_SIZE + headroom bytes.
   */
  static __always_inline int send_icmp4_too_big(struct xdp_md *xdp)
  {
  	int headroom = (int)sizeof(struct iphdr) + (int)sizeof(struct icmphdr);
  
  	if (bpf_xdp_adjust_head(xdp, 0 - headroom))
  		return XDP_DROP;
  	/* Packet pointers are loaded only after the head has been moved. */
  	void *data = (void *)(long)xdp->data;
  	void *data_end = (void *)(long)xdp->data_end;
  
  	if (data + (ICMP_TOOBIG_SIZE + headroom) > data_end)
  		return XDP_DROP;
  
  	struct iphdr *iph, *orig_iph;
  	struct icmphdr *icmp_hdr;
  	struct ethhdr *orig_eth;
  	__u32 csum = 0;
  	__u64 off = 0;
  
  	/*
  	 * The original Ethernet header now sits @headroom bytes into the
  	 * frame, where the new IPv4/ICMP headers will be written, so it
  	 * must be consumed by swap_mac() before those headers are filled in.
  	 */
  	orig_eth = data + headroom;
  	swap_mac(data, orig_eth);
  	off += sizeof(struct ethhdr);
  	iph = data + off;
  	off += sizeof(struct iphdr);
  	icmp_hdr = data + off;
  	off += sizeof(struct icmphdr);
  	orig_iph = data + off;
  	icmp_hdr->type = ICMP_DEST_UNREACH;
  	icmp_hdr->code = ICMP_FRAG_NEEDED;
  	/* Clear the whole 32-bit union so the unused half next to mtu is 0. */
  	icmp_hdr->un.gateway = 0;
  	icmp_hdr->un.frag.mtu = htons(MAX_PCKT_SIZE-sizeof(struct ethhdr));
  	/* The checksum field must be zero while the checksum is computed. */
  	icmp_hdr->checksum = 0;
  	ipv4_csum(icmp_hdr, ICMP_TOOBIG_PAYLOAD_SIZE, &csum);
  	icmp_hdr->checksum = csum;
  	iph->ttl = DEFAULT_TTL;
  	iph->daddr = orig_iph->saddr;
  	iph->saddr = orig_iph->daddr;
  	iph->version = 4;
  	iph->ihl = 5;
  	iph->protocol = IPPROTO_ICMP;
  	iph->tos = 0;
  	/* Not a fragment: identification and flags/offset are both zero. */
  	iph->id = 0;
  	iph->frag_off = 0;
  	iph->tot_len = htons(
  		ICMP_TOOBIG_SIZE + headroom - sizeof(struct ethhdr));
  	iph->check = 0;
  	csum = 0;
  	ipv4_csum(iph, sizeof(struct iphdr), &csum);
  	iph->check = csum;
  	count_icmp();
  	return XDP_TX;
  }
  
  
  /*
   * Decide what to do with an IPv4 frame.
   *
   * Frames of at most MAX_PCKT_SIZE bytes are passed up the stack. Longer
   * frames are trimmed to ICMP_TOOBIG_SIZE bytes and handed to
   * send_icmp4_too_big(); if the trim fails the frame is passed unchanged.
   */
  static __always_inline int handle_ipv4(struct xdp_md *xdp)
  {
  	void *pkt_end = (void *)(long)xdp->data_end;
  	void *pkt_start = (void *)(long)xdp->data;
  	int len = pkt_end - pkt_start;
  	int trim;
  
  	if (len <= MAX_PCKT_SIZE)
  		return XDP_PASS;
  
  	/* Number of bytes to cut off the tail to leave ICMP_TOOBIG_SIZE. */
  	trim = len - ICMP_TOOBIG_SIZE;
  	if (bpf_xdp_adjust_tail(xdp, 0 - trim))
  		return XDP_PASS;
  
  	return send_icmp4_too_big(xdp);
  }
  
  /*
   * XDP entry point. Frames too short to hold an Ethernet header are
   * dropped, IPv4 frames go to handle_ipv4(), and every other EtherType
   * is passed through untouched.
   */
  SEC("xdp_icmp")
  int _xdp_icmp(struct xdp_md *xdp)
  {
  	void *pkt_end = (void *)(long)xdp->data_end;
  	void *pkt = (void *)(long)xdp->data;
  	struct ethhdr *l2 = pkt;
  
  	/* Bounds check before the Ethernet header is read. */
  	if (pkt + sizeof(*l2) > pkt_end)
  		return XDP_DROP;
  
  	if (l2->h_proto != htons(ETH_P_IP))
  		return XDP_PASS;
  
  	return handle_ipv4(xdp);
  }
  
  /* License string for this object, placed in the "license" section. */
  char _license[] SEC("license") = "GPL";