Commit 5d9c5a32920c5c0e6716b0f6ed16157783dc56a4

Authored by Herbert Xu
Committed by David S. Miller
1 parent 00ab956f2f

[IPV4]: Get rid of redundant IPCB->opts initialisation

Now that we always zero the IPCB->opts in ip_rcv, it is no longer
necessary to do so before calling netif_rx for tunneled packets.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>

Showing 6 changed files with 0 additions and 7 deletions Inline Diff

1 /* 1 /*
2 * Linux NET3: GRE over IP protocol decoder. 2 * Linux NET3: GRE over IP protocol decoder.
3 * 3 *
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru) 4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 * 5 *
6 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License 7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version. 9 * 2 of the License, or (at your option) any later version.
10 * 10 *
11 */ 11 */
12 12
13 #include <linux/capability.h> 13 #include <linux/capability.h>
14 #include <linux/module.h> 14 #include <linux/module.h>
15 #include <linux/types.h> 15 #include <linux/types.h>
16 #include <linux/sched.h> 16 #include <linux/sched.h>
17 #include <linux/kernel.h> 17 #include <linux/kernel.h>
18 #include <asm/uaccess.h> 18 #include <asm/uaccess.h>
19 #include <linux/skbuff.h> 19 #include <linux/skbuff.h>
20 #include <linux/netdevice.h> 20 #include <linux/netdevice.h>
21 #include <linux/in.h> 21 #include <linux/in.h>
22 #include <linux/tcp.h> 22 #include <linux/tcp.h>
23 #include <linux/udp.h> 23 #include <linux/udp.h>
24 #include <linux/if_arp.h> 24 #include <linux/if_arp.h>
25 #include <linux/mroute.h> 25 #include <linux/mroute.h>
26 #include <linux/init.h> 26 #include <linux/init.h>
27 #include <linux/in6.h> 27 #include <linux/in6.h>
28 #include <linux/inetdevice.h> 28 #include <linux/inetdevice.h>
29 #include <linux/igmp.h> 29 #include <linux/igmp.h>
30 #include <linux/netfilter_ipv4.h> 30 #include <linux/netfilter_ipv4.h>
31 #include <linux/if_ether.h> 31 #include <linux/if_ether.h>
32 32
33 #include <net/sock.h> 33 #include <net/sock.h>
34 #include <net/ip.h> 34 #include <net/ip.h>
35 #include <net/icmp.h> 35 #include <net/icmp.h>
36 #include <net/protocol.h> 36 #include <net/protocol.h>
37 #include <net/ipip.h> 37 #include <net/ipip.h>
38 #include <net/arp.h> 38 #include <net/arp.h>
39 #include <net/checksum.h> 39 #include <net/checksum.h>
40 #include <net/dsfield.h> 40 #include <net/dsfield.h>
41 #include <net/inet_ecn.h> 41 #include <net/inet_ecn.h>
42 #include <net/xfrm.h> 42 #include <net/xfrm.h>
43 43
44 #ifdef CONFIG_IPV6 44 #ifdef CONFIG_IPV6
45 #include <net/ipv6.h> 45 #include <net/ipv6.h>
46 #include <net/ip6_fib.h> 46 #include <net/ip6_fib.h>
47 #include <net/ip6_route.h> 47 #include <net/ip6_route.h>
48 #endif 48 #endif
49 49
50 /* 50 /*
51 Problems & solutions 51 Problems & solutions
52 -------------------- 52 --------------------
53 53
54 1. The most important issue is detecting local dead loops. 54 1. The most important issue is detecting local dead loops.
55 They would cause complete host lockup in transmit, which 55 They would cause complete host lockup in transmit, which
56 would be "resolved" by stack overflow or, if queueing is enabled, 56 would be "resolved" by stack overflow or, if queueing is enabled,
57 with infinite looping in net_bh. 57 with infinite looping in net_bh.
58 58
59 We cannot track such dead loops during route installation, 59 We cannot track such dead loops during route installation,
60 it is infeasible task. The most general solutions would be 60 it is infeasible task. The most general solutions would be
61 to keep skb->encapsulation counter (sort of local ttl), 61 to keep skb->encapsulation counter (sort of local ttl),
62 and silently drop packet when it expires. It is the best 62 and silently drop packet when it expires. It is the best
63 solution, but it supposes maintaining a new variable in ALL 63 solution, but it supposes maintaining a new variable in ALL
64 skb, even if no tunneling is used. 64 skb, even if no tunneling is used.
65 65
66 Current solution: t->recursion lock breaks dead loops. It looks 66 Current solution: t->recursion lock breaks dead loops. It looks
67 like dev->tbusy flag, but I preferred new variable, because 67 like dev->tbusy flag, but I preferred new variable, because
68 the semantics is different. One day, when hard_start_xmit 68 the semantics is different. One day, when hard_start_xmit
69 will be multithreaded we will have to use skb->encapsulation. 69 will be multithreaded we will have to use skb->encapsulation.
70 70
71 71
72 72
73 2. Networking dead loops would not kill routers, but would really 73 2. Networking dead loops would not kill routers, but would really
74 kill network. IP hop limit plays role of "t->recursion" in this case, 74 kill network. IP hop limit plays role of "t->recursion" in this case,
75 if we copy it from packet being encapsulated to upper header. 75 if we copy it from packet being encapsulated to upper header.
76 It is very good solution, but it introduces two problems: 76 It is very good solution, but it introduces two problems:
77 77
78 - Routing protocols, using packets with ttl=1 (OSPF, RIP2), 78 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
79 do not work over tunnels. 79 do not work over tunnels.
80 - traceroute does not work. I planned to relay ICMP from tunnel, 80 - traceroute does not work. I planned to relay ICMP from tunnel,
81 so that this problem would be solved and traceroute output 81 so that this problem would be solved and traceroute output
82 would be even more informative. This idea appeared to be wrong: 82 would be even more informative. This idea appeared to be wrong:
83 only Linux complies to rfc1812 now (yes, guys, Linux is the only 83 only Linux complies to rfc1812 now (yes, guys, Linux is the only
84 true router now :-)), all routers (at least, in neighbourhood of mine) 84 true router now :-)), all routers (at least, in neighbourhood of mine)
85 return only 8 bytes of payload. It is the end. 85 return only 8 bytes of payload. It is the end.
86 86
87 Hence, if we want that OSPF worked or traceroute said something reasonable, 87 Hence, if we want that OSPF worked or traceroute said something reasonable,
88 we should search for another solution. 88 we should search for another solution.
89 89
90 One of them is to parse packet trying to detect inner encapsulation 90 One of them is to parse packet trying to detect inner encapsulation
91 made by our node. It is difficult or even impossible, especially, 91 made by our node. It is difficult or even impossible, especially,
92 taking into account fragmentation. To be short, it is not a solution at all. 92 taking into account fragmentation. To be short, it is not a solution at all.
93 93
94 Current solution: The solution was UNEXPECTEDLY SIMPLE. 94 Current solution: The solution was UNEXPECTEDLY SIMPLE.
95 We force DF flag on tunnels with preconfigured hop limit, 95 We force DF flag on tunnels with preconfigured hop limit,
96 that is ALL. :-) Well, it does not remove the problem completely, 96 that is ALL. :-) Well, it does not remove the problem completely,
97 but exponential growth of network traffic is changed to linear 97 but exponential growth of network traffic is changed to linear
98 (branches, that exceed pmtu are pruned) and tunnel mtu 98 (branches, that exceed pmtu are pruned) and tunnel mtu
99 quickly degrades to a value <68, where looping stops. 99 quickly degrades to a value <68, where looping stops.
100 Yes, it is not good if there exists a router in the loop, 100 Yes, it is not good if there exists a router in the loop,
101 which does not force DF, even when encapsulating packets have DF set. 101 which does not force DF, even when encapsulating packets have DF set.
102 But it is not our problem! Nobody could accuse us, we made 102 But it is not our problem! Nobody could accuse us, we made
103 all that we could make. Even if it is your gated who injected 103 all that we could make. Even if it is your gated who injected
104 fatal route to network, even if it were you who configured 104 fatal route to network, even if it were you who configured
105 fatal static route: you are innocent. :-) 105 fatal static route: you are innocent. :-)
106 106
107 107
108 108
109 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain 109 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
110 practically identical code. It would be good to glue them 110 practically identical code. It would be good to glue them
111 together, but it is not very evident, how to make them modular. 111 together, but it is not very evident, how to make them modular.
112 sit is integral part of IPv6, ipip and gre are naturally modular. 112 sit is integral part of IPv6, ipip and gre are naturally modular.
113 We could extract common parts (hash table, ioctl etc) 113 We could extract common parts (hash table, ioctl etc)
114 to a separate module (ip_tunnel.c). 114 to a separate module (ip_tunnel.c).
115 115
116 Alexey Kuznetsov. 116 Alexey Kuznetsov.
117 */ 117 */
118 118
119 static int ipgre_tunnel_init(struct net_device *dev); 119 static int ipgre_tunnel_init(struct net_device *dev);
120 static void ipgre_tunnel_setup(struct net_device *dev); 120 static void ipgre_tunnel_setup(struct net_device *dev);
121 121
122 /* Fallback tunnel: no source, no destination, no key, no options */ 122 /* Fallback tunnel: no source, no destination, no key, no options */
123 123
124 static int ipgre_fb_tunnel_init(struct net_device *dev); 124 static int ipgre_fb_tunnel_init(struct net_device *dev);
125 125
126 static struct net_device *ipgre_fb_tunnel_dev; 126 static struct net_device *ipgre_fb_tunnel_dev;
127 127
128 /* Tunnel hash table */ 128 /* Tunnel hash table */
129 129
130 /* 130 /*
131 4 hash tables: 131 4 hash tables:
132 132
133 3: (remote,local) 133 3: (remote,local)
134 2: (remote,*) 134 2: (remote,*)
135 1: (*,local) 135 1: (*,local)
136 0: (*,*) 136 0: (*,*)
137 137
138 We require exact key match i.e. if a key is present in packet 138 We require exact key match i.e. if a key is present in packet
139 it will match only tunnel with the same key; if it is not present, 139 it will match only tunnel with the same key; if it is not present,
140 it will match only keyless tunnel. 140 it will match only keyless tunnel.
141 141
142 All keyless packets, if not matching configured keyless tunnels 142 All keyless packets, if not matching configured keyless tunnels
143 will match fallback tunnel. 143 will match fallback tunnel.
144 */ 144 */
145 145
146 #define HASH_SIZE 16 146 #define HASH_SIZE 16
147 #define HASH(addr) ((addr^(addr>>4))&0xF) 147 #define HASH(addr) ((addr^(addr>>4))&0xF)
148 148
149 static struct ip_tunnel *tunnels[4][HASH_SIZE]; 149 static struct ip_tunnel *tunnels[4][HASH_SIZE];
150 150
151 #define tunnels_r_l (tunnels[3]) 151 #define tunnels_r_l (tunnels[3])
152 #define tunnels_r (tunnels[2]) 152 #define tunnels_r (tunnels[2])
153 #define tunnels_l (tunnels[1]) 153 #define tunnels_l (tunnels[1])
154 #define tunnels_wc (tunnels[0]) 154 #define tunnels_wc (tunnels[0])
155 155
156 static DEFINE_RWLOCK(ipgre_lock); 156 static DEFINE_RWLOCK(ipgre_lock);
157 157
158 /* Given src, dst and key, find appropriate for input tunnel. */ 158 /* Given src, dst and key, find appropriate for input tunnel. */
159 159
160 static struct ip_tunnel * ipgre_tunnel_lookup(u32 remote, u32 local, u32 key) 160 static struct ip_tunnel * ipgre_tunnel_lookup(u32 remote, u32 local, u32 key)
161 { 161 {
162 unsigned h0 = HASH(remote); 162 unsigned h0 = HASH(remote);
163 unsigned h1 = HASH(key); 163 unsigned h1 = HASH(key);
164 struct ip_tunnel *t; 164 struct ip_tunnel *t;
165 165
166 for (t = tunnels_r_l[h0^h1]; t; t = t->next) { 166 for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
167 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) { 167 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
168 if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) 168 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
169 return t; 169 return t;
170 } 170 }
171 } 171 }
172 for (t = tunnels_r[h0^h1]; t; t = t->next) { 172 for (t = tunnels_r[h0^h1]; t; t = t->next) {
173 if (remote == t->parms.iph.daddr) { 173 if (remote == t->parms.iph.daddr) {
174 if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) 174 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
175 return t; 175 return t;
176 } 176 }
177 } 177 }
178 for (t = tunnels_l[h1]; t; t = t->next) { 178 for (t = tunnels_l[h1]; t; t = t->next) {
179 if (local == t->parms.iph.saddr || 179 if (local == t->parms.iph.saddr ||
180 (local == t->parms.iph.daddr && MULTICAST(local))) { 180 (local == t->parms.iph.daddr && MULTICAST(local))) {
181 if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) 181 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
182 return t; 182 return t;
183 } 183 }
184 } 184 }
185 for (t = tunnels_wc[h1]; t; t = t->next) { 185 for (t = tunnels_wc[h1]; t; t = t->next) {
186 if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) 186 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
187 return t; 187 return t;
188 } 188 }
189 189
190 if (ipgre_fb_tunnel_dev->flags&IFF_UP) 190 if (ipgre_fb_tunnel_dev->flags&IFF_UP)
191 return netdev_priv(ipgre_fb_tunnel_dev); 191 return netdev_priv(ipgre_fb_tunnel_dev);
192 return NULL; 192 return NULL;
193 } 193 }
194 194
195 static struct ip_tunnel **ipgre_bucket(struct ip_tunnel *t) 195 static struct ip_tunnel **ipgre_bucket(struct ip_tunnel *t)
196 { 196 {
197 u32 remote = t->parms.iph.daddr; 197 u32 remote = t->parms.iph.daddr;
198 u32 local = t->parms.iph.saddr; 198 u32 local = t->parms.iph.saddr;
199 u32 key = t->parms.i_key; 199 u32 key = t->parms.i_key;
200 unsigned h = HASH(key); 200 unsigned h = HASH(key);
201 int prio = 0; 201 int prio = 0;
202 202
203 if (local) 203 if (local)
204 prio |= 1; 204 prio |= 1;
205 if (remote && !MULTICAST(remote)) { 205 if (remote && !MULTICAST(remote)) {
206 prio |= 2; 206 prio |= 2;
207 h ^= HASH(remote); 207 h ^= HASH(remote);
208 } 208 }
209 209
210 return &tunnels[prio][h]; 210 return &tunnels[prio][h];
211 } 211 }
212 212
213 static void ipgre_tunnel_link(struct ip_tunnel *t) 213 static void ipgre_tunnel_link(struct ip_tunnel *t)
214 { 214 {
215 struct ip_tunnel **tp = ipgre_bucket(t); 215 struct ip_tunnel **tp = ipgre_bucket(t);
216 216
217 t->next = *tp; 217 t->next = *tp;
218 write_lock_bh(&ipgre_lock); 218 write_lock_bh(&ipgre_lock);
219 *tp = t; 219 *tp = t;
220 write_unlock_bh(&ipgre_lock); 220 write_unlock_bh(&ipgre_lock);
221 } 221 }
222 222
223 static void ipgre_tunnel_unlink(struct ip_tunnel *t) 223 static void ipgre_tunnel_unlink(struct ip_tunnel *t)
224 { 224 {
225 struct ip_tunnel **tp; 225 struct ip_tunnel **tp;
226 226
227 for (tp = ipgre_bucket(t); *tp; tp = &(*tp)->next) { 227 for (tp = ipgre_bucket(t); *tp; tp = &(*tp)->next) {
228 if (t == *tp) { 228 if (t == *tp) {
229 write_lock_bh(&ipgre_lock); 229 write_lock_bh(&ipgre_lock);
230 *tp = t->next; 230 *tp = t->next;
231 write_unlock_bh(&ipgre_lock); 231 write_unlock_bh(&ipgre_lock);
232 break; 232 break;
233 } 233 }
234 } 234 }
235 } 235 }
236 236
237 static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int create) 237 static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int create)
238 { 238 {
239 u32 remote = parms->iph.daddr; 239 u32 remote = parms->iph.daddr;
240 u32 local = parms->iph.saddr; 240 u32 local = parms->iph.saddr;
241 u32 key = parms->i_key; 241 u32 key = parms->i_key;
242 struct ip_tunnel *t, **tp, *nt; 242 struct ip_tunnel *t, **tp, *nt;
243 struct net_device *dev; 243 struct net_device *dev;
244 unsigned h = HASH(key); 244 unsigned h = HASH(key);
245 int prio = 0; 245 int prio = 0;
246 char name[IFNAMSIZ]; 246 char name[IFNAMSIZ];
247 247
248 if (local) 248 if (local)
249 prio |= 1; 249 prio |= 1;
250 if (remote && !MULTICAST(remote)) { 250 if (remote && !MULTICAST(remote)) {
251 prio |= 2; 251 prio |= 2;
252 h ^= HASH(remote); 252 h ^= HASH(remote);
253 } 253 }
254 for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) { 254 for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
255 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) { 255 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
256 if (key == t->parms.i_key) 256 if (key == t->parms.i_key)
257 return t; 257 return t;
258 } 258 }
259 } 259 }
260 if (!create) 260 if (!create)
261 return NULL; 261 return NULL;
262 262
263 if (parms->name[0]) 263 if (parms->name[0])
264 strlcpy(name, parms->name, IFNAMSIZ); 264 strlcpy(name, parms->name, IFNAMSIZ);
265 else { 265 else {
266 int i; 266 int i;
267 for (i=1; i<100; i++) { 267 for (i=1; i<100; i++) {
268 sprintf(name, "gre%d", i); 268 sprintf(name, "gre%d", i);
269 if (__dev_get_by_name(name) == NULL) 269 if (__dev_get_by_name(name) == NULL)
270 break; 270 break;
271 } 271 }
272 if (i==100) 272 if (i==100)
273 goto failed; 273 goto failed;
274 } 274 }
275 275
276 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup); 276 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
277 if (!dev) 277 if (!dev)
278 return NULL; 278 return NULL;
279 279
280 dev->init = ipgre_tunnel_init; 280 dev->init = ipgre_tunnel_init;
281 nt = netdev_priv(dev); 281 nt = netdev_priv(dev);
282 nt->parms = *parms; 282 nt->parms = *parms;
283 283
284 if (register_netdevice(dev) < 0) { 284 if (register_netdevice(dev) < 0) {
285 free_netdev(dev); 285 free_netdev(dev);
286 goto failed; 286 goto failed;
287 } 287 }
288 288
289 dev_hold(dev); 289 dev_hold(dev);
290 ipgre_tunnel_link(nt); 290 ipgre_tunnel_link(nt);
291 return nt; 291 return nt;
292 292
293 failed: 293 failed:
294 return NULL; 294 return NULL;
295 } 295 }
296 296
297 static void ipgre_tunnel_uninit(struct net_device *dev) 297 static void ipgre_tunnel_uninit(struct net_device *dev)
298 { 298 {
299 ipgre_tunnel_unlink(netdev_priv(dev)); 299 ipgre_tunnel_unlink(netdev_priv(dev));
300 dev_put(dev); 300 dev_put(dev);
301 } 301 }
302 302
303 303
304 static void ipgre_err(struct sk_buff *skb, u32 info) 304 static void ipgre_err(struct sk_buff *skb, u32 info)
305 { 305 {
306 #ifndef I_WISH_WORLD_WERE_PERFECT 306 #ifndef I_WISH_WORLD_WERE_PERFECT
307 307
308 /* It is not :-( All the routers (except for Linux) return only 308 /* It is not :-( All the routers (except for Linux) return only
309 8 bytes of packet payload. It means, that precise relaying of 309 8 bytes of packet payload. It means, that precise relaying of
310 ICMP in the real Internet is absolutely infeasible. 310 ICMP in the real Internet is absolutely infeasible.
311 311
312 Moreover, Cisco "wise men" put GRE key to the third word 312 Moreover, Cisco "wise men" put GRE key to the third word
313 in GRE header. It makes impossible maintaining even soft state for keyed 313 in GRE header. It makes impossible maintaining even soft state for keyed
314 GRE tunnels with enabled checksum. Tell them "thank you". 314 GRE tunnels with enabled checksum. Tell them "thank you".
315 315
316 Well, I wonder, rfc1812 was written by Cisco employee, 316 Well, I wonder, rfc1812 was written by Cisco employee,
317 what the hell these idiots break standards established 317 what the hell these idiots break standards established
318 by themselves??? 318 by themselves???
319 */ 319 */
320 320
321 struct iphdr *iph = (struct iphdr*)skb->data; 321 struct iphdr *iph = (struct iphdr*)skb->data;
322 u16 *p = (u16*)(skb->data+(iph->ihl<<2)); 322 u16 *p = (u16*)(skb->data+(iph->ihl<<2));
323 int grehlen = (iph->ihl<<2) + 4; 323 int grehlen = (iph->ihl<<2) + 4;
324 int type = skb->h.icmph->type; 324 int type = skb->h.icmph->type;
325 int code = skb->h.icmph->code; 325 int code = skb->h.icmph->code;
326 struct ip_tunnel *t; 326 struct ip_tunnel *t;
327 u16 flags; 327 u16 flags;
328 328
329 flags = p[0]; 329 flags = p[0];
330 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) { 330 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
331 if (flags&(GRE_VERSION|GRE_ROUTING)) 331 if (flags&(GRE_VERSION|GRE_ROUTING))
332 return; 332 return;
333 if (flags&GRE_KEY) { 333 if (flags&GRE_KEY) {
334 grehlen += 4; 334 grehlen += 4;
335 if (flags&GRE_CSUM) 335 if (flags&GRE_CSUM)
336 grehlen += 4; 336 grehlen += 4;
337 } 337 }
338 } 338 }
339 339
340 /* If only 8 bytes returned, keyed message will be dropped here */ 340 /* If only 8 bytes returned, keyed message will be dropped here */
341 if (skb_headlen(skb) < grehlen) 341 if (skb_headlen(skb) < grehlen)
342 return; 342 return;
343 343
344 switch (type) { 344 switch (type) {
345 default: 345 default:
346 case ICMP_PARAMETERPROB: 346 case ICMP_PARAMETERPROB:
347 return; 347 return;
348 348
349 case ICMP_DEST_UNREACH: 349 case ICMP_DEST_UNREACH:
350 switch (code) { 350 switch (code) {
351 case ICMP_SR_FAILED: 351 case ICMP_SR_FAILED:
352 case ICMP_PORT_UNREACH: 352 case ICMP_PORT_UNREACH:
353 /* Impossible event. */ 353 /* Impossible event. */
354 return; 354 return;
355 case ICMP_FRAG_NEEDED: 355 case ICMP_FRAG_NEEDED:
356 /* Soft state for pmtu is maintained by IP core. */ 356 /* Soft state for pmtu is maintained by IP core. */
357 return; 357 return;
358 default: 358 default:
359 /* All others are translated to HOST_UNREACH. 359 /* All others are translated to HOST_UNREACH.
360 rfc2003 contains "deep thoughts" about NET_UNREACH, 360 rfc2003 contains "deep thoughts" about NET_UNREACH,
361 I believe they are just ether pollution. --ANK 361 I believe they are just ether pollution. --ANK
362 */ 362 */
363 break; 363 break;
364 } 364 }
365 break; 365 break;
366 case ICMP_TIME_EXCEEDED: 366 case ICMP_TIME_EXCEEDED:
367 if (code != ICMP_EXC_TTL) 367 if (code != ICMP_EXC_TTL)
368 return; 368 return;
369 break; 369 break;
370 } 370 }
371 371
372 read_lock(&ipgre_lock); 372 read_lock(&ipgre_lock);
373 t = ipgre_tunnel_lookup(iph->daddr, iph->saddr, (flags&GRE_KEY) ? *(((u32*)p) + (grehlen>>2) - 1) : 0); 373 t = ipgre_tunnel_lookup(iph->daddr, iph->saddr, (flags&GRE_KEY) ? *(((u32*)p) + (grehlen>>2) - 1) : 0);
374 if (t == NULL || t->parms.iph.daddr == 0 || MULTICAST(t->parms.iph.daddr)) 374 if (t == NULL || t->parms.iph.daddr == 0 || MULTICAST(t->parms.iph.daddr))
375 goto out; 375 goto out;
376 376
377 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) 377 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
378 goto out; 378 goto out;
379 379
380 if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO) 380 if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
381 t->err_count++; 381 t->err_count++;
382 else 382 else
383 t->err_count = 1; 383 t->err_count = 1;
384 t->err_time = jiffies; 384 t->err_time = jiffies;
385 out: 385 out:
386 read_unlock(&ipgre_lock); 386 read_unlock(&ipgre_lock);
387 return; 387 return;
388 #else 388 #else
389 struct iphdr *iph = (struct iphdr*)dp; 389 struct iphdr *iph = (struct iphdr*)dp;
390 struct iphdr *eiph; 390 struct iphdr *eiph;
391 u16 *p = (u16*)(dp+(iph->ihl<<2)); 391 u16 *p = (u16*)(dp+(iph->ihl<<2));
392 int type = skb->h.icmph->type; 392 int type = skb->h.icmph->type;
393 int code = skb->h.icmph->code; 393 int code = skb->h.icmph->code;
394 int rel_type = 0; 394 int rel_type = 0;
395 int rel_code = 0; 395 int rel_code = 0;
396 int rel_info = 0; 396 int rel_info = 0;
397 u16 flags; 397 u16 flags;
398 int grehlen = (iph->ihl<<2) + 4; 398 int grehlen = (iph->ihl<<2) + 4;
399 struct sk_buff *skb2; 399 struct sk_buff *skb2;
400 struct flowi fl; 400 struct flowi fl;
401 struct rtable *rt; 401 struct rtable *rt;
402 402
403 if (p[1] != htons(ETH_P_IP)) 403 if (p[1] != htons(ETH_P_IP))
404 return; 404 return;
405 405
406 flags = p[0]; 406 flags = p[0];
407 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) { 407 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
408 if (flags&(GRE_VERSION|GRE_ROUTING)) 408 if (flags&(GRE_VERSION|GRE_ROUTING))
409 return; 409 return;
410 if (flags&GRE_CSUM) 410 if (flags&GRE_CSUM)
411 grehlen += 4; 411 grehlen += 4;
412 if (flags&GRE_KEY) 412 if (flags&GRE_KEY)
413 grehlen += 4; 413 grehlen += 4;
414 if (flags&GRE_SEQ) 414 if (flags&GRE_SEQ)
415 grehlen += 4; 415 grehlen += 4;
416 } 416 }
417 if (len < grehlen + sizeof(struct iphdr)) 417 if (len < grehlen + sizeof(struct iphdr))
418 return; 418 return;
419 eiph = (struct iphdr*)(dp + grehlen); 419 eiph = (struct iphdr*)(dp + grehlen);
420 420
421 switch (type) { 421 switch (type) {
422 default: 422 default:
423 return; 423 return;
424 case ICMP_PARAMETERPROB: 424 case ICMP_PARAMETERPROB:
425 if (skb->h.icmph->un.gateway < (iph->ihl<<2)) 425 if (skb->h.icmph->un.gateway < (iph->ihl<<2))
426 return; 426 return;
427 427
428 /* So... This guy found something strange INSIDE encapsulated 428 /* So... This guy found something strange INSIDE encapsulated
429 packet. Well, he is fool, but what can we do ? 429 packet. Well, he is fool, but what can we do ?
430 */ 430 */
431 rel_type = ICMP_PARAMETERPROB; 431 rel_type = ICMP_PARAMETERPROB;
432 rel_info = skb->h.icmph->un.gateway - grehlen; 432 rel_info = skb->h.icmph->un.gateway - grehlen;
433 break; 433 break;
434 434
435 case ICMP_DEST_UNREACH: 435 case ICMP_DEST_UNREACH:
436 switch (code) { 436 switch (code) {
437 case ICMP_SR_FAILED: 437 case ICMP_SR_FAILED:
438 case ICMP_PORT_UNREACH: 438 case ICMP_PORT_UNREACH:
439 /* Impossible event. */ 439 /* Impossible event. */
440 return; 440 return;
441 case ICMP_FRAG_NEEDED: 441 case ICMP_FRAG_NEEDED:
442 /* And it is the only really necessary thing :-) */ 442 /* And it is the only really necessary thing :-) */
443 rel_info = ntohs(skb->h.icmph->un.frag.mtu); 443 rel_info = ntohs(skb->h.icmph->un.frag.mtu);
444 if (rel_info < grehlen+68) 444 if (rel_info < grehlen+68)
445 return; 445 return;
446 rel_info -= grehlen; 446 rel_info -= grehlen;
447 /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */ 447 /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
448 if (rel_info > ntohs(eiph->tot_len)) 448 if (rel_info > ntohs(eiph->tot_len))
449 return; 449 return;
450 break; 450 break;
451 default: 451 default:
452 /* All others are translated to HOST_UNREACH. 452 /* All others are translated to HOST_UNREACH.
453 rfc2003 contains "deep thoughts" about NET_UNREACH, 453 rfc2003 contains "deep thoughts" about NET_UNREACH,
454 I believe, it is just ether pollution. --ANK 454 I believe, it is just ether pollution. --ANK
455 */ 455 */
456 rel_type = ICMP_DEST_UNREACH; 456 rel_type = ICMP_DEST_UNREACH;
457 rel_code = ICMP_HOST_UNREACH; 457 rel_code = ICMP_HOST_UNREACH;
458 break; 458 break;
459 } 459 }
460 break; 460 break;
461 case ICMP_TIME_EXCEEDED: 461 case ICMP_TIME_EXCEEDED:
462 if (code != ICMP_EXC_TTL) 462 if (code != ICMP_EXC_TTL)
463 return; 463 return;
464 break; 464 break;
465 } 465 }
466 466
467 /* Prepare fake skb to feed it to icmp_send */ 467 /* Prepare fake skb to feed it to icmp_send */
468 skb2 = skb_clone(skb, GFP_ATOMIC); 468 skb2 = skb_clone(skb, GFP_ATOMIC);
469 if (skb2 == NULL) 469 if (skb2 == NULL)
470 return; 470 return;
471 dst_release(skb2->dst); 471 dst_release(skb2->dst);
472 skb2->dst = NULL; 472 skb2->dst = NULL;
473 skb_pull(skb2, skb->data - (u8*)eiph); 473 skb_pull(skb2, skb->data - (u8*)eiph);
474 skb2->nh.raw = skb2->data; 474 skb2->nh.raw = skb2->data;
475 475
476 /* Try to guess incoming interface */ 476 /* Try to guess incoming interface */
477 memset(&fl, 0, sizeof(fl)); 477 memset(&fl, 0, sizeof(fl));
478 fl.fl4_dst = eiph->saddr; 478 fl.fl4_dst = eiph->saddr;
479 fl.fl4_tos = RT_TOS(eiph->tos); 479 fl.fl4_tos = RT_TOS(eiph->tos);
480 fl.proto = IPPROTO_GRE; 480 fl.proto = IPPROTO_GRE;
481 if (ip_route_output_key(&rt, &fl)) { 481 if (ip_route_output_key(&rt, &fl)) {
482 kfree_skb(skb2); 482 kfree_skb(skb2);
483 return; 483 return;
484 } 484 }
485 skb2->dev = rt->u.dst.dev; 485 skb2->dev = rt->u.dst.dev;
486 486
487 /* route "incoming" packet */ 487 /* route "incoming" packet */
488 if (rt->rt_flags&RTCF_LOCAL) { 488 if (rt->rt_flags&RTCF_LOCAL) {
489 ip_rt_put(rt); 489 ip_rt_put(rt);
490 rt = NULL; 490 rt = NULL;
491 fl.fl4_dst = eiph->daddr; 491 fl.fl4_dst = eiph->daddr;
492 fl.fl4_src = eiph->saddr; 492 fl.fl4_src = eiph->saddr;
493 fl.fl4_tos = eiph->tos; 493 fl.fl4_tos = eiph->tos;
494 if (ip_route_output_key(&rt, &fl) || 494 if (ip_route_output_key(&rt, &fl) ||
495 rt->u.dst.dev->type != ARPHRD_IPGRE) { 495 rt->u.dst.dev->type != ARPHRD_IPGRE) {
496 ip_rt_put(rt); 496 ip_rt_put(rt);
497 kfree_skb(skb2); 497 kfree_skb(skb2);
498 return; 498 return;
499 } 499 }
500 } else { 500 } else {
501 ip_rt_put(rt); 501 ip_rt_put(rt);
502 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) || 502 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
503 skb2->dst->dev->type != ARPHRD_IPGRE) { 503 skb2->dst->dev->type != ARPHRD_IPGRE) {
504 kfree_skb(skb2); 504 kfree_skb(skb2);
505 return; 505 return;
506 } 506 }
507 } 507 }
508 508
509 /* change mtu on this route */ 509 /* change mtu on this route */
510 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { 510 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
511 if (rel_info > dst_mtu(skb2->dst)) { 511 if (rel_info > dst_mtu(skb2->dst)) {
512 kfree_skb(skb2); 512 kfree_skb(skb2);
513 return; 513 return;
514 } 514 }
515 skb2->dst->ops->update_pmtu(skb2->dst, rel_info); 515 skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
516 rel_info = htonl(rel_info); 516 rel_info = htonl(rel_info);
517 } else if (type == ICMP_TIME_EXCEEDED) { 517 } else if (type == ICMP_TIME_EXCEEDED) {
518 struct ip_tunnel *t = netdev_priv(skb2->dev); 518 struct ip_tunnel *t = netdev_priv(skb2->dev);
519 if (t->parms.iph.ttl) { 519 if (t->parms.iph.ttl) {
520 rel_type = ICMP_DEST_UNREACH; 520 rel_type = ICMP_DEST_UNREACH;
521 rel_code = ICMP_HOST_UNREACH; 521 rel_code = ICMP_HOST_UNREACH;
522 } 522 }
523 } 523 }
524 524
525 icmp_send(skb2, rel_type, rel_code, rel_info); 525 icmp_send(skb2, rel_type, rel_code, rel_info);
526 kfree_skb(skb2); 526 kfree_skb(skb2);
527 #endif 527 #endif
528 } 528 }
529 529
530 static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) 530 static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
531 { 531 {
532 if (INET_ECN_is_ce(iph->tos)) { 532 if (INET_ECN_is_ce(iph->tos)) {
533 if (skb->protocol == htons(ETH_P_IP)) { 533 if (skb->protocol == htons(ETH_P_IP)) {
534 IP_ECN_set_ce(skb->nh.iph); 534 IP_ECN_set_ce(skb->nh.iph);
535 } else if (skb->protocol == htons(ETH_P_IPV6)) { 535 } else if (skb->protocol == htons(ETH_P_IPV6)) {
536 IP6_ECN_set_ce(skb->nh.ipv6h); 536 IP6_ECN_set_ce(skb->nh.ipv6h);
537 } 537 }
538 } 538 }
539 } 539 }
540 540
541 static inline u8 541 static inline u8
542 ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb) 542 ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
543 { 543 {
544 u8 inner = 0; 544 u8 inner = 0;
545 if (skb->protocol == htons(ETH_P_IP)) 545 if (skb->protocol == htons(ETH_P_IP))
546 inner = old_iph->tos; 546 inner = old_iph->tos;
547 else if (skb->protocol == htons(ETH_P_IPV6)) 547 else if (skb->protocol == htons(ETH_P_IPV6))
548 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph); 548 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
549 return INET_ECN_encapsulate(tos, inner); 549 return INET_ECN_encapsulate(tos, inner);
550 } 550 }
551 551
552 static int ipgre_rcv(struct sk_buff *skb) 552 static int ipgre_rcv(struct sk_buff *skb)
553 { 553 {
554 struct iphdr *iph; 554 struct iphdr *iph;
555 u8 *h; 555 u8 *h;
556 u16 flags; 556 u16 flags;
557 u16 csum = 0; 557 u16 csum = 0;
558 u32 key = 0; 558 u32 key = 0;
559 u32 seqno = 0; 559 u32 seqno = 0;
560 struct ip_tunnel *tunnel; 560 struct ip_tunnel *tunnel;
561 int offset = 4; 561 int offset = 4;
562 562
563 if (!pskb_may_pull(skb, 16)) 563 if (!pskb_may_pull(skb, 16))
564 goto drop_nolock; 564 goto drop_nolock;
565 565
566 iph = skb->nh.iph; 566 iph = skb->nh.iph;
567 h = skb->data; 567 h = skb->data;
568 flags = *(u16*)h; 568 flags = *(u16*)h;
569 569
570 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) { 570 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
571 /* - Version must be 0. 571 /* - Version must be 0.
572 - We do not support routing headers. 572 - We do not support routing headers.
573 */ 573 */
574 if (flags&(GRE_VERSION|GRE_ROUTING)) 574 if (flags&(GRE_VERSION|GRE_ROUTING))
575 goto drop_nolock; 575 goto drop_nolock;
576 576
577 if (flags&GRE_CSUM) { 577 if (flags&GRE_CSUM) {
578 switch (skb->ip_summed) { 578 switch (skb->ip_summed) {
579 case CHECKSUM_HW: 579 case CHECKSUM_HW:
580 csum = (u16)csum_fold(skb->csum); 580 csum = (u16)csum_fold(skb->csum);
581 if (!csum) 581 if (!csum)
582 break; 582 break;
583 /* fall through */ 583 /* fall through */
584 case CHECKSUM_NONE: 584 case CHECKSUM_NONE:
585 skb->csum = 0; 585 skb->csum = 0;
586 csum = __skb_checksum_complete(skb); 586 csum = __skb_checksum_complete(skb);
587 skb->ip_summed = CHECKSUM_HW; 587 skb->ip_summed = CHECKSUM_HW;
588 } 588 }
589 offset += 4; 589 offset += 4;
590 } 590 }
591 if (flags&GRE_KEY) { 591 if (flags&GRE_KEY) {
592 key = *(u32*)(h + offset); 592 key = *(u32*)(h + offset);
593 offset += 4; 593 offset += 4;
594 } 594 }
595 if (flags&GRE_SEQ) { 595 if (flags&GRE_SEQ) {
596 seqno = ntohl(*(u32*)(h + offset)); 596 seqno = ntohl(*(u32*)(h + offset));
597 offset += 4; 597 offset += 4;
598 } 598 }
599 } 599 }
600 600
601 read_lock(&ipgre_lock); 601 read_lock(&ipgre_lock);
602 if ((tunnel = ipgre_tunnel_lookup(iph->saddr, iph->daddr, key)) != NULL) { 602 if ((tunnel = ipgre_tunnel_lookup(iph->saddr, iph->daddr, key)) != NULL) {
603 secpath_reset(skb); 603 secpath_reset(skb);
604 604
605 skb->protocol = *(u16*)(h + 2); 605 skb->protocol = *(u16*)(h + 2);
606 /* WCCP version 1 and 2 protocol decoding. 606 /* WCCP version 1 and 2 protocol decoding.
607 * - Change protocol to IP 607 * - Change protocol to IP
608 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header 608 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
609 */ 609 */
610 if (flags == 0 && 610 if (flags == 0 &&
611 skb->protocol == __constant_htons(ETH_P_WCCP)) { 611 skb->protocol == __constant_htons(ETH_P_WCCP)) {
612 skb->protocol = __constant_htons(ETH_P_IP); 612 skb->protocol = __constant_htons(ETH_P_IP);
613 if ((*(h + offset) & 0xF0) != 0x40) 613 if ((*(h + offset) & 0xF0) != 0x40)
614 offset += 4; 614 offset += 4;
615 } 615 }
616 616
617 skb->mac.raw = skb->nh.raw; 617 skb->mac.raw = skb->nh.raw;
618 skb->nh.raw = __pskb_pull(skb, offset); 618 skb->nh.raw = __pskb_pull(skb, offset);
619 skb_postpull_rcsum(skb, skb->h.raw, offset); 619 skb_postpull_rcsum(skb, skb->h.raw, offset);
620 memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
621 skb->pkt_type = PACKET_HOST; 620 skb->pkt_type = PACKET_HOST;
622 #ifdef CONFIG_NET_IPGRE_BROADCAST 621 #ifdef CONFIG_NET_IPGRE_BROADCAST
623 if (MULTICAST(iph->daddr)) { 622 if (MULTICAST(iph->daddr)) {
624 /* Looped back packet, drop it! */ 623 /* Looped back packet, drop it! */
625 if (((struct rtable*)skb->dst)->fl.iif == 0) 624 if (((struct rtable*)skb->dst)->fl.iif == 0)
626 goto drop; 625 goto drop;
627 tunnel->stat.multicast++; 626 tunnel->stat.multicast++;
628 skb->pkt_type = PACKET_BROADCAST; 627 skb->pkt_type = PACKET_BROADCAST;
629 } 628 }
630 #endif 629 #endif
631 630
632 if (((flags&GRE_CSUM) && csum) || 631 if (((flags&GRE_CSUM) && csum) ||
633 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) { 632 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
634 tunnel->stat.rx_crc_errors++; 633 tunnel->stat.rx_crc_errors++;
635 tunnel->stat.rx_errors++; 634 tunnel->stat.rx_errors++;
636 goto drop; 635 goto drop;
637 } 636 }
638 if (tunnel->parms.i_flags&GRE_SEQ) { 637 if (tunnel->parms.i_flags&GRE_SEQ) {
639 if (!(flags&GRE_SEQ) || 638 if (!(flags&GRE_SEQ) ||
640 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) { 639 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
641 tunnel->stat.rx_fifo_errors++; 640 tunnel->stat.rx_fifo_errors++;
642 tunnel->stat.rx_errors++; 641 tunnel->stat.rx_errors++;
643 goto drop; 642 goto drop;
644 } 643 }
645 tunnel->i_seqno = seqno + 1; 644 tunnel->i_seqno = seqno + 1;
646 } 645 }
647 tunnel->stat.rx_packets++; 646 tunnel->stat.rx_packets++;
648 tunnel->stat.rx_bytes += skb->len; 647 tunnel->stat.rx_bytes += skb->len;
649 skb->dev = tunnel->dev; 648 skb->dev = tunnel->dev;
650 dst_release(skb->dst); 649 dst_release(skb->dst);
651 skb->dst = NULL; 650 skb->dst = NULL;
652 nf_reset(skb); 651 nf_reset(skb);
653 ipgre_ecn_decapsulate(iph, skb); 652 ipgre_ecn_decapsulate(iph, skb);
654 netif_rx(skb); 653 netif_rx(skb);
655 read_unlock(&ipgre_lock); 654 read_unlock(&ipgre_lock);
656 return(0); 655 return(0);
657 } 656 }
658 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); 657 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
659 658
660 drop: 659 drop:
661 read_unlock(&ipgre_lock); 660 read_unlock(&ipgre_lock);
662 drop_nolock: 661 drop_nolock:
663 kfree_skb(skb); 662 kfree_skb(skb);
664 return(0); 663 return(0);
665 } 664 }
666 665
667 static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) 666 static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
668 { 667 {
669 struct ip_tunnel *tunnel = netdev_priv(dev); 668 struct ip_tunnel *tunnel = netdev_priv(dev);
670 struct net_device_stats *stats = &tunnel->stat; 669 struct net_device_stats *stats = &tunnel->stat;
671 struct iphdr *old_iph = skb->nh.iph; 670 struct iphdr *old_iph = skb->nh.iph;
672 struct iphdr *tiph; 671 struct iphdr *tiph;
673 u8 tos; 672 u8 tos;
674 u16 df; 673 u16 df;
675 struct rtable *rt; /* Route to the other host */ 674 struct rtable *rt; /* Route to the other host */
676 struct net_device *tdev; /* Device to other host */ 675 struct net_device *tdev; /* Device to other host */
677 struct iphdr *iph; /* Our new IP header */ 676 struct iphdr *iph; /* Our new IP header */
678 int max_headroom; /* The extra header space needed */ 677 int max_headroom; /* The extra header space needed */
679 int gre_hlen; 678 int gre_hlen;
680 u32 dst; 679 u32 dst;
681 int mtu; 680 int mtu;
682 681
683 if (tunnel->recursion++) { 682 if (tunnel->recursion++) {
684 tunnel->stat.collisions++; 683 tunnel->stat.collisions++;
685 goto tx_error; 684 goto tx_error;
686 } 685 }
687 686
688 if (dev->hard_header) { 687 if (dev->hard_header) {
689 gre_hlen = 0; 688 gre_hlen = 0;
690 tiph = (struct iphdr*)skb->data; 689 tiph = (struct iphdr*)skb->data;
691 } else { 690 } else {
692 gre_hlen = tunnel->hlen; 691 gre_hlen = tunnel->hlen;
693 tiph = &tunnel->parms.iph; 692 tiph = &tunnel->parms.iph;
694 } 693 }
695 694
696 if ((dst = tiph->daddr) == 0) { 695 if ((dst = tiph->daddr) == 0) {
697 /* NBMA tunnel */ 696 /* NBMA tunnel */
698 697
699 if (skb->dst == NULL) { 698 if (skb->dst == NULL) {
700 tunnel->stat.tx_fifo_errors++; 699 tunnel->stat.tx_fifo_errors++;
701 goto tx_error; 700 goto tx_error;
702 } 701 }
703 702
704 if (skb->protocol == htons(ETH_P_IP)) { 703 if (skb->protocol == htons(ETH_P_IP)) {
705 rt = (struct rtable*)skb->dst; 704 rt = (struct rtable*)skb->dst;
706 if ((dst = rt->rt_gateway) == 0) 705 if ((dst = rt->rt_gateway) == 0)
707 goto tx_error_icmp; 706 goto tx_error_icmp;
708 } 707 }
709 #ifdef CONFIG_IPV6 708 #ifdef CONFIG_IPV6
710 else if (skb->protocol == htons(ETH_P_IPV6)) { 709 else if (skb->protocol == htons(ETH_P_IPV6)) {
711 struct in6_addr *addr6; 710 struct in6_addr *addr6;
712 int addr_type; 711 int addr_type;
713 struct neighbour *neigh = skb->dst->neighbour; 712 struct neighbour *neigh = skb->dst->neighbour;
714 713
715 if (neigh == NULL) 714 if (neigh == NULL)
716 goto tx_error; 715 goto tx_error;
717 716
718 addr6 = (struct in6_addr*)&neigh->primary_key; 717 addr6 = (struct in6_addr*)&neigh->primary_key;
719 addr_type = ipv6_addr_type(addr6); 718 addr_type = ipv6_addr_type(addr6);
720 719
721 if (addr_type == IPV6_ADDR_ANY) { 720 if (addr_type == IPV6_ADDR_ANY) {
722 addr6 = &skb->nh.ipv6h->daddr; 721 addr6 = &skb->nh.ipv6h->daddr;
723 addr_type = ipv6_addr_type(addr6); 722 addr_type = ipv6_addr_type(addr6);
724 } 723 }
725 724
726 if ((addr_type & IPV6_ADDR_COMPATv4) == 0) 725 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
727 goto tx_error_icmp; 726 goto tx_error_icmp;
728 727
729 dst = addr6->s6_addr32[3]; 728 dst = addr6->s6_addr32[3];
730 } 729 }
731 #endif 730 #endif
732 else 731 else
733 goto tx_error; 732 goto tx_error;
734 } 733 }
735 734
736 tos = tiph->tos; 735 tos = tiph->tos;
737 if (tos&1) { 736 if (tos&1) {
738 if (skb->protocol == htons(ETH_P_IP)) 737 if (skb->protocol == htons(ETH_P_IP))
739 tos = old_iph->tos; 738 tos = old_iph->tos;
740 tos &= ~1; 739 tos &= ~1;
741 } 740 }
742 741
743 { 742 {
744 struct flowi fl = { .oif = tunnel->parms.link, 743 struct flowi fl = { .oif = tunnel->parms.link,
745 .nl_u = { .ip4_u = 744 .nl_u = { .ip4_u =
746 { .daddr = dst, 745 { .daddr = dst,
747 .saddr = tiph->saddr, 746 .saddr = tiph->saddr,
748 .tos = RT_TOS(tos) } }, 747 .tos = RT_TOS(tos) } },
749 .proto = IPPROTO_GRE }; 748 .proto = IPPROTO_GRE };
750 if (ip_route_output_key(&rt, &fl)) { 749 if (ip_route_output_key(&rt, &fl)) {
751 tunnel->stat.tx_carrier_errors++; 750 tunnel->stat.tx_carrier_errors++;
752 goto tx_error; 751 goto tx_error;
753 } 752 }
754 } 753 }
755 tdev = rt->u.dst.dev; 754 tdev = rt->u.dst.dev;
756 755
757 if (tdev == dev) { 756 if (tdev == dev) {
758 ip_rt_put(rt); 757 ip_rt_put(rt);
759 tunnel->stat.collisions++; 758 tunnel->stat.collisions++;
760 goto tx_error; 759 goto tx_error;
761 } 760 }
762 761
763 df = tiph->frag_off; 762 df = tiph->frag_off;
764 if (df) 763 if (df)
765 mtu = dst_mtu(&rt->u.dst) - tunnel->hlen; 764 mtu = dst_mtu(&rt->u.dst) - tunnel->hlen;
766 else 765 else
767 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu; 766 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
768 767
769 if (skb->dst) 768 if (skb->dst)
770 skb->dst->ops->update_pmtu(skb->dst, mtu); 769 skb->dst->ops->update_pmtu(skb->dst, mtu);
771 770
772 if (skb->protocol == htons(ETH_P_IP)) { 771 if (skb->protocol == htons(ETH_P_IP)) {
773 df |= (old_iph->frag_off&htons(IP_DF)); 772 df |= (old_iph->frag_off&htons(IP_DF));
774 773
775 if ((old_iph->frag_off&htons(IP_DF)) && 774 if ((old_iph->frag_off&htons(IP_DF)) &&
776 mtu < ntohs(old_iph->tot_len)) { 775 mtu < ntohs(old_iph->tot_len)) {
777 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); 776 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
778 ip_rt_put(rt); 777 ip_rt_put(rt);
779 goto tx_error; 778 goto tx_error;
780 } 779 }
781 } 780 }
782 #ifdef CONFIG_IPV6 781 #ifdef CONFIG_IPV6
783 else if (skb->protocol == htons(ETH_P_IPV6)) { 782 else if (skb->protocol == htons(ETH_P_IPV6)) {
784 struct rt6_info *rt6 = (struct rt6_info*)skb->dst; 783 struct rt6_info *rt6 = (struct rt6_info*)skb->dst;
785 784
786 if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) { 785 if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
787 if ((tunnel->parms.iph.daddr && !MULTICAST(tunnel->parms.iph.daddr)) || 786 if ((tunnel->parms.iph.daddr && !MULTICAST(tunnel->parms.iph.daddr)) ||
788 rt6->rt6i_dst.plen == 128) { 787 rt6->rt6i_dst.plen == 128) {
789 rt6->rt6i_flags |= RTF_MODIFIED; 788 rt6->rt6i_flags |= RTF_MODIFIED;
790 skb->dst->metrics[RTAX_MTU-1] = mtu; 789 skb->dst->metrics[RTAX_MTU-1] = mtu;
791 } 790 }
792 } 791 }
793 792
794 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) { 793 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
795 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev); 794 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
796 ip_rt_put(rt); 795 ip_rt_put(rt);
797 goto tx_error; 796 goto tx_error;
798 } 797 }
799 } 798 }
800 #endif 799 #endif
801 800
802 if (tunnel->err_count > 0) { 801 if (tunnel->err_count > 0) {
803 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) { 802 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
804 tunnel->err_count--; 803 tunnel->err_count--;
805 804
806 dst_link_failure(skb); 805 dst_link_failure(skb);
807 } else 806 } else
808 tunnel->err_count = 0; 807 tunnel->err_count = 0;
809 } 808 }
810 809
811 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen; 810 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
812 811
813 if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) { 812 if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
814 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); 813 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
815 if (!new_skb) { 814 if (!new_skb) {
816 ip_rt_put(rt); 815 ip_rt_put(rt);
817 stats->tx_dropped++; 816 stats->tx_dropped++;
818 dev_kfree_skb(skb); 817 dev_kfree_skb(skb);
819 tunnel->recursion--; 818 tunnel->recursion--;
820 return 0; 819 return 0;
821 } 820 }
822 if (skb->sk) 821 if (skb->sk)
823 skb_set_owner_w(new_skb, skb->sk); 822 skb_set_owner_w(new_skb, skb->sk);
824 dev_kfree_skb(skb); 823 dev_kfree_skb(skb);
825 skb = new_skb; 824 skb = new_skb;
826 old_iph = skb->nh.iph; 825 old_iph = skb->nh.iph;
827 } 826 }
828 827
829 skb->h.raw = skb->nh.raw; 828 skb->h.raw = skb->nh.raw;
830 skb->nh.raw = skb_push(skb, gre_hlen); 829 skb->nh.raw = skb_push(skb, gre_hlen);
831 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 830 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
832 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | 831 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
833 IPSKB_REROUTED); 832 IPSKB_REROUTED);
834 dst_release(skb->dst); 833 dst_release(skb->dst);
835 skb->dst = &rt->u.dst; 834 skb->dst = &rt->u.dst;
836 835
837 /* 836 /*
838 * Push down and install the IPIP header. 837 * Push down and install the IPIP header.
839 */ 838 */
840 839
841 iph = skb->nh.iph; 840 iph = skb->nh.iph;
842 iph->version = 4; 841 iph->version = 4;
843 iph->ihl = sizeof(struct iphdr) >> 2; 842 iph->ihl = sizeof(struct iphdr) >> 2;
844 iph->frag_off = df; 843 iph->frag_off = df;
845 iph->protocol = IPPROTO_GRE; 844 iph->protocol = IPPROTO_GRE;
846 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb); 845 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
847 iph->daddr = rt->rt_dst; 846 iph->daddr = rt->rt_dst;
848 iph->saddr = rt->rt_src; 847 iph->saddr = rt->rt_src;
849 848
850 if ((iph->ttl = tiph->ttl) == 0) { 849 if ((iph->ttl = tiph->ttl) == 0) {
851 if (skb->protocol == htons(ETH_P_IP)) 850 if (skb->protocol == htons(ETH_P_IP))
852 iph->ttl = old_iph->ttl; 851 iph->ttl = old_iph->ttl;
853 #ifdef CONFIG_IPV6 852 #ifdef CONFIG_IPV6
854 else if (skb->protocol == htons(ETH_P_IPV6)) 853 else if (skb->protocol == htons(ETH_P_IPV6))
855 iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit; 854 iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit;
856 #endif 855 #endif
857 else 856 else
858 iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT); 857 iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
859 } 858 }
860 859
861 ((u16*)(iph+1))[0] = tunnel->parms.o_flags; 860 ((u16*)(iph+1))[0] = tunnel->parms.o_flags;
862 ((u16*)(iph+1))[1] = skb->protocol; 861 ((u16*)(iph+1))[1] = skb->protocol;
863 862
864 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) { 863 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
865 u32 *ptr = (u32*)(((u8*)iph) + tunnel->hlen - 4); 864 u32 *ptr = (u32*)(((u8*)iph) + tunnel->hlen - 4);
866 865
867 if (tunnel->parms.o_flags&GRE_SEQ) { 866 if (tunnel->parms.o_flags&GRE_SEQ) {
868 ++tunnel->o_seqno; 867 ++tunnel->o_seqno;
869 *ptr = htonl(tunnel->o_seqno); 868 *ptr = htonl(tunnel->o_seqno);
870 ptr--; 869 ptr--;
871 } 870 }
872 if (tunnel->parms.o_flags&GRE_KEY) { 871 if (tunnel->parms.o_flags&GRE_KEY) {
873 *ptr = tunnel->parms.o_key; 872 *ptr = tunnel->parms.o_key;
874 ptr--; 873 ptr--;
875 } 874 }
876 if (tunnel->parms.o_flags&GRE_CSUM) { 875 if (tunnel->parms.o_flags&GRE_CSUM) {
877 *ptr = 0; 876 *ptr = 0;
878 *(__u16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr)); 877 *(__u16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
879 } 878 }
880 } 879 }
881 880
882 nf_reset(skb); 881 nf_reset(skb);
883 882
884 IPTUNNEL_XMIT(); 883 IPTUNNEL_XMIT();
885 tunnel->recursion--; 884 tunnel->recursion--;
886 return 0; 885 return 0;
887 886
888 tx_error_icmp: 887 tx_error_icmp:
889 dst_link_failure(skb); 888 dst_link_failure(skb);
890 889
891 tx_error: 890 tx_error:
892 stats->tx_errors++; 891 stats->tx_errors++;
893 dev_kfree_skb(skb); 892 dev_kfree_skb(skb);
894 tunnel->recursion--; 893 tunnel->recursion--;
895 return 0; 894 return 0;
896 } 895 }
897 896
898 static int 897 static int
899 ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) 898 ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
900 { 899 {
901 int err = 0; 900 int err = 0;
902 struct ip_tunnel_parm p; 901 struct ip_tunnel_parm p;
903 struct ip_tunnel *t; 902 struct ip_tunnel *t;
904 903
905 switch (cmd) { 904 switch (cmd) {
906 case SIOCGETTUNNEL: 905 case SIOCGETTUNNEL:
907 t = NULL; 906 t = NULL;
908 if (dev == ipgre_fb_tunnel_dev) { 907 if (dev == ipgre_fb_tunnel_dev) {
909 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { 908 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
910 err = -EFAULT; 909 err = -EFAULT;
911 break; 910 break;
912 } 911 }
913 t = ipgre_tunnel_locate(&p, 0); 912 t = ipgre_tunnel_locate(&p, 0);
914 } 913 }
915 if (t == NULL) 914 if (t == NULL)
916 t = netdev_priv(dev); 915 t = netdev_priv(dev);
917 memcpy(&p, &t->parms, sizeof(p)); 916 memcpy(&p, &t->parms, sizeof(p));
918 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) 917 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
919 err = -EFAULT; 918 err = -EFAULT;
920 break; 919 break;
921 920
922 case SIOCADDTUNNEL: 921 case SIOCADDTUNNEL:
923 case SIOCCHGTUNNEL: 922 case SIOCCHGTUNNEL:
924 err = -EPERM; 923 err = -EPERM;
925 if (!capable(CAP_NET_ADMIN)) 924 if (!capable(CAP_NET_ADMIN))
926 goto done; 925 goto done;
927 926
928 err = -EFAULT; 927 err = -EFAULT;
929 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) 928 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
930 goto done; 929 goto done;
931 930
932 err = -EINVAL; 931 err = -EINVAL;
933 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE || 932 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
934 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) || 933 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
935 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))) 934 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
936 goto done; 935 goto done;
937 if (p.iph.ttl) 936 if (p.iph.ttl)
938 p.iph.frag_off |= htons(IP_DF); 937 p.iph.frag_off |= htons(IP_DF);
939 938
940 if (!(p.i_flags&GRE_KEY)) 939 if (!(p.i_flags&GRE_KEY))
941 p.i_key = 0; 940 p.i_key = 0;
942 if (!(p.o_flags&GRE_KEY)) 941 if (!(p.o_flags&GRE_KEY))
943 p.o_key = 0; 942 p.o_key = 0;
944 943
945 t = ipgre_tunnel_locate(&p, cmd == SIOCADDTUNNEL); 944 t = ipgre_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
946 945
947 if (dev != ipgre_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { 946 if (dev != ipgre_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
948 if (t != NULL) { 947 if (t != NULL) {
949 if (t->dev != dev) { 948 if (t->dev != dev) {
950 err = -EEXIST; 949 err = -EEXIST;
951 break; 950 break;
952 } 951 }
953 } else { 952 } else {
954 unsigned nflags=0; 953 unsigned nflags=0;
955 954
956 t = netdev_priv(dev); 955 t = netdev_priv(dev);
957 956
958 if (MULTICAST(p.iph.daddr)) 957 if (MULTICAST(p.iph.daddr))
959 nflags = IFF_BROADCAST; 958 nflags = IFF_BROADCAST;
960 else if (p.iph.daddr) 959 else if (p.iph.daddr)
961 nflags = IFF_POINTOPOINT; 960 nflags = IFF_POINTOPOINT;
962 961
963 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) { 962 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
964 err = -EINVAL; 963 err = -EINVAL;
965 break; 964 break;
966 } 965 }
967 ipgre_tunnel_unlink(t); 966 ipgre_tunnel_unlink(t);
968 t->parms.iph.saddr = p.iph.saddr; 967 t->parms.iph.saddr = p.iph.saddr;
969 t->parms.iph.daddr = p.iph.daddr; 968 t->parms.iph.daddr = p.iph.daddr;
970 t->parms.i_key = p.i_key; 969 t->parms.i_key = p.i_key;
971 t->parms.o_key = p.o_key; 970 t->parms.o_key = p.o_key;
972 memcpy(dev->dev_addr, &p.iph.saddr, 4); 971 memcpy(dev->dev_addr, &p.iph.saddr, 4);
973 memcpy(dev->broadcast, &p.iph.daddr, 4); 972 memcpy(dev->broadcast, &p.iph.daddr, 4);
974 ipgre_tunnel_link(t); 973 ipgre_tunnel_link(t);
975 netdev_state_change(dev); 974 netdev_state_change(dev);
976 } 975 }
977 } 976 }
978 977
979 if (t) { 978 if (t) {
980 err = 0; 979 err = 0;
981 if (cmd == SIOCCHGTUNNEL) { 980 if (cmd == SIOCCHGTUNNEL) {
982 t->parms.iph.ttl = p.iph.ttl; 981 t->parms.iph.ttl = p.iph.ttl;
983 t->parms.iph.tos = p.iph.tos; 982 t->parms.iph.tos = p.iph.tos;
984 t->parms.iph.frag_off = p.iph.frag_off; 983 t->parms.iph.frag_off = p.iph.frag_off;
985 } 984 }
986 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) 985 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
987 err = -EFAULT; 986 err = -EFAULT;
988 } else 987 } else
989 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); 988 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
990 break; 989 break;
991 990
992 case SIOCDELTUNNEL: 991 case SIOCDELTUNNEL:
993 err = -EPERM; 992 err = -EPERM;
994 if (!capable(CAP_NET_ADMIN)) 993 if (!capable(CAP_NET_ADMIN))
995 goto done; 994 goto done;
996 995
997 if (dev == ipgre_fb_tunnel_dev) { 996 if (dev == ipgre_fb_tunnel_dev) {
998 err = -EFAULT; 997 err = -EFAULT;
999 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) 998 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1000 goto done; 999 goto done;
1001 err = -ENOENT; 1000 err = -ENOENT;
1002 if ((t = ipgre_tunnel_locate(&p, 0)) == NULL) 1001 if ((t = ipgre_tunnel_locate(&p, 0)) == NULL)
1003 goto done; 1002 goto done;
1004 err = -EPERM; 1003 err = -EPERM;
1005 if (t == netdev_priv(ipgre_fb_tunnel_dev)) 1004 if (t == netdev_priv(ipgre_fb_tunnel_dev))
1006 goto done; 1005 goto done;
1007 dev = t->dev; 1006 dev = t->dev;
1008 } 1007 }
1009 err = unregister_netdevice(dev); 1008 err = unregister_netdevice(dev);
1010 break; 1009 break;
1011 1010
1012 default: 1011 default:
1013 err = -EINVAL; 1012 err = -EINVAL;
1014 } 1013 }
1015 1014
1016 done: 1015 done:
1017 return err; 1016 return err;
1018 } 1017 }
1019 1018
1020 static struct net_device_stats *ipgre_tunnel_get_stats(struct net_device *dev) 1019 static struct net_device_stats *ipgre_tunnel_get_stats(struct net_device *dev)
1021 { 1020 {
1022 return &(((struct ip_tunnel*)netdev_priv(dev))->stat); 1021 return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
1023 } 1022 }
1024 1023
1025 static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu) 1024 static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1026 { 1025 {
1027 struct ip_tunnel *tunnel = netdev_priv(dev); 1026 struct ip_tunnel *tunnel = netdev_priv(dev);
1028 if (new_mtu < 68 || new_mtu > 0xFFF8 - tunnel->hlen) 1027 if (new_mtu < 68 || new_mtu > 0xFFF8 - tunnel->hlen)
1029 return -EINVAL; 1028 return -EINVAL;
1030 dev->mtu = new_mtu; 1029 dev->mtu = new_mtu;
1031 return 0; 1030 return 0;
1032 } 1031 }
1033 1032
1034 #ifdef CONFIG_NET_IPGRE_BROADCAST 1033 #ifdef CONFIG_NET_IPGRE_BROADCAST
/* Nice toy. Unfortunately, useless in real life :-)
   It allows to construct virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.


   I have no idea was this bicycle invented before me,
   so that I had to set ARPHRD_IPGRE to a random value.
   I have an impression, that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to mbone, play with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp 10.66.66.66
   ...
   ftp fec0:6666:6666::193.233.7.65
   ...

 */
1063 1062
1064 static int ipgre_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, 1063 static int ipgre_header(struct sk_buff *skb, struct net_device *dev, unsigned short type,
1065 void *daddr, void *saddr, unsigned len) 1064 void *daddr, void *saddr, unsigned len)
1066 { 1065 {
1067 struct ip_tunnel *t = netdev_priv(dev); 1066 struct ip_tunnel *t = netdev_priv(dev);
1068 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen); 1067 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
1069 u16 *p = (u16*)(iph+1); 1068 u16 *p = (u16*)(iph+1);
1070 1069
1071 memcpy(iph, &t->parms.iph, sizeof(struct iphdr)); 1070 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1072 p[0] = t->parms.o_flags; 1071 p[0] = t->parms.o_flags;
1073 p[1] = htons(type); 1072 p[1] = htons(type);
1074 1073
1075 /* 1074 /*
1076 * Set the source hardware address. 1075 * Set the source hardware address.
1077 */ 1076 */
1078 1077
1079 if (saddr) 1078 if (saddr)
1080 memcpy(&iph->saddr, saddr, 4); 1079 memcpy(&iph->saddr, saddr, 4);
1081 1080
1082 if (daddr) { 1081 if (daddr) {
1083 memcpy(&iph->daddr, daddr, 4); 1082 memcpy(&iph->daddr, daddr, 4);
1084 return t->hlen; 1083 return t->hlen;
1085 } 1084 }
1086 if (iph->daddr && !MULTICAST(iph->daddr)) 1085 if (iph->daddr && !MULTICAST(iph->daddr))
1087 return t->hlen; 1086 return t->hlen;
1088 1087
1089 return -t->hlen; 1088 return -t->hlen;
1090 } 1089 }
1091 1090
1092 static int ipgre_open(struct net_device *dev) 1091 static int ipgre_open(struct net_device *dev)
1093 { 1092 {
1094 struct ip_tunnel *t = netdev_priv(dev); 1093 struct ip_tunnel *t = netdev_priv(dev);
1095 1094
1096 if (MULTICAST(t->parms.iph.daddr)) { 1095 if (MULTICAST(t->parms.iph.daddr)) {
1097 struct flowi fl = { .oif = t->parms.link, 1096 struct flowi fl = { .oif = t->parms.link,
1098 .nl_u = { .ip4_u = 1097 .nl_u = { .ip4_u =
1099 { .daddr = t->parms.iph.daddr, 1098 { .daddr = t->parms.iph.daddr,
1100 .saddr = t->parms.iph.saddr, 1099 .saddr = t->parms.iph.saddr,
1101 .tos = RT_TOS(t->parms.iph.tos) } }, 1100 .tos = RT_TOS(t->parms.iph.tos) } },
1102 .proto = IPPROTO_GRE }; 1101 .proto = IPPROTO_GRE };
1103 struct rtable *rt; 1102 struct rtable *rt;
1104 if (ip_route_output_key(&rt, &fl)) 1103 if (ip_route_output_key(&rt, &fl))
1105 return -EADDRNOTAVAIL; 1104 return -EADDRNOTAVAIL;
1106 dev = rt->u.dst.dev; 1105 dev = rt->u.dst.dev;
1107 ip_rt_put(rt); 1106 ip_rt_put(rt);
1108 if (__in_dev_get_rtnl(dev) == NULL) 1107 if (__in_dev_get_rtnl(dev) == NULL)
1109 return -EADDRNOTAVAIL; 1108 return -EADDRNOTAVAIL;
1110 t->mlink = dev->ifindex; 1109 t->mlink = dev->ifindex;
1111 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr); 1110 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
1112 } 1111 }
1113 return 0; 1112 return 0;
1114 } 1113 }
1115 1114
1116 static int ipgre_close(struct net_device *dev) 1115 static int ipgre_close(struct net_device *dev)
1117 { 1116 {
1118 struct ip_tunnel *t = netdev_priv(dev); 1117 struct ip_tunnel *t = netdev_priv(dev);
1119 if (MULTICAST(t->parms.iph.daddr) && t->mlink) { 1118 if (MULTICAST(t->parms.iph.daddr) && t->mlink) {
1120 struct in_device *in_dev = inetdev_by_index(t->mlink); 1119 struct in_device *in_dev = inetdev_by_index(t->mlink);
1121 if (in_dev) { 1120 if (in_dev) {
1122 ip_mc_dec_group(in_dev, t->parms.iph.daddr); 1121 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1123 in_dev_put(in_dev); 1122 in_dev_put(in_dev);
1124 } 1123 }
1125 } 1124 }
1126 return 0; 1125 return 0;
1127 } 1126 }
1128 1127
1129 #endif 1128 #endif
1130 1129
1131 static void ipgre_tunnel_setup(struct net_device *dev) 1130 static void ipgre_tunnel_setup(struct net_device *dev)
1132 { 1131 {
1133 SET_MODULE_OWNER(dev); 1132 SET_MODULE_OWNER(dev);
1134 dev->uninit = ipgre_tunnel_uninit; 1133 dev->uninit = ipgre_tunnel_uninit;
1135 dev->destructor = free_netdev; 1134 dev->destructor = free_netdev;
1136 dev->hard_start_xmit = ipgre_tunnel_xmit; 1135 dev->hard_start_xmit = ipgre_tunnel_xmit;
1137 dev->get_stats = ipgre_tunnel_get_stats; 1136 dev->get_stats = ipgre_tunnel_get_stats;
1138 dev->do_ioctl = ipgre_tunnel_ioctl; 1137 dev->do_ioctl = ipgre_tunnel_ioctl;
1139 dev->change_mtu = ipgre_tunnel_change_mtu; 1138 dev->change_mtu = ipgre_tunnel_change_mtu;
1140 1139
1141 dev->type = ARPHRD_IPGRE; 1140 dev->type = ARPHRD_IPGRE;
1142 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr) + 4; 1141 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
1143 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4; 1142 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
1144 dev->flags = IFF_NOARP; 1143 dev->flags = IFF_NOARP;
1145 dev->iflink = 0; 1144 dev->iflink = 0;
1146 dev->addr_len = 4; 1145 dev->addr_len = 4;
1147 } 1146 }
1148 1147
/*
 * Initialise a GRE tunnel net_device: copy the tunnel endpoint addresses
 * into the device's hw addresses, guess the underlying output device to
 * derive a reasonable MTU/hard_header_len, and precompute the GRE header
 * length (addend) from the configured output flags.
 *
 * Returns 0 on success, -EINVAL for a multicast tunnel without a local
 * source address (needed to join the group).
 */
static int ipgre_tunnel_init(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel;
	struct iphdr *iph;
	/* Fallbacks if no underlying device can be determined. */
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	/* Minimum encapsulation: outer IP header + 4-byte base GRE header. */
	int addend = sizeof(struct iphdr) + 4;

	tunnel = netdev_priv(dev);
	iph = &tunnel->parms.iph;

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	/* dev_addr/broadcast hold the tunnel's IPv4 endpoints (addr_len == 4). */
	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);

	/* Guess output device to choose reasonable mtu and hard_header_len */

	if (iph->daddr) {
		/* Route to the remote endpoint to find the egress device. */
		struct flowi fl = { .oif = tunnel->parms.link,
				    .nl_u = { .ip4_u =
					      { .daddr = iph->daddr,
						.saddr = iph->saddr,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_GRE };
		struct rtable *rt;
		if (!ip_route_output_key(&rt, &fl)) {
			tdev = rt->u.dst.dev;
			ip_rt_put(rt);
		}

		dev->flags |= IFF_POINTOPOINT;

#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (MULTICAST(iph->daddr)) {
			/* Multicast GRE needs a fixed local address to join
			 * the group; refuse otherwise. */
			if (!iph->saddr)
				return -EINVAL;
			/* note: '=' not '|=' — replaces IFF_POINTOPOINT set above */
			dev->flags = IFF_BROADCAST;
			dev->hard_header = ipgre_header;
			dev->open = ipgre_open;
			dev->stop = ipgre_close;
		}
#endif
	}

	/* No route found (or no daddr): fall back to the bound link, if any. */
	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len;
		mtu = tdev->mtu;
	}
	dev->iflink = tunnel->parms.link;

	/* Precalculate GRE options length */
	if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
		if (tunnel->parms.o_flags&GRE_CSUM)
			addend += 4;
		if (tunnel->parms.o_flags&GRE_KEY)
			addend += 4;
		if (tunnel->parms.o_flags&GRE_SEQ)
			addend += 4;
	}
	/* Reserve encapsulation overhead in the device's link parameters. */
	dev->hard_header_len = hlen + addend;
	dev->mtu = mtu - addend;
	tunnel->hlen = addend;
	return 0;
}
1219 1218
/*
 * Initialise the fallback "gre0" device that receives GRE packets not
 * matching any configured tunnel.  Fills in a minimal template outer IP
 * header and registers the tunnel in the wildcard hash slot.
 */
static int __init ipgre_fb_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	/* Template header: IPv4, GRE payload, no options (ihl == 5). */
	iph->version = 4;
	iph->protocol = IPPROTO_GRE;
	iph->ihl = 5;
	/* Outer IP header plus the 4-byte base GRE header. */
	tunnel->hlen = sizeof(struct iphdr) + 4;

	/* Hold a reference for the tunnels_wc table entry. */
	dev_hold(dev);
	tunnels_wc[0] = tunnel;
	return 0;
}
1237 1236
1238 1237
/* IPPROTO_GRE handler registered with the IPv4 stack. */
static struct net_protocol ipgre_protocol = {
	.handler	= ipgre_rcv,
	.err_handler	= ipgre_err,
};
1243 1242
1244 1243
1245 /* 1244 /*
1246 * And now the modules code and kernel interface. 1245 * And now the modules code and kernel interface.
1247 */ 1246 */
1248 1247
/*
 * Module entry point: register the GRE protocol handler, then allocate
 * and register the fallback "gre0" device.  Unwinds in reverse order on
 * failure via the goto-cleanup chain.
 */
static int __init ipgre_init(void)
{
	int err;

	printk(KERN_INFO "GRE over IPv4 tunneling driver\n");

	if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
		printk(KERN_INFO "ipgre init: can't add protocol\n");
		return -EAGAIN;
	}

	ipgre_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
					   ipgre_tunnel_setup);
	if (!ipgre_fb_tunnel_dev) {
		err = -ENOMEM;
		goto err1;
	}

	/* register_netdev() will invoke this init hook for gre0. */
	ipgre_fb_tunnel_dev->init = ipgre_fb_tunnel_init;

	if ((err = register_netdev(ipgre_fb_tunnel_dev)))
		goto err2;
out:
	return err;
err2:
	free_netdev(ipgre_fb_tunnel_dev);
err1:
	inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
	goto out;
}
1279 1278
1280 static void __exit ipgre_destroy_tunnels(void) 1279 static void __exit ipgre_destroy_tunnels(void)
1281 { 1280 {
1282 int prio; 1281 int prio;
1283 1282
1284 for (prio = 0; prio < 4; prio++) { 1283 for (prio = 0; prio < 4; prio++) {
1285 int h; 1284 int h;
1286 for (h = 0; h < HASH_SIZE; h++) { 1285 for (h = 0; h < HASH_SIZE; h++) {
1287 struct ip_tunnel *t; 1286 struct ip_tunnel *t;
1288 while ((t = tunnels[prio][h]) != NULL) 1287 while ((t = tunnels[prio][h]) != NULL)
1289 unregister_netdevice(t->dev); 1288 unregister_netdevice(t->dev);
1290 } 1289 }
1291 } 1290 }
1292 } 1291 }
1293 1292
/*
 * Module exit: unregister the protocol handler first so no new packets
 * arrive, then tear down all tunnel devices under the RTNL lock.
 */
static void __exit ipgre_fini(void)
{
	if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
		printk(KERN_INFO "ipgre close: can't remove protocol\n");

	rtnl_lock();
	ipgre_destroy_tunnels();
	rtnl_unlock();
}
1303 1302
/* Module registration boilerplate. */
module_init(ipgre_init);
module_exit(ipgre_fini);
MODULE_LICENSE("GPL");
1307 1306
net/ipv4/ip_options.c
1 /* 1 /*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX 2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket 3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level. 4 * interface as the means of communication with the user level.
5 * 5 *
6 * The options processing module for ip.c 6 * The options processing module for ip.c
7 * 7 *
8 * Version: $Id: ip_options.c,v 1.21 2001/09/01 00:31:50 davem Exp $ 8 * Version: $Id: ip_options.c,v 1.21 2001/09/01 00:31:50 davem Exp $
9 * 9 *
10 * Authors: A.N.Kuznetsov 10 * Authors: A.N.Kuznetsov
11 * 11 *
12 */ 12 */
13 13
14 #include <linux/capability.h> 14 #include <linux/capability.h>
15 #include <linux/module.h> 15 #include <linux/module.h>
16 #include <linux/types.h> 16 #include <linux/types.h>
17 #include <asm/uaccess.h> 17 #include <asm/uaccess.h>
18 #include <linux/skbuff.h> 18 #include <linux/skbuff.h>
19 #include <linux/ip.h> 19 #include <linux/ip.h>
20 #include <linux/icmp.h> 20 #include <linux/icmp.h>
21 #include <linux/netdevice.h> 21 #include <linux/netdevice.h>
22 #include <linux/rtnetlink.h> 22 #include <linux/rtnetlink.h>
23 #include <net/sock.h> 23 #include <net/sock.h>
24 #include <net/ip.h> 24 #include <net/ip.h>
25 #include <net/icmp.h> 25 #include <net/icmp.h>
26 #include <net/route.h> 26 #include <net/route.h>
27 27
28 /* 28 /*
29 * Write options to IP header, record destination address to 29 * Write options to IP header, record destination address to
30 * source route option, address of outgoing interface 30 * source route option, address of outgoing interface
31 * (we should already know it, so that this function is allowed be 31 * (we should already know it, so that this function is allowed be
32 * called only after routing decision) and timestamp, 32 * called only after routing decision) and timestamp,
33 * if we originate this datagram. 33 * if we originate this datagram.
34 * 34 *
35 * daddr is real destination address, next hop is recorded in IP header. 35 * daddr is real destination address, next hop is recorded in IP header.
36 * saddr is address of outgoing interface. 36 * saddr is address of outgoing interface.
37 */ 37 */
38 38
/*
 * Copy a compiled option set into the IP header of an outgoing skb and
 * fill in the fields that are only known after routing: the real
 * destination in a source-route option, our address in RR/TS slots, and
 * the timestamp.  For fragments (is_frag), options that must not be
 * copied into fragments are instead overwritten with NOPs.
 *
 * Offsets stored in struct ip_options (opt->srr, opt->rr, opt->ts) are
 * byte offsets from the start of the IP header.
 */
void ip_options_build(struct sk_buff * skb, struct ip_options * opt,
			    u32 daddr, struct rtable *rt, int is_frag)
{
	unsigned char * iph = skb->nh.raw;

	/* Work on the per-skb copy in the control buffer, not the caller's. */
	memcpy(&(IPCB(skb)->opt), opt, sizeof(struct ip_options));
	memcpy(iph+sizeof(struct iphdr), opt->__data, opt->optlen);
	opt = &(IPCB(skb)->opt);
	opt->is_data = 0;

	/* Record the final destination in the last SRR slot
	 * (iph[opt->srr+1] is the option length). */
	if (opt->srr)
		memcpy(iph+opt->srr+iph[opt->srr+1]-4, &daddr, 4);

	if (!is_frag) {
		/* iph[...+2] is the option's pointer field; -5/-9 convert the
		 * 1-based pointer into the offset of the current 4-byte slot. */
		if (opt->rr_needaddr)
			ip_rt_get_source(iph+opt->rr+iph[opt->rr+2]-5, rt);
		if (opt->ts_needaddr)
			ip_rt_get_source(iph+opt->ts+iph[opt->ts+2]-9, rt);
		if (opt->ts_needtime) {
			struct timeval tv;
			__u32 midtime;
			do_gettimeofday(&tv);
			/* Milliseconds since midnight UT, as RFC 791 requires. */
			midtime = htonl((tv.tv_sec % 86400) * 1000 + tv.tv_usec / 1000);
			memcpy(iph+opt->ts+iph[opt->ts+2]-5, &midtime, 4);
		}
		return;
	}
	/* Fragment path: RR and TS may not appear in fragments — blank them. */
	if (opt->rr) {
		memset(iph+opt->rr, IPOPT_NOP, iph[opt->rr+1]);
		opt->rr = 0;
		opt->rr_needaddr = 0;
	}
	if (opt->ts) {
		memset(iph+opt->ts, IPOPT_NOP, iph[opt->ts+1]);
		opt->ts = 0;
		opt->ts_needaddr = opt->ts_needtime = 0;
	}
}
77 77
78 /* 78 /*
79 * Provided (sopt, skb) points to received options, 79 * Provided (sopt, skb) points to received options,
80 * build in dopt compiled option set appropriate for answering. 80 * build in dopt compiled option set appropriate for answering.
81 * i.e. invert SRR option, copy anothers, 81 * i.e. invert SRR option, copy anothers,
82 * and grab room in RR/TS options. 82 * and grab room in RR/TS options.
83 * 83 *
84 * NOTE: dopt cannot point to skb. 84 * NOTE: dopt cannot point to skb.
85 */ 85 */
86 86
/*
 * Build in dopt the option set appropriate for replying to the received
 * skb: the source route is inverted, RR/TS options are copied with room
 * reserved for our own entries, and others are copied verbatim.
 *
 * Returns 0 on success, -EINVAL if the received options are malformed.
 * NOTE: dopt must not alias storage inside skb.
 */
int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
{
	struct ip_options *sopt;
	unsigned char *sptr, *dptr;
	int soffset, doffset;
	int optlen;
	u32 daddr;

	memset(dopt, 0, sizeof(struct ip_options));

	dopt->is_data = 1;

	/* Source options live in the skb control buffer (set by ip_rcv path). */
	sopt = &(IPCB(skb)->opt);

	if (sopt->optlen == 0) {
		dopt->optlen = 0;
		return 0;
	}

	sptr = skb->nh.raw;
	dptr = dopt->__data;

	/* Address to record as "ourselves": the route's specific destination
	 * when we have one, else the packet's destination address. */
	if (skb->dst)
		daddr = ((struct rtable*)skb->dst)->rt_spec_dst;
	else
		daddr = skb->nh.iph->daddr;

	if (sopt->rr) {
		optlen  = sptr[sopt->rr+1];
		soffset = sptr[sopt->rr+2];
		dopt->rr = dopt->optlen + sizeof(struct iphdr);
		memcpy(dptr, sptr+sopt->rr, optlen);
		if (sopt->rr_needaddr && soffset <= optlen) {
			if (soffset + 3 > optlen)
				return -EINVAL;
			/* Advance the pointer past the slot we will fill. */
			dptr[2] = soffset + 4;
			dopt->rr_needaddr = 1;
		}
		dptr += optlen;
		dopt->optlen += optlen;
	}
	if (sopt->ts) {
		optlen  = sptr[sopt->ts+1];
		soffset = sptr[sopt->ts+2];
		dopt->ts = dopt->optlen + sizeof(struct iphdr);
		memcpy(dptr, sptr+sopt->ts, optlen);
		if (soffset <= optlen) {
			if (sopt->ts_needaddr) {
				if (soffset + 3 > optlen)
					return -EINVAL;
				dopt->ts_needaddr = 1;
				soffset += 4;
			}
			if (sopt->ts_needtime) {
				if (soffset + 3 > optlen)
					return -EINVAL;
				if ((dptr[3]&0xF) != IPOPT_TS_PRESPEC) {
					dopt->ts_needtime = 1;
					soffset += 4;
				} else {
					dopt->ts_needtime = 0;

					/* PRESPEC: only timestamp if the
					 * prespecified address is not ours. */
					if (soffset + 8 <= optlen) {
						__u32 addr;

						memcpy(&addr, sptr+soffset-1, 4);
						if (inet_addr_type(addr) != RTN_LOCAL) {
							dopt->ts_needtime = 1;
							soffset += 8;
						}
					}
				}
			}
			dptr[2] = soffset;
		}
		dptr += optlen;
		dopt->optlen += optlen;
	}
	if (sopt->srr) {
		unsigned char * start = sptr+sopt->srr;
		u32 faddr;

		optlen  = start[1];
		soffset = start[2];
		doffset = 0;
		if (soffset > optlen)
			soffset = optlen + 1;
		soffset -= 4;
		if (soffset > 3) {
			/* Reverse the recorded route: copy hops back-to-front. */
			memcpy(&faddr, &start[soffset-1], 4);
			for (soffset-=4, doffset=4; soffset > 3; soffset-=4, doffset+=4)
				memcpy(&dptr[doffset-1], &start[soffset-1], 4);
			/*
			 * RFC1812 requires to fix illegal source routes.
			 */
			if (memcmp(&skb->nh.iph->saddr, &start[soffset+3], 4) == 0)
				doffset -= 4;
		}
		if (doffset > 3) {
			memcpy(&start[doffset-1], &daddr, 4);
			dopt->faddr = faddr;
			dptr[0] = start[0];
			dptr[1] = doffset+3;
			dptr[2] = 4;
			dptr += doffset+3;
			dopt->srr = dopt->optlen + sizeof(struct iphdr);
			dopt->optlen += doffset+3;
			dopt->is_strictroute = sopt->is_strictroute;
		}
	}
	/* Pad to a 4-byte boundary as the IP header length requires. */
	while (dopt->optlen & 3) {
		*dptr++ = IPOPT_END;
		dopt->optlen++;
	}
	return 0;
}
203 203
204 /* 204 /*
205 * Options "fragmenting", just fill options not 205 * Options "fragmenting", just fill options not
206 * allowed in fragments with NOOPs. 206 * allowed in fragments with NOOPs.
207 * Simple and stupid 8), but the most efficient way. 207 * Simple and stupid 8), but the most efficient way.
208 */ 208 */
209 209
/*
 * Options "fragmenting", just fill options not
 * allowed in fragments with NOOPs.
 * Simple and stupid 8), but the most efficient way.
 */

void ip_options_fragment(struct sk_buff * skb)
{
	unsigned char * optptr = skb->nh.raw + sizeof(struct iphdr);
	struct ip_options * opt = &(IPCB(skb)->opt);
	int  l = opt->optlen;
	int  optlen;

	while (l > 0) {
		switch (*optptr) {
		case IPOPT_END:
			return;
		case IPOPT_NOOP:
			l--;
			optptr++;
			continue;
		}
		optlen = optptr[1];
		/* Malformed length: stop rather than run off the header. */
		if (optlen<2 || optlen>l)
			return;
		/* Options without the copied bit must not appear in fragments. */
		if (!IPOPT_COPIED(*optptr))
			memset(optptr, IPOPT_NOOP, optlen);
		l -= optlen;
		optptr += optlen;
	}
	/* RR and TS are never copied into fragments; clear their state. */
	opt->ts = 0;
	opt->rr = 0;
	opt->rr_needaddr = 0;
	opt->ts_needaddr = 0;
	opt->ts_needtime = 0;
	return;
}
241 241
242 /* 242 /*
243 * Verify options and fill pointers in struct options. 243 * Verify options and fill pointers in struct options.
244 * Caller should clear *opt, and set opt->data. 244 * Caller should clear *opt, and set opt->data.
245 * If opt == NULL, then skb->data should point to IP header. 245 * If opt == NULL, then skb->data should point to IP header.
246 */ 246 */
247 247
/*
 * Verify options and fill pointers in struct options.
 * Caller should clear *opt, and set opt->data.
 * If opt == NULL, then skb->data should point to IP header.
 *
 * (For the opt == NULL receive path, IPCB(skb)->opt is assumed to be
 * already zeroed by ip_rcv(), so it is not cleared here.)
 *
 * On a malformed option, sends an ICMP parameter problem (receive path
 * only) and returns -EINVAL; returns 0 on success.
 */

int ip_options_compile(struct ip_options * opt, struct sk_buff * skb)
{
	int l;
	unsigned char * iph;
	unsigned char * optptr;
	int optlen;
	/* Points at the offending byte when a malformed option is found. */
	unsigned char * pp_ptr = NULL;
	struct rtable *rt = skb ? (struct rtable*)skb->dst : NULL;

	if (!opt) {
		/* Receive path: parse options directly from the packet header. */
		opt = &(IPCB(skb)->opt);
		iph = skb->nh.raw;
		opt->optlen = ((struct iphdr *)iph)->ihl*4 - sizeof(struct iphdr);
		optptr = iph + sizeof(struct iphdr);
		opt->is_data = 0;
	} else {
		/* Userspace-supplied (is_data) or in-skb option buffer. */
		optptr = opt->is_data ? opt->__data : (unsigned char*)&(skb->nh.iph[1]);
		iph = optptr - sizeof(struct iphdr);
	}

	for (l = opt->optlen; l > 0; ) {
		switch (*optptr) {
		      case IPOPT_END:
			/* Everything after END must also be END; fix it up. */
			for (optptr++, l--; l>0; optptr++, l--) {
				if (*optptr != IPOPT_END) {
					*optptr = IPOPT_END;
					opt->is_changed = 1;
				}
			}
			goto eol;
		      case IPOPT_NOOP:
			l--;
			optptr++;
			continue;
		}
		optlen = optptr[1];
		if (optlen<2 || optlen>l) {
			pp_ptr = optptr;
			goto error;
		}
		switch (*optptr) {
		      case IPOPT_SSRR:
		      case IPOPT_LSRR:
			if (optlen < 3) {
				pp_ptr = optptr + 1;
				goto error;
			}
			if (optptr[2] < 4) {
				pp_ptr = optptr + 2;
				goto error;
			}
			/* NB: cf RFC-1812 5.2.4.1 */
			if (opt->srr) {
				pp_ptr = optptr;
				goto error;
			}
			if (!skb) {
				/* Locally-set options: pointer must be at the
				 * start and the hop list 4-byte aligned. */
				if (optptr[2] != 4 || optlen < 7 || ((optlen-3) & 3)) {
					pp_ptr = optptr + 1;
					goto error;
				}
				/* First hop becomes the immediate destination. */
				memcpy(&opt->faddr, &optptr[3], 4);
				if (optlen > 7)
					memmove(&optptr[3], &optptr[7], optlen-7);
			}
			opt->is_strictroute = (optptr[0] == IPOPT_SSRR);
			opt->srr = optptr - iph;
			break;
		      case IPOPT_RR:
			if (opt->rr) {
				pp_ptr = optptr;
				goto error;
			}
			if (optlen < 3) {
				pp_ptr = optptr + 1;
				goto error;
			}
			if (optptr[2] < 4) {
				pp_ptr = optptr + 2;
				goto error;
			}
			if (optptr[2] <= optlen) {
				/* Room left: record our address in this slot. */
				if (optptr[2]+3 > optlen) {
					pp_ptr = optptr + 2;
					goto error;
				}
				if (skb) {
					memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4);
					opt->is_changed = 1;
				}
				optptr[2] += 4;
				opt->rr_needaddr = 1;
			}
			opt->rr = optptr - iph;
			break;
		      case IPOPT_TIMESTAMP:
			if (opt->ts) {
				pp_ptr = optptr;
				goto error;
			}
			if (optlen < 4) {
				pp_ptr = optptr + 1;
				goto error;
			}
			if (optptr[2] < 5) {
				pp_ptr = optptr + 2;
				goto error;
			}
			if (optptr[2] <= optlen) {
				__u32 * timeptr = NULL;
				if (optptr[2]+3 > optptr[1]) {
					pp_ptr = optptr + 2;
					goto error;
				}
				switch (optptr[3]&0xF) {
				      case IPOPT_TS_TSONLY:
					opt->ts = optptr - iph;
					if (skb)
						timeptr = (__u32*)&optptr[optptr[2]-1];
					opt->ts_needtime = 1;
					optptr[2] += 4;
					break;
				      case IPOPT_TS_TSANDADDR:
					if (optptr[2]+7 > optptr[1]) {
						pp_ptr = optptr + 2;
						goto error;
					}
					opt->ts = optptr - iph;
					if (skb) {
						memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4);
						timeptr = (__u32*)&optptr[optptr[2]+3];
					}
					opt->ts_needaddr = 1;
					opt->ts_needtime = 1;
					optptr[2] += 8;
					break;
				      case IPOPT_TS_PRESPEC:
					if (optptr[2]+7 > optptr[1]) {
						pp_ptr = optptr + 2;
						goto error;
					}
					opt->ts = optptr - iph;
					{
						u32 addr;
						memcpy(&addr, &optptr[optptr[2]-1], 4);
						/* Only stamp if the prespecified
						 * address is locally terminated. */
						if (inet_addr_type(addr) == RTN_UNICAST)
							break;
						if (skb)
							timeptr = (__u32*)&optptr[optptr[2]+3];
					}
					opt->ts_needtime = 1;
					optptr[2] += 8;
					break;
				      default:
					/* Unknown flag: allowed only for raw-capable
					 * local senders. */
					if (!skb && !capable(CAP_NET_RAW)) {
						pp_ptr = optptr + 3;
						goto error;
					}
					break;
				}
				if (timeptr) {
					struct timeval tv;
					__u32  midtime;
					do_gettimeofday(&tv);
					/* Milliseconds since midnight UT (RFC 791). */
					midtime = htonl((tv.tv_sec % 86400) * 1000 + tv.tv_usec / 1000);
					memcpy(timeptr, &midtime, sizeof(__u32));
					opt->is_changed = 1;
				}
			} else {
				/* Option full: bump the overflow counter instead. */
				unsigned overflow = optptr[3]>>4;
				if (overflow == 15) {
					pp_ptr = optptr + 3;
					goto error;
				}
				opt->ts = optptr - iph;
				if (skb) {
					optptr[3] = (optptr[3]&0xF)|((overflow+1)<<4);
					opt->is_changed = 1;
				}
			}
			break;
		      case IPOPT_RA:
			if (optlen < 4) {
				pp_ptr = optptr + 1;
				goto error;
			}
			/* Router Alert value must be zero (RFC 2113). */
			if (optptr[2] == 0 && optptr[3] == 0)
				opt->router_alert = optptr - iph;
			break;
		      case IPOPT_SEC:
		      case IPOPT_SID:
		      default:
			if (!skb && !capable(CAP_NET_RAW)) {
				pp_ptr = optptr;
				goto error;
			}
			break;
		}
		l -= optlen;
		optptr += optlen;
	}

eol:
	if (!pp_ptr)
		return 0;

error:
	if (skb) {
		icmp_send(skb, ICMP_PARAMETERPROB, 0, htonl((pp_ptr-iph)<<24));
	}
	return -EINVAL;
}
461 460
462 461
463 /* 462 /*
464 * Undo all the changes done by ip_options_compile(). 463 * Undo all the changes done by ip_options_compile().
465 */ 464 */
466 465
/*
 * Undo all the changes done by ip_options_compile().
 *
 * Restores the original source route layout and rewinds the RR/TS
 * pointers, zeroing the slots compile reserved, so the option buffer can
 * be echoed back unmodified.
 */

void ip_options_undo(struct ip_options * opt)
{
	if (opt->srr) {
		unsigned  char * optptr = opt->__data+opt->srr-sizeof(struct  iphdr);
		/* Re-insert the first hop that compile moved into opt->faddr. */
		memmove(optptr+7, optptr+3, optptr[1]-7);
		memcpy(optptr+3, &opt->faddr, 4);
	}
	if (opt->rr_needaddr) {
		unsigned  char * optptr = opt->__data+opt->rr-sizeof(struct  iphdr);
		/* Rewind the pointer and clear the recorded address. */
		optptr[2] -= 4;
		memset(&optptr[optptr[2]-1], 0, 4);
	}
	if (opt->ts) {
		unsigned  char * optptr = opt->__data+opt->ts-sizeof(struct  iphdr);
		if (opt->ts_needtime) {
			optptr[2] -= 4;
			memset(&optptr[optptr[2]-1], 0, 4);
			/* PRESPEC entries consumed 8 bytes, not 4. */
			if ((optptr[3]&0xF) == IPOPT_TS_PRESPEC)
				optptr[2] -= 4;
		}
		if (opt->ts_needaddr) {
			optptr[2] -= 4;
			memset(&optptr[optptr[2]-1], 0, 4);
		}
	}
}
493 492
494 static struct ip_options *ip_options_get_alloc(const int optlen) 493 static struct ip_options *ip_options_get_alloc(const int optlen)
495 { 494 {
496 struct ip_options *opt = kmalloc(sizeof(*opt) + ((optlen + 3) & ~3), 495 struct ip_options *opt = kmalloc(sizeof(*opt) + ((optlen + 3) & ~3),
497 GFP_KERNEL); 496 GFP_KERNEL);
498 if (opt) 497 if (opt)
499 memset(opt, 0, sizeof(*opt)); 498 memset(opt, 0, sizeof(*opt));
500 return opt; 499 return opt;
501 } 500 }
502 501
/*
 * Finish installing a freshly-allocated option block: pad the data to a
 * 4-byte boundary, validate it with ip_options_compile(), then swap it
 * into *optp (freeing any previous block).
 *
 * Takes ownership of opt: it is freed here on validation failure.
 * Returns 0 on success, -EINVAL if the options do not compile.
 */
static int ip_options_get_finish(struct ip_options **optp,
				 struct ip_options *opt, int optlen)
{
	while (optlen & 3)
		opt->__data[optlen++] = IPOPT_END;
	opt->optlen = optlen;
	opt->is_data = 1;
	opt->is_setbyuser = 1;
	if (optlen && ip_options_compile(opt, NULL)) {
		kfree(opt);
		return -EINVAL;
	}
	/* Replace the old option block, if any. */
	kfree(*optp);
	*optp = opt;
	return 0;
}
519 518
/*
 * Build and install an ip_options block from a userspace buffer
 * (setsockopt path).  Returns 0 on success, -ENOMEM on allocation
 * failure, -EFAULT if the user buffer cannot be read, or -EINVAL
 * (via ip_options_get_finish) if the options do not compile.
 */
int ip_options_get_from_user(struct ip_options **optp, unsigned char __user *data, int optlen)
{
	struct ip_options *opt = ip_options_get_alloc(optlen);

	if (!opt)
		return -ENOMEM;
	if (optlen && copy_from_user(opt->__data, data, optlen)) {
		kfree(opt);
		return -EFAULT;
	}
	return ip_options_get_finish(optp, opt, optlen);
}
532 531
/*
 * Kernel-space counterpart of ip_options_get_from_user(): build and
 * install an ip_options block from an in-kernel buffer.  Returns 0 on
 * success, -ENOMEM on allocation failure, or -EINVAL if the options
 * do not compile.
 */
int ip_options_get(struct ip_options **optp, unsigned char *data, int optlen)
{
	struct ip_options *opt = ip_options_get_alloc(optlen);

	if (!opt)
		return -ENOMEM;
	if (optlen)
		memcpy(opt->__data, data, optlen);
	return ip_options_get_finish(optp, opt, optlen);
}
543 542
/*
 * Rewrite IP options while forwarding a packet: fill in our outgoing
 * address for Record Route, for a matched Source Route hop, and for a
 * Timestamp-with-address slot.  If any option byte changed, the IP
 * header checksum is recomputed.
 */
void ip_forward_options(struct sk_buff *skb)
{
	struct ip_options * opt	= &(IPCB(skb)->opt);
	unsigned char * optptr;
	struct rtable *rt = (struct rtable*)skb->dst;
	unsigned char *raw = skb->nh.raw;

	if (opt->rr_needaddr) {
		/* Record Route: write our source address into the
		 * pre-reserved slot (optptr[2] points past it). */
		optptr = (unsigned char *)raw + opt->rr;
		ip_rt_get_source(&optptr[optptr[2]-5], rt);
		opt->is_changed = 1;
	}
	if (opt->srr_is_hit) {
		int srrptr, srrspace;

		optptr = raw + opt->srr;

		/* Locate the SRR slot holding the next hop we routed to
		 * (slots are 4-byte addresses, 1-based offsets). */
		for ( srrptr=optptr[2], srrspace = optptr[1];
		     srrptr <= srrspace;
		     srrptr += 4
		     ) {
			if (srrptr + 3 > srrspace)
				break;
			if (memcmp(&rt->rt_dst, &optptr[srrptr-1], 4) == 0)
				break;
		}
		if (srrptr + 3 <= srrspace) {
			/* Replace the consumed hop with our address and
			 * advance the option pointer past it. */
			opt->is_changed = 1;
			ip_rt_get_source(&optptr[srrptr-1], rt);
			skb->nh.iph->daddr = rt->rt_dst;
			optptr[2] = srrptr+4;
		} else if (net_ratelimit())
			printk(KERN_CRIT "ip_forward(): Argh! Destination lost!\n");
		if (opt->ts_needaddr) {
			/* Timestamp-with-address: stamp our source address. */
			optptr = raw + opt->ts;
			ip_rt_get_source(&optptr[optptr[2]-9], rt);
			opt->is_changed = 1;
		}
	}
	if (opt->is_changed) {
		opt->is_changed = 0;
		ip_send_check(skb->nh.iph);
	}
}
588 587
/*
 * Handle a Strict/Loose Source Route option on a received packet:
 * when we are the current destination, re-route the skb towards the
 * next listed hop and flag the option for rewriting in
 * ip_forward_options().
 *
 * Returns 0 to continue processing (possibly with a new dst), or
 * -EINVAL when the SRR is disallowed or malformed (emitting an ICMP
 * parameter problem where appropriate).
 */
int ip_options_rcv_srr(struct sk_buff *skb)
{
	struct ip_options *opt = &(IPCB(skb)->opt);
	int srrspace, srrptr;
	u32 nexthop;
	struct iphdr *iph = skb->nh.iph;
	unsigned char * optptr = skb->nh.raw + opt->srr;
	struct rtable *rt = (struct rtable*)skb->dst;
	struct rtable *rt2;
	int err;

	if (!opt->srr)
		return 0;

	/* Source routing is only processed on packets addressed to us. */
	if (skb->pkt_type != PACKET_HOST)
		return -EINVAL;
	if (rt->rt_type == RTN_UNICAST) {
		/* We are not the listed destination: loose SR may simply
		 * be forwarded; strict SR must be rejected. */
		if (!opt->is_strictroute)
			return 0;
		icmp_send(skb, ICMP_PARAMETERPROB, 0, htonl(16<<24));
		return -EINVAL;
	}
	if (rt->rt_type != RTN_LOCAL)
		return -EINVAL;

	for (srrptr=optptr[2], srrspace = optptr[1]; srrptr <= srrspace; srrptr += 4) {
		if (srrptr + 3 > srrspace) {
			/* Pointer runs past the option: malformed SRR. */
			icmp_send(skb, ICMP_PARAMETERPROB, 0, htonl((opt->srr+2)<<24));
			return -EINVAL;
		}
		memcpy(&nexthop, &optptr[srrptr-1], 4);

		/* Re-route the packet towards the listed next hop,
		 * keeping the old route so it can be restored on error. */
		rt = (struct rtable*)skb->dst;
		skb->dst = NULL;
		err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, skb->dev);
		rt2 = (struct rtable*)skb->dst;
		if (err || (rt2->rt_type != RTN_UNICAST && rt2->rt_type != RTN_LOCAL)) {
			ip_rt_put(rt2);
			skb->dst = &rt->u.dst;	/* restore original route */
			return -EINVAL;
		}
		ip_rt_put(rt);
		if (rt2->rt_type != RTN_LOCAL)
			break;
		/* Superfast 8) loopback forward */
		memcpy(&iph->daddr, &optptr[srrptr-1], 4);
		opt->is_changed = 1;
	}
	if (srrptr <= srrspace) {
		/* A hop was consumed; ip_forward_options() rewrites it. */
		opt->srr_is_hit = 1;
		opt->is_changed = 1;
	}
	return 0;
}
643 642
1 /* 1 /*
2 * Linux NET3: IP/IP protocol decoder. 2 * Linux NET3: IP/IP protocol decoder.
3 * 3 *
4 * Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $ 4 * Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
5 * 5 *
6 * Authors: 6 * Authors:
7 * Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95 7 * Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
8 * 8 *
9 * Fixes: 9 * Fixes:
10 * Alan Cox : Merged and made usable non modular (its so tiny its silly as 10 * Alan Cox : Merged and made usable non modular (its so tiny its silly as
11 * a module taking up 2 pages). 11 * a module taking up 2 pages).
12 * Alan Cox : Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph) 12 * Alan Cox : Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
13 * to keep ip_forward happy. 13 * to keep ip_forward happy.
14 * Alan Cox : More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8). 14 * Alan Cox : More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
15 * Kai Schulte : Fixed #defines for IP_FIREWALL->FIREWALL 15 * Kai Schulte : Fixed #defines for IP_FIREWALL->FIREWALL
16 * David Woodhouse : Perform some basic ICMP handling. 16 * David Woodhouse : Perform some basic ICMP handling.
17 * IPIP Routing without decapsulation. 17 * IPIP Routing without decapsulation.
18 * Carlos Picoto : GRE over IP support 18 * Carlos Picoto : GRE over IP support
19 * Alexey Kuznetsov: Reworked. Really, now it is truncated version of ipv4/ip_gre.c. 19 * Alexey Kuznetsov: Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
20 * I do not want to merge them together. 20 * I do not want to merge them together.
21 * 21 *
22 * This program is free software; you can redistribute it and/or 22 * This program is free software; you can redistribute it and/or
23 * modify it under the terms of the GNU General Public License 23 * modify it under the terms of the GNU General Public License
24 * as published by the Free Software Foundation; either version 24 * as published by the Free Software Foundation; either version
25 * 2 of the License, or (at your option) any later version. 25 * 2 of the License, or (at your option) any later version.
26 * 26 *
27 */ 27 */
28 28
29 /* tunnel.c: an IP tunnel driver 29 /* tunnel.c: an IP tunnel driver
30 30
31 The purpose of this driver is to provide an IP tunnel through 31 The purpose of this driver is to provide an IP tunnel through
32 which you can tunnel network traffic transparently across subnets. 32 which you can tunnel network traffic transparently across subnets.
33 33
34 This was written by looking at Nick Holloway's dummy driver 34 This was written by looking at Nick Holloway's dummy driver
35 Thanks for the great code! 35 Thanks for the great code!
36 36
37 -Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95 37 -Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
38 38
39 Minor tweaks: 39 Minor tweaks:
40 Cleaned up the code a little and added some pre-1.3.0 tweaks. 40 Cleaned up the code a little and added some pre-1.3.0 tweaks.
41 dev->hard_header/hard_header_len changed to use no headers. 41 dev->hard_header/hard_header_len changed to use no headers.
42 Comments/bracketing tweaked. 42 Comments/bracketing tweaked.
43 Made the tunnels use dev->name not tunnel: when error reporting. 43 Made the tunnels use dev->name not tunnel: when error reporting.
44 Added tx_dropped stat 44 Added tx_dropped stat
45 45
46 -Alan Cox (Alan.Cox@linux.org) 21 March 95 46 -Alan Cox (Alan.Cox@linux.org) 21 March 95
47 47
48 Reworked: 48 Reworked:
49 Changed to tunnel to destination gateway in addition to the 49 Changed to tunnel to destination gateway in addition to the
50 tunnel's pointopoint address 50 tunnel's pointopoint address
51 Almost completely rewritten 51 Almost completely rewritten
52 Note: There is currently no firewall or ICMP handling done. 52 Note: There is currently no firewall or ICMP handling done.
53 53
54 -Sam Lantinga (slouken@cs.ucdavis.edu) 02/13/96 54 -Sam Lantinga (slouken@cs.ucdavis.edu) 02/13/96
55 55
56 */ 56 */
57 57
58 /* Things I wish I had known when writing the tunnel driver: 58 /* Things I wish I had known when writing the tunnel driver:
59 59
60 When the tunnel_xmit() function is called, the skb contains the 60 When the tunnel_xmit() function is called, the skb contains the
61 packet to be sent (plus a great deal of extra info), and dev 61 packet to be sent (plus a great deal of extra info), and dev
62 contains the tunnel device that _we_ are. 62 contains the tunnel device that _we_ are.
63 63
64 When we are passed a packet, we are expected to fill in the 64 When we are passed a packet, we are expected to fill in the
65 source address with our source IP address. 65 source address with our source IP address.
66 66
67 What is the proper way to allocate, copy and free a buffer? 67 What is the proper way to allocate, copy and free a buffer?
68 After you allocate it, it is a "0 length" chunk of memory 68 After you allocate it, it is a "0 length" chunk of memory
69 starting at zero. If you want to add headers to the buffer 69 starting at zero. If you want to add headers to the buffer
70 later, you'll have to call "skb_reserve(skb, amount)" with 70 later, you'll have to call "skb_reserve(skb, amount)" with
71 the amount of memory you want reserved. Then, you call 71 the amount of memory you want reserved. Then, you call
72 "skb_put(skb, amount)" with the amount of space you want in 72 "skb_put(skb, amount)" with the amount of space you want in
73 the buffer. skb_put() returns a pointer to the top (#0) of 73 the buffer. skb_put() returns a pointer to the top (#0) of
74 that buffer. skb->len is set to the amount of space you have 74 that buffer. skb->len is set to the amount of space you have
75 "allocated" with skb_put(). You can then write up to skb->len 75 "allocated" with skb_put(). You can then write up to skb->len
76 bytes to that buffer. If you need more, you can call skb_put() 76 bytes to that buffer. If you need more, you can call skb_put()
77 again with the additional amount of space you need. You can 77 again with the additional amount of space you need. You can
78 find out how much more space you can allocate by calling 78 find out how much more space you can allocate by calling
79 "skb_tailroom(skb)". 79 "skb_tailroom(skb)".
80 Now, to add header space, call "skb_push(skb, header_len)". 80 Now, to add header space, call "skb_push(skb, header_len)".
81 This creates space at the beginning of the buffer and returns 81 This creates space at the beginning of the buffer and returns
82 a pointer to this new space. If later you need to strip a 82 a pointer to this new space. If later you need to strip a
83 header from a buffer, call "skb_pull(skb, header_len)". 83 header from a buffer, call "skb_pull(skb, header_len)".
84 skb_headroom() will return how much space is left at the top 84 skb_headroom() will return how much space is left at the top
85 of the buffer (before the main data). Remember, this headroom 85 of the buffer (before the main data). Remember, this headroom
86 space must be reserved before the skb_put() function is called. 86 space must be reserved before the skb_put() function is called.
87 */ 87 */
88 88
89 /* 89 /*
90 This version of net/ipv4/ipip.c is cloned of net/ipv4/ip_gre.c 90 This version of net/ipv4/ipip.c is cloned of net/ipv4/ip_gre.c
91 91
92 For comments look at net/ipv4/ip_gre.c --ANK 92 For comments look at net/ipv4/ip_gre.c --ANK
93 */ 93 */
94 94
95 95
96 #include <linux/capability.h> 96 #include <linux/capability.h>
97 #include <linux/module.h> 97 #include <linux/module.h>
98 #include <linux/types.h> 98 #include <linux/types.h>
99 #include <linux/sched.h> 99 #include <linux/sched.h>
100 #include <linux/kernel.h> 100 #include <linux/kernel.h>
101 #include <asm/uaccess.h> 101 #include <asm/uaccess.h>
102 #include <linux/skbuff.h> 102 #include <linux/skbuff.h>
103 #include <linux/netdevice.h> 103 #include <linux/netdevice.h>
104 #include <linux/in.h> 104 #include <linux/in.h>
105 #include <linux/tcp.h> 105 #include <linux/tcp.h>
106 #include <linux/udp.h> 106 #include <linux/udp.h>
107 #include <linux/if_arp.h> 107 #include <linux/if_arp.h>
108 #include <linux/mroute.h> 108 #include <linux/mroute.h>
109 #include <linux/init.h> 109 #include <linux/init.h>
110 #include <linux/netfilter_ipv4.h> 110 #include <linux/netfilter_ipv4.h>
111 #include <linux/if_ether.h> 111 #include <linux/if_ether.h>
112 112
113 #include <net/sock.h> 113 #include <net/sock.h>
114 #include <net/ip.h> 114 #include <net/ip.h>
115 #include <net/icmp.h> 115 #include <net/icmp.h>
116 #include <net/ipip.h> 116 #include <net/ipip.h>
117 #include <net/inet_ecn.h> 117 #include <net/inet_ecn.h>
118 #include <net/xfrm.h> 118 #include <net/xfrm.h>
119 119
#define HASH_SIZE  16
/*
 * Hash an IPv4 address into a HASH_SIZE-bucket index.  The argument
 * is fully parenthesized so that expressions such as HASH(a | b)
 * expand correctly (the original unparenthesized form mis-binds
 * lower-precedence operators in the argument).
 */
#define HASH(addr) ((((addr) ^ ((addr) >> 4))) & 0xF)
122 122
/* ipip device init/setup hooks, defined later in this file. */
static int ipip_fb_tunnel_init(struct net_device *dev);
static int ipip_tunnel_init(struct net_device *dev);
static void ipip_tunnel_setup(struct net_device *dev);

/* Fallback tunnel device occupying the wildcard slot. */
static struct net_device *ipip_fb_tunnel_dev;

/*
 * Tunnel hash tables, keyed by which endpoint addresses a tunnel has
 * configured: both remote and local, remote only, local only, or
 * neither (single wildcard slot).
 */
static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
static struct ip_tunnel *tunnels_r[HASH_SIZE];
static struct ip_tunnel *tunnels_l[HASH_SIZE];
static struct ip_tunnel *tunnels_wc[1];
/* Indexed by priority: 0 = wildcard, 1 = local, 2 = remote, 3 = both. */
static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };

/* Protects the tunnel hash tables above. */
static DEFINE_RWLOCK(ipip_lock);
136 136
/*
 * Find an up (IFF_UP) tunnel matching the given outer addresses,
 * preferring the most specific match: remote+local, then remote only,
 * then local only, then the wildcard fallback.  Returns NULL if no
 * tunnel matches.  Caller is expected to hold ipip_lock for reading
 * (see callers such as ipip_err).
 */
static struct ip_tunnel * ipip_tunnel_lookup(u32 remote, u32 local)
{
	unsigned h0 = HASH(remote);
	unsigned h1 = HASH(local);
	struct ip_tunnel *t;

	for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
			return t;
	}
	for (t = tunnels_r[h0]; t; t = t->next) {
		if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
			return t;
	}
	for (t = tunnels_l[h1]; t; t = t->next) {
		if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
			return t;
	}
	if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
		return t;
	return NULL;
}
160 160
/*
 * Return the hash-chain head this tunnel belongs on, chosen by which
 * endpoint addresses it has configured (see the tunnels[] priority
 * table: bit 1 = remote set, bit 0 = local set).
 */
static struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
{
	u32 remote = t->parms.iph.daddr;
	u32 local = t->parms.iph.saddr;
	unsigned h = 0;
	int prio = 0;

	if (remote) {
		prio |= 2;
		h ^= HASH(remote);
	}
	if (local) {
		prio |= 1;
		h ^= HASH(local);
	}
	return &tunnels[prio][h];
}
178 178
179 179
/*
 * Remove @t from its hash chain, if present.  The chain walk itself
 * is unlocked; only the unlink write is done under ipip_lock.
 */
static void ipip_tunnel_unlink(struct ip_tunnel *t)
{
	struct ip_tunnel **tp;

	for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) {
		if (t == *tp) {
			write_lock_bh(&ipip_lock);
			*tp = t->next;
			write_unlock_bh(&ipip_lock);
			break;
		}
	}
}
193 193
/*
 * Insert @t at the head of its hash chain; the head-pointer update is
 * done under ipip_lock.
 */
static void ipip_tunnel_link(struct ip_tunnel *t)
{
	struct ip_tunnel **tp = ipip_bucket(t);

	t->next = *tp;
	write_lock_bh(&ipip_lock);
	*tp = t;
	write_unlock_bh(&ipip_lock);
}
203 203
/*
 * Find the tunnel whose endpoints match @parms exactly; if none
 * exists and @create is set, allocate, register and link a new
 * tunnel device (named from parms->name, or the first free "tunl%d").
 *
 * Returns the existing or new tunnel, or NULL on lookup miss
 * (!create) or creation failure.
 */
static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
{
	u32 remote = parms->iph.daddr;
	u32 local = parms->iph.saddr;
	struct ip_tunnel *t, **tp, *nt;
	struct net_device *dev;
	unsigned h = 0;
	int prio = 0;
	char name[IFNAMSIZ];

	/* Same bucket selection as ipip_bucket(), but from parms. */
	if (remote) {
		prio |= 2;
		h ^= HASH(remote);
	}
	if (local) {
		prio |= 1;
		h ^= HASH(local);
	}
	for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
			return t;
	}
	if (!create)
		return NULL;

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else {
		/* No name given: pick the first unused tunl%d. */
		int i;
		for (i=1; i<100; i++) {
			sprintf(name, "tunl%d", i);
			if (__dev_get_by_name(name) == NULL)
				break;
		}
		if (i==100)
			goto failed;
	}

	dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
	if (dev == NULL)
		return NULL;

	nt = netdev_priv(dev);
	SET_MODULE_OWNER(dev);
	dev->init = ipip_tunnel_init;
	nt->parms = *parms;

	if (register_netdevice(dev) < 0) {
		free_netdev(dev);
		goto failed;
	}

	/* Hold a reference for the hash table; dropped in uninit. */
	dev_hold(dev);
	ipip_tunnel_link(nt);
	return nt;

failed:
	return NULL;
}
263 263
/*
 * netdev uninit hook: remove the tunnel from the hash tables (the
 * fallback device only occupies the wildcard slot) and drop the
 * reference taken when the device was created.
 */
static void ipip_tunnel_uninit(struct net_device *dev)
{
	if (dev == ipip_fb_tunnel_dev) {
		write_lock_bh(&ipip_lock);
		tunnels_wc[0] = NULL;
		write_unlock_bh(&ipip_lock);
	} else
		ipip_tunnel_unlink(netdev_priv(dev));
	dev_put(dev);
}
274 274
/*
 * ICMP error handler for the IPIP protocol: an ICMP error arrived
 * quoting one of our encapsulated packets.  Look up the tunnel from
 * the quoted outer header and record the error on it so that later
 * transmits can relay a rate-limited error to the inner sender.
 *
 * Returns 0, or -ENOENT when no matching tunnel exists.
 */
static int ipip_err(struct sk_buff *skb, u32 info)
{
#ifndef I_WISH_WORLD_WERE_PERFECT

/* It is not :-( All the routers (except for Linux) return only
   8 bytes of packet payload. It means, that precise relaying of
   ICMP in the real Internet is absolutely infeasible.
 */
	struct iphdr *iph = (struct iphdr*)skb->data;
	int type = skb->h.icmph->type;
	int code = skb->h.icmph->code;
	struct ip_tunnel *t;
	int err;

	/* Filter down to the error types we care about. */
	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return 0;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return 0;
		case ICMP_FRAG_NEEDED:
			/* Soft state for pmtu is maintained by IP core. */
			return 0;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return 0;
		break;
	}

	err = -ENOENT;

	read_lock(&ipip_lock);
	t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
	if (t == NULL || t->parms.iph.daddr == 0)
		goto out;

	err = 0;
	/* ttl == 0 means "inherit from inner packet"; a time-exceeded
	 * error is then not unexpected, so don't record it. */
	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		goto out;

	/* Count consecutive errors within the timeout window; used for
	 * rate-limiting relayed errors on the transmit path. */
	if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
out:
	read_unlock(&ipip_lock);
	return err;
#else
	/* NOTE(review): this branch is never compiled and is stale —
	 * it references dp, len and key, none of which are declared in
	 * this function, so it would not build if enabled. */
	struct iphdr *iph = (struct iphdr*)dp;
	int hlen = iph->ihl<<2;
	struct iphdr *eiph;
	int type = skb->h.icmph->type;
	int code = skb->h.icmph->code;
	int rel_type = 0;
	int rel_code = 0;
	int rel_info = 0;
	struct sk_buff *skb2;
	struct flowi fl;
	struct rtable *rt;

	if (len < hlen + sizeof(struct iphdr))
		return 0;
	eiph = (struct iphdr*)(dp + hlen);

	switch (type) {
	default:
		return 0;
	case ICMP_PARAMETERPROB:
		if (skb->h.icmph->un.gateway < hlen)
			return 0;

		/* So... This guy found something strange INSIDE encapsulated
		   packet. Well, he is fool, but what can we do ?
		 */
		rel_type = ICMP_PARAMETERPROB;
		rel_info = skb->h.icmph->un.gateway - hlen;
		break;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return 0;
		case ICMP_FRAG_NEEDED:
			/* And it is the only really necessary thing :-) */
			rel_info = ntohs(skb->h.icmph->un.frag.mtu);
			if (rel_info < hlen+68)
				return 0;
			rel_info -= hlen;
			/* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
			if (rel_info > ntohs(eiph->tot_len))
				return 0;
			break;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe, it is just ether pollution. --ANK
			 */
			rel_type = ICMP_DEST_UNREACH;
			rel_code = ICMP_HOST_UNREACH;
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return 0;
		break;
	}

	/* Prepare fake skb to feed it to icmp_send */
	skb2 = skb_clone(skb, GFP_ATOMIC);
	if (skb2 == NULL)
		return 0;
	dst_release(skb2->dst);
	skb2->dst = NULL;
	skb_pull(skb2, skb->data - (u8*)eiph);
	skb2->nh.raw = skb2->data;

	/* Try to guess incoming interface */
	memset(&fl, 0, sizeof(fl));
	fl.fl4_daddr = eiph->saddr;
	fl.fl4_tos = RT_TOS(eiph->tos);
	fl.proto = IPPROTO_IPIP;
	if (ip_route_output_key(&rt, &key)) {
		kfree_skb(skb2);
		return 0;
	}
	skb2->dev = rt->u.dst.dev;

	/* route "incoming" packet */
	if (rt->rt_flags&RTCF_LOCAL) {
		/* Quoted packet was ours: re-route towards the inner
		 * destination, requiring a tunnel device. */
		ip_rt_put(rt);
		rt = NULL;
		fl.fl4_daddr = eiph->daddr;
		fl.fl4_src = eiph->saddr;
		fl.fl4_tos = eiph->tos;
		if (ip_route_output_key(&rt, &fl) ||
		    rt->u.dst.dev->type != ARPHRD_TUNNEL) {
			ip_rt_put(rt);
			kfree_skb(skb2);
			return 0;
		}
	} else {
		ip_rt_put(rt);
		if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
		    skb2->dst->dev->type != ARPHRD_TUNNEL) {
			kfree_skb(skb2);
			return 0;
		}
	}

	/* change mtu on this route */
	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
		if (rel_info > dst_mtu(skb2->dst)) {
			kfree_skb(skb2);
			return 0;
		}
		skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
		rel_info = htonl(rel_info);
	} else if (type == ICMP_TIME_EXCEEDED) {
		struct ip_tunnel *t = netdev_priv(skb2->dev);
		if (t->parms.iph.ttl) {
			/* Fixed-TTL tunnels turn TTL expiry into
			 * host-unreachable for the inner sender. */
			rel_type = ICMP_DEST_UNREACH;
			rel_code = ICMP_HOST_UNREACH;
		}
	}

	icmp_send(skb2, rel_type, rel_code, rel_info);
	kfree_skb(skb2);
	return 0;
#endif
}
462 462
463 static inline void ipip_ecn_decapsulate(struct iphdr *outer_iph, struct sk_buff *skb) 463 static inline void ipip_ecn_decapsulate(struct iphdr *outer_iph, struct sk_buff *skb)
464 { 464 {
465 struct iphdr *inner_iph = skb->nh.iph; 465 struct iphdr *inner_iph = skb->nh.iph;
466 466
467 if (INET_ECN_is_ce(outer_iph->tos)) 467 if (INET_ECN_is_ce(outer_iph->tos))
468 IP_ECN_set_ce(inner_iph); 468 IP_ECN_set_ce(inner_iph);
469 } 469 }
470 470
/*
 * Receive handler for IPIP-encapsulated packets.
 *
 * Looks up the tunnel matching the outer header's (saddr, daddr) under
 * ipip_lock, strips the outer IP header, re-injects the inner packet via
 * netif_rx(), and returns 0 (consumed).  Returns -1 when no tunnel
 * matches so the caller can fall through to other handlers.
 *
 * NOTE: IPCB(skb)->opt is deliberately NOT cleared here; ip_rcv() already
 * zeroes it for every received packet, so doing so again would be redundant.
 */
static int ipip_rcv(struct sk_buff *skb)
{
	struct iphdr *iph;
	struct ip_tunnel *tunnel;

	iph = skb->nh.iph;	/* outer header, still at skb->nh at this point */

	read_lock(&ipip_lock);
	if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
		/* IPsec policy must permit decapsulation of this packet. */
		if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
			read_unlock(&ipip_lock);
			kfree_skb(skb);
			return 0;
		}

		secpath_reset(skb);

		/* Make the inner IP header the network header; the old outer
		 * header becomes the (meaningless) MAC header. */
		skb->mac.raw = skb->nh.raw;
		skb->nh.raw = skb->data;
		skb->protocol = htons(ETH_P_IP);
		skb->pkt_type = PACKET_HOST;

		tunnel->stat.rx_packets++;
		tunnel->stat.rx_bytes += skb->len;
		skb->dev = tunnel->dev;
		/* Drop the route attached to the outer packet; the inner
		 * packet will be routed afresh. */
		dst_release(skb->dst);
		skb->dst = NULL;
		nf_reset(skb);
		ipip_ecn_decapsulate(iph, skb);
		netif_rx(skb);
		read_unlock(&ipip_lock);
		return 0;
	}
	read_unlock(&ipip_lock);

	return -1;
}
509 508
/*
 *	This function assumes it is being called from dev_queue_xmit()
 *	and that skb is filled properly by that function.
 *
 *	Transmit path: route the encapsulated packet to the tunnel peer,
 *	handle path-MTU, then push an outer IPv4 header (protocol IPPROTO_IPIP)
 *	and hand the result to the lower device via IPTUNNEL_XMIT().
 *	Always returns 0; on failure the skb is freed and tx_errors bumped.
 */

static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net_device_stats *stats = &tunnel->stat;
	struct iphdr *tiph = &tunnel->parms.iph;	/* configured outer-header template */
	u8 tos = tunnel->parms.iph.tos;
	u16 df = tiph->frag_off;
	struct rtable *rt;			/* Route to the other host */
	struct net_device *tdev;		/* Device to other host */
	struct iphdr *old_iph = skb->nh.iph;	/* inner (original) header */
	struct iphdr *iph;			/* Our new IP header */
	int max_headroom;			/* The extra header space needed */
	u32 dst = tiph->daddr;
	int mtu;

	/* Guard against a tunnel routing back into itself (xmit recursion). */
	if (tunnel->recursion++) {
		tunnel->stat.collisions++;
		goto tx_error;
	}

	if (skb->protocol != htons(ETH_P_IP))
		goto tx_error;

	/* tos bit 0 set in the config means "inherit TOS from inner header". */
	if (tos&1)
		tos = old_iph->tos;

	if (!dst) {
		/* NBMA tunnel: no fixed peer; take the next hop from the
		 * route already attached to the skb. */
		if ((rt = (struct rtable*)skb->dst) == NULL) {
			tunnel->stat.tx_fifo_errors++;
			goto tx_error;
		}
		if ((dst = rt->rt_gateway) == 0)
			goto tx_error_icmp;
	}

	{
		struct flowi fl = { .oif = tunnel->parms.link,
				    .nl_u = { .ip4_u =
					      { .daddr = dst,
						.saddr = tiph->saddr,
						.tos = RT_TOS(tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(&rt, &fl)) {
			tunnel->stat.tx_carrier_errors++;
			goto tx_error_icmp;
		}
	}
	tdev = rt->u.dst.dev;

	/* Routing back out of ourselves would loop forever. */
	if (tdev == dev) {
		ip_rt_put(rt);
		tunnel->stat.collisions++;
		goto tx_error;
	}

	/* Effective inner MTU: outer path MTU minus the encapsulation header
	 * when DF is forced, otherwise whatever the inner route allows. */
	if (tiph->frag_off)
		mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
	else
		mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;

	if (mtu < 68) {		/* below the IPv4 minimum MTU */
		tunnel->stat.collisions++;
		ip_rt_put(rt);
		goto tx_error;
	}
	if (skb->dst)
		skb->dst->ops->update_pmtu(skb->dst, mtu);

	df |= (old_iph->frag_off&htons(IP_DF));

	/* DF set and packet too big: bounce an ICMP FRAG_NEEDED to the sender. */
	if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
		ip_rt_put(rt);
		goto tx_error;
	}

	/* Replay recently received ICMP errors back to local senders for a
	 * short window (IPTUNNEL_ERR_TIMEO) after an error was seen. */
	if (tunnel->err_count > 0) {
		if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
			tunnel->err_count--;
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));

	if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
		/* Not enough headroom (or not ours to scribble on): reallocate. */
		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
		if (!new_skb) {
			ip_rt_put(rt);
			stats->tx_dropped++;
			dev_kfree_skb(skb);
			tunnel->recursion--;
			return 0;
		}
		if (skb->sk)
			skb_set_owner_w(new_skb, skb->sk);
		dev_kfree_skb(skb);
		skb = new_skb;
		old_iph = skb->nh.iph;
	}

	skb->h.raw = skb->nh.raw;
	skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
	/* The control block now describes the OUTER packet: clear stale
	 * inner-packet options and xfrm flags. */
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
			      IPSKB_REROUTED);
	dst_release(skb->dst);
	skb->dst = &rt->u.dst;	/* route reference transferred to the skb */

	/*
	 *	Push down and install the IPIP header.
	 */

	iph			=	skb->nh.iph;
	iph->version		=	4;
	iph->ihl		=	sizeof(struct iphdr)>>2;
	iph->frag_off		=	df;
	iph->protocol		=	IPPROTO_IPIP;
	iph->tos		=	INET_ECN_encapsulate(tos, old_iph->tos);
	iph->daddr		=	rt->rt_dst;
	iph->saddr		=	rt->rt_src;

	/* Configured TTL of 0 means "inherit TTL from the inner header". */
	if ((iph->ttl = tiph->ttl) == 0)
		iph->ttl	=	old_iph->ttl;

	nf_reset(skb);

	IPTUNNEL_XMIT();
	tunnel->recursion--;
	return 0;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	stats->tx_errors++;
	dev_kfree_skb(skb);
	tunnel->recursion--;
	return 0;
}
659 658
/*
 * Tunnel configuration ioctl handler (SIOCGETTUNNEL / SIOCADDTUNNEL /
 * SIOCCHGTUNNEL / SIOCDELTUNNEL).  ifr->ifr_ifru.ifru_data points at a
 * userspace struct ip_tunnel_parm.  Returns 0 or a negative errno.
 * Caller holds the RTNL lock (standard for device ioctls).
 */
static int
ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip_tunnel_parm p;
	struct ip_tunnel *t;

	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		/* On the fallback device, look up the tunnel named by the
		 * user-supplied parameters; otherwise report this device. */
		if (dev == ipip_fb_tunnel_dev) {
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
				err = -EFAULT;
				break;
			}
			t = ipip_tunnel_locate(&p, 0);
		}
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(&p, &t->parms, sizeof(p));
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
			err = -EFAULT;
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
			goto done;

		/* Validate the outer-header template: plain IPv4/IPIP, no
		 * options (ihl == 5), only the DF bit allowed in frag_off. */
		err = -EINVAL;
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
			goto done;
		/* A fixed TTL implies PMTU discovery, which requires DF. */
		if (p.iph.ttl)
			p.iph.frag_off |= htons(IP_DF);

		/* For SIOCADDTUNNEL this also creates the tunnel. */
		t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL);

		if (dev != ipip_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				/* New endpoints collide with another tunnel. */
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				/* Re-key this device: the point-to-point
				 * nature of the device must not change. */
				if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
				    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
					err = -EINVAL;
					break;
				}
				t = netdev_priv(dev);
				/* Unhash, update the endpoint addresses, rehash. */
				ipip_tunnel_unlink(t);
				t->parms.iph.saddr = p.iph.saddr;
				t->parms.iph.daddr = p.iph.daddr;
				memcpy(dev->dev_addr, &p.iph.saddr, 4);
				memcpy(dev->broadcast, &p.iph.daddr, 4);
				ipip_tunnel_link(t);
				netdev_state_change(dev);
			}
		}

		if (t) {
			err = 0;
			if (cmd == SIOCCHGTUNNEL) {
				t->parms.iph.ttl = p.iph.ttl;
				t->parms.iph.tos = p.iph.tos;
				t->parms.iph.frag_off = p.iph.frag_off;
			}
			/* Hand the (possibly updated) parameters back. */
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
				err = -EFAULT;
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		if (dev == ipip_fb_tunnel_dev) {
			/* Deleting "through" the fallback device: resolve the
			 * target by parameters; the fallback itself is immortal. */
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
				goto done;
			err = -ENOENT;
			if ((t = ipip_tunnel_locate(&p, 0)) == NULL)
				goto done;
			err = -EPERM;
			if (t->dev == ipip_fb_tunnel_dev)
				goto done;
			dev = t->dev;
		}
		err = unregister_netdevice(dev);
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
766 765
767 static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev) 766 static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
768 { 767 {
769 return &(((struct ip_tunnel*)netdev_priv(dev))->stat); 768 return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
770 } 769 }
771 770
772 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu) 771 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
773 { 772 {
774 if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr)) 773 if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
775 return -EINVAL; 774 return -EINVAL;
776 dev->mtu = new_mtu; 775 dev->mtu = new_mtu;
777 return 0; 776 return 0;
778 } 777 }
779 778
/*
 * alloc_netdev() setup callback: install the ipip net_device operations
 * and default link-level parameters for a newly allocated tunnel device.
 */
static void ipip_tunnel_setup(struct net_device *dev)
{
	SET_MODULE_OWNER(dev);
	dev->uninit		= ipip_tunnel_uninit;
	dev->hard_start_xmit	= ipip_tunnel_xmit;
	dev->get_stats		= ipip_tunnel_get_stats;
	dev->do_ioctl		= ipip_tunnel_ioctl;
	dev->change_mtu		= ipip_tunnel_change_mtu;
	dev->destructor		= free_netdev;

	dev->type		= ARPHRD_TUNNEL;
	/* Reserve worst-case lower-layer headroom plus the outer IP header. */
	dev->hard_header_len	= LL_MAX_HEADER + sizeof(struct iphdr);
	/* Default MTU: Ethernet payload minus the encapsulation overhead. */
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr);
	dev->flags		= IFF_NOARP;
	dev->iflink		= 0;
	dev->addr_len		= 4;	/* addresses are raw IPv4 addresses */
}
797 796
/*
 * net_device init callback: bind the private ip_tunnel to its device and,
 * when a remote endpoint is configured, derive hard_header_len/mtu from the
 * underlying device the outer packets will be routed through.
 */
static int ipip_tunnel_init(struct net_device *dev)
{
	struct net_device *tdev = NULL;	/* underlying (lower) device, if found */
	struct ip_tunnel *tunnel;
	struct iphdr *iph;

	tunnel = netdev_priv(dev);
	iph = &tunnel->parms.iph;

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	/* Device "MAC" addresses are simply the tunnel endpoint IPs. */
	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);

	if (iph->daddr) {
		/* Fixed peer: probe the route to learn the lower device. */
		struct flowi fl = { .oif = tunnel->parms.link,
				    .nl_u = { .ip4_u =
					      { .daddr = iph->daddr,
						.saddr = iph->saddr,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		struct rtable *rt;
		if (!ip_route_output_key(&rt, &fl)) {
			tdev = rt->u.dst.dev;
			ip_rt_put(rt);
		}
		dev->flags |= IFF_POINTOPOINT;
	}

	/* No route found but a link was configured: use that device. */
	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->parms.link);

	if (tdev) {
		dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
		dev->mtu = tdev->mtu - sizeof(struct iphdr);
	}
	dev->iflink = tunnel->parms.link;

	return 0;
}
839 838
/*
 * Init callback for the fallback device "tunl0", the wildcard tunnel that
 * receives any IPIP packet not matched by a more specific tunnel.  Holds an
 * extra device reference and registers itself in the wildcard hash slot.
 */
static int __init ipip_fb_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	/* saddr/daddr stay 0: matches any endpoint pair. */
	iph->version		= 4;
	iph->protocol		= IPPROTO_IPIP;
	iph->ihl		= 5;

	dev_hold(dev);
	tunnels_wc[0]		= tunnel;
	return 0;
}
856 855
/* Protocol hooks for IPPROTO_IPIP registered with the xfrm tunnel layer. */
static struct xfrm_tunnel ipip_handler = {
	.handler	=	ipip_rcv,
	.err_handler	=	ipip_err,
	.priority	=	1,
};
862 861
/* Printed once at module load time. */
static char banner[] __initdata =
	KERN_INFO "IPv4 over IPv4 tunneling driver\n";
865 864
/*
 * Module init: register the IPIP protocol handler, then create and register
 * the fallback "tunl0" device.  On any failure the steps already taken are
 * unwound via the goto ladder below.  Returns 0 or a negative errno.
 */
static int __init ipip_init(void)
{
	int err;

	printk(banner);

	if (xfrm4_tunnel_register(&ipip_handler)) {
		printk(KERN_INFO "ipip init: can't register tunnel\n");
		return -EAGAIN;
	}

	ipip_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
					  "tunl0",
					  ipip_tunnel_setup);
	if (!ipip_fb_tunnel_dev) {
		err = -ENOMEM;
		goto err1;
	}

	ipip_fb_tunnel_dev->init = ipip_fb_tunnel_init;

	if ((err = register_netdev(ipip_fb_tunnel_dev)))
		goto err2;
 out:
	return err;
 err2:
	free_netdev(ipip_fb_tunnel_dev);
 err1:
	xfrm4_tunnel_deregister(&ipip_handler);
	goto out;
}
897 896
898 static void __exit ipip_destroy_tunnels(void) 897 static void __exit ipip_destroy_tunnels(void)
899 { 898 {
900 int prio; 899 int prio;
901 900
902 for (prio = 1; prio < 4; prio++) { 901 for (prio = 1; prio < 4; prio++) {
903 int h; 902 int h;
904 for (h = 0; h < HASH_SIZE; h++) { 903 for (h = 0; h < HASH_SIZE; h++) {
905 struct ip_tunnel *t; 904 struct ip_tunnel *t;
906 while ((t = tunnels[prio][h]) != NULL) 905 while ((t = tunnels[prio][h]) != NULL)
907 unregister_netdevice(t->dev); 906 unregister_netdevice(t->dev);
908 } 907 }
909 } 908 }
910 } 909 }
911 910
/*
 * Module exit: deregister the protocol handler, then tear down every tunnel
 * device (including the fallback) under the RTNL lock.
 */
static void __exit ipip_fini(void)
{
	if (xfrm4_tunnel_deregister(&ipip_handler))
		printk(KERN_INFO "ipip close: can't deregister tunnel\n");

	rtnl_lock();
	ipip_destroy_tunnels();
	unregister_netdevice(ipip_fb_tunnel_dev);
	rtnl_unlock();
}
922 921
/* Module entry/exit points and license. */
module_init(ipip_init);
module_exit(ipip_fini);
MODULE_LICENSE("GPL");
926 925
1 /* 1 /*
2 * IP multicast routing support for mrouted 3.6/3.8 2 * IP multicast routing support for mrouted 3.6/3.8
3 * 3 *
4 * (c) 1995 Alan Cox, <alan@redhat.com> 4 * (c) 1995 Alan Cox, <alan@redhat.com>
5 * Linux Consultancy and Custom Driver Development 5 * Linux Consultancy and Custom Driver Development
6 * 6 *
7 * This program is free software; you can redistribute it and/or 7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License 8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version. 10 * 2 of the License, or (at your option) any later version.
11 * 11 *
12 * Version: $Id: ipmr.c,v 1.65 2001/10/31 21:55:54 davem Exp $ 12 * Version: $Id: ipmr.c,v 1.65 2001/10/31 21:55:54 davem Exp $
13 * 13 *
14 * Fixes: 14 * Fixes:
15 * Michael Chastain : Incorrect size of copying. 15 * Michael Chastain : Incorrect size of copying.
16 * Alan Cox : Added the cache manager code 16 * Alan Cox : Added the cache manager code
17 * Alan Cox : Fixed the clone/copy bug and device race. 17 * Alan Cox : Fixed the clone/copy bug and device race.
18 * Mike McLagan : Routing by source 18 * Mike McLagan : Routing by source
19 * Malcolm Beattie : Buffer handling fixes. 19 * Malcolm Beattie : Buffer handling fixes.
20 * Alexey Kuznetsov : Double buffer free and other fixes. 20 * Alexey Kuznetsov : Double buffer free and other fixes.
21 * SVR Anand : Fixed several multicast bugs and problems. 21 * SVR Anand : Fixed several multicast bugs and problems.
22 * Alexey Kuznetsov : Status, optimisations and more. 22 * Alexey Kuznetsov : Status, optimisations and more.
23 * Brad Parker : Better behaviour on mrouted upcall 23 * Brad Parker : Better behaviour on mrouted upcall
24 * overflow. 24 * overflow.
25 * Carlos Picoto : PIMv1 Support 25 * Carlos Picoto : PIMv1 Support
26 * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header 26 * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header
 27 * Relax this requirement to work with older peers. 27 * Relax this requirement to work with older peers.
28 * 28 *
29 */ 29 */
30 30
31 #include <asm/system.h> 31 #include <asm/system.h>
32 #include <asm/uaccess.h> 32 #include <asm/uaccess.h>
33 #include <linux/types.h> 33 #include <linux/types.h>
34 #include <linux/sched.h> 34 #include <linux/sched.h>
35 #include <linux/capability.h> 35 #include <linux/capability.h>
36 #include <linux/errno.h> 36 #include <linux/errno.h>
37 #include <linux/timer.h> 37 #include <linux/timer.h>
38 #include <linux/mm.h> 38 #include <linux/mm.h>
39 #include <linux/kernel.h> 39 #include <linux/kernel.h>
40 #include <linux/fcntl.h> 40 #include <linux/fcntl.h>
41 #include <linux/stat.h> 41 #include <linux/stat.h>
42 #include <linux/socket.h> 42 #include <linux/socket.h>
43 #include <linux/in.h> 43 #include <linux/in.h>
44 #include <linux/inet.h> 44 #include <linux/inet.h>
45 #include <linux/netdevice.h> 45 #include <linux/netdevice.h>
46 #include <linux/inetdevice.h> 46 #include <linux/inetdevice.h>
47 #include <linux/igmp.h> 47 #include <linux/igmp.h>
48 #include <linux/proc_fs.h> 48 #include <linux/proc_fs.h>
49 #include <linux/seq_file.h> 49 #include <linux/seq_file.h>
50 #include <linux/mroute.h> 50 #include <linux/mroute.h>
51 #include <linux/init.h> 51 #include <linux/init.h>
52 #include <linux/if_ether.h> 52 #include <linux/if_ether.h>
53 #include <net/ip.h> 53 #include <net/ip.h>
54 #include <net/protocol.h> 54 #include <net/protocol.h>
55 #include <linux/skbuff.h> 55 #include <linux/skbuff.h>
56 #include <net/route.h> 56 #include <net/route.h>
57 #include <net/sock.h> 57 #include <net/sock.h>
58 #include <net/icmp.h> 58 #include <net/icmp.h>
59 #include <net/udp.h> 59 #include <net/udp.h>
60 #include <net/raw.h> 60 #include <net/raw.h>
61 #include <linux/notifier.h> 61 #include <linux/notifier.h>
62 #include <linux/if_arp.h> 62 #include <linux/if_arp.h>
63 #include <linux/netfilter_ipv4.h> 63 #include <linux/netfilter_ipv4.h>
64 #include <net/ipip.h> 64 #include <net/ipip.h>
65 #include <net/checksum.h> 65 #include <net/checksum.h>
66 66
/* Either PIM-SM variant implies the common PIM-SM support code. */
#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
#define CONFIG_IP_PIMSM	1
#endif

/* Socket of the userspace multicast routing daemon (mrouted/pimd). */
static struct sock *mroute_socket;


/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

static struct vif_device vif_table[MAXVIFS];		/* Devices 		*/
static int maxvif;	/* highest vif index in use + 1 */

#define VIF_EXISTS(idx) (vif_table[idx].dev != NULL)

static int mroute_do_assert;				/* Set in PIM assert	*/
static int mroute_do_pim;	/* PIM protocol enabled by the daemon */

static struct mfc_cache *mfc_cache_array[MFC_LINES];	/* Forwarding cache	*/

static struct mfc_cache *mfc_unres_queue;		/* Queue of unresolved entries */
static atomic_t cache_resolve_queue_len;		/* Size of unresolved	*/

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
   entries is changed only in process context and protected
   with weak lock mrt_lock. Queue of unresolved entries is protected
   with strong spinlock mfc_unres_lock.

   In this case data path is free of exclusive locks at all.
 */

static kmem_cache_t *mrt_cachep __read_mostly;

/* Forward declarations for the routines defined later in this file. */
static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);

#ifdef CONFIG_IP_PIMSM_V2
static struct net_protocol pim_protocol;
#endif

/* Periodically expires stale entries on the unresolved queue. */
static struct timer_list ipmr_expire_timer;
119 119
/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */

/*
 * Create a DVMRP tunnel device ("dvmrp%d") for the given vif by driving
 * tunl0's SIOCADDTUNNEL ioctl from kernel context (hence the set_fs()
 * dance: the ioctl handler expects a userspace pointer).  Returns the new
 * device, or NULL on failure.  Caller holds the RTNL lock.
 */
static
struct net_device *ipmr_new_tunnel(struct vifctl *v)
{
	struct net_device  *dev;

	dev = __dev_get_by_name("tunl0");

	if (dev) {
		int err;
		struct ifreq ifr;
		mm_segment_t	oldfs;
		struct ip_tunnel_parm p;
		struct in_device  *in_dev;

		/* Build the tunnel parameters from the vif endpoints. */
		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (void*)&p;

		/* Temporarily lift the user/kernel address check so the
		 * ioctl handler accepts our kernel pointer. */
		oldfs = get_fs(); set_fs(KERNEL_DS);
		err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL);
		set_fs(oldfs);

		dev = NULL;

		if (err == 0 && (dev = __dev_get_by_name(p.name)) != NULL) {
			dev->flags |= IFF_MULTICAST;

			in_dev = __in_dev_get_rtnl(dev);
			if (in_dev == NULL && (in_dev = inetdev_init(dev)) == NULL)
				goto failure;
			/* Disable reverse-path filtering on the tunnel. */
			in_dev->cnf.rp_filter = 0;

			if (dev_open(dev))
				goto failure;
		}
	}
	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
173 173
174 #ifdef CONFIG_IP_PIMSM 174 #ifdef CONFIG_IP_PIMSM
175 175
176 static int reg_vif_num = -1; 176 static int reg_vif_num = -1;
177 177
static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	/*
	 * Transmit hook of the PIM register pseudo-device: instead of
	 * hitting a wire, every packet routed out of this vif is bounced
	 * to the user-space daemon as an IGMPMSG_WHOLEPKT report.
	 */
	read_lock(&mrt_lock);	/* reg_vif_num is only stable under mrt_lock */
	((struct net_device_stats*)netdev_priv(dev))->tx_bytes += skb->len;
	((struct net_device_stats*)netdev_priv(dev))->tx_packets++;
	ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);	/* the report works on a copy; drop the original */
	return 0;
}
188 188
/* The register vif keeps its statistics in the device's private area
 * (allocated by ipmr_reg_vif()); just hand that area back. */
static struct net_device_stats *reg_vif_get_stats(struct net_device *dev)
{
	struct net_device_stats *stats = netdev_priv(dev);

	return stats;
}
193 193
194 static void reg_vif_setup(struct net_device *dev) 194 static void reg_vif_setup(struct net_device *dev)
195 { 195 {
196 dev->type = ARPHRD_PIMREG; 196 dev->type = ARPHRD_PIMREG;
197 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8; 197 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
198 dev->flags = IFF_NOARP; 198 dev->flags = IFF_NOARP;
199 dev->hard_start_xmit = reg_vif_xmit; 199 dev->hard_start_xmit = reg_vif_xmit;
200 dev->get_stats = reg_vif_get_stats; 200 dev->get_stats = reg_vif_get_stats;
201 dev->destructor = free_netdev; 201 dev->destructor = free_netdev;
202 } 202 }
203 203
static struct net_device *ipmr_reg_vif(void)
{
	/*
	 * Allocate, register and bring up the "pimreg" pseudo-device used
	 * as the PIM register vif.  Its private area holds the stats
	 * (see reg_vif_setup()/reg_vif_get_stats()).  Returns the device
	 * or NULL.  Caller holds the RTNL lock.
	 */
	struct net_device *dev;
	struct in_device *in_dev;

	dev = alloc_netdev(sizeof(struct net_device_stats), "pimreg",
                          reg_vif_setup);

	if (dev == NULL)
		return NULL;

	if (register_netdevice(dev)) {
		/* Never registered, so it may be freed directly. */
		free_netdev(dev);
		return NULL;
	}
	dev->iflink = 0;

	if ((in_dev = inetdev_init(dev)) == NULL)
		goto failure;

	/* Disable reverse-path filtering on the register vif. */
	in_dev->cnf.rp_filter = 0;

	if (dev_open(dev))
		goto failure;

	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
239 #endif 239 #endif
240 240
/*
 *	Delete a VIF entry
 */
244 244
/*
 * Tear down vif slot @vifi: detach it from the forwarding path, undo
 * the device state vif_add() set up, and drop the device reference.
 * Returns 0, or -EADDRNOTAVAIL if the index is out of range or the
 * slot is not in use.  Caller holds the RTNL lock.
 */
static int vif_delete(int vifi)
{
	struct vif_device *v;
	struct net_device *dev;
	struct in_device *in_dev;

	if (vifi < 0 || vifi >= maxvif)
		return -EADDRNOTAVAIL;

	v = &vif_table[vifi];

	/* Clearing v->dev under the writer lock is what makes the vif
	 * invisible to the forwarding path before we tear it down. */
	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IP_PIMSM
	if (vifi == reg_vif_num)
		reg_vif_num = -1;	/* the register vif is gone */
#endif

	/* If the highest-numbered vif went away, shrink maxvif down to
	 * the highest slot still in use. */
	if (vifi+1 == maxvif) {
		int tmp;
		for (tmp=vifi-1; tmp>=0; tmp--) {
			if (VIF_EXISTS(tmp))
				break;
		}
		maxvif = tmp+1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);	/* undo the +1 from vif_add() */

	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
		in_dev->cnf.mc_forwarding--;
		ip_rt_multicast_event(in_dev);
	}

	/* Tunnel and register devices were created by us in vif_add();
	 * unregister them.  Physical devices are merely released. */
	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
		unregister_netdevice(dev);

	dev_put(dev);
	return 0;
}
294 294
/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */
298 298
static void ipmr_destroy_unres(struct mfc_cache *c)
{
	/*
	 * Free an unresolved cache entry.  Real queued packets are simply
	 * dropped; queued netlink route requests (marked by an IP version
	 * field of 0, so they can never be real IPv4 packets) get an
	 * -ETIMEDOUT error reply sent back to their requester.
	 */
	struct sk_buff *skb;
	struct nlmsgerr *e;

	atomic_dec(&cache_resolve_queue_len);

	while((skb=skb_dequeue(&c->mfc_un.unres.unresolved))) {
		if (skb->nh.iph->version == 0) {
			/* Rewrite the queued request in place into an
			 * NLMSG_ERROR reply carrying -ETIMEDOUT. */
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			e = NLMSG_DATA(nlh);
			e->error = -ETIMEDOUT;
			memset(&e->msg, 0, sizeof(e->msg));
			netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
		} else
			kfree_skb(skb);
	}

	kmem_cache_free(mrt_cachep, c);
}
322 322
323 323
/* Single timer process for all the unresolved queue. */
325 325
static void ipmr_expire_process(unsigned long dummy)
{
	/*
	 * Timer callback that walks the unresolved queue, destroys
	 * entries whose deadline has passed, and re-arms itself for the
	 * soonest remaining deadline.
	 */
	unsigned long now;
	unsigned long expires;
	struct mfc_cache *c, **cp;

	/* Timer context: do not spin on the lock, just retry shortly
	 * if another path currently holds it. */
	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
		return;
	}

	if (atomic_read(&cache_resolve_queue_len) == 0)
		goto out;

	now = jiffies;
	expires = 10*HZ;	/* upper bound for the next re-arm */
	cp = &mfc_unres_queue;

	while ((c=*cp) != NULL) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			/* Not expired yet: remember the soonest deadline
			 * and keep walking. */
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			cp = &c->next;
			continue;
		}

		/* Expired: unlink and destroy (also answers/drops any
		 * skbs still queued on the entry). */
		*cp = c->next;

		ipmr_destroy_unres(c);
	}

	if (atomic_read(&cache_resolve_queue_len))
		mod_timer(&ipmr_expire_timer, jiffies + expires);

out:
	spin_unlock(&mfc_unres_lock);
}
364 364
/* Fill oifs list. It is called under write locked mrt_lock. */
366 366
367 static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls) 367 static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
368 { 368 {
369 int vifi; 369 int vifi;
370 370
371 cache->mfc_un.res.minvif = MAXVIFS; 371 cache->mfc_un.res.minvif = MAXVIFS;
372 cache->mfc_un.res.maxvif = 0; 372 cache->mfc_un.res.maxvif = 0;
373 memset(cache->mfc_un.res.ttls, 255, MAXVIFS); 373 memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
374 374
375 for (vifi=0; vifi<maxvif; vifi++) { 375 for (vifi=0; vifi<maxvif; vifi++) {
376 if (VIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) { 376 if (VIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) {
377 cache->mfc_un.res.ttls[vifi] = ttls[vifi]; 377 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
378 if (cache->mfc_un.res.minvif > vifi) 378 if (cache->mfc_un.res.minvif > vifi)
379 cache->mfc_un.res.minvif = vifi; 379 cache->mfc_un.res.minvif = vifi;
380 if (cache->mfc_un.res.maxvif <= vifi) 380 if (cache->mfc_un.res.maxvif <= vifi)
381 cache->mfc_un.res.maxvif = vifi + 1; 381 cache->mfc_un.res.maxvif = vifi + 1;
382 } 382 }
383 } 383 }
384 } 384 }
385 385
/*
 * Install a new vif described by @vifc into slot vifc_vifi.  @mrtsock
 * is true when the request comes from the mroute daemon's socket; a
 * vif added otherwise is marked VIFF_STATIC so mroute_clean_tables()
 * leaves it alone.  Caller holds the RTNL lock.
 */
static int vif_add(struct vifctl *vifc, int mrtsock)
{
	int vifi = vifc->vifc_vifi;
	struct vif_device *v = &vif_table[vifi];
	struct net_device *dev;
	struct in_device *in_dev;

	/* Is vif busy ? */
	if (VIF_EXISTS(vifi))
		return -EADDRINUSE;

	/* Map the requested vif type onto an underlying net_device. */
	switch (vifc->vifc_flags) {
#ifdef CONFIG_IP_PIMSM
	case VIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (reg_vif_num >= 0)
			return -EADDRINUSE;	/* only one register vif */
		dev = ipmr_reg_vif();
		if (!dev)
			return -ENOBUFS;
		break;
#endif
	case VIFF_TUNNEL:
		dev = ipmr_new_tunnel(vifc);
		if (!dev)
			return -ENOBUFS;
		break;
	case 0:
		/* Plain interface, found by its local address. */
		dev = ip_dev_find(vifc->vifc_lcl_addr.s_addr);
		if (!dev)
			return -EADDRNOTAVAIL;
		dev_put(dev);	/* drop ip_dev_find()'s reference; ours is taken below */
		break;
	default:
		return -EINVAL;
	}

	if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
		return -EADDRNOTAVAIL;
	in_dev->cnf.mc_forwarding++;
	dev_set_allmulti(dev, +1);	/* undone in vif_delete() */
	ip_rt_multicast_event(in_dev);

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit=vifc->vifc_rate_limit;
	v->local=vifc->vifc_lcl_addr.s_addr;
	v->remote=vifc->vifc_rmt_addr.s_addr;
	v->flags=vifc->vifc_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;	/* survives mroute_clean_tables() */
	v->threshold=vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
		v->link = dev->iflink;	/* report the underlying device instead */

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	dev_hold(dev);
	v->dev=dev;	/* publishing v->dev makes the vif live for readers */
#ifdef CONFIG_IP_PIMSM
	if (v->flags&VIFF_REGISTER)
		reg_vif_num = vifi;
#endif
	if (vifi+1 > maxvif)
		maxvif = vifi+1;
	write_unlock_bh(&mrt_lock);
	return 0;
}
463 463
464 static struct mfc_cache *ipmr_cache_find(__u32 origin, __u32 mcastgrp) 464 static struct mfc_cache *ipmr_cache_find(__u32 origin, __u32 mcastgrp)
465 { 465 {
466 int line=MFC_HASH(mcastgrp,origin); 466 int line=MFC_HASH(mcastgrp,origin);
467 struct mfc_cache *c; 467 struct mfc_cache *c;
468 468
469 for (c=mfc_cache_array[line]; c; c = c->next) { 469 for (c=mfc_cache_array[line]; c; c = c->next) {
470 if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp) 470 if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
471 break; 471 break;
472 } 472 }
473 return c; 473 return c;
474 } 474 }
475 475
/*
 *	Allocate a multicast cache entry
 */
479 static struct mfc_cache *ipmr_cache_alloc(void) 479 static struct mfc_cache *ipmr_cache_alloc(void)
480 { 480 {
481 struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_KERNEL); 481 struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_KERNEL);
482 if(c==NULL) 482 if(c==NULL)
483 return NULL; 483 return NULL;
484 memset(c, 0, sizeof(*c)); 484 memset(c, 0, sizeof(*c));
485 c->mfc_un.res.minvif = MAXVIFS; 485 c->mfc_un.res.minvif = MAXVIFS;
486 return c; 486 return c;
487 } 487 }
488 488
489 static struct mfc_cache *ipmr_cache_alloc_unres(void) 489 static struct mfc_cache *ipmr_cache_alloc_unres(void)
490 { 490 {
491 struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_ATOMIC); 491 struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_ATOMIC);
492 if(c==NULL) 492 if(c==NULL)
493 return NULL; 493 return NULL;
494 memset(c, 0, sizeof(*c)); 494 memset(c, 0, sizeof(*c));
495 skb_queue_head_init(&c->mfc_un.unres.unresolved); 495 skb_queue_head_init(&c->mfc_un.unres.unresolved);
496 c->mfc_un.unres.expires = jiffies + 10*HZ; 496 c->mfc_un.unres.expires = jiffies + 10*HZ;
497 return c; 497 return c;
498 } 498 }
499 499
/*
 *	A cache entry has gone into a resolved state from queued
 */
503 503
static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
{
	/*
	 * The unresolved entry @uc has been resolved into @c: replay every
	 * packet that was queued on @uc.  Real packets are forwarded via
	 * ip_mr_forward(); queued netlink route requests (marked by an IP
	 * version field of 0) are answered from the now-known route @c.
	 */
	struct sk_buff *skb;
	struct nlmsgerr *e;

	/*
	 *	Play the pending entries through our router
	 */

	while((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (skb->nh.iph->version == 0) {
			int err;
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));

			if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
				/* Route data filled in; fix the length. */
				nlh->nlmsg_len = skb->tail - (u8*)nlh;
			} else {
				/* Reply did not fit: turn the message into
				 * an NLMSG_ERROR carrying -EMSGSIZE. */
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				e = NLMSG_DATA(nlh);
				e->error = -EMSGSIZE;
				memset(&e->msg, 0, sizeof(e->msg));
			}
			err = netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
		} else
			ip_mr_forward(skb, c, 0);
	}
}
533 533
/*
 *	Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */
540 540
static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
{
	/*
	 * Deliver an IGMPMSG report about @pkt (seen on vif @vifi) to the
	 * mroute daemon's socket.  @assert selects the message type:
	 * IGMPMSG_WHOLEPKT wraps the whole packet for PIM register
	 * processing; other types (e.g. IGMPMSG_NOCACHE) send just the IP
	 * header plus a fake IGMP header.  Returns sock_queue_rcv_skb()'s
	 * result, -ENOBUFS on allocation failure, or -EINVAL when no
	 * daemon socket is open.
	 */
	struct sk_buff *skb;
	int ihl = pkt->nh.iph->ihl<<2;
	struct igmphdr *igmp;
	struct igmpmsg *msg;
	int ret;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
	else
#endif
		skb = alloc_skb(128, GFP_ATOMIC);

	if(!skb)
		return -ENOBUFS;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix ihl, length etc.
		   And all this only to mangle msg->im_msgtype and
		   to set msg->im_mbz to "mbz" :-)
		 */
		msg = (struct igmpmsg*)skb_push(skb, sizeof(struct iphdr));
		skb->nh.raw = skb->h.raw = (u8*)msg;
		memcpy(msg, pkt->nh.raw, sizeof(struct iphdr));
		msg->im_msgtype = IGMPMSG_WHOLEPKT;
		msg->im_mbz = 0;
		msg->im_vif = reg_vif_num;
		/* Account for the extra header we pushed in front. */
		skb->nh.iph->ihl = sizeof(struct iphdr) >> 2;
		skb->nh.iph->tot_len = htons(ntohs(pkt->nh.iph->tot_len) + sizeof(struct iphdr));
	} else
#endif
	{

	/*
	 *	Copy the IP header
	 */

		skb->nh.iph = (struct iphdr *)skb_put(skb, ihl);
		memcpy(skb->data,pkt->data,ihl);
		skb->nh.iph->protocol = 0;			/* Flag to the kernel this is a route add */
		msg = (struct igmpmsg*)skb->nh.iph;
		msg->im_vif = vifi;
		skb->dst = dst_clone(pkt->dst);

	/*
	 *	Add our header
	 */

		igmp=(struct igmphdr *)skb_put(skb,sizeof(struct igmphdr));
		igmp->type	=
		msg->im_msgtype = assert;	/* type doubles as the report kind */
		igmp->code 	=	0;
		skb->nh.iph->tot_len=htons(skb->len);			/* Fix the length */
		skb->h.raw = skb->nh.raw;
	}

	if (mroute_socket == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to mrouted
	 */
	if ((ret=sock_queue_rcv_skb(mroute_socket,skb))<0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}
617 617
/*
 *	Queue a packet for resolution. It gets locked cache entry!
 */
621 621
static int
ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
{
	/*
	 * No route yet for this packet's (S,G): park @skb on an unresolved
	 * cache entry until the daemon installs a route (or the entry
	 * times out).  The first packet for a new (S,G) also triggers an
	 * IGMPMSG_NOCACHE report to the daemon.  Consumes @skb on error.
	 */
	int err;
	struct mfc_cache *c;

	spin_lock_bh(&mfc_unres_lock);
	/* Is there already an unresolved entry for this (S,G)? */
	for (c=mfc_unres_queue; c; c=c->next) {
		if (c->mfc_mcastgrp == skb->nh.iph->daddr &&
		    c->mfc_origin == skb->nh.iph->saddr)
			break;
	}

	if (c == NULL) {
		/*
		 *	Create a new entry if allowable
		 */

		/* At most 10 unresolved entries may exist at a time. */
		if (atomic_read(&cache_resolve_queue_len)>=10 ||
		    (c=ipmr_cache_alloc_unres())==NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mfc_parent=-1;
		c->mfc_origin=skb->nh.iph->saddr;
		c->mfc_mcastgrp=skb->nh.iph->daddr;

		/*
		 *	Reflect first query at mrouted.
		 */
		if ((err = ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE))<0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			kmem_cache_free(mrt_cachep, c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&cache_resolve_queue_len);
		c->next = mfc_unres_queue;
		mfc_unres_queue = c;

		/* Deadline was set to jiffies+10s by ipmr_cache_alloc_unres(). */
		mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen>3) {
		/* Cap at 4 queued packets per unresolved entry. */
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved,skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}
690 690
/*
 *	MFC cache manipulation by user space mroute daemon
 */
694 694
695 static int ipmr_mfc_delete(struct mfcctl *mfc) 695 static int ipmr_mfc_delete(struct mfcctl *mfc)
696 { 696 {
697 int line; 697 int line;
698 struct mfc_cache *c, **cp; 698 struct mfc_cache *c, **cp;
699 699
700 line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); 700 line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
701 701
702 for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) { 702 for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
703 if (c->mfc_origin == mfc->mfcc_origin.s_addr && 703 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
704 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) { 704 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
705 write_lock_bh(&mrt_lock); 705 write_lock_bh(&mrt_lock);
706 *cp = c->next; 706 *cp = c->next;
707 write_unlock_bh(&mrt_lock); 707 write_unlock_bh(&mrt_lock);
708 708
709 kmem_cache_free(mrt_cachep, c); 709 kmem_cache_free(mrt_cachep, c);
710 return 0; 710 return 0;
711 } 711 }
712 } 712 }
713 return -ENOENT; 713 return -ENOENT;
714 } 714 }
715 715
static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
{
	/*
	 * Install or update the resolved MFC entry described by @mfc.
	 * If an unresolved entry for the same (S,G) was waiting, replay
	 * its queued packets through the new route.  @mrtsock is true
	 * when the daemon's socket made the request; otherwise the entry
	 * is marked MFC_STATIC so mroute_clean_tables() keeps it.
	 */
	int line;
	struct mfc_cache *uc, *c, **cp;

	line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
			break;
	}

	if (c != NULL) {
		/* Existing entry: just update it in place. */
		write_lock_bh(&mrt_lock);
		c->mfc_parent = mfc->mfcc_parent;
		ipmr_update_thresholds(c, mfc->mfcc_ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if(!MULTICAST(mfc->mfcc_mcastgrp.s_addr))
		return -EINVAL;

	c=ipmr_cache_alloc();
	if (c==NULL)
		return -ENOMEM;

	c->mfc_origin=mfc->mfcc_origin.s_addr;
	c->mfc_mcastgrp=mfc->mfcc_mcastgrp.s_addr;
	c->mfc_parent=mfc->mfcc_parent;
	ipmr_update_thresholds(c, mfc->mfcc_ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	/* Publish the new entry at the head of its hash chain. */
	write_lock_bh(&mrt_lock);
	c->next = mfc_cache_array[line];
	mfc_cache_array[line] = c;
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	spin_lock_bh(&mfc_unres_lock);
	for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
	     cp = &uc->next) {
		if (uc->mfc_origin == c->mfc_origin &&
		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
			*cp = uc->next;
			/* Last unresolved entry gone: stop the timer. */
			if (atomic_dec_and_test(&cache_resolve_queue_len))
				del_timer(&ipmr_expire_timer);
			break;
		}
	}
	spin_unlock_bh(&mfc_unres_lock);

	if (uc) {
		/* Replay the packets queued while unresolved. */
		ipmr_cache_resolve(uc, c);
		kmem_cache_free(mrt_cachep, uc);
	}
	return 0;
}
781 781
/*
 *	Close the multicast socket, and clear the vif tables etc
 */
785 785
static void mroute_clean_tables(struct sock *sk)
{
	/*
	 * Flush all daemon-installed state when the mroute socket closes:
	 * non-static vifs, non-static MFC entries, and the entire
	 * unresolved queue.  Entries flagged VIFF_STATIC/MFC_STATIC were
	 * installed outside the daemon and are preserved.
	 */
	int i;

	/*
	 *	Shut down all active vif entries
	 */
	for(i=0; i<maxvif; i++) {
		if (!(vif_table[i].flags&VIFF_STATIC))
			vif_delete(i);
	}

	/*
	 *	Wipe the cache
	 */
	for (i=0;i<MFC_LINES;i++) {
		struct mfc_cache *c, **cp;

		cp = &mfc_cache_array[i];
		while ((c = *cp) != NULL) {
			if (c->mfc_flags&MFC_STATIC) {
				cp = &c->next;
				continue;
			}
			/* Unlink under the writer lock, free outside it. */
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			kmem_cache_free(mrt_cachep, c);
		}
	}

	if (atomic_read(&cache_resolve_queue_len) != 0) {
		struct mfc_cache *c;

		spin_lock_bh(&mfc_unres_lock);
		while (mfc_unres_queue != NULL) {
			c = mfc_unres_queue;
			mfc_unres_queue = c->next;
			/* Drop the lock around the destroy: it sends
			 * netlink replies for queued requests. */
			spin_unlock_bh(&mfc_unres_lock);

			ipmr_destroy_unres(c);

			spin_lock_bh(&mfc_unres_lock);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}
834 834
/*
 * Destructor for the multicast routing socket: detach it from the
 * global mroute_socket slot and flush all non-static mroute state.
 * Runs under RTNL to serialise against setsockopt(MRT_INIT).
 */
static void mrtsock_destruct(struct sock *sk)
{
	rtnl_lock();
	if (sk == mroute_socket) {
		/* Undo the mc_forwarding++ done at MRT_INIT time. */
		ipv4_devconf.mc_forwarding--;

		/* Clear the pointer under mrt_lock so the packet path
		   never dereferences a dying socket. */
		write_lock_bh(&mrt_lock);
		mroute_socket=NULL;
		write_unlock_bh(&mrt_lock);

		mroute_clean_tables(sk);
	}
	rtnl_unlock();
}
849 849
850 /* 850 /*
851 * Socket options and virtual interface manipulation. The whole 851 * Socket options and virtual interface manipulation. The whole
852 * virtual interface system is a complete heap, but unfortunately 852 * virtual interface system is a complete heap, but unfortunately
853 * that's how BSD mrouted happens to think. Maybe one day with a proper 853 * that's how BSD mrouted happens to think. Maybe one day with a proper
854 * MOSPF/PIM router set up we can clean this up. 854 * MOSPF/PIM router set up we can clean this up.
855 */ 855 */
856 856
/*
 * setsockopt() handler for the MRT_* multicast-routing options.
 * Configures the mroute socket, virtual interfaces (vifs) and the
 * multicast forwarding cache (MFC). Returns 0 or a negative errno.
 */
int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int optlen)
{
	int ret;
	struct vifctl vif;
	struct mfcctl mfc;

	/* Everything except MRT_INIT requires either being the mroute
	   socket itself or CAP_NET_ADMIN. */
	if(optname!=MRT_INIT)
	{
		if(sk!=mroute_socket && !capable(CAP_NET_ADMIN))
			return -EACCES;
	}

	switch(optname)
	{
		case MRT_INIT:
			/* Only a raw IGMP socket may become the mroute socket. */
			if (sk->sk_type != SOCK_RAW ||
			    inet_sk(sk)->num != IPPROTO_IGMP)
				return -EOPNOTSUPP;
			if(optlen!=sizeof(int))
				return -ENOPROTOOPT;

			rtnl_lock();
			if (mroute_socket) {
				/* Only one mroute socket at a time. */
				rtnl_unlock();
				return -EADDRINUSE;
			}

			/* Register for router-alert delivery; the destructor
			   tears the mroute state down again on socket close. */
			ret = ip_ra_control(sk, 1, mrtsock_destruct);
			if (ret == 0) {
				write_lock_bh(&mrt_lock);
				mroute_socket=sk;
				write_unlock_bh(&mrt_lock);

				ipv4_devconf.mc_forwarding++;
			}
			rtnl_unlock();
			return ret;
		case MRT_DONE:
			if (sk!=mroute_socket)
				return -EACCES;
			/* Unhooking triggers mrtsock_destruct via ip_ra_control. */
			return ip_ra_control(sk, 0, NULL);
		case MRT_ADD_VIF:
		case MRT_DEL_VIF:
			if(optlen!=sizeof(vif))
				return -EINVAL;
			if (copy_from_user(&vif,optval,sizeof(vif)))
				return -EFAULT;
			if(vif.vifc_vifi >= MAXVIFS)
				return -ENFILE;
			/* vif_add/vif_delete require RTNL. */
			rtnl_lock();
			if (optname==MRT_ADD_VIF) {
				ret = vif_add(&vif, sk==mroute_socket);
			} else {
				ret = vif_delete(vif.vifc_vifi);
			}
			rtnl_unlock();
			return ret;

		/*
		 *	Manipulate the forwarding caches. These live
		 *	in a sort of kernel/user symbiosis.
		 */
		case MRT_ADD_MFC:
		case MRT_DEL_MFC:
			if(optlen!=sizeof(mfc))
				return -EINVAL;
			if (copy_from_user(&mfc,optval, sizeof(mfc)))
				return -EFAULT;
			rtnl_lock();
			if (optname==MRT_DEL_MFC)
				ret = ipmr_mfc_delete(&mfc);
			else
				ret = ipmr_mfc_add(&mfc, sk==mroute_socket);
			rtnl_unlock();
			return ret;
		/*
		 *	Control PIM assert.
		 */
		case MRT_ASSERT:
		{
			int v;
			if(get_user(v,(int __user *)optval))
				return -EFAULT;
			mroute_do_assert=(v)?1:0;
			return 0;
		}
#ifdef CONFIG_IP_PIMSM
		case MRT_PIM:
		{
			/* Shadows the outer 'ret' intentionally; only this
			   block's copy is used here. */
			int v, ret;
			if(get_user(v,(int __user *)optval))
				return -EFAULT;
			v = (v)?1:0;
			rtnl_lock();
			ret = 0;
			if (v != mroute_do_pim) {
				/* PIM mode implies assert mode. */
				mroute_do_pim = v;
				mroute_do_assert = v;
#ifdef CONFIG_IP_PIMSM_V2
				/* Register/unregister the PIMv2 protocol
				   handler to match the new mode. */
				if (mroute_do_pim)
					ret = inet_add_protocol(&pim_protocol,
								IPPROTO_PIM);
				else
					ret = inet_del_protocol(&pim_protocol,
								IPPROTO_PIM);
				if (ret < 0)
					ret = -EAGAIN;
#endif
			}
			rtnl_unlock();
			return ret;
		}
#endif
		/*
		 *	Spurious command, or MRT_VERSION which you cannot
		 *	set.
		 */
		default:
			return -ENOPROTOOPT;
	}
}
978 978
979 /* 979 /*
980 * Getsock opt support for the multicast routing system. 980 * Getsock opt support for the multicast routing system.
981 */ 981 */
982 982
/*
 * getsockopt() handler for the readable MRT_* options:
 * MRT_VERSION, MRT_ASSERT and (if PIM-SM is configured) MRT_PIM.
 * Copies at most sizeof(int) back to userspace.
 */
int ip_mroute_getsockopt(struct sock *sk,int optname,char __user *optval,int __user *optlen)
{
	int olr;
	int val;

	if(optname!=MRT_VERSION &&
#ifdef CONFIG_IP_PIMSM
	   optname!=MRT_PIM &&
#endif
	   optname!=MRT_ASSERT)
		return -ENOPROTOOPT;

	if (get_user(olr, optlen))
		return -EFAULT;

	/* Clamp the user's buffer length to sizeof(int). */
	olr = min_t(unsigned int, olr, sizeof(int));
	/* NOTE(review): the unsigned min_t above maps a negative olr to
	   sizeof(int) before this test, so it can never fire — confirm
	   whether negative lengths were meant to be rejected. */
	if (olr < 0)
		return -EINVAL;

	if(put_user(olr,optlen))
		return -EFAULT;
	if(optname==MRT_VERSION)
		val=0x0305;	/* mrouted-compatible version code */
#ifdef CONFIG_IP_PIMSM
	else if(optname==MRT_PIM)
		val=mroute_do_pim;
#endif
	else
		val=mroute_do_assert;
	if(copy_to_user(optval,&val,olr))
		return -EFAULT;
	return 0;
}
1016 1016
1017 /* 1017 /*
1018 * The IP multicast ioctl support routines. 1018 * The IP multicast ioctl support routines.
1019 */ 1019 */
1020 1020
/*
 * ioctl() handler for multicast-routing statistics:
 * SIOCGETVIFCNT returns per-vif packet/byte counters,
 * SIOCGETSGCNT returns per-(S,G) MFC entry counters.
 */
int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req sr;
	struct sioc_vif_req vr;
	struct vif_device *vif;
	struct mfc_cache *c;

	switch(cmd)
	{
		case SIOCGETVIFCNT:
			if (copy_from_user(&vr,arg,sizeof(vr)))
				return -EFAULT;
			if(vr.vifi>=maxvif)
				return -EINVAL;
			/* Snapshot the counters under mrt_lock, then copy
			   to userspace after dropping it. */
			read_lock(&mrt_lock);
			vif=&vif_table[vr.vifi];
			if(VIF_EXISTS(vr.vifi)) {
				vr.icount=vif->pkt_in;
				vr.ocount=vif->pkt_out;
				vr.ibytes=vif->bytes_in;
				vr.obytes=vif->bytes_out;
				read_unlock(&mrt_lock);

				if (copy_to_user(arg,&vr,sizeof(vr)))
					return -EFAULT;
				return 0;
			}
			read_unlock(&mrt_lock);
			return -EADDRNOTAVAIL;
		case SIOCGETSGCNT:
			if (copy_from_user(&sr,arg,sizeof(sr)))
				return -EFAULT;

			read_lock(&mrt_lock);
			c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
			if (c) {
				sr.pktcnt = c->mfc_un.res.pkt;
				sr.bytecnt = c->mfc_un.res.bytes;
				sr.wrong_if = c->mfc_un.res.wrong_if;
				read_unlock(&mrt_lock);

				if (copy_to_user(arg,&sr,sizeof(sr)))
					return -EFAULT;
				return 0;
			}
			read_unlock(&mrt_lock);
			return -EADDRNOTAVAIL;
		default:
			return -ENOIOCTLCMD;
	}
}
1072 1072
1073 1073
1074 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr) 1074 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1075 { 1075 {
1076 struct vif_device *v; 1076 struct vif_device *v;
1077 int ct; 1077 int ct;
1078 if (event != NETDEV_UNREGISTER) 1078 if (event != NETDEV_UNREGISTER)
1079 return NOTIFY_DONE; 1079 return NOTIFY_DONE;
1080 v=&vif_table[0]; 1080 v=&vif_table[0];
1081 for(ct=0;ct<maxvif;ct++,v++) { 1081 for(ct=0;ct<maxvif;ct++,v++) {
1082 if (v->dev==ptr) 1082 if (v->dev==ptr)
1083 vif_delete(ct); 1083 vif_delete(ct);
1084 } 1084 }
1085 return NOTIFY_DONE; 1085 return NOTIFY_DONE;
1086 } 1086 }
1087 1087
1088 1088
/* Notifier block hooking ipmr_device_event into the netdev chain. */
static struct notifier_block ip_mr_notifier={
	.notifier_call = ipmr_device_event,
};
1092 1092
1093 /* 1093 /*
1094 * Encapsulate a packet by attaching a valid IPIP header to it. 1094 * Encapsulate a packet by attaching a valid IPIP header to it.
1095 * This avoids tunnel drivers and other mess and gives us the speed so 1095 * This avoids tunnel drivers and other mess and gives us the speed so
1096 * important for multicast video. 1096 * important for multicast video.
1097 */ 1097 */
1098 1098
1099 static void ip_encap(struct sk_buff *skb, u32 saddr, u32 daddr) 1099 static void ip_encap(struct sk_buff *skb, u32 saddr, u32 daddr)
1100 { 1100 {
1101 struct iphdr *iph = (struct iphdr *)skb_push(skb,sizeof(struct iphdr)); 1101 struct iphdr *iph = (struct iphdr *)skb_push(skb,sizeof(struct iphdr));
1102 1102
1103 iph->version = 4; 1103 iph->version = 4;
1104 iph->tos = skb->nh.iph->tos; 1104 iph->tos = skb->nh.iph->tos;
1105 iph->ttl = skb->nh.iph->ttl; 1105 iph->ttl = skb->nh.iph->ttl;
1106 iph->frag_off = 0; 1106 iph->frag_off = 0;
1107 iph->daddr = daddr; 1107 iph->daddr = daddr;
1108 iph->saddr = saddr; 1108 iph->saddr = saddr;
1109 iph->protocol = IPPROTO_IPIP; 1109 iph->protocol = IPPROTO_IPIP;
1110 iph->ihl = 5; 1110 iph->ihl = 5;
1111 iph->tot_len = htons(skb->len); 1111 iph->tot_len = htons(skb->len);
1112 ip_select_ident(iph, skb->dst, NULL); 1112 ip_select_ident(iph, skb->dst, NULL);
1113 ip_send_check(iph); 1113 ip_send_check(iph);
1114 1114
1115 skb->h.ipiph = skb->nh.iph; 1115 skb->h.ipiph = skb->nh.iph;
1116 skb->nh.iph = iph; 1116 skb->nh.iph = iph;
1117 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 1117 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1118 nf_reset(skb); 1118 nf_reset(skb);
1119 } 1119 }
1120 1120
1121 static inline int ipmr_forward_finish(struct sk_buff *skb) 1121 static inline int ipmr_forward_finish(struct sk_buff *skb)
1122 { 1122 {
1123 struct ip_options * opt = &(IPCB(skb)->opt); 1123 struct ip_options * opt = &(IPCB(skb)->opt);
1124 1124
1125 IP_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS); 1125 IP_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS);
1126 1126
1127 if (unlikely(opt->optlen)) 1127 if (unlikely(opt->optlen))
1128 ip_forward_options(skb); 1128 ip_forward_options(skb);
1129 1129
1130 return dst_output(skb); 1130 return dst_output(skb);
1131 } 1131 }
1132 1132
1133 /* 1133 /*
1134 * Processing handlers for ipmr_forward 1134 * Processing handlers for ipmr_forward
1135 */ 1135 */
1136 1136
/*
 * Transmit one copy of a multicast packet on vif 'vifi'. Consumes the
 * skb in all cases. Register vifs hand the packet to the PIM daemon;
 * tunnel vifs get an IPIP encapsulation; ordinary vifs are routed by
 * destination. Oversized packets with DF set are dropped (no ICMP for
 * multicast).
 */
static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
{
	struct iphdr *iph = skb->nh.iph;
	struct vif_device *vif = &vif_table[vifi];
	struct net_device *dev;
	struct rtable *rt;
	int encap = 0;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IP_PIMSM
	if (vif->flags & VIFF_REGISTER) {
		/* Register vif: report the whole packet to the PIM daemon
		   instead of transmitting it. */
		vif->pkt_out++;
		vif->bytes_out+=skb->len;
		((struct net_device_stats*)netdev_priv(vif->dev))->tx_bytes += skb->len;
		((struct net_device_stats*)netdev_priv(vif->dev))->tx_packets++;
		ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
		kfree_skb(skb);
		return;
	}
#endif

	if (vif->flags&VIFF_TUNNEL) {
		/* Route towards the tunnel endpoint; reserve room for the
		   outer IPIP header added later by ip_encap(). */
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = vif->remote,
						.saddr = vif->local,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(&rt, &fl))
			goto out_free;
		encap = sizeof(struct iphdr);
	} else {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = iph->daddr,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(&rt, &fl))
			goto out_free;
	}

	dev = rt->u.dst.dev;

	if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
		/* Do not fragment multicasts. Alas, IPv4 does not
		   allow to send ICMP, so that packets will disappear
		   to blackhole.
		 */

		IP_INC_STATS_BH(IPSTATS_MIB_FRAGFAILS);
		ip_rt_put(rt);
		goto out_free;
	}

	/* Headroom needed: link-layer + any outer header + encap. */
	encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;

	if (skb_cow(skb, encap)) {
		ip_rt_put(rt);
		goto out_free;
 	}

	vif->pkt_out++;
	vif->bytes_out+=skb->len;

	/* Re-target the skb at the new route; skb_cow may have moved
	   the headers, so reload iph before touching the TTL. */
	dst_release(skb->dst);
	skb->dst = &rt->u.dst;
	iph = skb->nh.iph;
	ip_decrease_ttl(iph);

	/* FIXME: forward and output firewalls used to be called here.
	 * What do we do with netfilter? -- RR */
	if (vif->flags & VIFF_TUNNEL) {
		ip_encap(skb, vif->local, vif->remote);
		/* FIXME: extra output firewall step used to be here. --RR */
		((struct ip_tunnel *)netdev_priv(vif->dev))->stat.tx_packets++;
		((struct ip_tunnel *)netdev_priv(vif->dev))->stat.tx_bytes+=skb->len;
	}

	/* Mark so ip_mr_input() won't forward a looped-back copy again. */
	IPCB(skb)->flags |= IPSKB_FORWARDED;

	/*
	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. It is clear, if mrouter runs a multicasting
	 * program, it should receive packets not depending to what interface
	 * program is joined.
	 * If we will not make it, the program will have to join on all
	 * interfaces. On the other hand, multihoming host (or router, but
	 * not mrouter) cannot join to more than one interface - it will
	 * result in receiving multiple packets.
	 */
	NF_HOOK(PF_INET, NF_IP_FORWARD, skb, skb->dev, dev,
		ipmr_forward_finish);
	return;

out_free:
	kfree_skb(skb);
	return;
}
1238 1238
1239 static int ipmr_find_vif(struct net_device *dev) 1239 static int ipmr_find_vif(struct net_device *dev)
1240 { 1240 {
1241 int ct; 1241 int ct;
1242 for (ct=maxvif-1; ct>=0; ct--) { 1242 for (ct=maxvif-1; ct>=0; ct--) {
1243 if (vif_table[ct].dev == dev) 1243 if (vif_table[ct].dev == dev)
1244 break; 1244 break;
1245 } 1245 }
1246 return ct; 1246 return ct;
1247 } 1247 }
1248 1248
1249 /* "local" means that we should preserve one skb (for local delivery) */ 1249 /* "local" means that we should preserve one skb (for local delivery) */
1250 1250
/*
 * Forward a multicast packet according to its MFC cache entry.
 * 'local' means one skb must be preserved for local delivery, so the
 * caller's skb is only consumed when !local. Performs the wrong-
 * interface (RPF) check and may emit a PIM assert report.
 */
static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
{
	int psend = -1;
	int vif, ct;

	vif = cache->mfc_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (vif_table[vif].dev != skb->dev) {
		int true_vifi;

		if (((struct rtable*)skb->dst)->fl.iif == 0) {
			/* It is our own packet, looped back.
			   Very complicated situation...

			   The best workaround until routing daemons will be
			   fixed is not to redistribute packet, if it was
			   send through wrong interface. It means, that
			   multicast applications WILL NOT work for
			   (S,G), which have default multicast route pointing
			   to wrong oif. In any case, it is not a good
			   idea to use multicasting applications on router.
			 */
			goto dont_forward;
		}

		cache->mfc_un.res.wrong_if++;
		true_vifi = ipmr_find_vif(skb->dev);

		/* Rate-limited (MFC_ASSERT_THRESH) assert report to the
		   routing daemon. */
		if (true_vifi >= 0 && mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		       so that we cannot check that packet arrived on an oif.
		       It is bad, but otherwise we would need to move pretty
		       large chunk of pimd to kernel. Ough... --ANK
		     */
		    (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
			ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF);
		}
		goto dont_forward;
	}

	vif_table[vif].pkt_in++;
	vif_table[vif].bytes_in+=skb->len;

	/*
	 * Forward the frame
	 */
	/* One clone per output vif except the last; the last eligible vif
	   (tracked in psend) can take the original skb when !local. */
	for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
		if (skb->nh.iph->ttl > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ipmr_queue_xmit(skb2, cache, psend);
			}
			psend=ct;
		}
	}
	if (psend != -1) {
		if (local) {
			/* Caller still needs skb: send a clone. */
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			if (skb2)
				ipmr_queue_xmit(skb2, cache, psend);
		} else {
			ipmr_queue_xmit(skb, cache, psend);
			return 0;
		}
	}

dont_forward:
	if (!local)
		kfree_skb(skb);
	return 0;
}
1331 1331
1332 1332
1333 /* 1333 /*
1334 * Multicast packets for forwarding arrive here 1334 * Multicast packets for forwarding arrive here
1335 */ 1335 */
1336 1336
/*
 * Entry point for multicast packets that need forwarding. Handles
 * router-alert/IGMP delivery to the mroute socket, resolved-cache
 * forwarding via ip_mr_forward(), queuing of unresolved (S,G) pairs,
 * and optional local delivery when the route has RTCF_LOCAL set.
 */
int ip_mr_input(struct sk_buff *skb)
{
	struct mfc_cache *cache;
	int local = ((struct rtable*)skb->dst)->rt_flags&RTCF_LOCAL;

	/* Packet is looped back after forward, it should not be
	   forwarded second time, but still can be delivered locally.
	 */
	if (IPCB(skb)->flags&IPSKB_FORWARDED)
		goto dont_forward;

	if (!local) {
		    if (IPCB(skb)->opt.router_alert) {
			    if (ip_call_ra_chain(skb))
				    return 0;
		    } else if (skb->nh.iph->protocol == IPPROTO_IGMP){
			    /* IGMPv1 (and broken IGMPv2 implementations sort of
			       Cisco IOS <= 11.2(8)) do not put router alert
			       option to IGMP packets destined to routable
			       groups. It is very bad, because it means
			       that we can forward NO IGMP messages.
			     */
			    read_lock(&mrt_lock);
			    if (mroute_socket) {
				    nf_reset(skb);
				    raw_rcv(mroute_socket, skb);
				    read_unlock(&mrt_lock);
				    return 0;
			    }
			    read_unlock(&mrt_lock);
		    }
	}

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(skb->nh.iph->saddr, skb->nh.iph->daddr);

	/*
	 *	No usable cache entry
	 */
	if (cache==NULL) {
		int vif;

		if (local) {
			/* Deliver the original locally; continue with a
			   clone (if any) for the unresolved queue. */
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			ip_local_deliver(skb);
			if (skb2 == NULL) {
				read_unlock(&mrt_lock);
				return -ENOBUFS;
			}
			skb = skb2;
		}

		vif = ipmr_find_vif(skb->dev);
		if (vif >= 0) {
			/* Queue for resolution by the routing daemon;
			   ipmr_cache_unresolved consumes the skb. */
			int err = ipmr_cache_unresolved(vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip_mr_forward(skb, cache, local);

	read_unlock(&mrt_lock);

	if (local)
		return ip_local_deliver(skb);

	return 0;

dont_forward:
	if (local)
		return ip_local_deliver(skb);
	kfree_skb(skb);
	return 0;
}
1416 1416
1417 #ifdef CONFIG_IP_PIMSM_V1 1417 #ifdef CONFIG_IP_PIMSM_V1
1418 /* 1418 /*
1419 * Handle IGMP messages of PIMv1 1419 * Handle IGMP messages of PIMv1
1420 */ 1420 */
1421 1421
/*
 * Receive handler for PIMv1 REGISTER messages carried in IGMP.
 * Validates the message, strips the outer headers and re-injects the
 * encapsulated multicast packet through the register vif's device.
 * Always consumes the skb and returns 0.
 */
int pim_rcv_v1(struct sk_buff * skb)
{
	struct igmphdr *pim;
	struct iphdr   *encap;
	struct net_device  *reg_dev = NULL;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	pim = (struct igmphdr*)skb->h.raw;

	/* Only accepted when PIM mode is on and this really is a
	   PIMv1 REGISTER. */
        if (!mroute_do_pim ||
	    skb->len < sizeof(*pim) + sizeof(*encap) ||
	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
		goto drop;

	encap = (struct iphdr*)(skb->h.raw + sizeof(struct igmphdr));
	/*
	   Check that:
	   a. packet is really destinted to a multicast group
	   b. packet is not a NULL-REGISTER
	   c. packet is not truncated
	 */
	if (!MULTICAST(encap->daddr) ||
	    encap->tot_len == 0 ||
	    ntohs(encap->tot_len) + sizeof(*pim) > skb->len)
		goto drop;

	/* Grab a reference to the register vif's device under mrt_lock
	   so it cannot vanish while we re-inject the packet. */
	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = vif_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		goto drop;

	/* Strip the outer IP+IGMP headers; the inner packet now starts
	   at the encapsulated IP header. IPCB opts are zeroed later by
	   ip_rcv, so no memset is needed here. */
	skb->mac.raw = skb->nh.raw;
	skb_pull(skb, (u8*)encap - skb->data);
	skb->nh.iph = (struct iphdr *)skb->data;
	skb->dev = reg_dev;
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = 0;
	skb->pkt_type = PACKET_HOST;
	dst_release(skb->dst);
	skb->dst = NULL;
	((struct net_device_stats*)netdev_priv(reg_dev))->rx_bytes += skb->len;
	((struct net_device_stats*)netdev_priv(reg_dev))->rx_packets++;
	nf_reset(skb);
	netif_rx(skb);
	dev_put(reg_dev);
	return 0;
 drop:
	kfree_skb(skb);
	return 0;
}
1480 #endif 1479 #endif
1481 1480
1482 #ifdef CONFIG_IP_PIMSM_V2 1481 #ifdef CONFIG_IP_PIMSM_V2
1483 static int pim_rcv(struct sk_buff * skb) 1482 static int pim_rcv(struct sk_buff * skb)
1484 { 1483 {
1485 struct pimreghdr *pim; 1484 struct pimreghdr *pim;
1486 struct iphdr *encap; 1485 struct iphdr *encap;
1487 struct net_device *reg_dev = NULL; 1486 struct net_device *reg_dev = NULL;
1488 1487
1489 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap))) 1488 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
1490 goto drop; 1489 goto drop;
1491 1490
1492 pim = (struct pimreghdr*)skb->h.raw; 1491 pim = (struct pimreghdr*)skb->h.raw;
1493 if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) || 1492 if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
1494 (pim->flags&PIM_NULL_REGISTER) || 1493 (pim->flags&PIM_NULL_REGISTER) ||
1495 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 && 1494 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
1496 (u16)csum_fold(skb_checksum(skb, 0, skb->len, 0)))) 1495 (u16)csum_fold(skb_checksum(skb, 0, skb->len, 0))))
1497 goto drop; 1496 goto drop;
1498 1497
1499 /* check if the inner packet is destined to mcast group */ 1498 /* check if the inner packet is destined to mcast group */
1500 encap = (struct iphdr*)(skb->h.raw + sizeof(struct pimreghdr)); 1499 encap = (struct iphdr*)(skb->h.raw + sizeof(struct pimreghdr));
1501 if (!MULTICAST(encap->daddr) || 1500 if (!MULTICAST(encap->daddr) ||
1502 encap->tot_len == 0 || 1501 encap->tot_len == 0 ||
1503 ntohs(encap->tot_len) + sizeof(*pim) > skb->len) 1502 ntohs(encap->tot_len) + sizeof(*pim) > skb->len)
1504 goto drop; 1503 goto drop;
1505 1504
1506 read_lock(&mrt_lock); 1505 read_lock(&mrt_lock);
1507 if (reg_vif_num >= 0) 1506 if (reg_vif_num >= 0)
1508 reg_dev = vif_table[reg_vif_num].dev; 1507 reg_dev = vif_table[reg_vif_num].dev;
1509 if (reg_dev) 1508 if (reg_dev)
1510 dev_hold(reg_dev); 1509 dev_hold(reg_dev);
1511 read_unlock(&mrt_lock); 1510 read_unlock(&mrt_lock);
1512 1511
1513 if (reg_dev == NULL) 1512 if (reg_dev == NULL)
1514 goto drop; 1513 goto drop;
1515 1514
1516 skb->mac.raw = skb->nh.raw; 1515 skb->mac.raw = skb->nh.raw;
1517 skb_pull(skb, (u8*)encap - skb->data); 1516 skb_pull(skb, (u8*)encap - skb->data);
1518 skb->nh.iph = (struct iphdr *)skb->data; 1517 skb->nh.iph = (struct iphdr *)skb->data;
1519 skb->dev = reg_dev; 1518 skb->dev = reg_dev;
1520 memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
1521 skb->protocol = htons(ETH_P_IP); 1519 skb->protocol = htons(ETH_P_IP);
1522 skb->ip_summed = 0; 1520 skb->ip_summed = 0;
1523 skb->pkt_type = PACKET_HOST; 1521 skb->pkt_type = PACKET_HOST;
1524 dst_release(skb->dst); 1522 dst_release(skb->dst);
1525 ((struct net_device_stats*)netdev_priv(reg_dev))->rx_bytes += skb->len; 1523 ((struct net_device_stats*)netdev_priv(reg_dev))->rx_bytes += skb->len;
1526 ((struct net_device_stats*)netdev_priv(reg_dev))->rx_packets++; 1524 ((struct net_device_stats*)netdev_priv(reg_dev))->rx_packets++;
1527 skb->dst = NULL; 1525 skb->dst = NULL;
1528 nf_reset(skb); 1526 nf_reset(skb);
1529 netif_rx(skb); 1527 netif_rx(skb);
1530 dev_put(reg_dev); 1528 dev_put(reg_dev);
1531 return 0; 1529 return 0;
1532 drop: 1530 drop:
1533 kfree_skb(skb); 1531 kfree_skb(skb);
1534 return 0; 1532 return 0;
1535 } 1533 }
1536 #endif 1534 #endif
1537 1535
1538 static int 1536 static int
1539 ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm) 1537 ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
1540 { 1538 {
1541 int ct; 1539 int ct;
1542 struct rtnexthop *nhp; 1540 struct rtnexthop *nhp;
1543 struct net_device *dev = vif_table[c->mfc_parent].dev; 1541 struct net_device *dev = vif_table[c->mfc_parent].dev;
1544 u8 *b = skb->tail; 1542 u8 *b = skb->tail;
1545 struct rtattr *mp_head; 1543 struct rtattr *mp_head;
1546 1544
1547 if (dev) 1545 if (dev)
1548 RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex); 1546 RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
1549 1547
1550 mp_head = (struct rtattr*)skb_put(skb, RTA_LENGTH(0)); 1548 mp_head = (struct rtattr*)skb_put(skb, RTA_LENGTH(0));
1551 1549
1552 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { 1550 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1553 if (c->mfc_un.res.ttls[ct] < 255) { 1551 if (c->mfc_un.res.ttls[ct] < 255) {
1554 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4)) 1552 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1555 goto rtattr_failure; 1553 goto rtattr_failure;
1556 nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); 1554 nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1557 nhp->rtnh_flags = 0; 1555 nhp->rtnh_flags = 0;
1558 nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; 1556 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1559 nhp->rtnh_ifindex = vif_table[ct].dev->ifindex; 1557 nhp->rtnh_ifindex = vif_table[ct].dev->ifindex;
1560 nhp->rtnh_len = sizeof(*nhp); 1558 nhp->rtnh_len = sizeof(*nhp);
1561 } 1559 }
1562 } 1560 }
1563 mp_head->rta_type = RTA_MULTIPATH; 1561 mp_head->rta_type = RTA_MULTIPATH;
1564 mp_head->rta_len = skb->tail - (u8*)mp_head; 1562 mp_head->rta_len = skb->tail - (u8*)mp_head;
1565 rtm->rtm_type = RTN_MULTICAST; 1563 rtm->rtm_type = RTN_MULTICAST;
1566 return 1; 1564 return 1;
1567 1565
1568 rtattr_failure: 1566 rtattr_failure:
1569 skb_trim(skb, b - skb->data); 1567 skb_trim(skb, b - skb->data);
1570 return -EMSGSIZE; 1568 return -EMSGSIZE;
1571 } 1569 }
1572 1570
1573 int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait) 1571 int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1574 { 1572 {
1575 int err; 1573 int err;
1576 struct mfc_cache *cache; 1574 struct mfc_cache *cache;
1577 struct rtable *rt = (struct rtable*)skb->dst; 1575 struct rtable *rt = (struct rtable*)skb->dst;
1578 1576
1579 read_lock(&mrt_lock); 1577 read_lock(&mrt_lock);
1580 cache = ipmr_cache_find(rt->rt_src, rt->rt_dst); 1578 cache = ipmr_cache_find(rt->rt_src, rt->rt_dst);
1581 1579
1582 if (cache==NULL) { 1580 if (cache==NULL) {
1583 struct net_device *dev; 1581 struct net_device *dev;
1584 int vif; 1582 int vif;
1585 1583
1586 if (nowait) { 1584 if (nowait) {
1587 read_unlock(&mrt_lock); 1585 read_unlock(&mrt_lock);
1588 return -EAGAIN; 1586 return -EAGAIN;
1589 } 1587 }
1590 1588
1591 dev = skb->dev; 1589 dev = skb->dev;
1592 if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) { 1590 if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
1593 read_unlock(&mrt_lock); 1591 read_unlock(&mrt_lock);
1594 return -ENODEV; 1592 return -ENODEV;
1595 } 1593 }
1596 skb->nh.raw = skb_push(skb, sizeof(struct iphdr)); 1594 skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
1597 skb->nh.iph->ihl = sizeof(struct iphdr)>>2; 1595 skb->nh.iph->ihl = sizeof(struct iphdr)>>2;
1598 skb->nh.iph->saddr = rt->rt_src; 1596 skb->nh.iph->saddr = rt->rt_src;
1599 skb->nh.iph->daddr = rt->rt_dst; 1597 skb->nh.iph->daddr = rt->rt_dst;
1600 skb->nh.iph->version = 0; 1598 skb->nh.iph->version = 0;
1601 err = ipmr_cache_unresolved(vif, skb); 1599 err = ipmr_cache_unresolved(vif, skb);
1602 read_unlock(&mrt_lock); 1600 read_unlock(&mrt_lock);
1603 return err; 1601 return err;
1604 } 1602 }
1605 1603
1606 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY)) 1604 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1607 cache->mfc_flags |= MFC_NOTIFY; 1605 cache->mfc_flags |= MFC_NOTIFY;
1608 err = ipmr_fill_mroute(skb, cache, rtm); 1606 err = ipmr_fill_mroute(skb, cache, rtm);
1609 read_unlock(&mrt_lock); 1607 read_unlock(&mrt_lock);
1610 return err; 1608 return err;
1611 } 1609 }
1612 1610
1613 #ifdef CONFIG_PROC_FS 1611 #ifdef CONFIG_PROC_FS
1614 /* 1612 /*
1615 * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif 1613 * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
1616 */ 1614 */
1617 struct ipmr_vif_iter { 1615 struct ipmr_vif_iter {
1618 int ct; 1616 int ct;
1619 }; 1617 };
1620 1618
1621 static struct vif_device *ipmr_vif_seq_idx(struct ipmr_vif_iter *iter, 1619 static struct vif_device *ipmr_vif_seq_idx(struct ipmr_vif_iter *iter,
1622 loff_t pos) 1620 loff_t pos)
1623 { 1621 {
1624 for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) { 1622 for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) {
1625 if(!VIF_EXISTS(iter->ct)) 1623 if(!VIF_EXISTS(iter->ct))
1626 continue; 1624 continue;
1627 if (pos-- == 0) 1625 if (pos-- == 0)
1628 return &vif_table[iter->ct]; 1626 return &vif_table[iter->ct];
1629 } 1627 }
1630 return NULL; 1628 return NULL;
1631 } 1629 }
1632 1630
1633 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos) 1631 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
1634 { 1632 {
1635 read_lock(&mrt_lock); 1633 read_lock(&mrt_lock);
1636 return *pos ? ipmr_vif_seq_idx(seq->private, *pos - 1) 1634 return *pos ? ipmr_vif_seq_idx(seq->private, *pos - 1)
1637 : SEQ_START_TOKEN; 1635 : SEQ_START_TOKEN;
1638 } 1636 }
1639 1637
1640 static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos) 1638 static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1641 { 1639 {
1642 struct ipmr_vif_iter *iter = seq->private; 1640 struct ipmr_vif_iter *iter = seq->private;
1643 1641
1644 ++*pos; 1642 ++*pos;
1645 if (v == SEQ_START_TOKEN) 1643 if (v == SEQ_START_TOKEN)
1646 return ipmr_vif_seq_idx(iter, 0); 1644 return ipmr_vif_seq_idx(iter, 0);
1647 1645
1648 while (++iter->ct < maxvif) { 1646 while (++iter->ct < maxvif) {
1649 if(!VIF_EXISTS(iter->ct)) 1647 if(!VIF_EXISTS(iter->ct))
1650 continue; 1648 continue;
1651 return &vif_table[iter->ct]; 1649 return &vif_table[iter->ct];
1652 } 1650 }
1653 return NULL; 1651 return NULL;
1654 } 1652 }
1655 1653
1656 static void ipmr_vif_seq_stop(struct seq_file *seq, void *v) 1654 static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
1657 { 1655 {
1658 read_unlock(&mrt_lock); 1656 read_unlock(&mrt_lock);
1659 } 1657 }
1660 1658
1661 static int ipmr_vif_seq_show(struct seq_file *seq, void *v) 1659 static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1662 { 1660 {
1663 if (v == SEQ_START_TOKEN) { 1661 if (v == SEQ_START_TOKEN) {
1664 seq_puts(seq, 1662 seq_puts(seq,
1665 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n"); 1663 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
1666 } else { 1664 } else {
1667 const struct vif_device *vif = v; 1665 const struct vif_device *vif = v;
1668 const char *name = vif->dev ? vif->dev->name : "none"; 1666 const char *name = vif->dev ? vif->dev->name : "none";
1669 1667
1670 seq_printf(seq, 1668 seq_printf(seq,
1671 "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n", 1669 "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
1672 vif - vif_table, 1670 vif - vif_table,
1673 name, vif->bytes_in, vif->pkt_in, 1671 name, vif->bytes_in, vif->pkt_in,
1674 vif->bytes_out, vif->pkt_out, 1672 vif->bytes_out, vif->pkt_out,
1675 vif->flags, vif->local, vif->remote); 1673 vif->flags, vif->local, vif->remote);
1676 } 1674 }
1677 return 0; 1675 return 0;
1678 } 1676 }
1679 1677
1680 static struct seq_operations ipmr_vif_seq_ops = { 1678 static struct seq_operations ipmr_vif_seq_ops = {
1681 .start = ipmr_vif_seq_start, 1679 .start = ipmr_vif_seq_start,
1682 .next = ipmr_vif_seq_next, 1680 .next = ipmr_vif_seq_next,
1683 .stop = ipmr_vif_seq_stop, 1681 .stop = ipmr_vif_seq_stop,
1684 .show = ipmr_vif_seq_show, 1682 .show = ipmr_vif_seq_show,
1685 }; 1683 };
1686 1684
1687 static int ipmr_vif_open(struct inode *inode, struct file *file) 1685 static int ipmr_vif_open(struct inode *inode, struct file *file)
1688 { 1686 {
1689 struct seq_file *seq; 1687 struct seq_file *seq;
1690 int rc = -ENOMEM; 1688 int rc = -ENOMEM;
1691 struct ipmr_vif_iter *s = kmalloc(sizeof(*s), GFP_KERNEL); 1689 struct ipmr_vif_iter *s = kmalloc(sizeof(*s), GFP_KERNEL);
1692 1690
1693 if (!s) 1691 if (!s)
1694 goto out; 1692 goto out;
1695 1693
1696 rc = seq_open(file, &ipmr_vif_seq_ops); 1694 rc = seq_open(file, &ipmr_vif_seq_ops);
1697 if (rc) 1695 if (rc)
1698 goto out_kfree; 1696 goto out_kfree;
1699 1697
1700 s->ct = 0; 1698 s->ct = 0;
1701 seq = file->private_data; 1699 seq = file->private_data;
1702 seq->private = s; 1700 seq->private = s;
1703 out: 1701 out:
1704 return rc; 1702 return rc;
1705 out_kfree: 1703 out_kfree:
1706 kfree(s); 1704 kfree(s);
1707 goto out; 1705 goto out;
1708 1706
1709 } 1707 }
1710 1708
1711 static struct file_operations ipmr_vif_fops = { 1709 static struct file_operations ipmr_vif_fops = {
1712 .owner = THIS_MODULE, 1710 .owner = THIS_MODULE,
1713 .open = ipmr_vif_open, 1711 .open = ipmr_vif_open,
1714 .read = seq_read, 1712 .read = seq_read,
1715 .llseek = seq_lseek, 1713 .llseek = seq_lseek,
1716 .release = seq_release_private, 1714 .release = seq_release_private,
1717 }; 1715 };
1718 1716
1719 struct ipmr_mfc_iter { 1717 struct ipmr_mfc_iter {
1720 struct mfc_cache **cache; 1718 struct mfc_cache **cache;
1721 int ct; 1719 int ct;
1722 }; 1720 };
1723 1721
1724 1722
1725 static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos) 1723 static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
1726 { 1724 {
1727 struct mfc_cache *mfc; 1725 struct mfc_cache *mfc;
1728 1726
1729 it->cache = mfc_cache_array; 1727 it->cache = mfc_cache_array;
1730 read_lock(&mrt_lock); 1728 read_lock(&mrt_lock);
1731 for (it->ct = 0; it->ct < MFC_LINES; it->ct++) 1729 for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
1732 for(mfc = mfc_cache_array[it->ct]; mfc; mfc = mfc->next) 1730 for(mfc = mfc_cache_array[it->ct]; mfc; mfc = mfc->next)
1733 if (pos-- == 0) 1731 if (pos-- == 0)
1734 return mfc; 1732 return mfc;
1735 read_unlock(&mrt_lock); 1733 read_unlock(&mrt_lock);
1736 1734
1737 it->cache = &mfc_unres_queue; 1735 it->cache = &mfc_unres_queue;
1738 spin_lock_bh(&mfc_unres_lock); 1736 spin_lock_bh(&mfc_unres_lock);
1739 for(mfc = mfc_unres_queue; mfc; mfc = mfc->next) 1737 for(mfc = mfc_unres_queue; mfc; mfc = mfc->next)
1740 if (pos-- == 0) 1738 if (pos-- == 0)
1741 return mfc; 1739 return mfc;
1742 spin_unlock_bh(&mfc_unres_lock); 1740 spin_unlock_bh(&mfc_unres_lock);
1743 1741
1744 it->cache = NULL; 1742 it->cache = NULL;
1745 return NULL; 1743 return NULL;
1746 } 1744 }
1747 1745
1748 1746
1749 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) 1747 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
1750 { 1748 {
1751 struct ipmr_mfc_iter *it = seq->private; 1749 struct ipmr_mfc_iter *it = seq->private;
1752 it->cache = NULL; 1750 it->cache = NULL;
1753 it->ct = 0; 1751 it->ct = 0;
1754 return *pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1) 1752 return *pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1)
1755 : SEQ_START_TOKEN; 1753 : SEQ_START_TOKEN;
1756 } 1754 }
1757 1755
1758 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) 1756 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1759 { 1757 {
1760 struct mfc_cache *mfc = v; 1758 struct mfc_cache *mfc = v;
1761 struct ipmr_mfc_iter *it = seq->private; 1759 struct ipmr_mfc_iter *it = seq->private;
1762 1760
1763 ++*pos; 1761 ++*pos;
1764 1762
1765 if (v == SEQ_START_TOKEN) 1763 if (v == SEQ_START_TOKEN)
1766 return ipmr_mfc_seq_idx(seq->private, 0); 1764 return ipmr_mfc_seq_idx(seq->private, 0);
1767 1765
1768 if (mfc->next) 1766 if (mfc->next)
1769 return mfc->next; 1767 return mfc->next;
1770 1768
1771 if (it->cache == &mfc_unres_queue) 1769 if (it->cache == &mfc_unres_queue)
1772 goto end_of_list; 1770 goto end_of_list;
1773 1771
1774 BUG_ON(it->cache != mfc_cache_array); 1772 BUG_ON(it->cache != mfc_cache_array);
1775 1773
1776 while (++it->ct < MFC_LINES) { 1774 while (++it->ct < MFC_LINES) {
1777 mfc = mfc_cache_array[it->ct]; 1775 mfc = mfc_cache_array[it->ct];
1778 if (mfc) 1776 if (mfc)
1779 return mfc; 1777 return mfc;
1780 } 1778 }
1781 1779
1782 /* exhausted cache_array, show unresolved */ 1780 /* exhausted cache_array, show unresolved */
1783 read_unlock(&mrt_lock); 1781 read_unlock(&mrt_lock);
1784 it->cache = &mfc_unres_queue; 1782 it->cache = &mfc_unres_queue;
1785 it->ct = 0; 1783 it->ct = 0;
1786 1784
1787 spin_lock_bh(&mfc_unres_lock); 1785 spin_lock_bh(&mfc_unres_lock);
1788 mfc = mfc_unres_queue; 1786 mfc = mfc_unres_queue;
1789 if (mfc) 1787 if (mfc)
1790 return mfc; 1788 return mfc;
1791 1789
1792 end_of_list: 1790 end_of_list:
1793 spin_unlock_bh(&mfc_unres_lock); 1791 spin_unlock_bh(&mfc_unres_lock);
1794 it->cache = NULL; 1792 it->cache = NULL;
1795 1793
1796 return NULL; 1794 return NULL;
1797 } 1795 }
1798 1796
1799 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v) 1797 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
1800 { 1798 {
1801 struct ipmr_mfc_iter *it = seq->private; 1799 struct ipmr_mfc_iter *it = seq->private;
1802 1800
1803 if (it->cache == &mfc_unres_queue) 1801 if (it->cache == &mfc_unres_queue)
1804 spin_unlock_bh(&mfc_unres_lock); 1802 spin_unlock_bh(&mfc_unres_lock);
1805 else if (it->cache == mfc_cache_array) 1803 else if (it->cache == mfc_cache_array)
1806 read_unlock(&mrt_lock); 1804 read_unlock(&mrt_lock);
1807 } 1805 }
1808 1806
1809 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) 1807 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1810 { 1808 {
1811 int n; 1809 int n;
1812 1810
1813 if (v == SEQ_START_TOKEN) { 1811 if (v == SEQ_START_TOKEN) {
1814 seq_puts(seq, 1812 seq_puts(seq,
1815 "Group Origin Iif Pkts Bytes Wrong Oifs\n"); 1813 "Group Origin Iif Pkts Bytes Wrong Oifs\n");
1816 } else { 1814 } else {
1817 const struct mfc_cache *mfc = v; 1815 const struct mfc_cache *mfc = v;
1818 const struct ipmr_mfc_iter *it = seq->private; 1816 const struct ipmr_mfc_iter *it = seq->private;
1819 1817
1820 seq_printf(seq, "%08lX %08lX %-3d %8ld %8ld %8ld", 1818 seq_printf(seq, "%08lX %08lX %-3d %8ld %8ld %8ld",
1821 (unsigned long) mfc->mfc_mcastgrp, 1819 (unsigned long) mfc->mfc_mcastgrp,
1822 (unsigned long) mfc->mfc_origin, 1820 (unsigned long) mfc->mfc_origin,
1823 mfc->mfc_parent, 1821 mfc->mfc_parent,
1824 mfc->mfc_un.res.pkt, 1822 mfc->mfc_un.res.pkt,
1825 mfc->mfc_un.res.bytes, 1823 mfc->mfc_un.res.bytes,
1826 mfc->mfc_un.res.wrong_if); 1824 mfc->mfc_un.res.wrong_if);
1827 1825
1828 if (it->cache != &mfc_unres_queue) { 1826 if (it->cache != &mfc_unres_queue) {
1829 for(n = mfc->mfc_un.res.minvif; 1827 for(n = mfc->mfc_un.res.minvif;
1830 n < mfc->mfc_un.res.maxvif; n++ ) { 1828 n < mfc->mfc_un.res.maxvif; n++ ) {
1831 if(VIF_EXISTS(n) 1829 if(VIF_EXISTS(n)
1832 && mfc->mfc_un.res.ttls[n] < 255) 1830 && mfc->mfc_un.res.ttls[n] < 255)
1833 seq_printf(seq, 1831 seq_printf(seq,
1834 " %2d:%-3d", 1832 " %2d:%-3d",
1835 n, mfc->mfc_un.res.ttls[n]); 1833 n, mfc->mfc_un.res.ttls[n]);
1836 } 1834 }
1837 } 1835 }
1838 seq_putc(seq, '\n'); 1836 seq_putc(seq, '\n');
1839 } 1837 }
1840 return 0; 1838 return 0;
1841 } 1839 }
1842 1840
1843 static struct seq_operations ipmr_mfc_seq_ops = { 1841 static struct seq_operations ipmr_mfc_seq_ops = {
1844 .start = ipmr_mfc_seq_start, 1842 .start = ipmr_mfc_seq_start,
1845 .next = ipmr_mfc_seq_next, 1843 .next = ipmr_mfc_seq_next,
1846 .stop = ipmr_mfc_seq_stop, 1844 .stop = ipmr_mfc_seq_stop,
1847 .show = ipmr_mfc_seq_show, 1845 .show = ipmr_mfc_seq_show,
1848 }; 1846 };
1849 1847
1850 static int ipmr_mfc_open(struct inode *inode, struct file *file) 1848 static int ipmr_mfc_open(struct inode *inode, struct file *file)
1851 { 1849 {
1852 struct seq_file *seq; 1850 struct seq_file *seq;
1853 int rc = -ENOMEM; 1851 int rc = -ENOMEM;
1854 struct ipmr_mfc_iter *s = kmalloc(sizeof(*s), GFP_KERNEL); 1852 struct ipmr_mfc_iter *s = kmalloc(sizeof(*s), GFP_KERNEL);
1855 1853
1856 if (!s) 1854 if (!s)
1857 goto out; 1855 goto out;
1858 1856
1859 rc = seq_open(file, &ipmr_mfc_seq_ops); 1857 rc = seq_open(file, &ipmr_mfc_seq_ops);
1860 if (rc) 1858 if (rc)
1861 goto out_kfree; 1859 goto out_kfree;
1862 1860
1863 seq = file->private_data; 1861 seq = file->private_data;
1864 seq->private = s; 1862 seq->private = s;
1865 out: 1863 out:
1866 return rc; 1864 return rc;
1867 out_kfree: 1865 out_kfree:
1868 kfree(s); 1866 kfree(s);
1869 goto out; 1867 goto out;
1870 1868
1871 } 1869 }
1872 1870
1873 static struct file_operations ipmr_mfc_fops = { 1871 static struct file_operations ipmr_mfc_fops = {
1874 .owner = THIS_MODULE, 1872 .owner = THIS_MODULE,
1875 .open = ipmr_mfc_open, 1873 .open = ipmr_mfc_open,
1876 .read = seq_read, 1874 .read = seq_read,
1877 .llseek = seq_lseek, 1875 .llseek = seq_lseek,
1878 .release = seq_release_private, 1876 .release = seq_release_private,
1879 }; 1877 };
1880 #endif 1878 #endif
1881 1879
1882 #ifdef CONFIG_IP_PIMSM_V2 1880 #ifdef CONFIG_IP_PIMSM_V2
1883 static struct net_protocol pim_protocol = { 1881 static struct net_protocol pim_protocol = {
1884 .handler = pim_rcv, 1882 .handler = pim_rcv,
1885 }; 1883 };
1886 #endif 1884 #endif
1887 1885
1888 1886
1889 /* 1887 /*
1890 * Setup for IP multicast routing 1888 * Setup for IP multicast routing
1891 */ 1889 */
1892 1890
1893 void __init ip_mr_init(void) 1891 void __init ip_mr_init(void)
1894 { 1892 {
1895 mrt_cachep = kmem_cache_create("ip_mrt_cache", 1893 mrt_cachep = kmem_cache_create("ip_mrt_cache",
1896 sizeof(struct mfc_cache), 1894 sizeof(struct mfc_cache),
1897 0, SLAB_HWCACHE_ALIGN, 1895 0, SLAB_HWCACHE_ALIGN,
1898 NULL, NULL); 1896 NULL, NULL);
1899 if (!mrt_cachep) 1897 if (!mrt_cachep)
1900 panic("cannot allocate ip_mrt_cache"); 1898 panic("cannot allocate ip_mrt_cache");
1901 1899
1902 init_timer(&ipmr_expire_timer); 1900 init_timer(&ipmr_expire_timer);
1903 ipmr_expire_timer.function=ipmr_expire_process; 1901 ipmr_expire_timer.function=ipmr_expire_process;
1904 register_netdevice_notifier(&ip_mr_notifier); 1902 register_netdevice_notifier(&ip_mr_notifier);
1905 #ifdef CONFIG_PROC_FS 1903 #ifdef CONFIG_PROC_FS
1906 proc_net_fops_create("ip_mr_vif", 0, &ipmr_vif_fops); 1904 proc_net_fops_create("ip_mr_vif", 0, &ipmr_vif_fops);
1907 proc_net_fops_create("ip_mr_cache", 0, &ipmr_mfc_fops); 1905 proc_net_fops_create("ip_mr_cache", 0, &ipmr_mfc_fops);
1908 #endif 1906 #endif
1909 } 1907 }
1910 1908
net/ipv4/xfrm4_mode_tunnel.c
1 /* 1 /*
2 * xfrm4_mode_tunnel.c - Tunnel mode encapsulation for IPv4. 2 * xfrm4_mode_tunnel.c - Tunnel mode encapsulation for IPv4.
3 * 3 *
4 * Copyright (c) 2004-2006 Herbert Xu <herbert@gondor.apana.org.au> 4 * Copyright (c) 2004-2006 Herbert Xu <herbert@gondor.apana.org.au>
5 */ 5 */
6 6
7 #include <linux/init.h> 7 #include <linux/init.h>
8 #include <linux/kernel.h> 8 #include <linux/kernel.h>
9 #include <linux/module.h> 9 #include <linux/module.h>
10 #include <linux/skbuff.h> 10 #include <linux/skbuff.h>
11 #include <linux/stringify.h> 11 #include <linux/stringify.h>
12 #include <net/dst.h> 12 #include <net/dst.h>
13 #include <net/inet_ecn.h> 13 #include <net/inet_ecn.h>
14 #include <net/ip.h> 14 #include <net/ip.h>
15 #include <net/xfrm.h> 15 #include <net/xfrm.h>
16 16
17 static inline void ipip_ecn_decapsulate(struct sk_buff *skb) 17 static inline void ipip_ecn_decapsulate(struct sk_buff *skb)
18 { 18 {
19 struct iphdr *outer_iph = skb->nh.iph; 19 struct iphdr *outer_iph = skb->nh.iph;
20 struct iphdr *inner_iph = skb->h.ipiph; 20 struct iphdr *inner_iph = skb->h.ipiph;
21 21
22 if (INET_ECN_is_ce(outer_iph->tos)) 22 if (INET_ECN_is_ce(outer_iph->tos))
23 IP_ECN_set_ce(inner_iph); 23 IP_ECN_set_ce(inner_iph);
24 } 24 }
25 25
26 /* Add encapsulation header. 26 /* Add encapsulation header.
27 * 27 *
28 * The top IP header will be constructed per RFC 2401. The following fields 28 * The top IP header will be constructed per RFC 2401. The following fields
29 * in it shall be filled in by x->type->output: 29 * in it shall be filled in by x->type->output:
30 * tot_len 30 * tot_len
31 * check 31 * check
32 * 32 *
33 * On exit, skb->h will be set to the start of the payload to be processed 33 * On exit, skb->h will be set to the start of the payload to be processed
34 * by x->type->output and skb->nh will be set to the top IP header. 34 * by x->type->output and skb->nh will be set to the top IP header.
35 */ 35 */
36 static int xfrm4_tunnel_output(struct sk_buff *skb) 36 static int xfrm4_tunnel_output(struct sk_buff *skb)
37 { 37 {
38 struct dst_entry *dst = skb->dst; 38 struct dst_entry *dst = skb->dst;
39 struct xfrm_state *x = dst->xfrm; 39 struct xfrm_state *x = dst->xfrm;
40 struct iphdr *iph, *top_iph; 40 struct iphdr *iph, *top_iph;
41 int flags; 41 int flags;
42 42
43 iph = skb->nh.iph; 43 iph = skb->nh.iph;
44 skb->h.ipiph = iph; 44 skb->h.ipiph = iph;
45 45
46 skb->nh.raw = skb_push(skb, x->props.header_len); 46 skb->nh.raw = skb_push(skb, x->props.header_len);
47 top_iph = skb->nh.iph; 47 top_iph = skb->nh.iph;
48 48
49 top_iph->ihl = 5; 49 top_iph->ihl = 5;
50 top_iph->version = 4; 50 top_iph->version = 4;
51 51
52 /* DS disclosed */ 52 /* DS disclosed */
53 top_iph->tos = INET_ECN_encapsulate(iph->tos, iph->tos); 53 top_iph->tos = INET_ECN_encapsulate(iph->tos, iph->tos);
54 54
55 flags = x->props.flags; 55 flags = x->props.flags;
56 if (flags & XFRM_STATE_NOECN) 56 if (flags & XFRM_STATE_NOECN)
57 IP_ECN_clear(top_iph); 57 IP_ECN_clear(top_iph);
58 58
59 top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ? 59 top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ?
60 0 : (iph->frag_off & htons(IP_DF)); 60 0 : (iph->frag_off & htons(IP_DF));
61 if (!top_iph->frag_off) 61 if (!top_iph->frag_off)
62 __ip_select_ident(top_iph, dst->child, 0); 62 __ip_select_ident(top_iph, dst->child, 0);
63 63
64 top_iph->ttl = dst_metric(dst->child, RTAX_HOPLIMIT); 64 top_iph->ttl = dst_metric(dst->child, RTAX_HOPLIMIT);
65 65
66 top_iph->saddr = x->props.saddr.a4; 66 top_iph->saddr = x->props.saddr.a4;
67 top_iph->daddr = x->id.daddr.a4; 67 top_iph->daddr = x->id.daddr.a4;
68 top_iph->protocol = IPPROTO_IPIP; 68 top_iph->protocol = IPPROTO_IPIP;
69 69
70 memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); 70 memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
71 return 0; 71 return 0;
72 } 72 }
73 73
74 static int xfrm4_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) 74 static int xfrm4_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
75 { 75 {
76 struct iphdr *iph = skb->nh.iph; 76 struct iphdr *iph = skb->nh.iph;
77 int err = -EINVAL; 77 int err = -EINVAL;
78 78
79 if (iph->protocol != IPPROTO_IPIP) 79 if (iph->protocol != IPPROTO_IPIP)
80 goto out; 80 goto out;
81 if (!pskb_may_pull(skb, sizeof(struct iphdr))) 81 if (!pskb_may_pull(skb, sizeof(struct iphdr)))
82 goto out; 82 goto out;
83 83
84 if (skb_cloned(skb) && 84 if (skb_cloned(skb) &&
85 (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) 85 (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
86 goto out; 86 goto out;
87 87
88 if (x->props.flags & XFRM_STATE_DECAP_DSCP) 88 if (x->props.flags & XFRM_STATE_DECAP_DSCP)
89 ipv4_copy_dscp(iph, skb->h.ipiph); 89 ipv4_copy_dscp(iph, skb->h.ipiph);
90 if (!(x->props.flags & XFRM_STATE_NOECN)) 90 if (!(x->props.flags & XFRM_STATE_NOECN))
91 ipip_ecn_decapsulate(skb); 91 ipip_ecn_decapsulate(skb);
92 skb->mac.raw = memmove(skb->data - skb->mac_len, 92 skb->mac.raw = memmove(skb->data - skb->mac_len,
93 skb->mac.raw, skb->mac_len); 93 skb->mac.raw, skb->mac_len);
94 skb->nh.raw = skb->data; 94 skb->nh.raw = skb->data;
95 memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
96 err = 0; 95 err = 0;
97 96
98 out: 97 out:
99 return err; 98 return err;
100 } 99 }
101 100
102 static struct xfrm_mode xfrm4_tunnel_mode = { 101 static struct xfrm_mode xfrm4_tunnel_mode = {
103 .input = xfrm4_tunnel_input, 102 .input = xfrm4_tunnel_input,
104 .output = xfrm4_tunnel_output, 103 .output = xfrm4_tunnel_output,
105 .owner = THIS_MODULE, 104 .owner = THIS_MODULE,
106 .encap = XFRM_MODE_TUNNEL, 105 .encap = XFRM_MODE_TUNNEL,
107 }; 106 };
108 107
109 static int __init xfrm4_tunnel_init(void) 108 static int __init xfrm4_tunnel_init(void)
110 { 109 {
111 return xfrm_register_mode(&xfrm4_tunnel_mode, AF_INET); 110 return xfrm_register_mode(&xfrm4_tunnel_mode, AF_INET);
112 } 111 }
113 112
114 static void __exit xfrm4_tunnel_exit(void) 113 static void __exit xfrm4_tunnel_exit(void)
115 { 114 {
116 int err; 115 int err;
117 116
118 err = xfrm_unregister_mode(&xfrm4_tunnel_mode, AF_INET); 117 err = xfrm_unregister_mode(&xfrm4_tunnel_mode, AF_INET);
119 BUG_ON(err); 118 BUG_ON(err);
120 } 119 }
121 120
122 module_init(xfrm4_tunnel_init); 121 module_init(xfrm4_tunnel_init);
123 module_exit(xfrm4_tunnel_exit); 122 module_exit(xfrm4_tunnel_exit);
124 MODULE_LICENSE("GPL"); 123 MODULE_LICENSE("GPL");
125 MODULE_ALIAS_XFRM_MODE(AF_INET, XFRM_MODE_TUNNEL); 124 MODULE_ALIAS_XFRM_MODE(AF_INET, XFRM_MODE_TUNNEL);
126 125
1 /* 1 /*
2 * IPv6 over IPv4 tunnel device - Simple Internet Transition (SIT) 2 * IPv6 over IPv4 tunnel device - Simple Internet Transition (SIT)
3 * Linux INET6 implementation 3 * Linux INET6 implementation
4 * 4 *
5 * Authors: 5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt> 6 * Pedro Roque <roque@di.fc.ul.pt>
7 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> 7 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
8 * 8 *
9 * $Id: sit.c,v 1.53 2001/09/25 05:09:53 davem Exp $ 9 * $Id: sit.c,v 1.53 2001/09/25 05:09:53 davem Exp $
10 * 10 *
11 * This program is free software; you can redistribute it and/or 11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU General Public License 12 * modify it under the terms of the GNU General Public License
13 * as published by the Free Software Foundation; either version 13 * as published by the Free Software Foundation; either version
14 * 2 of the License, or (at your option) any later version. 14 * 2 of the License, or (at your option) any later version.
15 * 15 *
16 * Changes: 16 * Changes:
17 * Roger Venning <r.venning@telstra.com>: 6to4 support 17 * Roger Venning <r.venning@telstra.com>: 6to4 support
18 * Nate Thompson <nate@thebog.net>: 6to4 support 18 * Nate Thompson <nate@thebog.net>: 6to4 support
19 */ 19 */
20 20
21 #include <linux/module.h> 21 #include <linux/module.h>
22 #include <linux/capability.h> 22 #include <linux/capability.h>
23 #include <linux/errno.h> 23 #include <linux/errno.h>
24 #include <linux/types.h> 24 #include <linux/types.h>
25 #include <linux/socket.h> 25 #include <linux/socket.h>
26 #include <linux/sockios.h> 26 #include <linux/sockios.h>
27 #include <linux/sched.h> 27 #include <linux/sched.h>
28 #include <linux/net.h> 28 #include <linux/net.h>
29 #include <linux/in6.h> 29 #include <linux/in6.h>
30 #include <linux/netdevice.h> 30 #include <linux/netdevice.h>
31 #include <linux/if_arp.h> 31 #include <linux/if_arp.h>
32 #include <linux/icmp.h> 32 #include <linux/icmp.h>
33 #include <asm/uaccess.h> 33 #include <asm/uaccess.h>
34 #include <linux/init.h> 34 #include <linux/init.h>
35 #include <linux/netfilter_ipv4.h> 35 #include <linux/netfilter_ipv4.h>
36 #include <linux/if_ether.h> 36 #include <linux/if_ether.h>
37 37
38 #include <net/sock.h> 38 #include <net/sock.h>
39 #include <net/snmp.h> 39 #include <net/snmp.h>
40 40
41 #include <net/ipv6.h> 41 #include <net/ipv6.h>
42 #include <net/protocol.h> 42 #include <net/protocol.h>
43 #include <net/transp_v6.h> 43 #include <net/transp_v6.h>
44 #include <net/ip6_fib.h> 44 #include <net/ip6_fib.h>
45 #include <net/ip6_route.h> 45 #include <net/ip6_route.h>
46 #include <net/ndisc.h> 46 #include <net/ndisc.h>
47 #include <net/addrconf.h> 47 #include <net/addrconf.h>
48 #include <net/ip.h> 48 #include <net/ip.h>
49 #include <net/udp.h> 49 #include <net/udp.h>
50 #include <net/icmp.h> 50 #include <net/icmp.h>
51 #include <net/ipip.h> 51 #include <net/ipip.h>
52 #include <net/inet_ecn.h> 52 #include <net/inet_ecn.h>
53 #include <net/xfrm.h> 53 #include <net/xfrm.h>
54 #include <net/dsfield.h> 54 #include <net/dsfield.h>
55 55
56 /* 56 /*
57 This version of net/ipv6/sit.c is cloned of net/ipv4/ip_gre.c 57 This version of net/ipv6/sit.c is cloned of net/ipv4/ip_gre.c
58 58
59 For comments look at net/ipv4/ip_gre.c --ANK 59 For comments look at net/ipv4/ip_gre.c --ANK
60 */ 60 */
61 61
/* Hash of configured tunnels, keyed by the outer IPv4 endpoints. */
#define HASH_SIZE  16
#define HASH(addr) ((addr^(addr>>4))&0xF)

static int ipip6_fb_tunnel_init(struct net_device *dev);
static int ipip6_tunnel_init(struct net_device *dev);
static void ipip6_tunnel_setup(struct net_device *dev);

/* The fallback ("sit0") device, created at module init. */
static struct net_device *ipip6_fb_tunnel_dev;

/*
 * Buckets indexed by which outer addresses are configured:
 * index 0 = wildcard, 1 = local only, 2 = remote only, 3 = both.
 */
static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
static struct ip_tunnel *tunnels_r[HASH_SIZE];
static struct ip_tunnel *tunnels_l[HASH_SIZE];
static struct ip_tunnel *tunnels_wc[1];
static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };

/* Protects the tunnel hash tables above. */
static DEFINE_RWLOCK(ipip6_lock);
78 78
79 static struct ip_tunnel * ipip6_tunnel_lookup(u32 remote, u32 local) 79 static struct ip_tunnel * ipip6_tunnel_lookup(u32 remote, u32 local)
80 { 80 {
81 unsigned h0 = HASH(remote); 81 unsigned h0 = HASH(remote);
82 unsigned h1 = HASH(local); 82 unsigned h1 = HASH(local);
83 struct ip_tunnel *t; 83 struct ip_tunnel *t;
84 84
85 for (t = tunnels_r_l[h0^h1]; t; t = t->next) { 85 for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
86 if (local == t->parms.iph.saddr && 86 if (local == t->parms.iph.saddr &&
87 remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) 87 remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
88 return t; 88 return t;
89 } 89 }
90 for (t = tunnels_r[h0]; t; t = t->next) { 90 for (t = tunnels_r[h0]; t; t = t->next) {
91 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) 91 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
92 return t; 92 return t;
93 } 93 }
94 for (t = tunnels_l[h1]; t; t = t->next) { 94 for (t = tunnels_l[h1]; t; t = t->next) {
95 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP)) 95 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
96 return t; 96 return t;
97 } 97 }
98 if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP)) 98 if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
99 return t; 99 return t;
100 return NULL; 100 return NULL;
101 } 101 }
102 102
103 static struct ip_tunnel ** ipip6_bucket(struct ip_tunnel *t) 103 static struct ip_tunnel ** ipip6_bucket(struct ip_tunnel *t)
104 { 104 {
105 u32 remote = t->parms.iph.daddr; 105 u32 remote = t->parms.iph.daddr;
106 u32 local = t->parms.iph.saddr; 106 u32 local = t->parms.iph.saddr;
107 unsigned h = 0; 107 unsigned h = 0;
108 int prio = 0; 108 int prio = 0;
109 109
110 if (remote) { 110 if (remote) {
111 prio |= 2; 111 prio |= 2;
112 h ^= HASH(remote); 112 h ^= HASH(remote);
113 } 113 }
114 if (local) { 114 if (local) {
115 prio |= 1; 115 prio |= 1;
116 h ^= HASH(local); 116 h ^= HASH(local);
117 } 117 }
118 return &tunnels[prio][h]; 118 return &tunnels[prio][h];
119 } 119 }
120 120
121 static void ipip6_tunnel_unlink(struct ip_tunnel *t) 121 static void ipip6_tunnel_unlink(struct ip_tunnel *t)
122 { 122 {
123 struct ip_tunnel **tp; 123 struct ip_tunnel **tp;
124 124
125 for (tp = ipip6_bucket(t); *tp; tp = &(*tp)->next) { 125 for (tp = ipip6_bucket(t); *tp; tp = &(*tp)->next) {
126 if (t == *tp) { 126 if (t == *tp) {
127 write_lock_bh(&ipip6_lock); 127 write_lock_bh(&ipip6_lock);
128 *tp = t->next; 128 *tp = t->next;
129 write_unlock_bh(&ipip6_lock); 129 write_unlock_bh(&ipip6_lock);
130 break; 130 break;
131 } 131 }
132 } 132 }
133 } 133 }
134 134
135 static void ipip6_tunnel_link(struct ip_tunnel *t) 135 static void ipip6_tunnel_link(struct ip_tunnel *t)
136 { 136 {
137 struct ip_tunnel **tp = ipip6_bucket(t); 137 struct ip_tunnel **tp = ipip6_bucket(t);
138 138
139 t->next = *tp; 139 t->next = *tp;
140 write_lock_bh(&ipip6_lock); 140 write_lock_bh(&ipip6_lock);
141 *tp = t; 141 *tp = t;
142 write_unlock_bh(&ipip6_lock); 142 write_unlock_bh(&ipip6_lock);
143 } 143 }
144 144
145 static struct ip_tunnel * ipip6_tunnel_locate(struct ip_tunnel_parm *parms, int create) 145 static struct ip_tunnel * ipip6_tunnel_locate(struct ip_tunnel_parm *parms, int create)
146 { 146 {
147 u32 remote = parms->iph.daddr; 147 u32 remote = parms->iph.daddr;
148 u32 local = parms->iph.saddr; 148 u32 local = parms->iph.saddr;
149 struct ip_tunnel *t, **tp, *nt; 149 struct ip_tunnel *t, **tp, *nt;
150 struct net_device *dev; 150 struct net_device *dev;
151 unsigned h = 0; 151 unsigned h = 0;
152 int prio = 0; 152 int prio = 0;
153 char name[IFNAMSIZ]; 153 char name[IFNAMSIZ];
154 154
155 if (remote) { 155 if (remote) {
156 prio |= 2; 156 prio |= 2;
157 h ^= HASH(remote); 157 h ^= HASH(remote);
158 } 158 }
159 if (local) { 159 if (local) {
160 prio |= 1; 160 prio |= 1;
161 h ^= HASH(local); 161 h ^= HASH(local);
162 } 162 }
163 for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) { 163 for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
164 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) 164 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
165 return t; 165 return t;
166 } 166 }
167 if (!create) 167 if (!create)
168 goto failed; 168 goto failed;
169 169
170 if (parms->name[0]) 170 if (parms->name[0])
171 strlcpy(name, parms->name, IFNAMSIZ); 171 strlcpy(name, parms->name, IFNAMSIZ);
172 else { 172 else {
173 int i; 173 int i;
174 for (i=1; i<100; i++) { 174 for (i=1; i<100; i++) {
175 sprintf(name, "sit%d", i); 175 sprintf(name, "sit%d", i);
176 if (__dev_get_by_name(name) == NULL) 176 if (__dev_get_by_name(name) == NULL)
177 break; 177 break;
178 } 178 }
179 if (i==100) 179 if (i==100)
180 goto failed; 180 goto failed;
181 } 181 }
182 182
183 dev = alloc_netdev(sizeof(*t), name, ipip6_tunnel_setup); 183 dev = alloc_netdev(sizeof(*t), name, ipip6_tunnel_setup);
184 if (dev == NULL) 184 if (dev == NULL)
185 return NULL; 185 return NULL;
186 186
187 nt = netdev_priv(dev); 187 nt = netdev_priv(dev);
188 dev->init = ipip6_tunnel_init; 188 dev->init = ipip6_tunnel_init;
189 nt->parms = *parms; 189 nt->parms = *parms;
190 190
191 if (register_netdevice(dev) < 0) { 191 if (register_netdevice(dev) < 0) {
192 free_netdev(dev); 192 free_netdev(dev);
193 goto failed; 193 goto failed;
194 } 194 }
195 195
196 dev_hold(dev); 196 dev_hold(dev);
197 197
198 ipip6_tunnel_link(nt); 198 ipip6_tunnel_link(nt);
199 return nt; 199 return nt;
200 200
201 failed: 201 failed:
202 return NULL; 202 return NULL;
203 } 203 }
204 204
205 static void ipip6_tunnel_uninit(struct net_device *dev) 205 static void ipip6_tunnel_uninit(struct net_device *dev)
206 { 206 {
207 if (dev == ipip6_fb_tunnel_dev) { 207 if (dev == ipip6_fb_tunnel_dev) {
208 write_lock_bh(&ipip6_lock); 208 write_lock_bh(&ipip6_lock);
209 tunnels_wc[0] = NULL; 209 tunnels_wc[0] = NULL;
210 write_unlock_bh(&ipip6_lock); 210 write_unlock_bh(&ipip6_lock);
211 dev_put(dev); 211 dev_put(dev);
212 } else { 212 } else {
213 ipip6_tunnel_unlink(netdev_priv(dev)); 213 ipip6_tunnel_unlink(netdev_priv(dev));
214 dev_put(dev); 214 dev_put(dev);
215 } 215 }
216 } 216 }
217 217
218 218
219 static void ipip6_err(struct sk_buff *skb, u32 info) 219 static void ipip6_err(struct sk_buff *skb, u32 info)
220 { 220 {
221 #ifndef I_WISH_WORLD_WERE_PERFECT 221 #ifndef I_WISH_WORLD_WERE_PERFECT
222 222
223 /* It is not :-( All the routers (except for Linux) return only 223 /* It is not :-( All the routers (except for Linux) return only
224 8 bytes of packet payload. It means, that precise relaying of 224 8 bytes of packet payload. It means, that precise relaying of
225 ICMP in the real Internet is absolutely infeasible. 225 ICMP in the real Internet is absolutely infeasible.
226 */ 226 */
227 struct iphdr *iph = (struct iphdr*)skb->data; 227 struct iphdr *iph = (struct iphdr*)skb->data;
228 int type = skb->h.icmph->type; 228 int type = skb->h.icmph->type;
229 int code = skb->h.icmph->code; 229 int code = skb->h.icmph->code;
230 struct ip_tunnel *t; 230 struct ip_tunnel *t;
231 231
232 switch (type) { 232 switch (type) {
233 default: 233 default:
234 case ICMP_PARAMETERPROB: 234 case ICMP_PARAMETERPROB:
235 return; 235 return;
236 236
237 case ICMP_DEST_UNREACH: 237 case ICMP_DEST_UNREACH:
238 switch (code) { 238 switch (code) {
239 case ICMP_SR_FAILED: 239 case ICMP_SR_FAILED:
240 case ICMP_PORT_UNREACH: 240 case ICMP_PORT_UNREACH:
241 /* Impossible event. */ 241 /* Impossible event. */
242 return; 242 return;
243 case ICMP_FRAG_NEEDED: 243 case ICMP_FRAG_NEEDED:
244 /* Soft state for pmtu is maintained by IP core. */ 244 /* Soft state for pmtu is maintained by IP core. */
245 return; 245 return;
246 default: 246 default:
247 /* All others are translated to HOST_UNREACH. 247 /* All others are translated to HOST_UNREACH.
248 rfc2003 contains "deep thoughts" about NET_UNREACH, 248 rfc2003 contains "deep thoughts" about NET_UNREACH,
249 I believe they are just ether pollution. --ANK 249 I believe they are just ether pollution. --ANK
250 */ 250 */
251 break; 251 break;
252 } 252 }
253 break; 253 break;
254 case ICMP_TIME_EXCEEDED: 254 case ICMP_TIME_EXCEEDED:
255 if (code != ICMP_EXC_TTL) 255 if (code != ICMP_EXC_TTL)
256 return; 256 return;
257 break; 257 break;
258 } 258 }
259 259
260 read_lock(&ipip6_lock); 260 read_lock(&ipip6_lock);
261 t = ipip6_tunnel_lookup(iph->daddr, iph->saddr); 261 t = ipip6_tunnel_lookup(iph->daddr, iph->saddr);
262 if (t == NULL || t->parms.iph.daddr == 0) 262 if (t == NULL || t->parms.iph.daddr == 0)
263 goto out; 263 goto out;
264 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) 264 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
265 goto out; 265 goto out;
266 266
267 if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO) 267 if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
268 t->err_count++; 268 t->err_count++;
269 else 269 else
270 t->err_count = 1; 270 t->err_count = 1;
271 t->err_time = jiffies; 271 t->err_time = jiffies;
272 out: 272 out:
273 read_unlock(&ipip6_lock); 273 read_unlock(&ipip6_lock);
274 return; 274 return;
275 #else 275 #else
276 struct iphdr *iph = (struct iphdr*)dp; 276 struct iphdr *iph = (struct iphdr*)dp;
277 int hlen = iph->ihl<<2; 277 int hlen = iph->ihl<<2;
278 struct ipv6hdr *iph6; 278 struct ipv6hdr *iph6;
279 int type = skb->h.icmph->type; 279 int type = skb->h.icmph->type;
280 int code = skb->h.icmph->code; 280 int code = skb->h.icmph->code;
281 int rel_type = 0; 281 int rel_type = 0;
282 int rel_code = 0; 282 int rel_code = 0;
283 int rel_info = 0; 283 int rel_info = 0;
284 struct sk_buff *skb2; 284 struct sk_buff *skb2;
285 struct rt6_info *rt6i; 285 struct rt6_info *rt6i;
286 286
287 if (len < hlen + sizeof(struct ipv6hdr)) 287 if (len < hlen + sizeof(struct ipv6hdr))
288 return; 288 return;
289 iph6 = (struct ipv6hdr*)(dp + hlen); 289 iph6 = (struct ipv6hdr*)(dp + hlen);
290 290
291 switch (type) { 291 switch (type) {
292 default: 292 default:
293 return; 293 return;
294 case ICMP_PARAMETERPROB: 294 case ICMP_PARAMETERPROB:
295 if (skb->h.icmph->un.gateway < hlen) 295 if (skb->h.icmph->un.gateway < hlen)
296 return; 296 return;
297 297
298 /* So... This guy found something strange INSIDE encapsulated 298 /* So... This guy found something strange INSIDE encapsulated
299 packet. Well, he is fool, but what can we do ? 299 packet. Well, he is fool, but what can we do ?
300 */ 300 */
301 rel_type = ICMPV6_PARAMPROB; 301 rel_type = ICMPV6_PARAMPROB;
302 rel_info = skb->h.icmph->un.gateway - hlen; 302 rel_info = skb->h.icmph->un.gateway - hlen;
303 break; 303 break;
304 304
305 case ICMP_DEST_UNREACH: 305 case ICMP_DEST_UNREACH:
306 switch (code) { 306 switch (code) {
307 case ICMP_SR_FAILED: 307 case ICMP_SR_FAILED:
308 case ICMP_PORT_UNREACH: 308 case ICMP_PORT_UNREACH:
309 /* Impossible event. */ 309 /* Impossible event. */
310 return; 310 return;
311 case ICMP_FRAG_NEEDED: 311 case ICMP_FRAG_NEEDED:
312 /* Too complicated case ... */ 312 /* Too complicated case ... */
313 return; 313 return;
314 default: 314 default:
315 /* All others are translated to HOST_UNREACH. 315 /* All others are translated to HOST_UNREACH.
316 rfc2003 contains "deep thoughts" about NET_UNREACH, 316 rfc2003 contains "deep thoughts" about NET_UNREACH,
317 I believe, it is just ether pollution. --ANK 317 I believe, it is just ether pollution. --ANK
318 */ 318 */
319 rel_type = ICMPV6_DEST_UNREACH; 319 rel_type = ICMPV6_DEST_UNREACH;
320 rel_code = ICMPV6_ADDR_UNREACH; 320 rel_code = ICMPV6_ADDR_UNREACH;
321 break; 321 break;
322 } 322 }
323 break; 323 break;
324 case ICMP_TIME_EXCEEDED: 324 case ICMP_TIME_EXCEEDED:
325 if (code != ICMP_EXC_TTL) 325 if (code != ICMP_EXC_TTL)
326 return; 326 return;
327 rel_type = ICMPV6_TIME_EXCEED; 327 rel_type = ICMPV6_TIME_EXCEED;
328 rel_code = ICMPV6_EXC_HOPLIMIT; 328 rel_code = ICMPV6_EXC_HOPLIMIT;
329 break; 329 break;
330 } 330 }
331 331
332 /* Prepare fake skb to feed it to icmpv6_send */ 332 /* Prepare fake skb to feed it to icmpv6_send */
333 skb2 = skb_clone(skb, GFP_ATOMIC); 333 skb2 = skb_clone(skb, GFP_ATOMIC);
334 if (skb2 == NULL) 334 if (skb2 == NULL)
335 return; 335 return;
336 dst_release(skb2->dst); 336 dst_release(skb2->dst);
337 skb2->dst = NULL; 337 skb2->dst = NULL;
338 skb_pull(skb2, skb->data - (u8*)iph6); 338 skb_pull(skb2, skb->data - (u8*)iph6);
339 skb2->nh.raw = skb2->data; 339 skb2->nh.raw = skb2->data;
340 340
341 /* Try to guess incoming interface */ 341 /* Try to guess incoming interface */
342 rt6i = rt6_lookup(&iph6->saddr, NULL, NULL, 0); 342 rt6i = rt6_lookup(&iph6->saddr, NULL, NULL, 0);
343 if (rt6i && rt6i->rt6i_dev) { 343 if (rt6i && rt6i->rt6i_dev) {
344 skb2->dev = rt6i->rt6i_dev; 344 skb2->dev = rt6i->rt6i_dev;
345 345
346 rt6i = rt6_lookup(&iph6->daddr, &iph6->saddr, NULL, 0); 346 rt6i = rt6_lookup(&iph6->daddr, &iph6->saddr, NULL, 0);
347 347
348 if (rt6i && rt6i->rt6i_dev && rt6i->rt6i_dev->type == ARPHRD_SIT) { 348 if (rt6i && rt6i->rt6i_dev && rt6i->rt6i_dev->type == ARPHRD_SIT) {
349 struct ip_tunnel *t = netdev_priv(rt6i->rt6i_dev); 349 struct ip_tunnel *t = netdev_priv(rt6i->rt6i_dev);
350 if (rel_type == ICMPV6_TIME_EXCEED && t->parms.iph.ttl) { 350 if (rel_type == ICMPV6_TIME_EXCEED && t->parms.iph.ttl) {
351 rel_type = ICMPV6_DEST_UNREACH; 351 rel_type = ICMPV6_DEST_UNREACH;
352 rel_code = ICMPV6_ADDR_UNREACH; 352 rel_code = ICMPV6_ADDR_UNREACH;
353 } 353 }
354 icmpv6_send(skb2, rel_type, rel_code, rel_info, skb2->dev); 354 icmpv6_send(skb2, rel_type, rel_code, rel_info, skb2->dev);
355 } 355 }
356 } 356 }
357 kfree_skb(skb2); 357 kfree_skb(skb2);
358 return; 358 return;
359 #endif 359 #endif
360 } 360 }
361 361
362 static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) 362 static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
363 { 363 {
364 if (INET_ECN_is_ce(iph->tos)) 364 if (INET_ECN_is_ce(iph->tos))
365 IP6_ECN_set_ce(skb->nh.ipv6h); 365 IP6_ECN_set_ce(skb->nh.ipv6h);
366 } 366 }
367 367
368 static int ipip6_rcv(struct sk_buff *skb) 368 static int ipip6_rcv(struct sk_buff *skb)
369 { 369 {
370 struct iphdr *iph; 370 struct iphdr *iph;
371 struct ip_tunnel *tunnel; 371 struct ip_tunnel *tunnel;
372 372
373 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) 373 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
374 goto out; 374 goto out;
375 375
376 iph = skb->nh.iph; 376 iph = skb->nh.iph;
377 377
378 read_lock(&ipip6_lock); 378 read_lock(&ipip6_lock);
379 if ((tunnel = ipip6_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) { 379 if ((tunnel = ipip6_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
380 secpath_reset(skb); 380 secpath_reset(skb);
381 skb->mac.raw = skb->nh.raw; 381 skb->mac.raw = skb->nh.raw;
382 skb->nh.raw = skb->data; 382 skb->nh.raw = skb->data;
383 memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
384 IPCB(skb)->flags = 0; 383 IPCB(skb)->flags = 0;
385 skb->protocol = htons(ETH_P_IPV6); 384 skb->protocol = htons(ETH_P_IPV6);
386 skb->pkt_type = PACKET_HOST; 385 skb->pkt_type = PACKET_HOST;
387 tunnel->stat.rx_packets++; 386 tunnel->stat.rx_packets++;
388 tunnel->stat.rx_bytes += skb->len; 387 tunnel->stat.rx_bytes += skb->len;
389 skb->dev = tunnel->dev; 388 skb->dev = tunnel->dev;
390 dst_release(skb->dst); 389 dst_release(skb->dst);
391 skb->dst = NULL; 390 skb->dst = NULL;
392 nf_reset(skb); 391 nf_reset(skb);
393 ipip6_ecn_decapsulate(iph, skb); 392 ipip6_ecn_decapsulate(iph, skb);
394 netif_rx(skb); 393 netif_rx(skb);
395 read_unlock(&ipip6_lock); 394 read_unlock(&ipip6_lock);
396 return 0; 395 return 0;
397 } 396 }
398 397
399 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); 398 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
400 kfree_skb(skb); 399 kfree_skb(skb);
401 read_unlock(&ipip6_lock); 400 read_unlock(&ipip6_lock);
402 out: 401 out:
403 return 0; 402 return 0;
404 } 403 }
405 404
406 /* Returns the embedded IPv4 address if the IPv6 address 405 /* Returns the embedded IPv4 address if the IPv6 address
407 comes from 6to4 (RFC 3056) addr space */ 406 comes from 6to4 (RFC 3056) addr space */
408 407
409 static inline u32 try_6to4(struct in6_addr *v6dst) 408 static inline u32 try_6to4(struct in6_addr *v6dst)
410 { 409 {
411 u32 dst = 0; 410 u32 dst = 0;
412 411
413 if (v6dst->s6_addr16[0] == htons(0x2002)) { 412 if (v6dst->s6_addr16[0] == htons(0x2002)) {
414 /* 6to4 v6 addr has 16 bits prefix, 32 v4addr, 16 SLA, ... */ 413 /* 6to4 v6 addr has 16 bits prefix, 32 v4addr, 16 SLA, ... */
415 memcpy(&dst, &v6dst->s6_addr16[1], 4); 414 memcpy(&dst, &v6dst->s6_addr16[1], 4);
416 } 415 }
417 return dst; 416 return dst;
418 } 417 }
419 418
420 /* 419 /*
421 * This function assumes it is being called from dev_queue_xmit() 420 * This function assumes it is being called from dev_queue_xmit()
422 * and that skb is filled properly by that function. 421 * and that skb is filled properly by that function.
423 */ 422 */
424 423
425 static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) 424 static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
426 { 425 {
427 struct ip_tunnel *tunnel = netdev_priv(dev); 426 struct ip_tunnel *tunnel = netdev_priv(dev);
428 struct net_device_stats *stats = &tunnel->stat; 427 struct net_device_stats *stats = &tunnel->stat;
429 struct iphdr *tiph = &tunnel->parms.iph; 428 struct iphdr *tiph = &tunnel->parms.iph;
430 struct ipv6hdr *iph6 = skb->nh.ipv6h; 429 struct ipv6hdr *iph6 = skb->nh.ipv6h;
431 u8 tos = tunnel->parms.iph.tos; 430 u8 tos = tunnel->parms.iph.tos;
432 struct rtable *rt; /* Route to the other host */ 431 struct rtable *rt; /* Route to the other host */
433 struct net_device *tdev; /* Device to other host */ 432 struct net_device *tdev; /* Device to other host */
434 struct iphdr *iph; /* Our new IP header */ 433 struct iphdr *iph; /* Our new IP header */
435 int max_headroom; /* The extra header space needed */ 434 int max_headroom; /* The extra header space needed */
436 u32 dst = tiph->daddr; 435 u32 dst = tiph->daddr;
437 int mtu; 436 int mtu;
438 struct in6_addr *addr6; 437 struct in6_addr *addr6;
439 int addr_type; 438 int addr_type;
440 439
441 if (tunnel->recursion++) { 440 if (tunnel->recursion++) {
442 tunnel->stat.collisions++; 441 tunnel->stat.collisions++;
443 goto tx_error; 442 goto tx_error;
444 } 443 }
445 444
446 if (skb->protocol != htons(ETH_P_IPV6)) 445 if (skb->protocol != htons(ETH_P_IPV6))
447 goto tx_error; 446 goto tx_error;
448 447
449 if (!dst) 448 if (!dst)
450 dst = try_6to4(&iph6->daddr); 449 dst = try_6to4(&iph6->daddr);
451 450
452 if (!dst) { 451 if (!dst) {
453 struct neighbour *neigh = NULL; 452 struct neighbour *neigh = NULL;
454 453
455 if (skb->dst) 454 if (skb->dst)
456 neigh = skb->dst->neighbour; 455 neigh = skb->dst->neighbour;
457 456
458 if (neigh == NULL) { 457 if (neigh == NULL) {
459 if (net_ratelimit()) 458 if (net_ratelimit())
460 printk(KERN_DEBUG "sit: nexthop == NULL\n"); 459 printk(KERN_DEBUG "sit: nexthop == NULL\n");
461 goto tx_error; 460 goto tx_error;
462 } 461 }
463 462
464 addr6 = (struct in6_addr*)&neigh->primary_key; 463 addr6 = (struct in6_addr*)&neigh->primary_key;
465 addr_type = ipv6_addr_type(addr6); 464 addr_type = ipv6_addr_type(addr6);
466 465
467 if (addr_type == IPV6_ADDR_ANY) { 466 if (addr_type == IPV6_ADDR_ANY) {
468 addr6 = &skb->nh.ipv6h->daddr; 467 addr6 = &skb->nh.ipv6h->daddr;
469 addr_type = ipv6_addr_type(addr6); 468 addr_type = ipv6_addr_type(addr6);
470 } 469 }
471 470
472 if ((addr_type & IPV6_ADDR_COMPATv4) == 0) 471 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
473 goto tx_error_icmp; 472 goto tx_error_icmp;
474 473
475 dst = addr6->s6_addr32[3]; 474 dst = addr6->s6_addr32[3];
476 } 475 }
477 476
478 { 477 {
479 struct flowi fl = { .nl_u = { .ip4_u = 478 struct flowi fl = { .nl_u = { .ip4_u =
480 { .daddr = dst, 479 { .daddr = dst,
481 .saddr = tiph->saddr, 480 .saddr = tiph->saddr,
482 .tos = RT_TOS(tos) } }, 481 .tos = RT_TOS(tos) } },
483 .oif = tunnel->parms.link, 482 .oif = tunnel->parms.link,
484 .proto = IPPROTO_IPV6 }; 483 .proto = IPPROTO_IPV6 };
485 if (ip_route_output_key(&rt, &fl)) { 484 if (ip_route_output_key(&rt, &fl)) {
486 tunnel->stat.tx_carrier_errors++; 485 tunnel->stat.tx_carrier_errors++;
487 goto tx_error_icmp; 486 goto tx_error_icmp;
488 } 487 }
489 } 488 }
490 if (rt->rt_type != RTN_UNICAST) { 489 if (rt->rt_type != RTN_UNICAST) {
491 ip_rt_put(rt); 490 ip_rt_put(rt);
492 tunnel->stat.tx_carrier_errors++; 491 tunnel->stat.tx_carrier_errors++;
493 goto tx_error_icmp; 492 goto tx_error_icmp;
494 } 493 }
495 tdev = rt->u.dst.dev; 494 tdev = rt->u.dst.dev;
496 495
497 if (tdev == dev) { 496 if (tdev == dev) {
498 ip_rt_put(rt); 497 ip_rt_put(rt);
499 tunnel->stat.collisions++; 498 tunnel->stat.collisions++;
500 goto tx_error; 499 goto tx_error;
501 } 500 }
502 501
503 if (tiph->frag_off) 502 if (tiph->frag_off)
504 mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr); 503 mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
505 else 504 else
506 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu; 505 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
507 506
508 if (mtu < 68) { 507 if (mtu < 68) {
509 tunnel->stat.collisions++; 508 tunnel->stat.collisions++;
510 ip_rt_put(rt); 509 ip_rt_put(rt);
511 goto tx_error; 510 goto tx_error;
512 } 511 }
513 if (mtu < IPV6_MIN_MTU) 512 if (mtu < IPV6_MIN_MTU)
514 mtu = IPV6_MIN_MTU; 513 mtu = IPV6_MIN_MTU;
515 if (tunnel->parms.iph.daddr && skb->dst) 514 if (tunnel->parms.iph.daddr && skb->dst)
516 skb->dst->ops->update_pmtu(skb->dst, mtu); 515 skb->dst->ops->update_pmtu(skb->dst, mtu);
517 516
518 if (skb->len > mtu) { 517 if (skb->len > mtu) {
519 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev); 518 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
520 ip_rt_put(rt); 519 ip_rt_put(rt);
521 goto tx_error; 520 goto tx_error;
522 } 521 }
523 522
524 if (tunnel->err_count > 0) { 523 if (tunnel->err_count > 0) {
525 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) { 524 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
526 tunnel->err_count--; 525 tunnel->err_count--;
527 dst_link_failure(skb); 526 dst_link_failure(skb);
528 } else 527 } else
529 tunnel->err_count = 0; 528 tunnel->err_count = 0;
530 } 529 }
531 530
532 /* 531 /*
533 * Okay, now see if we can stuff it in the buffer as-is. 532 * Okay, now see if we can stuff it in the buffer as-is.
534 */ 533 */
535 max_headroom = LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr); 534 max_headroom = LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr);
536 535
537 if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) { 536 if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
538 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); 537 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
539 if (!new_skb) { 538 if (!new_skb) {
540 ip_rt_put(rt); 539 ip_rt_put(rt);
541 stats->tx_dropped++; 540 stats->tx_dropped++;
542 dev_kfree_skb(skb); 541 dev_kfree_skb(skb);
543 tunnel->recursion--; 542 tunnel->recursion--;
544 return 0; 543 return 0;
545 } 544 }
546 if (skb->sk) 545 if (skb->sk)
547 skb_set_owner_w(new_skb, skb->sk); 546 skb_set_owner_w(new_skb, skb->sk);
548 dev_kfree_skb(skb); 547 dev_kfree_skb(skb);
549 skb = new_skb; 548 skb = new_skb;
550 iph6 = skb->nh.ipv6h; 549 iph6 = skb->nh.ipv6h;
551 } 550 }
552 551
553 skb->h.raw = skb->nh.raw; 552 skb->h.raw = skb->nh.raw;
554 skb->nh.raw = skb_push(skb, sizeof(struct iphdr)); 553 skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
555 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 554 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
556 IPCB(skb)->flags = 0; 555 IPCB(skb)->flags = 0;
557 dst_release(skb->dst); 556 dst_release(skb->dst);
558 skb->dst = &rt->u.dst; 557 skb->dst = &rt->u.dst;
559 558
560 /* 559 /*
561 * Push down and install the IPIP header. 560 * Push down and install the IPIP header.
562 */ 561 */
563 562
564 iph = skb->nh.iph; 563 iph = skb->nh.iph;
565 iph->version = 4; 564 iph->version = 4;
566 iph->ihl = sizeof(struct iphdr)>>2; 565 iph->ihl = sizeof(struct iphdr)>>2;
567 if (mtu > IPV6_MIN_MTU) 566 if (mtu > IPV6_MIN_MTU)
568 iph->frag_off = htons(IP_DF); 567 iph->frag_off = htons(IP_DF);
569 else 568 else
570 iph->frag_off = 0; 569 iph->frag_off = 0;
571 570
572 iph->protocol = IPPROTO_IPV6; 571 iph->protocol = IPPROTO_IPV6;
573 iph->tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6)); 572 iph->tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6));
574 iph->daddr = rt->rt_dst; 573 iph->daddr = rt->rt_dst;
575 iph->saddr = rt->rt_src; 574 iph->saddr = rt->rt_src;
576 575
577 if ((iph->ttl = tiph->ttl) == 0) 576 if ((iph->ttl = tiph->ttl) == 0)
578 iph->ttl = iph6->hop_limit; 577 iph->ttl = iph6->hop_limit;
579 578
580 nf_reset(skb); 579 nf_reset(skb);
581 580
582 IPTUNNEL_XMIT(); 581 IPTUNNEL_XMIT();
583 tunnel->recursion--; 582 tunnel->recursion--;
584 return 0; 583 return 0;
585 584
586 tx_error_icmp: 585 tx_error_icmp:
587 dst_link_failure(skb); 586 dst_link_failure(skb);
588 tx_error: 587 tx_error:
589 stats->tx_errors++; 588 stats->tx_errors++;
590 dev_kfree_skb(skb); 589 dev_kfree_skb(skb);
591 tunnel->recursion--; 590 tunnel->recursion--;
592 return 0; 591 return 0;
593 } 592 }
594 593
595 static int 594 static int
596 ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) 595 ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
597 { 596 {
598 int err = 0; 597 int err = 0;
599 struct ip_tunnel_parm p; 598 struct ip_tunnel_parm p;
600 struct ip_tunnel *t; 599 struct ip_tunnel *t;
601 600
602 switch (cmd) { 601 switch (cmd) {
603 case SIOCGETTUNNEL: 602 case SIOCGETTUNNEL:
604 t = NULL; 603 t = NULL;
605 if (dev == ipip6_fb_tunnel_dev) { 604 if (dev == ipip6_fb_tunnel_dev) {
606 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { 605 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
607 err = -EFAULT; 606 err = -EFAULT;
608 break; 607 break;
609 } 608 }
610 t = ipip6_tunnel_locate(&p, 0); 609 t = ipip6_tunnel_locate(&p, 0);
611 } 610 }
612 if (t == NULL) 611 if (t == NULL)
613 t = netdev_priv(dev); 612 t = netdev_priv(dev);
614 memcpy(&p, &t->parms, sizeof(p)); 613 memcpy(&p, &t->parms, sizeof(p));
615 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) 614 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
616 err = -EFAULT; 615 err = -EFAULT;
617 break; 616 break;
618 617
619 case SIOCADDTUNNEL: 618 case SIOCADDTUNNEL:
620 case SIOCCHGTUNNEL: 619 case SIOCCHGTUNNEL:
621 err = -EPERM; 620 err = -EPERM;
622 if (!capable(CAP_NET_ADMIN)) 621 if (!capable(CAP_NET_ADMIN))
623 goto done; 622 goto done;
624 623
625 err = -EFAULT; 624 err = -EFAULT;
626 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) 625 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
627 goto done; 626 goto done;
628 627
629 err = -EINVAL; 628 err = -EINVAL;
630 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPV6 || 629 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPV6 ||
631 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF))) 630 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
632 goto done; 631 goto done;
633 if (p.iph.ttl) 632 if (p.iph.ttl)
634 p.iph.frag_off |= htons(IP_DF); 633 p.iph.frag_off |= htons(IP_DF);
635 634
636 t = ipip6_tunnel_locate(&p, cmd == SIOCADDTUNNEL); 635 t = ipip6_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
637 636
638 if (dev != ipip6_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { 637 if (dev != ipip6_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
639 if (t != NULL) { 638 if (t != NULL) {
640 if (t->dev != dev) { 639 if (t->dev != dev) {
641 err = -EEXIST; 640 err = -EEXIST;
642 break; 641 break;
643 } 642 }
644 } else { 643 } else {
645 if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) || 644 if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
646 (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) { 645 (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
647 err = -EINVAL; 646 err = -EINVAL;
648 break; 647 break;
649 } 648 }
650 t = netdev_priv(dev); 649 t = netdev_priv(dev);
651 ipip6_tunnel_unlink(t); 650 ipip6_tunnel_unlink(t);
652 t->parms.iph.saddr = p.iph.saddr; 651 t->parms.iph.saddr = p.iph.saddr;
653 t->parms.iph.daddr = p.iph.daddr; 652 t->parms.iph.daddr = p.iph.daddr;
654 memcpy(dev->dev_addr, &p.iph.saddr, 4); 653 memcpy(dev->dev_addr, &p.iph.saddr, 4);
655 memcpy(dev->broadcast, &p.iph.daddr, 4); 654 memcpy(dev->broadcast, &p.iph.daddr, 4);
656 ipip6_tunnel_link(t); 655 ipip6_tunnel_link(t);
657 netdev_state_change(dev); 656 netdev_state_change(dev);
658 } 657 }
659 } 658 }
660 659
661 if (t) { 660 if (t) {
662 err = 0; 661 err = 0;
663 if (cmd == SIOCCHGTUNNEL) { 662 if (cmd == SIOCCHGTUNNEL) {
664 t->parms.iph.ttl = p.iph.ttl; 663 t->parms.iph.ttl = p.iph.ttl;
665 t->parms.iph.tos = p.iph.tos; 664 t->parms.iph.tos = p.iph.tos;
666 } 665 }
667 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) 666 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
668 err = -EFAULT; 667 err = -EFAULT;
669 } else 668 } else
670 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); 669 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
671 break; 670 break;
672 671
673 case SIOCDELTUNNEL: 672 case SIOCDELTUNNEL:
674 err = -EPERM; 673 err = -EPERM;
675 if (!capable(CAP_NET_ADMIN)) 674 if (!capable(CAP_NET_ADMIN))
676 goto done; 675 goto done;
677 676
678 if (dev == ipip6_fb_tunnel_dev) { 677 if (dev == ipip6_fb_tunnel_dev) {
679 err = -EFAULT; 678 err = -EFAULT;
680 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) 679 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
681 goto done; 680 goto done;
682 err = -ENOENT; 681 err = -ENOENT;
683 if ((t = ipip6_tunnel_locate(&p, 0)) == NULL) 682 if ((t = ipip6_tunnel_locate(&p, 0)) == NULL)
684 goto done; 683 goto done;
685 err = -EPERM; 684 err = -EPERM;
686 if (t == netdev_priv(ipip6_fb_tunnel_dev)) 685 if (t == netdev_priv(ipip6_fb_tunnel_dev))
687 goto done; 686 goto done;
688 dev = t->dev; 687 dev = t->dev;
689 } 688 }
690 err = unregister_netdevice(dev); 689 err = unregister_netdevice(dev);
691 break; 690 break;
692 691
693 default: 692 default:
694 err = -EINVAL; 693 err = -EINVAL;
695 } 694 }
696 695
697 done: 696 done:
698 return err; 697 return err;
699 } 698 }
700 699
701 static struct net_device_stats *ipip6_tunnel_get_stats(struct net_device *dev) 700 static struct net_device_stats *ipip6_tunnel_get_stats(struct net_device *dev)
702 { 701 {
703 return &(((struct ip_tunnel*)netdev_priv(dev))->stat); 702 return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
704 } 703 }
705 704
706 static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu) 705 static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu)
707 { 706 {
708 if (new_mtu < IPV6_MIN_MTU || new_mtu > 0xFFF8 - sizeof(struct iphdr)) 707 if (new_mtu < IPV6_MIN_MTU || new_mtu > 0xFFF8 - sizeof(struct iphdr))
709 return -EINVAL; 708 return -EINVAL;
710 dev->mtu = new_mtu; 709 dev->mtu = new_mtu;
711 return 0; 710 return 0;
712 } 711 }
713 712
714 static void ipip6_tunnel_setup(struct net_device *dev) 713 static void ipip6_tunnel_setup(struct net_device *dev)
715 { 714 {
716 SET_MODULE_OWNER(dev); 715 SET_MODULE_OWNER(dev);
717 dev->uninit = ipip6_tunnel_uninit; 716 dev->uninit = ipip6_tunnel_uninit;
718 dev->destructor = free_netdev; 717 dev->destructor = free_netdev;
719 dev->hard_start_xmit = ipip6_tunnel_xmit; 718 dev->hard_start_xmit = ipip6_tunnel_xmit;
720 dev->get_stats = ipip6_tunnel_get_stats; 719 dev->get_stats = ipip6_tunnel_get_stats;
721 dev->do_ioctl = ipip6_tunnel_ioctl; 720 dev->do_ioctl = ipip6_tunnel_ioctl;
722 dev->change_mtu = ipip6_tunnel_change_mtu; 721 dev->change_mtu = ipip6_tunnel_change_mtu;
723 722
724 dev->type = ARPHRD_SIT; 723 dev->type = ARPHRD_SIT;
725 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr); 724 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr);
726 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr); 725 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr);
727 dev->flags = IFF_NOARP; 726 dev->flags = IFF_NOARP;
728 dev->iflink = 0; 727 dev->iflink = 0;
729 dev->addr_len = 4; 728 dev->addr_len = 4;
730 } 729 }
731 730
732 static int ipip6_tunnel_init(struct net_device *dev) 731 static int ipip6_tunnel_init(struct net_device *dev)
733 { 732 {
734 struct net_device *tdev = NULL; 733 struct net_device *tdev = NULL;
735 struct ip_tunnel *tunnel; 734 struct ip_tunnel *tunnel;
736 struct iphdr *iph; 735 struct iphdr *iph;
737 736
738 tunnel = netdev_priv(dev); 737 tunnel = netdev_priv(dev);
739 iph = &tunnel->parms.iph; 738 iph = &tunnel->parms.iph;
740 739
741 tunnel->dev = dev; 740 tunnel->dev = dev;
742 strcpy(tunnel->parms.name, dev->name); 741 strcpy(tunnel->parms.name, dev->name);
743 742
744 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); 743 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
745 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); 744 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
746 745
747 if (iph->daddr) { 746 if (iph->daddr) {
748 struct flowi fl = { .nl_u = { .ip4_u = 747 struct flowi fl = { .nl_u = { .ip4_u =
749 { .daddr = iph->daddr, 748 { .daddr = iph->daddr,
750 .saddr = iph->saddr, 749 .saddr = iph->saddr,
751 .tos = RT_TOS(iph->tos) } }, 750 .tos = RT_TOS(iph->tos) } },
752 .oif = tunnel->parms.link, 751 .oif = tunnel->parms.link,
753 .proto = IPPROTO_IPV6 }; 752 .proto = IPPROTO_IPV6 };
754 struct rtable *rt; 753 struct rtable *rt;
755 if (!ip_route_output_key(&rt, &fl)) { 754 if (!ip_route_output_key(&rt, &fl)) {
756 tdev = rt->u.dst.dev; 755 tdev = rt->u.dst.dev;
757 ip_rt_put(rt); 756 ip_rt_put(rt);
758 } 757 }
759 dev->flags |= IFF_POINTOPOINT; 758 dev->flags |= IFF_POINTOPOINT;
760 } 759 }
761 760
762 if (!tdev && tunnel->parms.link) 761 if (!tdev && tunnel->parms.link)
763 tdev = __dev_get_by_index(tunnel->parms.link); 762 tdev = __dev_get_by_index(tunnel->parms.link);
764 763
765 if (tdev) { 764 if (tdev) {
766 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); 765 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
767 dev->mtu = tdev->mtu - sizeof(struct iphdr); 766 dev->mtu = tdev->mtu - sizeof(struct iphdr);
768 if (dev->mtu < IPV6_MIN_MTU) 767 if (dev->mtu < IPV6_MIN_MTU)
769 dev->mtu = IPV6_MIN_MTU; 768 dev->mtu = IPV6_MIN_MTU;
770 } 769 }
771 dev->iflink = tunnel->parms.link; 770 dev->iflink = tunnel->parms.link;
772 771
773 return 0; 772 return 0;
774 } 773 }
775 774
776 static int __init ipip6_fb_tunnel_init(struct net_device *dev) 775 static int __init ipip6_fb_tunnel_init(struct net_device *dev)
777 { 776 {
778 struct ip_tunnel *tunnel = netdev_priv(dev); 777 struct ip_tunnel *tunnel = netdev_priv(dev);
779 struct iphdr *iph = &tunnel->parms.iph; 778 struct iphdr *iph = &tunnel->parms.iph;
780 779
781 tunnel->dev = dev; 780 tunnel->dev = dev;
782 strcpy(tunnel->parms.name, dev->name); 781 strcpy(tunnel->parms.name, dev->name);
783 782
784 iph->version = 4; 783 iph->version = 4;
785 iph->protocol = IPPROTO_IPV6; 784 iph->protocol = IPPROTO_IPV6;
786 iph->ihl = 5; 785 iph->ihl = 5;
787 iph->ttl = 64; 786 iph->ttl = 64;
788 787
789 dev_hold(dev); 788 dev_hold(dev);
790 tunnels_wc[0] = tunnel; 789 tunnels_wc[0] = tunnel;
791 return 0; 790 return 0;
792 } 791 }
793 792
794 static struct net_protocol sit_protocol = { 793 static struct net_protocol sit_protocol = {
795 .handler = ipip6_rcv, 794 .handler = ipip6_rcv,
796 .err_handler = ipip6_err, 795 .err_handler = ipip6_err,
797 }; 796 };
798 797
799 static void __exit sit_destroy_tunnels(void) 798 static void __exit sit_destroy_tunnels(void)
800 { 799 {
801 int prio; 800 int prio;
802 801
803 for (prio = 1; prio < 4; prio++) { 802 for (prio = 1; prio < 4; prio++) {
804 int h; 803 int h;
805 for (h = 0; h < HASH_SIZE; h++) { 804 for (h = 0; h < HASH_SIZE; h++) {
806 struct ip_tunnel *t; 805 struct ip_tunnel *t;
807 while ((t = tunnels[prio][h]) != NULL) 806 while ((t = tunnels[prio][h]) != NULL)
808 unregister_netdevice(t->dev); 807 unregister_netdevice(t->dev);
809 } 808 }
810 } 809 }
811 } 810 }
812 811
813 void __exit sit_cleanup(void) 812 void __exit sit_cleanup(void)
814 { 813 {
815 inet_del_protocol(&sit_protocol, IPPROTO_IPV6); 814 inet_del_protocol(&sit_protocol, IPPROTO_IPV6);
816 815
817 rtnl_lock(); 816 rtnl_lock();
818 sit_destroy_tunnels(); 817 sit_destroy_tunnels();
819 unregister_netdevice(ipip6_fb_tunnel_dev); 818 unregister_netdevice(ipip6_fb_tunnel_dev);
820 rtnl_unlock(); 819 rtnl_unlock();
821 } 820 }
822 821
823 int __init sit_init(void) 822 int __init sit_init(void)
824 { 823 {
825 int err; 824 int err;
826 825
827 printk(KERN_INFO "IPv6 over IPv4 tunneling driver\n"); 826 printk(KERN_INFO "IPv6 over IPv4 tunneling driver\n");
828 827
829 if (inet_add_protocol(&sit_protocol, IPPROTO_IPV6) < 0) { 828 if (inet_add_protocol(&sit_protocol, IPPROTO_IPV6) < 0) {
830 printk(KERN_INFO "sit init: Can't add protocol\n"); 829 printk(KERN_INFO "sit init: Can't add protocol\n");
831 return -EAGAIN; 830 return -EAGAIN;
832 } 831 }
833 832
834 ipip6_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "sit0", 833 ipip6_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "sit0",
835 ipip6_tunnel_setup); 834 ipip6_tunnel_setup);
836 if (!ipip6_fb_tunnel_dev) { 835 if (!ipip6_fb_tunnel_dev) {
837 err = -ENOMEM; 836 err = -ENOMEM;
838 goto err1; 837 goto err1;
839 } 838 }
840 839
841 ipip6_fb_tunnel_dev->init = ipip6_fb_tunnel_init; 840 ipip6_fb_tunnel_dev->init = ipip6_fb_tunnel_init;
842 841
843 if ((err = register_netdev(ipip6_fb_tunnel_dev))) 842 if ((err = register_netdev(ipip6_fb_tunnel_dev)))
844 goto err2; 843 goto err2;
845 844
846 out: 845 out:
847 return err; 846 return err;
848 err2: 847 err2:
849 free_netdev(ipip6_fb_tunnel_dev); 848 free_netdev(ipip6_fb_tunnel_dev);
850 err1: 849 err1:
851 inet_del_protocol(&sit_protocol, IPPROTO_IPV6); 850 inet_del_protocol(&sit_protocol, IPPROTO_IPV6);
852 goto out; 851 goto out;
853 } 852 }
854 853