Commit 5d9c5a32920c5c0e6716b0f6ed16157783dc56a4
Committed by
David S. Miller
1 parent
00ab956f2f
Exists in
master
and in
7 other branches
[IPV4]: Get rid of redundant IPCB->opts initialisation
Now that we always zero the IPCB->opts in ip_rcv, it is no longer necessary to do so before calling netif_rx for tunneled packets. Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
Showing 6 changed files with 0 additions and 7 deletions Inline Diff
net/ipv4/ip_gre.c
1 | /* | 1 | /* |
2 | * Linux NET3: GRE over IP protocol decoder. | 2 | * Linux NET3: GRE over IP protocol decoder. |
3 | * | 3 | * |
4 | * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru) | 4 | * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru) |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or | 6 | * This program is free software; you can redistribute it and/or |
7 | * modify it under the terms of the GNU General Public License | 7 | * modify it under the terms of the GNU General Public License |
8 | * as published by the Free Software Foundation; either version | 8 | * as published by the Free Software Foundation; either version |
9 | * 2 of the License, or (at your option) any later version. | 9 | * 2 of the License, or (at your option) any later version. |
10 | * | 10 | * |
11 | */ | 11 | */ |
12 | 12 | ||
13 | #include <linux/capability.h> | 13 | #include <linux/capability.h> |
14 | #include <linux/module.h> | 14 | #include <linux/module.h> |
15 | #include <linux/types.h> | 15 | #include <linux/types.h> |
16 | #include <linux/sched.h> | 16 | #include <linux/sched.h> |
17 | #include <linux/kernel.h> | 17 | #include <linux/kernel.h> |
18 | #include <asm/uaccess.h> | 18 | #include <asm/uaccess.h> |
19 | #include <linux/skbuff.h> | 19 | #include <linux/skbuff.h> |
20 | #include <linux/netdevice.h> | 20 | #include <linux/netdevice.h> |
21 | #include <linux/in.h> | 21 | #include <linux/in.h> |
22 | #include <linux/tcp.h> | 22 | #include <linux/tcp.h> |
23 | #include <linux/udp.h> | 23 | #include <linux/udp.h> |
24 | #include <linux/if_arp.h> | 24 | #include <linux/if_arp.h> |
25 | #include <linux/mroute.h> | 25 | #include <linux/mroute.h> |
26 | #include <linux/init.h> | 26 | #include <linux/init.h> |
27 | #include <linux/in6.h> | 27 | #include <linux/in6.h> |
28 | #include <linux/inetdevice.h> | 28 | #include <linux/inetdevice.h> |
29 | #include <linux/igmp.h> | 29 | #include <linux/igmp.h> |
30 | #include <linux/netfilter_ipv4.h> | 30 | #include <linux/netfilter_ipv4.h> |
31 | #include <linux/if_ether.h> | 31 | #include <linux/if_ether.h> |
32 | 32 | ||
33 | #include <net/sock.h> | 33 | #include <net/sock.h> |
34 | #include <net/ip.h> | 34 | #include <net/ip.h> |
35 | #include <net/icmp.h> | 35 | #include <net/icmp.h> |
36 | #include <net/protocol.h> | 36 | #include <net/protocol.h> |
37 | #include <net/ipip.h> | 37 | #include <net/ipip.h> |
38 | #include <net/arp.h> | 38 | #include <net/arp.h> |
39 | #include <net/checksum.h> | 39 | #include <net/checksum.h> |
40 | #include <net/dsfield.h> | 40 | #include <net/dsfield.h> |
41 | #include <net/inet_ecn.h> | 41 | #include <net/inet_ecn.h> |
42 | #include <net/xfrm.h> | 42 | #include <net/xfrm.h> |
43 | 43 | ||
44 | #ifdef CONFIG_IPV6 | 44 | #ifdef CONFIG_IPV6 |
45 | #include <net/ipv6.h> | 45 | #include <net/ipv6.h> |
46 | #include <net/ip6_fib.h> | 46 | #include <net/ip6_fib.h> |
47 | #include <net/ip6_route.h> | 47 | #include <net/ip6_route.h> |
48 | #endif | 48 | #endif |
49 | 49 | ||
50 | /* | 50 | /* |
51 | Problems & solutions | 51 | Problems & solutions |
52 | -------------------- | 52 | -------------------- |
53 | 53 | ||
54 | 1. The most important issue is detecting local dead loops. | 54 | 1. The most important issue is detecting local dead loops. |
55 | They would cause complete host lockup in transmit, which | 55 | They would cause complete host lockup in transmit, which |
56 | would be "resolved" by stack overflow or, if queueing is enabled, | 56 | would be "resolved" by stack overflow or, if queueing is enabled, |
57 | with infinite looping in net_bh. | 57 | with infinite looping in net_bh. |
58 | 58 | ||
59 | We cannot track such dead loops during route installation, | 59 | We cannot track such dead loops during route installation, |
60 | it is infeasible task. The most general solutions would be | 60 | it is infeasible task. The most general solutions would be |
61 | to keep skb->encapsulation counter (sort of local ttl), | 61 | to keep skb->encapsulation counter (sort of local ttl), |
62 | and silently drop packet when it expires. It is the best | 62 | and silently drop packet when it expires. It is the best |
63 | solution, but it supposes maintaining a new variable in ALL | 63 | solution, but it supposes maintaining a new variable in ALL |
64 | skb, even if no tunneling is used. | 64 | skb, even if no tunneling is used. |
65 | 65 | ||
66 | Current solution: t->recursion lock breaks dead loops. It looks | 66 | Current solution: t->recursion lock breaks dead loops. It looks |
67 | like dev->tbusy flag, but I preferred new variable, because | 67 | like dev->tbusy flag, but I preferred new variable, because |
68 | the semantics is different. One day, when hard_start_xmit | 68 | the semantics is different. One day, when hard_start_xmit |
69 | will be multithreaded we will have to use skb->encapsulation. | 69 | will be multithreaded we will have to use skb->encapsulation. |
70 | 70 | ||
71 | 71 | ||
72 | 72 | ||
73 | 2. Networking dead loops would not kill routers, but would really | 73 | 2. Networking dead loops would not kill routers, but would really |
74 | kill network. IP hop limit plays role of "t->recursion" in this case, | 74 | kill network. IP hop limit plays role of "t->recursion" in this case, |
75 | if we copy it from packet being encapsulated to upper header. | 75 | if we copy it from packet being encapsulated to upper header. |
76 | It is very good solution, but it introduces two problems: | 76 | It is very good solution, but it introduces two problems: |
77 | 77 | ||
78 | - Routing protocols, using packets with ttl=1 (OSPF, RIP2), | 78 | - Routing protocols, using packets with ttl=1 (OSPF, RIP2), |
79 | do not work over tunnels. | 79 | do not work over tunnels. |
80 | - traceroute does not work. I planned to relay ICMP from tunnel, | 80 | - traceroute does not work. I planned to relay ICMP from tunnel, |
81 | so that this problem would be solved and traceroute output | 81 | so that this problem would be solved and traceroute output |
82 | would even more informative. This idea appeared to be wrong: | 82 | would even more informative. This idea appeared to be wrong: |
83 | only Linux complies to rfc1812 now (yes, guys, Linux is the only | 83 | only Linux complies to rfc1812 now (yes, guys, Linux is the only |
84 | true router now :-)), all routers (at least, in neighbourhood of mine) | 84 | true router now :-)), all routers (at least, in neighbourhood of mine) |
85 | return only 8 bytes of payload. It is the end. | 85 | return only 8 bytes of payload. It is the end. |
86 | 86 | ||
87 | Hence, if we want that OSPF worked or traceroute said something reasonable, | 87 | Hence, if we want that OSPF worked or traceroute said something reasonable, |
88 | we should search for another solution. | 88 | we should search for another solution. |
89 | 89 | ||
90 | One of them is to parse packet trying to detect inner encapsulation | 90 | One of them is to parse packet trying to detect inner encapsulation |
91 | made by our node. It is difficult or even impossible, especially, | 91 | made by our node. It is difficult or even impossible, especially, |
92 | taking into account fragmentation. To be short, it is not a solution at all. | 92 | taking into account fragmentation. To be short, it is not a solution at all. |
93 | 93 | ||
94 | Current solution: The solution was UNEXPECTEDLY SIMPLE. | 94 | Current solution: The solution was UNEXPECTEDLY SIMPLE. |
95 | We force DF flag on tunnels with preconfigured hop limit, | 95 | We force DF flag on tunnels with preconfigured hop limit, |
96 | that is ALL. :-) Well, it does not remove the problem completely, | 96 | that is ALL. :-) Well, it does not remove the problem completely, |
97 | but exponential growth of network traffic is changed to linear | 97 | but exponential growth of network traffic is changed to linear |
98 | (branches, that exceed pmtu are pruned) and tunnel mtu | 98 | (branches, that exceed pmtu are pruned) and tunnel mtu |
99 | fastly degrades to value <68, where looping stops. | 99 | fastly degrades to value <68, where looping stops. |
100 | Yes, it is not good if there exists a router in the loop, | 100 | Yes, it is not good if there exists a router in the loop, |
101 | which does not force DF, even when encapsulating packets have DF set. | 101 | which does not force DF, even when encapsulating packets have DF set. |
102 | But it is not our problem! Nobody could accuse us, we made | 102 | But it is not our problem! Nobody could accuse us, we made |
103 | all that we could make. Even if it is your gated who injected | 103 | all that we could make. Even if it is your gated who injected |
104 | fatal route to network, even if it were you who configured | 104 | fatal route to network, even if it were you who configured |
105 | fatal static route: you are innocent. :-) | 105 | fatal static route: you are innocent. :-) |
106 | 106 | ||
107 | 107 | ||
108 | 108 | ||
109 | 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain | 109 | 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain |
110 | practically identical code. It would be good to glue them | 110 | practically identical code. It would be good to glue them |
111 | together, but it is not very evident, how to make them modular. | 111 | together, but it is not very evident, how to make them modular. |
112 | sit is integral part of IPv6, ipip and gre are naturally modular. | 112 | sit is integral part of IPv6, ipip and gre are naturally modular. |
113 | We could extract common parts (hash table, ioctl etc) | 113 | We could extract common parts (hash table, ioctl etc) |
114 | to a separate module (ip_tunnel.c). | 114 | to a separate module (ip_tunnel.c). |
115 | 115 | ||
116 | Alexey Kuznetsov. | 116 | Alexey Kuznetsov. |
117 | */ | 117 | */ |
118 | 118 | ||
119 | static int ipgre_tunnel_init(struct net_device *dev); | 119 | static int ipgre_tunnel_init(struct net_device *dev); |
120 | static void ipgre_tunnel_setup(struct net_device *dev); | 120 | static void ipgre_tunnel_setup(struct net_device *dev); |
121 | 121 | ||
122 | /* Fallback tunnel: no source, no destination, no key, no options */ | 122 | /* Fallback tunnel: no source, no destination, no key, no options */ |
123 | 123 | ||
124 | static int ipgre_fb_tunnel_init(struct net_device *dev); | 124 | static int ipgre_fb_tunnel_init(struct net_device *dev); |
125 | 125 | ||
126 | static struct net_device *ipgre_fb_tunnel_dev; | 126 | static struct net_device *ipgre_fb_tunnel_dev; |
127 | 127 | ||
128 | /* Tunnel hash table */ | 128 | /* Tunnel hash table */ |
129 | 129 | ||
130 | /* | 130 | /* |
131 | 4 hash tables: | 131 | 4 hash tables: |
132 | 132 | ||
133 | 3: (remote,local) | 133 | 3: (remote,local) |
134 | 2: (remote,*) | 134 | 2: (remote,*) |
135 | 1: (*,local) | 135 | 1: (*,local) |
136 | 0: (*,*) | 136 | 0: (*,*) |
137 | 137 | ||
138 | We require exact key match i.e. if a key is present in packet | 138 | We require exact key match i.e. if a key is present in packet |
139 | it will match only tunnel with the same key; if it is not present, | 139 | it will match only tunnel with the same key; if it is not present, |
140 | it will match only keyless tunnel. | 140 | it will match only keyless tunnel. |
141 | 141 | ||
142 | All keyless packets, if not matched by configured keyless tunnels, | 142 | All keyless packets, if not matched by configured keyless tunnels, |
143 | will match the fallback tunnel. | 143 | will match the fallback tunnel. |
144 | */ | 144 | */ |
145 | 145 | ||
146 | #define HASH_SIZE 16 | 146 | #define HASH_SIZE 16 |
147 | #define HASH(addr) ((addr^(addr>>4))&0xF) | 147 | #define HASH(addr) ((addr^(addr>>4))&0xF) |
148 | 148 | ||
149 | static struct ip_tunnel *tunnels[4][HASH_SIZE]; | 149 | static struct ip_tunnel *tunnels[4][HASH_SIZE]; |
150 | 150 | ||
151 | #define tunnels_r_l (tunnels[3]) | 151 | #define tunnels_r_l (tunnels[3]) |
152 | #define tunnels_r (tunnels[2]) | 152 | #define tunnels_r (tunnels[2]) |
153 | #define tunnels_l (tunnels[1]) | 153 | #define tunnels_l (tunnels[1]) |
154 | #define tunnels_wc (tunnels[0]) | 154 | #define tunnels_wc (tunnels[0]) |
155 | 155 | ||
156 | static DEFINE_RWLOCK(ipgre_lock); | 156 | static DEFINE_RWLOCK(ipgre_lock); |
157 | 157 | ||
158 | /* Given src, dst and key, find appropriate for input tunnel. */ | 158 | /* Given src, dst and key, find appropriate for input tunnel. */ |
159 | 159 | ||
160 | static struct ip_tunnel * ipgre_tunnel_lookup(u32 remote, u32 local, u32 key) | 160 | static struct ip_tunnel * ipgre_tunnel_lookup(u32 remote, u32 local, u32 key) |
161 | { | 161 | { |
162 | unsigned h0 = HASH(remote); | 162 | unsigned h0 = HASH(remote); |
163 | unsigned h1 = HASH(key); | 163 | unsigned h1 = HASH(key); |
164 | struct ip_tunnel *t; | 164 | struct ip_tunnel *t; |
165 | 165 | ||
166 | for (t = tunnels_r_l[h0^h1]; t; t = t->next) { | 166 | for (t = tunnels_r_l[h0^h1]; t; t = t->next) { |
167 | if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) { | 167 | if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) { |
168 | if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) | 168 | if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) |
169 | return t; | 169 | return t; |
170 | } | 170 | } |
171 | } | 171 | } |
172 | for (t = tunnels_r[h0^h1]; t; t = t->next) { | 172 | for (t = tunnels_r[h0^h1]; t; t = t->next) { |
173 | if (remote == t->parms.iph.daddr) { | 173 | if (remote == t->parms.iph.daddr) { |
174 | if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) | 174 | if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) |
175 | return t; | 175 | return t; |
176 | } | 176 | } |
177 | } | 177 | } |
178 | for (t = tunnels_l[h1]; t; t = t->next) { | 178 | for (t = tunnels_l[h1]; t; t = t->next) { |
179 | if (local == t->parms.iph.saddr || | 179 | if (local == t->parms.iph.saddr || |
180 | (local == t->parms.iph.daddr && MULTICAST(local))) { | 180 | (local == t->parms.iph.daddr && MULTICAST(local))) { |
181 | if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) | 181 | if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) |
182 | return t; | 182 | return t; |
183 | } | 183 | } |
184 | } | 184 | } |
185 | for (t = tunnels_wc[h1]; t; t = t->next) { | 185 | for (t = tunnels_wc[h1]; t; t = t->next) { |
186 | if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) | 186 | if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) |
187 | return t; | 187 | return t; |
188 | } | 188 | } |
189 | 189 | ||
190 | if (ipgre_fb_tunnel_dev->flags&IFF_UP) | 190 | if (ipgre_fb_tunnel_dev->flags&IFF_UP) |
191 | return netdev_priv(ipgre_fb_tunnel_dev); | 191 | return netdev_priv(ipgre_fb_tunnel_dev); |
192 | return NULL; | 192 | return NULL; |
193 | } | 193 | } |
194 | 194 | ||
195 | static struct ip_tunnel **ipgre_bucket(struct ip_tunnel *t) | 195 | static struct ip_tunnel **ipgre_bucket(struct ip_tunnel *t) |
196 | { | 196 | { |
197 | u32 remote = t->parms.iph.daddr; | 197 | u32 remote = t->parms.iph.daddr; |
198 | u32 local = t->parms.iph.saddr; | 198 | u32 local = t->parms.iph.saddr; |
199 | u32 key = t->parms.i_key; | 199 | u32 key = t->parms.i_key; |
200 | unsigned h = HASH(key); | 200 | unsigned h = HASH(key); |
201 | int prio = 0; | 201 | int prio = 0; |
202 | 202 | ||
203 | if (local) | 203 | if (local) |
204 | prio |= 1; | 204 | prio |= 1; |
205 | if (remote && !MULTICAST(remote)) { | 205 | if (remote && !MULTICAST(remote)) { |
206 | prio |= 2; | 206 | prio |= 2; |
207 | h ^= HASH(remote); | 207 | h ^= HASH(remote); |
208 | } | 208 | } |
209 | 209 | ||
210 | return &tunnels[prio][h]; | 210 | return &tunnels[prio][h]; |
211 | } | 211 | } |
212 | 212 | ||
213 | static void ipgre_tunnel_link(struct ip_tunnel *t) | 213 | static void ipgre_tunnel_link(struct ip_tunnel *t) |
214 | { | 214 | { |
215 | struct ip_tunnel **tp = ipgre_bucket(t); | 215 | struct ip_tunnel **tp = ipgre_bucket(t); |
216 | 216 | ||
217 | t->next = *tp; | 217 | t->next = *tp; |
218 | write_lock_bh(&ipgre_lock); | 218 | write_lock_bh(&ipgre_lock); |
219 | *tp = t; | 219 | *tp = t; |
220 | write_unlock_bh(&ipgre_lock); | 220 | write_unlock_bh(&ipgre_lock); |
221 | } | 221 | } |
222 | 222 | ||
223 | static void ipgre_tunnel_unlink(struct ip_tunnel *t) | 223 | static void ipgre_tunnel_unlink(struct ip_tunnel *t) |
224 | { | 224 | { |
225 | struct ip_tunnel **tp; | 225 | struct ip_tunnel **tp; |
226 | 226 | ||
227 | for (tp = ipgre_bucket(t); *tp; tp = &(*tp)->next) { | 227 | for (tp = ipgre_bucket(t); *tp; tp = &(*tp)->next) { |
228 | if (t == *tp) { | 228 | if (t == *tp) { |
229 | write_lock_bh(&ipgre_lock); | 229 | write_lock_bh(&ipgre_lock); |
230 | *tp = t->next; | 230 | *tp = t->next; |
231 | write_unlock_bh(&ipgre_lock); | 231 | write_unlock_bh(&ipgre_lock); |
232 | break; | 232 | break; |
233 | } | 233 | } |
234 | } | 234 | } |
235 | } | 235 | } |
236 | 236 | ||
237 | static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int create) | 237 | static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int create) |
238 | { | 238 | { |
239 | u32 remote = parms->iph.daddr; | 239 | u32 remote = parms->iph.daddr; |
240 | u32 local = parms->iph.saddr; | 240 | u32 local = parms->iph.saddr; |
241 | u32 key = parms->i_key; | 241 | u32 key = parms->i_key; |
242 | struct ip_tunnel *t, **tp, *nt; | 242 | struct ip_tunnel *t, **tp, *nt; |
243 | struct net_device *dev; | 243 | struct net_device *dev; |
244 | unsigned h = HASH(key); | 244 | unsigned h = HASH(key); |
245 | int prio = 0; | 245 | int prio = 0; |
246 | char name[IFNAMSIZ]; | 246 | char name[IFNAMSIZ]; |
247 | 247 | ||
248 | if (local) | 248 | if (local) |
249 | prio |= 1; | 249 | prio |= 1; |
250 | if (remote && !MULTICAST(remote)) { | 250 | if (remote && !MULTICAST(remote)) { |
251 | prio |= 2; | 251 | prio |= 2; |
252 | h ^= HASH(remote); | 252 | h ^= HASH(remote); |
253 | } | 253 | } |
254 | for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) { | 254 | for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) { |
255 | if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) { | 255 | if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) { |
256 | if (key == t->parms.i_key) | 256 | if (key == t->parms.i_key) |
257 | return t; | 257 | return t; |
258 | } | 258 | } |
259 | } | 259 | } |
260 | if (!create) | 260 | if (!create) |
261 | return NULL; | 261 | return NULL; |
262 | 262 | ||
263 | if (parms->name[0]) | 263 | if (parms->name[0]) |
264 | strlcpy(name, parms->name, IFNAMSIZ); | 264 | strlcpy(name, parms->name, IFNAMSIZ); |
265 | else { | 265 | else { |
266 | int i; | 266 | int i; |
267 | for (i=1; i<100; i++) { | 267 | for (i=1; i<100; i++) { |
268 | sprintf(name, "gre%d", i); | 268 | sprintf(name, "gre%d", i); |
269 | if (__dev_get_by_name(name) == NULL) | 269 | if (__dev_get_by_name(name) == NULL) |
270 | break; | 270 | break; |
271 | } | 271 | } |
272 | if (i==100) | 272 | if (i==100) |
273 | goto failed; | 273 | goto failed; |
274 | } | 274 | } |
275 | 275 | ||
276 | dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup); | 276 | dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup); |
277 | if (!dev) | 277 | if (!dev) |
278 | return NULL; | 278 | return NULL; |
279 | 279 | ||
280 | dev->init = ipgre_tunnel_init; | 280 | dev->init = ipgre_tunnel_init; |
281 | nt = netdev_priv(dev); | 281 | nt = netdev_priv(dev); |
282 | nt->parms = *parms; | 282 | nt->parms = *parms; |
283 | 283 | ||
284 | if (register_netdevice(dev) < 0) { | 284 | if (register_netdevice(dev) < 0) { |
285 | free_netdev(dev); | 285 | free_netdev(dev); |
286 | goto failed; | 286 | goto failed; |
287 | } | 287 | } |
288 | 288 | ||
289 | dev_hold(dev); | 289 | dev_hold(dev); |
290 | ipgre_tunnel_link(nt); | 290 | ipgre_tunnel_link(nt); |
291 | return nt; | 291 | return nt; |
292 | 292 | ||
293 | failed: | 293 | failed: |
294 | return NULL; | 294 | return NULL; |
295 | } | 295 | } |
296 | 296 | ||
/*
 * net_device ->uninit hook: take the tunnel out of the hash table and
 * drop the device reference taken when it was registered.
 */
static void ipgre_tunnel_uninit(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	ipgre_tunnel_unlink(tunnel);
	dev_put(dev);
}
302 | 302 | ||
303 | 303 | ||
304 | static void ipgre_err(struct sk_buff *skb, u32 info) | 304 | static void ipgre_err(struct sk_buff *skb, u32 info) |
305 | { | 305 | { |
306 | #ifndef I_WISH_WORLD_WERE_PERFECT | 306 | #ifndef I_WISH_WORLD_WERE_PERFECT |
307 | 307 | ||
308 | /* It is not :-( All the routers (except for Linux) return only | 308 | /* It is not :-( All the routers (except for Linux) return only |
309 | 8 bytes of packet payload. It means, that precise relaying of | 309 | 8 bytes of packet payload. It means, that precise relaying of |
310 | ICMP in the real Internet is absolutely infeasible. | 310 | ICMP in the real Internet is absolutely infeasible. |
311 | 311 | ||
312 | Moreover, Cisco "wise men" put GRE key to the third word | 312 | Moreover, Cisco "wise men" put GRE key to the third word |
313 | in GRE header. It makes impossible maintaining even soft state for keyed | 313 | in GRE header. It makes impossible maintaining even soft state for keyed |
314 | GRE tunnels with enabled checksum. Tell them "thank you". | 314 | GRE tunnels with enabled checksum. Tell them "thank you". |
315 | 315 | ||
316 | Well, I wonder, rfc1812 was written by Cisco employee, | 316 | Well, I wonder, rfc1812 was written by Cisco employee, |
317 | why the hell do these idiots break standards established | 317 | why the hell do these idiots break standards established |
318 | by themselves??? | 318 | by themselves??? |
319 | */ | 319 | */ |
320 | 320 | ||
321 | struct iphdr *iph = (struct iphdr*)skb->data; | 321 | struct iphdr *iph = (struct iphdr*)skb->data; |
322 | u16 *p = (u16*)(skb->data+(iph->ihl<<2)); | 322 | u16 *p = (u16*)(skb->data+(iph->ihl<<2)); |
323 | int grehlen = (iph->ihl<<2) + 4; | 323 | int grehlen = (iph->ihl<<2) + 4; |
324 | int type = skb->h.icmph->type; | 324 | int type = skb->h.icmph->type; |
325 | int code = skb->h.icmph->code; | 325 | int code = skb->h.icmph->code; |
326 | struct ip_tunnel *t; | 326 | struct ip_tunnel *t; |
327 | u16 flags; | 327 | u16 flags; |
328 | 328 | ||
329 | flags = p[0]; | 329 | flags = p[0]; |
330 | if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) { | 330 | if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) { |
331 | if (flags&(GRE_VERSION|GRE_ROUTING)) | 331 | if (flags&(GRE_VERSION|GRE_ROUTING)) |
332 | return; | 332 | return; |
333 | if (flags&GRE_KEY) { | 333 | if (flags&GRE_KEY) { |
334 | grehlen += 4; | 334 | grehlen += 4; |
335 | if (flags&GRE_CSUM) | 335 | if (flags&GRE_CSUM) |
336 | grehlen += 4; | 336 | grehlen += 4; |
337 | } | 337 | } |
338 | } | 338 | } |
339 | 339 | ||
340 | /* If only 8 bytes returned, keyed message will be dropped here */ | 340 | /* If only 8 bytes returned, keyed message will be dropped here */ |
341 | if (skb_headlen(skb) < grehlen) | 341 | if (skb_headlen(skb) < grehlen) |
342 | return; | 342 | return; |
343 | 343 | ||
344 | switch (type) { | 344 | switch (type) { |
345 | default: | 345 | default: |
346 | case ICMP_PARAMETERPROB: | 346 | case ICMP_PARAMETERPROB: |
347 | return; | 347 | return; |
348 | 348 | ||
349 | case ICMP_DEST_UNREACH: | 349 | case ICMP_DEST_UNREACH: |
350 | switch (code) { | 350 | switch (code) { |
351 | case ICMP_SR_FAILED: | 351 | case ICMP_SR_FAILED: |
352 | case ICMP_PORT_UNREACH: | 352 | case ICMP_PORT_UNREACH: |
353 | /* Impossible event. */ | 353 | /* Impossible event. */ |
354 | return; | 354 | return; |
355 | case ICMP_FRAG_NEEDED: | 355 | case ICMP_FRAG_NEEDED: |
356 | /* Soft state for pmtu is maintained by IP core. */ | 356 | /* Soft state for pmtu is maintained by IP core. */ |
357 | return; | 357 | return; |
358 | default: | 358 | default: |
359 | /* All others are translated to HOST_UNREACH. | 359 | /* All others are translated to HOST_UNREACH. |
360 | rfc2003 contains "deep thoughts" about NET_UNREACH, | 360 | rfc2003 contains "deep thoughts" about NET_UNREACH, |
361 | I believe they are just ether pollution. --ANK | 361 | I believe they are just ether pollution. --ANK |
362 | */ | 362 | */ |
363 | break; | 363 | break; |
364 | } | 364 | } |
365 | break; | 365 | break; |
366 | case ICMP_TIME_EXCEEDED: | 366 | case ICMP_TIME_EXCEEDED: |
367 | if (code != ICMP_EXC_TTL) | 367 | if (code != ICMP_EXC_TTL) |
368 | return; | 368 | return; |
369 | break; | 369 | break; |
370 | } | 370 | } |
371 | 371 | ||
372 | read_lock(&ipgre_lock); | 372 | read_lock(&ipgre_lock); |
373 | t = ipgre_tunnel_lookup(iph->daddr, iph->saddr, (flags&GRE_KEY) ? *(((u32*)p) + (grehlen>>2) - 1) : 0); | 373 | t = ipgre_tunnel_lookup(iph->daddr, iph->saddr, (flags&GRE_KEY) ? *(((u32*)p) + (grehlen>>2) - 1) : 0); |
374 | if (t == NULL || t->parms.iph.daddr == 0 || MULTICAST(t->parms.iph.daddr)) | 374 | if (t == NULL || t->parms.iph.daddr == 0 || MULTICAST(t->parms.iph.daddr)) |
375 | goto out; | 375 | goto out; |
376 | 376 | ||
377 | if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) | 377 | if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) |
378 | goto out; | 378 | goto out; |
379 | 379 | ||
380 | if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO) | 380 | if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO) |
381 | t->err_count++; | 381 | t->err_count++; |
382 | else | 382 | else |
383 | t->err_count = 1; | 383 | t->err_count = 1; |
384 | t->err_time = jiffies; | 384 | t->err_time = jiffies; |
385 | out: | 385 | out: |
386 | read_unlock(&ipgre_lock); | 386 | read_unlock(&ipgre_lock); |
387 | return; | 387 | return; |
388 | #else | 388 | #else |
389 | struct iphdr *iph = (struct iphdr*)dp; | 389 | struct iphdr *iph = (struct iphdr*)dp; |
390 | struct iphdr *eiph; | 390 | struct iphdr *eiph; |
391 | u16 *p = (u16*)(dp+(iph->ihl<<2)); | 391 | u16 *p = (u16*)(dp+(iph->ihl<<2)); |
392 | int type = skb->h.icmph->type; | 392 | int type = skb->h.icmph->type; |
393 | int code = skb->h.icmph->code; | 393 | int code = skb->h.icmph->code; |
394 | int rel_type = 0; | 394 | int rel_type = 0; |
395 | int rel_code = 0; | 395 | int rel_code = 0; |
396 | int rel_info = 0; | 396 | int rel_info = 0; |
397 | u16 flags; | 397 | u16 flags; |
398 | int grehlen = (iph->ihl<<2) + 4; | 398 | int grehlen = (iph->ihl<<2) + 4; |
399 | struct sk_buff *skb2; | 399 | struct sk_buff *skb2; |
400 | struct flowi fl; | 400 | struct flowi fl; |
401 | struct rtable *rt; | 401 | struct rtable *rt; |
402 | 402 | ||
403 | if (p[1] != htons(ETH_P_IP)) | 403 | if (p[1] != htons(ETH_P_IP)) |
404 | return; | 404 | return; |
405 | 405 | ||
406 | flags = p[0]; | 406 | flags = p[0]; |
407 | if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) { | 407 | if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) { |
408 | if (flags&(GRE_VERSION|GRE_ROUTING)) | 408 | if (flags&(GRE_VERSION|GRE_ROUTING)) |
409 | return; | 409 | return; |
410 | if (flags&GRE_CSUM) | 410 | if (flags&GRE_CSUM) |
411 | grehlen += 4; | 411 | grehlen += 4; |
412 | if (flags&GRE_KEY) | 412 | if (flags&GRE_KEY) |
413 | grehlen += 4; | 413 | grehlen += 4; |
414 | if (flags&GRE_SEQ) | 414 | if (flags&GRE_SEQ) |
415 | grehlen += 4; | 415 | grehlen += 4; |
416 | } | 416 | } |
417 | if (len < grehlen + sizeof(struct iphdr)) | 417 | if (len < grehlen + sizeof(struct iphdr)) |
418 | return; | 418 | return; |
419 | eiph = (struct iphdr*)(dp + grehlen); | 419 | eiph = (struct iphdr*)(dp + grehlen); |
420 | 420 | ||
421 | switch (type) { | 421 | switch (type) { |
422 | default: | 422 | default: |
423 | return; | 423 | return; |
424 | case ICMP_PARAMETERPROB: | 424 | case ICMP_PARAMETERPROB: |
425 | if (skb->h.icmph->un.gateway < (iph->ihl<<2)) | 425 | if (skb->h.icmph->un.gateway < (iph->ihl<<2)) |
426 | return; | 426 | return; |
427 | 427 | ||
428 | /* So... This guy found something strange INSIDE encapsulated | 428 | /* So... This guy found something strange INSIDE encapsulated |
429 | packet. Well, he is fool, but what can we do ? | 429 | packet. Well, he is fool, but what can we do ? |
430 | */ | 430 | */ |
431 | rel_type = ICMP_PARAMETERPROB; | 431 | rel_type = ICMP_PARAMETERPROB; |
432 | rel_info = skb->h.icmph->un.gateway - grehlen; | 432 | rel_info = skb->h.icmph->un.gateway - grehlen; |
433 | break; | 433 | break; |
434 | 434 | ||
435 | case ICMP_DEST_UNREACH: | 435 | case ICMP_DEST_UNREACH: |
436 | switch (code) { | 436 | switch (code) { |
437 | case ICMP_SR_FAILED: | 437 | case ICMP_SR_FAILED: |
438 | case ICMP_PORT_UNREACH: | 438 | case ICMP_PORT_UNREACH: |
439 | /* Impossible event. */ | 439 | /* Impossible event. */ |
440 | return; | 440 | return; |
441 | case ICMP_FRAG_NEEDED: | 441 | case ICMP_FRAG_NEEDED: |
442 | /* And it is the only really necessary thing :-) */ | 442 | /* And it is the only really necessary thing :-) */ |
443 | rel_info = ntohs(skb->h.icmph->un.frag.mtu); | 443 | rel_info = ntohs(skb->h.icmph->un.frag.mtu); |
444 | if (rel_info < grehlen+68) | 444 | if (rel_info < grehlen+68) |
445 | return; | 445 | return; |
446 | rel_info -= grehlen; | 446 | rel_info -= grehlen; |
447 | /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */ | 447 | /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */ |
448 | if (rel_info > ntohs(eiph->tot_len)) | 448 | if (rel_info > ntohs(eiph->tot_len)) |
449 | return; | 449 | return; |
450 | break; | 450 | break; |
451 | default: | 451 | default: |
452 | /* All others are translated to HOST_UNREACH. | 452 | /* All others are translated to HOST_UNREACH. |
453 | rfc2003 contains "deep thoughts" about NET_UNREACH, | 453 | rfc2003 contains "deep thoughts" about NET_UNREACH, |
454 | I believe, it is just ether pollution. --ANK | 454 | I believe, it is just ether pollution. --ANK |
455 | */ | 455 | */ |
456 | rel_type = ICMP_DEST_UNREACH; | 456 | rel_type = ICMP_DEST_UNREACH; |
457 | rel_code = ICMP_HOST_UNREACH; | 457 | rel_code = ICMP_HOST_UNREACH; |
458 | break; | 458 | break; |
459 | } | 459 | } |
460 | break; | 460 | break; |
461 | case ICMP_TIME_EXCEEDED: | 461 | case ICMP_TIME_EXCEEDED: |
462 | if (code != ICMP_EXC_TTL) | 462 | if (code != ICMP_EXC_TTL) |
463 | return; | 463 | return; |
464 | break; | 464 | break; |
465 | } | 465 | } |
466 | 466 | ||
467 | /* Prepare fake skb to feed it to icmp_send */ | 467 | /* Prepare fake skb to feed it to icmp_send */ |
468 | skb2 = skb_clone(skb, GFP_ATOMIC); | 468 | skb2 = skb_clone(skb, GFP_ATOMIC); |
469 | if (skb2 == NULL) | 469 | if (skb2 == NULL) |
470 | return; | 470 | return; |
471 | dst_release(skb2->dst); | 471 | dst_release(skb2->dst); |
472 | skb2->dst = NULL; | 472 | skb2->dst = NULL; |
473 | skb_pull(skb2, skb->data - (u8*)eiph); | 473 | skb_pull(skb2, skb->data - (u8*)eiph); |
474 | skb2->nh.raw = skb2->data; | 474 | skb2->nh.raw = skb2->data; |
475 | 475 | ||
476 | /* Try to guess incoming interface */ | 476 | /* Try to guess incoming interface */ |
477 | memset(&fl, 0, sizeof(fl)); | 477 | memset(&fl, 0, sizeof(fl)); |
478 | fl.fl4_dst = eiph->saddr; | 478 | fl.fl4_dst = eiph->saddr; |
479 | fl.fl4_tos = RT_TOS(eiph->tos); | 479 | fl.fl4_tos = RT_TOS(eiph->tos); |
480 | fl.proto = IPPROTO_GRE; | 480 | fl.proto = IPPROTO_GRE; |
481 | if (ip_route_output_key(&rt, &fl)) { | 481 | if (ip_route_output_key(&rt, &fl)) { |
482 | kfree_skb(skb2); | 482 | kfree_skb(skb2); |
483 | return; | 483 | return; |
484 | } | 484 | } |
485 | skb2->dev = rt->u.dst.dev; | 485 | skb2->dev = rt->u.dst.dev; |
486 | 486 | ||
487 | /* route "incoming" packet */ | 487 | /* route "incoming" packet */ |
488 | if (rt->rt_flags&RTCF_LOCAL) { | 488 | if (rt->rt_flags&RTCF_LOCAL) { |
489 | ip_rt_put(rt); | 489 | ip_rt_put(rt); |
490 | rt = NULL; | 490 | rt = NULL; |
491 | fl.fl4_dst = eiph->daddr; | 491 | fl.fl4_dst = eiph->daddr; |
492 | fl.fl4_src = eiph->saddr; | 492 | fl.fl4_src = eiph->saddr; |
493 | fl.fl4_tos = eiph->tos; | 493 | fl.fl4_tos = eiph->tos; |
494 | if (ip_route_output_key(&rt, &fl) || | 494 | if (ip_route_output_key(&rt, &fl) || |
495 | rt->u.dst.dev->type != ARPHRD_IPGRE) { | 495 | rt->u.dst.dev->type != ARPHRD_IPGRE) { |
496 | ip_rt_put(rt); | 496 | ip_rt_put(rt); |
497 | kfree_skb(skb2); | 497 | kfree_skb(skb2); |
498 | return; | 498 | return; |
499 | } | 499 | } |
500 | } else { | 500 | } else { |
501 | ip_rt_put(rt); | 501 | ip_rt_put(rt); |
502 | if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) || | 502 | if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) || |
503 | skb2->dst->dev->type != ARPHRD_IPGRE) { | 503 | skb2->dst->dev->type != ARPHRD_IPGRE) { |
504 | kfree_skb(skb2); | 504 | kfree_skb(skb2); |
505 | return; | 505 | return; |
506 | } | 506 | } |
507 | } | 507 | } |
508 | 508 | ||
509 | /* change mtu on this route */ | 509 | /* change mtu on this route */ |
510 | if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { | 510 | if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { |
511 | if (rel_info > dst_mtu(skb2->dst)) { | 511 | if (rel_info > dst_mtu(skb2->dst)) { |
512 | kfree_skb(skb2); | 512 | kfree_skb(skb2); |
513 | return; | 513 | return; |
514 | } | 514 | } |
515 | skb2->dst->ops->update_pmtu(skb2->dst, rel_info); | 515 | skb2->dst->ops->update_pmtu(skb2->dst, rel_info); |
516 | rel_info = htonl(rel_info); | 516 | rel_info = htonl(rel_info); |
517 | } else if (type == ICMP_TIME_EXCEEDED) { | 517 | } else if (type == ICMP_TIME_EXCEEDED) { |
518 | struct ip_tunnel *t = netdev_priv(skb2->dev); | 518 | struct ip_tunnel *t = netdev_priv(skb2->dev); |
519 | if (t->parms.iph.ttl) { | 519 | if (t->parms.iph.ttl) { |
520 | rel_type = ICMP_DEST_UNREACH; | 520 | rel_type = ICMP_DEST_UNREACH; |
521 | rel_code = ICMP_HOST_UNREACH; | 521 | rel_code = ICMP_HOST_UNREACH; |
522 | } | 522 | } |
523 | } | 523 | } |
524 | 524 | ||
525 | icmp_send(skb2, rel_type, rel_code, rel_info); | 525 | icmp_send(skb2, rel_type, rel_code, rel_info); |
526 | kfree_skb(skb2); | 526 | kfree_skb(skb2); |
527 | #endif | 527 | #endif |
528 | } | 528 | } |
529 | 529 | ||
530 | static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) | 530 | static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) |
531 | { | 531 | { |
532 | if (INET_ECN_is_ce(iph->tos)) { | 532 | if (INET_ECN_is_ce(iph->tos)) { |
533 | if (skb->protocol == htons(ETH_P_IP)) { | 533 | if (skb->protocol == htons(ETH_P_IP)) { |
534 | IP_ECN_set_ce(skb->nh.iph); | 534 | IP_ECN_set_ce(skb->nh.iph); |
535 | } else if (skb->protocol == htons(ETH_P_IPV6)) { | 535 | } else if (skb->protocol == htons(ETH_P_IPV6)) { |
536 | IP6_ECN_set_ce(skb->nh.ipv6h); | 536 | IP6_ECN_set_ce(skb->nh.ipv6h); |
537 | } | 537 | } |
538 | } | 538 | } |
539 | } | 539 | } |
540 | 540 | ||
541 | static inline u8 | 541 | static inline u8 |
542 | ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb) | 542 | ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb) |
543 | { | 543 | { |
544 | u8 inner = 0; | 544 | u8 inner = 0; |
545 | if (skb->protocol == htons(ETH_P_IP)) | 545 | if (skb->protocol == htons(ETH_P_IP)) |
546 | inner = old_iph->tos; | 546 | inner = old_iph->tos; |
547 | else if (skb->protocol == htons(ETH_P_IPV6)) | 547 | else if (skb->protocol == htons(ETH_P_IPV6)) |
548 | inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph); | 548 | inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph); |
549 | return INET_ECN_encapsulate(tos, inner); | 549 | return INET_ECN_encapsulate(tos, inner); |
550 | } | 550 | } |
551 | 551 | ||
552 | static int ipgre_rcv(struct sk_buff *skb) | 552 | static int ipgre_rcv(struct sk_buff *skb) |
553 | { | 553 | { |
554 | struct iphdr *iph; | 554 | struct iphdr *iph; |
555 | u8 *h; | 555 | u8 *h; |
556 | u16 flags; | 556 | u16 flags; |
557 | u16 csum = 0; | 557 | u16 csum = 0; |
558 | u32 key = 0; | 558 | u32 key = 0; |
559 | u32 seqno = 0; | 559 | u32 seqno = 0; |
560 | struct ip_tunnel *tunnel; | 560 | struct ip_tunnel *tunnel; |
561 | int offset = 4; | 561 | int offset = 4; |
562 | 562 | ||
563 | if (!pskb_may_pull(skb, 16)) | 563 | if (!pskb_may_pull(skb, 16)) |
564 | goto drop_nolock; | 564 | goto drop_nolock; |
565 | 565 | ||
566 | iph = skb->nh.iph; | 566 | iph = skb->nh.iph; |
567 | h = skb->data; | 567 | h = skb->data; |
568 | flags = *(u16*)h; | 568 | flags = *(u16*)h; |
569 | 569 | ||
570 | if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) { | 570 | if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) { |
571 | /* - Version must be 0. | 571 | /* - Version must be 0. |
572 | - We do not support routing headers. | 572 | - We do not support routing headers. |
573 | */ | 573 | */ |
574 | if (flags&(GRE_VERSION|GRE_ROUTING)) | 574 | if (flags&(GRE_VERSION|GRE_ROUTING)) |
575 | goto drop_nolock; | 575 | goto drop_nolock; |
576 | 576 | ||
577 | if (flags&GRE_CSUM) { | 577 | if (flags&GRE_CSUM) { |
578 | switch (skb->ip_summed) { | 578 | switch (skb->ip_summed) { |
579 | case CHECKSUM_HW: | 579 | case CHECKSUM_HW: |
580 | csum = (u16)csum_fold(skb->csum); | 580 | csum = (u16)csum_fold(skb->csum); |
581 | if (!csum) | 581 | if (!csum) |
582 | break; | 582 | break; |
583 | /* fall through */ | 583 | /* fall through */ |
584 | case CHECKSUM_NONE: | 584 | case CHECKSUM_NONE: |
585 | skb->csum = 0; | 585 | skb->csum = 0; |
586 | csum = __skb_checksum_complete(skb); | 586 | csum = __skb_checksum_complete(skb); |
587 | skb->ip_summed = CHECKSUM_HW; | 587 | skb->ip_summed = CHECKSUM_HW; |
588 | } | 588 | } |
589 | offset += 4; | 589 | offset += 4; |
590 | } | 590 | } |
591 | if (flags&GRE_KEY) { | 591 | if (flags&GRE_KEY) { |
592 | key = *(u32*)(h + offset); | 592 | key = *(u32*)(h + offset); |
593 | offset += 4; | 593 | offset += 4; |
594 | } | 594 | } |
595 | if (flags&GRE_SEQ) { | 595 | if (flags&GRE_SEQ) { |
596 | seqno = ntohl(*(u32*)(h + offset)); | 596 | seqno = ntohl(*(u32*)(h + offset)); |
597 | offset += 4; | 597 | offset += 4; |
598 | } | 598 | } |
599 | } | 599 | } |
600 | 600 | ||
601 | read_lock(&ipgre_lock); | 601 | read_lock(&ipgre_lock); |
602 | if ((tunnel = ipgre_tunnel_lookup(iph->saddr, iph->daddr, key)) != NULL) { | 602 | if ((tunnel = ipgre_tunnel_lookup(iph->saddr, iph->daddr, key)) != NULL) { |
603 | secpath_reset(skb); | 603 | secpath_reset(skb); |
604 | 604 | ||
605 | skb->protocol = *(u16*)(h + 2); | 605 | skb->protocol = *(u16*)(h + 2); |
606 | /* WCCP version 1 and 2 protocol decoding. | 606 | /* WCCP version 1 and 2 protocol decoding. |
607 | * - Change protocol to IP | 607 | * - Change protocol to IP |
608 | * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header | 608 | * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header |
609 | */ | 609 | */ |
610 | if (flags == 0 && | 610 | if (flags == 0 && |
611 | skb->protocol == __constant_htons(ETH_P_WCCP)) { | 611 | skb->protocol == __constant_htons(ETH_P_WCCP)) { |
612 | skb->protocol = __constant_htons(ETH_P_IP); | 612 | skb->protocol = __constant_htons(ETH_P_IP); |
613 | if ((*(h + offset) & 0xF0) != 0x40) | 613 | if ((*(h + offset) & 0xF0) != 0x40) |
614 | offset += 4; | 614 | offset += 4; |
615 | } | 615 | } |
616 | 616 | ||
617 | skb->mac.raw = skb->nh.raw; | 617 | skb->mac.raw = skb->nh.raw; |
618 | skb->nh.raw = __pskb_pull(skb, offset); | 618 | skb->nh.raw = __pskb_pull(skb, offset); |
619 | skb_postpull_rcsum(skb, skb->h.raw, offset); | 619 | skb_postpull_rcsum(skb, skb->h.raw, offset); |
620 | memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); | ||
621 | skb->pkt_type = PACKET_HOST; | 620 | skb->pkt_type = PACKET_HOST; |
622 | #ifdef CONFIG_NET_IPGRE_BROADCAST | 621 | #ifdef CONFIG_NET_IPGRE_BROADCAST |
623 | if (MULTICAST(iph->daddr)) { | 622 | if (MULTICAST(iph->daddr)) { |
624 | /* Looped back packet, drop it! */ | 623 | /* Looped back packet, drop it! */ |
625 | if (((struct rtable*)skb->dst)->fl.iif == 0) | 624 | if (((struct rtable*)skb->dst)->fl.iif == 0) |
626 | goto drop; | 625 | goto drop; |
627 | tunnel->stat.multicast++; | 626 | tunnel->stat.multicast++; |
628 | skb->pkt_type = PACKET_BROADCAST; | 627 | skb->pkt_type = PACKET_BROADCAST; |
629 | } | 628 | } |
630 | #endif | 629 | #endif |
631 | 630 | ||
632 | if (((flags&GRE_CSUM) && csum) || | 631 | if (((flags&GRE_CSUM) && csum) || |
633 | (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) { | 632 | (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) { |
634 | tunnel->stat.rx_crc_errors++; | 633 | tunnel->stat.rx_crc_errors++; |
635 | tunnel->stat.rx_errors++; | 634 | tunnel->stat.rx_errors++; |
636 | goto drop; | 635 | goto drop; |
637 | } | 636 | } |
638 | if (tunnel->parms.i_flags&GRE_SEQ) { | 637 | if (tunnel->parms.i_flags&GRE_SEQ) { |
639 | if (!(flags&GRE_SEQ) || | 638 | if (!(flags&GRE_SEQ) || |
640 | (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) { | 639 | (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) { |
641 | tunnel->stat.rx_fifo_errors++; | 640 | tunnel->stat.rx_fifo_errors++; |
642 | tunnel->stat.rx_errors++; | 641 | tunnel->stat.rx_errors++; |
643 | goto drop; | 642 | goto drop; |
644 | } | 643 | } |
645 | tunnel->i_seqno = seqno + 1; | 644 | tunnel->i_seqno = seqno + 1; |
646 | } | 645 | } |
647 | tunnel->stat.rx_packets++; | 646 | tunnel->stat.rx_packets++; |
648 | tunnel->stat.rx_bytes += skb->len; | 647 | tunnel->stat.rx_bytes += skb->len; |
649 | skb->dev = tunnel->dev; | 648 | skb->dev = tunnel->dev; |
650 | dst_release(skb->dst); | 649 | dst_release(skb->dst); |
651 | skb->dst = NULL; | 650 | skb->dst = NULL; |
652 | nf_reset(skb); | 651 | nf_reset(skb); |
653 | ipgre_ecn_decapsulate(iph, skb); | 652 | ipgre_ecn_decapsulate(iph, skb); |
654 | netif_rx(skb); | 653 | netif_rx(skb); |
655 | read_unlock(&ipgre_lock); | 654 | read_unlock(&ipgre_lock); |
656 | return(0); | 655 | return(0); |
657 | } | 656 | } |
658 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); | 657 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); |
659 | 658 | ||
660 | drop: | 659 | drop: |
661 | read_unlock(&ipgre_lock); | 660 | read_unlock(&ipgre_lock); |
662 | drop_nolock: | 661 | drop_nolock: |
663 | kfree_skb(skb); | 662 | kfree_skb(skb); |
664 | return(0); | 663 | return(0); |
665 | } | 664 | } |
666 | 665 | ||
667 | static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) | 666 | static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) |
668 | { | 667 | { |
669 | struct ip_tunnel *tunnel = netdev_priv(dev); | 668 | struct ip_tunnel *tunnel = netdev_priv(dev); |
670 | struct net_device_stats *stats = &tunnel->stat; | 669 | struct net_device_stats *stats = &tunnel->stat; |
671 | struct iphdr *old_iph = skb->nh.iph; | 670 | struct iphdr *old_iph = skb->nh.iph; |
672 | struct iphdr *tiph; | 671 | struct iphdr *tiph; |
673 | u8 tos; | 672 | u8 tos; |
674 | u16 df; | 673 | u16 df; |
675 | struct rtable *rt; /* Route to the other host */ | 674 | struct rtable *rt; /* Route to the other host */ |
676 | struct net_device *tdev; /* Device to other host */ | 675 | struct net_device *tdev; /* Device to other host */ |
677 | struct iphdr *iph; /* Our new IP header */ | 676 | struct iphdr *iph; /* Our new IP header */ |
678 | int max_headroom; /* The extra header space needed */ | 677 | int max_headroom; /* The extra header space needed */ |
679 | int gre_hlen; | 678 | int gre_hlen; |
680 | u32 dst; | 679 | u32 dst; |
681 | int mtu; | 680 | int mtu; |
682 | 681 | ||
683 | if (tunnel->recursion++) { | 682 | if (tunnel->recursion++) { |
684 | tunnel->stat.collisions++; | 683 | tunnel->stat.collisions++; |
685 | goto tx_error; | 684 | goto tx_error; |
686 | } | 685 | } |
687 | 686 | ||
688 | if (dev->hard_header) { | 687 | if (dev->hard_header) { |
689 | gre_hlen = 0; | 688 | gre_hlen = 0; |
690 | tiph = (struct iphdr*)skb->data; | 689 | tiph = (struct iphdr*)skb->data; |
691 | } else { | 690 | } else { |
692 | gre_hlen = tunnel->hlen; | 691 | gre_hlen = tunnel->hlen; |
693 | tiph = &tunnel->parms.iph; | 692 | tiph = &tunnel->parms.iph; |
694 | } | 693 | } |
695 | 694 | ||
696 | if ((dst = tiph->daddr) == 0) { | 695 | if ((dst = tiph->daddr) == 0) { |
697 | /* NBMA tunnel */ | 696 | /* NBMA tunnel */ |
698 | 697 | ||
699 | if (skb->dst == NULL) { | 698 | if (skb->dst == NULL) { |
700 | tunnel->stat.tx_fifo_errors++; | 699 | tunnel->stat.tx_fifo_errors++; |
701 | goto tx_error; | 700 | goto tx_error; |
702 | } | 701 | } |
703 | 702 | ||
704 | if (skb->protocol == htons(ETH_P_IP)) { | 703 | if (skb->protocol == htons(ETH_P_IP)) { |
705 | rt = (struct rtable*)skb->dst; | 704 | rt = (struct rtable*)skb->dst; |
706 | if ((dst = rt->rt_gateway) == 0) | 705 | if ((dst = rt->rt_gateway) == 0) |
707 | goto tx_error_icmp; | 706 | goto tx_error_icmp; |
708 | } | 707 | } |
709 | #ifdef CONFIG_IPV6 | 708 | #ifdef CONFIG_IPV6 |
710 | else if (skb->protocol == htons(ETH_P_IPV6)) { | 709 | else if (skb->protocol == htons(ETH_P_IPV6)) { |
711 | struct in6_addr *addr6; | 710 | struct in6_addr *addr6; |
712 | int addr_type; | 711 | int addr_type; |
713 | struct neighbour *neigh = skb->dst->neighbour; | 712 | struct neighbour *neigh = skb->dst->neighbour; |
714 | 713 | ||
715 | if (neigh == NULL) | 714 | if (neigh == NULL) |
716 | goto tx_error; | 715 | goto tx_error; |
717 | 716 | ||
718 | addr6 = (struct in6_addr*)&neigh->primary_key; | 717 | addr6 = (struct in6_addr*)&neigh->primary_key; |
719 | addr_type = ipv6_addr_type(addr6); | 718 | addr_type = ipv6_addr_type(addr6); |
720 | 719 | ||
721 | if (addr_type == IPV6_ADDR_ANY) { | 720 | if (addr_type == IPV6_ADDR_ANY) { |
722 | addr6 = &skb->nh.ipv6h->daddr; | 721 | addr6 = &skb->nh.ipv6h->daddr; |
723 | addr_type = ipv6_addr_type(addr6); | 722 | addr_type = ipv6_addr_type(addr6); |
724 | } | 723 | } |
725 | 724 | ||
726 | if ((addr_type & IPV6_ADDR_COMPATv4) == 0) | 725 | if ((addr_type & IPV6_ADDR_COMPATv4) == 0) |
727 | goto tx_error_icmp; | 726 | goto tx_error_icmp; |
728 | 727 | ||
729 | dst = addr6->s6_addr32[3]; | 728 | dst = addr6->s6_addr32[3]; |
730 | } | 729 | } |
731 | #endif | 730 | #endif |
732 | else | 731 | else |
733 | goto tx_error; | 732 | goto tx_error; |
734 | } | 733 | } |
735 | 734 | ||
736 | tos = tiph->tos; | 735 | tos = tiph->tos; |
737 | if (tos&1) { | 736 | if (tos&1) { |
738 | if (skb->protocol == htons(ETH_P_IP)) | 737 | if (skb->protocol == htons(ETH_P_IP)) |
739 | tos = old_iph->tos; | 738 | tos = old_iph->tos; |
740 | tos &= ~1; | 739 | tos &= ~1; |
741 | } | 740 | } |
742 | 741 | ||
743 | { | 742 | { |
744 | struct flowi fl = { .oif = tunnel->parms.link, | 743 | struct flowi fl = { .oif = tunnel->parms.link, |
745 | .nl_u = { .ip4_u = | 744 | .nl_u = { .ip4_u = |
746 | { .daddr = dst, | 745 | { .daddr = dst, |
747 | .saddr = tiph->saddr, | 746 | .saddr = tiph->saddr, |
748 | .tos = RT_TOS(tos) } }, | 747 | .tos = RT_TOS(tos) } }, |
749 | .proto = IPPROTO_GRE }; | 748 | .proto = IPPROTO_GRE }; |
750 | if (ip_route_output_key(&rt, &fl)) { | 749 | if (ip_route_output_key(&rt, &fl)) { |
751 | tunnel->stat.tx_carrier_errors++; | 750 | tunnel->stat.tx_carrier_errors++; |
752 | goto tx_error; | 751 | goto tx_error; |
753 | } | 752 | } |
754 | } | 753 | } |
755 | tdev = rt->u.dst.dev; | 754 | tdev = rt->u.dst.dev; |
756 | 755 | ||
757 | if (tdev == dev) { | 756 | if (tdev == dev) { |
758 | ip_rt_put(rt); | 757 | ip_rt_put(rt); |
759 | tunnel->stat.collisions++; | 758 | tunnel->stat.collisions++; |
760 | goto tx_error; | 759 | goto tx_error; |
761 | } | 760 | } |
762 | 761 | ||
763 | df = tiph->frag_off; | 762 | df = tiph->frag_off; |
764 | if (df) | 763 | if (df) |
765 | mtu = dst_mtu(&rt->u.dst) - tunnel->hlen; | 764 | mtu = dst_mtu(&rt->u.dst) - tunnel->hlen; |
766 | else | 765 | else |
767 | mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu; | 766 | mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu; |
768 | 767 | ||
769 | if (skb->dst) | 768 | if (skb->dst) |
770 | skb->dst->ops->update_pmtu(skb->dst, mtu); | 769 | skb->dst->ops->update_pmtu(skb->dst, mtu); |
771 | 770 | ||
772 | if (skb->protocol == htons(ETH_P_IP)) { | 771 | if (skb->protocol == htons(ETH_P_IP)) { |
773 | df |= (old_iph->frag_off&htons(IP_DF)); | 772 | df |= (old_iph->frag_off&htons(IP_DF)); |
774 | 773 | ||
775 | if ((old_iph->frag_off&htons(IP_DF)) && | 774 | if ((old_iph->frag_off&htons(IP_DF)) && |
776 | mtu < ntohs(old_iph->tot_len)) { | 775 | mtu < ntohs(old_iph->tot_len)) { |
777 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); | 776 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); |
778 | ip_rt_put(rt); | 777 | ip_rt_put(rt); |
779 | goto tx_error; | 778 | goto tx_error; |
780 | } | 779 | } |
781 | } | 780 | } |
782 | #ifdef CONFIG_IPV6 | 781 | #ifdef CONFIG_IPV6 |
783 | else if (skb->protocol == htons(ETH_P_IPV6)) { | 782 | else if (skb->protocol == htons(ETH_P_IPV6)) { |
784 | struct rt6_info *rt6 = (struct rt6_info*)skb->dst; | 783 | struct rt6_info *rt6 = (struct rt6_info*)skb->dst; |
785 | 784 | ||
786 | if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) { | 785 | if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) { |
787 | if ((tunnel->parms.iph.daddr && !MULTICAST(tunnel->parms.iph.daddr)) || | 786 | if ((tunnel->parms.iph.daddr && !MULTICAST(tunnel->parms.iph.daddr)) || |
788 | rt6->rt6i_dst.plen == 128) { | 787 | rt6->rt6i_dst.plen == 128) { |
789 | rt6->rt6i_flags |= RTF_MODIFIED; | 788 | rt6->rt6i_flags |= RTF_MODIFIED; |
790 | skb->dst->metrics[RTAX_MTU-1] = mtu; | 789 | skb->dst->metrics[RTAX_MTU-1] = mtu; |
791 | } | 790 | } |
792 | } | 791 | } |
793 | 792 | ||
794 | if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) { | 793 | if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) { |
795 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev); | 794 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev); |
796 | ip_rt_put(rt); | 795 | ip_rt_put(rt); |
797 | goto tx_error; | 796 | goto tx_error; |
798 | } | 797 | } |
799 | } | 798 | } |
800 | #endif | 799 | #endif |
801 | 800 | ||
802 | if (tunnel->err_count > 0) { | 801 | if (tunnel->err_count > 0) { |
803 | if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) { | 802 | if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) { |
804 | tunnel->err_count--; | 803 | tunnel->err_count--; |
805 | 804 | ||
806 | dst_link_failure(skb); | 805 | dst_link_failure(skb); |
807 | } else | 806 | } else |
808 | tunnel->err_count = 0; | 807 | tunnel->err_count = 0; |
809 | } | 808 | } |
810 | 809 | ||
811 | max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen; | 810 | max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen; |
812 | 811 | ||
813 | if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) { | 812 | if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) { |
814 | struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); | 813 | struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); |
815 | if (!new_skb) { | 814 | if (!new_skb) { |
816 | ip_rt_put(rt); | 815 | ip_rt_put(rt); |
817 | stats->tx_dropped++; | 816 | stats->tx_dropped++; |
818 | dev_kfree_skb(skb); | 817 | dev_kfree_skb(skb); |
819 | tunnel->recursion--; | 818 | tunnel->recursion--; |
820 | return 0; | 819 | return 0; |
821 | } | 820 | } |
822 | if (skb->sk) | 821 | if (skb->sk) |
823 | skb_set_owner_w(new_skb, skb->sk); | 822 | skb_set_owner_w(new_skb, skb->sk); |
824 | dev_kfree_skb(skb); | 823 | dev_kfree_skb(skb); |
825 | skb = new_skb; | 824 | skb = new_skb; |
826 | old_iph = skb->nh.iph; | 825 | old_iph = skb->nh.iph; |
827 | } | 826 | } |
828 | 827 | ||
829 | skb->h.raw = skb->nh.raw; | 828 | skb->h.raw = skb->nh.raw; |
830 | skb->nh.raw = skb_push(skb, gre_hlen); | 829 | skb->nh.raw = skb_push(skb, gre_hlen); |
831 | memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); | 830 | memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); |
832 | IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | | 831 | IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | |
833 | IPSKB_REROUTED); | 832 | IPSKB_REROUTED); |
834 | dst_release(skb->dst); | 833 | dst_release(skb->dst); |
835 | skb->dst = &rt->u.dst; | 834 | skb->dst = &rt->u.dst; |
836 | 835 | ||
837 | /* | 836 | /* |
838 | * Push down and install the IPIP header. | 837 | * Push down and install the IPIP header. |
839 | */ | 838 | */ |
840 | 839 | ||
841 | iph = skb->nh.iph; | 840 | iph = skb->nh.iph; |
842 | iph->version = 4; | 841 | iph->version = 4; |
843 | iph->ihl = sizeof(struct iphdr) >> 2; | 842 | iph->ihl = sizeof(struct iphdr) >> 2; |
844 | iph->frag_off = df; | 843 | iph->frag_off = df; |
845 | iph->protocol = IPPROTO_GRE; | 844 | iph->protocol = IPPROTO_GRE; |
846 | iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb); | 845 | iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb); |
847 | iph->daddr = rt->rt_dst; | 846 | iph->daddr = rt->rt_dst; |
848 | iph->saddr = rt->rt_src; | 847 | iph->saddr = rt->rt_src; |
849 | 848 | ||
850 | if ((iph->ttl = tiph->ttl) == 0) { | 849 | if ((iph->ttl = tiph->ttl) == 0) { |
851 | if (skb->protocol == htons(ETH_P_IP)) | 850 | if (skb->protocol == htons(ETH_P_IP)) |
852 | iph->ttl = old_iph->ttl; | 851 | iph->ttl = old_iph->ttl; |
853 | #ifdef CONFIG_IPV6 | 852 | #ifdef CONFIG_IPV6 |
854 | else if (skb->protocol == htons(ETH_P_IPV6)) | 853 | else if (skb->protocol == htons(ETH_P_IPV6)) |
855 | iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit; | 854 | iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit; |
856 | #endif | 855 | #endif |
857 | else | 856 | else |
858 | iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT); | 857 | iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT); |
859 | } | 858 | } |
860 | 859 | ||
861 | ((u16*)(iph+1))[0] = tunnel->parms.o_flags; | 860 | ((u16*)(iph+1))[0] = tunnel->parms.o_flags; |
862 | ((u16*)(iph+1))[1] = skb->protocol; | 861 | ((u16*)(iph+1))[1] = skb->protocol; |
863 | 862 | ||
864 | if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) { | 863 | if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) { |
865 | u32 *ptr = (u32*)(((u8*)iph) + tunnel->hlen - 4); | 864 | u32 *ptr = (u32*)(((u8*)iph) + tunnel->hlen - 4); |
866 | 865 | ||
867 | if (tunnel->parms.o_flags&GRE_SEQ) { | 866 | if (tunnel->parms.o_flags&GRE_SEQ) { |
868 | ++tunnel->o_seqno; | 867 | ++tunnel->o_seqno; |
869 | *ptr = htonl(tunnel->o_seqno); | 868 | *ptr = htonl(tunnel->o_seqno); |
870 | ptr--; | 869 | ptr--; |
871 | } | 870 | } |
872 | if (tunnel->parms.o_flags&GRE_KEY) { | 871 | if (tunnel->parms.o_flags&GRE_KEY) { |
873 | *ptr = tunnel->parms.o_key; | 872 | *ptr = tunnel->parms.o_key; |
874 | ptr--; | 873 | ptr--; |
875 | } | 874 | } |
876 | if (tunnel->parms.o_flags&GRE_CSUM) { | 875 | if (tunnel->parms.o_flags&GRE_CSUM) { |
877 | *ptr = 0; | 876 | *ptr = 0; |
878 | *(__u16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr)); | 877 | *(__u16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr)); |
879 | } | 878 | } |
880 | } | 879 | } |
881 | 880 | ||
882 | nf_reset(skb); | 881 | nf_reset(skb); |
883 | 882 | ||
884 | IPTUNNEL_XMIT(); | 883 | IPTUNNEL_XMIT(); |
885 | tunnel->recursion--; | 884 | tunnel->recursion--; |
886 | return 0; | 885 | return 0; |
887 | 886 | ||
888 | tx_error_icmp: | 887 | tx_error_icmp: |
889 | dst_link_failure(skb); | 888 | dst_link_failure(skb); |
890 | 889 | ||
891 | tx_error: | 890 | tx_error: |
892 | stats->tx_errors++; | 891 | stats->tx_errors++; |
893 | dev_kfree_skb(skb); | 892 | dev_kfree_skb(skb); |
894 | tunnel->recursion--; | 893 | tunnel->recursion--; |
895 | return 0; | 894 | return 0; |
896 | } | 895 | } |
897 | 896 | ||
898 | static int | 897 | static int |
899 | ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) | 898 | ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) |
900 | { | 899 | { |
901 | int err = 0; | 900 | int err = 0; |
902 | struct ip_tunnel_parm p; | 901 | struct ip_tunnel_parm p; |
903 | struct ip_tunnel *t; | 902 | struct ip_tunnel *t; |
904 | 903 | ||
905 | switch (cmd) { | 904 | switch (cmd) { |
906 | case SIOCGETTUNNEL: | 905 | case SIOCGETTUNNEL: |
907 | t = NULL; | 906 | t = NULL; |
908 | if (dev == ipgre_fb_tunnel_dev) { | 907 | if (dev == ipgre_fb_tunnel_dev) { |
909 | if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { | 908 | if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { |
910 | err = -EFAULT; | 909 | err = -EFAULT; |
911 | break; | 910 | break; |
912 | } | 911 | } |
913 | t = ipgre_tunnel_locate(&p, 0); | 912 | t = ipgre_tunnel_locate(&p, 0); |
914 | } | 913 | } |
915 | if (t == NULL) | 914 | if (t == NULL) |
916 | t = netdev_priv(dev); | 915 | t = netdev_priv(dev); |
917 | memcpy(&p, &t->parms, sizeof(p)); | 916 | memcpy(&p, &t->parms, sizeof(p)); |
918 | if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) | 917 | if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) |
919 | err = -EFAULT; | 918 | err = -EFAULT; |
920 | break; | 919 | break; |
921 | 920 | ||
922 | case SIOCADDTUNNEL: | 921 | case SIOCADDTUNNEL: |
923 | case SIOCCHGTUNNEL: | 922 | case SIOCCHGTUNNEL: |
924 | err = -EPERM; | 923 | err = -EPERM; |
925 | if (!capable(CAP_NET_ADMIN)) | 924 | if (!capable(CAP_NET_ADMIN)) |
926 | goto done; | 925 | goto done; |
927 | 926 | ||
928 | err = -EFAULT; | 927 | err = -EFAULT; |
929 | if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) | 928 | if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) |
930 | goto done; | 929 | goto done; |
931 | 930 | ||
932 | err = -EINVAL; | 931 | err = -EINVAL; |
933 | if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE || | 932 | if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE || |
934 | p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) || | 933 | p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) || |
935 | ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))) | 934 | ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))) |
936 | goto done; | 935 | goto done; |
937 | if (p.iph.ttl) | 936 | if (p.iph.ttl) |
938 | p.iph.frag_off |= htons(IP_DF); | 937 | p.iph.frag_off |= htons(IP_DF); |
939 | 938 | ||
940 | if (!(p.i_flags&GRE_KEY)) | 939 | if (!(p.i_flags&GRE_KEY)) |
941 | p.i_key = 0; | 940 | p.i_key = 0; |
942 | if (!(p.o_flags&GRE_KEY)) | 941 | if (!(p.o_flags&GRE_KEY)) |
943 | p.o_key = 0; | 942 | p.o_key = 0; |
944 | 943 | ||
945 | t = ipgre_tunnel_locate(&p, cmd == SIOCADDTUNNEL); | 944 | t = ipgre_tunnel_locate(&p, cmd == SIOCADDTUNNEL); |
946 | 945 | ||
947 | if (dev != ipgre_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { | 946 | if (dev != ipgre_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { |
948 | if (t != NULL) { | 947 | if (t != NULL) { |
949 | if (t->dev != dev) { | 948 | if (t->dev != dev) { |
950 | err = -EEXIST; | 949 | err = -EEXIST; |
951 | break; | 950 | break; |
952 | } | 951 | } |
953 | } else { | 952 | } else { |
954 | unsigned nflags=0; | 953 | unsigned nflags=0; |
955 | 954 | ||
956 | t = netdev_priv(dev); | 955 | t = netdev_priv(dev); |
957 | 956 | ||
958 | if (MULTICAST(p.iph.daddr)) | 957 | if (MULTICAST(p.iph.daddr)) |
959 | nflags = IFF_BROADCAST; | 958 | nflags = IFF_BROADCAST; |
960 | else if (p.iph.daddr) | 959 | else if (p.iph.daddr) |
961 | nflags = IFF_POINTOPOINT; | 960 | nflags = IFF_POINTOPOINT; |
962 | 961 | ||
963 | if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) { | 962 | if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) { |
964 | err = -EINVAL; | 963 | err = -EINVAL; |
965 | break; | 964 | break; |
966 | } | 965 | } |
967 | ipgre_tunnel_unlink(t); | 966 | ipgre_tunnel_unlink(t); |
968 | t->parms.iph.saddr = p.iph.saddr; | 967 | t->parms.iph.saddr = p.iph.saddr; |
969 | t->parms.iph.daddr = p.iph.daddr; | 968 | t->parms.iph.daddr = p.iph.daddr; |
970 | t->parms.i_key = p.i_key; | 969 | t->parms.i_key = p.i_key; |
971 | t->parms.o_key = p.o_key; | 970 | t->parms.o_key = p.o_key; |
972 | memcpy(dev->dev_addr, &p.iph.saddr, 4); | 971 | memcpy(dev->dev_addr, &p.iph.saddr, 4); |
973 | memcpy(dev->broadcast, &p.iph.daddr, 4); | 972 | memcpy(dev->broadcast, &p.iph.daddr, 4); |
974 | ipgre_tunnel_link(t); | 973 | ipgre_tunnel_link(t); |
975 | netdev_state_change(dev); | 974 | netdev_state_change(dev); |
976 | } | 975 | } |
977 | } | 976 | } |
978 | 977 | ||
979 | if (t) { | 978 | if (t) { |
980 | err = 0; | 979 | err = 0; |
981 | if (cmd == SIOCCHGTUNNEL) { | 980 | if (cmd == SIOCCHGTUNNEL) { |
982 | t->parms.iph.ttl = p.iph.ttl; | 981 | t->parms.iph.ttl = p.iph.ttl; |
983 | t->parms.iph.tos = p.iph.tos; | 982 | t->parms.iph.tos = p.iph.tos; |
984 | t->parms.iph.frag_off = p.iph.frag_off; | 983 | t->parms.iph.frag_off = p.iph.frag_off; |
985 | } | 984 | } |
986 | if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) | 985 | if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) |
987 | err = -EFAULT; | 986 | err = -EFAULT; |
988 | } else | 987 | } else |
989 | err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); | 988 | err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); |
990 | break; | 989 | break; |
991 | 990 | ||
992 | case SIOCDELTUNNEL: | 991 | case SIOCDELTUNNEL: |
993 | err = -EPERM; | 992 | err = -EPERM; |
994 | if (!capable(CAP_NET_ADMIN)) | 993 | if (!capable(CAP_NET_ADMIN)) |
995 | goto done; | 994 | goto done; |
996 | 995 | ||
997 | if (dev == ipgre_fb_tunnel_dev) { | 996 | if (dev == ipgre_fb_tunnel_dev) { |
998 | err = -EFAULT; | 997 | err = -EFAULT; |
999 | if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) | 998 | if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) |
1000 | goto done; | 999 | goto done; |
1001 | err = -ENOENT; | 1000 | err = -ENOENT; |
1002 | if ((t = ipgre_tunnel_locate(&p, 0)) == NULL) | 1001 | if ((t = ipgre_tunnel_locate(&p, 0)) == NULL) |
1003 | goto done; | 1002 | goto done; |
1004 | err = -EPERM; | 1003 | err = -EPERM; |
1005 | if (t == netdev_priv(ipgre_fb_tunnel_dev)) | 1004 | if (t == netdev_priv(ipgre_fb_tunnel_dev)) |
1006 | goto done; | 1005 | goto done; |
1007 | dev = t->dev; | 1006 | dev = t->dev; |
1008 | } | 1007 | } |
1009 | err = unregister_netdevice(dev); | 1008 | err = unregister_netdevice(dev); |
1010 | break; | 1009 | break; |
1011 | 1010 | ||
1012 | default: | 1011 | default: |
1013 | err = -EINVAL; | 1012 | err = -EINVAL; |
1014 | } | 1013 | } |
1015 | 1014 | ||
1016 | done: | 1015 | done: |
1017 | return err; | 1016 | return err; |
1018 | } | 1017 | } |
1019 | 1018 | ||
1020 | static struct net_device_stats *ipgre_tunnel_get_stats(struct net_device *dev) | 1019 | static struct net_device_stats *ipgre_tunnel_get_stats(struct net_device *dev) |
1021 | { | 1020 | { |
1022 | return &(((struct ip_tunnel*)netdev_priv(dev))->stat); | 1021 | return &(((struct ip_tunnel*)netdev_priv(dev))->stat); |
1023 | } | 1022 | } |
1024 | 1023 | ||
1025 | static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu) | 1024 | static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu) |
1026 | { | 1025 | { |
1027 | struct ip_tunnel *tunnel = netdev_priv(dev); | 1026 | struct ip_tunnel *tunnel = netdev_priv(dev); |
1028 | if (new_mtu < 68 || new_mtu > 0xFFF8 - tunnel->hlen) | 1027 | if (new_mtu < 68 || new_mtu > 0xFFF8 - tunnel->hlen) |
1029 | return -EINVAL; | 1028 | return -EINVAL; |
1030 | dev->mtu = new_mtu; | 1029 | dev->mtu = new_mtu; |
1031 | return 0; | 1030 | return 0; |
1032 | } | 1031 | } |
1033 | 1032 | ||
1034 | #ifdef CONFIG_NET_IPGRE_BROADCAST | 1033 | #ifdef CONFIG_NET_IPGRE_BROADCAST |
1035 | /* Nice toy. Unfortunately, useless in real life :-) | 1034 | /* Nice toy. Unfortunately, useless in real life :-) |
1036 | It allows to construct virtual multiprotocol broadcast "LAN" | 1035 | It allows to construct virtual multiprotocol broadcast "LAN" |
1037 | over the Internet, provided multicast routing is tuned. | 1036 | over the Internet, provided multicast routing is tuned. |
1038 | 1037 | ||
1039 | 1038 | ||
1040 | I have no idea was this bicycle invented before me, | 1039 | I have no idea was this bicycle invented before me, |
1041 | so that I had to set ARPHRD_IPGRE to a random value. | 1040 | so that I had to set ARPHRD_IPGRE to a random value. |
1042 | I have an impression, that Cisco could make something similar, | 1041 | I have an impression, that Cisco could make something similar, |
1043 | but this feature is apparently missing in IOS<=11.2(8). | 1042 | but this feature is apparently missing in IOS<=11.2(8). |
1044 | 1043 | ||
1045 | I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks | 1044 | I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks |
1046 | with broadcast 224.66.66.66. If you have access to mbone, play with me :-) | 1045 | with broadcast 224.66.66.66. If you have access to mbone, play with me :-) |
1047 | 1046 | ||
1048 | ping -t 255 224.66.66.66 | 1047 | ping -t 255 224.66.66.66 |
1049 | 1048 | ||
1050 | If nobody answers, mbone does not work. | 1049 | If nobody answers, mbone does not work. |
1051 | 1050 | ||
1052 | ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255 | 1051 | ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255 |
1053 | ip addr add 10.66.66.<somewhat>/24 dev Universe | 1052 | ip addr add 10.66.66.<somewhat>/24 dev Universe |
1054 | ifconfig Universe up | 1053 | ifconfig Universe up |
1055 | ifconfig Universe add fe80::<Your_real_addr>/10 | 1054 | ifconfig Universe add fe80::<Your_real_addr>/10 |
1056 | ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96 | 1055 | ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96 |
1057 | ftp 10.66.66.66 | 1056 | ftp 10.66.66.66 |
1058 | ... | 1057 | ... |
1059 | ftp fec0:6666:6666::193.233.7.65 | 1058 | ftp fec0:6666:6666::193.233.7.65 |
1060 | ... | 1059 | ... |
1061 | 1060 | ||
1062 | */ | 1061 | */ |
1063 | 1062 | ||
1064 | static int ipgre_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, | 1063 | static int ipgre_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, |
1065 | void *daddr, void *saddr, unsigned len) | 1064 | void *daddr, void *saddr, unsigned len) |
1066 | { | 1065 | { |
1067 | struct ip_tunnel *t = netdev_priv(dev); | 1066 | struct ip_tunnel *t = netdev_priv(dev); |
1068 | struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen); | 1067 | struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen); |
1069 | u16 *p = (u16*)(iph+1); | 1068 | u16 *p = (u16*)(iph+1); |
1070 | 1069 | ||
1071 | memcpy(iph, &t->parms.iph, sizeof(struct iphdr)); | 1070 | memcpy(iph, &t->parms.iph, sizeof(struct iphdr)); |
1072 | p[0] = t->parms.o_flags; | 1071 | p[0] = t->parms.o_flags; |
1073 | p[1] = htons(type); | 1072 | p[1] = htons(type); |
1074 | 1073 | ||
1075 | /* | 1074 | /* |
1076 | * Set the source hardware address. | 1075 | * Set the source hardware address. |
1077 | */ | 1076 | */ |
1078 | 1077 | ||
1079 | if (saddr) | 1078 | if (saddr) |
1080 | memcpy(&iph->saddr, saddr, 4); | 1079 | memcpy(&iph->saddr, saddr, 4); |
1081 | 1080 | ||
1082 | if (daddr) { | 1081 | if (daddr) { |
1083 | memcpy(&iph->daddr, daddr, 4); | 1082 | memcpy(&iph->daddr, daddr, 4); |
1084 | return t->hlen; | 1083 | return t->hlen; |
1085 | } | 1084 | } |
1086 | if (iph->daddr && !MULTICAST(iph->daddr)) | 1085 | if (iph->daddr && !MULTICAST(iph->daddr)) |
1087 | return t->hlen; | 1086 | return t->hlen; |
1088 | 1087 | ||
1089 | return -t->hlen; | 1088 | return -t->hlen; |
1090 | } | 1089 | } |
1091 | 1090 | ||
1092 | static int ipgre_open(struct net_device *dev) | 1091 | static int ipgre_open(struct net_device *dev) |
1093 | { | 1092 | { |
1094 | struct ip_tunnel *t = netdev_priv(dev); | 1093 | struct ip_tunnel *t = netdev_priv(dev); |
1095 | 1094 | ||
1096 | if (MULTICAST(t->parms.iph.daddr)) { | 1095 | if (MULTICAST(t->parms.iph.daddr)) { |
1097 | struct flowi fl = { .oif = t->parms.link, | 1096 | struct flowi fl = { .oif = t->parms.link, |
1098 | .nl_u = { .ip4_u = | 1097 | .nl_u = { .ip4_u = |
1099 | { .daddr = t->parms.iph.daddr, | 1098 | { .daddr = t->parms.iph.daddr, |
1100 | .saddr = t->parms.iph.saddr, | 1099 | .saddr = t->parms.iph.saddr, |
1101 | .tos = RT_TOS(t->parms.iph.tos) } }, | 1100 | .tos = RT_TOS(t->parms.iph.tos) } }, |
1102 | .proto = IPPROTO_GRE }; | 1101 | .proto = IPPROTO_GRE }; |
1103 | struct rtable *rt; | 1102 | struct rtable *rt; |
1104 | if (ip_route_output_key(&rt, &fl)) | 1103 | if (ip_route_output_key(&rt, &fl)) |
1105 | return -EADDRNOTAVAIL; | 1104 | return -EADDRNOTAVAIL; |
1106 | dev = rt->u.dst.dev; | 1105 | dev = rt->u.dst.dev; |
1107 | ip_rt_put(rt); | 1106 | ip_rt_put(rt); |
1108 | if (__in_dev_get_rtnl(dev) == NULL) | 1107 | if (__in_dev_get_rtnl(dev) == NULL) |
1109 | return -EADDRNOTAVAIL; | 1108 | return -EADDRNOTAVAIL; |
1110 | t->mlink = dev->ifindex; | 1109 | t->mlink = dev->ifindex; |
1111 | ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr); | 1110 | ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr); |
1112 | } | 1111 | } |
1113 | return 0; | 1112 | return 0; |
1114 | } | 1113 | } |
1115 | 1114 | ||
1116 | static int ipgre_close(struct net_device *dev) | 1115 | static int ipgre_close(struct net_device *dev) |
1117 | { | 1116 | { |
1118 | struct ip_tunnel *t = netdev_priv(dev); | 1117 | struct ip_tunnel *t = netdev_priv(dev); |
1119 | if (MULTICAST(t->parms.iph.daddr) && t->mlink) { | 1118 | if (MULTICAST(t->parms.iph.daddr) && t->mlink) { |
1120 | struct in_device *in_dev = inetdev_by_index(t->mlink); | 1119 | struct in_device *in_dev = inetdev_by_index(t->mlink); |
1121 | if (in_dev) { | 1120 | if (in_dev) { |
1122 | ip_mc_dec_group(in_dev, t->parms.iph.daddr); | 1121 | ip_mc_dec_group(in_dev, t->parms.iph.daddr); |
1123 | in_dev_put(in_dev); | 1122 | in_dev_put(in_dev); |
1124 | } | 1123 | } |
1125 | } | 1124 | } |
1126 | return 0; | 1125 | return 0; |
1127 | } | 1126 | } |
1128 | 1127 | ||
1129 | #endif | 1128 | #endif |
1130 | 1129 | ||
1131 | static void ipgre_tunnel_setup(struct net_device *dev) | 1130 | static void ipgre_tunnel_setup(struct net_device *dev) |
1132 | { | 1131 | { |
1133 | SET_MODULE_OWNER(dev); | 1132 | SET_MODULE_OWNER(dev); |
1134 | dev->uninit = ipgre_tunnel_uninit; | 1133 | dev->uninit = ipgre_tunnel_uninit; |
1135 | dev->destructor = free_netdev; | 1134 | dev->destructor = free_netdev; |
1136 | dev->hard_start_xmit = ipgre_tunnel_xmit; | 1135 | dev->hard_start_xmit = ipgre_tunnel_xmit; |
1137 | dev->get_stats = ipgre_tunnel_get_stats; | 1136 | dev->get_stats = ipgre_tunnel_get_stats; |
1138 | dev->do_ioctl = ipgre_tunnel_ioctl; | 1137 | dev->do_ioctl = ipgre_tunnel_ioctl; |
1139 | dev->change_mtu = ipgre_tunnel_change_mtu; | 1138 | dev->change_mtu = ipgre_tunnel_change_mtu; |
1140 | 1139 | ||
1141 | dev->type = ARPHRD_IPGRE; | 1140 | dev->type = ARPHRD_IPGRE; |
1142 | dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr) + 4; | 1141 | dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr) + 4; |
1143 | dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4; | 1142 | dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4; |
1144 | dev->flags = IFF_NOARP; | 1143 | dev->flags = IFF_NOARP; |
1145 | dev->iflink = 0; | 1144 | dev->iflink = 0; |
1146 | dev->addr_len = 4; | 1145 | dev->addr_len = 4; |
1147 | } | 1146 | } |
1148 | 1147 | ||
1149 | static int ipgre_tunnel_init(struct net_device *dev) | 1148 | static int ipgre_tunnel_init(struct net_device *dev) |
1150 | { | 1149 | { |
1151 | struct net_device *tdev = NULL; | 1150 | struct net_device *tdev = NULL; |
1152 | struct ip_tunnel *tunnel; | 1151 | struct ip_tunnel *tunnel; |
1153 | struct iphdr *iph; | 1152 | struct iphdr *iph; |
1154 | int hlen = LL_MAX_HEADER; | 1153 | int hlen = LL_MAX_HEADER; |
1155 | int mtu = ETH_DATA_LEN; | 1154 | int mtu = ETH_DATA_LEN; |
1156 | int addend = sizeof(struct iphdr) + 4; | 1155 | int addend = sizeof(struct iphdr) + 4; |
1157 | 1156 | ||
1158 | tunnel = netdev_priv(dev); | 1157 | tunnel = netdev_priv(dev); |
1159 | iph = &tunnel->parms.iph; | 1158 | iph = &tunnel->parms.iph; |
1160 | 1159 | ||
1161 | tunnel->dev = dev; | 1160 | tunnel->dev = dev; |
1162 | strcpy(tunnel->parms.name, dev->name); | 1161 | strcpy(tunnel->parms.name, dev->name); |
1163 | 1162 | ||
1164 | memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); | 1163 | memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); |
1165 | memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); | 1164 | memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); |
1166 | 1165 | ||
1167 | /* Guess output device to choose reasonable mtu and hard_header_len */ | 1166 | /* Guess output device to choose reasonable mtu and hard_header_len */ |
1168 | 1167 | ||
1169 | if (iph->daddr) { | 1168 | if (iph->daddr) { |
1170 | struct flowi fl = { .oif = tunnel->parms.link, | 1169 | struct flowi fl = { .oif = tunnel->parms.link, |
1171 | .nl_u = { .ip4_u = | 1170 | .nl_u = { .ip4_u = |
1172 | { .daddr = iph->daddr, | 1171 | { .daddr = iph->daddr, |
1173 | .saddr = iph->saddr, | 1172 | .saddr = iph->saddr, |
1174 | .tos = RT_TOS(iph->tos) } }, | 1173 | .tos = RT_TOS(iph->tos) } }, |
1175 | .proto = IPPROTO_GRE }; | 1174 | .proto = IPPROTO_GRE }; |
1176 | struct rtable *rt; | 1175 | struct rtable *rt; |
1177 | if (!ip_route_output_key(&rt, &fl)) { | 1176 | if (!ip_route_output_key(&rt, &fl)) { |
1178 | tdev = rt->u.dst.dev; | 1177 | tdev = rt->u.dst.dev; |
1179 | ip_rt_put(rt); | 1178 | ip_rt_put(rt); |
1180 | } | 1179 | } |
1181 | 1180 | ||
1182 | dev->flags |= IFF_POINTOPOINT; | 1181 | dev->flags |= IFF_POINTOPOINT; |
1183 | 1182 | ||
1184 | #ifdef CONFIG_NET_IPGRE_BROADCAST | 1183 | #ifdef CONFIG_NET_IPGRE_BROADCAST |
1185 | if (MULTICAST(iph->daddr)) { | 1184 | if (MULTICAST(iph->daddr)) { |
1186 | if (!iph->saddr) | 1185 | if (!iph->saddr) |
1187 | return -EINVAL; | 1186 | return -EINVAL; |
1188 | dev->flags = IFF_BROADCAST; | 1187 | dev->flags = IFF_BROADCAST; |
1189 | dev->hard_header = ipgre_header; | 1188 | dev->hard_header = ipgre_header; |
1190 | dev->open = ipgre_open; | 1189 | dev->open = ipgre_open; |
1191 | dev->stop = ipgre_close; | 1190 | dev->stop = ipgre_close; |
1192 | } | 1191 | } |
1193 | #endif | 1192 | #endif |
1194 | } | 1193 | } |
1195 | 1194 | ||
1196 | if (!tdev && tunnel->parms.link) | 1195 | if (!tdev && tunnel->parms.link) |
1197 | tdev = __dev_get_by_index(tunnel->parms.link); | 1196 | tdev = __dev_get_by_index(tunnel->parms.link); |
1198 | 1197 | ||
1199 | if (tdev) { | 1198 | if (tdev) { |
1200 | hlen = tdev->hard_header_len; | 1199 | hlen = tdev->hard_header_len; |
1201 | mtu = tdev->mtu; | 1200 | mtu = tdev->mtu; |
1202 | } | 1201 | } |
1203 | dev->iflink = tunnel->parms.link; | 1202 | dev->iflink = tunnel->parms.link; |
1204 | 1203 | ||
1205 | /* Precalculate GRE options length */ | 1204 | /* Precalculate GRE options length */ |
1206 | if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) { | 1205 | if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) { |
1207 | if (tunnel->parms.o_flags&GRE_CSUM) | 1206 | if (tunnel->parms.o_flags&GRE_CSUM) |
1208 | addend += 4; | 1207 | addend += 4; |
1209 | if (tunnel->parms.o_flags&GRE_KEY) | 1208 | if (tunnel->parms.o_flags&GRE_KEY) |
1210 | addend += 4; | 1209 | addend += 4; |
1211 | if (tunnel->parms.o_flags&GRE_SEQ) | 1210 | if (tunnel->parms.o_flags&GRE_SEQ) |
1212 | addend += 4; | 1211 | addend += 4; |
1213 | } | 1212 | } |
1214 | dev->hard_header_len = hlen + addend; | 1213 | dev->hard_header_len = hlen + addend; |
1215 | dev->mtu = mtu - addend; | 1214 | dev->mtu = mtu - addend; |
1216 | tunnel->hlen = addend; | 1215 | tunnel->hlen = addend; |
1217 | return 0; | 1216 | return 0; |
1218 | } | 1217 | } |
1219 | 1218 | ||
1220 | static int __init ipgre_fb_tunnel_init(struct net_device *dev) | 1219 | static int __init ipgre_fb_tunnel_init(struct net_device *dev) |
1221 | { | 1220 | { |
1222 | struct ip_tunnel *tunnel = netdev_priv(dev); | 1221 | struct ip_tunnel *tunnel = netdev_priv(dev); |
1223 | struct iphdr *iph = &tunnel->parms.iph; | 1222 | struct iphdr *iph = &tunnel->parms.iph; |
1224 | 1223 | ||
1225 | tunnel->dev = dev; | 1224 | tunnel->dev = dev; |
1226 | strcpy(tunnel->parms.name, dev->name); | 1225 | strcpy(tunnel->parms.name, dev->name); |
1227 | 1226 | ||
1228 | iph->version = 4; | 1227 | iph->version = 4; |
1229 | iph->protocol = IPPROTO_GRE; | 1228 | iph->protocol = IPPROTO_GRE; |
1230 | iph->ihl = 5; | 1229 | iph->ihl = 5; |
1231 | tunnel->hlen = sizeof(struct iphdr) + 4; | 1230 | tunnel->hlen = sizeof(struct iphdr) + 4; |
1232 | 1231 | ||
1233 | dev_hold(dev); | 1232 | dev_hold(dev); |
1234 | tunnels_wc[0] = tunnel; | 1233 | tunnels_wc[0] = tunnel; |
1235 | return 0; | 1234 | return 0; |
1236 | } | 1235 | } |
1237 | 1236 | ||
1238 | 1237 | ||
1239 | static struct net_protocol ipgre_protocol = { | 1238 | static struct net_protocol ipgre_protocol = { |
1240 | .handler = ipgre_rcv, | 1239 | .handler = ipgre_rcv, |
1241 | .err_handler = ipgre_err, | 1240 | .err_handler = ipgre_err, |
1242 | }; | 1241 | }; |
1243 | 1242 | ||
1244 | 1243 | ||
1245 | /* | 1244 | /* |
1246 | * And now the modules code and kernel interface. | 1245 | * And now the modules code and kernel interface. |
1247 | */ | 1246 | */ |
1248 | 1247 | ||
1249 | static int __init ipgre_init(void) | 1248 | static int __init ipgre_init(void) |
1250 | { | 1249 | { |
1251 | int err; | 1250 | int err; |
1252 | 1251 | ||
1253 | printk(KERN_INFO "GRE over IPv4 tunneling driver\n"); | 1252 | printk(KERN_INFO "GRE over IPv4 tunneling driver\n"); |
1254 | 1253 | ||
1255 | if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) { | 1254 | if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) { |
1256 | printk(KERN_INFO "ipgre init: can't add protocol\n"); | 1255 | printk(KERN_INFO "ipgre init: can't add protocol\n"); |
1257 | return -EAGAIN; | 1256 | return -EAGAIN; |
1258 | } | 1257 | } |
1259 | 1258 | ||
1260 | ipgre_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0", | 1259 | ipgre_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0", |
1261 | ipgre_tunnel_setup); | 1260 | ipgre_tunnel_setup); |
1262 | if (!ipgre_fb_tunnel_dev) { | 1261 | if (!ipgre_fb_tunnel_dev) { |
1263 | err = -ENOMEM; | 1262 | err = -ENOMEM; |
1264 | goto err1; | 1263 | goto err1; |
1265 | } | 1264 | } |
1266 | 1265 | ||
1267 | ipgre_fb_tunnel_dev->init = ipgre_fb_tunnel_init; | 1266 | ipgre_fb_tunnel_dev->init = ipgre_fb_tunnel_init; |
1268 | 1267 | ||
1269 | if ((err = register_netdev(ipgre_fb_tunnel_dev))) | 1268 | if ((err = register_netdev(ipgre_fb_tunnel_dev))) |
1270 | goto err2; | 1269 | goto err2; |
1271 | out: | 1270 | out: |
1272 | return err; | 1271 | return err; |
1273 | err2: | 1272 | err2: |
1274 | free_netdev(ipgre_fb_tunnel_dev); | 1273 | free_netdev(ipgre_fb_tunnel_dev); |
1275 | err1: | 1274 | err1: |
1276 | inet_del_protocol(&ipgre_protocol, IPPROTO_GRE); | 1275 | inet_del_protocol(&ipgre_protocol, IPPROTO_GRE); |
1277 | goto out; | 1276 | goto out; |
1278 | } | 1277 | } |
1279 | 1278 | ||
1280 | static void __exit ipgre_destroy_tunnels(void) | 1279 | static void __exit ipgre_destroy_tunnels(void) |
1281 | { | 1280 | { |
1282 | int prio; | 1281 | int prio; |
1283 | 1282 | ||
1284 | for (prio = 0; prio < 4; prio++) { | 1283 | for (prio = 0; prio < 4; prio++) { |
1285 | int h; | 1284 | int h; |
1286 | for (h = 0; h < HASH_SIZE; h++) { | 1285 | for (h = 0; h < HASH_SIZE; h++) { |
1287 | struct ip_tunnel *t; | 1286 | struct ip_tunnel *t; |
1288 | while ((t = tunnels[prio][h]) != NULL) | 1287 | while ((t = tunnels[prio][h]) != NULL) |
1289 | unregister_netdevice(t->dev); | 1288 | unregister_netdevice(t->dev); |
1290 | } | 1289 | } |
1291 | } | 1290 | } |
1292 | } | 1291 | } |
1293 | 1292 | ||
1294 | static void __exit ipgre_fini(void) | 1293 | static void __exit ipgre_fini(void) |
1295 | { | 1294 | { |
1296 | if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) | 1295 | if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) |
1297 | printk(KERN_INFO "ipgre close: can't remove protocol\n"); | 1296 | printk(KERN_INFO "ipgre close: can't remove protocol\n"); |
1298 | 1297 | ||
1299 | rtnl_lock(); | 1298 | rtnl_lock(); |
1300 | ipgre_destroy_tunnels(); | 1299 | ipgre_destroy_tunnels(); |
1301 | rtnl_unlock(); | 1300 | rtnl_unlock(); |
1302 | } | 1301 | } |
1303 | 1302 | ||
1304 | module_init(ipgre_init); | 1303 | module_init(ipgre_init); |
1305 | module_exit(ipgre_fini); | 1304 | module_exit(ipgre_fini); |
1306 | MODULE_LICENSE("GPL"); | 1305 | MODULE_LICENSE("GPL"); |
1307 | 1306 |
net/ipv4/ip_options.c
1 | /* | 1 | /* |
2 | * INET An implementation of the TCP/IP protocol suite for the LINUX | 2 | * INET An implementation of the TCP/IP protocol suite for the LINUX |
3 | * operating system. INET is implemented using the BSD Socket | 3 | * operating system. INET is implemented using the BSD Socket |
4 | * interface as the means of communication with the user level. | 4 | * interface as the means of communication with the user level. |
5 | * | 5 | * |
6 | * The options processing module for ip.c | 6 | * The options processing module for ip.c |
7 | * | 7 | * |
8 | * Version: $Id: ip_options.c,v 1.21 2001/09/01 00:31:50 davem Exp $ | 8 | * Version: $Id: ip_options.c,v 1.21 2001/09/01 00:31:50 davem Exp $ |
9 | * | 9 | * |
10 | * Authors: A.N.Kuznetsov | 10 | * Authors: A.N.Kuznetsov |
11 | * | 11 | * |
12 | */ | 12 | */ |
13 | 13 | ||
14 | #include <linux/capability.h> | 14 | #include <linux/capability.h> |
15 | #include <linux/module.h> | 15 | #include <linux/module.h> |
16 | #include <linux/types.h> | 16 | #include <linux/types.h> |
17 | #include <asm/uaccess.h> | 17 | #include <asm/uaccess.h> |
18 | #include <linux/skbuff.h> | 18 | #include <linux/skbuff.h> |
19 | #include <linux/ip.h> | 19 | #include <linux/ip.h> |
20 | #include <linux/icmp.h> | 20 | #include <linux/icmp.h> |
21 | #include <linux/netdevice.h> | 21 | #include <linux/netdevice.h> |
22 | #include <linux/rtnetlink.h> | 22 | #include <linux/rtnetlink.h> |
23 | #include <net/sock.h> | 23 | #include <net/sock.h> |
24 | #include <net/ip.h> | 24 | #include <net/ip.h> |
25 | #include <net/icmp.h> | 25 | #include <net/icmp.h> |
26 | #include <net/route.h> | 26 | #include <net/route.h> |
27 | 27 | ||
28 | /* | 28 | /* |
29 | * Write options to IP header, record destination address to | 29 | * Write options to IP header, record destination address to |
30 | * source route option, address of outgoing interface | 30 | * source route option, address of outgoing interface |
31 | * (we should already know it, so that this function is allowed be | 31 | * (we should already know it, so that this function is allowed be |
32 | * called only after routing decision) and timestamp, | 32 | * called only after routing decision) and timestamp, |
33 | * if we originate this datagram. | 33 | * if we originate this datagram. |
34 | * | 34 | * |
35 | * daddr is real destination address, next hop is recorded in IP header. | 35 | * daddr is real destination address, next hop is recorded in IP header. |
36 | * saddr is address of outgoing interface. | 36 | * saddr is address of outgoing interface. |
37 | */ | 37 | */ |
38 | 38 | ||
39 | void ip_options_build(struct sk_buff * skb, struct ip_options * opt, | 39 | void ip_options_build(struct sk_buff * skb, struct ip_options * opt, |
40 | u32 daddr, struct rtable *rt, int is_frag) | 40 | u32 daddr, struct rtable *rt, int is_frag) |
41 | { | 41 | { |
42 | unsigned char * iph = skb->nh.raw; | 42 | unsigned char * iph = skb->nh.raw; |
43 | 43 | ||
44 | memcpy(&(IPCB(skb)->opt), opt, sizeof(struct ip_options)); | 44 | memcpy(&(IPCB(skb)->opt), opt, sizeof(struct ip_options)); |
45 | memcpy(iph+sizeof(struct iphdr), opt->__data, opt->optlen); | 45 | memcpy(iph+sizeof(struct iphdr), opt->__data, opt->optlen); |
46 | opt = &(IPCB(skb)->opt); | 46 | opt = &(IPCB(skb)->opt); |
47 | opt->is_data = 0; | 47 | opt->is_data = 0; |
48 | 48 | ||
49 | if (opt->srr) | 49 | if (opt->srr) |
50 | memcpy(iph+opt->srr+iph[opt->srr+1]-4, &daddr, 4); | 50 | memcpy(iph+opt->srr+iph[opt->srr+1]-4, &daddr, 4); |
51 | 51 | ||
52 | if (!is_frag) { | 52 | if (!is_frag) { |
53 | if (opt->rr_needaddr) | 53 | if (opt->rr_needaddr) |
54 | ip_rt_get_source(iph+opt->rr+iph[opt->rr+2]-5, rt); | 54 | ip_rt_get_source(iph+opt->rr+iph[opt->rr+2]-5, rt); |
55 | if (opt->ts_needaddr) | 55 | if (opt->ts_needaddr) |
56 | ip_rt_get_source(iph+opt->ts+iph[opt->ts+2]-9, rt); | 56 | ip_rt_get_source(iph+opt->ts+iph[opt->ts+2]-9, rt); |
57 | if (opt->ts_needtime) { | 57 | if (opt->ts_needtime) { |
58 | struct timeval tv; | 58 | struct timeval tv; |
59 | __u32 midtime; | 59 | __u32 midtime; |
60 | do_gettimeofday(&tv); | 60 | do_gettimeofday(&tv); |
61 | midtime = htonl((tv.tv_sec % 86400) * 1000 + tv.tv_usec / 1000); | 61 | midtime = htonl((tv.tv_sec % 86400) * 1000 + tv.tv_usec / 1000); |
62 | memcpy(iph+opt->ts+iph[opt->ts+2]-5, &midtime, 4); | 62 | memcpy(iph+opt->ts+iph[opt->ts+2]-5, &midtime, 4); |
63 | } | 63 | } |
64 | return; | 64 | return; |
65 | } | 65 | } |
66 | if (opt->rr) { | 66 | if (opt->rr) { |
67 | memset(iph+opt->rr, IPOPT_NOP, iph[opt->rr+1]); | 67 | memset(iph+opt->rr, IPOPT_NOP, iph[opt->rr+1]); |
68 | opt->rr = 0; | 68 | opt->rr = 0; |
69 | opt->rr_needaddr = 0; | 69 | opt->rr_needaddr = 0; |
70 | } | 70 | } |
71 | if (opt->ts) { | 71 | if (opt->ts) { |
72 | memset(iph+opt->ts, IPOPT_NOP, iph[opt->ts+1]); | 72 | memset(iph+opt->ts, IPOPT_NOP, iph[opt->ts+1]); |
73 | opt->ts = 0; | 73 | opt->ts = 0; |
74 | opt->ts_needaddr = opt->ts_needtime = 0; | 74 | opt->ts_needaddr = opt->ts_needtime = 0; |
75 | } | 75 | } |
76 | } | 76 | } |
77 | 77 | ||
78 | /* | 78 | /* |
79 | * Provided (sopt, skb) points to received options, | 79 | * Provided (sopt, skb) points to received options, |
80 | * build in dopt compiled option set appropriate for answering. | 80 | * build in dopt compiled option set appropriate for answering. |
81 | * i.e. invert SRR option, copy anothers, | 81 | * i.e. invert SRR option, copy anothers, |
82 | * and grab room in RR/TS options. | 82 | * and grab room in RR/TS options. |
83 | * | 83 | * |
84 | * NOTE: dopt cannot point to skb. | 84 | * NOTE: dopt cannot point to skb. |
85 | */ | 85 | */ |
86 | 86 | ||
87 | int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) | 87 | int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) |
88 | { | 88 | { |
89 | struct ip_options *sopt; | 89 | struct ip_options *sopt; |
90 | unsigned char *sptr, *dptr; | 90 | unsigned char *sptr, *dptr; |
91 | int soffset, doffset; | 91 | int soffset, doffset; |
92 | int optlen; | 92 | int optlen; |
93 | u32 daddr; | 93 | u32 daddr; |
94 | 94 | ||
95 | memset(dopt, 0, sizeof(struct ip_options)); | 95 | memset(dopt, 0, sizeof(struct ip_options)); |
96 | 96 | ||
97 | dopt->is_data = 1; | 97 | dopt->is_data = 1; |
98 | 98 | ||
99 | sopt = &(IPCB(skb)->opt); | 99 | sopt = &(IPCB(skb)->opt); |
100 | 100 | ||
101 | if (sopt->optlen == 0) { | 101 | if (sopt->optlen == 0) { |
102 | dopt->optlen = 0; | 102 | dopt->optlen = 0; |
103 | return 0; | 103 | return 0; |
104 | } | 104 | } |
105 | 105 | ||
106 | sptr = skb->nh.raw; | 106 | sptr = skb->nh.raw; |
107 | dptr = dopt->__data; | 107 | dptr = dopt->__data; |
108 | 108 | ||
109 | if (skb->dst) | 109 | if (skb->dst) |
110 | daddr = ((struct rtable*)skb->dst)->rt_spec_dst; | 110 | daddr = ((struct rtable*)skb->dst)->rt_spec_dst; |
111 | else | 111 | else |
112 | daddr = skb->nh.iph->daddr; | 112 | daddr = skb->nh.iph->daddr; |
113 | 113 | ||
114 | if (sopt->rr) { | 114 | if (sopt->rr) { |
115 | optlen = sptr[sopt->rr+1]; | 115 | optlen = sptr[sopt->rr+1]; |
116 | soffset = sptr[sopt->rr+2]; | 116 | soffset = sptr[sopt->rr+2]; |
117 | dopt->rr = dopt->optlen + sizeof(struct iphdr); | 117 | dopt->rr = dopt->optlen + sizeof(struct iphdr); |
118 | memcpy(dptr, sptr+sopt->rr, optlen); | 118 | memcpy(dptr, sptr+sopt->rr, optlen); |
119 | if (sopt->rr_needaddr && soffset <= optlen) { | 119 | if (sopt->rr_needaddr && soffset <= optlen) { |
120 | if (soffset + 3 > optlen) | 120 | if (soffset + 3 > optlen) |
121 | return -EINVAL; | 121 | return -EINVAL; |
122 | dptr[2] = soffset + 4; | 122 | dptr[2] = soffset + 4; |
123 | dopt->rr_needaddr = 1; | 123 | dopt->rr_needaddr = 1; |
124 | } | 124 | } |
125 | dptr += optlen; | 125 | dptr += optlen; |
126 | dopt->optlen += optlen; | 126 | dopt->optlen += optlen; |
127 | } | 127 | } |
128 | if (sopt->ts) { | 128 | if (sopt->ts) { |
129 | optlen = sptr[sopt->ts+1]; | 129 | optlen = sptr[sopt->ts+1]; |
130 | soffset = sptr[sopt->ts+2]; | 130 | soffset = sptr[sopt->ts+2]; |
131 | dopt->ts = dopt->optlen + sizeof(struct iphdr); | 131 | dopt->ts = dopt->optlen + sizeof(struct iphdr); |
132 | memcpy(dptr, sptr+sopt->ts, optlen); | 132 | memcpy(dptr, sptr+sopt->ts, optlen); |
133 | if (soffset <= optlen) { | 133 | if (soffset <= optlen) { |
134 | if (sopt->ts_needaddr) { | 134 | if (sopt->ts_needaddr) { |
135 | if (soffset + 3 > optlen) | 135 | if (soffset + 3 > optlen) |
136 | return -EINVAL; | 136 | return -EINVAL; |
137 | dopt->ts_needaddr = 1; | 137 | dopt->ts_needaddr = 1; |
138 | soffset += 4; | 138 | soffset += 4; |
139 | } | 139 | } |
140 | if (sopt->ts_needtime) { | 140 | if (sopt->ts_needtime) { |
141 | if (soffset + 3 > optlen) | 141 | if (soffset + 3 > optlen) |
142 | return -EINVAL; | 142 | return -EINVAL; |
143 | if ((dptr[3]&0xF) != IPOPT_TS_PRESPEC) { | 143 | if ((dptr[3]&0xF) != IPOPT_TS_PRESPEC) { |
144 | dopt->ts_needtime = 1; | 144 | dopt->ts_needtime = 1; |
145 | soffset += 4; | 145 | soffset += 4; |
146 | } else { | 146 | } else { |
147 | dopt->ts_needtime = 0; | 147 | dopt->ts_needtime = 0; |
148 | 148 | ||
149 | if (soffset + 8 <= optlen) { | 149 | if (soffset + 8 <= optlen) { |
150 | __u32 addr; | 150 | __u32 addr; |
151 | 151 | ||
152 | memcpy(&addr, sptr+soffset-1, 4); | 152 | memcpy(&addr, sptr+soffset-1, 4); |
153 | if (inet_addr_type(addr) != RTN_LOCAL) { | 153 | if (inet_addr_type(addr) != RTN_LOCAL) { |
154 | dopt->ts_needtime = 1; | 154 | dopt->ts_needtime = 1; |
155 | soffset += 8; | 155 | soffset += 8; |
156 | } | 156 | } |
157 | } | 157 | } |
158 | } | 158 | } |
159 | } | 159 | } |
160 | dptr[2] = soffset; | 160 | dptr[2] = soffset; |
161 | } | 161 | } |
162 | dptr += optlen; | 162 | dptr += optlen; |
163 | dopt->optlen += optlen; | 163 | dopt->optlen += optlen; |
164 | } | 164 | } |
165 | if (sopt->srr) { | 165 | if (sopt->srr) { |
166 | unsigned char * start = sptr+sopt->srr; | 166 | unsigned char * start = sptr+sopt->srr; |
167 | u32 faddr; | 167 | u32 faddr; |
168 | 168 | ||
169 | optlen = start[1]; | 169 | optlen = start[1]; |
170 | soffset = start[2]; | 170 | soffset = start[2]; |
171 | doffset = 0; | 171 | doffset = 0; |
172 | if (soffset > optlen) | 172 | if (soffset > optlen) |
173 | soffset = optlen + 1; | 173 | soffset = optlen + 1; |
174 | soffset -= 4; | 174 | soffset -= 4; |
175 | if (soffset > 3) { | 175 | if (soffset > 3) { |
176 | memcpy(&faddr, &start[soffset-1], 4); | 176 | memcpy(&faddr, &start[soffset-1], 4); |
177 | for (soffset-=4, doffset=4; soffset > 3; soffset-=4, doffset+=4) | 177 | for (soffset-=4, doffset=4; soffset > 3; soffset-=4, doffset+=4) |
178 | memcpy(&dptr[doffset-1], &start[soffset-1], 4); | 178 | memcpy(&dptr[doffset-1], &start[soffset-1], 4); |
179 | /* | 179 | /* |
180 | * RFC1812 requires to fix illegal source routes. | 180 | * RFC1812 requires to fix illegal source routes. |
181 | */ | 181 | */ |
182 | if (memcmp(&skb->nh.iph->saddr, &start[soffset+3], 4) == 0) | 182 | if (memcmp(&skb->nh.iph->saddr, &start[soffset+3], 4) == 0) |
183 | doffset -= 4; | 183 | doffset -= 4; |
184 | } | 184 | } |
185 | if (doffset > 3) { | 185 | if (doffset > 3) { |
186 | memcpy(&start[doffset-1], &daddr, 4); | 186 | memcpy(&start[doffset-1], &daddr, 4); |
187 | dopt->faddr = faddr; | 187 | dopt->faddr = faddr; |
188 | dptr[0] = start[0]; | 188 | dptr[0] = start[0]; |
189 | dptr[1] = doffset+3; | 189 | dptr[1] = doffset+3; |
190 | dptr[2] = 4; | 190 | dptr[2] = 4; |
191 | dptr += doffset+3; | 191 | dptr += doffset+3; |
192 | dopt->srr = dopt->optlen + sizeof(struct iphdr); | 192 | dopt->srr = dopt->optlen + sizeof(struct iphdr); |
193 | dopt->optlen += doffset+3; | 193 | dopt->optlen += doffset+3; |
194 | dopt->is_strictroute = sopt->is_strictroute; | 194 | dopt->is_strictroute = sopt->is_strictroute; |
195 | } | 195 | } |
196 | } | 196 | } |
197 | while (dopt->optlen & 3) { | 197 | while (dopt->optlen & 3) { |
198 | *dptr++ = IPOPT_END; | 198 | *dptr++ = IPOPT_END; |
199 | dopt->optlen++; | 199 | dopt->optlen++; |
200 | } | 200 | } |
201 | return 0; | 201 | return 0; |
202 | } | 202 | } |
203 | 203 | ||
204 | /* | 204 | /* |
205 | * Options "fragmenting", just fill options not | 205 | * Options "fragmenting", just fill options not |
206 | * allowed in fragments with NOOPs. | 206 | * allowed in fragments with NOOPs. |
207 | * Simple and stupid 8), but the most efficient way. | 207 | * Simple and stupid 8), but the most efficient way. |
208 | */ | 208 | */ |
209 | 209 | ||
210 | void ip_options_fragment(struct sk_buff * skb) | 210 | void ip_options_fragment(struct sk_buff * skb) |
211 | { | 211 | { |
212 | unsigned char * optptr = skb->nh.raw + sizeof(struct iphdr); | 212 | unsigned char * optptr = skb->nh.raw + sizeof(struct iphdr); |
213 | struct ip_options * opt = &(IPCB(skb)->opt); | 213 | struct ip_options * opt = &(IPCB(skb)->opt); |
214 | int l = opt->optlen; | 214 | int l = opt->optlen; |
215 | int optlen; | 215 | int optlen; |
216 | 216 | ||
217 | while (l > 0) { | 217 | while (l > 0) { |
218 | switch (*optptr) { | 218 | switch (*optptr) { |
219 | case IPOPT_END: | 219 | case IPOPT_END: |
220 | return; | 220 | return; |
221 | case IPOPT_NOOP: | 221 | case IPOPT_NOOP: |
222 | l--; | 222 | l--; |
223 | optptr++; | 223 | optptr++; |
224 | continue; | 224 | continue; |
225 | } | 225 | } |
226 | optlen = optptr[1]; | 226 | optlen = optptr[1]; |
227 | if (optlen<2 || optlen>l) | 227 | if (optlen<2 || optlen>l) |
228 | return; | 228 | return; |
229 | if (!IPOPT_COPIED(*optptr)) | 229 | if (!IPOPT_COPIED(*optptr)) |
230 | memset(optptr, IPOPT_NOOP, optlen); | 230 | memset(optptr, IPOPT_NOOP, optlen); |
231 | l -= optlen; | 231 | l -= optlen; |
232 | optptr += optlen; | 232 | optptr += optlen; |
233 | } | 233 | } |
234 | opt->ts = 0; | 234 | opt->ts = 0; |
235 | opt->rr = 0; | 235 | opt->rr = 0; |
236 | opt->rr_needaddr = 0; | 236 | opt->rr_needaddr = 0; |
237 | opt->ts_needaddr = 0; | 237 | opt->ts_needaddr = 0; |
238 | opt->ts_needtime = 0; | 238 | opt->ts_needtime = 0; |
239 | return; | 239 | return; |
240 | } | 240 | } |
241 | 241 | ||
242 | /* | 242 | /* |
243 | * Verify options and fill pointers in struct options. | 243 | * Verify options and fill pointers in struct options. |
244 | * Caller should clear *opt, and set opt->data. | 244 | * Caller should clear *opt, and set opt->data. |
245 | * If opt == NULL, then skb->data should point to IP header. | 245 | * If opt == NULL, then skb->data should point to IP header. |
246 | */ | 246 | */ |
247 | 247 | ||
248 | int ip_options_compile(struct ip_options * opt, struct sk_buff * skb) | 248 | int ip_options_compile(struct ip_options * opt, struct sk_buff * skb) |
249 | { | 249 | { |
250 | int l; | 250 | int l; |
251 | unsigned char * iph; | 251 | unsigned char * iph; |
252 | unsigned char * optptr; | 252 | unsigned char * optptr; |
253 | int optlen; | 253 | int optlen; |
254 | unsigned char * pp_ptr = NULL; | 254 | unsigned char * pp_ptr = NULL; |
255 | struct rtable *rt = skb ? (struct rtable*)skb->dst : NULL; | 255 | struct rtable *rt = skb ? (struct rtable*)skb->dst : NULL; |
256 | 256 | ||
257 | if (!opt) { | 257 | if (!opt) { |
258 | opt = &(IPCB(skb)->opt); | 258 | opt = &(IPCB(skb)->opt); |
259 | memset(opt, 0, sizeof(struct ip_options)); | ||
260 | iph = skb->nh.raw; | 259 | iph = skb->nh.raw; |
261 | opt->optlen = ((struct iphdr *)iph)->ihl*4 - sizeof(struct iphdr); | 260 | opt->optlen = ((struct iphdr *)iph)->ihl*4 - sizeof(struct iphdr); |
262 | optptr = iph + sizeof(struct iphdr); | 261 | optptr = iph + sizeof(struct iphdr); |
263 | opt->is_data = 0; | 262 | opt->is_data = 0; |
264 | } else { | 263 | } else { |
265 | optptr = opt->is_data ? opt->__data : (unsigned char*)&(skb->nh.iph[1]); | 264 | optptr = opt->is_data ? opt->__data : (unsigned char*)&(skb->nh.iph[1]); |
266 | iph = optptr - sizeof(struct iphdr); | 265 | iph = optptr - sizeof(struct iphdr); |
267 | } | 266 | } |
268 | 267 | ||
269 | for (l = opt->optlen; l > 0; ) { | 268 | for (l = opt->optlen; l > 0; ) { |
270 | switch (*optptr) { | 269 | switch (*optptr) { |
271 | case IPOPT_END: | 270 | case IPOPT_END: |
272 | for (optptr++, l--; l>0; optptr++, l--) { | 271 | for (optptr++, l--; l>0; optptr++, l--) { |
273 | if (*optptr != IPOPT_END) { | 272 | if (*optptr != IPOPT_END) { |
274 | *optptr = IPOPT_END; | 273 | *optptr = IPOPT_END; |
275 | opt->is_changed = 1; | 274 | opt->is_changed = 1; |
276 | } | 275 | } |
277 | } | 276 | } |
278 | goto eol; | 277 | goto eol; |
279 | case IPOPT_NOOP: | 278 | case IPOPT_NOOP: |
280 | l--; | 279 | l--; |
281 | optptr++; | 280 | optptr++; |
282 | continue; | 281 | continue; |
283 | } | 282 | } |
284 | optlen = optptr[1]; | 283 | optlen = optptr[1]; |
285 | if (optlen<2 || optlen>l) { | 284 | if (optlen<2 || optlen>l) { |
286 | pp_ptr = optptr; | 285 | pp_ptr = optptr; |
287 | goto error; | 286 | goto error; |
288 | } | 287 | } |
289 | switch (*optptr) { | 288 | switch (*optptr) { |
290 | case IPOPT_SSRR: | 289 | case IPOPT_SSRR: |
291 | case IPOPT_LSRR: | 290 | case IPOPT_LSRR: |
292 | if (optlen < 3) { | 291 | if (optlen < 3) { |
293 | pp_ptr = optptr + 1; | 292 | pp_ptr = optptr + 1; |
294 | goto error; | 293 | goto error; |
295 | } | 294 | } |
296 | if (optptr[2] < 4) { | 295 | if (optptr[2] < 4) { |
297 | pp_ptr = optptr + 2; | 296 | pp_ptr = optptr + 2; |
298 | goto error; | 297 | goto error; |
299 | } | 298 | } |
300 | /* NB: cf RFC-1812 5.2.4.1 */ | 299 | /* NB: cf RFC-1812 5.2.4.1 */ |
301 | if (opt->srr) { | 300 | if (opt->srr) { |
302 | pp_ptr = optptr; | 301 | pp_ptr = optptr; |
303 | goto error; | 302 | goto error; |
304 | } | 303 | } |
305 | if (!skb) { | 304 | if (!skb) { |
306 | if (optptr[2] != 4 || optlen < 7 || ((optlen-3) & 3)) { | 305 | if (optptr[2] != 4 || optlen < 7 || ((optlen-3) & 3)) { |
307 | pp_ptr = optptr + 1; | 306 | pp_ptr = optptr + 1; |
308 | goto error; | 307 | goto error; |
309 | } | 308 | } |
310 | memcpy(&opt->faddr, &optptr[3], 4); | 309 | memcpy(&opt->faddr, &optptr[3], 4); |
311 | if (optlen > 7) | 310 | if (optlen > 7) |
312 | memmove(&optptr[3], &optptr[7], optlen-7); | 311 | memmove(&optptr[3], &optptr[7], optlen-7); |
313 | } | 312 | } |
314 | opt->is_strictroute = (optptr[0] == IPOPT_SSRR); | 313 | opt->is_strictroute = (optptr[0] == IPOPT_SSRR); |
315 | opt->srr = optptr - iph; | 314 | opt->srr = optptr - iph; |
316 | break; | 315 | break; |
317 | case IPOPT_RR: | 316 | case IPOPT_RR: |
318 | if (opt->rr) { | 317 | if (opt->rr) { |
319 | pp_ptr = optptr; | 318 | pp_ptr = optptr; |
320 | goto error; | 319 | goto error; |
321 | } | 320 | } |
322 | if (optlen < 3) { | 321 | if (optlen < 3) { |
323 | pp_ptr = optptr + 1; | 322 | pp_ptr = optptr + 1; |
324 | goto error; | 323 | goto error; |
325 | } | 324 | } |
326 | if (optptr[2] < 4) { | 325 | if (optptr[2] < 4) { |
327 | pp_ptr = optptr + 2; | 326 | pp_ptr = optptr + 2; |
328 | goto error; | 327 | goto error; |
329 | } | 328 | } |
330 | if (optptr[2] <= optlen) { | 329 | if (optptr[2] <= optlen) { |
331 | if (optptr[2]+3 > optlen) { | 330 | if (optptr[2]+3 > optlen) { |
332 | pp_ptr = optptr + 2; | 331 | pp_ptr = optptr + 2; |
333 | goto error; | 332 | goto error; |
334 | } | 333 | } |
335 | if (skb) { | 334 | if (skb) { |
336 | memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4); | 335 | memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4); |
337 | opt->is_changed = 1; | 336 | opt->is_changed = 1; |
338 | } | 337 | } |
339 | optptr[2] += 4; | 338 | optptr[2] += 4; |
340 | opt->rr_needaddr = 1; | 339 | opt->rr_needaddr = 1; |
341 | } | 340 | } |
342 | opt->rr = optptr - iph; | 341 | opt->rr = optptr - iph; |
343 | break; | 342 | break; |
344 | case IPOPT_TIMESTAMP: | 343 | case IPOPT_TIMESTAMP: |
345 | if (opt->ts) { | 344 | if (opt->ts) { |
346 | pp_ptr = optptr; | 345 | pp_ptr = optptr; |
347 | goto error; | 346 | goto error; |
348 | } | 347 | } |
349 | if (optlen < 4) { | 348 | if (optlen < 4) { |
350 | pp_ptr = optptr + 1; | 349 | pp_ptr = optptr + 1; |
351 | goto error; | 350 | goto error; |
352 | } | 351 | } |
353 | if (optptr[2] < 5) { | 352 | if (optptr[2] < 5) { |
354 | pp_ptr = optptr + 2; | 353 | pp_ptr = optptr + 2; |
355 | goto error; | 354 | goto error; |
356 | } | 355 | } |
357 | if (optptr[2] <= optlen) { | 356 | if (optptr[2] <= optlen) { |
358 | __u32 * timeptr = NULL; | 357 | __u32 * timeptr = NULL; |
359 | if (optptr[2]+3 > optptr[1]) { | 358 | if (optptr[2]+3 > optptr[1]) { |
360 | pp_ptr = optptr + 2; | 359 | pp_ptr = optptr + 2; |
361 | goto error; | 360 | goto error; |
362 | } | 361 | } |
363 | switch (optptr[3]&0xF) { | 362 | switch (optptr[3]&0xF) { |
364 | case IPOPT_TS_TSONLY: | 363 | case IPOPT_TS_TSONLY: |
365 | opt->ts = optptr - iph; | 364 | opt->ts = optptr - iph; |
366 | if (skb) | 365 | if (skb) |
367 | timeptr = (__u32*)&optptr[optptr[2]-1]; | 366 | timeptr = (__u32*)&optptr[optptr[2]-1]; |
368 | opt->ts_needtime = 1; | 367 | opt->ts_needtime = 1; |
369 | optptr[2] += 4; | 368 | optptr[2] += 4; |
370 | break; | 369 | break; |
371 | case IPOPT_TS_TSANDADDR: | 370 | case IPOPT_TS_TSANDADDR: |
372 | if (optptr[2]+7 > optptr[1]) { | 371 | if (optptr[2]+7 > optptr[1]) { |
373 | pp_ptr = optptr + 2; | 372 | pp_ptr = optptr + 2; |
374 | goto error; | 373 | goto error; |
375 | } | 374 | } |
376 | opt->ts = optptr - iph; | 375 | opt->ts = optptr - iph; |
377 | if (skb) { | 376 | if (skb) { |
378 | memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4); | 377 | memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4); |
379 | timeptr = (__u32*)&optptr[optptr[2]+3]; | 378 | timeptr = (__u32*)&optptr[optptr[2]+3]; |
380 | } | 379 | } |
381 | opt->ts_needaddr = 1; | 380 | opt->ts_needaddr = 1; |
382 | opt->ts_needtime = 1; | 381 | opt->ts_needtime = 1; |
383 | optptr[2] += 8; | 382 | optptr[2] += 8; |
384 | break; | 383 | break; |
385 | case IPOPT_TS_PRESPEC: | 384 | case IPOPT_TS_PRESPEC: |
386 | if (optptr[2]+7 > optptr[1]) { | 385 | if (optptr[2]+7 > optptr[1]) { |
387 | pp_ptr = optptr + 2; | 386 | pp_ptr = optptr + 2; |
388 | goto error; | 387 | goto error; |
389 | } | 388 | } |
390 | opt->ts = optptr - iph; | 389 | opt->ts = optptr - iph; |
391 | { | 390 | { |
392 | u32 addr; | 391 | u32 addr; |
393 | memcpy(&addr, &optptr[optptr[2]-1], 4); | 392 | memcpy(&addr, &optptr[optptr[2]-1], 4); |
394 | if (inet_addr_type(addr) == RTN_UNICAST) | 393 | if (inet_addr_type(addr) == RTN_UNICAST) |
395 | break; | 394 | break; |
396 | if (skb) | 395 | if (skb) |
397 | timeptr = (__u32*)&optptr[optptr[2]+3]; | 396 | timeptr = (__u32*)&optptr[optptr[2]+3]; |
398 | } | 397 | } |
399 | opt->ts_needtime = 1; | 398 | opt->ts_needtime = 1; |
400 | optptr[2] += 8; | 399 | optptr[2] += 8; |
401 | break; | 400 | break; |
402 | default: | 401 | default: |
403 | if (!skb && !capable(CAP_NET_RAW)) { | 402 | if (!skb && !capable(CAP_NET_RAW)) { |
404 | pp_ptr = optptr + 3; | 403 | pp_ptr = optptr + 3; |
405 | goto error; | 404 | goto error; |
406 | } | 405 | } |
407 | break; | 406 | break; |
408 | } | 407 | } |
409 | if (timeptr) { | 408 | if (timeptr) { |
410 | struct timeval tv; | 409 | struct timeval tv; |
411 | __u32 midtime; | 410 | __u32 midtime; |
412 | do_gettimeofday(&tv); | 411 | do_gettimeofday(&tv); |
413 | midtime = htonl((tv.tv_sec % 86400) * 1000 + tv.tv_usec / 1000); | 412 | midtime = htonl((tv.tv_sec % 86400) * 1000 + tv.tv_usec / 1000); |
414 | memcpy(timeptr, &midtime, sizeof(__u32)); | 413 | memcpy(timeptr, &midtime, sizeof(__u32)); |
415 | opt->is_changed = 1; | 414 | opt->is_changed = 1; |
416 | } | 415 | } |
417 | } else { | 416 | } else { |
418 | unsigned overflow = optptr[3]>>4; | 417 | unsigned overflow = optptr[3]>>4; |
419 | if (overflow == 15) { | 418 | if (overflow == 15) { |
420 | pp_ptr = optptr + 3; | 419 | pp_ptr = optptr + 3; |
421 | goto error; | 420 | goto error; |
422 | } | 421 | } |
423 | opt->ts = optptr - iph; | 422 | opt->ts = optptr - iph; |
424 | if (skb) { | 423 | if (skb) { |
425 | optptr[3] = (optptr[3]&0xF)|((overflow+1)<<4); | 424 | optptr[3] = (optptr[3]&0xF)|((overflow+1)<<4); |
426 | opt->is_changed = 1; | 425 | opt->is_changed = 1; |
427 | } | 426 | } |
428 | } | 427 | } |
429 | break; | 428 | break; |
430 | case IPOPT_RA: | 429 | case IPOPT_RA: |
431 | if (optlen < 4) { | 430 | if (optlen < 4) { |
432 | pp_ptr = optptr + 1; | 431 | pp_ptr = optptr + 1; |
433 | goto error; | 432 | goto error; |
434 | } | 433 | } |
435 | if (optptr[2] == 0 && optptr[3] == 0) | 434 | if (optptr[2] == 0 && optptr[3] == 0) |
436 | opt->router_alert = optptr - iph; | 435 | opt->router_alert = optptr - iph; |
437 | break; | 436 | break; |
438 | case IPOPT_SEC: | 437 | case IPOPT_SEC: |
439 | case IPOPT_SID: | 438 | case IPOPT_SID: |
440 | default: | 439 | default: |
441 | if (!skb && !capable(CAP_NET_RAW)) { | 440 | if (!skb && !capable(CAP_NET_RAW)) { |
442 | pp_ptr = optptr; | 441 | pp_ptr = optptr; |
443 | goto error; | 442 | goto error; |
444 | } | 443 | } |
445 | break; | 444 | break; |
446 | } | 445 | } |
447 | l -= optlen; | 446 | l -= optlen; |
448 | optptr += optlen; | 447 | optptr += optlen; |
449 | } | 448 | } |
450 | 449 | ||
451 | eol: | 450 | eol: |
452 | if (!pp_ptr) | 451 | if (!pp_ptr) |
453 | return 0; | 452 | return 0; |
454 | 453 | ||
455 | error: | 454 | error: |
456 | if (skb) { | 455 | if (skb) { |
457 | icmp_send(skb, ICMP_PARAMETERPROB, 0, htonl((pp_ptr-iph)<<24)); | 456 | icmp_send(skb, ICMP_PARAMETERPROB, 0, htonl((pp_ptr-iph)<<24)); |
458 | } | 457 | } |
459 | return -EINVAL; | 458 | return -EINVAL; |
460 | } | 459 | } |
461 | 460 | ||
462 | 461 | ||
463 | /* | 462 | /* |
464 | * Undo all the changes done by ip_options_compile(). | 463 | * Undo all the changes done by ip_options_compile(). |
465 | */ | 464 | */ |
466 | 465 | ||
467 | void ip_options_undo(struct ip_options * opt) | 466 | void ip_options_undo(struct ip_options * opt) |
468 | { | 467 | { |
469 | if (opt->srr) { | 468 | if (opt->srr) { |
470 | unsigned char * optptr = opt->__data+opt->srr-sizeof(struct iphdr); | 469 | unsigned char * optptr = opt->__data+opt->srr-sizeof(struct iphdr); |
471 | memmove(optptr+7, optptr+3, optptr[1]-7); | 470 | memmove(optptr+7, optptr+3, optptr[1]-7); |
472 | memcpy(optptr+3, &opt->faddr, 4); | 471 | memcpy(optptr+3, &opt->faddr, 4); |
473 | } | 472 | } |
474 | if (opt->rr_needaddr) { | 473 | if (opt->rr_needaddr) { |
475 | unsigned char * optptr = opt->__data+opt->rr-sizeof(struct iphdr); | 474 | unsigned char * optptr = opt->__data+opt->rr-sizeof(struct iphdr); |
476 | optptr[2] -= 4; | 475 | optptr[2] -= 4; |
477 | memset(&optptr[optptr[2]-1], 0, 4); | 476 | memset(&optptr[optptr[2]-1], 0, 4); |
478 | } | 477 | } |
479 | if (opt->ts) { | 478 | if (opt->ts) { |
480 | unsigned char * optptr = opt->__data+opt->ts-sizeof(struct iphdr); | 479 | unsigned char * optptr = opt->__data+opt->ts-sizeof(struct iphdr); |
481 | if (opt->ts_needtime) { | 480 | if (opt->ts_needtime) { |
482 | optptr[2] -= 4; | 481 | optptr[2] -= 4; |
483 | memset(&optptr[optptr[2]-1], 0, 4); | 482 | memset(&optptr[optptr[2]-1], 0, 4); |
484 | if ((optptr[3]&0xF) == IPOPT_TS_PRESPEC) | 483 | if ((optptr[3]&0xF) == IPOPT_TS_PRESPEC) |
485 | optptr[2] -= 4; | 484 | optptr[2] -= 4; |
486 | } | 485 | } |
487 | if (opt->ts_needaddr) { | 486 | if (opt->ts_needaddr) { |
488 | optptr[2] -= 4; | 487 | optptr[2] -= 4; |
489 | memset(&optptr[optptr[2]-1], 0, 4); | 488 | memset(&optptr[optptr[2]-1], 0, 4); |
490 | } | 489 | } |
491 | } | 490 | } |
492 | } | 491 | } |
493 | 492 | ||
494 | static struct ip_options *ip_options_get_alloc(const int optlen) | 493 | static struct ip_options *ip_options_get_alloc(const int optlen) |
495 | { | 494 | { |
496 | struct ip_options *opt = kmalloc(sizeof(*opt) + ((optlen + 3) & ~3), | 495 | struct ip_options *opt = kmalloc(sizeof(*opt) + ((optlen + 3) & ~3), |
497 | GFP_KERNEL); | 496 | GFP_KERNEL); |
498 | if (opt) | 497 | if (opt) |
499 | memset(opt, 0, sizeof(*opt)); | 498 | memset(opt, 0, sizeof(*opt)); |
500 | return opt; | 499 | return opt; |
501 | } | 500 | } |
502 | 501 | ||
503 | static int ip_options_get_finish(struct ip_options **optp, | 502 | static int ip_options_get_finish(struct ip_options **optp, |
504 | struct ip_options *opt, int optlen) | 503 | struct ip_options *opt, int optlen) |
505 | { | 504 | { |
506 | while (optlen & 3) | 505 | while (optlen & 3) |
507 | opt->__data[optlen++] = IPOPT_END; | 506 | opt->__data[optlen++] = IPOPT_END; |
508 | opt->optlen = optlen; | 507 | opt->optlen = optlen; |
509 | opt->is_data = 1; | 508 | opt->is_data = 1; |
510 | opt->is_setbyuser = 1; | 509 | opt->is_setbyuser = 1; |
511 | if (optlen && ip_options_compile(opt, NULL)) { | 510 | if (optlen && ip_options_compile(opt, NULL)) { |
512 | kfree(opt); | 511 | kfree(opt); |
513 | return -EINVAL; | 512 | return -EINVAL; |
514 | } | 513 | } |
515 | kfree(*optp); | 514 | kfree(*optp); |
516 | *optp = opt; | 515 | *optp = opt; |
517 | return 0; | 516 | return 0; |
518 | } | 517 | } |
519 | 518 | ||
520 | int ip_options_get_from_user(struct ip_options **optp, unsigned char __user *data, int optlen) | 519 | int ip_options_get_from_user(struct ip_options **optp, unsigned char __user *data, int optlen) |
521 | { | 520 | { |
522 | struct ip_options *opt = ip_options_get_alloc(optlen); | 521 | struct ip_options *opt = ip_options_get_alloc(optlen); |
523 | 522 | ||
524 | if (!opt) | 523 | if (!opt) |
525 | return -ENOMEM; | 524 | return -ENOMEM; |
526 | if (optlen && copy_from_user(opt->__data, data, optlen)) { | 525 | if (optlen && copy_from_user(opt->__data, data, optlen)) { |
527 | kfree(opt); | 526 | kfree(opt); |
528 | return -EFAULT; | 527 | return -EFAULT; |
529 | } | 528 | } |
530 | return ip_options_get_finish(optp, opt, optlen); | 529 | return ip_options_get_finish(optp, opt, optlen); |
531 | } | 530 | } |
532 | 531 | ||
533 | int ip_options_get(struct ip_options **optp, unsigned char *data, int optlen) | 532 | int ip_options_get(struct ip_options **optp, unsigned char *data, int optlen) |
534 | { | 533 | { |
535 | struct ip_options *opt = ip_options_get_alloc(optlen); | 534 | struct ip_options *opt = ip_options_get_alloc(optlen); |
536 | 535 | ||
537 | if (!opt) | 536 | if (!opt) |
538 | return -ENOMEM; | 537 | return -ENOMEM; |
539 | if (optlen) | 538 | if (optlen) |
540 | memcpy(opt->__data, data, optlen); | 539 | memcpy(opt->__data, data, optlen); |
541 | return ip_options_get_finish(optp, opt, optlen); | 540 | return ip_options_get_finish(optp, opt, optlen); |
542 | } | 541 | } |
543 | 542 | ||
544 | void ip_forward_options(struct sk_buff *skb) | 543 | void ip_forward_options(struct sk_buff *skb) |
545 | { | 544 | { |
546 | struct ip_options * opt = &(IPCB(skb)->opt); | 545 | struct ip_options * opt = &(IPCB(skb)->opt); |
547 | unsigned char * optptr; | 546 | unsigned char * optptr; |
548 | struct rtable *rt = (struct rtable*)skb->dst; | 547 | struct rtable *rt = (struct rtable*)skb->dst; |
549 | unsigned char *raw = skb->nh.raw; | 548 | unsigned char *raw = skb->nh.raw; |
550 | 549 | ||
551 | if (opt->rr_needaddr) { | 550 | if (opt->rr_needaddr) { |
552 | optptr = (unsigned char *)raw + opt->rr; | 551 | optptr = (unsigned char *)raw + opt->rr; |
553 | ip_rt_get_source(&optptr[optptr[2]-5], rt); | 552 | ip_rt_get_source(&optptr[optptr[2]-5], rt); |
554 | opt->is_changed = 1; | 553 | opt->is_changed = 1; |
555 | } | 554 | } |
556 | if (opt->srr_is_hit) { | 555 | if (opt->srr_is_hit) { |
557 | int srrptr, srrspace; | 556 | int srrptr, srrspace; |
558 | 557 | ||
559 | optptr = raw + opt->srr; | 558 | optptr = raw + opt->srr; |
560 | 559 | ||
561 | for ( srrptr=optptr[2], srrspace = optptr[1]; | 560 | for ( srrptr=optptr[2], srrspace = optptr[1]; |
562 | srrptr <= srrspace; | 561 | srrptr <= srrspace; |
563 | srrptr += 4 | 562 | srrptr += 4 |
564 | ) { | 563 | ) { |
565 | if (srrptr + 3 > srrspace) | 564 | if (srrptr + 3 > srrspace) |
566 | break; | 565 | break; |
567 | if (memcmp(&rt->rt_dst, &optptr[srrptr-1], 4) == 0) | 566 | if (memcmp(&rt->rt_dst, &optptr[srrptr-1], 4) == 0) |
568 | break; | 567 | break; |
569 | } | 568 | } |
570 | if (srrptr + 3 <= srrspace) { | 569 | if (srrptr + 3 <= srrspace) { |
571 | opt->is_changed = 1; | 570 | opt->is_changed = 1; |
572 | ip_rt_get_source(&optptr[srrptr-1], rt); | 571 | ip_rt_get_source(&optptr[srrptr-1], rt); |
573 | skb->nh.iph->daddr = rt->rt_dst; | 572 | skb->nh.iph->daddr = rt->rt_dst; |
574 | optptr[2] = srrptr+4; | 573 | optptr[2] = srrptr+4; |
575 | } else if (net_ratelimit()) | 574 | } else if (net_ratelimit()) |
576 | printk(KERN_CRIT "ip_forward(): Argh! Destination lost!\n"); | 575 | printk(KERN_CRIT "ip_forward(): Argh! Destination lost!\n"); |
577 | if (opt->ts_needaddr) { | 576 | if (opt->ts_needaddr) { |
578 | optptr = raw + opt->ts; | 577 | optptr = raw + opt->ts; |
579 | ip_rt_get_source(&optptr[optptr[2]-9], rt); | 578 | ip_rt_get_source(&optptr[optptr[2]-9], rt); |
580 | opt->is_changed = 1; | 579 | opt->is_changed = 1; |
581 | } | 580 | } |
582 | } | 581 | } |
583 | if (opt->is_changed) { | 582 | if (opt->is_changed) { |
584 | opt->is_changed = 0; | 583 | opt->is_changed = 0; |
585 | ip_send_check(skb->nh.iph); | 584 | ip_send_check(skb->nh.iph); |
586 | } | 585 | } |
587 | } | 586 | } |
588 | 587 | ||
589 | int ip_options_rcv_srr(struct sk_buff *skb) | 588 | int ip_options_rcv_srr(struct sk_buff *skb) |
590 | { | 589 | { |
591 | struct ip_options *opt = &(IPCB(skb)->opt); | 590 | struct ip_options *opt = &(IPCB(skb)->opt); |
592 | int srrspace, srrptr; | 591 | int srrspace, srrptr; |
593 | u32 nexthop; | 592 | u32 nexthop; |
594 | struct iphdr *iph = skb->nh.iph; | 593 | struct iphdr *iph = skb->nh.iph; |
595 | unsigned char * optptr = skb->nh.raw + opt->srr; | 594 | unsigned char * optptr = skb->nh.raw + opt->srr; |
596 | struct rtable *rt = (struct rtable*)skb->dst; | 595 | struct rtable *rt = (struct rtable*)skb->dst; |
597 | struct rtable *rt2; | 596 | struct rtable *rt2; |
598 | int err; | 597 | int err; |
599 | 598 | ||
600 | if (!opt->srr) | 599 | if (!opt->srr) |
601 | return 0; | 600 | return 0; |
602 | 601 | ||
603 | if (skb->pkt_type != PACKET_HOST) | 602 | if (skb->pkt_type != PACKET_HOST) |
604 | return -EINVAL; | 603 | return -EINVAL; |
605 | if (rt->rt_type == RTN_UNICAST) { | 604 | if (rt->rt_type == RTN_UNICAST) { |
606 | if (!opt->is_strictroute) | 605 | if (!opt->is_strictroute) |
607 | return 0; | 606 | return 0; |
608 | icmp_send(skb, ICMP_PARAMETERPROB, 0, htonl(16<<24)); | 607 | icmp_send(skb, ICMP_PARAMETERPROB, 0, htonl(16<<24)); |
609 | return -EINVAL; | 608 | return -EINVAL; |
610 | } | 609 | } |
611 | if (rt->rt_type != RTN_LOCAL) | 610 | if (rt->rt_type != RTN_LOCAL) |
612 | return -EINVAL; | 611 | return -EINVAL; |
613 | 612 | ||
614 | for (srrptr=optptr[2], srrspace = optptr[1]; srrptr <= srrspace; srrptr += 4) { | 613 | for (srrptr=optptr[2], srrspace = optptr[1]; srrptr <= srrspace; srrptr += 4) { |
615 | if (srrptr + 3 > srrspace) { | 614 | if (srrptr + 3 > srrspace) { |
616 | icmp_send(skb, ICMP_PARAMETERPROB, 0, htonl((opt->srr+2)<<24)); | 615 | icmp_send(skb, ICMP_PARAMETERPROB, 0, htonl((opt->srr+2)<<24)); |
617 | return -EINVAL; | 616 | return -EINVAL; |
618 | } | 617 | } |
619 | memcpy(&nexthop, &optptr[srrptr-1], 4); | 618 | memcpy(&nexthop, &optptr[srrptr-1], 4); |
620 | 619 | ||
621 | rt = (struct rtable*)skb->dst; | 620 | rt = (struct rtable*)skb->dst; |
622 | skb->dst = NULL; | 621 | skb->dst = NULL; |
623 | err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, skb->dev); | 622 | err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, skb->dev); |
624 | rt2 = (struct rtable*)skb->dst; | 623 | rt2 = (struct rtable*)skb->dst; |
625 | if (err || (rt2->rt_type != RTN_UNICAST && rt2->rt_type != RTN_LOCAL)) { | 624 | if (err || (rt2->rt_type != RTN_UNICAST && rt2->rt_type != RTN_LOCAL)) { |
626 | ip_rt_put(rt2); | 625 | ip_rt_put(rt2); |
627 | skb->dst = &rt->u.dst; | 626 | skb->dst = &rt->u.dst; |
628 | return -EINVAL; | 627 | return -EINVAL; |
629 | } | 628 | } |
630 | ip_rt_put(rt); | 629 | ip_rt_put(rt); |
631 | if (rt2->rt_type != RTN_LOCAL) | 630 | if (rt2->rt_type != RTN_LOCAL) |
632 | break; | 631 | break; |
633 | /* Superfast 8) loopback forward */ | 632 | /* Superfast 8) loopback forward */ |
634 | memcpy(&iph->daddr, &optptr[srrptr-1], 4); | 633 | memcpy(&iph->daddr, &optptr[srrptr-1], 4); |
635 | opt->is_changed = 1; | 634 | opt->is_changed = 1; |
636 | } | 635 | } |
637 | if (srrptr <= srrspace) { | 636 | if (srrptr <= srrspace) { |
638 | opt->srr_is_hit = 1; | 637 | opt->srr_is_hit = 1; |
639 | opt->is_changed = 1; | 638 | opt->is_changed = 1; |
640 | } | 639 | } |
641 | return 0; | 640 | return 0; |
642 | } | 641 | } |
643 | 642 |
net/ipv4/ipip.c
1 | /* | 1 | /* |
2 | * Linux NET3: IP/IP protocol decoder. | 2 | * Linux NET3: IP/IP protocol decoder. |
3 | * | 3 | * |
4 | * Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $ | 4 | * Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $ |
5 | * | 5 | * |
6 | * Authors: | 6 | * Authors: |
7 | * Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95 | 7 | * Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95 |
8 | * | 8 | * |
9 | * Fixes: | 9 | * Fixes: |
10 | * Alan Cox : Merged and made usable non modular (its so tiny its silly as | 10 | * Alan Cox : Merged and made usable non modular (its so tiny its silly as |
11 | * a module taking up 2 pages). | 11 | * a module taking up 2 pages). |
12 | * Alan Cox : Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph) | 12 | * Alan Cox : Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph) |
13 | * to keep ip_forward happy. | 13 | * to keep ip_forward happy. |
14 | * Alan Cox : More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8). | 14 | * Alan Cox : More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8). |
15 | * Kai Schulte : Fixed #defines for IP_FIREWALL->FIREWALL | 15 | * Kai Schulte : Fixed #defines for IP_FIREWALL->FIREWALL |
16 | * David Woodhouse : Perform some basic ICMP handling. | 16 | * David Woodhouse : Perform some basic ICMP handling. |
17 | * IPIP Routing without decapsulation. | 17 | * IPIP Routing without decapsulation. |
18 | * Carlos Picoto : GRE over IP support | 18 | * Carlos Picoto : GRE over IP support |
19 | * Alexey Kuznetsov: Reworked. Really, now it is truncated version of ipv4/ip_gre.c. | 19 | * Alexey Kuznetsov: Reworked. Really, now it is truncated version of ipv4/ip_gre.c. |
20 | * I do not want to merge them together. | 20 | * I do not want to merge them together. |
21 | * | 21 | * |
22 | * This program is free software; you can redistribute it and/or | 22 | * This program is free software; you can redistribute it and/or |
23 | * modify it under the terms of the GNU General Public License | 23 | * modify it under the terms of the GNU General Public License |
24 | * as published by the Free Software Foundation; either version | 24 | * as published by the Free Software Foundation; either version |
25 | * 2 of the License, or (at your option) any later version. | 25 | * 2 of the License, or (at your option) any later version. |
26 | * | 26 | * |
27 | */ | 27 | */ |
28 | 28 | ||
29 | /* tunnel.c: an IP tunnel driver | 29 | /* tunnel.c: an IP tunnel driver |
30 | 30 | ||
31 | The purpose of this driver is to provide an IP tunnel through | 31 | The purpose of this driver is to provide an IP tunnel through |
32 | which you can tunnel network traffic transparently across subnets. | 32 | which you can tunnel network traffic transparently across subnets. |
33 | 33 | ||
34 | This was written by looking at Nick Holloway's dummy driver | 34 | This was written by looking at Nick Holloway's dummy driver |
35 | Thanks for the great code! | 35 | Thanks for the great code! |
36 | 36 | ||
37 | -Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95 | 37 | -Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95 |
38 | 38 | ||
39 | Minor tweaks: | 39 | Minor tweaks: |
40 | Cleaned up the code a little and added some pre-1.3.0 tweaks. | 40 | Cleaned up the code a little and added some pre-1.3.0 tweaks. |
41 | dev->hard_header/hard_header_len changed to use no headers. | 41 | dev->hard_header/hard_header_len changed to use no headers. |
42 | Comments/bracketing tweaked. | 42 | Comments/bracketing tweaked. |
43 | Made the tunnels use dev->name not tunnel: when error reporting. | 43 | Made the tunnels use dev->name not tunnel: when error reporting. |
44 | Added tx_dropped stat | 44 | Added tx_dropped stat |
45 | 45 | ||
46 | -Alan Cox (Alan.Cox@linux.org) 21 March 95 | 46 | -Alan Cox (Alan.Cox@linux.org) 21 March 95 |
47 | 47 | ||
48 | Reworked: | 48 | Reworked: |
49 | Changed to tunnel to destination gateway in addition to the | 49 | Changed to tunnel to destination gateway in addition to the |
50 | tunnel's pointopoint address | 50 | tunnel's pointopoint address |
51 | Almost completely rewritten | 51 | Almost completely rewritten |
52 | Note: There is currently no firewall or ICMP handling done. | 52 | Note: There is currently no firewall or ICMP handling done. |
53 | 53 | ||
54 | -Sam Lantinga (slouken@cs.ucdavis.edu) 02/13/96 | 54 | -Sam Lantinga (slouken@cs.ucdavis.edu) 02/13/96 |
55 | 55 | ||
56 | */ | 56 | */ |
57 | 57 | ||
58 | /* Things I wish I had known when writing the tunnel driver: | 58 | /* Things I wish I had known when writing the tunnel driver: |
59 | 59 | ||
60 | When the tunnel_xmit() function is called, the skb contains the | 60 | When the tunnel_xmit() function is called, the skb contains the |
61 | packet to be sent (plus a great deal of extra info), and dev | 61 | packet to be sent (plus a great deal of extra info), and dev |
62 | contains the tunnel device that _we_ are. | 62 | contains the tunnel device that _we_ are. |
63 | 63 | ||
64 | When we are passed a packet, we are expected to fill in the | 64 | When we are passed a packet, we are expected to fill in the |
65 | source address with our source IP address. | 65 | source address with our source IP address. |
66 | 66 | ||
67 | What is the proper way to allocate, copy and free a buffer? | 67 | What is the proper way to allocate, copy and free a buffer? |
68 | After you allocate it, it is a "0 length" chunk of memory | 68 | After you allocate it, it is a "0 length" chunk of memory |
69 | starting at zero. If you want to add headers to the buffer | 69 | starting at zero. If you want to add headers to the buffer |
70 | later, you'll have to call "skb_reserve(skb, amount)" with | 70 | later, you'll have to call "skb_reserve(skb, amount)" with |
71 | the amount of memory you want reserved. Then, you call | 71 | the amount of memory you want reserved. Then, you call |
72 | "skb_put(skb, amount)" with the amount of space you want in | 72 | "skb_put(skb, amount)" with the amount of space you want in |
73 | the buffer. skb_put() returns a pointer to the top (#0) of | 73 | the buffer. skb_put() returns a pointer to the top (#0) of |
74 | that buffer. skb->len is set to the amount of space you have | 74 | that buffer. skb->len is set to the amount of space you have |
75 | "allocated" with skb_put(). You can then write up to skb->len | 75 | "allocated" with skb_put(). You can then write up to skb->len |
76 | bytes to that buffer. If you need more, you can call skb_put() | 76 | bytes to that buffer. If you need more, you can call skb_put() |
77 | again with the additional amount of space you need. You can | 77 | again with the additional amount of space you need. You can |
78 | find out how much more space you can allocate by calling | 78 | find out how much more space you can allocate by calling |
79 | "skb_tailroom(skb)". | 79 | "skb_tailroom(skb)". |
80 | Now, to add header space, call "skb_push(skb, header_len)". | 80 | Now, to add header space, call "skb_push(skb, header_len)". |
81 | This creates space at the beginning of the buffer and returns | 81 | This creates space at the beginning of the buffer and returns |
82 | a pointer to this new space. If later you need to strip a | 82 | a pointer to this new space. If later you need to strip a |
83 | header from a buffer, call "skb_pull(skb, header_len)". | 83 | header from a buffer, call "skb_pull(skb, header_len)". |
84 | skb_headroom() will return how much space is left at the top | 84 | skb_headroom() will return how much space is left at the top |
85 | of the buffer (before the main data). Remember, this headroom | 85 | of the buffer (before the main data). Remember, this headroom |
86 | space must be reserved before the skb_put() function is called. | 86 | space must be reserved before the skb_put() function is called. |
87 | */ | 87 | */ |
88 | 88 | ||
89 | /* | 89 | /* |
90 | This version of net/ipv4/ipip.c is cloned of net/ipv4/ip_gre.c | 90 | This version of net/ipv4/ipip.c is cloned of net/ipv4/ip_gre.c |
91 | 91 | ||
92 | For comments look at net/ipv4/ip_gre.c --ANK | 92 | For comments look at net/ipv4/ip_gre.c --ANK |
93 | */ | 93 | */ |
94 | 94 | ||
95 | 95 | ||
96 | #include <linux/capability.h> | 96 | #include <linux/capability.h> |
97 | #include <linux/module.h> | 97 | #include <linux/module.h> |
98 | #include <linux/types.h> | 98 | #include <linux/types.h> |
99 | #include <linux/sched.h> | 99 | #include <linux/sched.h> |
100 | #include <linux/kernel.h> | 100 | #include <linux/kernel.h> |
101 | #include <asm/uaccess.h> | 101 | #include <asm/uaccess.h> |
102 | #include <linux/skbuff.h> | 102 | #include <linux/skbuff.h> |
103 | #include <linux/netdevice.h> | 103 | #include <linux/netdevice.h> |
104 | #include <linux/in.h> | 104 | #include <linux/in.h> |
105 | #include <linux/tcp.h> | 105 | #include <linux/tcp.h> |
106 | #include <linux/udp.h> | 106 | #include <linux/udp.h> |
107 | #include <linux/if_arp.h> | 107 | #include <linux/if_arp.h> |
108 | #include <linux/mroute.h> | 108 | #include <linux/mroute.h> |
109 | #include <linux/init.h> | 109 | #include <linux/init.h> |
110 | #include <linux/netfilter_ipv4.h> | 110 | #include <linux/netfilter_ipv4.h> |
111 | #include <linux/if_ether.h> | 111 | #include <linux/if_ether.h> |
112 | 112 | ||
113 | #include <net/sock.h> | 113 | #include <net/sock.h> |
114 | #include <net/ip.h> | 114 | #include <net/ip.h> |
115 | #include <net/icmp.h> | 115 | #include <net/icmp.h> |
116 | #include <net/ipip.h> | 116 | #include <net/ipip.h> |
117 | #include <net/inet_ecn.h> | 117 | #include <net/inet_ecn.h> |
118 | #include <net/xfrm.h> | 118 | #include <net/xfrm.h> |
119 | 119 | ||
120 | #define HASH_SIZE 16 | 120 | #define HASH_SIZE 16 |
121 | #define HASH(addr) ((addr^(addr>>4))&0xF) | 121 | #define HASH(addr) ((addr^(addr>>4))&0xF) |
122 | 122 | ||
123 | static int ipip_fb_tunnel_init(struct net_device *dev); | 123 | static int ipip_fb_tunnel_init(struct net_device *dev); |
124 | static int ipip_tunnel_init(struct net_device *dev); | 124 | static int ipip_tunnel_init(struct net_device *dev); |
125 | static void ipip_tunnel_setup(struct net_device *dev); | 125 | static void ipip_tunnel_setup(struct net_device *dev); |
126 | 126 | ||
127 | static struct net_device *ipip_fb_tunnel_dev; | 127 | static struct net_device *ipip_fb_tunnel_dev; |
128 | 128 | ||
129 | static struct ip_tunnel *tunnels_r_l[HASH_SIZE]; | 129 | static struct ip_tunnel *tunnels_r_l[HASH_SIZE]; |
130 | static struct ip_tunnel *tunnels_r[HASH_SIZE]; | 130 | static struct ip_tunnel *tunnels_r[HASH_SIZE]; |
131 | static struct ip_tunnel *tunnels_l[HASH_SIZE]; | 131 | static struct ip_tunnel *tunnels_l[HASH_SIZE]; |
132 | static struct ip_tunnel *tunnels_wc[1]; | 132 | static struct ip_tunnel *tunnels_wc[1]; |
133 | static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l }; | 133 | static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l }; |
134 | 134 | ||
135 | static DEFINE_RWLOCK(ipip_lock); | 135 | static DEFINE_RWLOCK(ipip_lock); |
136 | 136 | ||
137 | static struct ip_tunnel * ipip_tunnel_lookup(u32 remote, u32 local) | 137 | static struct ip_tunnel * ipip_tunnel_lookup(u32 remote, u32 local) |
138 | { | 138 | { |
139 | unsigned h0 = HASH(remote); | 139 | unsigned h0 = HASH(remote); |
140 | unsigned h1 = HASH(local); | 140 | unsigned h1 = HASH(local); |
141 | struct ip_tunnel *t; | 141 | struct ip_tunnel *t; |
142 | 142 | ||
143 | for (t = tunnels_r_l[h0^h1]; t; t = t->next) { | 143 | for (t = tunnels_r_l[h0^h1]; t; t = t->next) { |
144 | if (local == t->parms.iph.saddr && | 144 | if (local == t->parms.iph.saddr && |
145 | remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) | 145 | remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) |
146 | return t; | 146 | return t; |
147 | } | 147 | } |
148 | for (t = tunnels_r[h0]; t; t = t->next) { | 148 | for (t = tunnels_r[h0]; t; t = t->next) { |
149 | if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) | 149 | if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) |
150 | return t; | 150 | return t; |
151 | } | 151 | } |
152 | for (t = tunnels_l[h1]; t; t = t->next) { | 152 | for (t = tunnels_l[h1]; t; t = t->next) { |
153 | if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP)) | 153 | if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP)) |
154 | return t; | 154 | return t; |
155 | } | 155 | } |
156 | if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP)) | 156 | if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP)) |
157 | return t; | 157 | return t; |
158 | return NULL; | 158 | return NULL; |
159 | } | 159 | } |
160 | 160 | ||
161 | static struct ip_tunnel **ipip_bucket(struct ip_tunnel *t) | 161 | static struct ip_tunnel **ipip_bucket(struct ip_tunnel *t) |
162 | { | 162 | { |
163 | u32 remote = t->parms.iph.daddr; | 163 | u32 remote = t->parms.iph.daddr; |
164 | u32 local = t->parms.iph.saddr; | 164 | u32 local = t->parms.iph.saddr; |
165 | unsigned h = 0; | 165 | unsigned h = 0; |
166 | int prio = 0; | 166 | int prio = 0; |
167 | 167 | ||
168 | if (remote) { | 168 | if (remote) { |
169 | prio |= 2; | 169 | prio |= 2; |
170 | h ^= HASH(remote); | 170 | h ^= HASH(remote); |
171 | } | 171 | } |
172 | if (local) { | 172 | if (local) { |
173 | prio |= 1; | 173 | prio |= 1; |
174 | h ^= HASH(local); | 174 | h ^= HASH(local); |
175 | } | 175 | } |
176 | return &tunnels[prio][h]; | 176 | return &tunnels[prio][h]; |
177 | } | 177 | } |
178 | 178 | ||
179 | 179 | ||
180 | static void ipip_tunnel_unlink(struct ip_tunnel *t) | 180 | static void ipip_tunnel_unlink(struct ip_tunnel *t) |
181 | { | 181 | { |
182 | struct ip_tunnel **tp; | 182 | struct ip_tunnel **tp; |
183 | 183 | ||
184 | for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) { | 184 | for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) { |
185 | if (t == *tp) { | 185 | if (t == *tp) { |
186 | write_lock_bh(&ipip_lock); | 186 | write_lock_bh(&ipip_lock); |
187 | *tp = t->next; | 187 | *tp = t->next; |
188 | write_unlock_bh(&ipip_lock); | 188 | write_unlock_bh(&ipip_lock); |
189 | break; | 189 | break; |
190 | } | 190 | } |
191 | } | 191 | } |
192 | } | 192 | } |
193 | 193 | ||
194 | static void ipip_tunnel_link(struct ip_tunnel *t) | 194 | static void ipip_tunnel_link(struct ip_tunnel *t) |
195 | { | 195 | { |
196 | struct ip_tunnel **tp = ipip_bucket(t); | 196 | struct ip_tunnel **tp = ipip_bucket(t); |
197 | 197 | ||
198 | t->next = *tp; | 198 | t->next = *tp; |
199 | write_lock_bh(&ipip_lock); | 199 | write_lock_bh(&ipip_lock); |
200 | *tp = t; | 200 | *tp = t; |
201 | write_unlock_bh(&ipip_lock); | 201 | write_unlock_bh(&ipip_lock); |
202 | } | 202 | } |
203 | 203 | ||
204 | static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create) | 204 | static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create) |
205 | { | 205 | { |
206 | u32 remote = parms->iph.daddr; | 206 | u32 remote = parms->iph.daddr; |
207 | u32 local = parms->iph.saddr; | 207 | u32 local = parms->iph.saddr; |
208 | struct ip_tunnel *t, **tp, *nt; | 208 | struct ip_tunnel *t, **tp, *nt; |
209 | struct net_device *dev; | 209 | struct net_device *dev; |
210 | unsigned h = 0; | 210 | unsigned h = 0; |
211 | int prio = 0; | 211 | int prio = 0; |
212 | char name[IFNAMSIZ]; | 212 | char name[IFNAMSIZ]; |
213 | 213 | ||
214 | if (remote) { | 214 | if (remote) { |
215 | prio |= 2; | 215 | prio |= 2; |
216 | h ^= HASH(remote); | 216 | h ^= HASH(remote); |
217 | } | 217 | } |
218 | if (local) { | 218 | if (local) { |
219 | prio |= 1; | 219 | prio |= 1; |
220 | h ^= HASH(local); | 220 | h ^= HASH(local); |
221 | } | 221 | } |
222 | for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) { | 222 | for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) { |
223 | if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) | 223 | if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) |
224 | return t; | 224 | return t; |
225 | } | 225 | } |
226 | if (!create) | 226 | if (!create) |
227 | return NULL; | 227 | return NULL; |
228 | 228 | ||
229 | if (parms->name[0]) | 229 | if (parms->name[0]) |
230 | strlcpy(name, parms->name, IFNAMSIZ); | 230 | strlcpy(name, parms->name, IFNAMSIZ); |
231 | else { | 231 | else { |
232 | int i; | 232 | int i; |
233 | for (i=1; i<100; i++) { | 233 | for (i=1; i<100; i++) { |
234 | sprintf(name, "tunl%d", i); | 234 | sprintf(name, "tunl%d", i); |
235 | if (__dev_get_by_name(name) == NULL) | 235 | if (__dev_get_by_name(name) == NULL) |
236 | break; | 236 | break; |
237 | } | 237 | } |
238 | if (i==100) | 238 | if (i==100) |
239 | goto failed; | 239 | goto failed; |
240 | } | 240 | } |
241 | 241 | ||
242 | dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup); | 242 | dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup); |
243 | if (dev == NULL) | 243 | if (dev == NULL) |
244 | return NULL; | 244 | return NULL; |
245 | 245 | ||
246 | nt = netdev_priv(dev); | 246 | nt = netdev_priv(dev); |
247 | SET_MODULE_OWNER(dev); | 247 | SET_MODULE_OWNER(dev); |
248 | dev->init = ipip_tunnel_init; | 248 | dev->init = ipip_tunnel_init; |
249 | nt->parms = *parms; | 249 | nt->parms = *parms; |
250 | 250 | ||
251 | if (register_netdevice(dev) < 0) { | 251 | if (register_netdevice(dev) < 0) { |
252 | free_netdev(dev); | 252 | free_netdev(dev); |
253 | goto failed; | 253 | goto failed; |
254 | } | 254 | } |
255 | 255 | ||
256 | dev_hold(dev); | 256 | dev_hold(dev); |
257 | ipip_tunnel_link(nt); | 257 | ipip_tunnel_link(nt); |
258 | return nt; | 258 | return nt; |
259 | 259 | ||
260 | failed: | 260 | failed: |
261 | return NULL; | 261 | return NULL; |
262 | } | 262 | } |
263 | 263 | ||
264 | static void ipip_tunnel_uninit(struct net_device *dev) | 264 | static void ipip_tunnel_uninit(struct net_device *dev) |
265 | { | 265 | { |
266 | if (dev == ipip_fb_tunnel_dev) { | 266 | if (dev == ipip_fb_tunnel_dev) { |
267 | write_lock_bh(&ipip_lock); | 267 | write_lock_bh(&ipip_lock); |
268 | tunnels_wc[0] = NULL; | 268 | tunnels_wc[0] = NULL; |
269 | write_unlock_bh(&ipip_lock); | 269 | write_unlock_bh(&ipip_lock); |
270 | } else | 270 | } else |
271 | ipip_tunnel_unlink(netdev_priv(dev)); | 271 | ipip_tunnel_unlink(netdev_priv(dev)); |
272 | dev_put(dev); | 272 | dev_put(dev); |
273 | } | 273 | } |
274 | 274 | ||
275 | static int ipip_err(struct sk_buff *skb, u32 info) | 275 | static int ipip_err(struct sk_buff *skb, u32 info) |
276 | { | 276 | { |
277 | #ifndef I_WISH_WORLD_WERE_PERFECT | 277 | #ifndef I_WISH_WORLD_WERE_PERFECT |
278 | 278 | ||
279 | /* It is not :-( All the routers (except for Linux) return only | 279 | /* It is not :-( All the routers (except for Linux) return only |
280 | 8 bytes of packet payload. It means, that precise relaying of | 280 | 8 bytes of packet payload. It means, that precise relaying of |
281 | ICMP in the real Internet is absolutely infeasible. | 281 | ICMP in the real Internet is absolutely infeasible. |
282 | */ | 282 | */ |
283 | struct iphdr *iph = (struct iphdr*)skb->data; | 283 | struct iphdr *iph = (struct iphdr*)skb->data; |
284 | int type = skb->h.icmph->type; | 284 | int type = skb->h.icmph->type; |
285 | int code = skb->h.icmph->code; | 285 | int code = skb->h.icmph->code; |
286 | struct ip_tunnel *t; | 286 | struct ip_tunnel *t; |
287 | int err; | 287 | int err; |
288 | 288 | ||
289 | switch (type) { | 289 | switch (type) { |
290 | default: | 290 | default: |
291 | case ICMP_PARAMETERPROB: | 291 | case ICMP_PARAMETERPROB: |
292 | return 0; | 292 | return 0; |
293 | 293 | ||
294 | case ICMP_DEST_UNREACH: | 294 | case ICMP_DEST_UNREACH: |
295 | switch (code) { | 295 | switch (code) { |
296 | case ICMP_SR_FAILED: | 296 | case ICMP_SR_FAILED: |
297 | case ICMP_PORT_UNREACH: | 297 | case ICMP_PORT_UNREACH: |
298 | /* Impossible event. */ | 298 | /* Impossible event. */ |
299 | return 0; | 299 | return 0; |
300 | case ICMP_FRAG_NEEDED: | 300 | case ICMP_FRAG_NEEDED: |
301 | /* Soft state for pmtu is maintained by IP core. */ | 301 | /* Soft state for pmtu is maintained by IP core. */ |
302 | return 0; | 302 | return 0; |
303 | default: | 303 | default: |
304 | /* All others are translated to HOST_UNREACH. | 304 | /* All others are translated to HOST_UNREACH. |
305 | rfc2003 contains "deep thoughts" about NET_UNREACH, | 305 | rfc2003 contains "deep thoughts" about NET_UNREACH, |
306 | I believe they are just ether pollution. --ANK | 306 | I believe they are just ether pollution. --ANK |
307 | */ | 307 | */ |
308 | break; | 308 | break; |
309 | } | 309 | } |
310 | break; | 310 | break; |
311 | case ICMP_TIME_EXCEEDED: | 311 | case ICMP_TIME_EXCEEDED: |
312 | if (code != ICMP_EXC_TTL) | 312 | if (code != ICMP_EXC_TTL) |
313 | return 0; | 313 | return 0; |
314 | break; | 314 | break; |
315 | } | 315 | } |
316 | 316 | ||
317 | err = -ENOENT; | 317 | err = -ENOENT; |
318 | 318 | ||
319 | read_lock(&ipip_lock); | 319 | read_lock(&ipip_lock); |
320 | t = ipip_tunnel_lookup(iph->daddr, iph->saddr); | 320 | t = ipip_tunnel_lookup(iph->daddr, iph->saddr); |
321 | if (t == NULL || t->parms.iph.daddr == 0) | 321 | if (t == NULL || t->parms.iph.daddr == 0) |
322 | goto out; | 322 | goto out; |
323 | 323 | ||
324 | err = 0; | 324 | err = 0; |
325 | if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) | 325 | if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) |
326 | goto out; | 326 | goto out; |
327 | 327 | ||
328 | if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO) | 328 | if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO) |
329 | t->err_count++; | 329 | t->err_count++; |
330 | else | 330 | else |
331 | t->err_count = 1; | 331 | t->err_count = 1; |
332 | t->err_time = jiffies; | 332 | t->err_time = jiffies; |
333 | out: | 333 | out: |
334 | read_unlock(&ipip_lock); | 334 | read_unlock(&ipip_lock); |
335 | return err; | 335 | return err; |
336 | #else | 336 | #else |
337 | struct iphdr *iph = (struct iphdr*)dp; | 337 | struct iphdr *iph = (struct iphdr*)dp; |
338 | int hlen = iph->ihl<<2; | 338 | int hlen = iph->ihl<<2; |
339 | struct iphdr *eiph; | 339 | struct iphdr *eiph; |
340 | int type = skb->h.icmph->type; | 340 | int type = skb->h.icmph->type; |
341 | int code = skb->h.icmph->code; | 341 | int code = skb->h.icmph->code; |
342 | int rel_type = 0; | 342 | int rel_type = 0; |
343 | int rel_code = 0; | 343 | int rel_code = 0; |
344 | int rel_info = 0; | 344 | int rel_info = 0; |
345 | struct sk_buff *skb2; | 345 | struct sk_buff *skb2; |
346 | struct flowi fl; | 346 | struct flowi fl; |
347 | struct rtable *rt; | 347 | struct rtable *rt; |
348 | 348 | ||
349 | if (len < hlen + sizeof(struct iphdr)) | 349 | if (len < hlen + sizeof(struct iphdr)) |
350 | return 0; | 350 | return 0; |
351 | eiph = (struct iphdr*)(dp + hlen); | 351 | eiph = (struct iphdr*)(dp + hlen); |
352 | 352 | ||
353 | switch (type) { | 353 | switch (type) { |
354 | default: | 354 | default: |
355 | return 0; | 355 | return 0; |
356 | case ICMP_PARAMETERPROB: | 356 | case ICMP_PARAMETERPROB: |
357 | if (skb->h.icmph->un.gateway < hlen) | 357 | if (skb->h.icmph->un.gateway < hlen) |
358 | return 0; | 358 | return 0; |
359 | 359 | ||
360 | /* So... This guy found something strange INSIDE encapsulated | 360 | /* So... This guy found something strange INSIDE encapsulated |
361 | packet. Well, he is fool, but what can we do ? | 361 | packet. Well, he is fool, but what can we do ? |
362 | */ | 362 | */ |
363 | rel_type = ICMP_PARAMETERPROB; | 363 | rel_type = ICMP_PARAMETERPROB; |
364 | rel_info = skb->h.icmph->un.gateway - hlen; | 364 | rel_info = skb->h.icmph->un.gateway - hlen; |
365 | break; | 365 | break; |
366 | 366 | ||
367 | case ICMP_DEST_UNREACH: | 367 | case ICMP_DEST_UNREACH: |
368 | switch (code) { | 368 | switch (code) { |
369 | case ICMP_SR_FAILED: | 369 | case ICMP_SR_FAILED: |
370 | case ICMP_PORT_UNREACH: | 370 | case ICMP_PORT_UNREACH: |
371 | /* Impossible event. */ | 371 | /* Impossible event. */ |
372 | return 0; | 372 | return 0; |
373 | case ICMP_FRAG_NEEDED: | 373 | case ICMP_FRAG_NEEDED: |
374 | /* And it is the only really necessary thing :-) */ | 374 | /* And it is the only really necessary thing :-) */ |
375 | rel_info = ntohs(skb->h.icmph->un.frag.mtu); | 375 | rel_info = ntohs(skb->h.icmph->un.frag.mtu); |
376 | if (rel_info < hlen+68) | 376 | if (rel_info < hlen+68) |
377 | return 0; | 377 | return 0; |
378 | rel_info -= hlen; | 378 | rel_info -= hlen; |
379 | /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */ | 379 | /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */ |
380 | if (rel_info > ntohs(eiph->tot_len)) | 380 | if (rel_info > ntohs(eiph->tot_len)) |
381 | return 0; | 381 | return 0; |
382 | break; | 382 | break; |
383 | default: | 383 | default: |
384 | /* All others are translated to HOST_UNREACH. | 384 | /* All others are translated to HOST_UNREACH. |
385 | rfc2003 contains "deep thoughts" about NET_UNREACH, | 385 | rfc2003 contains "deep thoughts" about NET_UNREACH, |
386 | I believe, it is just ether pollution. --ANK | 386 | I believe, it is just ether pollution. --ANK |
387 | */ | 387 | */ |
388 | rel_type = ICMP_DEST_UNREACH; | 388 | rel_type = ICMP_DEST_UNREACH; |
389 | rel_code = ICMP_HOST_UNREACH; | 389 | rel_code = ICMP_HOST_UNREACH; |
390 | break; | 390 | break; |
391 | } | 391 | } |
392 | break; | 392 | break; |
393 | case ICMP_TIME_EXCEEDED: | 393 | case ICMP_TIME_EXCEEDED: |
394 | if (code != ICMP_EXC_TTL) | 394 | if (code != ICMP_EXC_TTL) |
395 | return 0; | 395 | return 0; |
396 | break; | 396 | break; |
397 | } | 397 | } |
398 | 398 | ||
399 | /* Prepare fake skb to feed it to icmp_send */ | 399 | /* Prepare fake skb to feed it to icmp_send */ |
400 | skb2 = skb_clone(skb, GFP_ATOMIC); | 400 | skb2 = skb_clone(skb, GFP_ATOMIC); |
401 | if (skb2 == NULL) | 401 | if (skb2 == NULL) |
402 | return 0; | 402 | return 0; |
403 | dst_release(skb2->dst); | 403 | dst_release(skb2->dst); |
404 | skb2->dst = NULL; | 404 | skb2->dst = NULL; |
405 | skb_pull(skb2, skb->data - (u8*)eiph); | 405 | skb_pull(skb2, skb->data - (u8*)eiph); |
406 | skb2->nh.raw = skb2->data; | 406 | skb2->nh.raw = skb2->data; |
407 | 407 | ||
408 | /* Try to guess incoming interface */ | 408 | /* Try to guess incoming interface */ |
409 | memset(&fl, 0, sizeof(fl)); | 409 | memset(&fl, 0, sizeof(fl)); |
410 | fl.fl4_daddr = eiph->saddr; | 410 | fl.fl4_daddr = eiph->saddr; |
411 | fl.fl4_tos = RT_TOS(eiph->tos); | 411 | fl.fl4_tos = RT_TOS(eiph->tos); |
412 | fl.proto = IPPROTO_IPIP; | 412 | fl.proto = IPPROTO_IPIP; |
413 | if (ip_route_output_key(&rt, &key)) { | 413 | if (ip_route_output_key(&rt, &key)) { |
414 | kfree_skb(skb2); | 414 | kfree_skb(skb2); |
415 | return 0; | 415 | return 0; |
416 | } | 416 | } |
417 | skb2->dev = rt->u.dst.dev; | 417 | skb2->dev = rt->u.dst.dev; |
418 | 418 | ||
419 | /* route "incoming" packet */ | 419 | /* route "incoming" packet */ |
420 | if (rt->rt_flags&RTCF_LOCAL) { | 420 | if (rt->rt_flags&RTCF_LOCAL) { |
421 | ip_rt_put(rt); | 421 | ip_rt_put(rt); |
422 | rt = NULL; | 422 | rt = NULL; |
423 | fl.fl4_daddr = eiph->daddr; | 423 | fl.fl4_daddr = eiph->daddr; |
424 | fl.fl4_src = eiph->saddr; | 424 | fl.fl4_src = eiph->saddr; |
425 | fl.fl4_tos = eiph->tos; | 425 | fl.fl4_tos = eiph->tos; |
426 | if (ip_route_output_key(&rt, &fl) || | 426 | if (ip_route_output_key(&rt, &fl) || |
427 | rt->u.dst.dev->type != ARPHRD_TUNNEL) { | 427 | rt->u.dst.dev->type != ARPHRD_TUNNEL) { |
428 | ip_rt_put(rt); | 428 | ip_rt_put(rt); |
429 | kfree_skb(skb2); | 429 | kfree_skb(skb2); |
430 | return 0; | 430 | return 0; |
431 | } | 431 | } |
432 | } else { | 432 | } else { |
433 | ip_rt_put(rt); | 433 | ip_rt_put(rt); |
434 | if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) || | 434 | if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) || |
435 | skb2->dst->dev->type != ARPHRD_TUNNEL) { | 435 | skb2->dst->dev->type != ARPHRD_TUNNEL) { |
436 | kfree_skb(skb2); | 436 | kfree_skb(skb2); |
437 | return 0; | 437 | return 0; |
438 | } | 438 | } |
439 | } | 439 | } |
440 | 440 | ||
441 | /* change mtu on this route */ | 441 | /* change mtu on this route */ |
442 | if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { | 442 | if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { |
443 | if (rel_info > dst_mtu(skb2->dst)) { | 443 | if (rel_info > dst_mtu(skb2->dst)) { |
444 | kfree_skb(skb2); | 444 | kfree_skb(skb2); |
445 | return 0; | 445 | return 0; |
446 | } | 446 | } |
447 | skb2->dst->ops->update_pmtu(skb2->dst, rel_info); | 447 | skb2->dst->ops->update_pmtu(skb2->dst, rel_info); |
448 | rel_info = htonl(rel_info); | 448 | rel_info = htonl(rel_info); |
449 | } else if (type == ICMP_TIME_EXCEEDED) { | 449 | } else if (type == ICMP_TIME_EXCEEDED) { |
450 | struct ip_tunnel *t = netdev_priv(skb2->dev); | 450 | struct ip_tunnel *t = netdev_priv(skb2->dev); |
451 | if (t->parms.iph.ttl) { | 451 | if (t->parms.iph.ttl) { |
452 | rel_type = ICMP_DEST_UNREACH; | 452 | rel_type = ICMP_DEST_UNREACH; |
453 | rel_code = ICMP_HOST_UNREACH; | 453 | rel_code = ICMP_HOST_UNREACH; |
454 | } | 454 | } |
455 | } | 455 | } |
456 | 456 | ||
457 | icmp_send(skb2, rel_type, rel_code, rel_info); | 457 | icmp_send(skb2, rel_type, rel_code, rel_info); |
458 | kfree_skb(skb2); | 458 | kfree_skb(skb2); |
459 | return 0; | 459 | return 0; |
460 | #endif | 460 | #endif |
461 | } | 461 | } |
462 | 462 | ||
463 | static inline void ipip_ecn_decapsulate(struct iphdr *outer_iph, struct sk_buff *skb) | 463 | static inline void ipip_ecn_decapsulate(struct iphdr *outer_iph, struct sk_buff *skb) |
464 | { | 464 | { |
465 | struct iphdr *inner_iph = skb->nh.iph; | 465 | struct iphdr *inner_iph = skb->nh.iph; |
466 | 466 | ||
467 | if (INET_ECN_is_ce(outer_iph->tos)) | 467 | if (INET_ECN_is_ce(outer_iph->tos)) |
468 | IP_ECN_set_ce(inner_iph); | 468 | IP_ECN_set_ce(inner_iph); |
469 | } | 469 | } |
470 | 470 | ||
471 | static int ipip_rcv(struct sk_buff *skb) | 471 | static int ipip_rcv(struct sk_buff *skb) |
472 | { | 472 | { |
473 | struct iphdr *iph; | 473 | struct iphdr *iph; |
474 | struct ip_tunnel *tunnel; | 474 | struct ip_tunnel *tunnel; |
475 | 475 | ||
476 | iph = skb->nh.iph; | 476 | iph = skb->nh.iph; |
477 | 477 | ||
478 | read_lock(&ipip_lock); | 478 | read_lock(&ipip_lock); |
479 | if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) { | 479 | if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) { |
480 | if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { | 480 | if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { |
481 | read_unlock(&ipip_lock); | 481 | read_unlock(&ipip_lock); |
482 | kfree_skb(skb); | 482 | kfree_skb(skb); |
483 | return 0; | 483 | return 0; |
484 | } | 484 | } |
485 | 485 | ||
486 | secpath_reset(skb); | 486 | secpath_reset(skb); |
487 | 487 | ||
488 | skb->mac.raw = skb->nh.raw; | 488 | skb->mac.raw = skb->nh.raw; |
489 | skb->nh.raw = skb->data; | 489 | skb->nh.raw = skb->data; |
490 | memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); | ||
491 | skb->protocol = htons(ETH_P_IP); | 490 | skb->protocol = htons(ETH_P_IP); |
492 | skb->pkt_type = PACKET_HOST; | 491 | skb->pkt_type = PACKET_HOST; |
493 | 492 | ||
494 | tunnel->stat.rx_packets++; | 493 | tunnel->stat.rx_packets++; |
495 | tunnel->stat.rx_bytes += skb->len; | 494 | tunnel->stat.rx_bytes += skb->len; |
496 | skb->dev = tunnel->dev; | 495 | skb->dev = tunnel->dev; |
497 | dst_release(skb->dst); | 496 | dst_release(skb->dst); |
498 | skb->dst = NULL; | 497 | skb->dst = NULL; |
499 | nf_reset(skb); | 498 | nf_reset(skb); |
500 | ipip_ecn_decapsulate(iph, skb); | 499 | ipip_ecn_decapsulate(iph, skb); |
501 | netif_rx(skb); | 500 | netif_rx(skb); |
502 | read_unlock(&ipip_lock); | 501 | read_unlock(&ipip_lock); |
503 | return 0; | 502 | return 0; |
504 | } | 503 | } |
505 | read_unlock(&ipip_lock); | 504 | read_unlock(&ipip_lock); |
506 | 505 | ||
507 | return -1; | 506 | return -1; |
508 | } | 507 | } |
509 | 508 | ||
510 | /* | 509 | /* |
511 | * This function assumes it is being called from dev_queue_xmit() | 510 | * This function assumes it is being called from dev_queue_xmit() |
512 | * and that skb is filled properly by that function. | 511 | * and that skb is filled properly by that function. |
513 | */ | 512 | */ |
514 | 513 | ||
515 | static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) | 514 | static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) |
516 | { | 515 | { |
517 | struct ip_tunnel *tunnel = netdev_priv(dev); | 516 | struct ip_tunnel *tunnel = netdev_priv(dev); |
518 | struct net_device_stats *stats = &tunnel->stat; | 517 | struct net_device_stats *stats = &tunnel->stat; |
519 | struct iphdr *tiph = &tunnel->parms.iph; | 518 | struct iphdr *tiph = &tunnel->parms.iph; |
520 | u8 tos = tunnel->parms.iph.tos; | 519 | u8 tos = tunnel->parms.iph.tos; |
521 | u16 df = tiph->frag_off; | 520 | u16 df = tiph->frag_off; |
522 | struct rtable *rt; /* Route to the other host */ | 521 | struct rtable *rt; /* Route to the other host */ |
523 | struct net_device *tdev; /* Device to other host */ | 522 | struct net_device *tdev; /* Device to other host */ |
524 | struct iphdr *old_iph = skb->nh.iph; | 523 | struct iphdr *old_iph = skb->nh.iph; |
525 | struct iphdr *iph; /* Our new IP header */ | 524 | struct iphdr *iph; /* Our new IP header */ |
526 | int max_headroom; /* The extra header space needed */ | 525 | int max_headroom; /* The extra header space needed */ |
527 | u32 dst = tiph->daddr; | 526 | u32 dst = tiph->daddr; |
528 | int mtu; | 527 | int mtu; |
529 | 528 | ||
530 | if (tunnel->recursion++) { | 529 | if (tunnel->recursion++) { |
531 | tunnel->stat.collisions++; | 530 | tunnel->stat.collisions++; |
532 | goto tx_error; | 531 | goto tx_error; |
533 | } | 532 | } |
534 | 533 | ||
535 | if (skb->protocol != htons(ETH_P_IP)) | 534 | if (skb->protocol != htons(ETH_P_IP)) |
536 | goto tx_error; | 535 | goto tx_error; |
537 | 536 | ||
538 | if (tos&1) | 537 | if (tos&1) |
539 | tos = old_iph->tos; | 538 | tos = old_iph->tos; |
540 | 539 | ||
541 | if (!dst) { | 540 | if (!dst) { |
542 | /* NBMA tunnel */ | 541 | /* NBMA tunnel */ |
543 | if ((rt = (struct rtable*)skb->dst) == NULL) { | 542 | if ((rt = (struct rtable*)skb->dst) == NULL) { |
544 | tunnel->stat.tx_fifo_errors++; | 543 | tunnel->stat.tx_fifo_errors++; |
545 | goto tx_error; | 544 | goto tx_error; |
546 | } | 545 | } |
547 | if ((dst = rt->rt_gateway) == 0) | 546 | if ((dst = rt->rt_gateway) == 0) |
548 | goto tx_error_icmp; | 547 | goto tx_error_icmp; |
549 | } | 548 | } |
550 | 549 | ||
551 | { | 550 | { |
552 | struct flowi fl = { .oif = tunnel->parms.link, | 551 | struct flowi fl = { .oif = tunnel->parms.link, |
553 | .nl_u = { .ip4_u = | 552 | .nl_u = { .ip4_u = |
554 | { .daddr = dst, | 553 | { .daddr = dst, |
555 | .saddr = tiph->saddr, | 554 | .saddr = tiph->saddr, |
556 | .tos = RT_TOS(tos) } }, | 555 | .tos = RT_TOS(tos) } }, |
557 | .proto = IPPROTO_IPIP }; | 556 | .proto = IPPROTO_IPIP }; |
558 | if (ip_route_output_key(&rt, &fl)) { | 557 | if (ip_route_output_key(&rt, &fl)) { |
559 | tunnel->stat.tx_carrier_errors++; | 558 | tunnel->stat.tx_carrier_errors++; |
560 | goto tx_error_icmp; | 559 | goto tx_error_icmp; |
561 | } | 560 | } |
562 | } | 561 | } |
563 | tdev = rt->u.dst.dev; | 562 | tdev = rt->u.dst.dev; |
564 | 563 | ||
565 | if (tdev == dev) { | 564 | if (tdev == dev) { |
566 | ip_rt_put(rt); | 565 | ip_rt_put(rt); |
567 | tunnel->stat.collisions++; | 566 | tunnel->stat.collisions++; |
568 | goto tx_error; | 567 | goto tx_error; |
569 | } | 568 | } |
570 | 569 | ||
571 | if (tiph->frag_off) | 570 | if (tiph->frag_off) |
572 | mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr); | 571 | mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr); |
573 | else | 572 | else |
574 | mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu; | 573 | mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu; |
575 | 574 | ||
576 | if (mtu < 68) { | 575 | if (mtu < 68) { |
577 | tunnel->stat.collisions++; | 576 | tunnel->stat.collisions++; |
578 | ip_rt_put(rt); | 577 | ip_rt_put(rt); |
579 | goto tx_error; | 578 | goto tx_error; |
580 | } | 579 | } |
581 | if (skb->dst) | 580 | if (skb->dst) |
582 | skb->dst->ops->update_pmtu(skb->dst, mtu); | 581 | skb->dst->ops->update_pmtu(skb->dst, mtu); |
583 | 582 | ||
584 | df |= (old_iph->frag_off&htons(IP_DF)); | 583 | df |= (old_iph->frag_off&htons(IP_DF)); |
585 | 584 | ||
586 | if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) { | 585 | if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) { |
587 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); | 586 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); |
588 | ip_rt_put(rt); | 587 | ip_rt_put(rt); |
589 | goto tx_error; | 588 | goto tx_error; |
590 | } | 589 | } |
591 | 590 | ||
592 | if (tunnel->err_count > 0) { | 591 | if (tunnel->err_count > 0) { |
593 | if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) { | 592 | if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) { |
594 | tunnel->err_count--; | 593 | tunnel->err_count--; |
595 | dst_link_failure(skb); | 594 | dst_link_failure(skb); |
596 | } else | 595 | } else |
597 | tunnel->err_count = 0; | 596 | tunnel->err_count = 0; |
598 | } | 597 | } |
599 | 598 | ||
600 | /* | 599 | /* |
601 | * Okay, now see if we can stuff it in the buffer as-is. | 600 | * Okay, now see if we can stuff it in the buffer as-is. |
602 | */ | 601 | */ |
603 | max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr)); | 602 | max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr)); |
604 | 603 | ||
605 | if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) { | 604 | if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) { |
606 | struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); | 605 | struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); |
607 | if (!new_skb) { | 606 | if (!new_skb) { |
608 | ip_rt_put(rt); | 607 | ip_rt_put(rt); |
609 | stats->tx_dropped++; | 608 | stats->tx_dropped++; |
610 | dev_kfree_skb(skb); | 609 | dev_kfree_skb(skb); |
611 | tunnel->recursion--; | 610 | tunnel->recursion--; |
612 | return 0; | 611 | return 0; |
613 | } | 612 | } |
614 | if (skb->sk) | 613 | if (skb->sk) |
615 | skb_set_owner_w(new_skb, skb->sk); | 614 | skb_set_owner_w(new_skb, skb->sk); |
616 | dev_kfree_skb(skb); | 615 | dev_kfree_skb(skb); |
617 | skb = new_skb; | 616 | skb = new_skb; |
618 | old_iph = skb->nh.iph; | 617 | old_iph = skb->nh.iph; |
619 | } | 618 | } |
620 | 619 | ||
621 | skb->h.raw = skb->nh.raw; | 620 | skb->h.raw = skb->nh.raw; |
622 | skb->nh.raw = skb_push(skb, sizeof(struct iphdr)); | 621 | skb->nh.raw = skb_push(skb, sizeof(struct iphdr)); |
623 | memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); | 622 | memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); |
624 | IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | | 623 | IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | |
625 | IPSKB_REROUTED); | 624 | IPSKB_REROUTED); |
626 | dst_release(skb->dst); | 625 | dst_release(skb->dst); |
627 | skb->dst = &rt->u.dst; | 626 | skb->dst = &rt->u.dst; |
628 | 627 | ||
629 | /* | 628 | /* |
630 | * Push down and install the IPIP header. | 629 | * Push down and install the IPIP header. |
631 | */ | 630 | */ |
632 | 631 | ||
633 | iph = skb->nh.iph; | 632 | iph = skb->nh.iph; |
634 | iph->version = 4; | 633 | iph->version = 4; |
635 | iph->ihl = sizeof(struct iphdr)>>2; | 634 | iph->ihl = sizeof(struct iphdr)>>2; |
636 | iph->frag_off = df; | 635 | iph->frag_off = df; |
637 | iph->protocol = IPPROTO_IPIP; | 636 | iph->protocol = IPPROTO_IPIP; |
638 | iph->tos = INET_ECN_encapsulate(tos, old_iph->tos); | 637 | iph->tos = INET_ECN_encapsulate(tos, old_iph->tos); |
639 | iph->daddr = rt->rt_dst; | 638 | iph->daddr = rt->rt_dst; |
640 | iph->saddr = rt->rt_src; | 639 | iph->saddr = rt->rt_src; |
641 | 640 | ||
642 | if ((iph->ttl = tiph->ttl) == 0) | 641 | if ((iph->ttl = tiph->ttl) == 0) |
643 | iph->ttl = old_iph->ttl; | 642 | iph->ttl = old_iph->ttl; |
644 | 643 | ||
645 | nf_reset(skb); | 644 | nf_reset(skb); |
646 | 645 | ||
647 | IPTUNNEL_XMIT(); | 646 | IPTUNNEL_XMIT(); |
648 | tunnel->recursion--; | 647 | tunnel->recursion--; |
649 | return 0; | 648 | return 0; |
650 | 649 | ||
651 | tx_error_icmp: | 650 | tx_error_icmp: |
652 | dst_link_failure(skb); | 651 | dst_link_failure(skb); |
653 | tx_error: | 652 | tx_error: |
654 | stats->tx_errors++; | 653 | stats->tx_errors++; |
655 | dev_kfree_skb(skb); | 654 | dev_kfree_skb(skb); |
656 | tunnel->recursion--; | 655 | tunnel->recursion--; |
657 | return 0; | 656 | return 0; |
658 | } | 657 | } |
659 | 658 | ||
660 | static int | 659 | static int |
661 | ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) | 660 | ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) |
662 | { | 661 | { |
663 | int err = 0; | 662 | int err = 0; |
664 | struct ip_tunnel_parm p; | 663 | struct ip_tunnel_parm p; |
665 | struct ip_tunnel *t; | 664 | struct ip_tunnel *t; |
666 | 665 | ||
667 | switch (cmd) { | 666 | switch (cmd) { |
668 | case SIOCGETTUNNEL: | 667 | case SIOCGETTUNNEL: |
669 | t = NULL; | 668 | t = NULL; |
670 | if (dev == ipip_fb_tunnel_dev) { | 669 | if (dev == ipip_fb_tunnel_dev) { |
671 | if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { | 670 | if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { |
672 | err = -EFAULT; | 671 | err = -EFAULT; |
673 | break; | 672 | break; |
674 | } | 673 | } |
675 | t = ipip_tunnel_locate(&p, 0); | 674 | t = ipip_tunnel_locate(&p, 0); |
676 | } | 675 | } |
677 | if (t == NULL) | 676 | if (t == NULL) |
678 | t = netdev_priv(dev); | 677 | t = netdev_priv(dev); |
679 | memcpy(&p, &t->parms, sizeof(p)); | 678 | memcpy(&p, &t->parms, sizeof(p)); |
680 | if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) | 679 | if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) |
681 | err = -EFAULT; | 680 | err = -EFAULT; |
682 | break; | 681 | break; |
683 | 682 | ||
684 | case SIOCADDTUNNEL: | 683 | case SIOCADDTUNNEL: |
685 | case SIOCCHGTUNNEL: | 684 | case SIOCCHGTUNNEL: |
686 | err = -EPERM; | 685 | err = -EPERM; |
687 | if (!capable(CAP_NET_ADMIN)) | 686 | if (!capable(CAP_NET_ADMIN)) |
688 | goto done; | 687 | goto done; |
689 | 688 | ||
690 | err = -EFAULT; | 689 | err = -EFAULT; |
691 | if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) | 690 | if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) |
692 | goto done; | 691 | goto done; |
693 | 692 | ||
694 | err = -EINVAL; | 693 | err = -EINVAL; |
695 | if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP || | 694 | if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP || |
696 | p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF))) | 695 | p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF))) |
697 | goto done; | 696 | goto done; |
698 | if (p.iph.ttl) | 697 | if (p.iph.ttl) |
699 | p.iph.frag_off |= htons(IP_DF); | 698 | p.iph.frag_off |= htons(IP_DF); |
700 | 699 | ||
701 | t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL); | 700 | t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL); |
702 | 701 | ||
703 | if (dev != ipip_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { | 702 | if (dev != ipip_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { |
704 | if (t != NULL) { | 703 | if (t != NULL) { |
705 | if (t->dev != dev) { | 704 | if (t->dev != dev) { |
706 | err = -EEXIST; | 705 | err = -EEXIST; |
707 | break; | 706 | break; |
708 | } | 707 | } |
709 | } else { | 708 | } else { |
710 | if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) || | 709 | if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) || |
711 | (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) { | 710 | (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) { |
712 | err = -EINVAL; | 711 | err = -EINVAL; |
713 | break; | 712 | break; |
714 | } | 713 | } |
715 | t = netdev_priv(dev); | 714 | t = netdev_priv(dev); |
716 | ipip_tunnel_unlink(t); | 715 | ipip_tunnel_unlink(t); |
717 | t->parms.iph.saddr = p.iph.saddr; | 716 | t->parms.iph.saddr = p.iph.saddr; |
718 | t->parms.iph.daddr = p.iph.daddr; | 717 | t->parms.iph.daddr = p.iph.daddr; |
719 | memcpy(dev->dev_addr, &p.iph.saddr, 4); | 718 | memcpy(dev->dev_addr, &p.iph.saddr, 4); |
720 | memcpy(dev->broadcast, &p.iph.daddr, 4); | 719 | memcpy(dev->broadcast, &p.iph.daddr, 4); |
721 | ipip_tunnel_link(t); | 720 | ipip_tunnel_link(t); |
722 | netdev_state_change(dev); | 721 | netdev_state_change(dev); |
723 | } | 722 | } |
724 | } | 723 | } |
725 | 724 | ||
726 | if (t) { | 725 | if (t) { |
727 | err = 0; | 726 | err = 0; |
728 | if (cmd == SIOCCHGTUNNEL) { | 727 | if (cmd == SIOCCHGTUNNEL) { |
729 | t->parms.iph.ttl = p.iph.ttl; | 728 | t->parms.iph.ttl = p.iph.ttl; |
730 | t->parms.iph.tos = p.iph.tos; | 729 | t->parms.iph.tos = p.iph.tos; |
731 | t->parms.iph.frag_off = p.iph.frag_off; | 730 | t->parms.iph.frag_off = p.iph.frag_off; |
732 | } | 731 | } |
733 | if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) | 732 | if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) |
734 | err = -EFAULT; | 733 | err = -EFAULT; |
735 | } else | 734 | } else |
736 | err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); | 735 | err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); |
737 | break; | 736 | break; |
738 | 737 | ||
739 | case SIOCDELTUNNEL: | 738 | case SIOCDELTUNNEL: |
740 | err = -EPERM; | 739 | err = -EPERM; |
741 | if (!capable(CAP_NET_ADMIN)) | 740 | if (!capable(CAP_NET_ADMIN)) |
742 | goto done; | 741 | goto done; |
743 | 742 | ||
744 | if (dev == ipip_fb_tunnel_dev) { | 743 | if (dev == ipip_fb_tunnel_dev) { |
745 | err = -EFAULT; | 744 | err = -EFAULT; |
746 | if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) | 745 | if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) |
747 | goto done; | 746 | goto done; |
748 | err = -ENOENT; | 747 | err = -ENOENT; |
749 | if ((t = ipip_tunnel_locate(&p, 0)) == NULL) | 748 | if ((t = ipip_tunnel_locate(&p, 0)) == NULL) |
750 | goto done; | 749 | goto done; |
751 | err = -EPERM; | 750 | err = -EPERM; |
752 | if (t->dev == ipip_fb_tunnel_dev) | 751 | if (t->dev == ipip_fb_tunnel_dev) |
753 | goto done; | 752 | goto done; |
754 | dev = t->dev; | 753 | dev = t->dev; |
755 | } | 754 | } |
756 | err = unregister_netdevice(dev); | 755 | err = unregister_netdevice(dev); |
757 | break; | 756 | break; |
758 | 757 | ||
759 | default: | 758 | default: |
760 | err = -EINVAL; | 759 | err = -EINVAL; |
761 | } | 760 | } |
762 | 761 | ||
763 | done: | 762 | done: |
764 | return err; | 763 | return err; |
765 | } | 764 | } |
766 | 765 | ||
767 | static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev) | 766 | static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev) |
768 | { | 767 | { |
769 | return &(((struct ip_tunnel*)netdev_priv(dev))->stat); | 768 | return &(((struct ip_tunnel*)netdev_priv(dev))->stat); |
770 | } | 769 | } |
771 | 770 | ||
772 | static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu) | 771 | static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu) |
773 | { | 772 | { |
774 | if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr)) | 773 | if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr)) |
775 | return -EINVAL; | 774 | return -EINVAL; |
776 | dev->mtu = new_mtu; | 775 | dev->mtu = new_mtu; |
777 | return 0; | 776 | return 0; |
778 | } | 777 | } |
779 | 778 | ||
780 | static void ipip_tunnel_setup(struct net_device *dev) | 779 | static void ipip_tunnel_setup(struct net_device *dev) |
781 | { | 780 | { |
782 | SET_MODULE_OWNER(dev); | 781 | SET_MODULE_OWNER(dev); |
783 | dev->uninit = ipip_tunnel_uninit; | 782 | dev->uninit = ipip_tunnel_uninit; |
784 | dev->hard_start_xmit = ipip_tunnel_xmit; | 783 | dev->hard_start_xmit = ipip_tunnel_xmit; |
785 | dev->get_stats = ipip_tunnel_get_stats; | 784 | dev->get_stats = ipip_tunnel_get_stats; |
786 | dev->do_ioctl = ipip_tunnel_ioctl; | 785 | dev->do_ioctl = ipip_tunnel_ioctl; |
787 | dev->change_mtu = ipip_tunnel_change_mtu; | 786 | dev->change_mtu = ipip_tunnel_change_mtu; |
788 | dev->destructor = free_netdev; | 787 | dev->destructor = free_netdev; |
789 | 788 | ||
790 | dev->type = ARPHRD_TUNNEL; | 789 | dev->type = ARPHRD_TUNNEL; |
791 | dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr); | 790 | dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr); |
792 | dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr); | 791 | dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr); |
793 | dev->flags = IFF_NOARP; | 792 | dev->flags = IFF_NOARP; |
794 | dev->iflink = 0; | 793 | dev->iflink = 0; |
795 | dev->addr_len = 4; | 794 | dev->addr_len = 4; |
796 | } | 795 | } |
797 | 796 | ||
798 | static int ipip_tunnel_init(struct net_device *dev) | 797 | static int ipip_tunnel_init(struct net_device *dev) |
799 | { | 798 | { |
800 | struct net_device *tdev = NULL; | 799 | struct net_device *tdev = NULL; |
801 | struct ip_tunnel *tunnel; | 800 | struct ip_tunnel *tunnel; |
802 | struct iphdr *iph; | 801 | struct iphdr *iph; |
803 | 802 | ||
804 | tunnel = netdev_priv(dev); | 803 | tunnel = netdev_priv(dev); |
805 | iph = &tunnel->parms.iph; | 804 | iph = &tunnel->parms.iph; |
806 | 805 | ||
807 | tunnel->dev = dev; | 806 | tunnel->dev = dev; |
808 | strcpy(tunnel->parms.name, dev->name); | 807 | strcpy(tunnel->parms.name, dev->name); |
809 | 808 | ||
810 | memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); | 809 | memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); |
811 | memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); | 810 | memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); |
812 | 811 | ||
813 | if (iph->daddr) { | 812 | if (iph->daddr) { |
814 | struct flowi fl = { .oif = tunnel->parms.link, | 813 | struct flowi fl = { .oif = tunnel->parms.link, |
815 | .nl_u = { .ip4_u = | 814 | .nl_u = { .ip4_u = |
816 | { .daddr = iph->daddr, | 815 | { .daddr = iph->daddr, |
817 | .saddr = iph->saddr, | 816 | .saddr = iph->saddr, |
818 | .tos = RT_TOS(iph->tos) } }, | 817 | .tos = RT_TOS(iph->tos) } }, |
819 | .proto = IPPROTO_IPIP }; | 818 | .proto = IPPROTO_IPIP }; |
820 | struct rtable *rt; | 819 | struct rtable *rt; |
821 | if (!ip_route_output_key(&rt, &fl)) { | 820 | if (!ip_route_output_key(&rt, &fl)) { |
822 | tdev = rt->u.dst.dev; | 821 | tdev = rt->u.dst.dev; |
823 | ip_rt_put(rt); | 822 | ip_rt_put(rt); |
824 | } | 823 | } |
825 | dev->flags |= IFF_POINTOPOINT; | 824 | dev->flags |= IFF_POINTOPOINT; |
826 | } | 825 | } |
827 | 826 | ||
828 | if (!tdev && tunnel->parms.link) | 827 | if (!tdev && tunnel->parms.link) |
829 | tdev = __dev_get_by_index(tunnel->parms.link); | 828 | tdev = __dev_get_by_index(tunnel->parms.link); |
830 | 829 | ||
831 | if (tdev) { | 830 | if (tdev) { |
832 | dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); | 831 | dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); |
833 | dev->mtu = tdev->mtu - sizeof(struct iphdr); | 832 | dev->mtu = tdev->mtu - sizeof(struct iphdr); |
834 | } | 833 | } |
835 | dev->iflink = tunnel->parms.link; | 834 | dev->iflink = tunnel->parms.link; |
836 | 835 | ||
837 | return 0; | 836 | return 0; |
838 | } | 837 | } |
839 | 838 | ||
840 | static int __init ipip_fb_tunnel_init(struct net_device *dev) | 839 | static int __init ipip_fb_tunnel_init(struct net_device *dev) |
841 | { | 840 | { |
842 | struct ip_tunnel *tunnel = netdev_priv(dev); | 841 | struct ip_tunnel *tunnel = netdev_priv(dev); |
843 | struct iphdr *iph = &tunnel->parms.iph; | 842 | struct iphdr *iph = &tunnel->parms.iph; |
844 | 843 | ||
845 | tunnel->dev = dev; | 844 | tunnel->dev = dev; |
846 | strcpy(tunnel->parms.name, dev->name); | 845 | strcpy(tunnel->parms.name, dev->name); |
847 | 846 | ||
848 | iph->version = 4; | 847 | iph->version = 4; |
849 | iph->protocol = IPPROTO_IPIP; | 848 | iph->protocol = IPPROTO_IPIP; |
850 | iph->ihl = 5; | 849 | iph->ihl = 5; |
851 | 850 | ||
852 | dev_hold(dev); | 851 | dev_hold(dev); |
853 | tunnels_wc[0] = tunnel; | 852 | tunnels_wc[0] = tunnel; |
854 | return 0; | 853 | return 0; |
855 | } | 854 | } |
856 | 855 | ||
857 | static struct xfrm_tunnel ipip_handler = { | 856 | static struct xfrm_tunnel ipip_handler = { |
858 | .handler = ipip_rcv, | 857 | .handler = ipip_rcv, |
859 | .err_handler = ipip_err, | 858 | .err_handler = ipip_err, |
860 | .priority = 1, | 859 | .priority = 1, |
861 | }; | 860 | }; |
862 | 861 | ||
863 | static char banner[] __initdata = | 862 | static char banner[] __initdata = |
864 | KERN_INFO "IPv4 over IPv4 tunneling driver\n"; | 863 | KERN_INFO "IPv4 over IPv4 tunneling driver\n"; |
865 | 864 | ||
866 | static int __init ipip_init(void) | 865 | static int __init ipip_init(void) |
867 | { | 866 | { |
868 | int err; | 867 | int err; |
869 | 868 | ||
870 | printk(banner); | 869 | printk(banner); |
871 | 870 | ||
872 | if (xfrm4_tunnel_register(&ipip_handler)) { | 871 | if (xfrm4_tunnel_register(&ipip_handler)) { |
873 | printk(KERN_INFO "ipip init: can't register tunnel\n"); | 872 | printk(KERN_INFO "ipip init: can't register tunnel\n"); |
874 | return -EAGAIN; | 873 | return -EAGAIN; |
875 | } | 874 | } |
876 | 875 | ||
877 | ipip_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), | 876 | ipip_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), |
878 | "tunl0", | 877 | "tunl0", |
879 | ipip_tunnel_setup); | 878 | ipip_tunnel_setup); |
880 | if (!ipip_fb_tunnel_dev) { | 879 | if (!ipip_fb_tunnel_dev) { |
881 | err = -ENOMEM; | 880 | err = -ENOMEM; |
882 | goto err1; | 881 | goto err1; |
883 | } | 882 | } |
884 | 883 | ||
885 | ipip_fb_tunnel_dev->init = ipip_fb_tunnel_init; | 884 | ipip_fb_tunnel_dev->init = ipip_fb_tunnel_init; |
886 | 885 | ||
887 | if ((err = register_netdev(ipip_fb_tunnel_dev))) | 886 | if ((err = register_netdev(ipip_fb_tunnel_dev))) |
888 | goto err2; | 887 | goto err2; |
889 | out: | 888 | out: |
890 | return err; | 889 | return err; |
891 | err2: | 890 | err2: |
892 | free_netdev(ipip_fb_tunnel_dev); | 891 | free_netdev(ipip_fb_tunnel_dev); |
893 | err1: | 892 | err1: |
894 | xfrm4_tunnel_deregister(&ipip_handler); | 893 | xfrm4_tunnel_deregister(&ipip_handler); |
895 | goto out; | 894 | goto out; |
896 | } | 895 | } |
897 | 896 | ||
898 | static void __exit ipip_destroy_tunnels(void) | 897 | static void __exit ipip_destroy_tunnels(void) |
899 | { | 898 | { |
900 | int prio; | 899 | int prio; |
901 | 900 | ||
902 | for (prio = 1; prio < 4; prio++) { | 901 | for (prio = 1; prio < 4; prio++) { |
903 | int h; | 902 | int h; |
904 | for (h = 0; h < HASH_SIZE; h++) { | 903 | for (h = 0; h < HASH_SIZE; h++) { |
905 | struct ip_tunnel *t; | 904 | struct ip_tunnel *t; |
906 | while ((t = tunnels[prio][h]) != NULL) | 905 | while ((t = tunnels[prio][h]) != NULL) |
907 | unregister_netdevice(t->dev); | 906 | unregister_netdevice(t->dev); |
908 | } | 907 | } |
909 | } | 908 | } |
910 | } | 909 | } |
911 | 910 | ||
912 | static void __exit ipip_fini(void) | 911 | static void __exit ipip_fini(void) |
913 | { | 912 | { |
914 | if (xfrm4_tunnel_deregister(&ipip_handler)) | 913 | if (xfrm4_tunnel_deregister(&ipip_handler)) |
915 | printk(KERN_INFO "ipip close: can't deregister tunnel\n"); | 914 | printk(KERN_INFO "ipip close: can't deregister tunnel\n"); |
916 | 915 | ||
917 | rtnl_lock(); | 916 | rtnl_lock(); |
918 | ipip_destroy_tunnels(); | 917 | ipip_destroy_tunnels(); |
919 | unregister_netdevice(ipip_fb_tunnel_dev); | 918 | unregister_netdevice(ipip_fb_tunnel_dev); |
920 | rtnl_unlock(); | 919 | rtnl_unlock(); |
921 | } | 920 | } |
922 | 921 | ||
923 | module_init(ipip_init); | 922 | module_init(ipip_init); |
924 | module_exit(ipip_fini); | 923 | module_exit(ipip_fini); |
925 | MODULE_LICENSE("GPL"); | 924 | MODULE_LICENSE("GPL"); |
926 | 925 |
net/ipv4/ipmr.c
1 | /* | 1 | /* |
2 | * IP multicast routing support for mrouted 3.6/3.8 | 2 | * IP multicast routing support for mrouted 3.6/3.8 |
3 | * | 3 | * |
4 | * (c) 1995 Alan Cox, <alan@redhat.com> | 4 | * (c) 1995 Alan Cox, <alan@redhat.com> |
5 | * Linux Consultancy and Custom Driver Development | 5 | * Linux Consultancy and Custom Driver Development |
6 | * | 6 | * |
7 | * This program is free software; you can redistribute it and/or | 7 | * This program is free software; you can redistribute it and/or |
8 | * modify it under the terms of the GNU General Public License | 8 | * modify it under the terms of the GNU General Public License |
9 | * as published by the Free Software Foundation; either version | 9 | * as published by the Free Software Foundation; either version |
10 | * 2 of the License, or (at your option) any later version. | 10 | * 2 of the License, or (at your option) any later version. |
11 | * | 11 | * |
12 | * Version: $Id: ipmr.c,v 1.65 2001/10/31 21:55:54 davem Exp $ | 12 | * Version: $Id: ipmr.c,v 1.65 2001/10/31 21:55:54 davem Exp $ |
13 | * | 13 | * |
14 | * Fixes: | 14 | * Fixes: |
15 | * Michael Chastain : Incorrect size of copying. | 15 | * Michael Chastain : Incorrect size of copying. |
16 | * Alan Cox : Added the cache manager code | 16 | * Alan Cox : Added the cache manager code |
17 | * Alan Cox : Fixed the clone/copy bug and device race. | 17 | * Alan Cox : Fixed the clone/copy bug and device race. |
18 | * Mike McLagan : Routing by source | 18 | * Mike McLagan : Routing by source |
19 | * Malcolm Beattie : Buffer handling fixes. | 19 | * Malcolm Beattie : Buffer handling fixes. |
20 | * Alexey Kuznetsov : Double buffer free and other fixes. | 20 | * Alexey Kuznetsov : Double buffer free and other fixes. |
21 | * SVR Anand : Fixed several multicast bugs and problems. | 21 | * SVR Anand : Fixed several multicast bugs and problems. |
22 | * Alexey Kuznetsov : Status, optimisations and more. | 22 | * Alexey Kuznetsov : Status, optimisations and more. |
23 | * Brad Parker : Better behaviour on mrouted upcall | 23 | * Brad Parker : Better behaviour on mrouted upcall |
24 | * overflow. | 24 | * overflow. |
25 | * Carlos Picoto : PIMv1 Support | 25 | * Carlos Picoto : PIMv1 Support |
26 | * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header | 26 | * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header |
27 | * Relax this requrement to work with older peers. | 27 | * Relax this requrement to work with older peers. |
28 | * | 28 | * |
29 | */ | 29 | */ |
30 | 30 | ||
31 | #include <asm/system.h> | 31 | #include <asm/system.h> |
32 | #include <asm/uaccess.h> | 32 | #include <asm/uaccess.h> |
33 | #include <linux/types.h> | 33 | #include <linux/types.h> |
34 | #include <linux/sched.h> | 34 | #include <linux/sched.h> |
35 | #include <linux/capability.h> | 35 | #include <linux/capability.h> |
36 | #include <linux/errno.h> | 36 | #include <linux/errno.h> |
37 | #include <linux/timer.h> | 37 | #include <linux/timer.h> |
38 | #include <linux/mm.h> | 38 | #include <linux/mm.h> |
39 | #include <linux/kernel.h> | 39 | #include <linux/kernel.h> |
40 | #include <linux/fcntl.h> | 40 | #include <linux/fcntl.h> |
41 | #include <linux/stat.h> | 41 | #include <linux/stat.h> |
42 | #include <linux/socket.h> | 42 | #include <linux/socket.h> |
43 | #include <linux/in.h> | 43 | #include <linux/in.h> |
44 | #include <linux/inet.h> | 44 | #include <linux/inet.h> |
45 | #include <linux/netdevice.h> | 45 | #include <linux/netdevice.h> |
46 | #include <linux/inetdevice.h> | 46 | #include <linux/inetdevice.h> |
47 | #include <linux/igmp.h> | 47 | #include <linux/igmp.h> |
48 | #include <linux/proc_fs.h> | 48 | #include <linux/proc_fs.h> |
49 | #include <linux/seq_file.h> | 49 | #include <linux/seq_file.h> |
50 | #include <linux/mroute.h> | 50 | #include <linux/mroute.h> |
51 | #include <linux/init.h> | 51 | #include <linux/init.h> |
52 | #include <linux/if_ether.h> | 52 | #include <linux/if_ether.h> |
53 | #include <net/ip.h> | 53 | #include <net/ip.h> |
54 | #include <net/protocol.h> | 54 | #include <net/protocol.h> |
55 | #include <linux/skbuff.h> | 55 | #include <linux/skbuff.h> |
56 | #include <net/route.h> | 56 | #include <net/route.h> |
57 | #include <net/sock.h> | 57 | #include <net/sock.h> |
58 | #include <net/icmp.h> | 58 | #include <net/icmp.h> |
59 | #include <net/udp.h> | 59 | #include <net/udp.h> |
60 | #include <net/raw.h> | 60 | #include <net/raw.h> |
61 | #include <linux/notifier.h> | 61 | #include <linux/notifier.h> |
62 | #include <linux/if_arp.h> | 62 | #include <linux/if_arp.h> |
63 | #include <linux/netfilter_ipv4.h> | 63 | #include <linux/netfilter_ipv4.h> |
64 | #include <net/ipip.h> | 64 | #include <net/ipip.h> |
65 | #include <net/checksum.h> | 65 | #include <net/checksum.h> |
66 | 66 | ||
67 | #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) | 67 | #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) |
68 | #define CONFIG_IP_PIMSM 1 | 68 | #define CONFIG_IP_PIMSM 1 |
69 | #endif | 69 | #endif |
70 | 70 | ||
71 | static struct sock *mroute_socket; | 71 | static struct sock *mroute_socket; |
72 | 72 | ||
73 | 73 | ||
74 | /* Big lock, protecting vif table, mrt cache and mroute socket state. | 74 | /* Big lock, protecting vif table, mrt cache and mroute socket state. |
75 | Note that the changes are semaphored via rtnl_lock. | 75 | Note that the changes are semaphored via rtnl_lock. |
76 | */ | 76 | */ |
77 | 77 | ||
78 | static DEFINE_RWLOCK(mrt_lock); | 78 | static DEFINE_RWLOCK(mrt_lock); |
79 | 79 | ||
80 | /* | 80 | /* |
81 | * Multicast router control variables | 81 | * Multicast router control variables |
82 | */ | 82 | */ |
83 | 83 | ||
84 | static struct vif_device vif_table[MAXVIFS]; /* Devices */ | 84 | static struct vif_device vif_table[MAXVIFS]; /* Devices */ |
85 | static int maxvif; | 85 | static int maxvif; |
86 | 86 | ||
87 | #define VIF_EXISTS(idx) (vif_table[idx].dev != NULL) | 87 | #define VIF_EXISTS(idx) (vif_table[idx].dev != NULL) |
88 | 88 | ||
89 | static int mroute_do_assert; /* Set in PIM assert */ | 89 | static int mroute_do_assert; /* Set in PIM assert */ |
90 | static int mroute_do_pim; | 90 | static int mroute_do_pim; |
91 | 91 | ||
92 | static struct mfc_cache *mfc_cache_array[MFC_LINES]; /* Forwarding cache */ | 92 | static struct mfc_cache *mfc_cache_array[MFC_LINES]; /* Forwarding cache */ |
93 | 93 | ||
94 | static struct mfc_cache *mfc_unres_queue; /* Queue of unresolved entries */ | 94 | static struct mfc_cache *mfc_unres_queue; /* Queue of unresolved entries */ |
95 | static atomic_t cache_resolve_queue_len; /* Size of unresolved */ | 95 | static atomic_t cache_resolve_queue_len; /* Size of unresolved */ |
96 | 96 | ||
97 | /* Special spinlock for queue of unresolved entries */ | 97 | /* Special spinlock for queue of unresolved entries */ |
98 | static DEFINE_SPINLOCK(mfc_unres_lock); | 98 | static DEFINE_SPINLOCK(mfc_unres_lock); |
99 | 99 | ||
100 | /* We return to original Alan's scheme. Hash table of resolved | 100 | /* We return to original Alan's scheme. Hash table of resolved |
101 | entries is changed only in process context and protected | 101 | entries is changed only in process context and protected |
102 | with weak lock mrt_lock. Queue of unresolved entries is protected | 102 | with weak lock mrt_lock. Queue of unresolved entries is protected |
103 | with strong spinlock mfc_unres_lock. | 103 | with strong spinlock mfc_unres_lock. |
104 | 104 | ||
105 | In this case data path is free of exclusive locks at all. | 105 | In this case data path is free of exclusive locks at all. |
106 | */ | 106 | */ |
107 | 107 | ||
108 | static kmem_cache_t *mrt_cachep __read_mostly; | 108 | static kmem_cache_t *mrt_cachep __read_mostly; |
109 | 109 | ||
110 | static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local); | 110 | static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local); |
111 | static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert); | 111 | static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert); |
112 | static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm); | 112 | static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm); |
113 | 113 | ||
114 | #ifdef CONFIG_IP_PIMSM_V2 | 114 | #ifdef CONFIG_IP_PIMSM_V2 |
115 | static struct net_protocol pim_protocol; | 115 | static struct net_protocol pim_protocol; |
116 | #endif | 116 | #endif |
117 | 117 | ||
118 | static struct timer_list ipmr_expire_timer; | 118 | static struct timer_list ipmr_expire_timer; |
119 | 119 | ||
120 | /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */ | 120 | /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */ |
121 | 121 | ||
122 | static | 122 | static |
123 | struct net_device *ipmr_new_tunnel(struct vifctl *v) | 123 | struct net_device *ipmr_new_tunnel(struct vifctl *v) |
124 | { | 124 | { |
125 | struct net_device *dev; | 125 | struct net_device *dev; |
126 | 126 | ||
127 | dev = __dev_get_by_name("tunl0"); | 127 | dev = __dev_get_by_name("tunl0"); |
128 | 128 | ||
129 | if (dev) { | 129 | if (dev) { |
130 | int err; | 130 | int err; |
131 | struct ifreq ifr; | 131 | struct ifreq ifr; |
132 | mm_segment_t oldfs; | 132 | mm_segment_t oldfs; |
133 | struct ip_tunnel_parm p; | 133 | struct ip_tunnel_parm p; |
134 | struct in_device *in_dev; | 134 | struct in_device *in_dev; |
135 | 135 | ||
136 | memset(&p, 0, sizeof(p)); | 136 | memset(&p, 0, sizeof(p)); |
137 | p.iph.daddr = v->vifc_rmt_addr.s_addr; | 137 | p.iph.daddr = v->vifc_rmt_addr.s_addr; |
138 | p.iph.saddr = v->vifc_lcl_addr.s_addr; | 138 | p.iph.saddr = v->vifc_lcl_addr.s_addr; |
139 | p.iph.version = 4; | 139 | p.iph.version = 4; |
140 | p.iph.ihl = 5; | 140 | p.iph.ihl = 5; |
141 | p.iph.protocol = IPPROTO_IPIP; | 141 | p.iph.protocol = IPPROTO_IPIP; |
142 | sprintf(p.name, "dvmrp%d", v->vifc_vifi); | 142 | sprintf(p.name, "dvmrp%d", v->vifc_vifi); |
143 | ifr.ifr_ifru.ifru_data = (void*)&p; | 143 | ifr.ifr_ifru.ifru_data = (void*)&p; |
144 | 144 | ||
145 | oldfs = get_fs(); set_fs(KERNEL_DS); | 145 | oldfs = get_fs(); set_fs(KERNEL_DS); |
146 | err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL); | 146 | err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL); |
147 | set_fs(oldfs); | 147 | set_fs(oldfs); |
148 | 148 | ||
149 | dev = NULL; | 149 | dev = NULL; |
150 | 150 | ||
151 | if (err == 0 && (dev = __dev_get_by_name(p.name)) != NULL) { | 151 | if (err == 0 && (dev = __dev_get_by_name(p.name)) != NULL) { |
152 | dev->flags |= IFF_MULTICAST; | 152 | dev->flags |= IFF_MULTICAST; |
153 | 153 | ||
154 | in_dev = __in_dev_get_rtnl(dev); | 154 | in_dev = __in_dev_get_rtnl(dev); |
155 | if (in_dev == NULL && (in_dev = inetdev_init(dev)) == NULL) | 155 | if (in_dev == NULL && (in_dev = inetdev_init(dev)) == NULL) |
156 | goto failure; | 156 | goto failure; |
157 | in_dev->cnf.rp_filter = 0; | 157 | in_dev->cnf.rp_filter = 0; |
158 | 158 | ||
159 | if (dev_open(dev)) | 159 | if (dev_open(dev)) |
160 | goto failure; | 160 | goto failure; |
161 | } | 161 | } |
162 | } | 162 | } |
163 | return dev; | 163 | return dev; |
164 | 164 | ||
165 | failure: | 165 | failure: |
166 | /* allow the register to be completed before unregistering. */ | 166 | /* allow the register to be completed before unregistering. */ |
167 | rtnl_unlock(); | 167 | rtnl_unlock(); |
168 | rtnl_lock(); | 168 | rtnl_lock(); |
169 | 169 | ||
170 | unregister_netdevice(dev); | 170 | unregister_netdevice(dev); |
171 | return NULL; | 171 | return NULL; |
172 | } | 172 | } |
173 | 173 | ||
174 | #ifdef CONFIG_IP_PIMSM | 174 | #ifdef CONFIG_IP_PIMSM |
175 | 175 | ||
176 | static int reg_vif_num = -1; | 176 | static int reg_vif_num = -1; |
177 | 177 | ||
178 | static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) | 178 | static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) |
179 | { | 179 | { |
180 | read_lock(&mrt_lock); | 180 | read_lock(&mrt_lock); |
181 | ((struct net_device_stats*)netdev_priv(dev))->tx_bytes += skb->len; | 181 | ((struct net_device_stats*)netdev_priv(dev))->tx_bytes += skb->len; |
182 | ((struct net_device_stats*)netdev_priv(dev))->tx_packets++; | 182 | ((struct net_device_stats*)netdev_priv(dev))->tx_packets++; |
183 | ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT); | 183 | ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT); |
184 | read_unlock(&mrt_lock); | 184 | read_unlock(&mrt_lock); |
185 | kfree_skb(skb); | 185 | kfree_skb(skb); |
186 | return 0; | 186 | return 0; |
187 | } | 187 | } |
188 | 188 | ||
189 | static struct net_device_stats *reg_vif_get_stats(struct net_device *dev) | 189 | static struct net_device_stats *reg_vif_get_stats(struct net_device *dev) |
190 | { | 190 | { |
191 | return (struct net_device_stats*)netdev_priv(dev); | 191 | return (struct net_device_stats*)netdev_priv(dev); |
192 | } | 192 | } |
193 | 193 | ||
194 | static void reg_vif_setup(struct net_device *dev) | 194 | static void reg_vif_setup(struct net_device *dev) |
195 | { | 195 | { |
196 | dev->type = ARPHRD_PIMREG; | 196 | dev->type = ARPHRD_PIMREG; |
197 | dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8; | 197 | dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8; |
198 | dev->flags = IFF_NOARP; | 198 | dev->flags = IFF_NOARP; |
199 | dev->hard_start_xmit = reg_vif_xmit; | 199 | dev->hard_start_xmit = reg_vif_xmit; |
200 | dev->get_stats = reg_vif_get_stats; | 200 | dev->get_stats = reg_vif_get_stats; |
201 | dev->destructor = free_netdev; | 201 | dev->destructor = free_netdev; |
202 | } | 202 | } |
203 | 203 | ||
204 | static struct net_device *ipmr_reg_vif(void) | 204 | static struct net_device *ipmr_reg_vif(void) |
205 | { | 205 | { |
206 | struct net_device *dev; | 206 | struct net_device *dev; |
207 | struct in_device *in_dev; | 207 | struct in_device *in_dev; |
208 | 208 | ||
209 | dev = alloc_netdev(sizeof(struct net_device_stats), "pimreg", | 209 | dev = alloc_netdev(sizeof(struct net_device_stats), "pimreg", |
210 | reg_vif_setup); | 210 | reg_vif_setup); |
211 | 211 | ||
212 | if (dev == NULL) | 212 | if (dev == NULL) |
213 | return NULL; | 213 | return NULL; |
214 | 214 | ||
215 | if (register_netdevice(dev)) { | 215 | if (register_netdevice(dev)) { |
216 | free_netdev(dev); | 216 | free_netdev(dev); |
217 | return NULL; | 217 | return NULL; |
218 | } | 218 | } |
219 | dev->iflink = 0; | 219 | dev->iflink = 0; |
220 | 220 | ||
221 | if ((in_dev = inetdev_init(dev)) == NULL) | 221 | if ((in_dev = inetdev_init(dev)) == NULL) |
222 | goto failure; | 222 | goto failure; |
223 | 223 | ||
224 | in_dev->cnf.rp_filter = 0; | 224 | in_dev->cnf.rp_filter = 0; |
225 | 225 | ||
226 | if (dev_open(dev)) | 226 | if (dev_open(dev)) |
227 | goto failure; | 227 | goto failure; |
228 | 228 | ||
229 | return dev; | 229 | return dev; |
230 | 230 | ||
231 | failure: | 231 | failure: |
232 | /* allow the register to be completed before unregistering. */ | 232 | /* allow the register to be completed before unregistering. */ |
233 | rtnl_unlock(); | 233 | rtnl_unlock(); |
234 | rtnl_lock(); | 234 | rtnl_lock(); |
235 | 235 | ||
236 | unregister_netdevice(dev); | 236 | unregister_netdevice(dev); |
237 | return NULL; | 237 | return NULL; |
238 | } | 238 | } |
239 | #endif | 239 | #endif |
240 | 240 | ||
241 | /* | 241 | /* |
242 | * Delete a VIF entry | 242 | * Delete a VIF entry |
243 | */ | 243 | */ |
244 | 244 | ||
245 | static int vif_delete(int vifi) | 245 | static int vif_delete(int vifi) |
246 | { | 246 | { |
247 | struct vif_device *v; | 247 | struct vif_device *v; |
248 | struct net_device *dev; | 248 | struct net_device *dev; |
249 | struct in_device *in_dev; | 249 | struct in_device *in_dev; |
250 | 250 | ||
251 | if (vifi < 0 || vifi >= maxvif) | 251 | if (vifi < 0 || vifi >= maxvif) |
252 | return -EADDRNOTAVAIL; | 252 | return -EADDRNOTAVAIL; |
253 | 253 | ||
254 | v = &vif_table[vifi]; | 254 | v = &vif_table[vifi]; |
255 | 255 | ||
256 | write_lock_bh(&mrt_lock); | 256 | write_lock_bh(&mrt_lock); |
257 | dev = v->dev; | 257 | dev = v->dev; |
258 | v->dev = NULL; | 258 | v->dev = NULL; |
259 | 259 | ||
260 | if (!dev) { | 260 | if (!dev) { |
261 | write_unlock_bh(&mrt_lock); | 261 | write_unlock_bh(&mrt_lock); |
262 | return -EADDRNOTAVAIL; | 262 | return -EADDRNOTAVAIL; |
263 | } | 263 | } |
264 | 264 | ||
265 | #ifdef CONFIG_IP_PIMSM | 265 | #ifdef CONFIG_IP_PIMSM |
266 | if (vifi == reg_vif_num) | 266 | if (vifi == reg_vif_num) |
267 | reg_vif_num = -1; | 267 | reg_vif_num = -1; |
268 | #endif | 268 | #endif |
269 | 269 | ||
270 | if (vifi+1 == maxvif) { | 270 | if (vifi+1 == maxvif) { |
271 | int tmp; | 271 | int tmp; |
272 | for (tmp=vifi-1; tmp>=0; tmp--) { | 272 | for (tmp=vifi-1; tmp>=0; tmp--) { |
273 | if (VIF_EXISTS(tmp)) | 273 | if (VIF_EXISTS(tmp)) |
274 | break; | 274 | break; |
275 | } | 275 | } |
276 | maxvif = tmp+1; | 276 | maxvif = tmp+1; |
277 | } | 277 | } |
278 | 278 | ||
279 | write_unlock_bh(&mrt_lock); | 279 | write_unlock_bh(&mrt_lock); |
280 | 280 | ||
281 | dev_set_allmulti(dev, -1); | 281 | dev_set_allmulti(dev, -1); |
282 | 282 | ||
283 | if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) { | 283 | if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) { |
284 | in_dev->cnf.mc_forwarding--; | 284 | in_dev->cnf.mc_forwarding--; |
285 | ip_rt_multicast_event(in_dev); | 285 | ip_rt_multicast_event(in_dev); |
286 | } | 286 | } |
287 | 287 | ||
288 | if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER)) | 288 | if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER)) |
289 | unregister_netdevice(dev); | 289 | unregister_netdevice(dev); |
290 | 290 | ||
291 | dev_put(dev); | 291 | dev_put(dev); |
292 | return 0; | 292 | return 0; |
293 | } | 293 | } |
294 | 294 | ||
/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.

   Called with mfc_unres_lock held by the caller; the entry has already
   been unlinked from mfc_unres_queue and the queue-length counter is
   decremented here.
 */

static void ipmr_destroy_unres(struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	atomic_dec(&cache_resolve_queue_len);

	while((skb=skb_dequeue(&c->mfc_un.unres.unresolved))) {
		if (skb->nh.iph->version == 0) {
			/* A queued netlink RTM_GETROUTE request is marked by
			 * a zero IP version field.  Rewrite it in place into
			 * an NLMSG_ERROR reply so the waiting netlink reader
			 * learns the route lookup timed out. */
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			e = NLMSG_DATA(nlh);
			e->error = -ETIMEDOUT;
			memset(&e->msg, 0, sizeof(e->msg));
			/* Best effort: a unicast failure here is not
			 * recoverable, so the return value is ignored. */
			netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
		} else
			/* Ordinary queued data packet: just drop it. */
			kfree_skb(skb);
	}

	kmem_cache_free(mrt_cachep, c);
}
322 | 322 | ||
323 | 323 | ||
/* Single timer process for all the unresolved queue. */

static void ipmr_expire_process(unsigned long dummy)
{
	unsigned long now;
	unsigned long expires;
	struct mfc_cache *c, **cp;

	/* Runs from timer context; if user context currently holds
	 * mfc_unres_lock, retry in HZ/10 rather than spinning here. */
	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
		return;
	}

	if (atomic_read(&cache_resolve_queue_len) == 0)
		goto out;

	now = jiffies;
	expires = 10*HZ;	/* upper bound on the next re-arm interval */
	cp = &mfc_unres_queue;

	while ((c=*cp) != NULL) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			/* Not expired yet: track the soonest deadline so the
			 * timer is re-armed exactly when the next entry dies. */
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			cp = &c->next;
			continue;
		}

		/* Expired: unlink and destroy (also frees queued skbs and
		 * answers pending netlink requests with -ETIMEDOUT). */
		*cp = c->next;

		ipmr_destroy_unres(c);
	}

	if (atomic_read(&cache_resolve_queue_len))
		mod_timer(&ipmr_expire_timer, jiffies + expires);

out:
	spin_unlock(&mfc_unres_lock);
}
364 | 364 | ||
365 | /* Fill oifs list. It is called under write locked mrt_lock. */ | 365 | /* Fill oifs list. It is called under write locked mrt_lock. */ |
366 | 366 | ||
367 | static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls) | 367 | static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls) |
368 | { | 368 | { |
369 | int vifi; | 369 | int vifi; |
370 | 370 | ||
371 | cache->mfc_un.res.minvif = MAXVIFS; | 371 | cache->mfc_un.res.minvif = MAXVIFS; |
372 | cache->mfc_un.res.maxvif = 0; | 372 | cache->mfc_un.res.maxvif = 0; |
373 | memset(cache->mfc_un.res.ttls, 255, MAXVIFS); | 373 | memset(cache->mfc_un.res.ttls, 255, MAXVIFS); |
374 | 374 | ||
375 | for (vifi=0; vifi<maxvif; vifi++) { | 375 | for (vifi=0; vifi<maxvif; vifi++) { |
376 | if (VIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) { | 376 | if (VIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) { |
377 | cache->mfc_un.res.ttls[vifi] = ttls[vifi]; | 377 | cache->mfc_un.res.ttls[vifi] = ttls[vifi]; |
378 | if (cache->mfc_un.res.minvif > vifi) | 378 | if (cache->mfc_un.res.minvif > vifi) |
379 | cache->mfc_un.res.minvif = vifi; | 379 | cache->mfc_un.res.minvif = vifi; |
380 | if (cache->mfc_un.res.maxvif <= vifi) | 380 | if (cache->mfc_un.res.maxvif <= vifi) |
381 | cache->mfc_un.res.maxvif = vifi + 1; | 381 | cache->mfc_un.res.maxvif = vifi + 1; |
382 | } | 382 | } |
383 | } | 383 | } |
384 | } | 384 | } |
385 | 385 | ||
/* Register a new virtual interface (vif) from a userspace MRT_ADD_VIF
 * request.  @mrtsock is true when the request comes from the live mroute
 * daemon socket (non-static entry).  Returns 0 or a negative errno.
 * NOTE(review): appears to run under RTNL (uses __in_dev_get_rtnl) —
 * confirm at call sites.
 */
static int vif_add(struct vifctl *vifc, int mrtsock)
{
	int vifi = vifc->vifc_vifi;
	struct vif_device *v = &vif_table[vifi];
	struct net_device *dev;
	struct in_device *in_dev;

	/* Is vif busy ? */
	if (VIF_EXISTS(vifi))
		return -EADDRINUSE;

	switch (vifc->vifc_flags) {
#ifdef CONFIG_IP_PIMSM
	case VIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ipmr_reg_vif();
		if (!dev)
			return -ENOBUFS;
		break;
#endif
	case VIFF_TUNNEL:
		dev = ipmr_new_tunnel(vifc);
		if (!dev)
			return -ENOBUFS;
		break;
	case 0:
		/* Plain physical interface, identified by its local address.
		 * The ip_dev_find reference is dropped at once; our own
		 * long-term reference is taken via dev_hold below —
		 * presumably safe under RTNL, verify. */
		dev = ip_dev_find(vifc->vifc_lcl_addr.s_addr);
		if (!dev)
			return -EADDRNOTAVAIL;
		dev_put(dev);
		break;
	default:
		return -EINVAL;
	}

	if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
		return -EADDRNOTAVAIL;
	in_dev->cnf.mc_forwarding++;
	dev_set_allmulti(dev, +1);
	ip_rt_multicast_event(in_dev);

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit=vifc->vifc_rate_limit;
	v->local=vifc->vifc_lcl_addr.s_addr;
	v->remote=vifc->vifc_rmt_addr.s_addr;
	v->flags=vifc->vifc_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold=vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	/* Tunnel/register vifs report the underlying device's ifindex. */
	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	dev_hold(dev);
	v->dev=dev;
#ifdef CONFIG_IP_PIMSM
	if (v->flags&VIFF_REGISTER)
		reg_vif_num = vifi;
#endif
	if (vifi+1 > maxvif)
		maxvif = vifi+1;
	write_unlock_bh(&mrt_lock);
	return 0;
}
463 | 463 | ||
464 | static struct mfc_cache *ipmr_cache_find(__u32 origin, __u32 mcastgrp) | 464 | static struct mfc_cache *ipmr_cache_find(__u32 origin, __u32 mcastgrp) |
465 | { | 465 | { |
466 | int line=MFC_HASH(mcastgrp,origin); | 466 | int line=MFC_HASH(mcastgrp,origin); |
467 | struct mfc_cache *c; | 467 | struct mfc_cache *c; |
468 | 468 | ||
469 | for (c=mfc_cache_array[line]; c; c = c->next) { | 469 | for (c=mfc_cache_array[line]; c; c = c->next) { |
470 | if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp) | 470 | if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp) |
471 | break; | 471 | break; |
472 | } | 472 | } |
473 | return c; | 473 | return c; |
474 | } | 474 | } |
475 | 475 | ||
476 | /* | 476 | /* |
477 | * Allocate a multicast cache entry | 477 | * Allocate a multicast cache entry |
478 | */ | 478 | */ |
479 | static struct mfc_cache *ipmr_cache_alloc(void) | 479 | static struct mfc_cache *ipmr_cache_alloc(void) |
480 | { | 480 | { |
481 | struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_KERNEL); | 481 | struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_KERNEL); |
482 | if(c==NULL) | 482 | if(c==NULL) |
483 | return NULL; | 483 | return NULL; |
484 | memset(c, 0, sizeof(*c)); | 484 | memset(c, 0, sizeof(*c)); |
485 | c->mfc_un.res.minvif = MAXVIFS; | 485 | c->mfc_un.res.minvif = MAXVIFS; |
486 | return c; | 486 | return c; |
487 | } | 487 | } |
488 | 488 | ||
489 | static struct mfc_cache *ipmr_cache_alloc_unres(void) | 489 | static struct mfc_cache *ipmr_cache_alloc_unres(void) |
490 | { | 490 | { |
491 | struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_ATOMIC); | 491 | struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_ATOMIC); |
492 | if(c==NULL) | 492 | if(c==NULL) |
493 | return NULL; | 493 | return NULL; |
494 | memset(c, 0, sizeof(*c)); | 494 | memset(c, 0, sizeof(*c)); |
495 | skb_queue_head_init(&c->mfc_un.unres.unresolved); | 495 | skb_queue_head_init(&c->mfc_un.unres.unresolved); |
496 | c->mfc_un.unres.expires = jiffies + 10*HZ; | 496 | c->mfc_un.unres.expires = jiffies + 10*HZ; |
497 | return c; | 497 | return c; |
498 | } | 498 | } |
499 | 499 | ||
500 | /* | 500 | /* |
501 | * A cache entry has gone into a resolved state from queued | 501 | * A cache entry has gone into a resolved state from queued |
502 | */ | 502 | */ |
503 | 503 | ||
504 | static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c) | 504 | static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c) |
505 | { | 505 | { |
506 | struct sk_buff *skb; | 506 | struct sk_buff *skb; |
507 | struct nlmsgerr *e; | 507 | struct nlmsgerr *e; |
508 | 508 | ||
509 | /* | 509 | /* |
510 | * Play the pending entries through our router | 510 | * Play the pending entries through our router |
511 | */ | 511 | */ |
512 | 512 | ||
513 | while((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) { | 513 | while((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) { |
514 | if (skb->nh.iph->version == 0) { | 514 | if (skb->nh.iph->version == 0) { |
515 | int err; | 515 | int err; |
516 | struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); | 516 | struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); |
517 | 517 | ||
518 | if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) { | 518 | if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) { |
519 | nlh->nlmsg_len = skb->tail - (u8*)nlh; | 519 | nlh->nlmsg_len = skb->tail - (u8*)nlh; |
520 | } else { | 520 | } else { |
521 | nlh->nlmsg_type = NLMSG_ERROR; | 521 | nlh->nlmsg_type = NLMSG_ERROR; |
522 | nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); | 522 | nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); |
523 | skb_trim(skb, nlh->nlmsg_len); | 523 | skb_trim(skb, nlh->nlmsg_len); |
524 | e = NLMSG_DATA(nlh); | 524 | e = NLMSG_DATA(nlh); |
525 | e->error = -EMSGSIZE; | 525 | e->error = -EMSGSIZE; |
526 | memset(&e->msg, 0, sizeof(e->msg)); | 526 | memset(&e->msg, 0, sizeof(e->msg)); |
527 | } | 527 | } |
528 | err = netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT); | 528 | err = netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT); |
529 | } else | 529 | } else |
530 | ip_mr_forward(skb, c, 0); | 530 | ip_mr_forward(skb, c, 0); |
531 | } | 531 | } |
532 | } | 532 | } |
533 | 533 | ||
/*
 *	Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 *
 *	Builds an igmpmsg (an IP header whose protocol field is 0, followed
 *	by a pseudo-IGMP header) and queues it on the mroute daemon socket.
 *	Returns 0 on success or a negative errno.
 */

static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
{
	struct sk_buff *skb;
	int ihl = pkt->nh.iph->ihl<<2;
	struct igmphdr *igmp;
	struct igmpmsg *msg;
	int ret;

#ifdef CONFIG_IP_PIMSM
	/* WHOLEPKT reports hand the entire packet to the daemon; only an
	 * extra struct iphdr of headroom is needed in front of it. */
	if (assert == IGMPMSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
	else
#endif
		skb = alloc_skb(128, GFP_ATOMIC);

	if(!skb)
		return -ENOBUFS;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix ihl, length etc.
		   And all this only to mangle msg->im_msgtype and
		   to set msg->im_mbz to "mbz" :-)
		 */
		msg = (struct igmpmsg*)skb_push(skb, sizeof(struct iphdr));
		skb->nh.raw = skb->h.raw = (u8*)msg;
		memcpy(msg, pkt->nh.raw, sizeof(struct iphdr));
		msg->im_msgtype = IGMPMSG_WHOLEPKT;
		msg->im_mbz = 0;
		msg->im_vif = reg_vif_num;
		skb->nh.iph->ihl = sizeof(struct iphdr) >> 2;
		skb->nh.iph->tot_len = htons(ntohs(pkt->nh.iph->tot_len) + sizeof(struct iphdr));
	} else
#endif
	{

	/*
	 *	Copy the IP header
	 */

		skb->nh.iph = (struct iphdr *)skb_put(skb, ihl);
		memcpy(skb->data,pkt->data,ihl);
		skb->nh.iph->protocol = 0;	/* Flag to the kernel this is a route add */
		msg = (struct igmpmsg*)skb->nh.iph;
		msg->im_vif = vifi;
		skb->dst = dst_clone(pkt->dst);

	/*
	 *	Add our header
	 */

		igmp=(struct igmphdr *)skb_put(skb,sizeof(struct igmphdr));
		/* The assert code doubles as both the pseudo-IGMP type and
		 * the igmpmsg message type mrouted switches on. */
		igmp->type =
		msg->im_msgtype = assert;
		igmp->code = 0;
		skb->nh.iph->tot_len=htons(skb->len);	/* Fix the length */
		skb->h.raw = skb->nh.raw;
	}

	if (mroute_socket == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to mrouted
	 */
	if ((ret=sock_queue_rcv_skb(mroute_socket,skb))<0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}
617 | 617 | ||
/*
 *	Queue a packet for resolution. It gets locked cache entry!
 *
 *	Finds (or creates) the unresolved entry for the packet's (S,G),
 *	notifies mrouted on first creation, and appends the skb to the
 *	entry's pending queue.  Returns 0, -ENOBUFS, or the report error.
 */

static int
ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
{
	int err;
	struct mfc_cache *c;

	spin_lock_bh(&mfc_unres_lock);
	/* Is there already an unresolved entry for this (S,G)? */
	for (c=mfc_unres_queue; c; c=c->next) {
		if (c->mfc_mcastgrp == skb->nh.iph->daddr &&
		    c->mfc_origin == skb->nh.iph->saddr)
			break;
	}

	if (c == NULL) {
		/*
		 *	Create a new entry if allowable
		 */

		/* Hard cap of 10 outstanding unresolved entries. */
		if (atomic_read(&cache_resolve_queue_len)>=10 ||
		    (c=ipmr_cache_alloc_unres())==NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mfc_parent=-1;
		c->mfc_origin=skb->nh.iph->saddr;
		c->mfc_mcastgrp=skb->nh.iph->daddr;

		/*
		 *	Reflect first query at mrouted.
		 */
		if ((err = ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE))<0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			kmem_cache_free(mrt_cachep, c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&cache_resolve_queue_len);
		c->next = mfc_unres_queue;
		mfc_unres_queue = c;

		/* Arm the expiry timer for this entry's 10s deadline. */
		mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen>3) {
		/* At most 4 packets queued per unresolved entry. */
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved,skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}
690 | 690 | ||
691 | /* | 691 | /* |
692 | * MFC cache manipulation by user space mroute daemon | 692 | * MFC cache manipulation by user space mroute daemon |
693 | */ | 693 | */ |
694 | 694 | ||
695 | static int ipmr_mfc_delete(struct mfcctl *mfc) | 695 | static int ipmr_mfc_delete(struct mfcctl *mfc) |
696 | { | 696 | { |
697 | int line; | 697 | int line; |
698 | struct mfc_cache *c, **cp; | 698 | struct mfc_cache *c, **cp; |
699 | 699 | ||
700 | line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); | 700 | line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); |
701 | 701 | ||
702 | for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) { | 702 | for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) { |
703 | if (c->mfc_origin == mfc->mfcc_origin.s_addr && | 703 | if (c->mfc_origin == mfc->mfcc_origin.s_addr && |
704 | c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) { | 704 | c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) { |
705 | write_lock_bh(&mrt_lock); | 705 | write_lock_bh(&mrt_lock); |
706 | *cp = c->next; | 706 | *cp = c->next; |
707 | write_unlock_bh(&mrt_lock); | 707 | write_unlock_bh(&mrt_lock); |
708 | 708 | ||
709 | kmem_cache_free(mrt_cachep, c); | 709 | kmem_cache_free(mrt_cachep, c); |
710 | return 0; | 710 | return 0; |
711 | } | 711 | } |
712 | } | 712 | } |
713 | return -ENOENT; | 713 | return -ENOENT; |
714 | } | 714 | } |
715 | 715 | ||
/* Add or update a resolved MFC entry from a userspace MRT_ADD_MFC
 * request.  If an unresolved entry for the same (S,G) was pending, its
 * queued packets are replayed through the new route.  @mrtsock is true
 * when the request comes from the live daemon socket (non-static entry).
 * Returns 0 or a negative errno.
 */
static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
{
	int line;
	struct mfc_cache *uc, *c, **cp;

	line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
			break;
	}

	if (c != NULL) {
		/* Entry already resolved: update parent/TTLs in place. */
		write_lock_bh(&mrt_lock);
		c->mfc_parent = mfc->mfcc_parent;
		ipmr_update_thresholds(c, mfc->mfcc_ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if(!MULTICAST(mfc->mfcc_mcastgrp.s_addr))
		return -EINVAL;

	c=ipmr_cache_alloc();
	if (c==NULL)
		return -ENOMEM;

	/* Fill the new entry before publishing it under the lock. */
	c->mfc_origin=mfc->mfcc_origin.s_addr;
	c->mfc_mcastgrp=mfc->mfcc_mcastgrp.s_addr;
	c->mfc_parent=mfc->mfcc_parent;
	ipmr_update_thresholds(c, mfc->mfcc_ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	c->next = mfc_cache_array[line];
	mfc_cache_array[line] = c;
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	spin_lock_bh(&mfc_unres_lock);
	for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
	     cp = &uc->next) {
		if (uc->mfc_origin == c->mfc_origin &&
		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
			*cp = uc->next;
			/* Stop the expiry timer once nothing is pending. */
			if (atomic_dec_and_test(&cache_resolve_queue_len))
				del_timer(&ipmr_expire_timer);
			break;
		}
	}
	spin_unlock_bh(&mfc_unres_lock);

	if (uc) {
		/* Replay the packets queued while the route was unresolved,
		 * then free the now-unlinked unresolved entry. */
		ipmr_cache_resolve(uc, c);
		kmem_cache_free(mrt_cachep, uc);
	}
	return 0;
}
781 | 781 | ||
/*
 *	Close the multicast socket, and clear the vif tables etc
 *
 *	Deletes every non-static vif and MFC entry, then flushes any
 *	remaining unresolved entries.  Static (VIFF_STATIC / MFC_STATIC)
 *	entries survive the daemon going away.
 */

static void mroute_clean_tables(struct sock *sk)
{
	int i;

	/*
	 *	Shut down all active vif entries
	 */
	for(i=0; i<maxvif; i++) {
		if (!(vif_table[i].flags&VIFF_STATIC))
			vif_delete(i);
	}

	/*
	 *	Wipe the cache
	 */
	for (i=0;i<MFC_LINES;i++) {
		struct mfc_cache *c, **cp;

		cp = &mfc_cache_array[i];
		while ((c = *cp) != NULL) {
			if (c->mfc_flags&MFC_STATIC) {
				cp = &c->next;
				continue;
			}
			/* Unlink under the write lock; free outside it. */
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			kmem_cache_free(mrt_cachep, c);
		}
	}

	if (atomic_read(&cache_resolve_queue_len) != 0) {
		struct mfc_cache *c;

		spin_lock_bh(&mfc_unres_lock);
		while (mfc_unres_queue != NULL) {
			c = mfc_unres_queue;
			mfc_unres_queue = c->next;
			/* Drop the lock around ipmr_destroy_unres: it sends
			 * netlink replies and frees skbs. */
			spin_unlock_bh(&mfc_unres_lock);

			ipmr_destroy_unres(c);

			spin_lock_bh(&mfc_unres_lock);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}
834 | 834 | ||
/* Destructor for the mroute daemon's raw-IGMP socket: if @sk is the
 * registered mroute socket, deregister it and tear down the multicast
 * routing state via mroute_clean_tables().  Runs under RTNL.
 */
static void mrtsock_destruct(struct sock *sk)
{
	rtnl_lock();
	if (sk == mroute_socket) {
		ipv4_devconf.mc_forwarding--;

		/* Publish "no daemon" before cleaning the tables so the
		 * forwarding path stops reporting to this socket. */
		write_lock_bh(&mrt_lock);
		mroute_socket=NULL;
		write_unlock_bh(&mrt_lock);

		mroute_clean_tables(sk);
	}
	rtnl_unlock();
}
849 | 849 | ||
850 | /* | 850 | /* |
851 | * Socket options and virtual interface manipulation. The whole | 851 | * Socket options and virtual interface manipulation. The whole |
852 | * virtual interface system is a complete heap, but unfortunately | 852 | * virtual interface system is a complete heap, but unfortunately |
853 | * that's how BSD mrouted happens to think. Maybe one day with a proper | 853 | * that's how BSD mrouted happens to think. Maybe one day with a proper |
854 | * MOSPF/PIM router set up we can clean this up. | 854 | * MOSPF/PIM router set up we can clean this up. |
855 | */ | 855 | */ |
856 | 856 | ||
857 | int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int optlen) | 857 | int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int optlen) |
858 | { | 858 | { |
859 | int ret; | 859 | int ret; |
860 | struct vifctl vif; | 860 | struct vifctl vif; |
861 | struct mfcctl mfc; | 861 | struct mfcctl mfc; |
862 | 862 | ||
863 | if(optname!=MRT_INIT) | 863 | if(optname!=MRT_INIT) |
864 | { | 864 | { |
865 | if(sk!=mroute_socket && !capable(CAP_NET_ADMIN)) | 865 | if(sk!=mroute_socket && !capable(CAP_NET_ADMIN)) |
866 | return -EACCES; | 866 | return -EACCES; |
867 | } | 867 | } |
868 | 868 | ||
869 | switch(optname) | 869 | switch(optname) |
870 | { | 870 | { |
871 | case MRT_INIT: | 871 | case MRT_INIT: |
872 | if (sk->sk_type != SOCK_RAW || | 872 | if (sk->sk_type != SOCK_RAW || |
873 | inet_sk(sk)->num != IPPROTO_IGMP) | 873 | inet_sk(sk)->num != IPPROTO_IGMP) |
874 | return -EOPNOTSUPP; | 874 | return -EOPNOTSUPP; |
875 | if(optlen!=sizeof(int)) | 875 | if(optlen!=sizeof(int)) |
876 | return -ENOPROTOOPT; | 876 | return -ENOPROTOOPT; |
877 | 877 | ||
878 | rtnl_lock(); | 878 | rtnl_lock(); |
879 | if (mroute_socket) { | 879 | if (mroute_socket) { |
880 | rtnl_unlock(); | 880 | rtnl_unlock(); |
881 | return -EADDRINUSE; | 881 | return -EADDRINUSE; |
882 | } | 882 | } |
883 | 883 | ||
884 | ret = ip_ra_control(sk, 1, mrtsock_destruct); | 884 | ret = ip_ra_control(sk, 1, mrtsock_destruct); |
885 | if (ret == 0) { | 885 | if (ret == 0) { |
886 | write_lock_bh(&mrt_lock); | 886 | write_lock_bh(&mrt_lock); |
887 | mroute_socket=sk; | 887 | mroute_socket=sk; |
888 | write_unlock_bh(&mrt_lock); | 888 | write_unlock_bh(&mrt_lock); |
889 | 889 | ||
890 | ipv4_devconf.mc_forwarding++; | 890 | ipv4_devconf.mc_forwarding++; |
891 | } | 891 | } |
892 | rtnl_unlock(); | 892 | rtnl_unlock(); |
893 | return ret; | 893 | return ret; |
894 | case MRT_DONE: | 894 | case MRT_DONE: |
895 | if (sk!=mroute_socket) | 895 | if (sk!=mroute_socket) |
896 | return -EACCES; | 896 | return -EACCES; |
897 | return ip_ra_control(sk, 0, NULL); | 897 | return ip_ra_control(sk, 0, NULL); |
898 | case MRT_ADD_VIF: | 898 | case MRT_ADD_VIF: |
899 | case MRT_DEL_VIF: | 899 | case MRT_DEL_VIF: |
900 | if(optlen!=sizeof(vif)) | 900 | if(optlen!=sizeof(vif)) |
901 | return -EINVAL; | 901 | return -EINVAL; |
902 | if (copy_from_user(&vif,optval,sizeof(vif))) | 902 | if (copy_from_user(&vif,optval,sizeof(vif))) |
903 | return -EFAULT; | 903 | return -EFAULT; |
904 | if(vif.vifc_vifi >= MAXVIFS) | 904 | if(vif.vifc_vifi >= MAXVIFS) |
905 | return -ENFILE; | 905 | return -ENFILE; |
906 | rtnl_lock(); | 906 | rtnl_lock(); |
907 | if (optname==MRT_ADD_VIF) { | 907 | if (optname==MRT_ADD_VIF) { |
908 | ret = vif_add(&vif, sk==mroute_socket); | 908 | ret = vif_add(&vif, sk==mroute_socket); |
909 | } else { | 909 | } else { |
910 | ret = vif_delete(vif.vifc_vifi); | 910 | ret = vif_delete(vif.vifc_vifi); |
911 | } | 911 | } |
912 | rtnl_unlock(); | 912 | rtnl_unlock(); |
913 | return ret; | 913 | return ret; |
914 | 914 | ||
915 | /* | 915 | /* |
916 | * Manipulate the forwarding caches. These live | 916 | * Manipulate the forwarding caches. These live |
917 | * in a sort of kernel/user symbiosis. | 917 | * in a sort of kernel/user symbiosis. |
918 | */ | 918 | */ |
919 | case MRT_ADD_MFC: | 919 | case MRT_ADD_MFC: |
920 | case MRT_DEL_MFC: | 920 | case MRT_DEL_MFC: |
921 | if(optlen!=sizeof(mfc)) | 921 | if(optlen!=sizeof(mfc)) |
922 | return -EINVAL; | 922 | return -EINVAL; |
923 | if (copy_from_user(&mfc,optval, sizeof(mfc))) | 923 | if (copy_from_user(&mfc,optval, sizeof(mfc))) |
924 | return -EFAULT; | 924 | return -EFAULT; |
925 | rtnl_lock(); | 925 | rtnl_lock(); |
926 | if (optname==MRT_DEL_MFC) | 926 | if (optname==MRT_DEL_MFC) |
927 | ret = ipmr_mfc_delete(&mfc); | 927 | ret = ipmr_mfc_delete(&mfc); |
928 | else | 928 | else |
929 | ret = ipmr_mfc_add(&mfc, sk==mroute_socket); | 929 | ret = ipmr_mfc_add(&mfc, sk==mroute_socket); |
930 | rtnl_unlock(); | 930 | rtnl_unlock(); |
931 | return ret; | 931 | return ret; |
932 | /* | 932 | /* |
933 | * Control PIM assert. | 933 | * Control PIM assert. |
934 | */ | 934 | */ |
935 | case MRT_ASSERT: | 935 | case MRT_ASSERT: |
936 | { | 936 | { |
937 | int v; | 937 | int v; |
938 | if(get_user(v,(int __user *)optval)) | 938 | if(get_user(v,(int __user *)optval)) |
939 | return -EFAULT; | 939 | return -EFAULT; |
940 | mroute_do_assert=(v)?1:0; | 940 | mroute_do_assert=(v)?1:0; |
941 | return 0; | 941 | return 0; |
942 | } | 942 | } |
943 | #ifdef CONFIG_IP_PIMSM | 943 | #ifdef CONFIG_IP_PIMSM |
944 | case MRT_PIM: | 944 | case MRT_PIM: |
945 | { | 945 | { |
946 | int v, ret; | 946 | int v, ret; |
947 | if(get_user(v,(int __user *)optval)) | 947 | if(get_user(v,(int __user *)optval)) |
948 | return -EFAULT; | 948 | return -EFAULT; |
949 | v = (v)?1:0; | 949 | v = (v)?1:0; |
950 | rtnl_lock(); | 950 | rtnl_lock(); |
951 | ret = 0; | 951 | ret = 0; |
952 | if (v != mroute_do_pim) { | 952 | if (v != mroute_do_pim) { |
953 | mroute_do_pim = v; | 953 | mroute_do_pim = v; |
954 | mroute_do_assert = v; | 954 | mroute_do_assert = v; |
955 | #ifdef CONFIG_IP_PIMSM_V2 | 955 | #ifdef CONFIG_IP_PIMSM_V2 |
956 | if (mroute_do_pim) | 956 | if (mroute_do_pim) |
957 | ret = inet_add_protocol(&pim_protocol, | 957 | ret = inet_add_protocol(&pim_protocol, |
958 | IPPROTO_PIM); | 958 | IPPROTO_PIM); |
959 | else | 959 | else |
960 | ret = inet_del_protocol(&pim_protocol, | 960 | ret = inet_del_protocol(&pim_protocol, |
961 | IPPROTO_PIM); | 961 | IPPROTO_PIM); |
962 | if (ret < 0) | 962 | if (ret < 0) |
963 | ret = -EAGAIN; | 963 | ret = -EAGAIN; |
964 | #endif | 964 | #endif |
965 | } | 965 | } |
966 | rtnl_unlock(); | 966 | rtnl_unlock(); |
967 | return ret; | 967 | return ret; |
968 | } | 968 | } |
969 | #endif | 969 | #endif |
970 | /* | 970 | /* |
971 | * Spurious command, or MRT_VERSION which you cannot | 971 | * Spurious command, or MRT_VERSION which you cannot |
972 | * set. | 972 | * set. |
973 | */ | 973 | */ |
974 | default: | 974 | default: |
975 | return -ENOPROTOOPT; | 975 | return -ENOPROTOOPT; |
976 | } | 976 | } |
977 | } | 977 | } |
978 | 978 | ||
979 | /* | 979 | /* |
980 | * Getsock opt support for the multicast routing system. | 980 | * Getsock opt support for the multicast routing system. |
981 | */ | 981 | */ |
982 | 982 | ||
983 | int ip_mroute_getsockopt(struct sock *sk,int optname,char __user *optval,int __user *optlen) | 983 | int ip_mroute_getsockopt(struct sock *sk,int optname,char __user *optval,int __user *optlen) |
984 | { | 984 | { |
985 | int olr; | 985 | int olr; |
986 | int val; | 986 | int val; |
987 | 987 | ||
988 | if(optname!=MRT_VERSION && | 988 | if(optname!=MRT_VERSION && |
989 | #ifdef CONFIG_IP_PIMSM | 989 | #ifdef CONFIG_IP_PIMSM |
990 | optname!=MRT_PIM && | 990 | optname!=MRT_PIM && |
991 | #endif | 991 | #endif |
992 | optname!=MRT_ASSERT) | 992 | optname!=MRT_ASSERT) |
993 | return -ENOPROTOOPT; | 993 | return -ENOPROTOOPT; |
994 | 994 | ||
995 | if (get_user(olr, optlen)) | 995 | if (get_user(olr, optlen)) |
996 | return -EFAULT; | 996 | return -EFAULT; |
997 | 997 | ||
998 | olr = min_t(unsigned int, olr, sizeof(int)); | 998 | olr = min_t(unsigned int, olr, sizeof(int)); |
999 | if (olr < 0) | 999 | if (olr < 0) |
1000 | return -EINVAL; | 1000 | return -EINVAL; |
1001 | 1001 | ||
1002 | if(put_user(olr,optlen)) | 1002 | if(put_user(olr,optlen)) |
1003 | return -EFAULT; | 1003 | return -EFAULT; |
1004 | if(optname==MRT_VERSION) | 1004 | if(optname==MRT_VERSION) |
1005 | val=0x0305; | 1005 | val=0x0305; |
1006 | #ifdef CONFIG_IP_PIMSM | 1006 | #ifdef CONFIG_IP_PIMSM |
1007 | else if(optname==MRT_PIM) | 1007 | else if(optname==MRT_PIM) |
1008 | val=mroute_do_pim; | 1008 | val=mroute_do_pim; |
1009 | #endif | 1009 | #endif |
1010 | else | 1010 | else |
1011 | val=mroute_do_assert; | 1011 | val=mroute_do_assert; |
1012 | if(copy_to_user(optval,&val,olr)) | 1012 | if(copy_to_user(optval,&val,olr)) |
1013 | return -EFAULT; | 1013 | return -EFAULT; |
1014 | return 0; | 1014 | return 0; |
1015 | } | 1015 | } |
1016 | 1016 | ||
1017 | /* | 1017 | /* |
1018 | * The IP multicast ioctl support routines. | 1018 | * The IP multicast ioctl support routines. |
1019 | */ | 1019 | */ |
1020 | 1020 | ||
1021 | int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) | 1021 | int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) |
1022 | { | 1022 | { |
1023 | struct sioc_sg_req sr; | 1023 | struct sioc_sg_req sr; |
1024 | struct sioc_vif_req vr; | 1024 | struct sioc_vif_req vr; |
1025 | struct vif_device *vif; | 1025 | struct vif_device *vif; |
1026 | struct mfc_cache *c; | 1026 | struct mfc_cache *c; |
1027 | 1027 | ||
1028 | switch(cmd) | 1028 | switch(cmd) |
1029 | { | 1029 | { |
1030 | case SIOCGETVIFCNT: | 1030 | case SIOCGETVIFCNT: |
1031 | if (copy_from_user(&vr,arg,sizeof(vr))) | 1031 | if (copy_from_user(&vr,arg,sizeof(vr))) |
1032 | return -EFAULT; | 1032 | return -EFAULT; |
1033 | if(vr.vifi>=maxvif) | 1033 | if(vr.vifi>=maxvif) |
1034 | return -EINVAL; | 1034 | return -EINVAL; |
1035 | read_lock(&mrt_lock); | 1035 | read_lock(&mrt_lock); |
1036 | vif=&vif_table[vr.vifi]; | 1036 | vif=&vif_table[vr.vifi]; |
1037 | if(VIF_EXISTS(vr.vifi)) { | 1037 | if(VIF_EXISTS(vr.vifi)) { |
1038 | vr.icount=vif->pkt_in; | 1038 | vr.icount=vif->pkt_in; |
1039 | vr.ocount=vif->pkt_out; | 1039 | vr.ocount=vif->pkt_out; |
1040 | vr.ibytes=vif->bytes_in; | 1040 | vr.ibytes=vif->bytes_in; |
1041 | vr.obytes=vif->bytes_out; | 1041 | vr.obytes=vif->bytes_out; |
1042 | read_unlock(&mrt_lock); | 1042 | read_unlock(&mrt_lock); |
1043 | 1043 | ||
1044 | if (copy_to_user(arg,&vr,sizeof(vr))) | 1044 | if (copy_to_user(arg,&vr,sizeof(vr))) |
1045 | return -EFAULT; | 1045 | return -EFAULT; |
1046 | return 0; | 1046 | return 0; |
1047 | } | 1047 | } |
1048 | read_unlock(&mrt_lock); | 1048 | read_unlock(&mrt_lock); |
1049 | return -EADDRNOTAVAIL; | 1049 | return -EADDRNOTAVAIL; |
1050 | case SIOCGETSGCNT: | 1050 | case SIOCGETSGCNT: |
1051 | if (copy_from_user(&sr,arg,sizeof(sr))) | 1051 | if (copy_from_user(&sr,arg,sizeof(sr))) |
1052 | return -EFAULT; | 1052 | return -EFAULT; |
1053 | 1053 | ||
1054 | read_lock(&mrt_lock); | 1054 | read_lock(&mrt_lock); |
1055 | c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr); | 1055 | c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr); |
1056 | if (c) { | 1056 | if (c) { |
1057 | sr.pktcnt = c->mfc_un.res.pkt; | 1057 | sr.pktcnt = c->mfc_un.res.pkt; |
1058 | sr.bytecnt = c->mfc_un.res.bytes; | 1058 | sr.bytecnt = c->mfc_un.res.bytes; |
1059 | sr.wrong_if = c->mfc_un.res.wrong_if; | 1059 | sr.wrong_if = c->mfc_un.res.wrong_if; |
1060 | read_unlock(&mrt_lock); | 1060 | read_unlock(&mrt_lock); |
1061 | 1061 | ||
1062 | if (copy_to_user(arg,&sr,sizeof(sr))) | 1062 | if (copy_to_user(arg,&sr,sizeof(sr))) |
1063 | return -EFAULT; | 1063 | return -EFAULT; |
1064 | return 0; | 1064 | return 0; |
1065 | } | 1065 | } |
1066 | read_unlock(&mrt_lock); | 1066 | read_unlock(&mrt_lock); |
1067 | return -EADDRNOTAVAIL; | 1067 | return -EADDRNOTAVAIL; |
1068 | default: | 1068 | default: |
1069 | return -ENOIOCTLCMD; | 1069 | return -ENOIOCTLCMD; |
1070 | } | 1070 | } |
1071 | } | 1071 | } |
1072 | 1072 | ||
1073 | 1073 | ||
1074 | static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr) | 1074 | static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr) |
1075 | { | 1075 | { |
1076 | struct vif_device *v; | 1076 | struct vif_device *v; |
1077 | int ct; | 1077 | int ct; |
1078 | if (event != NETDEV_UNREGISTER) | 1078 | if (event != NETDEV_UNREGISTER) |
1079 | return NOTIFY_DONE; | 1079 | return NOTIFY_DONE; |
1080 | v=&vif_table[0]; | 1080 | v=&vif_table[0]; |
1081 | for(ct=0;ct<maxvif;ct++,v++) { | 1081 | for(ct=0;ct<maxvif;ct++,v++) { |
1082 | if (v->dev==ptr) | 1082 | if (v->dev==ptr) |
1083 | vif_delete(ct); | 1083 | vif_delete(ct); |
1084 | } | 1084 | } |
1085 | return NOTIFY_DONE; | 1085 | return NOTIFY_DONE; |
1086 | } | 1086 | } |
1087 | 1087 | ||
1088 | 1088 | ||
1089 | static struct notifier_block ip_mr_notifier={ | 1089 | static struct notifier_block ip_mr_notifier={ |
1090 | .notifier_call = ipmr_device_event, | 1090 | .notifier_call = ipmr_device_event, |
1091 | }; | 1091 | }; |
1092 | 1092 | ||
1093 | /* | 1093 | /* |
1094 | * Encapsulate a packet by attaching a valid IPIP header to it. | 1094 | * Encapsulate a packet by attaching a valid IPIP header to it. |
1095 | * This avoids tunnel drivers and other mess and gives us the speed so | 1095 | * This avoids tunnel drivers and other mess and gives us the speed so |
1096 | * important for multicast video. | 1096 | * important for multicast video. |
1097 | */ | 1097 | */ |
1098 | 1098 | ||
1099 | static void ip_encap(struct sk_buff *skb, u32 saddr, u32 daddr) | 1099 | static void ip_encap(struct sk_buff *skb, u32 saddr, u32 daddr) |
1100 | { | 1100 | { |
1101 | struct iphdr *iph = (struct iphdr *)skb_push(skb,sizeof(struct iphdr)); | 1101 | struct iphdr *iph = (struct iphdr *)skb_push(skb,sizeof(struct iphdr)); |
1102 | 1102 | ||
1103 | iph->version = 4; | 1103 | iph->version = 4; |
1104 | iph->tos = skb->nh.iph->tos; | 1104 | iph->tos = skb->nh.iph->tos; |
1105 | iph->ttl = skb->nh.iph->ttl; | 1105 | iph->ttl = skb->nh.iph->ttl; |
1106 | iph->frag_off = 0; | 1106 | iph->frag_off = 0; |
1107 | iph->daddr = daddr; | 1107 | iph->daddr = daddr; |
1108 | iph->saddr = saddr; | 1108 | iph->saddr = saddr; |
1109 | iph->protocol = IPPROTO_IPIP; | 1109 | iph->protocol = IPPROTO_IPIP; |
1110 | iph->ihl = 5; | 1110 | iph->ihl = 5; |
1111 | iph->tot_len = htons(skb->len); | 1111 | iph->tot_len = htons(skb->len); |
1112 | ip_select_ident(iph, skb->dst, NULL); | 1112 | ip_select_ident(iph, skb->dst, NULL); |
1113 | ip_send_check(iph); | 1113 | ip_send_check(iph); |
1114 | 1114 | ||
1115 | skb->h.ipiph = skb->nh.iph; | 1115 | skb->h.ipiph = skb->nh.iph; |
1116 | skb->nh.iph = iph; | 1116 | skb->nh.iph = iph; |
1117 | memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); | 1117 | memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); |
1118 | nf_reset(skb); | 1118 | nf_reset(skb); |
1119 | } | 1119 | } |
1120 | 1120 | ||
1121 | static inline int ipmr_forward_finish(struct sk_buff *skb) | 1121 | static inline int ipmr_forward_finish(struct sk_buff *skb) |
1122 | { | 1122 | { |
1123 | struct ip_options * opt = &(IPCB(skb)->opt); | 1123 | struct ip_options * opt = &(IPCB(skb)->opt); |
1124 | 1124 | ||
1125 | IP_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS); | 1125 | IP_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS); |
1126 | 1126 | ||
1127 | if (unlikely(opt->optlen)) | 1127 | if (unlikely(opt->optlen)) |
1128 | ip_forward_options(skb); | 1128 | ip_forward_options(skb); |
1129 | 1129 | ||
1130 | return dst_output(skb); | 1130 | return dst_output(skb); |
1131 | } | 1131 | } |
1132 | 1132 | ||
1133 | /* | 1133 | /* |
1134 | * Processing handlers for ipmr_forward | 1134 | * Processing handlers for ipmr_forward |
1135 | */ | 1135 | */ |
1136 | 1136 | ||
1137 | static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) | 1137 | static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) |
1138 | { | 1138 | { |
1139 | struct iphdr *iph = skb->nh.iph; | 1139 | struct iphdr *iph = skb->nh.iph; |
1140 | struct vif_device *vif = &vif_table[vifi]; | 1140 | struct vif_device *vif = &vif_table[vifi]; |
1141 | struct net_device *dev; | 1141 | struct net_device *dev; |
1142 | struct rtable *rt; | 1142 | struct rtable *rt; |
1143 | int encap = 0; | 1143 | int encap = 0; |
1144 | 1144 | ||
1145 | if (vif->dev == NULL) | 1145 | if (vif->dev == NULL) |
1146 | goto out_free; | 1146 | goto out_free; |
1147 | 1147 | ||
1148 | #ifdef CONFIG_IP_PIMSM | 1148 | #ifdef CONFIG_IP_PIMSM |
1149 | if (vif->flags & VIFF_REGISTER) { | 1149 | if (vif->flags & VIFF_REGISTER) { |
1150 | vif->pkt_out++; | 1150 | vif->pkt_out++; |
1151 | vif->bytes_out+=skb->len; | 1151 | vif->bytes_out+=skb->len; |
1152 | ((struct net_device_stats*)netdev_priv(vif->dev))->tx_bytes += skb->len; | 1152 | ((struct net_device_stats*)netdev_priv(vif->dev))->tx_bytes += skb->len; |
1153 | ((struct net_device_stats*)netdev_priv(vif->dev))->tx_packets++; | 1153 | ((struct net_device_stats*)netdev_priv(vif->dev))->tx_packets++; |
1154 | ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT); | 1154 | ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT); |
1155 | kfree_skb(skb); | 1155 | kfree_skb(skb); |
1156 | return; | 1156 | return; |
1157 | } | 1157 | } |
1158 | #endif | 1158 | #endif |
1159 | 1159 | ||
1160 | if (vif->flags&VIFF_TUNNEL) { | 1160 | if (vif->flags&VIFF_TUNNEL) { |
1161 | struct flowi fl = { .oif = vif->link, | 1161 | struct flowi fl = { .oif = vif->link, |
1162 | .nl_u = { .ip4_u = | 1162 | .nl_u = { .ip4_u = |
1163 | { .daddr = vif->remote, | 1163 | { .daddr = vif->remote, |
1164 | .saddr = vif->local, | 1164 | .saddr = vif->local, |
1165 | .tos = RT_TOS(iph->tos) } }, | 1165 | .tos = RT_TOS(iph->tos) } }, |
1166 | .proto = IPPROTO_IPIP }; | 1166 | .proto = IPPROTO_IPIP }; |
1167 | if (ip_route_output_key(&rt, &fl)) | 1167 | if (ip_route_output_key(&rt, &fl)) |
1168 | goto out_free; | 1168 | goto out_free; |
1169 | encap = sizeof(struct iphdr); | 1169 | encap = sizeof(struct iphdr); |
1170 | } else { | 1170 | } else { |
1171 | struct flowi fl = { .oif = vif->link, | 1171 | struct flowi fl = { .oif = vif->link, |
1172 | .nl_u = { .ip4_u = | 1172 | .nl_u = { .ip4_u = |
1173 | { .daddr = iph->daddr, | 1173 | { .daddr = iph->daddr, |
1174 | .tos = RT_TOS(iph->tos) } }, | 1174 | .tos = RT_TOS(iph->tos) } }, |
1175 | .proto = IPPROTO_IPIP }; | 1175 | .proto = IPPROTO_IPIP }; |
1176 | if (ip_route_output_key(&rt, &fl)) | 1176 | if (ip_route_output_key(&rt, &fl)) |
1177 | goto out_free; | 1177 | goto out_free; |
1178 | } | 1178 | } |
1179 | 1179 | ||
1180 | dev = rt->u.dst.dev; | 1180 | dev = rt->u.dst.dev; |
1181 | 1181 | ||
1182 | if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) { | 1182 | if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) { |
1183 | /* Do not fragment multicasts. Alas, IPv4 does not | 1183 | /* Do not fragment multicasts. Alas, IPv4 does not |
1184 | allow to send ICMP, so that packets will disappear | 1184 | allow to send ICMP, so that packets will disappear |
1185 | to blackhole. | 1185 | to blackhole. |
1186 | */ | 1186 | */ |
1187 | 1187 | ||
1188 | IP_INC_STATS_BH(IPSTATS_MIB_FRAGFAILS); | 1188 | IP_INC_STATS_BH(IPSTATS_MIB_FRAGFAILS); |
1189 | ip_rt_put(rt); | 1189 | ip_rt_put(rt); |
1190 | goto out_free; | 1190 | goto out_free; |
1191 | } | 1191 | } |
1192 | 1192 | ||
1193 | encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len; | 1193 | encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len; |
1194 | 1194 | ||
1195 | if (skb_cow(skb, encap)) { | 1195 | if (skb_cow(skb, encap)) { |
1196 | ip_rt_put(rt); | 1196 | ip_rt_put(rt); |
1197 | goto out_free; | 1197 | goto out_free; |
1198 | } | 1198 | } |
1199 | 1199 | ||
1200 | vif->pkt_out++; | 1200 | vif->pkt_out++; |
1201 | vif->bytes_out+=skb->len; | 1201 | vif->bytes_out+=skb->len; |
1202 | 1202 | ||
1203 | dst_release(skb->dst); | 1203 | dst_release(skb->dst); |
1204 | skb->dst = &rt->u.dst; | 1204 | skb->dst = &rt->u.dst; |
1205 | iph = skb->nh.iph; | 1205 | iph = skb->nh.iph; |
1206 | ip_decrease_ttl(iph); | 1206 | ip_decrease_ttl(iph); |
1207 | 1207 | ||
1208 | /* FIXME: forward and output firewalls used to be called here. | 1208 | /* FIXME: forward and output firewalls used to be called here. |
1209 | * What do we do with netfilter? -- RR */ | 1209 | * What do we do with netfilter? -- RR */ |
1210 | if (vif->flags & VIFF_TUNNEL) { | 1210 | if (vif->flags & VIFF_TUNNEL) { |
1211 | ip_encap(skb, vif->local, vif->remote); | 1211 | ip_encap(skb, vif->local, vif->remote); |
1212 | /* FIXME: extra output firewall step used to be here. --RR */ | 1212 | /* FIXME: extra output firewall step used to be here. --RR */ |
1213 | ((struct ip_tunnel *)netdev_priv(vif->dev))->stat.tx_packets++; | 1213 | ((struct ip_tunnel *)netdev_priv(vif->dev))->stat.tx_packets++; |
1214 | ((struct ip_tunnel *)netdev_priv(vif->dev))->stat.tx_bytes+=skb->len; | 1214 | ((struct ip_tunnel *)netdev_priv(vif->dev))->stat.tx_bytes+=skb->len; |
1215 | } | 1215 | } |
1216 | 1216 | ||
1217 | IPCB(skb)->flags |= IPSKB_FORWARDED; | 1217 | IPCB(skb)->flags |= IPSKB_FORWARDED; |
1218 | 1218 | ||
1219 | /* | 1219 | /* |
1220 | * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally | 1220 | * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally |
1221 | * not only before forwarding, but after forwarding on all output | 1221 | * not only before forwarding, but after forwarding on all output |
1222 | * interfaces. It is clear, if mrouter runs a multicasting | 1222 | * interfaces. It is clear, if mrouter runs a multicasting |
1223 | * program, it should receive packets not depending to what interface | 1223 | * program, it should receive packets not depending to what interface |
1224 | * program is joined. | 1224 | * program is joined. |
1225 | * If we will not make it, the program will have to join on all | 1225 | * If we will not make it, the program will have to join on all |
1226 | * interfaces. On the other hand, multihoming host (or router, but | 1226 | * interfaces. On the other hand, multihoming host (or router, but |
1227 | * not mrouter) cannot join to more than one interface - it will | 1227 | * not mrouter) cannot join to more than one interface - it will |
1228 | * result in receiving multiple packets. | 1228 | * result in receiving multiple packets. |
1229 | */ | 1229 | */ |
1230 | NF_HOOK(PF_INET, NF_IP_FORWARD, skb, skb->dev, dev, | 1230 | NF_HOOK(PF_INET, NF_IP_FORWARD, skb, skb->dev, dev, |
1231 | ipmr_forward_finish); | 1231 | ipmr_forward_finish); |
1232 | return; | 1232 | return; |
1233 | 1233 | ||
1234 | out_free: | 1234 | out_free: |
1235 | kfree_skb(skb); | 1235 | kfree_skb(skb); |
1236 | return; | 1236 | return; |
1237 | } | 1237 | } |
1238 | 1238 | ||
1239 | static int ipmr_find_vif(struct net_device *dev) | 1239 | static int ipmr_find_vif(struct net_device *dev) |
1240 | { | 1240 | { |
1241 | int ct; | 1241 | int ct; |
1242 | for (ct=maxvif-1; ct>=0; ct--) { | 1242 | for (ct=maxvif-1; ct>=0; ct--) { |
1243 | if (vif_table[ct].dev == dev) | 1243 | if (vif_table[ct].dev == dev) |
1244 | break; | 1244 | break; |
1245 | } | 1245 | } |
1246 | return ct; | 1246 | return ct; |
1247 | } | 1247 | } |
1248 | 1248 | ||
1249 | /* "local" means that we should preserve one skb (for local delivery) */ | 1249 | /* "local" means that we should preserve one skb (for local delivery) */ |
1250 | 1250 | ||
1251 | static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local) | 1251 | static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local) |
1252 | { | 1252 | { |
1253 | int psend = -1; | 1253 | int psend = -1; |
1254 | int vif, ct; | 1254 | int vif, ct; |
1255 | 1255 | ||
1256 | vif = cache->mfc_parent; | 1256 | vif = cache->mfc_parent; |
1257 | cache->mfc_un.res.pkt++; | 1257 | cache->mfc_un.res.pkt++; |
1258 | cache->mfc_un.res.bytes += skb->len; | 1258 | cache->mfc_un.res.bytes += skb->len; |
1259 | 1259 | ||
1260 | /* | 1260 | /* |
1261 | * Wrong interface: drop packet and (maybe) send PIM assert. | 1261 | * Wrong interface: drop packet and (maybe) send PIM assert. |
1262 | */ | 1262 | */ |
1263 | if (vif_table[vif].dev != skb->dev) { | 1263 | if (vif_table[vif].dev != skb->dev) { |
1264 | int true_vifi; | 1264 | int true_vifi; |
1265 | 1265 | ||
1266 | if (((struct rtable*)skb->dst)->fl.iif == 0) { | 1266 | if (((struct rtable*)skb->dst)->fl.iif == 0) { |
1267 | /* It is our own packet, looped back. | 1267 | /* It is our own packet, looped back. |
1268 | Very complicated situation... | 1268 | Very complicated situation... |
1269 | 1269 | ||
1270 | The best workaround until routing daemons will be | 1270 | The best workaround until routing daemons will be |
1271 | fixed is not to redistribute packet, if it was | 1271 | fixed is not to redistribute packet, if it was |
1272 | send through wrong interface. It means, that | 1272 | send through wrong interface. It means, that |
1273 | multicast applications WILL NOT work for | 1273 | multicast applications WILL NOT work for |
1274 | (S,G), which have default multicast route pointing | 1274 | (S,G), which have default multicast route pointing |
1275 | to wrong oif. In any case, it is not a good | 1275 | to wrong oif. In any case, it is not a good |
1276 | idea to use multicasting applications on router. | 1276 | idea to use multicasting applications on router. |
1277 | */ | 1277 | */ |
1278 | goto dont_forward; | 1278 | goto dont_forward; |
1279 | } | 1279 | } |
1280 | 1280 | ||
1281 | cache->mfc_un.res.wrong_if++; | 1281 | cache->mfc_un.res.wrong_if++; |
1282 | true_vifi = ipmr_find_vif(skb->dev); | 1282 | true_vifi = ipmr_find_vif(skb->dev); |
1283 | 1283 | ||
1284 | if (true_vifi >= 0 && mroute_do_assert && | 1284 | if (true_vifi >= 0 && mroute_do_assert && |
1285 | /* pimsm uses asserts, when switching from RPT to SPT, | 1285 | /* pimsm uses asserts, when switching from RPT to SPT, |
1286 | so that we cannot check that packet arrived on an oif. | 1286 | so that we cannot check that packet arrived on an oif. |
1287 | It is bad, but otherwise we would need to move pretty | 1287 | It is bad, but otherwise we would need to move pretty |
1288 | large chunk of pimd to kernel. Ough... --ANK | 1288 | large chunk of pimd to kernel. Ough... --ANK |
1289 | */ | 1289 | */ |
1290 | (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) && | 1290 | (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) && |
1291 | time_after(jiffies, | 1291 | time_after(jiffies, |
1292 | cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) { | 1292 | cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) { |
1293 | cache->mfc_un.res.last_assert = jiffies; | 1293 | cache->mfc_un.res.last_assert = jiffies; |
1294 | ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF); | 1294 | ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF); |
1295 | } | 1295 | } |
1296 | goto dont_forward; | 1296 | goto dont_forward; |
1297 | } | 1297 | } |
1298 | 1298 | ||
1299 | vif_table[vif].pkt_in++; | 1299 | vif_table[vif].pkt_in++; |
1300 | vif_table[vif].bytes_in+=skb->len; | 1300 | vif_table[vif].bytes_in+=skb->len; |
1301 | 1301 | ||
1302 | /* | 1302 | /* |
1303 | * Forward the frame | 1303 | * Forward the frame |
1304 | */ | 1304 | */ |
1305 | for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) { | 1305 | for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) { |
1306 | if (skb->nh.iph->ttl > cache->mfc_un.res.ttls[ct]) { | 1306 | if (skb->nh.iph->ttl > cache->mfc_un.res.ttls[ct]) { |
1307 | if (psend != -1) { | 1307 | if (psend != -1) { |
1308 | struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); | 1308 | struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); |
1309 | if (skb2) | 1309 | if (skb2) |
1310 | ipmr_queue_xmit(skb2, cache, psend); | 1310 | ipmr_queue_xmit(skb2, cache, psend); |
1311 | } | 1311 | } |
1312 | psend=ct; | 1312 | psend=ct; |
1313 | } | 1313 | } |
1314 | } | 1314 | } |
1315 | if (psend != -1) { | 1315 | if (psend != -1) { |
1316 | if (local) { | 1316 | if (local) { |
1317 | struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); | 1317 | struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); |
1318 | if (skb2) | 1318 | if (skb2) |
1319 | ipmr_queue_xmit(skb2, cache, psend); | 1319 | ipmr_queue_xmit(skb2, cache, psend); |
1320 | } else { | 1320 | } else { |
1321 | ipmr_queue_xmit(skb, cache, psend); | 1321 | ipmr_queue_xmit(skb, cache, psend); |
1322 | return 0; | 1322 | return 0; |
1323 | } | 1323 | } |
1324 | } | 1324 | } |
1325 | 1325 | ||
1326 | dont_forward: | 1326 | dont_forward: |
1327 | if (!local) | 1327 | if (!local) |
1328 | kfree_skb(skb); | 1328 | kfree_skb(skb); |
1329 | return 0; | 1329 | return 0; |
1330 | } | 1330 | } |
1331 | 1331 | ||
1332 | 1332 | ||
1333 | /* | 1333 | /* |
1334 | * Multicast packets for forwarding arrive here | 1334 | * Multicast packets for forwarding arrive here |
1335 | */ | 1335 | */ |
1336 | 1336 | ||
1337 | int ip_mr_input(struct sk_buff *skb) | 1337 | int ip_mr_input(struct sk_buff *skb) |
1338 | { | 1338 | { |
1339 | struct mfc_cache *cache; | 1339 | struct mfc_cache *cache; |
1340 | int local = ((struct rtable*)skb->dst)->rt_flags&RTCF_LOCAL; | 1340 | int local = ((struct rtable*)skb->dst)->rt_flags&RTCF_LOCAL; |
1341 | 1341 | ||
1342 | /* Packet is looped back after forward, it should not be | 1342 | /* Packet is looped back after forward, it should not be |
1343 | forwarded second time, but still can be delivered locally. | 1343 | forwarded second time, but still can be delivered locally. |
1344 | */ | 1344 | */ |
1345 | if (IPCB(skb)->flags&IPSKB_FORWARDED) | 1345 | if (IPCB(skb)->flags&IPSKB_FORWARDED) |
1346 | goto dont_forward; | 1346 | goto dont_forward; |
1347 | 1347 | ||
1348 | if (!local) { | 1348 | if (!local) { |
1349 | if (IPCB(skb)->opt.router_alert) { | 1349 | if (IPCB(skb)->opt.router_alert) { |
1350 | if (ip_call_ra_chain(skb)) | 1350 | if (ip_call_ra_chain(skb)) |
1351 | return 0; | 1351 | return 0; |
1352 | } else if (skb->nh.iph->protocol == IPPROTO_IGMP){ | 1352 | } else if (skb->nh.iph->protocol == IPPROTO_IGMP){ |
1353 | /* IGMPv1 (and broken IGMPv2 implementations sort of | 1353 | /* IGMPv1 (and broken IGMPv2 implementations sort of |
1354 | Cisco IOS <= 11.2(8)) do not put router alert | 1354 | Cisco IOS <= 11.2(8)) do not put router alert |
1355 | option to IGMP packets destined to routable | 1355 | option to IGMP packets destined to routable |
1356 | groups. It is very bad, because it means | 1356 | groups. It is very bad, because it means |
1357 | that we can forward NO IGMP messages. | 1357 | that we can forward NO IGMP messages. |
1358 | */ | 1358 | */ |
1359 | read_lock(&mrt_lock); | 1359 | read_lock(&mrt_lock); |
1360 | if (mroute_socket) { | 1360 | if (mroute_socket) { |
1361 | nf_reset(skb); | 1361 | nf_reset(skb); |
1362 | raw_rcv(mroute_socket, skb); | 1362 | raw_rcv(mroute_socket, skb); |
1363 | read_unlock(&mrt_lock); | 1363 | read_unlock(&mrt_lock); |
1364 | return 0; | 1364 | return 0; |
1365 | } | 1365 | } |
1366 | read_unlock(&mrt_lock); | 1366 | read_unlock(&mrt_lock); |
1367 | } | 1367 | } |
1368 | } | 1368 | } |
1369 | 1369 | ||
1370 | read_lock(&mrt_lock); | 1370 | read_lock(&mrt_lock); |
1371 | cache = ipmr_cache_find(skb->nh.iph->saddr, skb->nh.iph->daddr); | 1371 | cache = ipmr_cache_find(skb->nh.iph->saddr, skb->nh.iph->daddr); |
1372 | 1372 | ||
1373 | /* | 1373 | /* |
1374 | * No usable cache entry | 1374 | * No usable cache entry |
1375 | */ | 1375 | */ |
1376 | if (cache==NULL) { | 1376 | if (cache==NULL) { |
1377 | int vif; | 1377 | int vif; |
1378 | 1378 | ||
1379 | if (local) { | 1379 | if (local) { |
1380 | struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); | 1380 | struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); |
1381 | ip_local_deliver(skb); | 1381 | ip_local_deliver(skb); |
1382 | if (skb2 == NULL) { | 1382 | if (skb2 == NULL) { |
1383 | read_unlock(&mrt_lock); | 1383 | read_unlock(&mrt_lock); |
1384 | return -ENOBUFS; | 1384 | return -ENOBUFS; |
1385 | } | 1385 | } |
1386 | skb = skb2; | 1386 | skb = skb2; |
1387 | } | 1387 | } |
1388 | 1388 | ||
1389 | vif = ipmr_find_vif(skb->dev); | 1389 | vif = ipmr_find_vif(skb->dev); |
1390 | if (vif >= 0) { | 1390 | if (vif >= 0) { |
1391 | int err = ipmr_cache_unresolved(vif, skb); | 1391 | int err = ipmr_cache_unresolved(vif, skb); |
1392 | read_unlock(&mrt_lock); | 1392 | read_unlock(&mrt_lock); |
1393 | 1393 | ||
1394 | return err; | 1394 | return err; |
1395 | } | 1395 | } |
1396 | read_unlock(&mrt_lock); | 1396 | read_unlock(&mrt_lock); |
1397 | kfree_skb(skb); | 1397 | kfree_skb(skb); |
1398 | return -ENODEV; | 1398 | return -ENODEV; |
1399 | } | 1399 | } |
1400 | 1400 | ||
1401 | ip_mr_forward(skb, cache, local); | 1401 | ip_mr_forward(skb, cache, local); |
1402 | 1402 | ||
1403 | read_unlock(&mrt_lock); | 1403 | read_unlock(&mrt_lock); |
1404 | 1404 | ||
1405 | if (local) | 1405 | if (local) |
1406 | return ip_local_deliver(skb); | 1406 | return ip_local_deliver(skb); |
1407 | 1407 | ||
1408 | return 0; | 1408 | return 0; |
1409 | 1409 | ||
1410 | dont_forward: | 1410 | dont_forward: |
1411 | if (local) | 1411 | if (local) |
1412 | return ip_local_deliver(skb); | 1412 | return ip_local_deliver(skb); |
1413 | kfree_skb(skb); | 1413 | kfree_skb(skb); |
1414 | return 0; | 1414 | return 0; |
1415 | } | 1415 | } |
1416 | 1416 | ||
1417 | #ifdef CONFIG_IP_PIMSM_V1 | 1417 | #ifdef CONFIG_IP_PIMSM_V1 |
1418 | /* | 1418 | /* |
1419 | * Handle IGMP messages of PIMv1 | 1419 | * Handle IGMP messages of PIMv1 |
1420 | */ | 1420 | */ |
1421 | 1421 | ||
1422 | int pim_rcv_v1(struct sk_buff * skb) | 1422 | int pim_rcv_v1(struct sk_buff * skb) |
1423 | { | 1423 | { |
1424 | struct igmphdr *pim; | 1424 | struct igmphdr *pim; |
1425 | struct iphdr *encap; | 1425 | struct iphdr *encap; |
1426 | struct net_device *reg_dev = NULL; | 1426 | struct net_device *reg_dev = NULL; |
1427 | 1427 | ||
1428 | if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap))) | 1428 | if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap))) |
1429 | goto drop; | 1429 | goto drop; |
1430 | 1430 | ||
1431 | pim = (struct igmphdr*)skb->h.raw; | 1431 | pim = (struct igmphdr*)skb->h.raw; |
1432 | 1432 | ||
1433 | if (!mroute_do_pim || | 1433 | if (!mroute_do_pim || |
1434 | skb->len < sizeof(*pim) + sizeof(*encap) || | 1434 | skb->len < sizeof(*pim) + sizeof(*encap) || |
1435 | pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) | 1435 | pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) |
1436 | goto drop; | 1436 | goto drop; |
1437 | 1437 | ||
1438 | encap = (struct iphdr*)(skb->h.raw + sizeof(struct igmphdr)); | 1438 | encap = (struct iphdr*)(skb->h.raw + sizeof(struct igmphdr)); |
1439 | /* | 1439 | /* |
1440 | Check that: | 1440 | Check that: |
1441 | a. packet is really destinted to a multicast group | 1441 | a. packet is really destinted to a multicast group |
1442 | b. packet is not a NULL-REGISTER | 1442 | b. packet is not a NULL-REGISTER |
1443 | c. packet is not truncated | 1443 | c. packet is not truncated |
1444 | */ | 1444 | */ |
1445 | if (!MULTICAST(encap->daddr) || | 1445 | if (!MULTICAST(encap->daddr) || |
1446 | encap->tot_len == 0 || | 1446 | encap->tot_len == 0 || |
1447 | ntohs(encap->tot_len) + sizeof(*pim) > skb->len) | 1447 | ntohs(encap->tot_len) + sizeof(*pim) > skb->len) |
1448 | goto drop; | 1448 | goto drop; |
1449 | 1449 | ||
1450 | read_lock(&mrt_lock); | 1450 | read_lock(&mrt_lock); |
1451 | if (reg_vif_num >= 0) | 1451 | if (reg_vif_num >= 0) |
1452 | reg_dev = vif_table[reg_vif_num].dev; | 1452 | reg_dev = vif_table[reg_vif_num].dev; |
1453 | if (reg_dev) | 1453 | if (reg_dev) |
1454 | dev_hold(reg_dev); | 1454 | dev_hold(reg_dev); |
1455 | read_unlock(&mrt_lock); | 1455 | read_unlock(&mrt_lock); |
1456 | 1456 | ||
1457 | if (reg_dev == NULL) | 1457 | if (reg_dev == NULL) |
1458 | goto drop; | 1458 | goto drop; |
1459 | 1459 | ||
1460 | skb->mac.raw = skb->nh.raw; | 1460 | skb->mac.raw = skb->nh.raw; |
1461 | skb_pull(skb, (u8*)encap - skb->data); | 1461 | skb_pull(skb, (u8*)encap - skb->data); |
1462 | skb->nh.iph = (struct iphdr *)skb->data; | 1462 | skb->nh.iph = (struct iphdr *)skb->data; |
1463 | skb->dev = reg_dev; | 1463 | skb->dev = reg_dev; |
1464 | memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); | ||
1465 | skb->protocol = htons(ETH_P_IP); | 1464 | skb->protocol = htons(ETH_P_IP); |
1466 | skb->ip_summed = 0; | 1465 | skb->ip_summed = 0; |
1467 | skb->pkt_type = PACKET_HOST; | 1466 | skb->pkt_type = PACKET_HOST; |
1468 | dst_release(skb->dst); | 1467 | dst_release(skb->dst); |
1469 | skb->dst = NULL; | 1468 | skb->dst = NULL; |
1470 | ((struct net_device_stats*)netdev_priv(reg_dev))->rx_bytes += skb->len; | 1469 | ((struct net_device_stats*)netdev_priv(reg_dev))->rx_bytes += skb->len; |
1471 | ((struct net_device_stats*)netdev_priv(reg_dev))->rx_packets++; | 1470 | ((struct net_device_stats*)netdev_priv(reg_dev))->rx_packets++; |
1472 | nf_reset(skb); | 1471 | nf_reset(skb); |
1473 | netif_rx(skb); | 1472 | netif_rx(skb); |
1474 | dev_put(reg_dev); | 1473 | dev_put(reg_dev); |
1475 | return 0; | 1474 | return 0; |
1476 | drop: | 1475 | drop: |
1477 | kfree_skb(skb); | 1476 | kfree_skb(skb); |
1478 | return 0; | 1477 | return 0; |
1479 | } | 1478 | } |
1480 | #endif | 1479 | #endif |
1481 | 1480 | ||
1482 | #ifdef CONFIG_IP_PIMSM_V2 | 1481 | #ifdef CONFIG_IP_PIMSM_V2 |
1483 | static int pim_rcv(struct sk_buff * skb) | 1482 | static int pim_rcv(struct sk_buff * skb) |
1484 | { | 1483 | { |
1485 | struct pimreghdr *pim; | 1484 | struct pimreghdr *pim; |
1486 | struct iphdr *encap; | 1485 | struct iphdr *encap; |
1487 | struct net_device *reg_dev = NULL; | 1486 | struct net_device *reg_dev = NULL; |
1488 | 1487 | ||
1489 | if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap))) | 1488 | if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap))) |
1490 | goto drop; | 1489 | goto drop; |
1491 | 1490 | ||
1492 | pim = (struct pimreghdr*)skb->h.raw; | 1491 | pim = (struct pimreghdr*)skb->h.raw; |
1493 | if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) || | 1492 | if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) || |
1494 | (pim->flags&PIM_NULL_REGISTER) || | 1493 | (pim->flags&PIM_NULL_REGISTER) || |
1495 | (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 && | 1494 | (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 && |
1496 | (u16)csum_fold(skb_checksum(skb, 0, skb->len, 0)))) | 1495 | (u16)csum_fold(skb_checksum(skb, 0, skb->len, 0)))) |
1497 | goto drop; | 1496 | goto drop; |
1498 | 1497 | ||
1499 | /* check if the inner packet is destined to mcast group */ | 1498 | /* check if the inner packet is destined to mcast group */ |
1500 | encap = (struct iphdr*)(skb->h.raw + sizeof(struct pimreghdr)); | 1499 | encap = (struct iphdr*)(skb->h.raw + sizeof(struct pimreghdr)); |
1501 | if (!MULTICAST(encap->daddr) || | 1500 | if (!MULTICAST(encap->daddr) || |
1502 | encap->tot_len == 0 || | 1501 | encap->tot_len == 0 || |
1503 | ntohs(encap->tot_len) + sizeof(*pim) > skb->len) | 1502 | ntohs(encap->tot_len) + sizeof(*pim) > skb->len) |
1504 | goto drop; | 1503 | goto drop; |
1505 | 1504 | ||
1506 | read_lock(&mrt_lock); | 1505 | read_lock(&mrt_lock); |
1507 | if (reg_vif_num >= 0) | 1506 | if (reg_vif_num >= 0) |
1508 | reg_dev = vif_table[reg_vif_num].dev; | 1507 | reg_dev = vif_table[reg_vif_num].dev; |
1509 | if (reg_dev) | 1508 | if (reg_dev) |
1510 | dev_hold(reg_dev); | 1509 | dev_hold(reg_dev); |
1511 | read_unlock(&mrt_lock); | 1510 | read_unlock(&mrt_lock); |
1512 | 1511 | ||
1513 | if (reg_dev == NULL) | 1512 | if (reg_dev == NULL) |
1514 | goto drop; | 1513 | goto drop; |
1515 | 1514 | ||
1516 | skb->mac.raw = skb->nh.raw; | 1515 | skb->mac.raw = skb->nh.raw; |
1517 | skb_pull(skb, (u8*)encap - skb->data); | 1516 | skb_pull(skb, (u8*)encap - skb->data); |
1518 | skb->nh.iph = (struct iphdr *)skb->data; | 1517 | skb->nh.iph = (struct iphdr *)skb->data; |
1519 | skb->dev = reg_dev; | 1518 | skb->dev = reg_dev; |
1520 | memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); | ||
1521 | skb->protocol = htons(ETH_P_IP); | 1519 | skb->protocol = htons(ETH_P_IP); |
1522 | skb->ip_summed = 0; | 1520 | skb->ip_summed = 0; |
1523 | skb->pkt_type = PACKET_HOST; | 1521 | skb->pkt_type = PACKET_HOST; |
1524 | dst_release(skb->dst); | 1522 | dst_release(skb->dst); |
1525 | ((struct net_device_stats*)netdev_priv(reg_dev))->rx_bytes += skb->len; | 1523 | ((struct net_device_stats*)netdev_priv(reg_dev))->rx_bytes += skb->len; |
1526 | ((struct net_device_stats*)netdev_priv(reg_dev))->rx_packets++; | 1524 | ((struct net_device_stats*)netdev_priv(reg_dev))->rx_packets++; |
1527 | skb->dst = NULL; | 1525 | skb->dst = NULL; |
1528 | nf_reset(skb); | 1526 | nf_reset(skb); |
1529 | netif_rx(skb); | 1527 | netif_rx(skb); |
1530 | dev_put(reg_dev); | 1528 | dev_put(reg_dev); |
1531 | return 0; | 1529 | return 0; |
1532 | drop: | 1530 | drop: |
1533 | kfree_skb(skb); | 1531 | kfree_skb(skb); |
1534 | return 0; | 1532 | return 0; |
1535 | } | 1533 | } |
1536 | #endif | 1534 | #endif |
1537 | 1535 | ||
1538 | static int | 1536 | static int |
1539 | ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm) | 1537 | ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm) |
1540 | { | 1538 | { |
1541 | int ct; | 1539 | int ct; |
1542 | struct rtnexthop *nhp; | 1540 | struct rtnexthop *nhp; |
1543 | struct net_device *dev = vif_table[c->mfc_parent].dev; | 1541 | struct net_device *dev = vif_table[c->mfc_parent].dev; |
1544 | u8 *b = skb->tail; | 1542 | u8 *b = skb->tail; |
1545 | struct rtattr *mp_head; | 1543 | struct rtattr *mp_head; |
1546 | 1544 | ||
1547 | if (dev) | 1545 | if (dev) |
1548 | RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex); | 1546 | RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex); |
1549 | 1547 | ||
1550 | mp_head = (struct rtattr*)skb_put(skb, RTA_LENGTH(0)); | 1548 | mp_head = (struct rtattr*)skb_put(skb, RTA_LENGTH(0)); |
1551 | 1549 | ||
1552 | for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { | 1550 | for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { |
1553 | if (c->mfc_un.res.ttls[ct] < 255) { | 1551 | if (c->mfc_un.res.ttls[ct] < 255) { |
1554 | if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4)) | 1552 | if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4)) |
1555 | goto rtattr_failure; | 1553 | goto rtattr_failure; |
1556 | nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); | 1554 | nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); |
1557 | nhp->rtnh_flags = 0; | 1555 | nhp->rtnh_flags = 0; |
1558 | nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; | 1556 | nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; |
1559 | nhp->rtnh_ifindex = vif_table[ct].dev->ifindex; | 1557 | nhp->rtnh_ifindex = vif_table[ct].dev->ifindex; |
1560 | nhp->rtnh_len = sizeof(*nhp); | 1558 | nhp->rtnh_len = sizeof(*nhp); |
1561 | } | 1559 | } |
1562 | } | 1560 | } |
1563 | mp_head->rta_type = RTA_MULTIPATH; | 1561 | mp_head->rta_type = RTA_MULTIPATH; |
1564 | mp_head->rta_len = skb->tail - (u8*)mp_head; | 1562 | mp_head->rta_len = skb->tail - (u8*)mp_head; |
1565 | rtm->rtm_type = RTN_MULTICAST; | 1563 | rtm->rtm_type = RTN_MULTICAST; |
1566 | return 1; | 1564 | return 1; |
1567 | 1565 | ||
1568 | rtattr_failure: | 1566 | rtattr_failure: |
1569 | skb_trim(skb, b - skb->data); | 1567 | skb_trim(skb, b - skb->data); |
1570 | return -EMSGSIZE; | 1568 | return -EMSGSIZE; |
1571 | } | 1569 | } |
1572 | 1570 | ||
1573 | int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait) | 1571 | int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait) |
1574 | { | 1572 | { |
1575 | int err; | 1573 | int err; |
1576 | struct mfc_cache *cache; | 1574 | struct mfc_cache *cache; |
1577 | struct rtable *rt = (struct rtable*)skb->dst; | 1575 | struct rtable *rt = (struct rtable*)skb->dst; |
1578 | 1576 | ||
1579 | read_lock(&mrt_lock); | 1577 | read_lock(&mrt_lock); |
1580 | cache = ipmr_cache_find(rt->rt_src, rt->rt_dst); | 1578 | cache = ipmr_cache_find(rt->rt_src, rt->rt_dst); |
1581 | 1579 | ||
1582 | if (cache==NULL) { | 1580 | if (cache==NULL) { |
1583 | struct net_device *dev; | 1581 | struct net_device *dev; |
1584 | int vif; | 1582 | int vif; |
1585 | 1583 | ||
1586 | if (nowait) { | 1584 | if (nowait) { |
1587 | read_unlock(&mrt_lock); | 1585 | read_unlock(&mrt_lock); |
1588 | return -EAGAIN; | 1586 | return -EAGAIN; |
1589 | } | 1587 | } |
1590 | 1588 | ||
1591 | dev = skb->dev; | 1589 | dev = skb->dev; |
1592 | if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) { | 1590 | if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) { |
1593 | read_unlock(&mrt_lock); | 1591 | read_unlock(&mrt_lock); |
1594 | return -ENODEV; | 1592 | return -ENODEV; |
1595 | } | 1593 | } |
1596 | skb->nh.raw = skb_push(skb, sizeof(struct iphdr)); | 1594 | skb->nh.raw = skb_push(skb, sizeof(struct iphdr)); |
1597 | skb->nh.iph->ihl = sizeof(struct iphdr)>>2; | 1595 | skb->nh.iph->ihl = sizeof(struct iphdr)>>2; |
1598 | skb->nh.iph->saddr = rt->rt_src; | 1596 | skb->nh.iph->saddr = rt->rt_src; |
1599 | skb->nh.iph->daddr = rt->rt_dst; | 1597 | skb->nh.iph->daddr = rt->rt_dst; |
1600 | skb->nh.iph->version = 0; | 1598 | skb->nh.iph->version = 0; |
1601 | err = ipmr_cache_unresolved(vif, skb); | 1599 | err = ipmr_cache_unresolved(vif, skb); |
1602 | read_unlock(&mrt_lock); | 1600 | read_unlock(&mrt_lock); |
1603 | return err; | 1601 | return err; |
1604 | } | 1602 | } |
1605 | 1603 | ||
1606 | if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY)) | 1604 | if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY)) |
1607 | cache->mfc_flags |= MFC_NOTIFY; | 1605 | cache->mfc_flags |= MFC_NOTIFY; |
1608 | err = ipmr_fill_mroute(skb, cache, rtm); | 1606 | err = ipmr_fill_mroute(skb, cache, rtm); |
1609 | read_unlock(&mrt_lock); | 1607 | read_unlock(&mrt_lock); |
1610 | return err; | 1608 | return err; |
1611 | } | 1609 | } |
1612 | 1610 | ||
1613 | #ifdef CONFIG_PROC_FS | 1611 | #ifdef CONFIG_PROC_FS |
1614 | /* | 1612 | /* |
1615 | * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif | 1613 | * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif |
1616 | */ | 1614 | */ |
1617 | struct ipmr_vif_iter { | 1615 | struct ipmr_vif_iter { |
1618 | int ct; | 1616 | int ct; |
1619 | }; | 1617 | }; |
1620 | 1618 | ||
1621 | static struct vif_device *ipmr_vif_seq_idx(struct ipmr_vif_iter *iter, | 1619 | static struct vif_device *ipmr_vif_seq_idx(struct ipmr_vif_iter *iter, |
1622 | loff_t pos) | 1620 | loff_t pos) |
1623 | { | 1621 | { |
1624 | for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) { | 1622 | for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) { |
1625 | if(!VIF_EXISTS(iter->ct)) | 1623 | if(!VIF_EXISTS(iter->ct)) |
1626 | continue; | 1624 | continue; |
1627 | if (pos-- == 0) | 1625 | if (pos-- == 0) |
1628 | return &vif_table[iter->ct]; | 1626 | return &vif_table[iter->ct]; |
1629 | } | 1627 | } |
1630 | return NULL; | 1628 | return NULL; |
1631 | } | 1629 | } |
1632 | 1630 | ||
1633 | static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos) | 1631 | static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos) |
1634 | { | 1632 | { |
1635 | read_lock(&mrt_lock); | 1633 | read_lock(&mrt_lock); |
1636 | return *pos ? ipmr_vif_seq_idx(seq->private, *pos - 1) | 1634 | return *pos ? ipmr_vif_seq_idx(seq->private, *pos - 1) |
1637 | : SEQ_START_TOKEN; | 1635 | : SEQ_START_TOKEN; |
1638 | } | 1636 | } |
1639 | 1637 | ||
1640 | static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos) | 1638 | static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos) |
1641 | { | 1639 | { |
1642 | struct ipmr_vif_iter *iter = seq->private; | 1640 | struct ipmr_vif_iter *iter = seq->private; |
1643 | 1641 | ||
1644 | ++*pos; | 1642 | ++*pos; |
1645 | if (v == SEQ_START_TOKEN) | 1643 | if (v == SEQ_START_TOKEN) |
1646 | return ipmr_vif_seq_idx(iter, 0); | 1644 | return ipmr_vif_seq_idx(iter, 0); |
1647 | 1645 | ||
1648 | while (++iter->ct < maxvif) { | 1646 | while (++iter->ct < maxvif) { |
1649 | if(!VIF_EXISTS(iter->ct)) | 1647 | if(!VIF_EXISTS(iter->ct)) |
1650 | continue; | 1648 | continue; |
1651 | return &vif_table[iter->ct]; | 1649 | return &vif_table[iter->ct]; |
1652 | } | 1650 | } |
1653 | return NULL; | 1651 | return NULL; |
1654 | } | 1652 | } |
1655 | 1653 | ||
1656 | static void ipmr_vif_seq_stop(struct seq_file *seq, void *v) | 1654 | static void ipmr_vif_seq_stop(struct seq_file *seq, void *v) |
1657 | { | 1655 | { |
1658 | read_unlock(&mrt_lock); | 1656 | read_unlock(&mrt_lock); |
1659 | } | 1657 | } |
1660 | 1658 | ||
1661 | static int ipmr_vif_seq_show(struct seq_file *seq, void *v) | 1659 | static int ipmr_vif_seq_show(struct seq_file *seq, void *v) |
1662 | { | 1660 | { |
1663 | if (v == SEQ_START_TOKEN) { | 1661 | if (v == SEQ_START_TOKEN) { |
1664 | seq_puts(seq, | 1662 | seq_puts(seq, |
1665 | "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n"); | 1663 | "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n"); |
1666 | } else { | 1664 | } else { |
1667 | const struct vif_device *vif = v; | 1665 | const struct vif_device *vif = v; |
1668 | const char *name = vif->dev ? vif->dev->name : "none"; | 1666 | const char *name = vif->dev ? vif->dev->name : "none"; |
1669 | 1667 | ||
1670 | seq_printf(seq, | 1668 | seq_printf(seq, |
1671 | "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n", | 1669 | "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n", |
1672 | vif - vif_table, | 1670 | vif - vif_table, |
1673 | name, vif->bytes_in, vif->pkt_in, | 1671 | name, vif->bytes_in, vif->pkt_in, |
1674 | vif->bytes_out, vif->pkt_out, | 1672 | vif->bytes_out, vif->pkt_out, |
1675 | vif->flags, vif->local, vif->remote); | 1673 | vif->flags, vif->local, vif->remote); |
1676 | } | 1674 | } |
1677 | return 0; | 1675 | return 0; |
1678 | } | 1676 | } |
1679 | 1677 | ||
1680 | static struct seq_operations ipmr_vif_seq_ops = { | 1678 | static struct seq_operations ipmr_vif_seq_ops = { |
1681 | .start = ipmr_vif_seq_start, | 1679 | .start = ipmr_vif_seq_start, |
1682 | .next = ipmr_vif_seq_next, | 1680 | .next = ipmr_vif_seq_next, |
1683 | .stop = ipmr_vif_seq_stop, | 1681 | .stop = ipmr_vif_seq_stop, |
1684 | .show = ipmr_vif_seq_show, | 1682 | .show = ipmr_vif_seq_show, |
1685 | }; | 1683 | }; |
1686 | 1684 | ||
1687 | static int ipmr_vif_open(struct inode *inode, struct file *file) | 1685 | static int ipmr_vif_open(struct inode *inode, struct file *file) |
1688 | { | 1686 | { |
1689 | struct seq_file *seq; | 1687 | struct seq_file *seq; |
1690 | int rc = -ENOMEM; | 1688 | int rc = -ENOMEM; |
1691 | struct ipmr_vif_iter *s = kmalloc(sizeof(*s), GFP_KERNEL); | 1689 | struct ipmr_vif_iter *s = kmalloc(sizeof(*s), GFP_KERNEL); |
1692 | 1690 | ||
1693 | if (!s) | 1691 | if (!s) |
1694 | goto out; | 1692 | goto out; |
1695 | 1693 | ||
1696 | rc = seq_open(file, &ipmr_vif_seq_ops); | 1694 | rc = seq_open(file, &ipmr_vif_seq_ops); |
1697 | if (rc) | 1695 | if (rc) |
1698 | goto out_kfree; | 1696 | goto out_kfree; |
1699 | 1697 | ||
1700 | s->ct = 0; | 1698 | s->ct = 0; |
1701 | seq = file->private_data; | 1699 | seq = file->private_data; |
1702 | seq->private = s; | 1700 | seq->private = s; |
1703 | out: | 1701 | out: |
1704 | return rc; | 1702 | return rc; |
1705 | out_kfree: | 1703 | out_kfree: |
1706 | kfree(s); | 1704 | kfree(s); |
1707 | goto out; | 1705 | goto out; |
1708 | 1706 | ||
1709 | } | 1707 | } |
1710 | 1708 | ||
1711 | static struct file_operations ipmr_vif_fops = { | 1709 | static struct file_operations ipmr_vif_fops = { |
1712 | .owner = THIS_MODULE, | 1710 | .owner = THIS_MODULE, |
1713 | .open = ipmr_vif_open, | 1711 | .open = ipmr_vif_open, |
1714 | .read = seq_read, | 1712 | .read = seq_read, |
1715 | .llseek = seq_lseek, | 1713 | .llseek = seq_lseek, |
1716 | .release = seq_release_private, | 1714 | .release = seq_release_private, |
1717 | }; | 1715 | }; |
1718 | 1716 | ||
1719 | struct ipmr_mfc_iter { | 1717 | struct ipmr_mfc_iter { |
1720 | struct mfc_cache **cache; | 1718 | struct mfc_cache **cache; |
1721 | int ct; | 1719 | int ct; |
1722 | }; | 1720 | }; |
1723 | 1721 | ||
1724 | 1722 | ||
1725 | static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos) | 1723 | static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos) |
1726 | { | 1724 | { |
1727 | struct mfc_cache *mfc; | 1725 | struct mfc_cache *mfc; |
1728 | 1726 | ||
1729 | it->cache = mfc_cache_array; | 1727 | it->cache = mfc_cache_array; |
1730 | read_lock(&mrt_lock); | 1728 | read_lock(&mrt_lock); |
1731 | for (it->ct = 0; it->ct < MFC_LINES; it->ct++) | 1729 | for (it->ct = 0; it->ct < MFC_LINES; it->ct++) |
1732 | for(mfc = mfc_cache_array[it->ct]; mfc; mfc = mfc->next) | 1730 | for(mfc = mfc_cache_array[it->ct]; mfc; mfc = mfc->next) |
1733 | if (pos-- == 0) | 1731 | if (pos-- == 0) |
1734 | return mfc; | 1732 | return mfc; |
1735 | read_unlock(&mrt_lock); | 1733 | read_unlock(&mrt_lock); |
1736 | 1734 | ||
1737 | it->cache = &mfc_unres_queue; | 1735 | it->cache = &mfc_unres_queue; |
1738 | spin_lock_bh(&mfc_unres_lock); | 1736 | spin_lock_bh(&mfc_unres_lock); |
1739 | for(mfc = mfc_unres_queue; mfc; mfc = mfc->next) | 1737 | for(mfc = mfc_unres_queue; mfc; mfc = mfc->next) |
1740 | if (pos-- == 0) | 1738 | if (pos-- == 0) |
1741 | return mfc; | 1739 | return mfc; |
1742 | spin_unlock_bh(&mfc_unres_lock); | 1740 | spin_unlock_bh(&mfc_unres_lock); |
1743 | 1741 | ||
1744 | it->cache = NULL; | 1742 | it->cache = NULL; |
1745 | return NULL; | 1743 | return NULL; |
1746 | } | 1744 | } |
1747 | 1745 | ||
1748 | 1746 | ||
1749 | static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) | 1747 | static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) |
1750 | { | 1748 | { |
1751 | struct ipmr_mfc_iter *it = seq->private; | 1749 | struct ipmr_mfc_iter *it = seq->private; |
1752 | it->cache = NULL; | 1750 | it->cache = NULL; |
1753 | it->ct = 0; | 1751 | it->ct = 0; |
1754 | return *pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1) | 1752 | return *pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1) |
1755 | : SEQ_START_TOKEN; | 1753 | : SEQ_START_TOKEN; |
1756 | } | 1754 | } |
1757 | 1755 | ||
1758 | static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) | 1756 | static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) |
1759 | { | 1757 | { |
1760 | struct mfc_cache *mfc = v; | 1758 | struct mfc_cache *mfc = v; |
1761 | struct ipmr_mfc_iter *it = seq->private; | 1759 | struct ipmr_mfc_iter *it = seq->private; |
1762 | 1760 | ||
1763 | ++*pos; | 1761 | ++*pos; |
1764 | 1762 | ||
1765 | if (v == SEQ_START_TOKEN) | 1763 | if (v == SEQ_START_TOKEN) |
1766 | return ipmr_mfc_seq_idx(seq->private, 0); | 1764 | return ipmr_mfc_seq_idx(seq->private, 0); |
1767 | 1765 | ||
1768 | if (mfc->next) | 1766 | if (mfc->next) |
1769 | return mfc->next; | 1767 | return mfc->next; |
1770 | 1768 | ||
1771 | if (it->cache == &mfc_unres_queue) | 1769 | if (it->cache == &mfc_unres_queue) |
1772 | goto end_of_list; | 1770 | goto end_of_list; |
1773 | 1771 | ||
1774 | BUG_ON(it->cache != mfc_cache_array); | 1772 | BUG_ON(it->cache != mfc_cache_array); |
1775 | 1773 | ||
1776 | while (++it->ct < MFC_LINES) { | 1774 | while (++it->ct < MFC_LINES) { |
1777 | mfc = mfc_cache_array[it->ct]; | 1775 | mfc = mfc_cache_array[it->ct]; |
1778 | if (mfc) | 1776 | if (mfc) |
1779 | return mfc; | 1777 | return mfc; |
1780 | } | 1778 | } |
1781 | 1779 | ||
1782 | /* exhausted cache_array, show unresolved */ | 1780 | /* exhausted cache_array, show unresolved */ |
1783 | read_unlock(&mrt_lock); | 1781 | read_unlock(&mrt_lock); |
1784 | it->cache = &mfc_unres_queue; | 1782 | it->cache = &mfc_unres_queue; |
1785 | it->ct = 0; | 1783 | it->ct = 0; |
1786 | 1784 | ||
1787 | spin_lock_bh(&mfc_unres_lock); | 1785 | spin_lock_bh(&mfc_unres_lock); |
1788 | mfc = mfc_unres_queue; | 1786 | mfc = mfc_unres_queue; |
1789 | if (mfc) | 1787 | if (mfc) |
1790 | return mfc; | 1788 | return mfc; |
1791 | 1789 | ||
1792 | end_of_list: | 1790 | end_of_list: |
1793 | spin_unlock_bh(&mfc_unres_lock); | 1791 | spin_unlock_bh(&mfc_unres_lock); |
1794 | it->cache = NULL; | 1792 | it->cache = NULL; |
1795 | 1793 | ||
1796 | return NULL; | 1794 | return NULL; |
1797 | } | 1795 | } |
1798 | 1796 | ||
1799 | static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v) | 1797 | static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v) |
1800 | { | 1798 | { |
1801 | struct ipmr_mfc_iter *it = seq->private; | 1799 | struct ipmr_mfc_iter *it = seq->private; |
1802 | 1800 | ||
1803 | if (it->cache == &mfc_unres_queue) | 1801 | if (it->cache == &mfc_unres_queue) |
1804 | spin_unlock_bh(&mfc_unres_lock); | 1802 | spin_unlock_bh(&mfc_unres_lock); |
1805 | else if (it->cache == mfc_cache_array) | 1803 | else if (it->cache == mfc_cache_array) |
1806 | read_unlock(&mrt_lock); | 1804 | read_unlock(&mrt_lock); |
1807 | } | 1805 | } |
1808 | 1806 | ||
1809 | static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) | 1807 | static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) |
1810 | { | 1808 | { |
1811 | int n; | 1809 | int n; |
1812 | 1810 | ||
1813 | if (v == SEQ_START_TOKEN) { | 1811 | if (v == SEQ_START_TOKEN) { |
1814 | seq_puts(seq, | 1812 | seq_puts(seq, |
1815 | "Group Origin Iif Pkts Bytes Wrong Oifs\n"); | 1813 | "Group Origin Iif Pkts Bytes Wrong Oifs\n"); |
1816 | } else { | 1814 | } else { |
1817 | const struct mfc_cache *mfc = v; | 1815 | const struct mfc_cache *mfc = v; |
1818 | const struct ipmr_mfc_iter *it = seq->private; | 1816 | const struct ipmr_mfc_iter *it = seq->private; |
1819 | 1817 | ||
1820 | seq_printf(seq, "%08lX %08lX %-3d %8ld %8ld %8ld", | 1818 | seq_printf(seq, "%08lX %08lX %-3d %8ld %8ld %8ld", |
1821 | (unsigned long) mfc->mfc_mcastgrp, | 1819 | (unsigned long) mfc->mfc_mcastgrp, |
1822 | (unsigned long) mfc->mfc_origin, | 1820 | (unsigned long) mfc->mfc_origin, |
1823 | mfc->mfc_parent, | 1821 | mfc->mfc_parent, |
1824 | mfc->mfc_un.res.pkt, | 1822 | mfc->mfc_un.res.pkt, |
1825 | mfc->mfc_un.res.bytes, | 1823 | mfc->mfc_un.res.bytes, |
1826 | mfc->mfc_un.res.wrong_if); | 1824 | mfc->mfc_un.res.wrong_if); |
1827 | 1825 | ||
1828 | if (it->cache != &mfc_unres_queue) { | 1826 | if (it->cache != &mfc_unres_queue) { |
1829 | for(n = mfc->mfc_un.res.minvif; | 1827 | for(n = mfc->mfc_un.res.minvif; |
1830 | n < mfc->mfc_un.res.maxvif; n++ ) { | 1828 | n < mfc->mfc_un.res.maxvif; n++ ) { |
1831 | if(VIF_EXISTS(n) | 1829 | if(VIF_EXISTS(n) |
1832 | && mfc->mfc_un.res.ttls[n] < 255) | 1830 | && mfc->mfc_un.res.ttls[n] < 255) |
1833 | seq_printf(seq, | 1831 | seq_printf(seq, |
1834 | " %2d:%-3d", | 1832 | " %2d:%-3d", |
1835 | n, mfc->mfc_un.res.ttls[n]); | 1833 | n, mfc->mfc_un.res.ttls[n]); |
1836 | } | 1834 | } |
1837 | } | 1835 | } |
1838 | seq_putc(seq, '\n'); | 1836 | seq_putc(seq, '\n'); |
1839 | } | 1837 | } |
1840 | return 0; | 1838 | return 0; |
1841 | } | 1839 | } |
1842 | 1840 | ||
1843 | static struct seq_operations ipmr_mfc_seq_ops = { | 1841 | static struct seq_operations ipmr_mfc_seq_ops = { |
1844 | .start = ipmr_mfc_seq_start, | 1842 | .start = ipmr_mfc_seq_start, |
1845 | .next = ipmr_mfc_seq_next, | 1843 | .next = ipmr_mfc_seq_next, |
1846 | .stop = ipmr_mfc_seq_stop, | 1844 | .stop = ipmr_mfc_seq_stop, |
1847 | .show = ipmr_mfc_seq_show, | 1845 | .show = ipmr_mfc_seq_show, |
1848 | }; | 1846 | }; |
1849 | 1847 | ||
1850 | static int ipmr_mfc_open(struct inode *inode, struct file *file) | 1848 | static int ipmr_mfc_open(struct inode *inode, struct file *file) |
1851 | { | 1849 | { |
1852 | struct seq_file *seq; | 1850 | struct seq_file *seq; |
1853 | int rc = -ENOMEM; | 1851 | int rc = -ENOMEM; |
1854 | struct ipmr_mfc_iter *s = kmalloc(sizeof(*s), GFP_KERNEL); | 1852 | struct ipmr_mfc_iter *s = kmalloc(sizeof(*s), GFP_KERNEL); |
1855 | 1853 | ||
1856 | if (!s) | 1854 | if (!s) |
1857 | goto out; | 1855 | goto out; |
1858 | 1856 | ||
1859 | rc = seq_open(file, &ipmr_mfc_seq_ops); | 1857 | rc = seq_open(file, &ipmr_mfc_seq_ops); |
1860 | if (rc) | 1858 | if (rc) |
1861 | goto out_kfree; | 1859 | goto out_kfree; |
1862 | 1860 | ||
1863 | seq = file->private_data; | 1861 | seq = file->private_data; |
1864 | seq->private = s; | 1862 | seq->private = s; |
1865 | out: | 1863 | out: |
1866 | return rc; | 1864 | return rc; |
1867 | out_kfree: | 1865 | out_kfree: |
1868 | kfree(s); | 1866 | kfree(s); |
1869 | goto out; | 1867 | goto out; |
1870 | 1868 | ||
1871 | } | 1869 | } |
1872 | 1870 | ||
1873 | static struct file_operations ipmr_mfc_fops = { | 1871 | static struct file_operations ipmr_mfc_fops = { |
1874 | .owner = THIS_MODULE, | 1872 | .owner = THIS_MODULE, |
1875 | .open = ipmr_mfc_open, | 1873 | .open = ipmr_mfc_open, |
1876 | .read = seq_read, | 1874 | .read = seq_read, |
1877 | .llseek = seq_lseek, | 1875 | .llseek = seq_lseek, |
1878 | .release = seq_release_private, | 1876 | .release = seq_release_private, |
1879 | }; | 1877 | }; |
1880 | #endif | 1878 | #endif |
1881 | 1879 | ||
1882 | #ifdef CONFIG_IP_PIMSM_V2 | 1880 | #ifdef CONFIG_IP_PIMSM_V2 |
1883 | static struct net_protocol pim_protocol = { | 1881 | static struct net_protocol pim_protocol = { |
1884 | .handler = pim_rcv, | 1882 | .handler = pim_rcv, |
1885 | }; | 1883 | }; |
1886 | #endif | 1884 | #endif |
1887 | 1885 | ||
1888 | 1886 | ||
1889 | /* | 1887 | /* |
1890 | * Setup for IP multicast routing | 1888 | * Setup for IP multicast routing |
1891 | */ | 1889 | */ |
1892 | 1890 | ||
1893 | void __init ip_mr_init(void) | 1891 | void __init ip_mr_init(void) |
1894 | { | 1892 | { |
1895 | mrt_cachep = kmem_cache_create("ip_mrt_cache", | 1893 | mrt_cachep = kmem_cache_create("ip_mrt_cache", |
1896 | sizeof(struct mfc_cache), | 1894 | sizeof(struct mfc_cache), |
1897 | 0, SLAB_HWCACHE_ALIGN, | 1895 | 0, SLAB_HWCACHE_ALIGN, |
1898 | NULL, NULL); | 1896 | NULL, NULL); |
1899 | if (!mrt_cachep) | 1897 | if (!mrt_cachep) |
1900 | panic("cannot allocate ip_mrt_cache"); | 1898 | panic("cannot allocate ip_mrt_cache"); |
1901 | 1899 | ||
1902 | init_timer(&ipmr_expire_timer); | 1900 | init_timer(&ipmr_expire_timer); |
1903 | ipmr_expire_timer.function=ipmr_expire_process; | 1901 | ipmr_expire_timer.function=ipmr_expire_process; |
1904 | register_netdevice_notifier(&ip_mr_notifier); | 1902 | register_netdevice_notifier(&ip_mr_notifier); |
1905 | #ifdef CONFIG_PROC_FS | 1903 | #ifdef CONFIG_PROC_FS |
1906 | proc_net_fops_create("ip_mr_vif", 0, &ipmr_vif_fops); | 1904 | proc_net_fops_create("ip_mr_vif", 0, &ipmr_vif_fops); |
1907 | proc_net_fops_create("ip_mr_cache", 0, &ipmr_mfc_fops); | 1905 | proc_net_fops_create("ip_mr_cache", 0, &ipmr_mfc_fops); |
1908 | #endif | 1906 | #endif |
1909 | } | 1907 | } |
1910 | 1908 |
net/ipv4/xfrm4_mode_tunnel.c
1 | /* | 1 | /* |
2 | * xfrm4_mode_tunnel.c - Tunnel mode encapsulation for IPv4. | 2 | * xfrm4_mode_tunnel.c - Tunnel mode encapsulation for IPv4. |
3 | * | 3 | * |
4 | * Copyright (c) 2004-2006 Herbert Xu <herbert@gondor.apana.org.au> | 4 | * Copyright (c) 2004-2006 Herbert Xu <herbert@gondor.apana.org.au> |
5 | */ | 5 | */ |
6 | 6 | ||
7 | #include <linux/init.h> | 7 | #include <linux/init.h> |
8 | #include <linux/kernel.h> | 8 | #include <linux/kernel.h> |
9 | #include <linux/module.h> | 9 | #include <linux/module.h> |
10 | #include <linux/skbuff.h> | 10 | #include <linux/skbuff.h> |
11 | #include <linux/stringify.h> | 11 | #include <linux/stringify.h> |
12 | #include <net/dst.h> | 12 | #include <net/dst.h> |
13 | #include <net/inet_ecn.h> | 13 | #include <net/inet_ecn.h> |
14 | #include <net/ip.h> | 14 | #include <net/ip.h> |
15 | #include <net/xfrm.h> | 15 | #include <net/xfrm.h> |
16 | 16 | ||
17 | static inline void ipip_ecn_decapsulate(struct sk_buff *skb) | 17 | static inline void ipip_ecn_decapsulate(struct sk_buff *skb) |
18 | { | 18 | { |
19 | struct iphdr *outer_iph = skb->nh.iph; | 19 | struct iphdr *outer_iph = skb->nh.iph; |
20 | struct iphdr *inner_iph = skb->h.ipiph; | 20 | struct iphdr *inner_iph = skb->h.ipiph; |
21 | 21 | ||
22 | if (INET_ECN_is_ce(outer_iph->tos)) | 22 | if (INET_ECN_is_ce(outer_iph->tos)) |
23 | IP_ECN_set_ce(inner_iph); | 23 | IP_ECN_set_ce(inner_iph); |
24 | } | 24 | } |
25 | 25 | ||
26 | /* Add encapsulation header. | 26 | /* Add encapsulation header. |
27 | * | 27 | * |
28 | * The top IP header will be constructed per RFC 2401. The following fields | 28 | * The top IP header will be constructed per RFC 2401. The following fields |
29 | * in it shall be filled in by x->type->output: | 29 | * in it shall be filled in by x->type->output: |
30 | * tot_len | 30 | * tot_len |
31 | * check | 31 | * check |
32 | * | 32 | * |
33 | * On exit, skb->h will be set to the start of the payload to be processed | 33 | * On exit, skb->h will be set to the start of the payload to be processed |
34 | * by x->type->output and skb->nh will be set to the top IP header. | 34 | * by x->type->output and skb->nh will be set to the top IP header. |
35 | */ | 35 | */ |
static int xfrm4_tunnel_output(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;
	struct xfrm_state *x = dst->xfrm;
	struct iphdr *iph, *top_iph;
	int flags;

	/* Remember the inner IPv4 header; it becomes the payload. */
	iph = skb->nh.iph;
	skb->h.ipiph = iph;

	/* Make room in front of the packet for the outer header. */
	skb->nh.raw = skb_push(skb, x->props.header_len);
	top_iph = skb->nh.iph;

	top_iph->ihl = 5;	/* no IP options in the outer header */
	top_iph->version = 4;

	/* DS disclosed */
	top_iph->tos = INET_ECN_encapsulate(iph->tos, iph->tos);

	flags = x->props.flags;
	if (flags & XFRM_STATE_NOECN)
		IP_ECN_clear(top_iph);

	/* Inherit DF from the inner header unless PMTU discovery is
	 * disabled on this state; a fragmentable packet gets an IP ID.
	 */
	top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ?
			    0 : (iph->frag_off & htons(IP_DF));
	if (!top_iph->frag_off)
		__ip_select_ident(top_iph, dst->child, 0);

	top_iph->ttl = dst_metric(dst->child, RTAX_HOPLIMIT);

	/* Outer addresses come from the IPsec state, not the packet. */
	top_iph->saddr = x->props.saddr.a4;
	top_iph->daddr = x->id.daddr.a4;
	top_iph->protocol = IPPROTO_IPIP;

	/* Clear any stale IP options control state on the outgoing skb. */
	memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
	return 0;
}
73 | 73 | ||
/* Remove the encapsulating IPv4 header for tunnel-mode input.
 * On success skb->nh points at the inner IP header and the MAC
 * header has been moved to stay adjacent to it.  Returns 0 or a
 * negative error.
 */
static int xfrm4_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
{
	struct iphdr *iph = skb->nh.iph;
	int err = -EINVAL;

	if (iph->protocol != IPPROTO_IPIP)
		goto out;
	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
		goto out;

	/* Need a private copy before rewriting headers in place. */
	if (skb_cloned(skb) &&
	    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
		goto out;

	if (x->props.flags & XFRM_STATE_DECAP_DSCP)
		ipv4_copy_dscp(iph, skb->h.ipiph);
	if (!(x->props.flags & XFRM_STATE_NOECN))
		ipip_ecn_decapsulate(skb);
	/* Slide the MAC header forward so it stays in front of the
	 * (now inner) IP header.
	 */
	skb->mac.raw = memmove(skb->data - skb->mac_len,
			       skb->mac.raw, skb->mac_len);
	skb->nh.raw = skb->data;
	err = 0;

out:
	return err;
}
101 | 100 | ||
/* IPv4 IPsec tunnel-mode handler: IPIP encapsulation on output,
 * decapsulation on input.
 */
static struct xfrm_mode xfrm4_tunnel_mode = {
	.input = xfrm4_tunnel_input,
	.output = xfrm4_tunnel_output,
	.owner = THIS_MODULE,
	.encap = XFRM_MODE_TUNNEL,
};
108 | 107 | ||
/* Module load: register the tunnel mode with the xfrm framework. */
static int __init xfrm4_tunnel_init(void)
{
	return xfrm_register_mode(&xfrm4_tunnel_mode, AF_INET);
}
113 | 112 | ||
/* Module unload: unregistration can only fail on a framework bug,
 * so treat failure as fatal.
 */
static void __exit xfrm4_tunnel_exit(void)
{
	int err;

	err = xfrm_unregister_mode(&xfrm4_tunnel_mode, AF_INET);
	BUG_ON(err);
}
121 | 120 | ||
122 | module_init(xfrm4_tunnel_init); | 121 | module_init(xfrm4_tunnel_init); |
123 | module_exit(xfrm4_tunnel_exit); | 122 | module_exit(xfrm4_tunnel_exit); |
124 | MODULE_LICENSE("GPL"); | 123 | MODULE_LICENSE("GPL"); |
125 | MODULE_ALIAS_XFRM_MODE(AF_INET, XFRM_MODE_TUNNEL); | 124 | MODULE_ALIAS_XFRM_MODE(AF_INET, XFRM_MODE_TUNNEL); |
126 | 125 |
net/ipv6/sit.c
1 | /* | 1 | /* |
2 | * IPv6 over IPv4 tunnel device - Simple Internet Transition (SIT) | 2 | * IPv6 over IPv4 tunnel device - Simple Internet Transition (SIT) |
3 | * Linux INET6 implementation | 3 | * Linux INET6 implementation |
4 | * | 4 | * |
5 | * Authors: | 5 | * Authors: |
6 | * Pedro Roque <roque@di.fc.ul.pt> | 6 | * Pedro Roque <roque@di.fc.ul.pt> |
7 | * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> | 7 | * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> |
8 | * | 8 | * |
9 | * $Id: sit.c,v 1.53 2001/09/25 05:09:53 davem Exp $ | 9 | * $Id: sit.c,v 1.53 2001/09/25 05:09:53 davem Exp $ |
10 | * | 10 | * |
11 | * This program is free software; you can redistribute it and/or | 11 | * This program is free software; you can redistribute it and/or |
12 | * modify it under the terms of the GNU General Public License | 12 | * modify it under the terms of the GNU General Public License |
13 | * as published by the Free Software Foundation; either version | 13 | * as published by the Free Software Foundation; either version |
14 | * 2 of the License, or (at your option) any later version. | 14 | * 2 of the License, or (at your option) any later version. |
15 | * | 15 | * |
16 | * Changes: | 16 | * Changes: |
17 | * Roger Venning <r.venning@telstra.com>: 6to4 support | 17 | * Roger Venning <r.venning@telstra.com>: 6to4 support |
18 | * Nate Thompson <nate@thebog.net>: 6to4 support | 18 | * Nate Thompson <nate@thebog.net>: 6to4 support |
19 | */ | 19 | */ |
20 | 20 | ||
21 | #include <linux/module.h> | 21 | #include <linux/module.h> |
22 | #include <linux/capability.h> | 22 | #include <linux/capability.h> |
23 | #include <linux/errno.h> | 23 | #include <linux/errno.h> |
24 | #include <linux/types.h> | 24 | #include <linux/types.h> |
25 | #include <linux/socket.h> | 25 | #include <linux/socket.h> |
26 | #include <linux/sockios.h> | 26 | #include <linux/sockios.h> |
27 | #include <linux/sched.h> | 27 | #include <linux/sched.h> |
28 | #include <linux/net.h> | 28 | #include <linux/net.h> |
29 | #include <linux/in6.h> | 29 | #include <linux/in6.h> |
30 | #include <linux/netdevice.h> | 30 | #include <linux/netdevice.h> |
31 | #include <linux/if_arp.h> | 31 | #include <linux/if_arp.h> |
32 | #include <linux/icmp.h> | 32 | #include <linux/icmp.h> |
33 | #include <asm/uaccess.h> | 33 | #include <asm/uaccess.h> |
34 | #include <linux/init.h> | 34 | #include <linux/init.h> |
35 | #include <linux/netfilter_ipv4.h> | 35 | #include <linux/netfilter_ipv4.h> |
36 | #include <linux/if_ether.h> | 36 | #include <linux/if_ether.h> |
37 | 37 | ||
38 | #include <net/sock.h> | 38 | #include <net/sock.h> |
39 | #include <net/snmp.h> | 39 | #include <net/snmp.h> |
40 | 40 | ||
41 | #include <net/ipv6.h> | 41 | #include <net/ipv6.h> |
42 | #include <net/protocol.h> | 42 | #include <net/protocol.h> |
43 | #include <net/transp_v6.h> | 43 | #include <net/transp_v6.h> |
44 | #include <net/ip6_fib.h> | 44 | #include <net/ip6_fib.h> |
45 | #include <net/ip6_route.h> | 45 | #include <net/ip6_route.h> |
46 | #include <net/ndisc.h> | 46 | #include <net/ndisc.h> |
47 | #include <net/addrconf.h> | 47 | #include <net/addrconf.h> |
48 | #include <net/ip.h> | 48 | #include <net/ip.h> |
49 | #include <net/udp.h> | 49 | #include <net/udp.h> |
50 | #include <net/icmp.h> | 50 | #include <net/icmp.h> |
51 | #include <net/ipip.h> | 51 | #include <net/ipip.h> |
52 | #include <net/inet_ecn.h> | 52 | #include <net/inet_ecn.h> |
53 | #include <net/xfrm.h> | 53 | #include <net/xfrm.h> |
54 | #include <net/dsfield.h> | 54 | #include <net/dsfield.h> |
55 | 55 | ||
56 | /* | 56 | /* |
57 | This version of net/ipv6/sit.c is cloned of net/ipv4/ip_gre.c | 57 | This version of net/ipv6/sit.c is cloned of net/ipv4/ip_gre.c |
58 | 58 | ||
59 | For comments look at net/ipv4/ip_gre.c --ANK | 59 | For comments look at net/ipv4/ip_gre.c --ANK |
60 | */ | 60 | */ |
61 | 61 | ||
62 | #define HASH_SIZE 16 | 62 | #define HASH_SIZE 16 |
63 | #define HASH(addr) ((addr^(addr>>4))&0xF) | 63 | #define HASH(addr) ((addr^(addr>>4))&0xF) |
64 | 64 | ||
65 | static int ipip6_fb_tunnel_init(struct net_device *dev); | 65 | static int ipip6_fb_tunnel_init(struct net_device *dev); |
66 | static int ipip6_tunnel_init(struct net_device *dev); | 66 | static int ipip6_tunnel_init(struct net_device *dev); |
67 | static void ipip6_tunnel_setup(struct net_device *dev); | 67 | static void ipip6_tunnel_setup(struct net_device *dev); |
68 | 68 | ||
69 | static struct net_device *ipip6_fb_tunnel_dev; | 69 | static struct net_device *ipip6_fb_tunnel_dev; |
70 | 70 | ||
71 | static struct ip_tunnel *tunnels_r_l[HASH_SIZE]; | 71 | static struct ip_tunnel *tunnels_r_l[HASH_SIZE]; |
72 | static struct ip_tunnel *tunnels_r[HASH_SIZE]; | 72 | static struct ip_tunnel *tunnels_r[HASH_SIZE]; |
73 | static struct ip_tunnel *tunnels_l[HASH_SIZE]; | 73 | static struct ip_tunnel *tunnels_l[HASH_SIZE]; |
74 | static struct ip_tunnel *tunnels_wc[1]; | 74 | static struct ip_tunnel *tunnels_wc[1]; |
75 | static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l }; | 75 | static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l }; |
76 | 76 | ||
77 | static DEFINE_RWLOCK(ipip6_lock); | 77 | static DEFINE_RWLOCK(ipip6_lock); |
78 | 78 | ||
/* Find an active (IFF_UP) tunnel for the given endpoint pair,
 * preferring the most specific match: both endpoints, then remote
 * only, then local only, then the wildcard (fallback) tunnel.
 * Callers in this file hold ipip6_lock for reading around this.
 */
static struct ip_tunnel * ipip6_tunnel_lookup(u32 remote, u32 local)
{
	unsigned h0 = HASH(remote);
	unsigned h1 = HASH(local);
	struct ip_tunnel *t;

	for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
			return t;
	}
	for (t = tunnels_r[h0]; t; t = t->next) {
		if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
			return t;
	}
	for (t = tunnels_l[h1]; t; t = t->next) {
		if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
			return t;
	}
	if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
		return t;
	return NULL;
}
102 | 102 | ||
103 | static struct ip_tunnel ** ipip6_bucket(struct ip_tunnel *t) | 103 | static struct ip_tunnel ** ipip6_bucket(struct ip_tunnel *t) |
104 | { | 104 | { |
105 | u32 remote = t->parms.iph.daddr; | 105 | u32 remote = t->parms.iph.daddr; |
106 | u32 local = t->parms.iph.saddr; | 106 | u32 local = t->parms.iph.saddr; |
107 | unsigned h = 0; | 107 | unsigned h = 0; |
108 | int prio = 0; | 108 | int prio = 0; |
109 | 109 | ||
110 | if (remote) { | 110 | if (remote) { |
111 | prio |= 2; | 111 | prio |= 2; |
112 | h ^= HASH(remote); | 112 | h ^= HASH(remote); |
113 | } | 113 | } |
114 | if (local) { | 114 | if (local) { |
115 | prio |= 1; | 115 | prio |= 1; |
116 | h ^= HASH(local); | 116 | h ^= HASH(local); |
117 | } | 117 | } |
118 | return &tunnels[prio][h]; | 118 | return &tunnels[prio][h]; |
119 | } | 119 | } |
120 | 120 | ||
/* Remove @t from its hash chain, if it is linked there. */
static void ipip6_tunnel_unlink(struct ip_tunnel *t)
{
	struct ip_tunnel **tp;

	for (tp = ipip6_bucket(t); *tp; tp = &(*tp)->next) {
		if (t == *tp) {
			/* Only the pointer splice runs under the lock. */
			write_lock_bh(&ipip6_lock);
			*tp = t->next;
			write_unlock_bh(&ipip6_lock);
			break;
		}
	}
}
134 | 134 | ||
135 | static void ipip6_tunnel_link(struct ip_tunnel *t) | 135 | static void ipip6_tunnel_link(struct ip_tunnel *t) |
136 | { | 136 | { |
137 | struct ip_tunnel **tp = ipip6_bucket(t); | 137 | struct ip_tunnel **tp = ipip6_bucket(t); |
138 | 138 | ||
139 | t->next = *tp; | 139 | t->next = *tp; |
140 | write_lock_bh(&ipip6_lock); | 140 | write_lock_bh(&ipip6_lock); |
141 | *tp = t; | 141 | *tp = t; |
142 | write_unlock_bh(&ipip6_lock); | 142 | write_unlock_bh(&ipip6_lock); |
143 | } | 143 | } |
144 | 144 | ||
/* Look up a tunnel matching @parms exactly; if none exists and
 * @create is set, allocate and register a new sit net_device for
 * it.  Returns the tunnel, or NULL on lookup miss / failure.
 */
static struct ip_tunnel * ipip6_tunnel_locate(struct ip_tunnel_parm *parms, int create)
{
	u32 remote = parms->iph.daddr;
	u32 local = parms->iph.saddr;
	struct ip_tunnel *t, **tp, *nt;
	struct net_device *dev;
	unsigned h = 0;
	int prio = 0;
	char name[IFNAMSIZ];

	/* Same bucket selection as ipip6_bucket(), keyed on @parms. */
	if (remote) {
		prio |= 2;
		h ^= HASH(remote);
	}
	if (local) {
		prio |= 1;
		h ^= HASH(local);
	}
	for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
			return t;
	}
	if (!create)
		goto failed;

	/* Pick a device name: the requested one, or the first free sitN. */
	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else {
		int i;
		for (i=1; i<100; i++) {
			sprintf(name, "sit%d", i);
			if (__dev_get_by_name(name) == NULL)
				break;
		}
		if (i==100)
			goto failed;
	}

	dev = alloc_netdev(sizeof(*t), name, ipip6_tunnel_setup);
	if (dev == NULL)
		return NULL;

	nt = netdev_priv(dev);
	dev->init = ipip6_tunnel_init;
	nt->parms = *parms;

	if (register_netdevice(dev) < 0) {
		free_netdev(dev);
		goto failed;
	}

	/* Reference dropped in ipip6_tunnel_uninit(). */
	dev_hold(dev);

	ipip6_tunnel_link(nt);
	return nt;

failed:
	return NULL;
}
204 | 204 | ||
/* net_device uninit hook: unlink the tunnel and drop the reference
 * taken in ipip6_tunnel_locate().  The fallback device lives only
 * in the wildcard slot, not in a hash chain, so it is cleared
 * directly.
 */
static void ipip6_tunnel_uninit(struct net_device *dev)
{
	if (dev == ipip6_fb_tunnel_dev) {
		write_lock_bh(&ipip6_lock);
		tunnels_wc[0] = NULL;
		write_unlock_bh(&ipip6_lock);
		dev_put(dev);
	} else {
		ipip6_tunnel_unlink(netdev_priv(dev));
		dev_put(dev);
	}
}
217 | 217 | ||
218 | 218 | ||
/* ICMP error handler for sit tunnels: on relevant errors, bump a
 * per-tunnel error counter/timestamp that the transmit path can
 * consult.
 */
static void ipip6_err(struct sk_buff *skb, u32 info)
{
#ifndef I_WISH_WORLD_WERE_PERFECT

/* It is not :-( All the routers (except for Linux) return only
   8 bytes of packet payload. It means, that precise relaying of
   ICMP in the real Internet is absolutely infeasible.
 */
	struct iphdr *iph = (struct iphdr*)skb->data;
	int type = skb->h.icmph->type;
	int code = skb->h.icmph->code;
	struct ip_tunnel *t;

	/* Filter down to the error types worth recording. */
	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return;
		case ICMP_FRAG_NEEDED:
			/* Soft state for pmtu is maintained by IP core. */
			return;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return;
		break;
	}

	read_lock(&ipip6_lock);
	/* The embedded header is from OUR transmitted packet, so its
	 * daddr is the tunnel's remote and saddr the local endpoint.
	 */
	t = ipip6_tunnel_lookup(iph->daddr, iph->saddr);
	if (t == NULL || t->parms.iph.daddr == 0)
		goto out;
	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		goto out;

	/* Count consecutive errors inside the IPTUNNEL_ERR_TIMEO window. */
	if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
out:
	read_unlock(&ipip6_lock);
	return;
#else
	/* NOTE(review): this branch references 'dp' and 'len', which are
	 * not parameters of this function, so it cannot compile; it is
	 * kept only as documentation of the "ideal" ICMP-relay algorithm.
	 */
	struct iphdr *iph = (struct iphdr*)dp;
	int hlen = iph->ihl<<2;
	struct ipv6hdr *iph6;
	int type = skb->h.icmph->type;
	int code = skb->h.icmph->code;
	int rel_type = 0;
	int rel_code = 0;
	int rel_info = 0;
	struct sk_buff *skb2;
	struct rt6_info *rt6i;

	if (len < hlen + sizeof(struct ipv6hdr))
		return;
	iph6 = (struct ipv6hdr*)(dp + hlen);

	/* Translate the IPv4 ICMP error into its IPv6 counterpart. */
	switch (type) {
	default:
		return;
	case ICMP_PARAMETERPROB:
		if (skb->h.icmph->un.gateway < hlen)
			return;

		/* So... This guy found something strange INSIDE encapsulated
		   packet. Well, he is fool, but what can we do ?
		 */
		rel_type = ICMPV6_PARAMPROB;
		rel_info = skb->h.icmph->un.gateway - hlen;
		break;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return;
		case ICMP_FRAG_NEEDED:
			/* Too complicated case ... */
			return;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe, it is just ether pollution. --ANK
			 */
			rel_type = ICMPV6_DEST_UNREACH;
			rel_code = ICMPV6_ADDR_UNREACH;
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return;
		rel_type = ICMPV6_TIME_EXCEED;
		rel_code = ICMPV6_EXC_HOPLIMIT;
		break;
	}

	/* Prepare fake skb to feed it to icmpv6_send */
	skb2 = skb_clone(skb, GFP_ATOMIC);
	if (skb2 == NULL)
		return;
	dst_release(skb2->dst);
	skb2->dst = NULL;
	skb_pull(skb2, skb->data - (u8*)iph6);
	skb2->nh.raw = skb2->data;

	/* Try to guess incoming interface */
	rt6i = rt6_lookup(&iph6->saddr, NULL, NULL, 0);
	if (rt6i && rt6i->rt6i_dev) {
		skb2->dev = rt6i->rt6i_dev;

		rt6i = rt6_lookup(&iph6->daddr, &iph6->saddr, NULL, 0);

		if (rt6i && rt6i->rt6i_dev && rt6i->rt6i_dev->type == ARPHRD_SIT) {
			struct ip_tunnel *t = netdev_priv(rt6i->rt6i_dev);
			if (rel_type == ICMPV6_TIME_EXCEED && t->parms.iph.ttl) {
				rel_type = ICMPV6_DEST_UNREACH;
				rel_code = ICMPV6_ADDR_UNREACH;
			}
			icmpv6_send(skb2, rel_type, rel_code, rel_info, skb2->dev);
		}
	}
	kfree_skb(skb2);
	return;
#endif
}
361 | 361 | ||
362 | static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) | 362 | static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) |
363 | { | 363 | { |
364 | if (INET_ECN_is_ce(iph->tos)) | 364 | if (INET_ECN_is_ce(iph->tos)) |
365 | IP6_ECN_set_ce(skb->nh.ipv6h); | 365 | IP6_ECN_set_ce(skb->nh.ipv6h); |
366 | } | 366 | } |
367 | 367 | ||
/* Receive path: decapsulate an IPv6-in-IPv4 packet and hand the
 * inner IPv6 packet back to the stack via netif_rx() on the tunnel
 * device.  If no tunnel matches, reject with ICMP port unreachable.
 */
static int ipip6_rcv(struct sk_buff *skb)
{
	struct iphdr *iph;
	struct ip_tunnel *tunnel;

	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
		goto out;

	iph = skb->nh.iph;

	read_lock(&ipip6_lock);
	if ((tunnel = ipip6_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
		secpath_reset(skb);
		/* Re-point header offsets at the inner IPv6 packet. */
		skb->mac.raw = skb->nh.raw;
		skb->nh.raw = skb->data;
		IPCB(skb)->flags = 0;
		skb->protocol = htons(ETH_P_IPV6);
		skb->pkt_type = PACKET_HOST;
		tunnel->stat.rx_packets++;
		tunnel->stat.rx_bytes += skb->len;
		skb->dev = tunnel->dev;
		/* Drop the IPv4 route/conntrack state; the inner packet
		 * will be routed afresh as IPv6.
		 */
		dst_release(skb->dst);
		skb->dst = NULL;
		nf_reset(skb);
		ipip6_ecn_decapsulate(iph, skb);
		netif_rx(skb);
		read_unlock(&ipip6_lock);
		return 0;
	}

	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
	kfree_skb(skb);
	read_unlock(&ipip6_lock);
out:
	return 0;
}
405 | 404 | ||
406 | /* Returns the embedded IPv4 address if the IPv6 address | 405 | /* Returns the embedded IPv4 address if the IPv6 address |
407 | comes from 6to4 (RFC 3056) addr space */ | 406 | comes from 6to4 (RFC 3056) addr space */ |
408 | 407 | ||
409 | static inline u32 try_6to4(struct in6_addr *v6dst) | 408 | static inline u32 try_6to4(struct in6_addr *v6dst) |
410 | { | 409 | { |
411 | u32 dst = 0; | 410 | u32 dst = 0; |
412 | 411 | ||
413 | if (v6dst->s6_addr16[0] == htons(0x2002)) { | 412 | if (v6dst->s6_addr16[0] == htons(0x2002)) { |
414 | /* 6to4 v6 addr has 16 bits prefix, 32 v4addr, 16 SLA, ... */ | 413 | /* 6to4 v6 addr has 16 bits prefix, 32 v4addr, 16 SLA, ... */ |
415 | memcpy(&dst, &v6dst->s6_addr16[1], 4); | 414 | memcpy(&dst, &v6dst->s6_addr16[1], 4); |
416 | } | 415 | } |
417 | return dst; | 416 | return dst; |
418 | } | 417 | } |
419 | 418 | ||
420 | /* | 419 | /* |
421 | * This function assumes it is being called from dev_queue_xmit() | 420 | * This function assumes it is being called from dev_queue_xmit() |
422 | * and that skb is filled properly by that function. | 421 | * and that skb is filled properly by that function. |
423 | */ | 422 | */ |
424 | 423 | ||
/*
 * Encapsulate an IPv6 packet in an IPv4 header and transmit it through
 * the tunnel.  Always returns 0 (NETDEV_TX_OK-style); on any error the
 * skb is freed and the relevant tunnel statistic is bumped.
 */
static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net_device_stats *stats = &tunnel->stat;
	struct iphdr *tiph = &tunnel->parms.iph;	/* outer-header template */
	struct ipv6hdr *iph6 = skb->nh.ipv6h;
	u8 tos = tunnel->parms.iph.tos;
	struct rtable *rt;			/* Route to the other host */
	struct net_device *tdev;		/* Device to other host */
	struct iphdr *iph;			/* Our new IP header */
	int max_headroom;			/* The extra header space needed */
	u32 dst = tiph->daddr;
	int mtu;
	struct in6_addr *addr6;
	int addr_type;

	/* Re-entrancy guard: refuse if we are already inside our own xmit. */
	if (tunnel->recursion++) {
		tunnel->stat.collisions++;
		goto tx_error;
	}

	if (skb->protocol != htons(ETH_P_IPV6))
		goto tx_error;

	/* No fixed remote endpoint configured: try to derive the IPv4
	 * destination from the inner address (presumably 6to4 2002::/16
	 * mapping — see try_6to4(), defined elsewhere in this file). */
	if (!dst)
		dst = try_6to4(&iph6->daddr);

	if (!dst) {
		/* Still nothing: fall back to the route's nexthop neighbour,
		 * which must carry a v4-compatible IPv6 address. */
		struct neighbour *neigh = NULL;

		if (skb->dst)
			neigh = skb->dst->neighbour;

		if (neigh == NULL) {
			if (net_ratelimit())
				printk(KERN_DEBUG "sit: nexthop == NULL\n");
			goto tx_error;
		}

		addr6 = (struct in6_addr*)&neigh->primary_key;
		addr_type = ipv6_addr_type(addr6);

		if (addr_type == IPV6_ADDR_ANY) {
			addr6 = &skb->nh.ipv6h->daddr;
			addr_type = ipv6_addr_type(addr6);
		}

		if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
			goto tx_error_icmp;

		/* Low 32 bits of a v4-compatible address hold the IPv4 one. */
		dst = addr6->s6_addr32[3];
	}

	{
		struct flowi fl = { .nl_u = { .ip4_u =
					      { .daddr = dst,
						.saddr = tiph->saddr,
						.tos = RT_TOS(tos) } },
				    .oif = tunnel->parms.link,
				    .proto = IPPROTO_IPV6 };
		if (ip_route_output_key(&rt, &fl)) {
			tunnel->stat.tx_carrier_errors++;
			goto tx_error_icmp;
		}
	}
	if (rt->rt_type != RTN_UNICAST) {
		ip_rt_put(rt);
		tunnel->stat.tx_carrier_errors++;
		goto tx_error_icmp;
	}
	tdev = rt->u.dst.dev;

	/* Routing back out of this same tunnel device would loop forever. */
	if (tdev == dev) {
		ip_rt_put(rt);
		tunnel->stat.collisions++;
		goto tx_error;
	}

	/* Pick the effective MTU: if DF is set in the template, account for
	 * the IPv4 encapsulation overhead on the outgoing route; otherwise
	 * use the inner route's MTU (or the device MTU as a fallback). */
	if (tiph->frag_off)
		mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
	else
		mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;

	if (mtu < 68) {		/* 68 = minimum IPv4 MTU (RFC 791) */
		tunnel->stat.collisions++;
		ip_rt_put(rt);
		goto tx_error;
	}
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;
	if (tunnel->parms.iph.daddr && skb->dst)
		skb->dst->ops->update_pmtu(skb->dst, mtu);

	/* Too big: tell the IPv6 sender rather than fragment the tunnel. */
	if (skb->len > mtu) {
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
		ip_rt_put(rt);
		goto tx_error;
	}

	/* Replay a recently received ICMP error back to local senders for a
	 * while (IPTUNNEL_ERR_TIMEO window), then forget it. */
	if (tunnel->err_count > 0) {
		if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
			tunnel->err_count--;
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom = LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr);

	if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
		if (!new_skb) {
			ip_rt_put(rt);
			stats->tx_dropped++;
			dev_kfree_skb(skb);
			tunnel->recursion--;
			return 0;
		}
		if (skb->sk)
			skb_set_owner_w(new_skb, skb->sk);
		dev_kfree_skb(skb);
		skb = new_skb;
		iph6 = skb->nh.ipv6h;	/* header pointers moved with the copy */
	}

	skb->h.raw = skb->nh.raw;
	skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
	/* The outer IPv4 header carries no options; clear any inherited
	 * control-block state before handing the skb to IPv4 output. */
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	IPCB(skb)->flags = 0;
	dst_release(skb->dst);
	skb->dst = &rt->u.dst;

	/*
	 * Push down and install the IPIP header.
	 */

	iph = skb->nh.iph;
	iph->version = 4;
	iph->ihl = sizeof(struct iphdr)>>2;
	if (mtu > IPV6_MIN_MTU)
		iph->frag_off = htons(IP_DF);
	else
		iph->frag_off = 0;

	iph->protocol = IPPROTO_IPV6;
	iph->tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6));
	iph->daddr = rt->rt_dst;
	iph->saddr = rt->rt_src;

	/* ttl == 0 in the template means "inherit the inner hop limit". */
	if ((iph->ttl = tiph->ttl) == 0)
		iph->ttl = iph6->hop_limit;

	nf_reset(skb);

	IPTUNNEL_XMIT();
	tunnel->recursion--;
	return 0;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	stats->tx_errors++;
	dev_kfree_skb(skb);
	tunnel->recursion--;
	return 0;
}
594 | 593 | ||
/*
 * Tunnel configuration ioctls (SIOCGETTUNNEL / SIOCADDTUNNEL /
 * SIOCCHGTUNNEL / SIOCDELTUNNEL), reached via dev->do_ioctl.  The
 * user hands us a struct ip_tunnel_parm through ifr->ifr_ifru.ifru_data.
 */
static int
ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip_tunnel_parm p;
	struct ip_tunnel *t;

	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		/* On the fallback device the caller may name any tunnel by
		 * its parameters; otherwise report this device's own. */
		if (dev == ipip6_fb_tunnel_dev) {
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
				err = -EFAULT;
				break;
			}
			t = ipip6_tunnel_locate(&p, 0);
		}
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(&p, &t->parms, sizeof(p));
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
			err = -EFAULT;
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
			goto done;

		/* The outer header template must be a bare IPv4 header
		 * carrying IPv6: no options (ihl == 5) and no fragment
		 * bits other than DF. */
		err = -EINVAL;
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPV6 ||
		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
			goto done;
		/* A fixed TTL implies DF (PMTU discovery semantics). */
		if (p.iph.ttl)
			p.iph.frag_off |= htons(IP_DF);

		/* For ADD this also creates the tunnel if it is new. */
		t = ipip6_tunnel_locate(&p, cmd == SIOCADDTUNNEL);

		if (dev != ipip6_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				/* Requested parameters already belong to a
				 * different device. */
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				/* Changing this device's endpoints in place:
				 * the point-to-point nature must not change,
				 * and the tunnel must be rehashed under its
				 * new addresses. */
				if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
				    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
					err = -EINVAL;
					break;
				}
				t = netdev_priv(dev);
				ipip6_tunnel_unlink(t);
				t->parms.iph.saddr = p.iph.saddr;
				t->parms.iph.daddr = p.iph.daddr;
				memcpy(dev->dev_addr, &p.iph.saddr, 4);
				memcpy(dev->broadcast, &p.iph.daddr, 4);
				ipip6_tunnel_link(t);
				netdev_state_change(dev);
			}
		}

		if (t) {
			err = 0;
			if (cmd == SIOCCHGTUNNEL) {
				t->parms.iph.ttl = p.iph.ttl;
				t->parms.iph.tos = p.iph.tos;
			}
			/* Echo back the parameters actually in effect. */
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
				err = -EFAULT;
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		if (dev == ipip6_fb_tunnel_dev) {
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
				goto done;
			err = -ENOENT;
			if ((t = ipip6_tunnel_locate(&p, 0)) == NULL)
				goto done;
			err = -EPERM;
			/* The fallback tunnel itself may not be deleted. */
			if (t == netdev_priv(ipip6_fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		err = unregister_netdevice(dev);
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
700 | 699 | ||
701 | static struct net_device_stats *ipip6_tunnel_get_stats(struct net_device *dev) | 700 | static struct net_device_stats *ipip6_tunnel_get_stats(struct net_device *dev) |
702 | { | 701 | { |
703 | return &(((struct ip_tunnel*)netdev_priv(dev))->stat); | 702 | return &(((struct ip_tunnel*)netdev_priv(dev))->stat); |
704 | } | 703 | } |
705 | 704 | ||
706 | static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu) | 705 | static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu) |
707 | { | 706 | { |
708 | if (new_mtu < IPV6_MIN_MTU || new_mtu > 0xFFF8 - sizeof(struct iphdr)) | 707 | if (new_mtu < IPV6_MIN_MTU || new_mtu > 0xFFF8 - sizeof(struct iphdr)) |
709 | return -EINVAL; | 708 | return -EINVAL; |
710 | dev->mtu = new_mtu; | 709 | dev->mtu = new_mtu; |
711 | return 0; | 710 | return 0; |
712 | } | 711 | } |
713 | 712 | ||
714 | static void ipip6_tunnel_setup(struct net_device *dev) | 713 | static void ipip6_tunnel_setup(struct net_device *dev) |
715 | { | 714 | { |
716 | SET_MODULE_OWNER(dev); | 715 | SET_MODULE_OWNER(dev); |
717 | dev->uninit = ipip6_tunnel_uninit; | 716 | dev->uninit = ipip6_tunnel_uninit; |
718 | dev->destructor = free_netdev; | 717 | dev->destructor = free_netdev; |
719 | dev->hard_start_xmit = ipip6_tunnel_xmit; | 718 | dev->hard_start_xmit = ipip6_tunnel_xmit; |
720 | dev->get_stats = ipip6_tunnel_get_stats; | 719 | dev->get_stats = ipip6_tunnel_get_stats; |
721 | dev->do_ioctl = ipip6_tunnel_ioctl; | 720 | dev->do_ioctl = ipip6_tunnel_ioctl; |
722 | dev->change_mtu = ipip6_tunnel_change_mtu; | 721 | dev->change_mtu = ipip6_tunnel_change_mtu; |
723 | 722 | ||
724 | dev->type = ARPHRD_SIT; | 723 | dev->type = ARPHRD_SIT; |
725 | dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr); | 724 | dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr); |
726 | dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr); | 725 | dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr); |
727 | dev->flags = IFF_NOARP; | 726 | dev->flags = IFF_NOARP; |
728 | dev->iflink = 0; | 727 | dev->iflink = 0; |
729 | dev->addr_len = 4; | 728 | dev->addr_len = 4; |
730 | } | 729 | } |
731 | 730 | ||
/*
 * Per-device init (dev->init): publish the configured IPv4 endpoints as
 * the device's hardware/broadcast addresses and, when a remote endpoint
 * or bound link is known, size hard_header_len and MTU from the
 * underlying device.  Always returns 0.
 */
static int ipip6_tunnel_init(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel;
	struct iphdr *iph;

	tunnel = netdev_priv(dev);
	iph = &tunnel->parms.iph;

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	/* dev_addr/broadcast hold the IPv4 endpoints (addr_len == 4,
	 * set in ipip6_tunnel_setup). */
	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);

	if (iph->daddr) {
		/* Fixed remote endpoint: route to it to discover the
		 * underlying device.  A lookup failure is simply ignored
		 * and the defaults from ipip6_tunnel_setup() remain. */
		struct flowi fl = { .nl_u = { .ip4_u =
					      { .daddr = iph->daddr,
						.saddr = iph->saddr,
						.tos = RT_TOS(iph->tos) } },
				    .oif = tunnel->parms.link,
				    .proto = IPPROTO_IPV6 };
		struct rtable *rt;
		if (!ip_route_output_key(&rt, &fl)) {
			tdev = rt->u.dst.dev;
			ip_rt_put(rt);
		}
		dev->flags |= IFF_POINTOPOINT;
	}

	/* No route-derived device: fall back to the explicitly bound link. */
	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->parms.link);

	if (tdev) {
		/* Reserve room for the outer IPv4 header, but never let the
		 * MTU drop below the IPv6 minimum. */
		dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
		dev->mtu = tdev->mtu - sizeof(struct iphdr);
		if (dev->mtu < IPV6_MIN_MTU)
			dev->mtu = IPV6_MIN_MTU;
	}
	dev->iflink = tunnel->parms.link;

	return 0;
}
775 | 774 | ||
776 | static int __init ipip6_fb_tunnel_init(struct net_device *dev) | 775 | static int __init ipip6_fb_tunnel_init(struct net_device *dev) |
777 | { | 776 | { |
778 | struct ip_tunnel *tunnel = netdev_priv(dev); | 777 | struct ip_tunnel *tunnel = netdev_priv(dev); |
779 | struct iphdr *iph = &tunnel->parms.iph; | 778 | struct iphdr *iph = &tunnel->parms.iph; |
780 | 779 | ||
781 | tunnel->dev = dev; | 780 | tunnel->dev = dev; |
782 | strcpy(tunnel->parms.name, dev->name); | 781 | strcpy(tunnel->parms.name, dev->name); |
783 | 782 | ||
784 | iph->version = 4; | 783 | iph->version = 4; |
785 | iph->protocol = IPPROTO_IPV6; | 784 | iph->protocol = IPPROTO_IPV6; |
786 | iph->ihl = 5; | 785 | iph->ihl = 5; |
787 | iph->ttl = 64; | 786 | iph->ttl = 64; |
788 | 787 | ||
789 | dev_hold(dev); | 788 | dev_hold(dev); |
790 | tunnels_wc[0] = tunnel; | 789 | tunnels_wc[0] = tunnel; |
791 | return 0; | 790 | return 0; |
792 | } | 791 | } |
793 | 792 | ||
/* IPv4 handler for protocol 41 (IPv6-in-IPv4): receive and ICMP-error
 * callbacks for sit packets. */
static struct net_protocol sit_protocol = {
	.handler	=	ipip6_rcv,
	.err_handler	=	ipip6_err,
};
798 | 797 | ||
799 | static void __exit sit_destroy_tunnels(void) | 798 | static void __exit sit_destroy_tunnels(void) |
800 | { | 799 | { |
801 | int prio; | 800 | int prio; |
802 | 801 | ||
803 | for (prio = 1; prio < 4; prio++) { | 802 | for (prio = 1; prio < 4; prio++) { |
804 | int h; | 803 | int h; |
805 | for (h = 0; h < HASH_SIZE; h++) { | 804 | for (h = 0; h < HASH_SIZE; h++) { |
806 | struct ip_tunnel *t; | 805 | struct ip_tunnel *t; |
807 | while ((t = tunnels[prio][h]) != NULL) | 806 | while ((t = tunnels[prio][h]) != NULL) |
808 | unregister_netdevice(t->dev); | 807 | unregister_netdevice(t->dev); |
809 | } | 808 | } |
810 | } | 809 | } |
811 | } | 810 | } |
812 | 811 | ||
/*
 * Module unload: stop receiving protocol-41 packets first, then tear
 * down all tunnel devices under the RTNL lock.
 */
void __exit sit_cleanup(void)
{
	inet_del_protocol(&sit_protocol, IPPROTO_IPV6);

	rtnl_lock();
	sit_destroy_tunnels();
	/* sit_destroy_tunnels() skips chain 0, so the fallback device is
	 * unregistered explicitly here. */
	unregister_netdevice(ipip6_fb_tunnel_dev);
	rtnl_unlock();
}
822 | 821 | ||
/*
 * Module load: register the protocol-41 handler, then allocate and
 * register the fallback device "sit0".  Unwinds via gotos on failure.
 * Returns 0 on success or a negative errno.
 */
int __init sit_init(void)
{
	int err;

	printk(KERN_INFO "IPv6 over IPv4 tunneling driver\n");

	if (inet_add_protocol(&sit_protocol, IPPROTO_IPV6) < 0) {
		/* Protocol slot already claimed by another handler. */
		printk(KERN_INFO "sit init: Can't add protocol\n");
		return -EAGAIN;
	}

	ipip6_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "sit0",
					   ipip6_tunnel_setup);
	if (!ipip6_fb_tunnel_dev) {
		err = -ENOMEM;
		goto err1;
	}

	ipip6_fb_tunnel_dev->init = ipip6_fb_tunnel_init;

	if ((err = register_netdev(ipip6_fb_tunnel_dev)))
		goto err2;

 out:
	return err;
 err2:
	/* Device allocated but never registered: plain free is correct. */
	free_netdev(ipip6_fb_tunnel_dev);
 err1:
	inet_del_protocol(&sit_protocol, IPPROTO_IPV6);
	goto out;
}
854 | 853 |