Commit d19d56ddc88e7895429ef118db9c83c7bbe3ce6a

Authored by Eric Dumazet
Committed by David S. Miller
1 parent de213e5eed

net: Introduce skb_tunnel_rx() helper

skb rxhash should be cleared when a skb is handled by a tunnel before
being delivered again, so that correct packet steering can take place.

There are other cleanups and accounting that we can factor into a new
helper, skb_tunnel_rx().

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
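
For illustration, a minimal sketch of how a tunnel driver's receive path looks with the new helper. skb_tunnel_rx() itself is the helper added by this commit (see the dst.h hunk below); the surrounding decap_rcv() skeleton and its use of tunnel->hlen are a hypothetical example, not code from any in-tree driver.

/* Hypothetical decapsulation path: after stripping the outer headers,
 * hand the inner packet back to the stack through the tunnel device.
 * skb_tunnel_rx() now performs the bookkeeping each driver used to
 * open-code: set skb->dev, bump rx_packets/rx_bytes, clear skb->rxhash
 * (so packet steering re-hashes the inner flow), drop the outer dst
 * and reset netfilter state.
 */
static int decap_rcv(struct sk_buff *skb, struct ip_tunnel *tunnel)
{
        if (!pskb_may_pull(skb, tunnel->hlen))  /* outer + tunnel header */
                goto drop;
        __pskb_pull(skb, tunnel->hlen);

        skb_tunnel_rx(skb, tunnel->dev);        /* replaces five open-coded lines */

        skb_reset_network_header(skb);
        netif_rx(skb);                          /* re-enter the stack */
        return 0;
drop:
        kfree_skb(skb);
        return 0;
}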

Showing 7 changed files with 34 additions and 34 deletions

include/net/dst.h

/*
 * net/dst.h    Protocol independent destination cache definitions.
 *
 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 */

#ifndef _NET_DST_H
#define _NET_DST_H

#include <net/dst_ops.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/rcupdate.h>
#include <linux/jiffies.h>
#include <net/neighbour.h>
#include <asm/processor.h>

/*
 * 0 - no debugging messages
 * 1 - rare events and bugs (default)
 * 2 - trace mode.
 */
#define RT_CACHE_DEBUG          0

#define DST_GC_MIN      (HZ/10)
#define DST_GC_INC      (HZ/2)
#define DST_GC_MAX      (120*HZ)

/* Each dst_entry has reference count and sits in some parent list(s).
 * When it is removed from parent list, it is "freed" (dst_free).
 * After this it enters dead state (dst->obsolete > 0) and if its refcnt
 * is zero, it can be destroyed immediately, otherwise it is added
 * to gc list and garbage collector periodically checks the refcnt.
 */

struct sk_buff;

struct dst_entry {
        struct rcu_head         rcu_head;
        struct dst_entry        *child;
        struct net_device       *dev;
        short                   error;
        short                   obsolete;
        int                     flags;
#define DST_HOST                1
#define DST_NOXFRM              2
#define DST_NOPOLICY            4
#define DST_NOHASH              8
        unsigned long           expires;

        unsigned short          header_len;     /* more space at head required */
        unsigned short          trailer_len;    /* space to reserve at tail */

        unsigned int            rate_tokens;
        unsigned long           rate_last;      /* rate limiting for ICMP */

        struct dst_entry        *path;

        struct neighbour        *neighbour;
        struct hh_cache         *hh;
#ifdef CONFIG_XFRM
        struct xfrm_state       *xfrm;
#else
        void                    *__pad1;
#endif
        int                     (*input)(struct sk_buff*);
        int                     (*output)(struct sk_buff*);

        struct dst_ops          *ops;

        u32                     metrics[RTAX_MAX];

#ifdef CONFIG_NET_CLS_ROUTE
        __u32                   tclassid;
#else
        __u32                   __pad2;
#endif


        /*
         * Align __refcnt to a 64 bytes alignment
         * (L1_CACHE_SIZE would be too much)
         */
#ifdef CONFIG_64BIT
        long                    __pad_to_align_refcnt[1];
#endif
        /*
         * __refcnt wants to be on a different cache line from
         * input/output/ops or performance tanks badly
         */
        atomic_t                __refcnt;       /* client references */
        int                     __use;
        unsigned long           lastuse;
        union {
                struct dst_entry *next;
                struct rtable    *rt_next;
                struct rt6_info  *rt6_next;
                struct dn_route  *dn_next;
        };
};

#ifdef __KERNEL__

static inline u32
dst_metric(const struct dst_entry *dst, int metric)
{
        return dst->metrics[metric-1];
}

static inline u32
dst_feature(const struct dst_entry *dst, u32 feature)
{
        return dst_metric(dst, RTAX_FEATURES) & feature;
}

static inline u32 dst_mtu(const struct dst_entry *dst)
{
        u32 mtu = dst_metric(dst, RTAX_MTU);
        /*
         * Alexey put it here, so ask him about it :)
         */
        barrier();
        return mtu;
}

/* RTT metrics are stored in milliseconds for user ABI, but used as jiffies */
static inline unsigned long dst_metric_rtt(const struct dst_entry *dst, int metric)
{
        return msecs_to_jiffies(dst_metric(dst, metric));
}

static inline void set_dst_metric_rtt(struct dst_entry *dst, int metric,
                                      unsigned long rtt)
{
        dst->metrics[metric-1] = jiffies_to_msecs(rtt);
}

static inline u32
dst_allfrag(const struct dst_entry *dst)
{
        int ret = dst_feature(dst, RTAX_FEATURE_ALLFRAG);
        /* Yes, _exactly_. This is paranoia. */
        barrier();
        return ret;
}

static inline int
dst_metric_locked(struct dst_entry *dst, int metric)
{
        return dst_metric(dst, RTAX_LOCK) & (1<<metric);
}

static inline void dst_hold(struct dst_entry * dst)
{
        /*
         * If your kernel compilation stops here, please check
         * __pad_to_align_refcnt declaration in struct dst_entry
         */
        BUILD_BUG_ON(offsetof(struct dst_entry, __refcnt) & 63);
        atomic_inc(&dst->__refcnt);
}

static inline void dst_use(struct dst_entry *dst, unsigned long time)
{
        dst_hold(dst);
        dst->__use++;
        dst->lastuse = time;
}

static inline void dst_use_noref(struct dst_entry *dst, unsigned long time)
{
        dst->__use++;
        dst->lastuse = time;
}

static inline
struct dst_entry * dst_clone(struct dst_entry * dst)
{
        if (dst)
                atomic_inc(&dst->__refcnt);
        return dst;
}

extern void dst_release(struct dst_entry *dst);

static inline void refdst_drop(unsigned long refdst)
{
        if (!(refdst & SKB_DST_NOREF))
                dst_release((struct dst_entry *)(refdst & SKB_DST_PTRMASK));
}

/**
 * skb_dst_drop - drops skb dst
 * @skb: buffer
 *
 * Drops dst reference count if a reference was taken.
 */
static inline void skb_dst_drop(struct sk_buff *skb)
{
        if (skb->_skb_refdst) {
                refdst_drop(skb->_skb_refdst);
                skb->_skb_refdst = 0UL;
        }
}

static inline void skb_dst_copy(struct sk_buff *nskb, const struct sk_buff *oskb)
{
        nskb->_skb_refdst = oskb->_skb_refdst;
        if (!(nskb->_skb_refdst & SKB_DST_NOREF))
                dst_clone(skb_dst(nskb));
}

/**
 * skb_dst_force - makes sure skb dst is refcounted
 * @skb: buffer
 *
 * If dst is not yet refcounted, let's do it
 */
static inline void skb_dst_force(struct sk_buff *skb)
{
        if (skb_dst_is_noref(skb)) {
                WARN_ON(!rcu_read_lock_held());
                skb->_skb_refdst &= ~SKB_DST_NOREF;
                dst_clone(skb_dst(skb));
        }
}

+
+/**
+ * skb_tunnel_rx - prepare skb for rx reinsert
+ * @skb: buffer
+ * @dev: tunnel device
+ *
+ * After decapsulation, packet is going to re-enter (netif_rx()) our stack,
+ * so make some cleanups, and perform accounting.
+ */
+static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev)
+{
+        skb->dev = dev;
+        /* TODO : stats should be SMP safe */
+        dev->stats.rx_packets++;
+        dev->stats.rx_bytes += skb->len;
+        skb->rxhash = 0;
+        skb_dst_drop(skb);
+        nf_reset(skb);
+}
+
/* Children define the path of the packet through the
 * Linux networking.  Thus, destinations are stackable.
 */

static inline struct dst_entry *dst_pop(struct dst_entry *dst)
{
        struct dst_entry *child = dst_clone(dst->child);

        dst_release(dst);
        return child;
}

extern int dst_discard(struct sk_buff *skb);
extern void * dst_alloc(struct dst_ops * ops);
extern void __dst_free(struct dst_entry * dst);
extern struct dst_entry *dst_destroy(struct dst_entry * dst);

static inline void dst_free(struct dst_entry * dst)
{
        if (dst->obsolete > 1)
                return;
        if (!atomic_read(&dst->__refcnt)) {
                dst = dst_destroy(dst);
                if (!dst)
                        return;
        }
        __dst_free(dst);
}

static inline void dst_rcu_free(struct rcu_head *head)
{
        struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head);
        dst_free(dst);
}

static inline void dst_confirm(struct dst_entry *dst)
{
        if (dst)
                neigh_confirm(dst->neighbour);
}

static inline void dst_link_failure(struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);
        if (dst && dst->ops && dst->ops->link_failure)
                dst->ops->link_failure(skb);
}

static inline void dst_set_expires(struct dst_entry *dst, int timeout)
{
        unsigned long expires = jiffies + timeout;

        if (expires == 0)
                expires = 1;

        if (dst->expires == 0 || time_before(expires, dst->expires))
                dst->expires = expires;
}

/* Output packet to network from transport.  */
static inline int dst_output(struct sk_buff *skb)
{
        return skb_dst(skb)->output(skb);
}

/* Input packet from network to transport.  */
static inline int dst_input(struct sk_buff *skb)
{
        return skb_dst(skb)->input(skb);
}

static inline struct dst_entry *dst_check(struct dst_entry *dst, u32 cookie)
{
        if (dst->obsolete)
                dst = dst->ops->check(dst, cookie);
        return dst;
}

extern void dst_init(void);

/* Flags for xfrm_lookup flags argument. */
enum {
        XFRM_LOOKUP_WAIT = 1 << 0,
        XFRM_LOOKUP_ICMP = 1 << 1,
};

struct flowi;
#ifndef CONFIG_XFRM
static inline int xfrm_lookup(struct net *net, struct dst_entry **dst_p,
                              struct flowi *fl, struct sock *sk, int flags)
{
        return 0;
}
static inline int __xfrm_lookup(struct net *net, struct dst_entry **dst_p,
                                struct flowi *fl, struct sock *sk, int flags)
{
        return 0;
}
#else
extern int xfrm_lookup(struct net *net, struct dst_entry **dst_p,
                       struct flowi *fl, struct sock *sk, int flags);
extern int __xfrm_lookup(struct net *net, struct dst_entry **dst_p,
                         struct flowi *fl, struct sock *sk, int flags);
#endif
#endif

#endif /* _NET_DST_H */
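An aside on the "TODO : stats should be SMP safe" comment in the new helper: dev->stats.rx_packets++ is a plain read-modify-write, so two CPUs receiving on the same tunnel concurrently can lose increments. Below is a hedged sketch of one way such a race could be closed with per-CPU counters; the struct and function names are illustrative only, not an API introduced by this commit.

/* Hypothetical per-CPU rx counters for a tunnel device. Each CPU
 * increments its own slot, so no update is lost; a reader sums the
 * slots when the stats are queried.
 */
struct tnl_rx_stats {
        unsigned long   rx_packets;
        unsigned long   rx_bytes;
};

/* 'stats' would be allocated once with alloc_percpu(struct tnl_rx_stats) */
static inline void tnl_rx_account(struct tnl_rx_stats __percpu *stats,
                                  const struct sk_buff *skb)
{
        struct tnl_rx_stats *s = this_cpu_ptr(stats);

        s->rx_packets++;
        s->rx_bytes += skb->len;
}
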
net/ipv4/ip_gre.c

/*
 *      Linux NET3:     GRE over IP protocol decoder.
 *
 *      Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 */

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ipip.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>

#ifdef CONFIG_IPV6
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif

/*
   Problems & solutions
   --------------------

   1. The most important issue is detecting local dead loops.
   They would cause complete host lockup in transmit, which
   would be "resolved" by stack overflow or, if queueing is enabled,
   with infinite looping in net_bh.

   We cannot track such dead loops during route installation,
   it is infeasible task. The most general solutions would be
   to keep skb->encapsulation counter (sort of local ttl),
   and silently drop packet when it expires. It is the best
   solution, but it supposes maintaing new variable in ALL
   skb, even if no tunneling is used.

   Current solution: HARD_TX_LOCK lock breaks dead loops.



   2. Networking dead loops would not kill routers, but would really
   kill network. IP hop limit plays role of "t->recursion" in this case,
   if we copy it from packet being encapsulated to upper header.
   It is very good solution, but it introduces two problems:

   - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
     do not work over tunnels.
   - traceroute does not work. I planned to relay ICMP from tunnel,
     so that this problem would be solved and traceroute output
     would even more informative. This idea appeared to be wrong:
     only Linux complies to rfc1812 now (yes, guys, Linux is the only
     true router now :-)), all routers (at least, in neighbourhood of mine)
     return only 8 bytes of payload. It is the end.

   Hence, if we want that OSPF worked or traceroute said something reasonable,
   we should search for another solution.

   One of them is to parse packet trying to detect inner encapsulation
   made by our node. It is difficult or even impossible, especially,
   taking into account fragmentation. TO be short, tt is not solution at all.

   Current solution: The solution was UNEXPECTEDLY SIMPLE.
   We force DF flag on tunnels with preconfigured hop limit,
   that is ALL. :-) Well, it does not remove the problem completely,
   but exponential growth of network traffic is changed to linear
   (branches, that exceed pmtu are pruned) and tunnel mtu
   fastly degrades to value <68, where looping stops.
   Yes, it is not good if there exists a router in the loop,
   which does not force DF, even when encapsulating packets have DF set.
   But it is not our problem! Nobody could accuse us, we made
   all that we could make. Even if it is your gated who injected
   fatal route to network, even if it were you who configured
   fatal static route: you are innocent. :-)



   3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
   practically identical code. It would be good to glue them
   together, but it is not very evident, how to make them modular.
   sit is integral part of IPv6, ipip and gre are naturally modular.
   We could extract common parts (hash table, ioctl etc)
   to a separate module (ip_tunnel.c).

   Alexey Kuznetsov.
 */

static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static int ipgre_tunnel_init(struct net_device *dev);
static void ipgre_tunnel_setup(struct net_device *dev);
static int ipgre_tunnel_bind_dev(struct net_device *dev);

/* Fallback tunnel: no source, no destination, no key, no options */

#define HASH_SIZE  16

static int ipgre_net_id __read_mostly;
struct ipgre_net {
        struct ip_tunnel *tunnels[4][HASH_SIZE];

        struct net_device *fb_tunnel_dev;
};

/* Tunnel hash table */

/*
   4 hash tables:

   3: (remote,local)
   2: (remote,*)
   1: (*,local)
   0: (*,*)

   We require exact key match i.e. if a key is present in packet
   it will match only tunnel with the same key; if it is not present,
   it will match only keyless tunnel.

   All keysless packets, if not matched configured keyless tunnels
   will match fallback tunnel.
 */

#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)

#define tunnels_r_l     tunnels[3]
#define tunnels_r       tunnels[2]
#define tunnels_l       tunnels[1]
#define tunnels_wc      tunnels[0]
/*
 * Locking : hash tables are protected by RCU and a spinlock
 */
static DEFINE_SPINLOCK(ipgre_lock);

#define for_each_ip_tunnel_rcu(start) \
        for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))

/* Given src, dst and key, find appropriate for input tunnel. */

static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
                                              __be32 remote, __be32 local,
                                              __be32 key, __be16 gre_proto)
{
        struct net *net = dev_net(dev);
        int link = dev->ifindex;
        unsigned h0 = HASH(remote);
        unsigned h1 = HASH(key);
        struct ip_tunnel *t, *cand = NULL;
        struct ipgre_net *ign = net_generic(net, ipgre_net_id);
        int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
                       ARPHRD_ETHER : ARPHRD_IPGRE;
        int score, cand_score = 4;

        for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
                if (local != t->parms.iph.saddr ||
                    remote != t->parms.iph.daddr ||
                    key != t->parms.i_key ||
                    !(t->dev->flags & IFF_UP))
                        continue;

                if (t->dev->type != ARPHRD_IPGRE &&
                    t->dev->type != dev_type)
                        continue;

                score = 0;
                if (t->parms.link != link)
                        score |= 1;
                if (t->dev->type != dev_type)
                        score |= 2;
                if (score == 0)
                        return t;

                if (score < cand_score) {
                        cand = t;
                        cand_score = score;
                }
        }

        for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
                if (remote != t->parms.iph.daddr ||
                    key != t->parms.i_key ||
                    !(t->dev->flags & IFF_UP))
                        continue;

                if (t->dev->type != ARPHRD_IPGRE &&
                    t->dev->type != dev_type)
                        continue;

                score = 0;
                if (t->parms.link != link)
                        score |= 1;
                if (t->dev->type != dev_type)
                        score |= 2;
                if (score == 0)
                        return t;

                if (score < cand_score) {
                        cand = t;
                        cand_score = score;
                }
        }

        for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) {
                if ((local != t->parms.iph.saddr &&
                     (local != t->parms.iph.daddr ||
                      !ipv4_is_multicast(local))) ||
                    key != t->parms.i_key ||
                    !(t->dev->flags & IFF_UP))
                        continue;

                if (t->dev->type != ARPHRD_IPGRE &&
                    t->dev->type != dev_type)
                        continue;

                score = 0;
                if (t->parms.link != link)
                        score |= 1;
                if (t->dev->type != dev_type)
                        score |= 2;
                if (score == 0)
                        return t;

                if (score < cand_score) {
                        cand = t;
                        cand_score = score;
                }
        }

        for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) {
                if (t->parms.i_key != key ||
                    !(t->dev->flags & IFF_UP))
                        continue;

                if (t->dev->type != ARPHRD_IPGRE &&
                    t->dev->type != dev_type)
                        continue;

                score = 0;
                if (t->parms.link != link)
                        score |= 1;
                if (t->dev->type != dev_type)
                        score |= 2;
                if (score == 0)
                        return t;

                if (score < cand_score) {
                        cand = t;
                        cand_score = score;
                }
        }

        if (cand != NULL)
                return cand;

        dev = ign->fb_tunnel_dev;
        if (dev->flags & IFF_UP)
                return netdev_priv(dev);

        return NULL;
}

static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
                struct ip_tunnel_parm *parms)
{
        __be32 remote = parms->iph.daddr;
        __be32 local = parms->iph.saddr;
        __be32 key = parms->i_key;
        unsigned h = HASH(key);
        int prio = 0;

        if (local)
                prio |= 1;
        if (remote && !ipv4_is_multicast(remote)) {
                prio |= 2;
                h ^= HASH(remote);
        }

        return &ign->tunnels[prio][h];
}

static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
                struct ip_tunnel *t)
{
        return __ipgre_bucket(ign, &t->parms);
}

static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
{
        struct ip_tunnel **tp = ipgre_bucket(ign, t);

        spin_lock_bh(&ipgre_lock);
        t->next = *tp;
        rcu_assign_pointer(*tp, t);
        spin_unlock_bh(&ipgre_lock);
}

static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
{
        struct ip_tunnel **tp;

        for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
                if (t == *tp) {
                        spin_lock_bh(&ipgre_lock);
                        *tp = t->next;
                        spin_unlock_bh(&ipgre_lock);
                        break;
                }
        }
}

static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
                                           struct ip_tunnel_parm *parms,
                                           int type)
{
        __be32 remote = parms->iph.daddr;
        __be32 local = parms->iph.saddr;
        __be32 key = parms->i_key;
        int link = parms->link;
        struct ip_tunnel *t, **tp;
        struct ipgre_net *ign = net_generic(net, ipgre_net_id);

        for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next)
                if (local == t->parms.iph.saddr &&
                    remote == t->parms.iph.daddr &&
                    key == t->parms.i_key &&
                    link == t->parms.link &&
                    type == t->dev->type)
                        break;

        return t;
}

static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
                struct ip_tunnel_parm *parms, int create)
{
        struct ip_tunnel *t, *nt;
        struct net_device *dev;
        char name[IFNAMSIZ];
        struct ipgre_net *ign = net_generic(net, ipgre_net_id);

        t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
        if (t || !create)
                return t;

        if (parms->name[0])
                strlcpy(name, parms->name, IFNAMSIZ);
        else
                sprintf(name, "gre%%d");

        dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
        if (!dev)
                return NULL;

        dev_net_set(dev, net);

        if (strchr(name, '%')) {
                if (dev_alloc_name(dev, name) < 0)
                        goto failed_free;
        }

        nt = netdev_priv(dev);
        nt->parms = *parms;
        dev->rtnl_link_ops = &ipgre_link_ops;

        dev->mtu = ipgre_tunnel_bind_dev(dev);

        if (register_netdevice(dev) < 0)
                goto failed_free;

        dev_hold(dev);
        ipgre_tunnel_link(ign, nt);
        return nt;

failed_free:
        free_netdev(dev);
        return NULL;
}

static void ipgre_tunnel_uninit(struct net_device *dev)
{
        struct net *net = dev_net(dev);
        struct ipgre_net *ign = net_generic(net, ipgre_net_id);

        ipgre_tunnel_unlink(ign, netdev_priv(dev));
        dev_put(dev);
}


static void ipgre_err(struct sk_buff *skb, u32 info)
{

/* All the routers (except for Linux) return only
   8 bytes of packet payload. It means, that precise relaying of
   ICMP in the real Internet is absolutely infeasible.

   Moreover, Cisco "wise men" put GRE key to the third word
   in GRE header. It makes impossible maintaining even soft state for keyed
   GRE tunnels with enabled checksum. Tell them "thank you".

   Well, I wonder, rfc1812 was written by Cisco employee,
   what the hell these idiots break standrads established
   by themself???
 */

        struct iphdr *iph = (struct iphdr *)skb->data;
        __be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
        int grehlen = (iph->ihl<<2) + 4;
        const int type = icmp_hdr(skb)->type;
        const int code = icmp_hdr(skb)->code;
        struct ip_tunnel *t;
        __be16 flags;

        flags = p[0];
        if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
                if (flags&(GRE_VERSION|GRE_ROUTING))
                        return;
                if (flags&GRE_KEY) {
                        grehlen += 4;
                        if (flags&GRE_CSUM)
                                grehlen += 4;
                }
        }

        /* If only 8 bytes returned, keyed message will be dropped here */
        if (skb_headlen(skb) < grehlen)
                return;

        switch (type) {
        default:
        case ICMP_PARAMETERPROB:
                return;

        case ICMP_DEST_UNREACH:
                switch (code) {
                case ICMP_SR_FAILED:
                case ICMP_PORT_UNREACH:
                        /* Impossible event. */
                        return;
                case ICMP_FRAG_NEEDED:
                        /* Soft state for pmtu is maintained by IP core. */
                        return;
                default:
                        /* All others are translated to HOST_UNREACH.
                           rfc2003 contains "deep thoughts" about NET_UNREACH,
                           I believe they are just ether pollution. --ANK
                         */
                        break;
                }
                break;
        case ICMP_TIME_EXCEEDED:
                if (code != ICMP_EXC_TTL)
                        return;
                break;
        }

        rcu_read_lock();
        t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
                                flags & GRE_KEY ?
                                *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
                                p[1]);
        if (t == NULL || t->parms.iph.daddr == 0 ||
            ipv4_is_multicast(t->parms.iph.daddr))
                goto out;

        if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
                goto out;

        if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
                t->err_count++;
        else
                t->err_count = 1;
        t->err_time = jiffies;
out:
        rcu_read_unlock();
        return;
}

static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
{
        if (INET_ECN_is_ce(iph->tos)) {
                if (skb->protocol == htons(ETH_P_IP)) {
                        IP_ECN_set_ce(ip_hdr(skb));
                } else if (skb->protocol == htons(ETH_P_IPV6)) {
                        IP6_ECN_set_ce(ipv6_hdr(skb));
                }
        }
}

static inline u8
ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
{
        u8 inner = 0;
        if (skb->protocol == htons(ETH_P_IP))
                inner = old_iph->tos;
        else if (skb->protocol == htons(ETH_P_IPV6))
                inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
        return INET_ECN_encapsulate(tos, inner);
}

static int ipgre_rcv(struct sk_buff *skb)
{
        struct iphdr *iph;
        u8     *h;
        __be16 flags;
        __sum16 csum = 0;
        __be32 key = 0;
        u32    seqno = 0;
        struct ip_tunnel *tunnel;
        int    offset = 4;
        __be16 gre_proto;
-       unsigned int len;

        if (!pskb_may_pull(skb, 16))
                goto drop_nolock;

        iph = ip_hdr(skb);
        h = skb->data;
        flags = *(__be16*)h;

        if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
                /* - Version must be 0.
                   - We do not support routing headers.
                 */
                if (flags&(GRE_VERSION|GRE_ROUTING))
                        goto drop_nolock;

                if (flags&GRE_CSUM) {
                        switch (skb->ip_summed) {
                        case CHECKSUM_COMPLETE:
                                csum = csum_fold(skb->csum);
                                if (!csum)
                                        break;
                                /* fall through */
                        case CHECKSUM_NONE:
                                skb->csum = 0;
                                csum = __skb_checksum_complete(skb);
                                skb->ip_summed = CHECKSUM_COMPLETE;
                        }
                        offset += 4;
                }
                if (flags&GRE_KEY) {
                        key = *(__be32*)(h + offset);
                        offset += 4;
                }
                if (flags&GRE_SEQ) {
                        seqno = ntohl(*(__be32*)(h + offset));
                        offset += 4;
                }
        }

        gre_proto = *(__be16 *)(h + 2);

        rcu_read_lock();
        if ((tunnel = ipgre_tunnel_lookup(skb->dev,
                                          iph->saddr, iph->daddr, key,
                                          gre_proto))) {
                struct net_device_stats *stats = &tunnel->dev->stats;

                secpath_reset(skb);

                skb->protocol = gre_proto;
                /* WCCP version 1 and 2 protocol decoding.
                 * - Change protocol to IP
                 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
                 */
                if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
                        skb->protocol = htons(ETH_P_IP);
                        if ((*(h + offset) & 0xF0) != 0x40)
                                offset += 4;
                }

                skb->mac_header = skb->network_header;
                __pskb_pull(skb, offset);
                skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
                skb->pkt_type = PACKET_HOST;
#ifdef CONFIG_NET_IPGRE_BROADCAST
                if (ipv4_is_multicast(iph->daddr)) {
                        /* Looped back packet, drop it! */
                        if (skb_rtable(skb)->fl.iif == 0)
                                goto drop;
                        stats->multicast++;
                        skb->pkt_type = PACKET_BROADCAST;
                }
#endif

                if (((flags&GRE_CSUM) && csum) ||
                    (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
                        stats->rx_crc_errors++;
                        stats->rx_errors++;
                        goto drop;
                }
                if (tunnel->parms.i_flags&GRE_SEQ) {
                        if (!(flags&GRE_SEQ) ||
                            (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
                                stats->rx_fifo_errors++;
                                stats->rx_errors++;
                                goto drop;
                        }
                        tunnel->i_seqno = seqno + 1;
                }

-               len = skb->len;
-
                /* Warning: All skb pointers will be invalidated! */
                if (tunnel->dev->type == ARPHRD_ETHER) {
                        if (!pskb_may_pull(skb, ETH_HLEN)) {
                                stats->rx_length_errors++;
                                stats->rx_errors++;
                                goto drop;
                        }

                        iph = ip_hdr(skb);
                        skb->protocol = eth_type_trans(skb, tunnel->dev);
                        skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
                }

-               stats->rx_packets++;
-               stats->rx_bytes += len;
-               skb->dev = tunnel->dev;
-               skb_dst_drop(skb);
-               nf_reset(skb);
+               skb_tunnel_rx(skb, tunnel->dev);

                skb_reset_network_header(skb);
                ipgre_ecn_decapsulate(iph, skb);

                netif_rx(skb);
                rcu_read_unlock();
                return(0);
        }
        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);

drop:
        rcu_read_unlock();
drop_nolock:
        kfree_skb(skb);
        return(0);
}

static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        struct net_device_stats *stats = &dev->stats;
        struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
        struct iphdr  *old_iph = ip_hdr(skb);
        struct iphdr  *tiph;
        u8     tos;
        __be16 df;
        struct rtable *rt;                      /* Route to the other host */
        struct net_device *tdev;                /* Device to other host */
        struct iphdr  *iph;                     /* Our new IP header */
        unsigned int max_headroom;              /* The extra header space needed */
        int    gre_hlen;
        __be32 dst;
        int    mtu;

        if (dev->type == ARPHRD_ETHER)
                IPCB(skb)->flags = 0;

        if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
                gre_hlen = 0;
                tiph = (struct iphdr *)skb->data;
        } else {
                gre_hlen = tunnel->hlen;
                tiph = &tunnel->parms.iph;
        }

        if ((dst = tiph->daddr) == 0) {
                /* NBMA tunnel */

                if (skb_dst(skb) == NULL) {
                        stats->tx_fifo_errors++;
                        goto tx_error;
                }

                if (skb->protocol == htons(ETH_P_IP)) {
                        rt = skb_rtable(skb);
                        if ((dst = rt->rt_gateway) == 0)
                                goto tx_error_icmp;
                }
#ifdef CONFIG_IPV6
                else if (skb->protocol == htons(ETH_P_IPV6)) {
                        struct in6_addr *addr6;
                        int addr_type;
                        struct neighbour *neigh = skb_dst(skb)->neighbour;

                        if (neigh == NULL)
                                goto tx_error;

                        addr6 = (struct in6_addr *)&neigh->primary_key;
                        addr_type = ipv6_addr_type(addr6);

                        if (addr_type == IPV6_ADDR_ANY) {
                                addr6 = &ipv6_hdr(skb)->daddr;
                                addr_type = ipv6_addr_type(addr6);
                        }

                        if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
                                goto tx_error_icmp;

                        dst = addr6->s6_addr32[3];
                }
#endif
                else
                        goto tx_error;
        }

        tos = tiph->tos;
        if (tos == 1) {
                tos = 0;
                if (skb->protocol == htons(ETH_P_IP))
                        tos = old_iph->tos;
        }

        {
                struct flowi fl = { .oif = tunnel->parms.link,
                                    .nl_u = { .ip4_u =
                                              { .daddr = dst,
                                                .saddr = tiph->saddr,
                                                .tos = RT_TOS(tos) } },
                                    .proto = IPPROTO_GRE };
                if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
                        stats->tx_carrier_errors++;
                        goto tx_error;
                }
        }
        tdev = rt->u.dst.dev;

        if (tdev == dev) {
                ip_rt_put(rt);
                stats->collisions++;
                goto tx_error;
        }

        df = tiph->frag_off;
        if (df)
                mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen;
        else
                mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

        if (skb_dst(skb))
                skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);

        if (skb->protocol == htons(ETH_P_IP)) {
                df |= (old_iph->frag_off&htons(IP_DF));

                if ((old_iph->frag_off&htons(IP_DF)) &&
                    mtu < ntohs(old_iph->tot_len)) {
                        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
                        ip_rt_put(rt);
                        goto tx_error;
                }
        }
#ifdef CONFIG_IPV6
        else if (skb->protocol == htons(ETH_P_IPV6)) {
                struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

                if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) {
                        if ((tunnel->parms.iph.daddr &&
                             !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
                            rt6->rt6i_dst.plen == 128) {
                                rt6->rt6i_flags |= RTF_MODIFIED;
                                skb_dst(skb)->metrics[RTAX_MTU-1] = mtu;
                        }
                }
796 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) { 789 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
797 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 790 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
798 ip_rt_put(rt); 791 ip_rt_put(rt);
799 goto tx_error; 792 goto tx_error;
800 } 793 }
801 } 794 }
802 #endif 795 #endif
803 796
804 if (tunnel->err_count > 0) { 797 if (tunnel->err_count > 0) {
805 if (time_before(jiffies, 798 if (time_before(jiffies,
806 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { 799 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
807 tunnel->err_count--; 800 tunnel->err_count--;
808 801
809 dst_link_failure(skb); 802 dst_link_failure(skb);
810 } else 803 } else
811 tunnel->err_count = 0; 804 tunnel->err_count = 0;
812 } 805 }
813 806
814 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->u.dst.header_len; 807 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->u.dst.header_len;
815 808
816 if (skb_headroom(skb) < max_headroom || skb_shared(skb)|| 809 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
817 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { 810 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
818 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); 811 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
819 if (max_headroom > dev->needed_headroom) 812 if (max_headroom > dev->needed_headroom)
820 dev->needed_headroom = max_headroom; 813 dev->needed_headroom = max_headroom;
821 if (!new_skb) { 814 if (!new_skb) {
822 ip_rt_put(rt); 815 ip_rt_put(rt);
823 txq->tx_dropped++; 816 txq->tx_dropped++;
824 dev_kfree_skb(skb); 817 dev_kfree_skb(skb);
825 return NETDEV_TX_OK; 818 return NETDEV_TX_OK;
826 } 819 }
827 if (skb->sk) 820 if (skb->sk)
828 skb_set_owner_w(new_skb, skb->sk); 821 skb_set_owner_w(new_skb, skb->sk);
829 dev_kfree_skb(skb); 822 dev_kfree_skb(skb);
830 skb = new_skb; 823 skb = new_skb;
831 old_iph = ip_hdr(skb); 824 old_iph = ip_hdr(skb);
832 } 825 }
833 826
834 skb_reset_transport_header(skb); 827 skb_reset_transport_header(skb);
835 skb_push(skb, gre_hlen); 828 skb_push(skb, gre_hlen);
836 skb_reset_network_header(skb); 829 skb_reset_network_header(skb);
837 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 830 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
838 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | 831 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
839 IPSKB_REROUTED); 832 IPSKB_REROUTED);
840 skb_dst_drop(skb); 833 skb_dst_drop(skb);
841 skb_dst_set(skb, &rt->u.dst); 834 skb_dst_set(skb, &rt->u.dst);
842 835
843 /* 836 /*
844 * Push down and install the IPIP header. 837 * Push down and install the IPIP header.
845 */ 838 */
846 839
847 iph = ip_hdr(skb); 840 iph = ip_hdr(skb);
848 iph->version = 4; 841 iph->version = 4;
849 iph->ihl = sizeof(struct iphdr) >> 2; 842 iph->ihl = sizeof(struct iphdr) >> 2;
850 iph->frag_off = df; 843 iph->frag_off = df;
851 iph->protocol = IPPROTO_GRE; 844 iph->protocol = IPPROTO_GRE;
852 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb); 845 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
853 iph->daddr = rt->rt_dst; 846 iph->daddr = rt->rt_dst;
854 iph->saddr = rt->rt_src; 847 iph->saddr = rt->rt_src;
855 848
856 if ((iph->ttl = tiph->ttl) == 0) { 849 if ((iph->ttl = tiph->ttl) == 0) {
857 if (skb->protocol == htons(ETH_P_IP)) 850 if (skb->protocol == htons(ETH_P_IP))
858 iph->ttl = old_iph->ttl; 851 iph->ttl = old_iph->ttl;
859 #ifdef CONFIG_IPV6 852 #ifdef CONFIG_IPV6
860 else if (skb->protocol == htons(ETH_P_IPV6)) 853 else if (skb->protocol == htons(ETH_P_IPV6))
861 iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit; 854 iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit;
862 #endif 855 #endif
863 else 856 else
864 iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT); 857 iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
865 } 858 }
866 859
867 ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags; 860 ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
868 ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ? 861 ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
869 htons(ETH_P_TEB) : skb->protocol; 862 htons(ETH_P_TEB) : skb->protocol;
870 863
871 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) { 864 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
872 __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4); 865 __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
873 866
874 if (tunnel->parms.o_flags&GRE_SEQ) { 867 if (tunnel->parms.o_flags&GRE_SEQ) {
875 ++tunnel->o_seqno; 868 ++tunnel->o_seqno;
876 *ptr = htonl(tunnel->o_seqno); 869 *ptr = htonl(tunnel->o_seqno);
877 ptr--; 870 ptr--;
878 } 871 }
879 if (tunnel->parms.o_flags&GRE_KEY) { 872 if (tunnel->parms.o_flags&GRE_KEY) {
880 *ptr = tunnel->parms.o_key; 873 *ptr = tunnel->parms.o_key;
881 ptr--; 874 ptr--;
882 } 875 }
883 if (tunnel->parms.o_flags&GRE_CSUM) { 876 if (tunnel->parms.o_flags&GRE_CSUM) {
884 *ptr = 0; 877 *ptr = 0;
885 *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr)); 878 *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
886 } 879 }
887 } 880 }
888 881
889 nf_reset(skb); 882 nf_reset(skb);
890 883
891 IPTUNNEL_XMIT(); 884 IPTUNNEL_XMIT();
892 return NETDEV_TX_OK; 885 return NETDEV_TX_OK;
893 886
894 tx_error_icmp: 887 tx_error_icmp:
895 dst_link_failure(skb); 888 dst_link_failure(skb);
896 889
897 tx_error: 890 tx_error:
898 stats->tx_errors++; 891 stats->tx_errors++;
899 dev_kfree_skb(skb); 892 dev_kfree_skb(skb);
900 return NETDEV_TX_OK; 893 return NETDEV_TX_OK;
901 } 894 }
902 895
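The option-word loop near the end of ipgre_tunnel_xmit() above is easy to misread: ptr starts at the last 32-bit slot of the header (hlen - 4) and walks backwards, so the sequence number is written into the last slot, then the key, then the checksum, which yields the GRE on-wire order checksum, key, sequence. A minimal userspace sketch of the same back-to-front fill follows; the flag values are host-order stand-ins for the __be16 GRE_* constants, the seq/key numbers are arbitrary, and hlen here counts only the GRE part (the kernel's hlen also includes the outer IP header):

    #include <stdio.h>
    #include <stdint.h>
    #include <arpa/inet.h>

    #define GRE_CSUM 0x8000   /* assumption: host-order stand-ins for */
    #define GRE_KEY  0x2000   /* the __be16 flags in <linux/if_tunnel.h> */
    #define GRE_SEQ  0x1000

    int main(void)
    {
            unsigned int o_flags = GRE_CSUM | GRE_KEY | GRE_SEQ;
            int hlen = 4;                           /* fixed GRE header */

            if (o_flags & GRE_CSUM) hlen += 4;
            if (o_flags & GRE_KEY)  hlen += 4;
            if (o_flags & GRE_SEQ)  hlen += 4;      /* hlen == 16 here */

            uint32_t gre[4] = {0};                  /* header as 32-bit words */
            uint32_t *ptr = &gre[hlen / 4 - 1];     /* last option word */

            if (o_flags & GRE_SEQ)  { *ptr = htonl(42);     ptr--; }
            if (o_flags & GRE_KEY)  { *ptr = htonl(0x1234); ptr--; }
            if (o_flags & GRE_CSUM) { *ptr = 0; }   /* checksum slot filled last */

            for (int i = 1; i < hlen / 4; i++)      /* skip word 0: flags/proto */
                    printf("option word %d: 0x%08x\n", i - 1, ntohl(gre[i]));
            return 0;
    }

Running it prints the checksum slot first (zero here), then the key, then the sequence number, i.e. the order in which a receiver parses them.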
903 static int ipgre_tunnel_bind_dev(struct net_device *dev) 896 static int ipgre_tunnel_bind_dev(struct net_device *dev)
904 { 897 {
905 struct net_device *tdev = NULL; 898 struct net_device *tdev = NULL;
906 struct ip_tunnel *tunnel; 899 struct ip_tunnel *tunnel;
907 struct iphdr *iph; 900 struct iphdr *iph;
908 int hlen = LL_MAX_HEADER; 901 int hlen = LL_MAX_HEADER;
909 int mtu = ETH_DATA_LEN; 902 int mtu = ETH_DATA_LEN;
910 int addend = sizeof(struct iphdr) + 4; 903 int addend = sizeof(struct iphdr) + 4;
911 904
912 tunnel = netdev_priv(dev); 905 tunnel = netdev_priv(dev);
913 iph = &tunnel->parms.iph; 906 iph = &tunnel->parms.iph;
914 907
915 /* Guess output device to choose reasonable mtu and needed_headroom */ 908 /* Guess output device to choose reasonable mtu and needed_headroom */
916 909
917 if (iph->daddr) { 910 if (iph->daddr) {
918 struct flowi fl = { .oif = tunnel->parms.link, 911 struct flowi fl = { .oif = tunnel->parms.link,
919 .nl_u = { .ip4_u = 912 .nl_u = { .ip4_u =
920 { .daddr = iph->daddr, 913 { .daddr = iph->daddr,
921 .saddr = iph->saddr, 914 .saddr = iph->saddr,
922 .tos = RT_TOS(iph->tos) } }, 915 .tos = RT_TOS(iph->tos) } },
923 .proto = IPPROTO_GRE }; 916 .proto = IPPROTO_GRE };
924 struct rtable *rt; 917 struct rtable *rt;
925 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { 918 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
926 tdev = rt->u.dst.dev; 919 tdev = rt->u.dst.dev;
927 ip_rt_put(rt); 920 ip_rt_put(rt);
928 } 921 }
929 922
930 if (dev->type != ARPHRD_ETHER) 923 if (dev->type != ARPHRD_ETHER)
931 dev->flags |= IFF_POINTOPOINT; 924 dev->flags |= IFF_POINTOPOINT;
932 } 925 }
933 926
934 if (!tdev && tunnel->parms.link) 927 if (!tdev && tunnel->parms.link)
935 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link); 928 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
936 929
937 if (tdev) { 930 if (tdev) {
938 hlen = tdev->hard_header_len + tdev->needed_headroom; 931 hlen = tdev->hard_header_len + tdev->needed_headroom;
939 mtu = tdev->mtu; 932 mtu = tdev->mtu;
940 } 933 }
941 dev->iflink = tunnel->parms.link; 934 dev->iflink = tunnel->parms.link;
942 935
943 /* Precalculate GRE options length */ 936 /* Precalculate GRE options length */
944 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) { 937 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
945 if (tunnel->parms.o_flags&GRE_CSUM) 938 if (tunnel->parms.o_flags&GRE_CSUM)
946 addend += 4; 939 addend += 4;
947 if (tunnel->parms.o_flags&GRE_KEY) 940 if (tunnel->parms.o_flags&GRE_KEY)
948 addend += 4; 941 addend += 4;
949 if (tunnel->parms.o_flags&GRE_SEQ) 942 if (tunnel->parms.o_flags&GRE_SEQ)
950 addend += 4; 943 addend += 4;
951 } 944 }
952 dev->needed_headroom = addend + hlen; 945 dev->needed_headroom = addend + hlen;
953 mtu -= dev->hard_header_len + addend; 946 mtu -= dev->hard_header_len + addend;
954 947
955 if (mtu < 68) 948 if (mtu < 68)
956 mtu = 68; 949 mtu = 68;
957 950
958 tunnel->hlen = addend; 951 tunnel->hlen = addend;
959 952
960 return mtu; 953 return mtu;
961 } 954 }
962 955
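The MTU and headroom arithmetic in ipgre_tunnel_bind_dev() is worth tracing once by hand: the encapsulation overhead ("addend") is the 20-byte outer IP header plus the 4-byte fixed GRE header, plus 4 bytes per enabled option (checksum, key, sequence), and the tunnel MTU is the underlay MTU minus that overhead, floored at 68. A standalone check of the same sums; the 1500-byte underlay and the option mix are just examples:

    #include <stdio.h>

    int main(void)
    {
            int addend = 20 + 4;            /* outer IP header + fixed GRE header */
            int csum = 1, key = 1, seq = 0; /* example option mix */

            addend += 4 * (csum + key + seq);

            int underlay_mtu = 1500;        /* e.g. plain Ethernet path */
            int mtu = underlay_mtu - addend; /* hard_header_len is 0 for a
                                                non-Ethernet GRE device */
            if (mtu < 68)                   /* same floor as the driver */
                    mtu = 68;

            printf("overhead %d bytes -> tunnel mtu %d\n", addend, mtu);
            return 0;
    }

This prints an overhead of 32 bytes and a tunnel MTU of 1468. The headroom side uses the same addend: the driver sets dev->needed_headroom to addend plus the underlay device's own header space.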
963 static int 956 static int
964 ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) 957 ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
965 { 958 {
966 int err = 0; 959 int err = 0;
967 struct ip_tunnel_parm p; 960 struct ip_tunnel_parm p;
968 struct ip_tunnel *t; 961 struct ip_tunnel *t;
969 struct net *net = dev_net(dev); 962 struct net *net = dev_net(dev);
970 struct ipgre_net *ign = net_generic(net, ipgre_net_id); 963 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
971 964
972 switch (cmd) { 965 switch (cmd) {
973 case SIOCGETTUNNEL: 966 case SIOCGETTUNNEL:
974 t = NULL; 967 t = NULL;
975 if (dev == ign->fb_tunnel_dev) { 968 if (dev == ign->fb_tunnel_dev) {
976 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { 969 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
977 err = -EFAULT; 970 err = -EFAULT;
978 break; 971 break;
979 } 972 }
980 t = ipgre_tunnel_locate(net, &p, 0); 973 t = ipgre_tunnel_locate(net, &p, 0);
981 } 974 }
982 if (t == NULL) 975 if (t == NULL)
983 t = netdev_priv(dev); 976 t = netdev_priv(dev);
984 memcpy(&p, &t->parms, sizeof(p)); 977 memcpy(&p, &t->parms, sizeof(p));
985 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) 978 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
986 err = -EFAULT; 979 err = -EFAULT;
987 break; 980 break;
988 981
989 case SIOCADDTUNNEL: 982 case SIOCADDTUNNEL:
990 case SIOCCHGTUNNEL: 983 case SIOCCHGTUNNEL:
991 err = -EPERM; 984 err = -EPERM;
992 if (!capable(CAP_NET_ADMIN)) 985 if (!capable(CAP_NET_ADMIN))
993 goto done; 986 goto done;
994 987
995 err = -EFAULT; 988 err = -EFAULT;
996 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) 989 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
997 goto done; 990 goto done;
998 991
999 err = -EINVAL; 992 err = -EINVAL;
1000 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE || 993 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
1001 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) || 994 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
1002 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))) 995 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
1003 goto done; 996 goto done;
1004 if (p.iph.ttl) 997 if (p.iph.ttl)
1005 p.iph.frag_off |= htons(IP_DF); 998 p.iph.frag_off |= htons(IP_DF);
1006 999
1007 if (!(p.i_flags&GRE_KEY)) 1000 if (!(p.i_flags&GRE_KEY))
1008 p.i_key = 0; 1001 p.i_key = 0;
1009 if (!(p.o_flags&GRE_KEY)) 1002 if (!(p.o_flags&GRE_KEY))
1010 p.o_key = 0; 1003 p.o_key = 0;
1011 1004
1012 t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL); 1005 t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
1013 1006
1014 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { 1007 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
1015 if (t != NULL) { 1008 if (t != NULL) {
1016 if (t->dev != dev) { 1009 if (t->dev != dev) {
1017 err = -EEXIST; 1010 err = -EEXIST;
1018 break; 1011 break;
1019 } 1012 }
1020 } else { 1013 } else {
1021 unsigned nflags = 0; 1014 unsigned nflags = 0;
1022 1015
1023 t = netdev_priv(dev); 1016 t = netdev_priv(dev);
1024 1017
1025 if (ipv4_is_multicast(p.iph.daddr)) 1018 if (ipv4_is_multicast(p.iph.daddr))
1026 nflags = IFF_BROADCAST; 1019 nflags = IFF_BROADCAST;
1027 else if (p.iph.daddr) 1020 else if (p.iph.daddr)
1028 nflags = IFF_POINTOPOINT; 1021 nflags = IFF_POINTOPOINT;
1029 1022
1030 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) { 1023 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
1031 err = -EINVAL; 1024 err = -EINVAL;
1032 break; 1025 break;
1033 } 1026 }
1034 ipgre_tunnel_unlink(ign, t); 1027 ipgre_tunnel_unlink(ign, t);
1035 t->parms.iph.saddr = p.iph.saddr; 1028 t->parms.iph.saddr = p.iph.saddr;
1036 t->parms.iph.daddr = p.iph.daddr; 1029 t->parms.iph.daddr = p.iph.daddr;
1037 t->parms.i_key = p.i_key; 1030 t->parms.i_key = p.i_key;
1038 t->parms.o_key = p.o_key; 1031 t->parms.o_key = p.o_key;
1039 memcpy(dev->dev_addr, &p.iph.saddr, 4); 1032 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1040 memcpy(dev->broadcast, &p.iph.daddr, 4); 1033 memcpy(dev->broadcast, &p.iph.daddr, 4);
1041 ipgre_tunnel_link(ign, t); 1034 ipgre_tunnel_link(ign, t);
1042 netdev_state_change(dev); 1035 netdev_state_change(dev);
1043 } 1036 }
1044 } 1037 }
1045 1038
1046 if (t) { 1039 if (t) {
1047 err = 0; 1040 err = 0;
1048 if (cmd == SIOCCHGTUNNEL) { 1041 if (cmd == SIOCCHGTUNNEL) {
1049 t->parms.iph.ttl = p.iph.ttl; 1042 t->parms.iph.ttl = p.iph.ttl;
1050 t->parms.iph.tos = p.iph.tos; 1043 t->parms.iph.tos = p.iph.tos;
1051 t->parms.iph.frag_off = p.iph.frag_off; 1044 t->parms.iph.frag_off = p.iph.frag_off;
1052 if (t->parms.link != p.link) { 1045 if (t->parms.link != p.link) {
1053 t->parms.link = p.link; 1046 t->parms.link = p.link;
1054 dev->mtu = ipgre_tunnel_bind_dev(dev); 1047 dev->mtu = ipgre_tunnel_bind_dev(dev);
1055 netdev_state_change(dev); 1048 netdev_state_change(dev);
1056 } 1049 }
1057 } 1050 }
1058 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) 1051 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1059 err = -EFAULT; 1052 err = -EFAULT;
1060 } else 1053 } else
1061 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); 1054 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1062 break; 1055 break;
1063 1056
1064 case SIOCDELTUNNEL: 1057 case SIOCDELTUNNEL:
1065 err = -EPERM; 1058 err = -EPERM;
1066 if (!capable(CAP_NET_ADMIN)) 1059 if (!capable(CAP_NET_ADMIN))
1067 goto done; 1060 goto done;
1068 1061
1069 if (dev == ign->fb_tunnel_dev) { 1062 if (dev == ign->fb_tunnel_dev) {
1070 err = -EFAULT; 1063 err = -EFAULT;
1071 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) 1064 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1072 goto done; 1065 goto done;
1073 err = -ENOENT; 1066 err = -ENOENT;
1074 if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL) 1067 if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
1075 goto done; 1068 goto done;
1076 err = -EPERM; 1069 err = -EPERM;
1077 if (t == netdev_priv(ign->fb_tunnel_dev)) 1070 if (t == netdev_priv(ign->fb_tunnel_dev))
1078 goto done; 1071 goto done;
1079 dev = t->dev; 1072 dev = t->dev;
1080 } 1073 }
1081 unregister_netdevice(dev); 1074 unregister_netdevice(dev);
1082 err = 0; 1075 err = 0;
1083 break; 1076 break;
1084 1077
1085 default: 1078 default:
1086 err = -EINVAL; 1079 err = -EINVAL;
1087 } 1080 }
1088 1081
1089 done: 1082 done:
1090 return err; 1083 return err;
1091 } 1084 }
1092 1085
1093 static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu) 1086 static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1094 { 1087 {
1095 struct ip_tunnel *tunnel = netdev_priv(dev); 1088 struct ip_tunnel *tunnel = netdev_priv(dev);
1096 if (new_mtu < 68 || 1089 if (new_mtu < 68 ||
1097 new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen) 1090 new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
1098 return -EINVAL; 1091 return -EINVAL;
1099 dev->mtu = new_mtu; 1092 dev->mtu = new_mtu;
1100 return 0; 1093 return 0;
1101 } 1094 }
1102 1095
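The bounds in ipgre_tunnel_change_mtu() are the standard IPv4 tunnel limits: 68 is IPv4's minimum link MTU (a maximal 60-byte header plus an 8-byte fragment, per RFC 791), and 0xFFF8 is the maximum IP datagram size rounded down to a multiple of 8, from which the device and tunnel header lengths are subtracted. A quick computation of the upper bound; the 28-byte hlen is an example (20-byte outer IP + 4-byte GRE + 4-byte key):

    #include <stdio.h>

    int main(void)
    {
            int hard_header_len = 0;  /* non-Ethernet GRE device */
            int hlen = 28;            /* outer IP + GRE + key (example) */
            int max_mtu = 0xFFF8 - hard_header_len - hlen;

            printf("accepted mtu range: 68..%d\n", max_mtu);  /* 68..65500 */
            return 0;
    }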
1103 /* Nice toy. Unfortunately, useless in real life :-) 1096 /* Nice toy. Unfortunately, useless in real life :-)
1105 It allows one to construct a virtual multiprotocol broadcast "LAN" 1098 It allows one to construct a virtual multiprotocol broadcast "LAN"
1105 over the Internet, provided multicast routing is tuned. 1098 over the Internet, provided multicast routing is tuned.
1106 1099
1107 1100
1108 I have no idea whether this bicycle was invented before me, 1101 I have no idea whether this bicycle was invented before me,
1109 so I had to set ARPHRD_IPGRE to a random value. 1102 so I had to set ARPHRD_IPGRE to a random value.
1110 I have the impression that Cisco could have made something similar, 1103 I have the impression that Cisco could have made something similar,
1111 but this feature is apparently missing in IOS<=11.2(8). 1104 but this feature is apparently missing in IOS<=11.2(8).
1112 1105
1113 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks 1106 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
1114 with broadcast 224.66.66.66. If you have access to the mbone, play with me :-) 1107 with broadcast 224.66.66.66. If you have access to the mbone, play with me :-)
1115 1108
1116 ping -t 255 224.66.66.66 1109 ping -t 255 224.66.66.66
1117 1110
1118 If nobody answers, the mbone does not work. 1111 If nobody answers, the mbone does not work.
1119 1112
1120 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255 1113 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
1121 ip addr add 10.66.66.<somewhat>/24 dev Universe 1114 ip addr add 10.66.66.<somewhat>/24 dev Universe
1122 ifconfig Universe up 1115 ifconfig Universe up
1123 ifconfig Universe add fe80::<Your_real_addr>/10 1116 ifconfig Universe add fe80::<Your_real_addr>/10
1124 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96 1117 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
1125 ftp 10.66.66.66 1118 ftp 10.66.66.66
1126 ... 1119 ...
1127 ftp fec0:6666:6666::193.233.7.65 1120 ftp fec0:6666:6666::193.233.7.65
1128 ... 1121 ...
1129 1122
1130 */ 1123 */
1131 1124
1132 static int ipgre_header(struct sk_buff *skb, struct net_device *dev, 1125 static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1133 unsigned short type, 1126 unsigned short type,
1134 const void *daddr, const void *saddr, unsigned len) 1127 const void *daddr, const void *saddr, unsigned len)
1135 { 1128 {
1136 struct ip_tunnel *t = netdev_priv(dev); 1129 struct ip_tunnel *t = netdev_priv(dev);
1137 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen); 1130 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
1138 __be16 *p = (__be16*)(iph+1); 1131 __be16 *p = (__be16*)(iph+1);
1139 1132
1140 memcpy(iph, &t->parms.iph, sizeof(struct iphdr)); 1133 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1141 p[0] = t->parms.o_flags; 1134 p[0] = t->parms.o_flags;
1142 p[1] = htons(type); 1135 p[1] = htons(type);
1143 1136
1144 /* 1137 /*
1145 * Set the source hardware address. 1138 * Set the source hardware address.
1146 */ 1139 */
1147 1140
1148 if (saddr) 1141 if (saddr)
1149 memcpy(&iph->saddr, saddr, 4); 1142 memcpy(&iph->saddr, saddr, 4);
1150 if (daddr) 1143 if (daddr)
1151 memcpy(&iph->daddr, daddr, 4); 1144 memcpy(&iph->daddr, daddr, 4);
1152 if (iph->daddr) 1145 if (iph->daddr)
1153 return t->hlen; 1146 return t->hlen;
1154 1147
1155 return -t->hlen; 1148 return -t->hlen;
1156 } 1149 }
1157 1150
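ipgre_header() returns either t->hlen or -t->hlen, following the header_ops->create convention: a positive result means a complete header of that many bytes was built, while a negative result of the same magnitude means the bytes were pushed but the destination is still unresolved. A small illustration of how a caller can consume such a signed result; handle_header_result() is a made-up helper, not a kernel function:

    #include <stdio.h>
    #include <stdlib.h>

    /* hypothetical consumer of a header_ops->create style return value */
    static void handle_header_result(int res)
    {
            int hlen = abs(res);    /* bytes pushed in either case */

            if (res > 0)
                    printf("complete %d-byte header built\n", hlen);
            else
                    printf("%d bytes pushed, destination unresolved\n", hlen);
    }

    int main(void)
    {
            handle_header_result(28);       /* e.g. t->hlen with a GRE key */
            handle_header_result(-28);
            return 0;
    }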
1158 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr) 1151 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1159 { 1152 {
1160 struct iphdr *iph = (struct iphdr *) skb_mac_header(skb); 1153 struct iphdr *iph = (struct iphdr *) skb_mac_header(skb);
1161 memcpy(haddr, &iph->saddr, 4); 1154 memcpy(haddr, &iph->saddr, 4);
1162 return 4; 1155 return 4;
1163 } 1156 }
1164 1157
1165 static const struct header_ops ipgre_header_ops = { 1158 static const struct header_ops ipgre_header_ops = {
1166 .create = ipgre_header, 1159 .create = ipgre_header,
1167 .parse = ipgre_header_parse, 1160 .parse = ipgre_header_parse,
1168 }; 1161 };
1169 1162
1170 #ifdef CONFIG_NET_IPGRE_BROADCAST 1163 #ifdef CONFIG_NET_IPGRE_BROADCAST
1171 static int ipgre_open(struct net_device *dev) 1164 static int ipgre_open(struct net_device *dev)
1172 { 1165 {
1173 struct ip_tunnel *t = netdev_priv(dev); 1166 struct ip_tunnel *t = netdev_priv(dev);
1174 1167
1175 if (ipv4_is_multicast(t->parms.iph.daddr)) { 1168 if (ipv4_is_multicast(t->parms.iph.daddr)) {
1176 struct flowi fl = { .oif = t->parms.link, 1169 struct flowi fl = { .oif = t->parms.link,
1177 .nl_u = { .ip4_u = 1170 .nl_u = { .ip4_u =
1178 { .daddr = t->parms.iph.daddr, 1171 { .daddr = t->parms.iph.daddr,
1179 .saddr = t->parms.iph.saddr, 1172 .saddr = t->parms.iph.saddr,
1180 .tos = RT_TOS(t->parms.iph.tos) } }, 1173 .tos = RT_TOS(t->parms.iph.tos) } },
1181 .proto = IPPROTO_GRE }; 1174 .proto = IPPROTO_GRE };
1182 struct rtable *rt; 1175 struct rtable *rt;
1183 if (ip_route_output_key(dev_net(dev), &rt, &fl)) 1176 if (ip_route_output_key(dev_net(dev), &rt, &fl))
1184 return -EADDRNOTAVAIL; 1177 return -EADDRNOTAVAIL;
1185 dev = rt->u.dst.dev; 1178 dev = rt->u.dst.dev;
1186 ip_rt_put(rt); 1179 ip_rt_put(rt);
1187 if (__in_dev_get_rtnl(dev) == NULL) 1180 if (__in_dev_get_rtnl(dev) == NULL)
1188 return -EADDRNOTAVAIL; 1181 return -EADDRNOTAVAIL;
1189 t->mlink = dev->ifindex; 1182 t->mlink = dev->ifindex;
1190 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr); 1183 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
1191 } 1184 }
1192 return 0; 1185 return 0;
1193 } 1186 }
1194 1187
1195 static int ipgre_close(struct net_device *dev) 1188 static int ipgre_close(struct net_device *dev)
1196 { 1189 {
1197 struct ip_tunnel *t = netdev_priv(dev); 1190 struct ip_tunnel *t = netdev_priv(dev);
1198 1191
1199 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) { 1192 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
1200 struct in_device *in_dev; 1193 struct in_device *in_dev;
1201 in_dev = inetdev_by_index(dev_net(dev), t->mlink); 1194 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
1202 if (in_dev) { 1195 if (in_dev) {
1203 ip_mc_dec_group(in_dev, t->parms.iph.daddr); 1196 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1204 in_dev_put(in_dev); 1197 in_dev_put(in_dev);
1205 } 1198 }
1206 } 1199 }
1207 return 0; 1200 return 0;
1208 } 1201 }
1209 1202
1210 #endif 1203 #endif
1211 1204
1212 static const struct net_device_ops ipgre_netdev_ops = { 1205 static const struct net_device_ops ipgre_netdev_ops = {
1213 .ndo_init = ipgre_tunnel_init, 1206 .ndo_init = ipgre_tunnel_init,
1214 .ndo_uninit = ipgre_tunnel_uninit, 1207 .ndo_uninit = ipgre_tunnel_uninit,
1215 #ifdef CONFIG_NET_IPGRE_BROADCAST 1208 #ifdef CONFIG_NET_IPGRE_BROADCAST
1216 .ndo_open = ipgre_open, 1209 .ndo_open = ipgre_open,
1217 .ndo_stop = ipgre_close, 1210 .ndo_stop = ipgre_close,
1218 #endif 1211 #endif
1219 .ndo_start_xmit = ipgre_tunnel_xmit, 1212 .ndo_start_xmit = ipgre_tunnel_xmit,
1220 .ndo_do_ioctl = ipgre_tunnel_ioctl, 1213 .ndo_do_ioctl = ipgre_tunnel_ioctl,
1221 .ndo_change_mtu = ipgre_tunnel_change_mtu, 1214 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1222 }; 1215 };
1223 1216
1224 static void ipgre_tunnel_setup(struct net_device *dev) 1217 static void ipgre_tunnel_setup(struct net_device *dev)
1225 { 1218 {
1226 dev->netdev_ops = &ipgre_netdev_ops; 1219 dev->netdev_ops = &ipgre_netdev_ops;
1227 dev->destructor = free_netdev; 1220 dev->destructor = free_netdev;
1228 1221
1229 dev->type = ARPHRD_IPGRE; 1222 dev->type = ARPHRD_IPGRE;
1230 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4; 1223 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
1231 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4; 1224 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
1232 dev->flags = IFF_NOARP; 1225 dev->flags = IFF_NOARP;
1233 dev->iflink = 0; 1226 dev->iflink = 0;
1234 dev->addr_len = 4; 1227 dev->addr_len = 4;
1235 dev->features |= NETIF_F_NETNS_LOCAL; 1228 dev->features |= NETIF_F_NETNS_LOCAL;
1236 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; 1229 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
1237 } 1230 }
1238 1231
1239 static int ipgre_tunnel_init(struct net_device *dev) 1232 static int ipgre_tunnel_init(struct net_device *dev)
1240 { 1233 {
1241 struct ip_tunnel *tunnel; 1234 struct ip_tunnel *tunnel;
1242 struct iphdr *iph; 1235 struct iphdr *iph;
1243 1236
1244 tunnel = netdev_priv(dev); 1237 tunnel = netdev_priv(dev);
1245 iph = &tunnel->parms.iph; 1238 iph = &tunnel->parms.iph;
1246 1239
1247 tunnel->dev = dev; 1240 tunnel->dev = dev;
1248 strcpy(tunnel->parms.name, dev->name); 1241 strcpy(tunnel->parms.name, dev->name);
1249 1242
1250 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); 1243 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1251 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); 1244 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1252 1245
1253 if (iph->daddr) { 1246 if (iph->daddr) {
1254 #ifdef CONFIG_NET_IPGRE_BROADCAST 1247 #ifdef CONFIG_NET_IPGRE_BROADCAST
1255 if (ipv4_is_multicast(iph->daddr)) { 1248 if (ipv4_is_multicast(iph->daddr)) {
1256 if (!iph->saddr) 1249 if (!iph->saddr)
1257 return -EINVAL; 1250 return -EINVAL;
1258 dev->flags = IFF_BROADCAST; 1251 dev->flags = IFF_BROADCAST;
1259 dev->header_ops = &ipgre_header_ops; 1252 dev->header_ops = &ipgre_header_ops;
1260 } 1253 }
1261 #endif 1254 #endif
1262 } else 1255 } else
1263 dev->header_ops = &ipgre_header_ops; 1256 dev->header_ops = &ipgre_header_ops;
1264 1257
1265 return 0; 1258 return 0;
1266 } 1259 }
1267 1260
1268 static void ipgre_fb_tunnel_init(struct net_device *dev) 1261 static void ipgre_fb_tunnel_init(struct net_device *dev)
1269 { 1262 {
1270 struct ip_tunnel *tunnel = netdev_priv(dev); 1263 struct ip_tunnel *tunnel = netdev_priv(dev);
1271 struct iphdr *iph = &tunnel->parms.iph; 1264 struct iphdr *iph = &tunnel->parms.iph;
1272 struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id); 1265 struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
1273 1266
1274 tunnel->dev = dev; 1267 tunnel->dev = dev;
1275 strcpy(tunnel->parms.name, dev->name); 1268 strcpy(tunnel->parms.name, dev->name);
1276 1269
1277 iph->version = 4; 1270 iph->version = 4;
1278 iph->protocol = IPPROTO_GRE; 1271 iph->protocol = IPPROTO_GRE;
1279 iph->ihl = 5; 1272 iph->ihl = 5;
1280 tunnel->hlen = sizeof(struct iphdr) + 4; 1273 tunnel->hlen = sizeof(struct iphdr) + 4;
1281 1274
1282 dev_hold(dev); 1275 dev_hold(dev);
1283 ign->tunnels_wc[0] = tunnel; 1276 ign->tunnels_wc[0] = tunnel;
1284 } 1277 }
1285 1278
1286 1279
1287 static const struct net_protocol ipgre_protocol = { 1280 static const struct net_protocol ipgre_protocol = {
1288 .handler = ipgre_rcv, 1281 .handler = ipgre_rcv,
1289 .err_handler = ipgre_err, 1282 .err_handler = ipgre_err,
1290 .netns_ok = 1, 1283 .netns_ok = 1,
1291 }; 1284 };
1292 1285
1293 static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head) 1286 static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
1294 { 1287 {
1295 int prio; 1288 int prio;
1296 1289
1297 for (prio = 0; prio < 4; prio++) { 1290 for (prio = 0; prio < 4; prio++) {
1298 int h; 1291 int h;
1299 for (h = 0; h < HASH_SIZE; h++) { 1292 for (h = 0; h < HASH_SIZE; h++) {
1300 struct ip_tunnel *t = ign->tunnels[prio][h]; 1293 struct ip_tunnel *t = ign->tunnels[prio][h];
1301 1294
1302 while (t != NULL) { 1295 while (t != NULL) {
1303 unregister_netdevice_queue(t->dev, head); 1296 unregister_netdevice_queue(t->dev, head);
1304 t = t->next; 1297 t = t->next;
1305 } 1298 }
1306 } 1299 }
1307 } 1300 }
1308 } 1301 }
1309 1302
1310 static int __net_init ipgre_init_net(struct net *net) 1303 static int __net_init ipgre_init_net(struct net *net)
1311 { 1304 {
1312 struct ipgre_net *ign = net_generic(net, ipgre_net_id); 1305 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1313 int err; 1306 int err;
1314 1307
1315 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0", 1308 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1316 ipgre_tunnel_setup); 1309 ipgre_tunnel_setup);
1317 if (!ign->fb_tunnel_dev) { 1310 if (!ign->fb_tunnel_dev) {
1318 err = -ENOMEM; 1311 err = -ENOMEM;
1319 goto err_alloc_dev; 1312 goto err_alloc_dev;
1320 } 1313 }
1321 dev_net_set(ign->fb_tunnel_dev, net); 1314 dev_net_set(ign->fb_tunnel_dev, net);
1322 1315
1323 ipgre_fb_tunnel_init(ign->fb_tunnel_dev); 1316 ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
1324 ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops; 1317 ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
1325 1318
1326 if ((err = register_netdev(ign->fb_tunnel_dev))) 1319 if ((err = register_netdev(ign->fb_tunnel_dev)))
1327 goto err_reg_dev; 1320 goto err_reg_dev;
1328 1321
1329 return 0; 1322 return 0;
1330 1323
1331 err_reg_dev: 1324 err_reg_dev:
1332 free_netdev(ign->fb_tunnel_dev); 1325 free_netdev(ign->fb_tunnel_dev);
1333 err_alloc_dev: 1326 err_alloc_dev:
1334 return err; 1327 return err;
1335 } 1328 }
1336 1329
1337 static void __net_exit ipgre_exit_net(struct net *net) 1330 static void __net_exit ipgre_exit_net(struct net *net)
1338 { 1331 {
1339 struct ipgre_net *ign; 1332 struct ipgre_net *ign;
1340 LIST_HEAD(list); 1333 LIST_HEAD(list);
1341 1334
1342 ign = net_generic(net, ipgre_net_id); 1335 ign = net_generic(net, ipgre_net_id);
1343 rtnl_lock(); 1336 rtnl_lock();
1344 ipgre_destroy_tunnels(ign, &list); 1337 ipgre_destroy_tunnels(ign, &list);
1345 unregister_netdevice_many(&list); 1338 unregister_netdevice_many(&list);
1346 rtnl_unlock(); 1339 rtnl_unlock();
1347 } 1340 }
1348 1341
1349 static struct pernet_operations ipgre_net_ops = { 1342 static struct pernet_operations ipgre_net_ops = {
1350 .init = ipgre_init_net, 1343 .init = ipgre_init_net,
1351 .exit = ipgre_exit_net, 1344 .exit = ipgre_exit_net,
1352 .id = &ipgre_net_id, 1345 .id = &ipgre_net_id,
1353 .size = sizeof(struct ipgre_net), 1346 .size = sizeof(struct ipgre_net),
1354 }; 1347 };
1355 1348
1356 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[]) 1349 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1357 { 1350 {
1358 __be16 flags; 1351 __be16 flags;
1359 1352
1360 if (!data) 1353 if (!data)
1361 return 0; 1354 return 0;
1362 1355
1363 flags = 0; 1356 flags = 0;
1364 if (data[IFLA_GRE_IFLAGS]) 1357 if (data[IFLA_GRE_IFLAGS])
1365 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]); 1358 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1366 if (data[IFLA_GRE_OFLAGS]) 1359 if (data[IFLA_GRE_OFLAGS])
1367 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]); 1360 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1368 if (flags & (GRE_VERSION|GRE_ROUTING)) 1361 if (flags & (GRE_VERSION|GRE_ROUTING))
1369 return -EINVAL; 1362 return -EINVAL;
1370 1363
1371 return 0; 1364 return 0;
1372 } 1365 }
1373 1366
1374 static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[]) 1367 static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1375 { 1368 {
1376 __be32 daddr; 1369 __be32 daddr;
1377 1370
1378 if (tb[IFLA_ADDRESS]) { 1371 if (tb[IFLA_ADDRESS]) {
1379 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) 1372 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1380 return -EINVAL; 1373 return -EINVAL;
1381 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) 1374 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1382 return -EADDRNOTAVAIL; 1375 return -EADDRNOTAVAIL;
1383 } 1376 }
1384 1377
1385 if (!data) 1378 if (!data)
1386 goto out; 1379 goto out;
1387 1380
1388 if (data[IFLA_GRE_REMOTE]) { 1381 if (data[IFLA_GRE_REMOTE]) {
1389 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4); 1382 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1390 if (!daddr) 1383 if (!daddr)
1391 return -EINVAL; 1384 return -EINVAL;
1392 } 1385 }
1393 1386
1394 out: 1387 out:
1395 return ipgre_tunnel_validate(tb, data); 1388 return ipgre_tunnel_validate(tb, data);
1396 } 1389 }
1397 1390
1398 static void ipgre_netlink_parms(struct nlattr *data[], 1391 static void ipgre_netlink_parms(struct nlattr *data[],
1399 struct ip_tunnel_parm *parms) 1392 struct ip_tunnel_parm *parms)
1400 { 1393 {
1401 memset(parms, 0, sizeof(*parms)); 1394 memset(parms, 0, sizeof(*parms));
1402 1395
1403 parms->iph.protocol = IPPROTO_GRE; 1396 parms->iph.protocol = IPPROTO_GRE;
1404 1397
1405 if (!data) 1398 if (!data)
1406 return; 1399 return;
1407 1400
1408 if (data[IFLA_GRE_LINK]) 1401 if (data[IFLA_GRE_LINK])
1409 parms->link = nla_get_u32(data[IFLA_GRE_LINK]); 1402 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1410 1403
1411 if (data[IFLA_GRE_IFLAGS]) 1404 if (data[IFLA_GRE_IFLAGS])
1412 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]); 1405 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1413 1406
1414 if (data[IFLA_GRE_OFLAGS]) 1407 if (data[IFLA_GRE_OFLAGS])
1415 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]); 1408 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1416 1409
1417 if (data[IFLA_GRE_IKEY]) 1410 if (data[IFLA_GRE_IKEY])
1418 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]); 1411 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1419 1412
1420 if (data[IFLA_GRE_OKEY]) 1413 if (data[IFLA_GRE_OKEY])
1421 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]); 1414 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1422 1415
1423 if (data[IFLA_GRE_LOCAL]) 1416 if (data[IFLA_GRE_LOCAL])
1424 parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]); 1417 parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
1425 1418
1426 if (data[IFLA_GRE_REMOTE]) 1419 if (data[IFLA_GRE_REMOTE])
1427 parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]); 1420 parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
1428 1421
1429 if (data[IFLA_GRE_TTL]) 1422 if (data[IFLA_GRE_TTL])
1430 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]); 1423 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1431 1424
1432 if (data[IFLA_GRE_TOS]) 1425 if (data[IFLA_GRE_TOS])
1433 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]); 1426 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1434 1427
1435 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) 1428 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1436 parms->iph.frag_off = htons(IP_DF); 1429 parms->iph.frag_off = htons(IP_DF);
1437 } 1430 }
1438 1431
1439 static int ipgre_tap_init(struct net_device *dev) 1432 static int ipgre_tap_init(struct net_device *dev)
1440 { 1433 {
1441 struct ip_tunnel *tunnel; 1434 struct ip_tunnel *tunnel;
1442 1435
1443 tunnel = netdev_priv(dev); 1436 tunnel = netdev_priv(dev);
1444 1437
1445 tunnel->dev = dev; 1438 tunnel->dev = dev;
1446 strcpy(tunnel->parms.name, dev->name); 1439 strcpy(tunnel->parms.name, dev->name);
1447 1440
1448 ipgre_tunnel_bind_dev(dev); 1441 ipgre_tunnel_bind_dev(dev);
1449 1442
1450 return 0; 1443 return 0;
1451 } 1444 }
1452 1445
1453 static const struct net_device_ops ipgre_tap_netdev_ops = { 1446 static const struct net_device_ops ipgre_tap_netdev_ops = {
1454 .ndo_init = ipgre_tap_init, 1447 .ndo_init = ipgre_tap_init,
1455 .ndo_uninit = ipgre_tunnel_uninit, 1448 .ndo_uninit = ipgre_tunnel_uninit,
1456 .ndo_start_xmit = ipgre_tunnel_xmit, 1449 .ndo_start_xmit = ipgre_tunnel_xmit,
1457 .ndo_set_mac_address = eth_mac_addr, 1450 .ndo_set_mac_address = eth_mac_addr,
1458 .ndo_validate_addr = eth_validate_addr, 1451 .ndo_validate_addr = eth_validate_addr,
1459 .ndo_change_mtu = ipgre_tunnel_change_mtu, 1452 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1460 }; 1453 };
1461 1454
1462 static void ipgre_tap_setup(struct net_device *dev) 1455 static void ipgre_tap_setup(struct net_device *dev)
1463 { 1456 {
1464 1457
1465 ether_setup(dev); 1458 ether_setup(dev);
1466 1459
1467 dev->netdev_ops = &ipgre_tap_netdev_ops; 1460 dev->netdev_ops = &ipgre_tap_netdev_ops;
1468 dev->destructor = free_netdev; 1461 dev->destructor = free_netdev;
1469 1462
1470 dev->iflink = 0; 1463 dev->iflink = 0;
1471 dev->features |= NETIF_F_NETNS_LOCAL; 1464 dev->features |= NETIF_F_NETNS_LOCAL;
1472 } 1465 }
1473 1466
1474 static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], 1467 static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[],
1475 struct nlattr *data[]) 1468 struct nlattr *data[])
1476 { 1469 {
1477 struct ip_tunnel *nt; 1470 struct ip_tunnel *nt;
1478 struct net *net = dev_net(dev); 1471 struct net *net = dev_net(dev);
1479 struct ipgre_net *ign = net_generic(net, ipgre_net_id); 1472 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1480 int mtu; 1473 int mtu;
1481 int err; 1474 int err;
1482 1475
1483 nt = netdev_priv(dev); 1476 nt = netdev_priv(dev);
1484 ipgre_netlink_parms(data, &nt->parms); 1477 ipgre_netlink_parms(data, &nt->parms);
1485 1478
1486 if (ipgre_tunnel_find(net, &nt->parms, dev->type)) 1479 if (ipgre_tunnel_find(net, &nt->parms, dev->type))
1487 return -EEXIST; 1480 return -EEXIST;
1488 1481
1489 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS]) 1482 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1490 random_ether_addr(dev->dev_addr); 1483 random_ether_addr(dev->dev_addr);
1491 1484
1492 mtu = ipgre_tunnel_bind_dev(dev); 1485 mtu = ipgre_tunnel_bind_dev(dev);
1493 if (!tb[IFLA_MTU]) 1486 if (!tb[IFLA_MTU])
1494 dev->mtu = mtu; 1487 dev->mtu = mtu;
1495 1488
1496 err = register_netdevice(dev); 1489 err = register_netdevice(dev);
1497 if (err) 1490 if (err)
1498 goto out; 1491 goto out;
1499 1492
1500 dev_hold(dev); 1493 dev_hold(dev);
1501 ipgre_tunnel_link(ign, nt); 1494 ipgre_tunnel_link(ign, nt);
1502 1495
1503 out: 1496 out:
1504 return err; 1497 return err;
1505 } 1498 }
1506 1499
1507 static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[], 1500 static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1508 struct nlattr *data[]) 1501 struct nlattr *data[])
1509 { 1502 {
1510 struct ip_tunnel *t, *nt; 1503 struct ip_tunnel *t, *nt;
1511 struct net *net = dev_net(dev); 1504 struct net *net = dev_net(dev);
1512 struct ipgre_net *ign = net_generic(net, ipgre_net_id); 1505 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1513 struct ip_tunnel_parm p; 1506 struct ip_tunnel_parm p;
1514 int mtu; 1507 int mtu;
1515 1508
1516 if (dev == ign->fb_tunnel_dev) 1509 if (dev == ign->fb_tunnel_dev)
1517 return -EINVAL; 1510 return -EINVAL;
1518 1511
1519 nt = netdev_priv(dev); 1512 nt = netdev_priv(dev);
1520 ipgre_netlink_parms(data, &p); 1513 ipgre_netlink_parms(data, &p);
1521 1514
1522 t = ipgre_tunnel_locate(net, &p, 0); 1515 t = ipgre_tunnel_locate(net, &p, 0);
1523 1516
1524 if (t) { 1517 if (t) {
1525 if (t->dev != dev) 1518 if (t->dev != dev)
1526 return -EEXIST; 1519 return -EEXIST;
1527 } else { 1520 } else {
1528 t = nt; 1521 t = nt;
1529 1522
1530 if (dev->type != ARPHRD_ETHER) { 1523 if (dev->type != ARPHRD_ETHER) {
1531 unsigned nflags = 0; 1524 unsigned nflags = 0;
1532 1525
1533 if (ipv4_is_multicast(p.iph.daddr)) 1526 if (ipv4_is_multicast(p.iph.daddr))
1534 nflags = IFF_BROADCAST; 1527 nflags = IFF_BROADCAST;
1535 else if (p.iph.daddr) 1528 else if (p.iph.daddr)
1536 nflags = IFF_POINTOPOINT; 1529 nflags = IFF_POINTOPOINT;
1537 1530
1538 if ((dev->flags ^ nflags) & 1531 if ((dev->flags ^ nflags) &
1539 (IFF_POINTOPOINT | IFF_BROADCAST)) 1532 (IFF_POINTOPOINT | IFF_BROADCAST))
1540 return -EINVAL; 1533 return -EINVAL;
1541 } 1534 }
1542 1535
1543 ipgre_tunnel_unlink(ign, t); 1536 ipgre_tunnel_unlink(ign, t);
1544 t->parms.iph.saddr = p.iph.saddr; 1537 t->parms.iph.saddr = p.iph.saddr;
1545 t->parms.iph.daddr = p.iph.daddr; 1538 t->parms.iph.daddr = p.iph.daddr;
1546 t->parms.i_key = p.i_key; 1539 t->parms.i_key = p.i_key;
1547 if (dev->type != ARPHRD_ETHER) { 1540 if (dev->type != ARPHRD_ETHER) {
1548 memcpy(dev->dev_addr, &p.iph.saddr, 4); 1541 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1549 memcpy(dev->broadcast, &p.iph.daddr, 4); 1542 memcpy(dev->broadcast, &p.iph.daddr, 4);
1550 } 1543 }
1551 ipgre_tunnel_link(ign, t); 1544 ipgre_tunnel_link(ign, t);
1552 netdev_state_change(dev); 1545 netdev_state_change(dev);
1553 } 1546 }
1554 1547
1555 t->parms.o_key = p.o_key; 1548 t->parms.o_key = p.o_key;
1556 t->parms.iph.ttl = p.iph.ttl; 1549 t->parms.iph.ttl = p.iph.ttl;
1557 t->parms.iph.tos = p.iph.tos; 1550 t->parms.iph.tos = p.iph.tos;
1558 t->parms.iph.frag_off = p.iph.frag_off; 1551 t->parms.iph.frag_off = p.iph.frag_off;
1559 1552
1560 if (t->parms.link != p.link) { 1553 if (t->parms.link != p.link) {
1561 t->parms.link = p.link; 1554 t->parms.link = p.link;
1562 mtu = ipgre_tunnel_bind_dev(dev); 1555 mtu = ipgre_tunnel_bind_dev(dev);
1563 if (!tb[IFLA_MTU]) 1556 if (!tb[IFLA_MTU])
1564 dev->mtu = mtu; 1557 dev->mtu = mtu;
1565 netdev_state_change(dev); 1558 netdev_state_change(dev);
1566 } 1559 }
1567 1560
1568 return 0; 1561 return 0;
1569 } 1562 }
1570 1563
1571 static size_t ipgre_get_size(const struct net_device *dev) 1564 static size_t ipgre_get_size(const struct net_device *dev)
1572 { 1565 {
1573 return 1566 return
1574 /* IFLA_GRE_LINK */ 1567 /* IFLA_GRE_LINK */
1575 nla_total_size(4) + 1568 nla_total_size(4) +
1576 /* IFLA_GRE_IFLAGS */ 1569 /* IFLA_GRE_IFLAGS */
1577 nla_total_size(2) + 1570 nla_total_size(2) +
1578 /* IFLA_GRE_OFLAGS */ 1571 /* IFLA_GRE_OFLAGS */
1579 nla_total_size(2) + 1572 nla_total_size(2) +
1580 /* IFLA_GRE_IKEY */ 1573 /* IFLA_GRE_IKEY */
1581 nla_total_size(4) + 1574 nla_total_size(4) +
1582 /* IFLA_GRE_OKEY */ 1575 /* IFLA_GRE_OKEY */
1583 nla_total_size(4) + 1576 nla_total_size(4) +
1584 /* IFLA_GRE_LOCAL */ 1577 /* IFLA_GRE_LOCAL */
1585 nla_total_size(4) + 1578 nla_total_size(4) +
1586 /* IFLA_GRE_REMOTE */ 1579 /* IFLA_GRE_REMOTE */
1587 nla_total_size(4) + 1580 nla_total_size(4) +
1588 /* IFLA_GRE_TTL */ 1581 /* IFLA_GRE_TTL */
1589 nla_total_size(1) + 1582 nla_total_size(1) +
1590 /* IFLA_GRE_TOS */ 1583 /* IFLA_GRE_TOS */
1591 nla_total_size(1) + 1584 nla_total_size(1) +
1592 /* IFLA_GRE_PMTUDISC */ 1585 /* IFLA_GRE_PMTUDISC */
1593 nla_total_size(1) + 1586 nla_total_size(1) +
1594 0; 1587 0;
1595 } 1588 }
1596 1589
1597 static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev) 1590 static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1598 { 1591 {
1599 struct ip_tunnel *t = netdev_priv(dev); 1592 struct ip_tunnel *t = netdev_priv(dev);
1600 struct ip_tunnel_parm *p = &t->parms; 1593 struct ip_tunnel_parm *p = &t->parms;
1601 1594
1602 NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link); 1595 NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
1603 NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags); 1596 NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
1604 NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags); 1597 NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
1605 NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key); 1598 NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key);
1606 NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key); 1599 NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key);
1607 NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr); 1600 NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr);
1608 NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr); 1601 NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr);
1609 NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl); 1602 NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
1610 NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos); 1603 NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
1611 NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF))); 1604 NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));
1612 1605
1613 return 0; 1606 return 0;
1614 1607
1615 nla_put_failure: 1608 nla_put_failure:
1616 return -EMSGSIZE; 1609 return -EMSGSIZE;
1617 } 1610 }
1618 1611
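The NLA_PUT_* lines in ipgre_fill_info() read like plain statements, but each macro hides a branch to the nla_put_failure label when the skb runs out of tailroom, which is why that label must exist in every function that uses them. Roughly, paraphrasing that era's <net/netlink.h> rather than quoting it:

    /* approximate expansion of NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link) */
    do {
            u32 __tmp = p->link;
            if (nla_put(skb, IFLA_GRE_LINK, sizeof(u32), &__tmp) < 0)
                    goto nla_put_failure;   /* caller then returns -EMSGSIZE */
    } while (0);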
1619 static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = { 1612 static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1620 [IFLA_GRE_LINK] = { .type = NLA_U32 }, 1613 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1621 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 }, 1614 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1622 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 }, 1615 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1623 [IFLA_GRE_IKEY] = { .type = NLA_U32 }, 1616 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1624 [IFLA_GRE_OKEY] = { .type = NLA_U32 }, 1617 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
1625 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) }, 1618 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1626 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) }, 1619 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
1627 [IFLA_GRE_TTL] = { .type = NLA_U8 }, 1620 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1628 [IFLA_GRE_TOS] = { .type = NLA_U8 }, 1621 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1629 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 }, 1622 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
1630 }; 1623 };
1631 1624
1632 static struct rtnl_link_ops ipgre_link_ops __read_mostly = { 1625 static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1633 .kind = "gre", 1626 .kind = "gre",
1634 .maxtype = IFLA_GRE_MAX, 1627 .maxtype = IFLA_GRE_MAX,
1635 .policy = ipgre_policy, 1628 .policy = ipgre_policy,
1636 .priv_size = sizeof(struct ip_tunnel), 1629 .priv_size = sizeof(struct ip_tunnel),
1637 .setup = ipgre_tunnel_setup, 1630 .setup = ipgre_tunnel_setup,
1638 .validate = ipgre_tunnel_validate, 1631 .validate = ipgre_tunnel_validate,
1639 .newlink = ipgre_newlink, 1632 .newlink = ipgre_newlink,
1640 .changelink = ipgre_changelink, 1633 .changelink = ipgre_changelink,
1641 .get_size = ipgre_get_size, 1634 .get_size = ipgre_get_size,
1642 .fill_info = ipgre_fill_info, 1635 .fill_info = ipgre_fill_info,
1643 }; 1636 };
1644 1637
1645 static struct rtnl_link_ops ipgre_tap_ops __read_mostly = { 1638 static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1646 .kind = "gretap", 1639 .kind = "gretap",
1647 .maxtype = IFLA_GRE_MAX, 1640 .maxtype = IFLA_GRE_MAX,
1648 .policy = ipgre_policy, 1641 .policy = ipgre_policy,
1649 .priv_size = sizeof(struct ip_tunnel), 1642 .priv_size = sizeof(struct ip_tunnel),
1650 .setup = ipgre_tap_setup, 1643 .setup = ipgre_tap_setup,
1651 .validate = ipgre_tap_validate, 1644 .validate = ipgre_tap_validate,
1652 .newlink = ipgre_newlink, 1645 .newlink = ipgre_newlink,
1653 .changelink = ipgre_changelink, 1646 .changelink = ipgre_changelink,
1654 .get_size = ipgre_get_size, 1647 .get_size = ipgre_get_size,
1655 .fill_info = ipgre_fill_info, 1648 .fill_info = ipgre_fill_info,
1656 }; 1649 };
1657 1650
1658 /* 1651 /*
1659 * And now the modules code and kernel interface. 1652 * And now the modules code and kernel interface.
1660 */ 1653 */
1661 1654
1662 static int __init ipgre_init(void) 1655 static int __init ipgre_init(void)
1663 { 1656 {
1664 int err; 1657 int err;
1665 1658
1666 printk(KERN_INFO "GRE over IPv4 tunneling driver\n"); 1659 printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1667 1660
1668 err = register_pernet_device(&ipgre_net_ops); 1661 err = register_pernet_device(&ipgre_net_ops);
1669 if (err < 0) 1662 if (err < 0)
1670 return err; 1663 return err;
1671 1664
1672 err = inet_add_protocol(&ipgre_protocol, IPPROTO_GRE); 1665 err = inet_add_protocol(&ipgre_protocol, IPPROTO_GRE);
1673 if (err < 0) { 1666 if (err < 0) {
1674 printk(KERN_INFO "ipgre init: can't add protocol\n"); 1667 printk(KERN_INFO "ipgre init: can't add protocol\n");
1675 goto add_proto_failed; 1668 goto add_proto_failed;
1676 } 1669 }
1677 1670
1678 err = rtnl_link_register(&ipgre_link_ops); 1671 err = rtnl_link_register(&ipgre_link_ops);
1679 if (err < 0) 1672 if (err < 0)
1680 goto rtnl_link_failed; 1673 goto rtnl_link_failed;
1681 1674
1682 err = rtnl_link_register(&ipgre_tap_ops); 1675 err = rtnl_link_register(&ipgre_tap_ops);
1683 if (err < 0) 1676 if (err < 0)
1684 goto tap_ops_failed; 1677 goto tap_ops_failed;
1685 1678
1686 out: 1679 out:
1687 return err; 1680 return err;
1688 1681
1689 tap_ops_failed: 1682 tap_ops_failed:
1690 rtnl_link_unregister(&ipgre_link_ops); 1683 rtnl_link_unregister(&ipgre_link_ops);
1691 rtnl_link_failed: 1684 rtnl_link_failed:
1692 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE); 1685 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
1693 add_proto_failed: 1686 add_proto_failed:
1694 unregister_pernet_device(&ipgre_net_ops); 1687 unregister_pernet_device(&ipgre_net_ops);
1695 goto out; 1688 goto out;
1696 } 1689 }
1697 1690
1698 static void __exit ipgre_fini(void) 1691 static void __exit ipgre_fini(void)
1699 { 1692 {
1700 rtnl_link_unregister(&ipgre_tap_ops); 1693 rtnl_link_unregister(&ipgre_tap_ops);
1701 rtnl_link_unregister(&ipgre_link_ops); 1694 rtnl_link_unregister(&ipgre_link_ops);
1702 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) 1695 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
1703 printk(KERN_INFO "ipgre close: can't remove protocol\n"); 1696 printk(KERN_INFO "ipgre close: can't remove protocol\n");
1704 unregister_pernet_device(&ipgre_net_ops); 1697 unregister_pernet_device(&ipgre_net_ops);
1705 } 1698 }
1706 1699
1707 module_init(ipgre_init); 1700 module_init(ipgre_init);
1708 module_exit(ipgre_fini); 1701 module_exit(ipgre_fini);
1709 MODULE_LICENSE("GPL"); 1702 MODULE_LICENSE("GPL");
1710 MODULE_ALIAS_RTNL_LINK("gre"); 1703 MODULE_ALIAS_RTNL_LINK("gre");
1711 MODULE_ALIAS_RTNL_LINK("gretap"); 1704 MODULE_ALIAS_RTNL_LINK("gretap");
1712 1705
1 /* 1 /*
2 * Linux NET3: IP/IP protocol decoder. 2 * Linux NET3: IP/IP protocol decoder.
3 * 3 *
4 * Authors: 4 * Authors:
5 * Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95 5 * Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
6 * 6 *
7 * Fixes: 7 * Fixes:
8 * Alan Cox : Merged and made usable non-modular (it's so tiny it's silly as 8 * Alan Cox : Merged and made usable non-modular (it's so tiny it's silly as
9 * a module taking up 2 pages). 9 * a module taking up 2 pages).
10 * Alan Cox : Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph) 10 * Alan Cox : Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
11 * to keep ip_forward happy. 11 * to keep ip_forward happy.
12 * Alan Cox : More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8). 12 * Alan Cox : More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
13 * Kai Schulte : Fixed #defines for IP_FIREWALL->FIREWALL 13 * Kai Schulte : Fixed #defines for IP_FIREWALL->FIREWALL
14 * David Woodhouse : Perform some basic ICMP handling. 14 * David Woodhouse : Perform some basic ICMP handling.
15 * IPIP Routing without decapsulation. 15 * IPIP Routing without decapsulation.
16 * Carlos Picoto : GRE over IP support 16 * Carlos Picoto : GRE over IP support
17 * Alexey Kuznetsov: Reworked. Really, now it is a truncated version of ipv4/ip_gre.c. 17 * Alexey Kuznetsov: Reworked. Really, now it is a truncated version of ipv4/ip_gre.c.
18 * I do not want to merge them together. 18 * I do not want to merge them together.
19 * 19 *
20 * This program is free software; you can redistribute it and/or 20 * This program is free software; you can redistribute it and/or
21 * modify it under the terms of the GNU General Public License 21 * modify it under the terms of the GNU General Public License
22 * as published by the Free Software Foundation; either version 22 * as published by the Free Software Foundation; either version
23 * 2 of the License, or (at your option) any later version. 23 * 2 of the License, or (at your option) any later version.
24 * 24 *
25 */ 25 */
26 26
27 /* tunnel.c: an IP tunnel driver 27 /* tunnel.c: an IP tunnel driver
28 28
29 The purpose of this driver is to provide an IP tunnel through 29 The purpose of this driver is to provide an IP tunnel through
30 which you can tunnel network traffic transparently across subnets. 30 which you can tunnel network traffic transparently across subnets.
31 31
32 This was written by looking at Nick Holloway's dummy driver. 32 This was written by looking at Nick Holloway's dummy driver.
33 Thanks for the great code! 33 Thanks for the great code!
34 34
35 -Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95 35 -Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
36 36
37 Minor tweaks: 37 Minor tweaks:
38 Cleaned up the code a little and added some pre-1.3.0 tweaks. 38 Cleaned up the code a little and added some pre-1.3.0 tweaks.
39 dev->hard_header/hard_header_len changed to use no headers. 39 dev->hard_header/hard_header_len changed to use no headers.
40 Comments/bracketing tweaked. 40 Comments/bracketing tweaked.
41 Made the tunnels use dev->name not tunnel: when error reporting. 41 Made the tunnels use dev->name not tunnel: when error reporting.
42 Added tx_dropped stat 42 Added tx_dropped stat
43 43
44 -Alan Cox (alan@lxorguk.ukuu.org.uk) 21 March 95 44 -Alan Cox (alan@lxorguk.ukuu.org.uk) 21 March 95
45 45
46 Reworked: 46 Reworked:
47 Changed to tunnel to the destination gateway in addition to the 47 Changed to tunnel to the destination gateway in addition to the
48 tunnel's pointopoint address 48 tunnel's pointopoint address
49 Almost completely rewritten 49 Almost completely rewritten
50 Note: There is currently no firewall or ICMP handling done. 50 Note: There is currently no firewall or ICMP handling done.
51 51
52 -Sam Lantinga (slouken@cs.ucdavis.edu) 02/13/96 52 -Sam Lantinga (slouken@cs.ucdavis.edu) 02/13/96
53 53
54 */ 54 */
55 55
56 /* Things I wish I had known when writing the tunnel driver: 56 /* Things I wish I had known when writing the tunnel driver:
57 57
58 When the tunnel_xmit() function is called, the skb contains the 58 When the tunnel_xmit() function is called, the skb contains the
59 packet to be sent (plus a great deal of extra info), and dev 59 packet to be sent (plus a great deal of extra info), and dev
60 contains the tunnel device that _we_ are. 60 contains the tunnel device that _we_ are.
61 61
62 When we are passed a packet, we are expected to fill in the 62 When we are passed a packet, we are expected to fill in the
63 source address with our source IP address. 63 source address with our source IP address.
64 64
65 What is the proper way to allocate, copy and free a buffer? 65 What is the proper way to allocate, copy and free a buffer?
66 After you allocate it, it is a "0 length" chunk of memory 66 After you allocate it, it is a "0 length" chunk of memory
67 starting at zero. If you want to add headers to the buffer 67 starting at zero. If you want to add headers to the buffer
68 later, you'll have to call "skb_reserve(skb, amount)" with 68 later, you'll have to call "skb_reserve(skb, amount)" with
69 the amount of memory you want reserved. Then, you call 69 the amount of memory you want reserved. Then, you call
70 "skb_put(skb, amount)" with the amount of space you want in 70 "skb_put(skb, amount)" with the amount of space you want in
71 the buffer. skb_put() returns a pointer to the top (#0) of 71 the buffer. skb_put() returns a pointer to the top (#0) of
72 that buffer. skb->len is set to the amount of space you have 72 that buffer. skb->len is set to the amount of space you have
73 "allocated" with skb_put(). You can then write up to skb->len 73 "allocated" with skb_put(). You can then write up to skb->len
74 bytes to that buffer. If you need more, you can call skb_put() 74 bytes to that buffer. If you need more, you can call skb_put()
75 again with the additional amount of space you need. You can 75 again with the additional amount of space you need. You can
76 find out how much more space you can allocate by calling 76 find out how much more space you can allocate by calling
77 "skb_tailroom(skb)". 77 "skb_tailroom(skb)".
78 Now, to add header space, call "skb_push(skb, header_len)". 78 Now, to add header space, call "skb_push(skb, header_len)".
79 This creates space at the beginning of the buffer and returns 79 This creates space at the beginning of the buffer and returns
80 a pointer to this new space. If later you need to strip a 80 a pointer to this new space. If later you need to strip a
81 header from a buffer, call "skb_pull(skb, header_len)". 81 header from a buffer, call "skb_pull(skb, header_len)".
82 skb_headroom() will return how much space is left at the top 82 skb_headroom() will return how much space is left at the top
83 of the buffer (before the main data). Remember, this headroom 83 of the buffer (before the main data). Remember, this headroom
84 space must be reserved before the skb_put() function is called. 84 space must be reserved before the skb_put() function is called.
85 */ 85 */
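
A minimal sketch of the reserve/put/push pattern the comment above describes (illustrative only, not part of this diff; the function name and the payload parameters are hypothetical). One nit on the historical text: in current kernels skb_put() returns a pointer to the start of the newly added tail area, not to offset #0 of the buffer.

    static int build_example_packet(const void *payload, unsigned int payload_len)
    {
            unsigned int headroom = LL_MAX_HEADER + sizeof(struct iphdr);
            struct sk_buff *skb;
            struct iphdr *iph;

            skb = alloc_skb(headroom + payload_len, GFP_ATOMIC);
            if (!skb)
                    return -ENOMEM;
            /* Reserve headroom first; skb_put() below only grows the tail. */
            skb_reserve(skb, headroom);
            /* Tail: copy the payload into the space skb_put() just grew. */
            memcpy(skb_put(skb, payload_len), payload, payload_len);
            /* Head: grow into the headroom reserved above. */
            iph = (struct iphdr *)skb_push(skb, sizeof(struct iphdr));
            iph->version = 4;
            iph->ihl = sizeof(struct iphdr) >> 2;
            kfree_skb(skb);         /* sketch only: free instead of transmitting */
            return 0;
    }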
86 86
87 /* 87 /*
88 This version of net/ipv4/ipip.c is cloned from net/ipv4/ip_gre.c 88 This version of net/ipv4/ipip.c is cloned from net/ipv4/ip_gre.c
89 89
90 For comments look at net/ipv4/ip_gre.c --ANK 90 For comments look at net/ipv4/ip_gre.c --ANK
91 */ 91 */
92 92
93 93
94 #include <linux/capability.h> 94 #include <linux/capability.h>
95 #include <linux/module.h> 95 #include <linux/module.h>
96 #include <linux/types.h> 96 #include <linux/types.h>
97 #include <linux/kernel.h> 97 #include <linux/kernel.h>
98 #include <linux/slab.h> 98 #include <linux/slab.h>
99 #include <asm/uaccess.h> 99 #include <asm/uaccess.h>
100 #include <linux/skbuff.h> 100 #include <linux/skbuff.h>
101 #include <linux/netdevice.h> 101 #include <linux/netdevice.h>
102 #include <linux/in.h> 102 #include <linux/in.h>
103 #include <linux/tcp.h> 103 #include <linux/tcp.h>
104 #include <linux/udp.h> 104 #include <linux/udp.h>
105 #include <linux/if_arp.h> 105 #include <linux/if_arp.h>
106 #include <linux/mroute.h> 106 #include <linux/mroute.h>
107 #include <linux/init.h> 107 #include <linux/init.h>
108 #include <linux/netfilter_ipv4.h> 108 #include <linux/netfilter_ipv4.h>
109 #include <linux/if_ether.h> 109 #include <linux/if_ether.h>
110 110
111 #include <net/sock.h> 111 #include <net/sock.h>
112 #include <net/ip.h> 112 #include <net/ip.h>
113 #include <net/icmp.h> 113 #include <net/icmp.h>
114 #include <net/ipip.h> 114 #include <net/ipip.h>
115 #include <net/inet_ecn.h> 115 #include <net/inet_ecn.h>
116 #include <net/xfrm.h> 116 #include <net/xfrm.h>
117 #include <net/net_namespace.h> 117 #include <net/net_namespace.h>
118 #include <net/netns/generic.h> 118 #include <net/netns/generic.h>
119 119
120 #define HASH_SIZE 16 120 #define HASH_SIZE 16
121 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) 121 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
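
HASH() XOR-folds bits 4-7 of the address into bits 0-3, so only the low byte of the value selects one of the HASH_SIZE (16) buckets. A worked example, with a hypothetical address value:

    u32 a = 0xC7;                   /* low byte of some __be32 address */
    u32 h = (a ^ (a >> 4)) & 0xF;   /* (0xC7 ^ 0x0C) & 0xF = 0xB: bucket 11 */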
122 122
123 static int ipip_net_id __read_mostly; 123 static int ipip_net_id __read_mostly;
124 struct ipip_net { 124 struct ipip_net {
125 struct ip_tunnel *tunnels_r_l[HASH_SIZE]; 125 struct ip_tunnel *tunnels_r_l[HASH_SIZE];
126 struct ip_tunnel *tunnels_r[HASH_SIZE]; 126 struct ip_tunnel *tunnels_r[HASH_SIZE];
127 struct ip_tunnel *tunnels_l[HASH_SIZE]; 127 struct ip_tunnel *tunnels_l[HASH_SIZE];
128 struct ip_tunnel *tunnels_wc[1]; 128 struct ip_tunnel *tunnels_wc[1];
129 struct ip_tunnel **tunnels[4]; 129 struct ip_tunnel **tunnels[4];
130 130
131 struct net_device *fb_tunnel_dev; 131 struct net_device *fb_tunnel_dev;
132 }; 132 };
133 133
134 static void ipip_tunnel_init(struct net_device *dev); 134 static void ipip_tunnel_init(struct net_device *dev);
135 static void ipip_tunnel_setup(struct net_device *dev); 135 static void ipip_tunnel_setup(struct net_device *dev);
136 136
137 /* 137 /*
138 * Locking : hash tables are protected by RCU and a spinlock 138 * Locking : hash tables are protected by RCU and a spinlock
139 */ 139 */
140 static DEFINE_SPINLOCK(ipip_lock); 140 static DEFINE_SPINLOCK(ipip_lock);
141 141
142 #define for_each_ip_tunnel_rcu(start) \ 142 #define for_each_ip_tunnel_rcu(start) \
143 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) 143 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
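
The macro assumes a struct ip_tunnel *t in scope and must run under rcu_read_lock(), as in ipip_tunnel_lookup() below. A hypothetical walk of the wildcard chain, given a struct ipip_net *ipn:

    struct ip_tunnel *t;

    rcu_read_lock();
    for_each_ip_tunnel_rcu(ipn->tunnels_wc[0])
            pr_debug("tunnel %s is linked\n", t->dev->name);
    rcu_read_unlock();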
144 144
145 static struct ip_tunnel * ipip_tunnel_lookup(struct net *net, 145 static struct ip_tunnel * ipip_tunnel_lookup(struct net *net,
146 __be32 remote, __be32 local) 146 __be32 remote, __be32 local)
147 { 147 {
148 unsigned h0 = HASH(remote); 148 unsigned h0 = HASH(remote);
149 unsigned h1 = HASH(local); 149 unsigned h1 = HASH(local);
150 struct ip_tunnel *t; 150 struct ip_tunnel *t;
151 struct ipip_net *ipn = net_generic(net, ipip_net_id); 151 struct ipip_net *ipn = net_generic(net, ipip_net_id);
152 152
153 for_each_ip_tunnel_rcu(ipn->tunnels_r_l[h0 ^ h1]) 153 for_each_ip_tunnel_rcu(ipn->tunnels_r_l[h0 ^ h1])
154 if (local == t->parms.iph.saddr && 154 if (local == t->parms.iph.saddr &&
155 remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) 155 remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
156 return t; 156 return t;
157 157
158 for_each_ip_tunnel_rcu(ipn->tunnels_r[h0]) 158 for_each_ip_tunnel_rcu(ipn->tunnels_r[h0])
159 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) 159 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
160 return t; 160 return t;
161 161
162 for_each_ip_tunnel_rcu(ipn->tunnels_l[h1]) 162 for_each_ip_tunnel_rcu(ipn->tunnels_l[h1])
163 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP)) 163 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
164 return t; 164 return t;
165 165
166 t = rcu_dereference(ipn->tunnels_wc[0]); 166 t = rcu_dereference(ipn->tunnels_wc[0]);
167 if (t && (t->dev->flags&IFF_UP)) 167 if (t && (t->dev->flags&IFF_UP))
168 return t; 168 return t;
169 return NULL; 169 return NULL;
170 } 170 }
171 171
172 static struct ip_tunnel **__ipip_bucket(struct ipip_net *ipn, 172 static struct ip_tunnel **__ipip_bucket(struct ipip_net *ipn,
173 struct ip_tunnel_parm *parms) 173 struct ip_tunnel_parm *parms)
174 { 174 {
175 __be32 remote = parms->iph.daddr; 175 __be32 remote = parms->iph.daddr;
176 __be32 local = parms->iph.saddr; 176 __be32 local = parms->iph.saddr;
177 unsigned h = 0; 177 unsigned h = 0;
178 int prio = 0; 178 int prio = 0;
179 179
180 if (remote) { 180 if (remote) {
181 prio |= 2; 181 prio |= 2;
182 h ^= HASH(remote); 182 h ^= HASH(remote);
183 } 183 }
184 if (local) { 184 if (local) {
185 prio |= 1; 185 prio |= 1;
186 h ^= HASH(local); 186 h ^= HASH(local);
187 } 187 }
188 return &ipn->tunnels[prio][h]; 188 return &ipn->tunnels[prio][h];
189 } 189 }
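
The prio bits select one of the four chains wired up in ipip_init_net() below: bit 1 is set when the tunnel is keyed on a remote address, bit 0 when it is keyed on a local one, giving:

    prio 0 (no remote, no local) -> tunnels[0] == tunnels_wc   (wildcard)
    prio 1 (local only)          -> tunnels[1] == tunnels_l
    prio 2 (remote only)         -> tunnels[2] == tunnels_r
    prio 3 (remote and local)    -> tunnels[3] == tunnels_r_l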
190 190
191 static inline struct ip_tunnel **ipip_bucket(struct ipip_net *ipn, 191 static inline struct ip_tunnel **ipip_bucket(struct ipip_net *ipn,
192 struct ip_tunnel *t) 192 struct ip_tunnel *t)
193 { 193 {
194 return __ipip_bucket(ipn, &t->parms); 194 return __ipip_bucket(ipn, &t->parms);
195 } 195 }
196 196
197 static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t) 197 static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
198 { 198 {
199 struct ip_tunnel **tp; 199 struct ip_tunnel **tp;
200 200
201 for (tp = ipip_bucket(ipn, t); *tp; tp = &(*tp)->next) { 201 for (tp = ipip_bucket(ipn, t); *tp; tp = &(*tp)->next) {
202 if (t == *tp) { 202 if (t == *tp) {
203 spin_lock_bh(&ipip_lock); 203 spin_lock_bh(&ipip_lock);
204 *tp = t->next; 204 *tp = t->next;
205 spin_unlock_bh(&ipip_lock); 205 spin_unlock_bh(&ipip_lock);
206 break; 206 break;
207 } 207 }
208 } 208 }
209 } 209 }
210 210
211 static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t) 211 static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
212 { 212 {
213 struct ip_tunnel **tp = ipip_bucket(ipn, t); 213 struct ip_tunnel **tp = ipip_bucket(ipn, t);
214 214
215 spin_lock_bh(&ipip_lock); 215 spin_lock_bh(&ipip_lock);
216 t->next = *tp; 216 t->next = *tp;
217 rcu_assign_pointer(*tp, t); 217 rcu_assign_pointer(*tp, t);
218 spin_unlock_bh(&ipip_lock); 218 spin_unlock_bh(&ipip_lock);
219 } 219 }
220 220
221 static struct ip_tunnel * ipip_tunnel_locate(struct net *net, 221 static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
222 struct ip_tunnel_parm *parms, int create) 222 struct ip_tunnel_parm *parms, int create)
223 { 223 {
224 __be32 remote = parms->iph.daddr; 224 __be32 remote = parms->iph.daddr;
225 __be32 local = parms->iph.saddr; 225 __be32 local = parms->iph.saddr;
226 struct ip_tunnel *t, **tp, *nt; 226 struct ip_tunnel *t, **tp, *nt;
227 struct net_device *dev; 227 struct net_device *dev;
228 char name[IFNAMSIZ]; 228 char name[IFNAMSIZ];
229 struct ipip_net *ipn = net_generic(net, ipip_net_id); 229 struct ipip_net *ipn = net_generic(net, ipip_net_id);
230 230
231 for (tp = __ipip_bucket(ipn, parms); (t = *tp) != NULL; tp = &t->next) { 231 for (tp = __ipip_bucket(ipn, parms); (t = *tp) != NULL; tp = &t->next) {
232 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) 232 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
233 return t; 233 return t;
234 } 234 }
235 if (!create) 235 if (!create)
236 return NULL; 236 return NULL;
237 237
238 if (parms->name[0]) 238 if (parms->name[0])
239 strlcpy(name, parms->name, IFNAMSIZ); 239 strlcpy(name, parms->name, IFNAMSIZ);
240 else 240 else
241 sprintf(name, "tunl%%d"); 241 sprintf(name, "tunl%%d");
242 242
243 dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup); 243 dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
244 if (dev == NULL) 244 if (dev == NULL)
245 return NULL; 245 return NULL;
246 246
247 dev_net_set(dev, net); 247 dev_net_set(dev, net);
248 248
249 if (strchr(name, '%')) { 249 if (strchr(name, '%')) {
250 if (dev_alloc_name(dev, name) < 0) 250 if (dev_alloc_name(dev, name) < 0)
251 goto failed_free; 251 goto failed_free;
252 } 252 }
253 253
254 nt = netdev_priv(dev); 254 nt = netdev_priv(dev);
255 nt->parms = *parms; 255 nt->parms = *parms;
256 256
257 ipip_tunnel_init(dev); 257 ipip_tunnel_init(dev);
258 258
259 if (register_netdevice(dev) < 0) 259 if (register_netdevice(dev) < 0)
260 goto failed_free; 260 goto failed_free;
261 261
262 dev_hold(dev); 262 dev_hold(dev);
263 ipip_tunnel_link(ipn, nt); 263 ipip_tunnel_link(ipn, nt);
264 return nt; 264 return nt;
265 265
266 failed_free: 266 failed_free:
267 free_netdev(dev); 267 free_netdev(dev);
268 return NULL; 268 return NULL;
269 } 269 }
270 270
271 static void ipip_tunnel_uninit(struct net_device *dev) 271 static void ipip_tunnel_uninit(struct net_device *dev)
272 { 272 {
273 struct net *net = dev_net(dev); 273 struct net *net = dev_net(dev);
274 struct ipip_net *ipn = net_generic(net, ipip_net_id); 274 struct ipip_net *ipn = net_generic(net, ipip_net_id);
275 275
276 if (dev == ipn->fb_tunnel_dev) { 276 if (dev == ipn->fb_tunnel_dev) {
277 spin_lock_bh(&ipip_lock); 277 spin_lock_bh(&ipip_lock);
278 ipn->tunnels_wc[0] = NULL; 278 ipn->tunnels_wc[0] = NULL;
279 spin_unlock_bh(&ipip_lock); 279 spin_unlock_bh(&ipip_lock);
280 } else 280 } else
281 ipip_tunnel_unlink(ipn, netdev_priv(dev)); 281 ipip_tunnel_unlink(ipn, netdev_priv(dev));
282 dev_put(dev); 282 dev_put(dev);
283 } 283 }
284 284
285 static int ipip_err(struct sk_buff *skb, u32 info) 285 static int ipip_err(struct sk_buff *skb, u32 info)
286 { 286 {
287 287
288 /* All the routers (except for Linux) return only 288 /* All the routers (except for Linux) return only
289 8 bytes of packet payload. This means that precise relaying of 289 8 bytes of packet payload. This means that precise relaying of
290 ICMP in the real Internet is absolutely infeasible. 290 ICMP in the real Internet is absolutely infeasible.
291 */ 291 */
292 struct iphdr *iph = (struct iphdr *)skb->data; 292 struct iphdr *iph = (struct iphdr *)skb->data;
293 const int type = icmp_hdr(skb)->type; 293 const int type = icmp_hdr(skb)->type;
294 const int code = icmp_hdr(skb)->code; 294 const int code = icmp_hdr(skb)->code;
295 struct ip_tunnel *t; 295 struct ip_tunnel *t;
296 int err; 296 int err;
297 297
298 switch (type) { 298 switch (type) {
299 default: 299 default:
300 case ICMP_PARAMETERPROB: 300 case ICMP_PARAMETERPROB:
301 return 0; 301 return 0;
302 302
303 case ICMP_DEST_UNREACH: 303 case ICMP_DEST_UNREACH:
304 switch (code) { 304 switch (code) {
305 case ICMP_SR_FAILED: 305 case ICMP_SR_FAILED:
306 case ICMP_PORT_UNREACH: 306 case ICMP_PORT_UNREACH:
307 /* Impossible event. */ 307 /* Impossible event. */
308 return 0; 308 return 0;
309 case ICMP_FRAG_NEEDED: 309 case ICMP_FRAG_NEEDED:
310 /* Soft state for pmtu is maintained by IP core. */ 310 /* Soft state for pmtu is maintained by IP core. */
311 return 0; 311 return 0;
312 default: 312 default:
313 /* All others are translated to HOST_UNREACH. 313 /* All others are translated to HOST_UNREACH.
314 RFC 2003 contains "deep thoughts" about NET_UNREACH; 314 RFC 2003 contains "deep thoughts" about NET_UNREACH;
315 I believe they are just ether pollution. --ANK 315 I believe they are just ether pollution. --ANK
316 */ 316 */
317 break; 317 break;
318 } 318 }
319 break; 319 break;
320 case ICMP_TIME_EXCEEDED: 320 case ICMP_TIME_EXCEEDED:
321 if (code != ICMP_EXC_TTL) 321 if (code != ICMP_EXC_TTL)
322 return 0; 322 return 0;
323 break; 323 break;
324 } 324 }
325 325
326 err = -ENOENT; 326 err = -ENOENT;
327 327
328 rcu_read_lock(); 328 rcu_read_lock();
329 t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr); 329 t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
330 if (t == NULL || t->parms.iph.daddr == 0) 330 if (t == NULL || t->parms.iph.daddr == 0)
331 goto out; 331 goto out;
332 332
333 err = 0; 333 err = 0;
334 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) 334 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
335 goto out; 335 goto out;
336 336
337 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO)) 337 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
338 t->err_count++; 338 t->err_count++;
339 else 339 else
340 t->err_count = 1; 340 t->err_count = 1;
341 t->err_time = jiffies; 341 t->err_time = jiffies;
342 out: 342 out:
343 rcu_read_unlock(); 343 rcu_read_unlock();
344 return err; 344 return err;
345 } 345 }
346 346
347 static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph, 347 static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
348 struct sk_buff *skb) 348 struct sk_buff *skb)
349 { 349 {
350 struct iphdr *inner_iph = ip_hdr(skb); 350 struct iphdr *inner_iph = ip_hdr(skb);
351 351
352 if (INET_ECN_is_ce(outer_iph->tos)) 352 if (INET_ECN_is_ce(outer_iph->tos))
353 IP_ECN_set_ce(inner_iph); 353 IP_ECN_set_ce(inner_iph);
354 } 354 }
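
For reference, the two helpers implement RFC 3168-style ECN propagation: INET_ECN_is_ce() tests whether both ECN bits of the outer TOS are set (Congestion Experienced), and IP_ECN_set_ce() marks the inner header the same way (the real helpers live in include/net/inet_ecn.h, and the latter also fixes up the inner IP checksum). A simplified sketch of the predicate, with hypothetical names:

    #define ECN_MASK 3              /* the two ECN bits of the TOS byte */
    #define ECN_CE   3              /* Congestion Experienced */

    static inline int example_ecn_is_ce(u8 tos)
    {
            return (tos & ECN_MASK) == ECN_CE;
    }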
355 355
356 static int ipip_rcv(struct sk_buff *skb) 356 static int ipip_rcv(struct sk_buff *skb)
357 { 357 {
358 struct ip_tunnel *tunnel; 358 struct ip_tunnel *tunnel;
359 const struct iphdr *iph = ip_hdr(skb); 359 const struct iphdr *iph = ip_hdr(skb);
360 360
361 rcu_read_lock(); 361 rcu_read_lock();
362 if ((tunnel = ipip_tunnel_lookup(dev_net(skb->dev), 362 if ((tunnel = ipip_tunnel_lookup(dev_net(skb->dev),
363 iph->saddr, iph->daddr)) != NULL) { 363 iph->saddr, iph->daddr)) != NULL) {
364 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { 364 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
365 rcu_read_unlock(); 365 rcu_read_unlock();
366 kfree_skb(skb); 366 kfree_skb(skb);
367 return 0; 367 return 0;
368 } 368 }
369 369
370 secpath_reset(skb); 370 secpath_reset(skb);
371 371
372 skb->mac_header = skb->network_header; 372 skb->mac_header = skb->network_header;
373 skb_reset_network_header(skb); 373 skb_reset_network_header(skb);
374 skb->protocol = htons(ETH_P_IP); 374 skb->protocol = htons(ETH_P_IP);
375 skb->pkt_type = PACKET_HOST; 375 skb->pkt_type = PACKET_HOST;
376 376
377 tunnel->dev->stats.rx_packets++; 377 skb_tunnel_rx(skb, tunnel->dev);
378 tunnel->dev->stats.rx_bytes += skb->len; 378
379 skb->dev = tunnel->dev;
380 skb_dst_drop(skb);
381 nf_reset(skb);
382 ipip_ecn_decapsulate(iph, skb); 379 ipip_ecn_decapsulate(iph, skb);
383 netif_rx(skb); 380 netif_rx(skb);
384 rcu_read_unlock(); 381 rcu_read_unlock();
385 return 0; 382 return 0;
386 } 383 }
387 rcu_read_unlock(); 384 rcu_read_unlock();
388 385
389 return -1; 386 return -1;
390 } 387 }
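
The hunk above is the heart of this commit for ipip: the open-coded rx_packets/rx_bytes accounting, skb->dev assignment, skb_dst_drop() and nf_reset() collapse into the new skb_tunnel_rx() helper defined in net/dst.h. A sketch of what the helper plausibly factorizes, reconstructed from the removed lines (not the verbatim definition); clearing skb->rxhash, so receive steering is recomputed for the decapsulated packet, is the new piece:

    static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev)
    {
            dev->stats.rx_packets++;
            dev->stats.rx_bytes += skb->len;
            skb->rxhash = 0;        /* force rx hash recomputation */
            skb_dst_drop(skb);
            nf_reset(skb);
            skb->dev = dev;
    }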
391 388
392 /* 389 /*
393 * This function assumes it is being called from dev_queue_xmit() 390 * This function assumes it is being called from dev_queue_xmit()
394 * and that skb is filled properly by that function. 391 * and that skb is filled properly by that function.
395 */ 392 */
396 393
397 static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) 394 static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
398 { 395 {
399 struct ip_tunnel *tunnel = netdev_priv(dev); 396 struct ip_tunnel *tunnel = netdev_priv(dev);
400 struct net_device_stats *stats = &dev->stats; 397 struct net_device_stats *stats = &dev->stats;
401 struct netdev_queue *txq = netdev_get_tx_queue(dev, 0); 398 struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
402 struct iphdr *tiph = &tunnel->parms.iph; 399 struct iphdr *tiph = &tunnel->parms.iph;
403 u8 tos = tunnel->parms.iph.tos; 400 u8 tos = tunnel->parms.iph.tos;
404 __be16 df = tiph->frag_off; 401 __be16 df = tiph->frag_off;
405 struct rtable *rt; /* Route to the other host */ 402 struct rtable *rt; /* Route to the other host */
406 struct net_device *tdev; /* Device to other host */ 403 struct net_device *tdev; /* Device to other host */
407 struct iphdr *old_iph = ip_hdr(skb); 404 struct iphdr *old_iph = ip_hdr(skb);
408 struct iphdr *iph; /* Our new IP header */ 405 struct iphdr *iph; /* Our new IP header */
409 unsigned int max_headroom; /* The extra header space needed */ 406 unsigned int max_headroom; /* The extra header space needed */
410 __be32 dst = tiph->daddr; 407 __be32 dst = tiph->daddr;
411 int mtu; 408 int mtu;
412 409
413 if (skb->protocol != htons(ETH_P_IP)) 410 if (skb->protocol != htons(ETH_P_IP))
414 goto tx_error; 411 goto tx_error;
415 412
416 if (tos&1) 413 if (tos&1)
417 tos = old_iph->tos; 414 tos = old_iph->tos;
418 415
419 if (!dst) { 416 if (!dst) {
420 /* NBMA tunnel */ 417 /* NBMA tunnel */
421 if ((rt = skb_rtable(skb)) == NULL) { 418 if ((rt = skb_rtable(skb)) == NULL) {
422 stats->tx_fifo_errors++; 419 stats->tx_fifo_errors++;
423 goto tx_error; 420 goto tx_error;
424 } 421 }
425 if ((dst = rt->rt_gateway) == 0) 422 if ((dst = rt->rt_gateway) == 0)
426 goto tx_error_icmp; 423 goto tx_error_icmp;
427 } 424 }
428 425
429 { 426 {
430 struct flowi fl = { .oif = tunnel->parms.link, 427 struct flowi fl = { .oif = tunnel->parms.link,
431 .nl_u = { .ip4_u = 428 .nl_u = { .ip4_u =
432 { .daddr = dst, 429 { .daddr = dst,
433 .saddr = tiph->saddr, 430 .saddr = tiph->saddr,
434 .tos = RT_TOS(tos) } }, 431 .tos = RT_TOS(tos) } },
435 .proto = IPPROTO_IPIP }; 432 .proto = IPPROTO_IPIP };
436 if (ip_route_output_key(dev_net(dev), &rt, &fl)) { 433 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
437 stats->tx_carrier_errors++; 434 stats->tx_carrier_errors++;
438 goto tx_error_icmp; 435 goto tx_error_icmp;
439 } 436 }
440 } 437 }
441 tdev = rt->u.dst.dev; 438 tdev = rt->u.dst.dev;
442 439
443 if (tdev == dev) { 440 if (tdev == dev) {
444 ip_rt_put(rt); 441 ip_rt_put(rt);
445 stats->collisions++; 442 stats->collisions++;
446 goto tx_error; 443 goto tx_error;
447 } 444 }
448 445
449 df |= old_iph->frag_off & htons(IP_DF); 446 df |= old_iph->frag_off & htons(IP_DF);
450 447
451 if (df) { 448 if (df) {
452 mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr); 449 mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
453 450
454 if (mtu < 68) { 451 if (mtu < 68) {
455 stats->collisions++; 452 stats->collisions++;
456 ip_rt_put(rt); 453 ip_rt_put(rt);
457 goto tx_error; 454 goto tx_error;
458 } 455 }
459 456
460 if (skb_dst(skb)) 457 if (skb_dst(skb))
461 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); 458 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
462 459
463 if ((old_iph->frag_off & htons(IP_DF)) && 460 if ((old_iph->frag_off & htons(IP_DF)) &&
464 mtu < ntohs(old_iph->tot_len)) { 461 mtu < ntohs(old_iph->tot_len)) {
465 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, 462 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
466 htonl(mtu)); 463 htonl(mtu));
467 ip_rt_put(rt); 464 ip_rt_put(rt);
468 goto tx_error; 465 goto tx_error;
469 } 466 }
470 } 467 }
471 468
472 if (tunnel->err_count > 0) { 469 if (tunnel->err_count > 0) {
473 if (time_before(jiffies, 470 if (time_before(jiffies,
474 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { 471 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
475 tunnel->err_count--; 472 tunnel->err_count--;
476 dst_link_failure(skb); 473 dst_link_failure(skb);
477 } else 474 } else
478 tunnel->err_count = 0; 475 tunnel->err_count = 0;
479 } 476 }
480 477
481 /* 478 /*
482 * Okay, now see if we can stuff it in the buffer as-is. 479 * Okay, now see if we can stuff it in the buffer as-is.
483 */ 480 */
484 max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr)); 481 max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
485 482
486 if (skb_headroom(skb) < max_headroom || skb_shared(skb) || 483 if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
487 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { 484 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
488 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); 485 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
489 if (!new_skb) { 486 if (!new_skb) {
490 ip_rt_put(rt); 487 ip_rt_put(rt);
491 txq->tx_dropped++; 488 txq->tx_dropped++;
492 dev_kfree_skb(skb); 489 dev_kfree_skb(skb);
493 return NETDEV_TX_OK; 490 return NETDEV_TX_OK;
494 } 491 }
495 if (skb->sk) 492 if (skb->sk)
496 skb_set_owner_w(new_skb, skb->sk); 493 skb_set_owner_w(new_skb, skb->sk);
497 dev_kfree_skb(skb); 494 dev_kfree_skb(skb);
498 skb = new_skb; 495 skb = new_skb;
499 old_iph = ip_hdr(skb); 496 old_iph = ip_hdr(skb);
500 } 497 }
501 498
502 skb->transport_header = skb->network_header; 499 skb->transport_header = skb->network_header;
503 skb_push(skb, sizeof(struct iphdr)); 500 skb_push(skb, sizeof(struct iphdr));
504 skb_reset_network_header(skb); 501 skb_reset_network_header(skb);
505 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 502 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
506 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | 503 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
507 IPSKB_REROUTED); 504 IPSKB_REROUTED);
508 skb_dst_drop(skb); 505 skb_dst_drop(skb);
509 skb_dst_set(skb, &rt->u.dst); 506 skb_dst_set(skb, &rt->u.dst);
510 507
511 /* 508 /*
512 * Push down and install the IPIP header. 509 * Push down and install the IPIP header.
513 */ 510 */
514 511
515 iph = ip_hdr(skb); 512 iph = ip_hdr(skb);
516 iph->version = 4; 513 iph->version = 4;
517 iph->ihl = sizeof(struct iphdr)>>2; 514 iph->ihl = sizeof(struct iphdr)>>2;
518 iph->frag_off = df; 515 iph->frag_off = df;
519 iph->protocol = IPPROTO_IPIP; 516 iph->protocol = IPPROTO_IPIP;
520 iph->tos = INET_ECN_encapsulate(tos, old_iph->tos); 517 iph->tos = INET_ECN_encapsulate(tos, old_iph->tos);
521 iph->daddr = rt->rt_dst; 518 iph->daddr = rt->rt_dst;
522 iph->saddr = rt->rt_src; 519 iph->saddr = rt->rt_src;
523 520
524 if ((iph->ttl = tiph->ttl) == 0) 521 if ((iph->ttl = tiph->ttl) == 0)
525 iph->ttl = old_iph->ttl; 522 iph->ttl = old_iph->ttl;
526 523
527 nf_reset(skb); 524 nf_reset(skb);
528 525
529 IPTUNNEL_XMIT(); 526 IPTUNNEL_XMIT();
530 return NETDEV_TX_OK; 527 return NETDEV_TX_OK;
531 528
532 tx_error_icmp: 529 tx_error_icmp:
533 dst_link_failure(skb); 530 dst_link_failure(skb);
534 tx_error: 531 tx_error:
535 stats->tx_errors++; 532 stats->tx_errors++;
536 dev_kfree_skb(skb); 533 dev_kfree_skb(skb);
537 return NETDEV_TX_OK; 534 return NETDEV_TX_OK;
538 } 535 }
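
Worth noting for the PMTU block above: the only overhead IPIP adds is one outer IPv4 header, so the usable inner MTU is the path MTU minus sizeof(struct iphdr). For a typical Ethernet underlay:

    mtu = 1500 - sizeof(struct iphdr);      /* 1500 - 20 = 1480 bytes */

A DF-marked inner packet larger than that is bounced with ICMP_FRAG_NEEDED carrying htonl(mtu), exactly as the code does.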
539 536
540 static void ipip_tunnel_bind_dev(struct net_device *dev) 537 static void ipip_tunnel_bind_dev(struct net_device *dev)
541 { 538 {
542 struct net_device *tdev = NULL; 539 struct net_device *tdev = NULL;
543 struct ip_tunnel *tunnel; 540 struct ip_tunnel *tunnel;
544 struct iphdr *iph; 541 struct iphdr *iph;
545 542
546 tunnel = netdev_priv(dev); 543 tunnel = netdev_priv(dev);
547 iph = &tunnel->parms.iph; 544 iph = &tunnel->parms.iph;
548 545
549 if (iph->daddr) { 546 if (iph->daddr) {
550 struct flowi fl = { .oif = tunnel->parms.link, 547 struct flowi fl = { .oif = tunnel->parms.link,
551 .nl_u = { .ip4_u = 548 .nl_u = { .ip4_u =
552 { .daddr = iph->daddr, 549 { .daddr = iph->daddr,
553 .saddr = iph->saddr, 550 .saddr = iph->saddr,
554 .tos = RT_TOS(iph->tos) } }, 551 .tos = RT_TOS(iph->tos) } },
555 .proto = IPPROTO_IPIP }; 552 .proto = IPPROTO_IPIP };
556 struct rtable *rt; 553 struct rtable *rt;
557 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { 554 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
558 tdev = rt->u.dst.dev; 555 tdev = rt->u.dst.dev;
559 ip_rt_put(rt); 556 ip_rt_put(rt);
560 } 557 }
561 dev->flags |= IFF_POINTOPOINT; 558 dev->flags |= IFF_POINTOPOINT;
562 } 559 }
563 560
564 if (!tdev && tunnel->parms.link) 561 if (!tdev && tunnel->parms.link)
565 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link); 562 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
566 563
567 if (tdev) { 564 if (tdev) {
568 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); 565 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
569 dev->mtu = tdev->mtu - sizeof(struct iphdr); 566 dev->mtu = tdev->mtu - sizeof(struct iphdr);
570 } 567 }
571 dev->iflink = tunnel->parms.link; 568 dev->iflink = tunnel->parms.link;
572 } 569 }
573 570
574 static int 571 static int
575 ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) 572 ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
576 { 573 {
577 int err = 0; 574 int err = 0;
578 struct ip_tunnel_parm p; 575 struct ip_tunnel_parm p;
579 struct ip_tunnel *t; 576 struct ip_tunnel *t;
580 struct net *net = dev_net(dev); 577 struct net *net = dev_net(dev);
581 struct ipip_net *ipn = net_generic(net, ipip_net_id); 578 struct ipip_net *ipn = net_generic(net, ipip_net_id);
582 579
583 switch (cmd) { 580 switch (cmd) {
584 case SIOCGETTUNNEL: 581 case SIOCGETTUNNEL:
585 t = NULL; 582 t = NULL;
586 if (dev == ipn->fb_tunnel_dev) { 583 if (dev == ipn->fb_tunnel_dev) {
587 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { 584 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
588 err = -EFAULT; 585 err = -EFAULT;
589 break; 586 break;
590 } 587 }
591 t = ipip_tunnel_locate(net, &p, 0); 588 t = ipip_tunnel_locate(net, &p, 0);
592 } 589 }
593 if (t == NULL) 590 if (t == NULL)
594 t = netdev_priv(dev); 591 t = netdev_priv(dev);
595 memcpy(&p, &t->parms, sizeof(p)); 592 memcpy(&p, &t->parms, sizeof(p));
596 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) 593 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
597 err = -EFAULT; 594 err = -EFAULT;
598 break; 595 break;
599 596
600 case SIOCADDTUNNEL: 597 case SIOCADDTUNNEL:
601 case SIOCCHGTUNNEL: 598 case SIOCCHGTUNNEL:
602 err = -EPERM; 599 err = -EPERM;
603 if (!capable(CAP_NET_ADMIN)) 600 if (!capable(CAP_NET_ADMIN))
604 goto done; 601 goto done;
605 602
606 err = -EFAULT; 603 err = -EFAULT;
607 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) 604 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
608 goto done; 605 goto done;
609 606
610 err = -EINVAL; 607 err = -EINVAL;
611 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP || 608 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
612 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF))) 609 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
613 goto done; 610 goto done;
614 if (p.iph.ttl) 611 if (p.iph.ttl)
615 p.iph.frag_off |= htons(IP_DF); 612 p.iph.frag_off |= htons(IP_DF);
616 613
617 t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL); 614 t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
618 615
619 if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { 616 if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
620 if (t != NULL) { 617 if (t != NULL) {
621 if (t->dev != dev) { 618 if (t->dev != dev) {
622 err = -EEXIST; 619 err = -EEXIST;
623 break; 620 break;
624 } 621 }
625 } else { 622 } else {
626 if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) || 623 if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
627 (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) { 624 (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
628 err = -EINVAL; 625 err = -EINVAL;
629 break; 626 break;
630 } 627 }
631 t = netdev_priv(dev); 628 t = netdev_priv(dev);
632 ipip_tunnel_unlink(ipn, t); 629 ipip_tunnel_unlink(ipn, t);
633 t->parms.iph.saddr = p.iph.saddr; 630 t->parms.iph.saddr = p.iph.saddr;
634 t->parms.iph.daddr = p.iph.daddr; 631 t->parms.iph.daddr = p.iph.daddr;
635 memcpy(dev->dev_addr, &p.iph.saddr, 4); 632 memcpy(dev->dev_addr, &p.iph.saddr, 4);
636 memcpy(dev->broadcast, &p.iph.daddr, 4); 633 memcpy(dev->broadcast, &p.iph.daddr, 4);
637 ipip_tunnel_link(ipn, t); 634 ipip_tunnel_link(ipn, t);
638 netdev_state_change(dev); 635 netdev_state_change(dev);
639 } 636 }
640 } 637 }
641 638
642 if (t) { 639 if (t) {
643 err = 0; 640 err = 0;
644 if (cmd == SIOCCHGTUNNEL) { 641 if (cmd == SIOCCHGTUNNEL) {
645 t->parms.iph.ttl = p.iph.ttl; 642 t->parms.iph.ttl = p.iph.ttl;
646 t->parms.iph.tos = p.iph.tos; 643 t->parms.iph.tos = p.iph.tos;
647 t->parms.iph.frag_off = p.iph.frag_off; 644 t->parms.iph.frag_off = p.iph.frag_off;
648 if (t->parms.link != p.link) { 645 if (t->parms.link != p.link) {
649 t->parms.link = p.link; 646 t->parms.link = p.link;
650 ipip_tunnel_bind_dev(dev); 647 ipip_tunnel_bind_dev(dev);
651 netdev_state_change(dev); 648 netdev_state_change(dev);
652 } 649 }
653 } 650 }
654 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) 651 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
655 err = -EFAULT; 652 err = -EFAULT;
656 } else 653 } else
657 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); 654 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
658 break; 655 break;
659 656
660 case SIOCDELTUNNEL: 657 case SIOCDELTUNNEL:
661 err = -EPERM; 658 err = -EPERM;
662 if (!capable(CAP_NET_ADMIN)) 659 if (!capable(CAP_NET_ADMIN))
663 goto done; 660 goto done;
664 661
665 if (dev == ipn->fb_tunnel_dev) { 662 if (dev == ipn->fb_tunnel_dev) {
666 err = -EFAULT; 663 err = -EFAULT;
667 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) 664 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
668 goto done; 665 goto done;
669 err = -ENOENT; 666 err = -ENOENT;
670 if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL) 667 if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL)
671 goto done; 668 goto done;
672 err = -EPERM; 669 err = -EPERM;
673 if (t->dev == ipn->fb_tunnel_dev) 670 if (t->dev == ipn->fb_tunnel_dev)
674 goto done; 671 goto done;
675 dev = t->dev; 672 dev = t->dev;
676 } 673 }
677 unregister_netdevice(dev); 674 unregister_netdevice(dev);
678 err = 0; 675 err = 0;
679 break; 676 break;
680 677
681 default: 678 default:
682 err = -EINVAL; 679 err = -EINVAL;
683 } 680 }
684 681
685 done: 682 done:
686 return err; 683 return err;
687 } 684 }
688 685
689 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu) 686 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
690 { 687 {
691 if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr)) 688 if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
692 return -EINVAL; 689 return -EINVAL;
693 dev->mtu = new_mtu; 690 dev->mtu = new_mtu;
694 return 0; 691 return 0;
695 } 692 }
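
The bounds here follow from IPv4 itself: 68 is the minimum MTU every IPv4 link must support (RFC 791), and 0xFFF8 (65528, the maximum datagram size rounded down to a multiple of 8 for fragmentation) less the outer header caps the upper end:

    0xFFF8 - sizeof(struct iphdr)           /* 65528 - 20 = 65508 bytes */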
696 693
697 static const struct net_device_ops ipip_netdev_ops = { 694 static const struct net_device_ops ipip_netdev_ops = {
698 .ndo_uninit = ipip_tunnel_uninit, 695 .ndo_uninit = ipip_tunnel_uninit,
699 .ndo_start_xmit = ipip_tunnel_xmit, 696 .ndo_start_xmit = ipip_tunnel_xmit,
700 .ndo_do_ioctl = ipip_tunnel_ioctl, 697 .ndo_do_ioctl = ipip_tunnel_ioctl,
701 .ndo_change_mtu = ipip_tunnel_change_mtu, 698 .ndo_change_mtu = ipip_tunnel_change_mtu,
702 699
703 }; 700 };
704 701
705 static void ipip_tunnel_setup(struct net_device *dev) 702 static void ipip_tunnel_setup(struct net_device *dev)
706 { 703 {
707 dev->netdev_ops = &ipip_netdev_ops; 704 dev->netdev_ops = &ipip_netdev_ops;
708 dev->destructor = free_netdev; 705 dev->destructor = free_netdev;
709 706
710 dev->type = ARPHRD_TUNNEL; 707 dev->type = ARPHRD_TUNNEL;
711 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr); 708 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr);
712 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr); 709 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr);
713 dev->flags = IFF_NOARP; 710 dev->flags = IFF_NOARP;
714 dev->iflink = 0; 711 dev->iflink = 0;
715 dev->addr_len = 4; 712 dev->addr_len = 4;
716 dev->features |= NETIF_F_NETNS_LOCAL; 713 dev->features |= NETIF_F_NETNS_LOCAL;
717 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; 714 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
718 } 715 }
719 716
720 static void ipip_tunnel_init(struct net_device *dev) 717 static void ipip_tunnel_init(struct net_device *dev)
721 { 718 {
722 struct ip_tunnel *tunnel = netdev_priv(dev); 719 struct ip_tunnel *tunnel = netdev_priv(dev);
723 720
724 tunnel->dev = dev; 721 tunnel->dev = dev;
725 strcpy(tunnel->parms.name, dev->name); 722 strcpy(tunnel->parms.name, dev->name);
726 723
727 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); 724 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
728 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); 725 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
729 726
730 ipip_tunnel_bind_dev(dev); 727 ipip_tunnel_bind_dev(dev);
731 } 728 }
732 729
733 static void __net_init ipip_fb_tunnel_init(struct net_device *dev) 730 static void __net_init ipip_fb_tunnel_init(struct net_device *dev)
734 { 731 {
735 struct ip_tunnel *tunnel = netdev_priv(dev); 732 struct ip_tunnel *tunnel = netdev_priv(dev);
736 struct iphdr *iph = &tunnel->parms.iph; 733 struct iphdr *iph = &tunnel->parms.iph;
737 struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id); 734 struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id);
738 735
739 tunnel->dev = dev; 736 tunnel->dev = dev;
740 strcpy(tunnel->parms.name, dev->name); 737 strcpy(tunnel->parms.name, dev->name);
741 738
742 iph->version = 4; 739 iph->version = 4;
743 iph->protocol = IPPROTO_IPIP; 740 iph->protocol = IPPROTO_IPIP;
744 iph->ihl = 5; 741 iph->ihl = 5;
745 742
746 dev_hold(dev); 743 dev_hold(dev);
747 ipn->tunnels_wc[0] = tunnel; 744 ipn->tunnels_wc[0] = tunnel;
748 } 745 }
749 746
750 static struct xfrm_tunnel ipip_handler = { 747 static struct xfrm_tunnel ipip_handler = {
751 .handler = ipip_rcv, 748 .handler = ipip_rcv,
752 .err_handler = ipip_err, 749 .err_handler = ipip_err,
753 .priority = 1, 750 .priority = 1,
754 }; 751 };
755 752
756 static const char banner[] __initconst = 753 static const char banner[] __initconst =
757 KERN_INFO "IPv4 over IPv4 tunneling driver\n"; 754 KERN_INFO "IPv4 over IPv4 tunneling driver\n";
758 755
759 static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head) 756 static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head)
760 { 757 {
761 int prio; 758 int prio;
762 759
763 for (prio = 1; prio < 4; prio++) { 760 for (prio = 1; prio < 4; prio++) {
764 int h; 761 int h;
765 for (h = 0; h < HASH_SIZE; h++) { 762 for (h = 0; h < HASH_SIZE; h++) {
766 struct ip_tunnel *t = ipn->tunnels[prio][h]; 763 struct ip_tunnel *t = ipn->tunnels[prio][h];
767 764
768 while (t != NULL) { 765 while (t != NULL) {
769 unregister_netdevice_queue(t->dev, head); 766 unregister_netdevice_queue(t->dev, head);
770 t = t->next; 767 t = t->next;
771 } 768 }
772 } 769 }
773 } 770 }
774 } 771 }
775 772
776 static int __net_init ipip_init_net(struct net *net) 773 static int __net_init ipip_init_net(struct net *net)
777 { 774 {
778 struct ipip_net *ipn = net_generic(net, ipip_net_id); 775 struct ipip_net *ipn = net_generic(net, ipip_net_id);
779 int err; 776 int err;
780 777
781 ipn->tunnels[0] = ipn->tunnels_wc; 778 ipn->tunnels[0] = ipn->tunnels_wc;
782 ipn->tunnels[1] = ipn->tunnels_l; 779 ipn->tunnels[1] = ipn->tunnels_l;
783 ipn->tunnels[2] = ipn->tunnels_r; 780 ipn->tunnels[2] = ipn->tunnels_r;
784 ipn->tunnels[3] = ipn->tunnels_r_l; 781 ipn->tunnels[3] = ipn->tunnels_r_l;
785 782
786 ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), 783 ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
787 "tunl0", 784 "tunl0",
788 ipip_tunnel_setup); 785 ipip_tunnel_setup);
789 if (!ipn->fb_tunnel_dev) { 786 if (!ipn->fb_tunnel_dev) {
790 err = -ENOMEM; 787 err = -ENOMEM;
791 goto err_alloc_dev; 788 goto err_alloc_dev;
792 } 789 }
793 dev_net_set(ipn->fb_tunnel_dev, net); 790 dev_net_set(ipn->fb_tunnel_dev, net);
794 791
795 ipip_fb_tunnel_init(ipn->fb_tunnel_dev); 792 ipip_fb_tunnel_init(ipn->fb_tunnel_dev);
796 793
797 if ((err = register_netdev(ipn->fb_tunnel_dev))) 794 if ((err = register_netdev(ipn->fb_tunnel_dev)))
798 goto err_reg_dev; 795 goto err_reg_dev;
799 796
800 return 0; 797 return 0;
801 798
802 err_reg_dev: 799 err_reg_dev:
803 free_netdev(ipn->fb_tunnel_dev); 800 free_netdev(ipn->fb_tunnel_dev);
804 err_alloc_dev: 801 err_alloc_dev:
805 /* nothing */ 802 /* nothing */
806 return err; 803 return err;
807 } 804 }
808 805
809 static void __net_exit ipip_exit_net(struct net *net) 806 static void __net_exit ipip_exit_net(struct net *net)
810 { 807 {
811 struct ipip_net *ipn = net_generic(net, ipip_net_id); 808 struct ipip_net *ipn = net_generic(net, ipip_net_id);
812 LIST_HEAD(list); 809 LIST_HEAD(list);
813 810
814 rtnl_lock(); 811 rtnl_lock();
815 ipip_destroy_tunnels(ipn, &list); 812 ipip_destroy_tunnels(ipn, &list);
816 unregister_netdevice_queue(ipn->fb_tunnel_dev, &list); 813 unregister_netdevice_queue(ipn->fb_tunnel_dev, &list);
817 unregister_netdevice_many(&list); 814 unregister_netdevice_many(&list);
818 rtnl_unlock(); 815 rtnl_unlock();
819 } 816 }
820 817
821 static struct pernet_operations ipip_net_ops = { 818 static struct pernet_operations ipip_net_ops = {
822 .init = ipip_init_net, 819 .init = ipip_init_net,
823 .exit = ipip_exit_net, 820 .exit = ipip_exit_net,
824 .id = &ipip_net_id, 821 .id = &ipip_net_id,
825 .size = sizeof(struct ipip_net), 822 .size = sizeof(struct ipip_net),
826 }; 823 };
827 824
828 static int __init ipip_init(void) 825 static int __init ipip_init(void)
829 { 826 {
830 int err; 827 int err;
831 828
832 printk(banner); 829 printk(banner);
833 830
834 err = register_pernet_device(&ipip_net_ops); 831 err = register_pernet_device(&ipip_net_ops);
835 if (err < 0) 832 if (err < 0)
836 return err; 833 return err;
837 err = xfrm4_tunnel_register(&ipip_handler, AF_INET); 834 err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
838 if (err < 0) { 835 if (err < 0) {
839 unregister_pernet_device(&ipip_net_ops); 836 unregister_pernet_device(&ipip_net_ops);
840 printk(KERN_INFO "ipip init: can't register tunnel\n"); 837 printk(KERN_INFO "ipip init: can't register tunnel\n");
841 } 838 }
842 return err; 839 return err;
843 } 840 }
844 841
845 static void __exit ipip_fini(void) 842 static void __exit ipip_fini(void)
846 { 843 {
847 if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET)) 844 if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
848 printk(KERN_INFO "ipip close: can't deregister tunnel\n"); 845 printk(KERN_INFO "ipip close: can't deregister tunnel\n");
849 846
850 unregister_pernet_device(&ipip_net_ops); 847 unregister_pernet_device(&ipip_net_ops);
851 } 848 }
852 849
853 module_init(ipip_init); 850 module_init(ipip_init);
854 module_exit(ipip_fini); 851 module_exit(ipip_fini);
855 MODULE_LICENSE("GPL"); 852 MODULE_LICENSE("GPL");
856 853
1 /* 1 /*
2 * IP multicast routing support for mrouted 3.6/3.8 2 * IP multicast routing support for mrouted 3.6/3.8
3 * 3 *
4 * (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk> 4 * (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
5 * Linux Consultancy and Custom Driver Development 5 * Linux Consultancy and Custom Driver Development
6 * 6 *
7 * This program is free software; you can redistribute it and/or 7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License 8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version. 10 * 2 of the License, or (at your option) any later version.
11 * 11 *
12 * Fixes: 12 * Fixes:
13 * Michael Chastain : Incorrect size of copying. 13 * Michael Chastain : Incorrect size of copying.
14 * Alan Cox : Added the cache manager code 14 * Alan Cox : Added the cache manager code
15 * Alan Cox : Fixed the clone/copy bug and device race. 15 * Alan Cox : Fixed the clone/copy bug and device race.
16 * Mike McLagan : Routing by source 16 * Mike McLagan : Routing by source
17 * Malcolm Beattie : Buffer handling fixes. 17 * Malcolm Beattie : Buffer handling fixes.
18 * Alexey Kuznetsov : Double buffer free and other fixes. 18 * Alexey Kuznetsov : Double buffer free and other fixes.
19 * SVR Anand : Fixed several multicast bugs and problems. 19 * SVR Anand : Fixed several multicast bugs and problems.
20 * Alexey Kuznetsov : Status, optimisations and more. 20 * Alexey Kuznetsov : Status, optimisations and more.
21 * Brad Parker : Better behaviour on mrouted upcall 21 * Brad Parker : Better behaviour on mrouted upcall
22 * overflow. 22 * overflow.
23 * Carlos Picoto : PIMv1 Support 23 * Carlos Picoto : PIMv1 Support
24 * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header 24 * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header
25 * Relax this requirement to work with older peers. 25 * Relax this requirement to work with older peers.
26 * 26 *
27 */ 27 */
28 28
29 #include <asm/system.h> 29 #include <asm/system.h>
30 #include <asm/uaccess.h> 30 #include <asm/uaccess.h>
31 #include <linux/types.h> 31 #include <linux/types.h>
32 #include <linux/capability.h> 32 #include <linux/capability.h>
33 #include <linux/errno.h> 33 #include <linux/errno.h>
34 #include <linux/timer.h> 34 #include <linux/timer.h>
35 #include <linux/mm.h> 35 #include <linux/mm.h>
36 #include <linux/kernel.h> 36 #include <linux/kernel.h>
37 #include <linux/fcntl.h> 37 #include <linux/fcntl.h>
38 #include <linux/stat.h> 38 #include <linux/stat.h>
39 #include <linux/socket.h> 39 #include <linux/socket.h>
40 #include <linux/in.h> 40 #include <linux/in.h>
41 #include <linux/inet.h> 41 #include <linux/inet.h>
42 #include <linux/netdevice.h> 42 #include <linux/netdevice.h>
43 #include <linux/inetdevice.h> 43 #include <linux/inetdevice.h>
44 #include <linux/igmp.h> 44 #include <linux/igmp.h>
45 #include <linux/proc_fs.h> 45 #include <linux/proc_fs.h>
46 #include <linux/seq_file.h> 46 #include <linux/seq_file.h>
47 #include <linux/mroute.h> 47 #include <linux/mroute.h>
48 #include <linux/init.h> 48 #include <linux/init.h>
49 #include <linux/if_ether.h> 49 #include <linux/if_ether.h>
50 #include <linux/slab.h> 50 #include <linux/slab.h>
51 #include <net/net_namespace.h> 51 #include <net/net_namespace.h>
52 #include <net/ip.h> 52 #include <net/ip.h>
53 #include <net/protocol.h> 53 #include <net/protocol.h>
54 #include <linux/skbuff.h> 54 #include <linux/skbuff.h>
55 #include <net/route.h> 55 #include <net/route.h>
56 #include <net/sock.h> 56 #include <net/sock.h>
57 #include <net/icmp.h> 57 #include <net/icmp.h>
58 #include <net/udp.h> 58 #include <net/udp.h>
59 #include <net/raw.h> 59 #include <net/raw.h>
60 #include <linux/notifier.h> 60 #include <linux/notifier.h>
61 #include <linux/if_arp.h> 61 #include <linux/if_arp.h>
62 #include <linux/netfilter_ipv4.h> 62 #include <linux/netfilter_ipv4.h>
63 #include <net/ipip.h> 63 #include <net/ipip.h>
64 #include <net/checksum.h> 64 #include <net/checksum.h>
65 #include <net/netlink.h> 65 #include <net/netlink.h>
66 #include <net/fib_rules.h> 66 #include <net/fib_rules.h>
67 67
68 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) 68 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
69 #define CONFIG_IP_PIMSM 1 69 #define CONFIG_IP_PIMSM 1
70 #endif 70 #endif
71 71
72 struct mr_table { 72 struct mr_table {
73 struct list_head list; 73 struct list_head list;
74 #ifdef CONFIG_NET_NS 74 #ifdef CONFIG_NET_NS
75 struct net *net; 75 struct net *net;
76 #endif 76 #endif
77 u32 id; 77 u32 id;
78 struct sock *mroute_sk; 78 struct sock *mroute_sk;
79 struct timer_list ipmr_expire_timer; 79 struct timer_list ipmr_expire_timer;
80 struct list_head mfc_unres_queue; 80 struct list_head mfc_unres_queue;
81 struct list_head mfc_cache_array[MFC_LINES]; 81 struct list_head mfc_cache_array[MFC_LINES];
82 struct vif_device vif_table[MAXVIFS]; 82 struct vif_device vif_table[MAXVIFS];
83 int maxvif; 83 int maxvif;
84 atomic_t cache_resolve_queue_len; 84 atomic_t cache_resolve_queue_len;
85 int mroute_do_assert; 85 int mroute_do_assert;
86 int mroute_do_pim; 86 int mroute_do_pim;
87 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) 87 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
88 int mroute_reg_vif_num; 88 int mroute_reg_vif_num;
89 #endif 89 #endif
90 }; 90 };
91 91
92 struct ipmr_rule { 92 struct ipmr_rule {
93 struct fib_rule common; 93 struct fib_rule common;
94 }; 94 };
95 95
96 struct ipmr_result { 96 struct ipmr_result {
97 struct mr_table *mrt; 97 struct mr_table *mrt;
98 }; 98 };
99 99
100 /* Big lock, protecting vif table, mrt cache and mroute socket state. 100 /* Big lock, protecting vif table, mrt cache and mroute socket state.
101 Note that the changes are semaphored via rtnl_lock. 101 Note that the changes are semaphored via rtnl_lock.
102 */ 102 */
103 103
104 static DEFINE_RWLOCK(mrt_lock); 104 static DEFINE_RWLOCK(mrt_lock);
105 105
106 /* 106 /*
107 * Multicast router control variables 107 * Multicast router control variables
108 */ 108 */
109 109
110 #define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL) 110 #define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)
111 111
112 /* Special spinlock for queue of unresolved entries */ 112 /* Special spinlock for queue of unresolved entries */
113 static DEFINE_SPINLOCK(mfc_unres_lock); 113 static DEFINE_SPINLOCK(mfc_unres_lock);
114 114
115 /* We return to Alan's original scheme. The hash table of resolved 115 /* We return to Alan's original scheme. The hash table of resolved
116 entries is changed only in process context and protected 116 entries is changed only in process context and protected
117 by the weak lock mrt_lock. The queue of unresolved entries is 117 by the weak lock mrt_lock. The queue of unresolved entries is
118 protected by the strong spinlock mfc_unres_lock. 118 protected by the strong spinlock mfc_unres_lock.
119 119
120 In this case the data path is entirely free of exclusive locks. 120 In this case the data path is entirely free of exclusive locks.
121 */ 121 */
122 122
123 static struct kmem_cache *mrt_cachep __read_mostly; 123 static struct kmem_cache *mrt_cachep __read_mostly;
124 124
125 static struct mr_table *ipmr_new_table(struct net *net, u32 id); 125 static struct mr_table *ipmr_new_table(struct net *net, u32 id);
126 static int ip_mr_forward(struct net *net, struct mr_table *mrt, 126 static int ip_mr_forward(struct net *net, struct mr_table *mrt,
127 struct sk_buff *skb, struct mfc_cache *cache, 127 struct sk_buff *skb, struct mfc_cache *cache,
128 int local); 128 int local);
129 static int ipmr_cache_report(struct mr_table *mrt, 129 static int ipmr_cache_report(struct mr_table *mrt,
130 struct sk_buff *pkt, vifi_t vifi, int assert); 130 struct sk_buff *pkt, vifi_t vifi, int assert);
131 static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, 131 static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
132 struct mfc_cache *c, struct rtmsg *rtm); 132 struct mfc_cache *c, struct rtmsg *rtm);
133 static void ipmr_expire_process(unsigned long arg); 133 static void ipmr_expire_process(unsigned long arg);
134 134
135 #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES 135 #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
136 #define ipmr_for_each_table(mrt, net) \ 136 #define ipmr_for_each_table(mrt, net) \
137 list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list) 137 list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)
138 138
139 static struct mr_table *ipmr_get_table(struct net *net, u32 id) 139 static struct mr_table *ipmr_get_table(struct net *net, u32 id)
140 { 140 {
141 struct mr_table *mrt; 141 struct mr_table *mrt;
142 142
143 ipmr_for_each_table(mrt, net) { 143 ipmr_for_each_table(mrt, net) {
144 if (mrt->id == id) 144 if (mrt->id == id)
145 return mrt; 145 return mrt;
146 } 146 }
147 return NULL; 147 return NULL;
148 } 148 }
149 149
150 static int ipmr_fib_lookup(struct net *net, struct flowi *flp, 150 static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
151 struct mr_table **mrt) 151 struct mr_table **mrt)
152 { 152 {
153 struct ipmr_result res; 153 struct ipmr_result res;
154 struct fib_lookup_arg arg = { .result = &res, }; 154 struct fib_lookup_arg arg = { .result = &res, };
155 int err; 155 int err;
156 156
157 err = fib_rules_lookup(net->ipv4.mr_rules_ops, flp, 0, &arg); 157 err = fib_rules_lookup(net->ipv4.mr_rules_ops, flp, 0, &arg);
158 if (err < 0) 158 if (err < 0)
159 return err; 159 return err;
160 *mrt = res.mrt; 160 *mrt = res.mrt;
161 return 0; 161 return 0;
162 } 162 }
163 163
164 static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp, 164 static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp,
165 int flags, struct fib_lookup_arg *arg) 165 int flags, struct fib_lookup_arg *arg)
166 { 166 {
167 struct ipmr_result *res = arg->result; 167 struct ipmr_result *res = arg->result;
168 struct mr_table *mrt; 168 struct mr_table *mrt;
169 169
170 switch (rule->action) { 170 switch (rule->action) {
171 case FR_ACT_TO_TBL: 171 case FR_ACT_TO_TBL:
172 break; 172 break;
173 case FR_ACT_UNREACHABLE: 173 case FR_ACT_UNREACHABLE:
174 return -ENETUNREACH; 174 return -ENETUNREACH;
175 case FR_ACT_PROHIBIT: 175 case FR_ACT_PROHIBIT:
176 return -EACCES; 176 return -EACCES;
177 case FR_ACT_BLACKHOLE: 177 case FR_ACT_BLACKHOLE:
178 default: 178 default:
179 return -EINVAL; 179 return -EINVAL;
180 } 180 }
181 181
182 mrt = ipmr_get_table(rule->fr_net, rule->table); 182 mrt = ipmr_get_table(rule->fr_net, rule->table);
183 if (mrt == NULL) 183 if (mrt == NULL)
184 return -EAGAIN; 184 return -EAGAIN;
185 res->mrt = mrt; 185 res->mrt = mrt;
186 return 0; 186 return 0;
187 } 187 }
188 188
189 static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) 189 static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
190 { 190 {
191 return 1; 191 return 1;
192 } 192 }
193 193
194 static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = { 194 static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = {
195 FRA_GENERIC_POLICY, 195 FRA_GENERIC_POLICY,
196 }; 196 };
197 197
198 static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
199 			       struct fib_rule_hdr *frh, struct nlattr **tb)
200 {
201 	return 0;
202 }
203
204 static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
205 			     struct nlattr **tb)
206 {
207 	return 1;
208 }
209
210 static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
211 			  struct fib_rule_hdr *frh)
212 {
213 	frh->dst_len = 0;
214 	frh->src_len = 0;
215 	frh->tos = 0;
216 	return 0;
217 }
218
219 static const struct fib_rules_ops __net_initdata ipmr_rules_ops_template = {
220 	.family		= RTNL_FAMILY_IPMR,
221 	.rule_size	= sizeof(struct ipmr_rule),
222 	.addr_size	= sizeof(u32),
223 	.action		= ipmr_rule_action,
224 	.match		= ipmr_rule_match,
225 	.configure	= ipmr_rule_configure,
226 	.compare	= ipmr_rule_compare,
227 	.default_pref	= fib_default_rule_pref,
228 	.fill		= ipmr_rule_fill,
229 	.nlgroup	= RTNLGRP_IPV4_RULE,
230 	.policy		= ipmr_rule_policy,
231 	.owner		= THIS_MODULE,
232 };
233
234 static int __net_init ipmr_rules_init(struct net *net)
235 {
236 	struct fib_rules_ops *ops;
237 	struct mr_table *mrt;
238 	int err;
239
240 	ops = fib_rules_register(&ipmr_rules_ops_template, net);
241 	if (IS_ERR(ops))
242 		return PTR_ERR(ops);
243
244 	INIT_LIST_HEAD(&net->ipv4.mr_tables);
245
246 	mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
247 	if (mrt == NULL) {
248 		err = -ENOMEM;
249 		goto err1;
250 	}
251
252 	err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0);
253 	if (err < 0)
254 		goto err2;
255
256 	net->ipv4.mr_rules_ops = ops;
257 	return 0;
258
259 err2:
260 	kfree(mrt);
261 err1:
262 	fib_rules_unregister(ops);
263 	return err;
264 }
265
266 static void __net_exit ipmr_rules_exit(struct net *net)
267 {
268 	struct mr_table *mrt, *next;
269
270 	list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list)
271 		kfree(mrt);
272 	fib_rules_unregister(net->ipv4.mr_rules_ops);
273 }
274 #else
275 #define ipmr_for_each_table(mrt, net) \
276 	for (mrt = net->ipv4.mrt; mrt; mrt = NULL)
277
278 static struct mr_table *ipmr_get_table(struct net *net, u32 id)
279 {
280 	return net->ipv4.mrt;
281 }
282
283 static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
284 			   struct mr_table **mrt)
285 {
286 	*mrt = net->ipv4.mrt;
287 	return 0;
288 }
289
290 static int __net_init ipmr_rules_init(struct net *net)
291 {
292 	net->ipv4.mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
293 	return net->ipv4.mrt ? 0 : -ENOMEM;
294 }
295
296 static void __net_exit ipmr_rules_exit(struct net *net)
297 {
298 	kfree(net->ipv4.mrt);
299 }
300 #endif
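Both builds expose the same ipmr_rules_init()/ipmr_rules_exit() pair, so the rest of the file never cares whether multiple tables are compiled in. A minimal sketch of how such per-namespace hooks are typically wired up; the names ipmr_net_init/ipmr_net_exit/ipmr_net_ops follow this file's conventions, but treat the bodies as illustrative rather than the exact upstream code:

	static int __net_init ipmr_net_init(struct net *net)
	{
		return ipmr_rules_init(net);	/* per-netns table(s) + fib rules */
	}

	static void __net_exit ipmr_net_exit(struct net *net)
	{
		ipmr_rules_exit(net);
	}

	static struct pernet_operations ipmr_net_ops = {
		.init = ipmr_net_init,
		.exit = ipmr_net_exit,
	};
	/* registered once at init time: register_pernet_subsys(&ipmr_net_ops); */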
301
302 static struct mr_table *ipmr_new_table(struct net *net, u32 id)
303 {
304 	struct mr_table *mrt;
305 	unsigned int i;
306
307 	mrt = ipmr_get_table(net, id);
308 	if (mrt != NULL)
309 		return mrt;
310
311 	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
312 	if (mrt == NULL)
313 		return NULL;
314 	write_pnet(&mrt->net, net);
315 	mrt->id = id;
316
317 	/* Forwarding cache */
318 	for (i = 0; i < MFC_LINES; i++)
319 		INIT_LIST_HEAD(&mrt->mfc_cache_array[i]);
320
321 	INIT_LIST_HEAD(&mrt->mfc_unres_queue);
322
323 	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
324 		    (unsigned long)mrt);
325
326 #ifdef CONFIG_IP_PIMSM
327 	mrt->mroute_reg_vif_num = -1;
328 #endif
329 #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
330 	list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
331 #endif
332 	return mrt;
333 }
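Note that ipmr_new_table() has get-or-create semantics: it returns an existing table with the same id instead of failing, which is why the MRT_TABLE setsockopt further down can call it unconditionally. Illustrative:

	struct mr_table *a = ipmr_new_table(net, RT_TABLE_DEFAULT);
	struct mr_table *b = ipmr_new_table(net, RT_TABLE_DEFAULT);
	/* a == b: only the first call allocated and initialized anything */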
334
335 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
336
337 static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
338 {
339 	struct net *net = dev_net(dev);
340
341 	dev_close(dev);
342
343 	dev = __dev_get_by_name(net, "tunl0");
344 	if (dev) {
345 		const struct net_device_ops *ops = dev->netdev_ops;
346 		struct ifreq ifr;
347 		struct ip_tunnel_parm p;
348
349 		memset(&p, 0, sizeof(p));
350 		p.iph.daddr = v->vifc_rmt_addr.s_addr;
351 		p.iph.saddr = v->vifc_lcl_addr.s_addr;
352 		p.iph.version = 4;
353 		p.iph.ihl = 5;
354 		p.iph.protocol = IPPROTO_IPIP;
355 		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
356 		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
357
358 		if (ops->ndo_do_ioctl) {
359 			mm_segment_t oldfs = get_fs();
360
361 			set_fs(KERNEL_DS);
362 			ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
363 			set_fs(oldfs);
364 		}
365 	}
366 }
367
368 static
369 struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
370 {
371 	struct net_device *dev;
372
373 	dev = __dev_get_by_name(net, "tunl0");
374
375 	if (dev) {
376 		const struct net_device_ops *ops = dev->netdev_ops;
377 		int err;
378 		struct ifreq ifr;
379 		struct ip_tunnel_parm p;
380 		struct in_device *in_dev;
381
382 		memset(&p, 0, sizeof(p));
383 		p.iph.daddr = v->vifc_rmt_addr.s_addr;
384 		p.iph.saddr = v->vifc_lcl_addr.s_addr;
385 		p.iph.version = 4;
386 		p.iph.ihl = 5;
387 		p.iph.protocol = IPPROTO_IPIP;
388 		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
389 		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
390
391 		if (ops->ndo_do_ioctl) {
392 			mm_segment_t oldfs = get_fs();
393
394 			set_fs(KERNEL_DS);
395 			err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
396 			set_fs(oldfs);
397 		} else
398 			err = -EOPNOTSUPP;
399
400 		dev = NULL;
401
402 		if (err == 0 &&
403 		    (dev = __dev_get_by_name(net, p.name)) != NULL) {
404 			dev->flags |= IFF_MULTICAST;
405
406 			in_dev = __in_dev_get_rtnl(dev);
407 			if (in_dev == NULL)
408 				goto failure;
409
410 			ipv4_devconf_setall(in_dev);
411 			IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
412
413 			if (dev_open(dev))
414 				goto failure;
415 			dev_hold(dev);
416 		}
417 	}
418 	return dev;
419
420 failure:
421 	/* allow the register to be completed before unregistering. */
422 	rtnl_unlock();
423 	rtnl_lock();
424
425 	unregister_netdevice(dev);
426 	return NULL;
427 }
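The ip_tunnel_parm setup is duplicated verbatim between ipmr_del_tunnel() and ipmr_new_tunnel(). A shared helper could factor it out; this is a sketch only, and ipmr_init_tunnel_parm is a hypothetical name, not something this commit introduces:

	static void ipmr_init_tunnel_parm(struct ip_tunnel_parm *p,
					  const struct vifctl *v)
	{
		memset(p, 0, sizeof(*p));
		p->iph.daddr = v->vifc_rmt_addr.s_addr;	/* tunnel remote endpoint */
		p->iph.saddr = v->vifc_lcl_addr.s_addr;	/* tunnel local endpoint */
		p->iph.version = 4;
		p->iph.ihl = 5;
		p->iph.protocol = IPPROTO_IPIP;
		sprintf(p->name, "dvmrp%d", v->vifc_vifi);
	}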
428
429 #ifdef CONFIG_IP_PIMSM
430
431 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
432 {
433 	struct net *net = dev_net(dev);
434 	struct mr_table *mrt;
435 	struct flowi fl = {
436 		.oif = dev->ifindex,
437 		.iif = skb->skb_iif,
438 		.mark = skb->mark,
439 	};
440 	int err;
441
442 	err = ipmr_fib_lookup(net, &fl, &mrt);
443 	if (err < 0)
444 		return err;
445
446 	read_lock(&mrt_lock);
447 	dev->stats.tx_bytes += skb->len;
448 	dev->stats.tx_packets++;
449 	ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
450 	read_unlock(&mrt_lock);
451 	kfree_skb(skb);
452 	return NETDEV_TX_OK;
453 }
454
455 static const struct net_device_ops reg_vif_netdev_ops = {
456 	.ndo_start_xmit	= reg_vif_xmit,
457 };
458
459 static void reg_vif_setup(struct net_device *dev)
460 {
461 	dev->type		= ARPHRD_PIMREG;
462 	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
463 	dev->flags		= IFF_NOARP;
464 	dev->netdev_ops		= &reg_vif_netdev_ops;
465 	dev->destructor		= free_netdev;
466 	dev->features		|= NETIF_F_NETNS_LOCAL;
467 }
468
469 static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
470 {
471 	struct net_device *dev;
472 	struct in_device *in_dev;
473 	char name[IFNAMSIZ];
474
475 	if (mrt->id == RT_TABLE_DEFAULT)
476 		sprintf(name, "pimreg");
477 	else
478 		sprintf(name, "pimreg%u", mrt->id);
479
480 	dev = alloc_netdev(0, name, reg_vif_setup);
481
482 	if (dev == NULL)
483 		return NULL;
484
485 	dev_net_set(dev, net);
486
487 	if (register_netdevice(dev)) {
488 		free_netdev(dev);
489 		return NULL;
490 	}
491 	dev->iflink = 0;
492
493 	rcu_read_lock();
494 	if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
495 		rcu_read_unlock();
496 		goto failure;
497 	}
498
499 	ipv4_devconf_setall(in_dev);
500 	IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
501 	rcu_read_unlock();
502
503 	if (dev_open(dev))
504 		goto failure;
505
506 	dev_hold(dev);
507
508 	return dev;
509
510 failure:
511 	/* allow the register to be completed before unregistering. */
512 	rtnl_unlock();
513 	rtnl_lock();
514
515 	unregister_netdevice(dev);
516 	return NULL;
517 }
518 #endif
519
520 /*
521  * Delete a VIF entry
522  * @notify: Set to 1 if the caller is a notifier_call
523  */
524
525 static int vif_delete(struct mr_table *mrt, int vifi, int notify,
526 		      struct list_head *head)
527 {
528 	struct vif_device *v;
529 	struct net_device *dev;
530 	struct in_device *in_dev;
531
532 	if (vifi < 0 || vifi >= mrt->maxvif)
533 		return -EADDRNOTAVAIL;
534
535 	v = &mrt->vif_table[vifi];
536
537 	write_lock_bh(&mrt_lock);
538 	dev = v->dev;
539 	v->dev = NULL;
540
541 	if (!dev) {
542 		write_unlock_bh(&mrt_lock);
543 		return -EADDRNOTAVAIL;
544 	}
545
546 #ifdef CONFIG_IP_PIMSM
547 	if (vifi == mrt->mroute_reg_vif_num)
548 		mrt->mroute_reg_vif_num = -1;
549 #endif
550
551 	if (vifi+1 == mrt->maxvif) {
552 		int tmp;
553 		for (tmp = vifi-1; tmp >= 0; tmp--) {
554 			if (VIF_EXISTS(mrt, tmp))
555 				break;
556 		}
557 		mrt->maxvif = tmp+1;
558 	}
559
560 	write_unlock_bh(&mrt_lock);
561
562 	dev_set_allmulti(dev, -1);
563
564 	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
565 		IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
566 		ip_rt_multicast_event(in_dev);
567 	}
568
569 	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
570 		unregister_netdevice_queue(dev, head);
571
572 	dev_put(dev);
573 	return 0;
574 }
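The head list lets callers batch device teardown: each vif_delete() call only queues the device via unregister_netdevice_queue(), and the caller flushes the whole list in one RTNL pass. A condensed fragment of the pattern, mirroring what mroute_clean_tables() and ipmr_device_event() do later in this section:

	LIST_HEAD(list);
	int i;

	for (i = 0; i < mrt->maxvif; i++)
		vif_delete(mrt, i, 0, &list);	/* queue each device on the list */
	unregister_netdevice_many(&list);	/* one round-trip tears down all of them */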
575
576 static inline void ipmr_cache_free(struct mfc_cache *c)
577 {
578 	kmem_cache_free(mrt_cachep, c);
579 }
580
581 /* Destroy an unresolved cache entry, killing queued skbs
582    and reporting error to netlink readers.
583  */
584
585 static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
586 {
587 	struct net *net = read_pnet(&mrt->net);
588 	struct sk_buff *skb;
589 	struct nlmsgerr *e;
590
591 	atomic_dec(&mrt->cache_resolve_queue_len);
592
593 	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
594 		if (ip_hdr(skb)->version == 0) {
595 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
596 			nlh->nlmsg_type = NLMSG_ERROR;
597 			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
598 			skb_trim(skb, nlh->nlmsg_len);
599 			e = NLMSG_DATA(nlh);
600 			e->error = -ETIMEDOUT;
601 			memset(&e->msg, 0, sizeof(e->msg));
602
603 			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
604 		} else
605 			kfree_skb(skb);
606 	}
607
608 	ipmr_cache_free(c);
609 }
610
611
612 /* Timer process for the unresolved queue. */
613
614 static void ipmr_expire_process(unsigned long arg)
615 {
616 	struct mr_table *mrt = (struct mr_table *)arg;
617 	unsigned long now;
618 	unsigned long expires;
619 	struct mfc_cache *c, *next;
620
621 	if (!spin_trylock(&mfc_unres_lock)) {
622 		mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10);
623 		return;
624 	}
625
626 	if (list_empty(&mrt->mfc_unres_queue))
627 		goto out;
628
629 	now = jiffies;
630 	expires = 10*HZ;
631
632 	list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
633 		if (time_after(c->mfc_un.unres.expires, now)) {
634 			unsigned long interval = c->mfc_un.unres.expires - now;
635 			if (interval < expires)
636 				expires = interval;
637 			continue;
638 		}
639
640 		list_del(&c->list);
641 		ipmr_destroy_unres(mrt, c);
642 	}
643
644 	if (!list_empty(&mrt->mfc_unres_queue))
645 		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
646
647 out:
648 	spin_unlock(&mfc_unres_lock);
649 }
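Because this runs in timer (softirq) context, it never spins on mfc_unres_lock: on contention it simply re-arms itself ~100ms out, and on success it re-arms for the soonest remaining expiry. The idiom reduced to a minimal skeleton; all names here are hypothetical, not from this file:

	static DEFINE_SPINLOCK(demo_lock);
	static struct timer_list demo_timer;

	static void demo_expire(unsigned long arg)
	{
		unsigned long next = jiffies + 10*HZ;	/* default re-arm window */

		if (!spin_trylock(&demo_lock)) {
			/* lock contended: poll again shortly rather than spin here */
			mod_timer(&demo_timer, jiffies + HZ/10);
			return;
		}
		/* reap expired entries, shrinking 'next' to the soonest remaining
		 * expiry, exactly as ipmr_expire_process() does above */
		mod_timer(&demo_timer, next);
		spin_unlock(&demo_lock);
	}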
650
651 /* Fill the oif list. Called with mrt_lock held for writing. */
652
653 static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
654 				   unsigned char *ttls)
655 {
656 	int vifi;
657
658 	cache->mfc_un.res.minvif = MAXVIFS;
659 	cache->mfc_un.res.maxvif = 0;
660 	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
661
662 	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
663 		if (VIF_EXISTS(mrt, vifi) &&
664 		    ttls[vifi] && ttls[vifi] < 255) {
665 			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
666 			if (cache->mfc_un.res.minvif > vifi)
667 				cache->mfc_un.res.minvif = vifi;
668 			if (cache->mfc_un.res.maxvif <= vifi)
669 				cache->mfc_un.res.maxvif = vifi + 1;
670 		}
671 	}
672 }
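A ttl of 0 or 255 in the daemon-supplied array means "do not forward on this vif". A worked example of the bounds this loop computes:

	/* With maxvif = 5 (all vifs existing) and ttls[] = { 0, 2, 255, 0, 3 },
	 * only vifs 1 and 4 qualify (nonzero and < 255), so after the loop:
	 *   res.ttls   = { 255, 2, 255, 255, 3, ... }
	 *   res.minvif = 1
	 *   res.maxvif = 5   (one past the last forwarding vif)
	 * The forwarding path then only scans vifs in [minvif, maxvif).
	 */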
673
674 static int vif_add(struct net *net, struct mr_table *mrt,
675 		   struct vifctl *vifc, int mrtsock)
676 {
677 	int vifi = vifc->vifc_vifi;
678 	struct vif_device *v = &mrt->vif_table[vifi];
679 	struct net_device *dev;
680 	struct in_device *in_dev;
681 	int err;
682
683 	/* Is vif busy ? */
684 	if (VIF_EXISTS(mrt, vifi))
685 		return -EADDRINUSE;
686
687 	switch (vifc->vifc_flags) {
688 #ifdef CONFIG_IP_PIMSM
689 	case VIFF_REGISTER:
690 		/*
691 		 * Special Purpose VIF in PIM
692 		 * All the packets will be sent to the daemon
693 		 */
694 		if (mrt->mroute_reg_vif_num >= 0)
695 			return -EADDRINUSE;
696 		dev = ipmr_reg_vif(net, mrt);
697 		if (!dev)
698 			return -ENOBUFS;
699 		err = dev_set_allmulti(dev, 1);
700 		if (err) {
701 			unregister_netdevice(dev);
702 			dev_put(dev);
703 			return err;
704 		}
705 		break;
706 #endif
707 	case VIFF_TUNNEL:
708 		dev = ipmr_new_tunnel(net, vifc);
709 		if (!dev)
710 			return -ENOBUFS;
711 		err = dev_set_allmulti(dev, 1);
712 		if (err) {
713 			ipmr_del_tunnel(dev, vifc);
714 			dev_put(dev);
715 			return err;
716 		}
717 		break;
718
719 	case VIFF_USE_IFINDEX:
720 	case 0:
721 		if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
722 			dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
723 			if (dev && dev->ip_ptr == NULL) {
724 				dev_put(dev);
725 				return -EADDRNOTAVAIL;
726 			}
727 		} else
728 			dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
729
730 		if (!dev)
731 			return -EADDRNOTAVAIL;
732 		err = dev_set_allmulti(dev, 1);
733 		if (err) {
734 			dev_put(dev);
735 			return err;
736 		}
737 		break;
738 	default:
739 		return -EINVAL;
740 	}
741
742 	if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
743 		dev_put(dev);
744 		return -EADDRNOTAVAIL;
745 	}
746 	IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
747 	ip_rt_multicast_event(in_dev);
748
749 	/*
750 	 * Fill in the VIF structures
751 	 */
752 	v->rate_limit = vifc->vifc_rate_limit;
753 	v->local = vifc->vifc_lcl_addr.s_addr;
754 	v->remote = vifc->vifc_rmt_addr.s_addr;
755 	v->flags = vifc->vifc_flags;
756 	if (!mrtsock)
757 		v->flags |= VIFF_STATIC;
758 	v->threshold = vifc->vifc_threshold;
759 	v->bytes_in = 0;
760 	v->bytes_out = 0;
761 	v->pkt_in = 0;
762 	v->pkt_out = 0;
763 	v->link = dev->ifindex;
764 	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
765 		v->link = dev->iflink;
766
767 	/* And finish update writing critical data */
768 	write_lock_bh(&mrt_lock);
769 	v->dev = dev;
770 #ifdef CONFIG_IP_PIMSM
771 	if (v->flags&VIFF_REGISTER)
772 		mrt->mroute_reg_vif_num = vifi;
773 #endif
774 	if (vifi+1 > mrt->maxvif)
775 		mrt->maxvif = vifi+1;
776 	write_unlock_bh(&mrt_lock);
777 	return 0;
778 }
779
780 static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
781 					 __be32 origin,
782 					 __be32 mcastgrp)
783 {
784 	int line = MFC_HASH(mcastgrp, origin);
785 	struct mfc_cache *c;
786
787 	list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
788 		if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
789 			return c;
790 	}
791 	return NULL;
792 }
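Lookups in the MFC hash must hold mrt_lock for reading, as ipmr_ioctl() does further down. A fragment showing the expected calling pattern (src and grp are assumed to be __be32 addresses in network byte order):

	struct mfc_cache *c;

	read_lock(&mrt_lock);
	c = ipmr_cache_find(mrt, src, grp);	/* (S,G) lookup */
	if (c) {
		/* c->mfc_un.res.* counters are only stable while the lock is held */
	}
	read_unlock(&mrt_lock);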
793
794 /*
795  * Allocate a multicast cache entry
796  */
797 static struct mfc_cache *ipmr_cache_alloc(void)
798 {
799 	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
800 	if (c == NULL)
801 		return NULL;
802 	c->mfc_un.res.minvif = MAXVIFS;
803 	return c;
804 }
805
806 static struct mfc_cache *ipmr_cache_alloc_unres(void)
807 {
808 	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
809 	if (c == NULL)
810 		return NULL;
811 	skb_queue_head_init(&c->mfc_un.unres.unresolved);
812 	c->mfc_un.unres.expires = jiffies + 10*HZ;
813 	return c;
814 }
815
816 /*
817  * A cache entry has gone into a resolved state from queued
818  */
819
820 static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
821 			       struct mfc_cache *uc, struct mfc_cache *c)
822 {
823 	struct sk_buff *skb;
824 	struct nlmsgerr *e;
825
826 	/*
827 	 * Play the pending entries through our router
828 	 */
829
830 	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
831 		if (ip_hdr(skb)->version == 0) {
832 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
833
834 			if (__ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
835 				nlh->nlmsg_len = (skb_tail_pointer(skb) -
836 						  (u8 *)nlh);
837 			} else {
838 				nlh->nlmsg_type = NLMSG_ERROR;
839 				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
840 				skb_trim(skb, nlh->nlmsg_len);
841 				e = NLMSG_DATA(nlh);
842 				e->error = -EMSGSIZE;
843 				memset(&e->msg, 0, sizeof(e->msg));
844 			}
845
846 			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
847 		} else
848 			ip_mr_forward(net, mrt, skb, c, 0);
849 	}
850 }
851
852 /*
853  * Bounce a cache query up to mrouted. We could use netlink for this but mrouted
854  * expects the following bizarre scheme.
855  *
856  * Called under mrt_lock.
857  */
858
859 static int ipmr_cache_report(struct mr_table *mrt,
860 			     struct sk_buff *pkt, vifi_t vifi, int assert)
861 {
862 	struct sk_buff *skb;
863 	const int ihl = ip_hdrlen(pkt);
864 	struct igmphdr *igmp;
865 	struct igmpmsg *msg;
866 	int ret;
867
868 #ifdef CONFIG_IP_PIMSM
869 	if (assert == IGMPMSG_WHOLEPKT)
870 		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
871 	else
872 #endif
873 		skb = alloc_skb(128, GFP_ATOMIC);
874
875 	if (!skb)
876 		return -ENOBUFS;
877
878 #ifdef CONFIG_IP_PIMSM
879 	if (assert == IGMPMSG_WHOLEPKT) {
880 		/* Ugly, but we have no choice with this interface.
881 		   Duplicate old header, fix ihl, length etc.
882 		   And all this only to mangle msg->im_msgtype and
883 		   to set msg->im_mbz to "mbz" :-)
884 		 */
885 		skb_push(skb, sizeof(struct iphdr));
886 		skb_reset_network_header(skb);
887 		skb_reset_transport_header(skb);
888 		msg = (struct igmpmsg *)skb_network_header(skb);
889 		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
890 		msg->im_msgtype = IGMPMSG_WHOLEPKT;
891 		msg->im_mbz = 0;
892 		msg->im_vif = mrt->mroute_reg_vif_num;
893 		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
894 		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
895 					     sizeof(struct iphdr));
896 	} else
897 #endif
898 	{
899
900 	/*
901 	 * Copy the IP header
902 	 */
903
904 	skb->network_header = skb->tail;
905 	skb_put(skb, ihl);
906 	skb_copy_to_linear_data(skb, pkt->data, ihl);
907 	ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */
908 	msg = (struct igmpmsg *)skb_network_header(skb);
909 	msg->im_vif = vifi;
910 	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
911
912 	/*
913 	 * Add our header
914 	 */
915
916 	igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
917 	igmp->type =
918 	msg->im_msgtype = assert;
919 	igmp->code = 0;
920 	ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */
921 	skb->transport_header = skb->network_header;
922 	}
923
924 	if (mrt->mroute_sk == NULL) {
925 		kfree_skb(skb);
926 		return -EINVAL;
927 	}
928
929 	/*
930 	 * Deliver to mrouted
931 	 */
932 	ret = sock_queue_rcv_skb(mrt->mroute_sk, skb);
933 	if (ret < 0) {
934 		if (net_ratelimit())
935 			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
936 		kfree_skb(skb);
937 	}
938
939 	return ret;
940 }
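The userspace side of this "bizarre scheme": reports are queued on the daemon's raw IGMP socket (the one that did MRT_INIT), where a struct igmpmsg overlays the IP header. A hedged sketch of the daemon's read loop; mroute_fd is assumed to be that socket, and error handling is elided:

	unsigned char buf[2048];
	struct igmpmsg *im;
	ssize_t n;

	n = recv(mroute_fd, buf, sizeof(buf), 0);
	im = (struct igmpmsg *)buf;
	if (n >= (ssize_t)sizeof(*im) && im->im_mbz == 0) {	/* im_mbz == 0 marks an upcall */
		switch (im->im_msgtype) {
		case IGMPMSG_NOCACHE:
			/* no MFC entry yet: resolve (S,G) and install via MRT_ADD_MFC */
			break;
		case IGMPMSG_WRONGVIF:
			/* PIM assert: packet arrived on an unexpected vif */
			break;
		case IGMPMSG_WHOLEPKT:
			/* full packet punted from the PIM register vif */
			break;
		}
	}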
941
942 /*
943  * Queue a packet for resolution. It gets a locked cache entry!
944  */
945
946 static int
947 ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
948 {
949 	bool found = false;
950 	int err;
951 	struct mfc_cache *c;
952 	const struct iphdr *iph = ip_hdr(skb);
953
954 	spin_lock_bh(&mfc_unres_lock);
955 	list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
956 		if (c->mfc_mcastgrp == iph->daddr &&
957 		    c->mfc_origin == iph->saddr) {
958 			found = true;
959 			break;
960 		}
961 	}
962
963 	if (!found) {
964 		/*
965 		 * Create a new entry if allowable
966 		 */
967
968 		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
969 		    (c = ipmr_cache_alloc_unres()) == NULL) {
970 			spin_unlock_bh(&mfc_unres_lock);
971
972 			kfree_skb(skb);
973 			return -ENOBUFS;
974 		}
975
976 		/*
977 		 * Fill in the new cache entry
978 		 */
979 		c->mfc_parent = -1;
980 		c->mfc_origin = iph->saddr;
981 		c->mfc_mcastgrp = iph->daddr;
982
983 		/*
984 		 * Reflect first query at mrouted.
985 		 */
986 		err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
987 		if (err < 0) {
988 			/* If the report failed throw the cache entry
989 			   out - Brad Parker
990 			 */
991 			spin_unlock_bh(&mfc_unres_lock);
992
993 			ipmr_cache_free(c);
994 			kfree_skb(skb);
995 			return err;
996 		}
997
998 		atomic_inc(&mrt->cache_resolve_queue_len);
999 		list_add(&c->list, &mrt->mfc_unres_queue);
1000
1001 		if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
1002 			mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
1003 	}
1004
1005 	/*
1006 	 * See if we can append the packet
1007 	 */
1008 	if (c->mfc_un.unres.unresolved.qlen > 3) {
1009 		kfree_skb(skb);
1010 		err = -ENOBUFS;
1011 	} else {
1012 		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
1013 		err = 0;
1014 	}
1015
1016 	spin_unlock_bh(&mfc_unres_lock);
1017 	return err;
1018 }
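For orientation, the input path reaches this function when an (S,G) lookup misses. A condensed, approximate fragment of how ip_mr_input() (elsewhere in this file, not shown in this hunk) ties the pieces together:

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
	if (cache == NULL) {
		int vif = ipmr_find_vif(mrt, skb->dev);	/* map ingress dev to a vif */

		if (vif >= 0) {
			/* queue the skb and send IGMPMSG_NOCACHE up to the daemon;
			 * the skb is consumed either way (queued or freed) */
			err = ipmr_cache_unresolved(mrt, vif, skb);
			read_unlock(&mrt_lock);
			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}
	ip_mr_forward(net, mrt, skb, cache, 0);
	read_unlock(&mrt_lock);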
1019
1020 /*
1021  * MFC cache manipulation by user space mroute daemon
1022  */
1023
1024 static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc)
1025 {
1026 	int line;
1027 	struct mfc_cache *c, *next;
1028
1029 	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
1030
1031 	list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) {
1032 		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
1033 		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
1034 			write_lock_bh(&mrt_lock);
1035 			list_del(&c->list);
1036 			write_unlock_bh(&mrt_lock);
1037
1038 			ipmr_cache_free(c);
1039 			return 0;
1040 		}
1041 	}
1042 	return -ENOENT;
1043 }
1044
1045 static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
1046 			struct mfcctl *mfc, int mrtsock)
1047 {
1048 	bool found = false;
1049 	int line;
1050 	struct mfc_cache *uc, *c;
1051
1052 	if (mfc->mfcc_parent >= MAXVIFS)
1053 		return -ENFILE;
1054
1055 	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
1056
1057 	list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
1058 		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
1059 		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
1060 			found = true;
1061 			break;
1062 		}
1063 	}
1064
1065 	if (found) {
1066 		write_lock_bh(&mrt_lock);
1067 		c->mfc_parent = mfc->mfcc_parent;
1068 		ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
1069 		if (!mrtsock)
1070 			c->mfc_flags |= MFC_STATIC;
1071 		write_unlock_bh(&mrt_lock);
1072 		return 0;
1073 	}
1074
1075 	if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
1076 		return -EINVAL;
1077
1078 	c = ipmr_cache_alloc();
1079 	if (c == NULL)
1080 		return -ENOMEM;
1081
1082 	c->mfc_origin = mfc->mfcc_origin.s_addr;
1083 	c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
1084 	c->mfc_parent = mfc->mfcc_parent;
1085 	ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
1086 	if (!mrtsock)
1087 		c->mfc_flags |= MFC_STATIC;
1088
1089 	write_lock_bh(&mrt_lock);
1090 	list_add(&c->list, &mrt->mfc_cache_array[line]);
1091 	write_unlock_bh(&mrt_lock);
1092
1093 	/*
1094 	 * Check to see if we resolved a queued list. If so we
1095 	 * need to send on the frames and tidy up.
1096 	 */
1097 	found = false;
1098 	spin_lock_bh(&mfc_unres_lock);
1099 	list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
1100 		if (uc->mfc_origin == c->mfc_origin &&
1101 		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
1102 			list_del(&uc->list);
1103 			atomic_dec(&mrt->cache_resolve_queue_len);
1104 			found = true;
1105 			break;
1106 		}
1107 	}
1108 	if (list_empty(&mrt->mfc_unres_queue))
1109 		del_timer(&mrt->ipmr_expire_timer);
1110 	spin_unlock_bh(&mfc_unres_lock);
1111
1112 	if (found) {
1113 		ipmr_cache_resolve(net, mrt, uc, c);
1114 		ipmr_cache_free(uc);
1115 	}
1116 	return 0;
1117 }
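From the daemon's side, installing the (S,G) entry this code parses is a single MRT_ADD_MFC setsockopt. A userspace sketch with hypothetical addresses; mroute_fd is assumed to be the MRT_INIT raw IGMP socket, and error handling is elided:

	struct mfcctl mc;

	memset(&mc, 0, sizeof(mc));
	mc.mfcc_origin.s_addr = inet_addr("10.0.0.1");		/* source S */
	mc.mfcc_mcastgrp.s_addr = inet_addr("239.1.2.3");	/* group G */
	mc.mfcc_parent = 0;		/* expected input vif */
	mc.mfcc_ttls[1] = 1;		/* forward out vif 1; 0/255 means "don't" */
	setsockopt(mroute_fd, IPPROTO_IP, MRT_ADD_MFC, &mc, sizeof(mc));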
1118
1119 /*
1120  * Close the multicast socket, and clear the vif tables etc
1121  */
1122
1123 static void mroute_clean_tables(struct mr_table *mrt)
1124 {
1125 	int i;
1126 	LIST_HEAD(list);
1127 	struct mfc_cache *c, *next;
1128
1129 	/*
1130 	 * Shut down all active vif entries
1131 	 */
1132 	for (i = 0; i < mrt->maxvif; i++) {
1133 		if (!(mrt->vif_table[i].flags&VIFF_STATIC))
1134 			vif_delete(mrt, i, 0, &list);
1135 	}
1136 	unregister_netdevice_many(&list);
1137
1138 	/*
1139 	 * Wipe the cache
1140 	 */
1141 	for (i = 0; i < MFC_LINES; i++) {
1142 		list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) {
1143 			if (c->mfc_flags&MFC_STATIC)
1144 				continue;
1145 			write_lock_bh(&mrt_lock);
1146 			list_del(&c->list);
1147 			write_unlock_bh(&mrt_lock);
1148
1149 			ipmr_cache_free(c);
1150 		}
1151 	}
1152
1153 	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1154 		spin_lock_bh(&mfc_unres_lock);
1155 		list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
1156 			list_del(&c->list);
1157 			ipmr_destroy_unres(mrt, c);
1158 		}
1159 		spin_unlock_bh(&mfc_unres_lock);
1160 	}
1161 }
1162
1163 static void mrtsock_destruct(struct sock *sk)
1164 {
1165 	struct net *net = sock_net(sk);
1166 	struct mr_table *mrt;
1167
1168 	rtnl_lock();
1169 	ipmr_for_each_table(mrt, net) {
1170 		if (sk == mrt->mroute_sk) {
1171 			IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
1172
1173 			write_lock_bh(&mrt_lock);
1174 			mrt->mroute_sk = NULL;
1175 			write_unlock_bh(&mrt_lock);
1176
1177 			mroute_clean_tables(mrt);
1178 		}
1179 	}
1180 	rtnl_unlock();
1181 }
1182
1183 /*
1184  * Socket options and virtual interface manipulation. The whole
1185  * virtual interface system is a complete heap, but unfortunately
1186  * that's how BSD mrouted happens to think. Maybe one day with a proper
1187  * MOSPF/PIM router set up we can clean this up.
1188  */
1189
1190 int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1191 {
1192 	int ret;
1193 	struct vifctl vif;
1194 	struct mfcctl mfc;
1195 	struct net *net = sock_net(sk);
1196 	struct mr_table *mrt;
1197
1198 	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1199 	if (mrt == NULL)
1200 		return -ENOENT;
1201
1202 	if (optname != MRT_INIT) {
1203 		if (sk != mrt->mroute_sk && !capable(CAP_NET_ADMIN))
1204 			return -EACCES;
1205 	}
1206
1207 	switch (optname) {
1208 	case MRT_INIT:
1209 		if (sk->sk_type != SOCK_RAW ||
1210 		    inet_sk(sk)->inet_num != IPPROTO_IGMP)
1211 			return -EOPNOTSUPP;
1212 		if (optlen != sizeof(int))
1213 			return -ENOPROTOOPT;
1214
1215 		rtnl_lock();
1216 		if (mrt->mroute_sk) {
1217 			rtnl_unlock();
1218 			return -EADDRINUSE;
1219 		}
1220
1221 		ret = ip_ra_control(sk, 1, mrtsock_destruct);
1222 		if (ret == 0) {
1223 			write_lock_bh(&mrt_lock);
1224 			mrt->mroute_sk = sk;
1225 			write_unlock_bh(&mrt_lock);
1226
1227 			IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
1228 		}
1229 		rtnl_unlock();
1230 		return ret;
1231 	case MRT_DONE:
1232 		if (sk != mrt->mroute_sk)
1233 			return -EACCES;
1234 		return ip_ra_control(sk, 0, NULL);
1235 	case MRT_ADD_VIF:
1236 	case MRT_DEL_VIF:
1237 		if (optlen != sizeof(vif))
1238 			return -EINVAL;
1239 		if (copy_from_user(&vif, optval, sizeof(vif)))
1240 			return -EFAULT;
1241 		if (vif.vifc_vifi >= MAXVIFS)
1242 			return -ENFILE;
1243 		rtnl_lock();
1244 		if (optname == MRT_ADD_VIF) {
1245 			ret = vif_add(net, mrt, &vif, sk == mrt->mroute_sk);
1246 		} else {
1247 			ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
1248 		}
1249 		rtnl_unlock();
1250 		return ret;
1251
1252 	/*
1253 	 * Manipulate the forwarding caches. These live
1254 	 * in a sort of kernel/user symbiosis.
1255 	 */
1256 	case MRT_ADD_MFC:
1257 	case MRT_DEL_MFC:
1258 		if (optlen != sizeof(mfc))
1259 			return -EINVAL;
1260 		if (copy_from_user(&mfc, optval, sizeof(mfc)))
1261 			return -EFAULT;
1262 		rtnl_lock();
1263 		if (optname == MRT_DEL_MFC)
1264 			ret = ipmr_mfc_delete(mrt, &mfc);
1265 		else
1266 			ret = ipmr_mfc_add(net, mrt, &mfc, sk == mrt->mroute_sk);
1267 		rtnl_unlock();
1268 		return ret;
1269 	/*
1270 	 * Control PIM assert.
1271 	 */
1272 	case MRT_ASSERT:
1273 	{
1274 		int v;
1275 		if (get_user(v, (int __user *)optval))
1276 			return -EFAULT;
1277 		mrt->mroute_do_assert = (v) ? 1 : 0;
1278 		return 0;
1279 	}
1280 #ifdef CONFIG_IP_PIMSM
1281 	case MRT_PIM:
1282 	{
1283 		int v;
1284
1285 		if (get_user(v, (int __user *)optval))
1286 			return -EFAULT;
1287 		v = (v) ? 1 : 0;
1288
1289 		rtnl_lock();
1290 		ret = 0;
1291 		if (v != mrt->mroute_do_pim) {
1292 			mrt->mroute_do_pim = v;
1293 			mrt->mroute_do_assert = v;
1294 		}
1295 		rtnl_unlock();
1296 		return ret;
1297 	}
1298 #endif
1299 #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
1300 	case MRT_TABLE:
1301 	{
1302 		u32 v;
1303
1304 		if (optlen != sizeof(u32))
1305 			return -EINVAL;
1306 		if (get_user(v, (u32 __user *)optval))
1307 			return -EFAULT;
1308 		if (sk == mrt->mroute_sk)
1309 			return -EBUSY;
1310
1311 		rtnl_lock();
1312 		ret = 0;
1313 		if (!ipmr_new_table(net, v))
1314 			ret = -ENOMEM;
1315 		raw_sk(sk)->ipmr_table = v;
1316 		rtnl_unlock();
1317 		return ret;
1318 	}
1319 #endif
1320 	/*
1321 	 * Spurious command, or MRT_VERSION which you cannot
1322 	 * set.
1323 	 */
1324 	default:
1325 		return -ENOPROTOOPT;
1326 	}
1327 }
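A minimal daemon-side bring-up matching the option parsing above, as a userspace sketch (error checks elided; the interface address is a placeholder):

	#include <string.h>
	#include <sys/socket.h>
	#include <sys/ioctl.h>
	#include <netinet/in.h>
	#include <arpa/inet.h>
	#include <linux/mroute.h>

	static int mroute_init(void)
	{
		int one = 1;
		struct vifctl vc;
		int fd = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);

		/* claim the mroute socket: only one per table is allowed */
		setsockopt(fd, IPPROTO_IP, MRT_INIT, &one, sizeof(one));

		memset(&vc, 0, sizeof(vc));
		vc.vifc_vifi = 0;
		vc.vifc_threshold = 1;
		vc.vifc_lcl_addr.s_addr = inet_addr("192.0.2.1");	/* local iface addr */
		setsockopt(fd, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));
		/* ...MRT_ADD_MFC entries as in the sketch after ipmr_mfc_add()... */
		return fd;
	}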
1328
1329 /*
1330  * Getsockopt support for the multicast routing system.
1331  */
1332 1332
1333 int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen) 1333 int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
1334 { 1334 {
1335 int olr; 1335 int olr;
1336 int val; 1336 int val;
1337 struct net *net = sock_net(sk); 1337 struct net *net = sock_net(sk);
1338 struct mr_table *mrt; 1338 struct mr_table *mrt;
1339 1339
1340 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); 1340 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1341 if (mrt == NULL) 1341 if (mrt == NULL)
1342 return -ENOENT; 1342 return -ENOENT;
1343 1343
1344 if (optname != MRT_VERSION && 1344 if (optname != MRT_VERSION &&
1345 #ifdef CONFIG_IP_PIMSM 1345 #ifdef CONFIG_IP_PIMSM
1346 optname!=MRT_PIM && 1346 optname!=MRT_PIM &&
1347 #endif 1347 #endif
1348 optname!=MRT_ASSERT) 1348 optname!=MRT_ASSERT)
1349 return -ENOPROTOOPT; 1349 return -ENOPROTOOPT;
1350 1350
1351 if (get_user(olr, optlen)) 1351 if (get_user(olr, optlen))
1352 return -EFAULT; 1352 return -EFAULT;
1353 1353
1354 olr = min_t(unsigned int, olr, sizeof(int)); 1354 olr = min_t(unsigned int, olr, sizeof(int));
1355 if (olr < 0) 1355 if (olr < 0)
1356 return -EINVAL; 1356 return -EINVAL;
1357 1357
1358 if (put_user(olr, optlen)) 1358 if (put_user(olr, optlen))
1359 return -EFAULT; 1359 return -EFAULT;
1360 if (optname == MRT_VERSION) 1360 if (optname == MRT_VERSION)
1361 val = 0x0305; 1361 val = 0x0305;
1362 #ifdef CONFIG_IP_PIMSM 1362 #ifdef CONFIG_IP_PIMSM
1363 else if (optname == MRT_PIM) 1363 else if (optname == MRT_PIM)
1364 val = mrt->mroute_do_pim; 1364 val = mrt->mroute_do_pim;
1365 #endif 1365 #endif
1366 else 1366 else
1367 val = mrt->mroute_do_assert; 1367 val = mrt->mroute_do_assert;
1368 if (copy_to_user(optval, &val, olr)) 1368 if (copy_to_user(optval, &val, olr))
1369 return -EFAULT; 1369 return -EFAULT;
1370 return 0; 1370 return 0;
1371 } 1371 }
1372 1372
1373 /* 1373 /*
1374 * The IP multicast ioctl support routines. 1374 * The IP multicast ioctl support routines.
1375 */ 1375 */
1376 1376
1377 int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) 1377 int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1378 { 1378 {
1379 struct sioc_sg_req sr; 1379 struct sioc_sg_req sr;
1380 struct sioc_vif_req vr; 1380 struct sioc_vif_req vr;
1381 struct vif_device *vif; 1381 struct vif_device *vif;
1382 struct mfc_cache *c; 1382 struct mfc_cache *c;
1383 struct net *net = sock_net(sk); 1383 struct net *net = sock_net(sk);
1384 struct mr_table *mrt; 1384 struct mr_table *mrt;
1385 1385
1386 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); 1386 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1387 if (mrt == NULL) 1387 if (mrt == NULL)
1388 return -ENOENT; 1388 return -ENOENT;
1389 1389
1390 switch (cmd) { 1390 switch (cmd) {
1391 case SIOCGETVIFCNT: 1391 case SIOCGETVIFCNT:
1392 if (copy_from_user(&vr, arg, sizeof(vr))) 1392 if (copy_from_user(&vr, arg, sizeof(vr)))
1393 return -EFAULT; 1393 return -EFAULT;
1394 if (vr.vifi >= mrt->maxvif) 1394 if (vr.vifi >= mrt->maxvif)
1395 return -EINVAL; 1395 return -EINVAL;
1396 read_lock(&mrt_lock); 1396 read_lock(&mrt_lock);
1397 vif = &mrt->vif_table[vr.vifi]; 1397 vif = &mrt->vif_table[vr.vifi];
1398 if (VIF_EXISTS(mrt, vr.vifi)) { 1398 if (VIF_EXISTS(mrt, vr.vifi)) {
1399 vr.icount = vif->pkt_in; 1399 vr.icount = vif->pkt_in;
1400 vr.ocount = vif->pkt_out; 1400 vr.ocount = vif->pkt_out;
1401 vr.ibytes = vif->bytes_in; 1401 vr.ibytes = vif->bytes_in;
1402 vr.obytes = vif->bytes_out; 1402 vr.obytes = vif->bytes_out;
1403 read_unlock(&mrt_lock); 1403 read_unlock(&mrt_lock);
1404 1404
1405 if (copy_to_user(arg, &vr, sizeof(vr))) 1405 if (copy_to_user(arg, &vr, sizeof(vr)))
1406 return -EFAULT; 1406 return -EFAULT;
1407 return 0; 1407 return 0;
1408 } 1408 }
1409 read_unlock(&mrt_lock); 1409 read_unlock(&mrt_lock);
1410 return -EADDRNOTAVAIL; 1410 return -EADDRNOTAVAIL;
1411 case SIOCGETSGCNT: 1411 case SIOCGETSGCNT:
1412 if (copy_from_user(&sr, arg, sizeof(sr))) 1412 if (copy_from_user(&sr, arg, sizeof(sr)))
1413 return -EFAULT; 1413 return -EFAULT;
1414 1414
1415 read_lock(&mrt_lock); 1415 read_lock(&mrt_lock);
1416 c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr); 1416 c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
1417 if (c) { 1417 if (c) {
1418 sr.pktcnt = c->mfc_un.res.pkt; 1418 sr.pktcnt = c->mfc_un.res.pkt;
1419 sr.bytecnt = c->mfc_un.res.bytes; 1419 sr.bytecnt = c->mfc_un.res.bytes;
1420 sr.wrong_if = c->mfc_un.res.wrong_if; 1420 sr.wrong_if = c->mfc_un.res.wrong_if;
1421 read_unlock(&mrt_lock); 1421 read_unlock(&mrt_lock);
1422 1422
1423 if (copy_to_user(arg, &sr, sizeof(sr))) 1423 if (copy_to_user(arg, &sr, sizeof(sr)))
1424 return -EFAULT; 1424 return -EFAULT;
1425 return 0; 1425 return 0;
1426 } 1426 }
1427 read_unlock(&mrt_lock); 1427 read_unlock(&mrt_lock);
1428 return -EADDRNOTAVAIL; 1428 return -EADDRNOTAVAIL;
1429 default: 1429 default:
1430 return -ENOIOCTLCMD; 1430 return -ENOIOCTLCMD;
1431 } 1431 }
1432 } 1432 }
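
[Editor's note] The same raw socket doubles as the ioctl endpoint. A hedged sketch of the SIOCGETVIFCNT path handled above (assumes a multicast routing daemon has already configured VIF 0; otherwise the kernel returns -EADDRNOTAVAIL as shown):

#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/mroute.h>

int main(void)
{
        int fd = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
        struct sioc_vif_req vr;

        memset(&vr, 0, sizeof(vr));
        vr.vifi = 0;                    /* first virtual interface */
        if (fd >= 0 && ioctl(fd, SIOCGETVIFCNT, &vr) == 0)
                printf("vif0: %lu pkts in, %lu pkts out\n",
                       vr.icount, vr.ocount);
        return 0;
}
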
1433 1433
1434 1434
1435 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr) 1435 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1436 { 1436 {
1437 struct net_device *dev = ptr; 1437 struct net_device *dev = ptr;
1438 struct net *net = dev_net(dev); 1438 struct net *net = dev_net(dev);
1439 struct mr_table *mrt; 1439 struct mr_table *mrt;
1440 struct vif_device *v; 1440 struct vif_device *v;
1441 int ct; 1441 int ct;
1442 LIST_HEAD(list); 1442 LIST_HEAD(list);
1443 1443
1444 if (event != NETDEV_UNREGISTER) 1444 if (event != NETDEV_UNREGISTER)
1445 return NOTIFY_DONE; 1445 return NOTIFY_DONE;
1446 1446
1447 ipmr_for_each_table(mrt, net) { 1447 ipmr_for_each_table(mrt, net) {
1448 v = &mrt->vif_table[0]; 1448 v = &mrt->vif_table[0];
1449 for (ct = 0; ct < mrt->maxvif; ct++, v++) { 1449 for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1450 if (v->dev == dev) 1450 if (v->dev == dev)
1451 vif_delete(mrt, ct, 1, &list); 1451 vif_delete(mrt, ct, 1, &list);
1452 } 1452 }
1453 } 1453 }
1454 unregister_netdevice_many(&list); 1454 unregister_netdevice_many(&list);
1455 return NOTIFY_DONE; 1455 return NOTIFY_DONE;
1456 } 1456 }
1457 1457
1458 1458
1459 static struct notifier_block ip_mr_notifier = { 1459 static struct notifier_block ip_mr_notifier = {
1460 .notifier_call = ipmr_device_event, 1460 .notifier_call = ipmr_device_event,
1461 }; 1461 };
1462 1462
1463 /* 1463 /*
1464 * Encapsulate a packet by attaching a valid IPIP header to it. 1464 * Encapsulate a packet by attaching a valid IPIP header to it.
1465 * This avoids tunnel drivers and other mess and gives us the speed so 1465 * This avoids tunnel drivers and other mess and gives us the speed so
1466 * important for multicast video. 1466 * important for multicast video.
1467 */ 1467 */
1468 1468
1469 static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr) 1469 static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
1470 { 1470 {
1471 struct iphdr *iph; 1471 struct iphdr *iph;
1472 struct iphdr *old_iph = ip_hdr(skb); 1472 struct iphdr *old_iph = ip_hdr(skb);
1473 1473
1474 skb_push(skb, sizeof(struct iphdr)); 1474 skb_push(skb, sizeof(struct iphdr));
1475 skb->transport_header = skb->network_header; 1475 skb->transport_header = skb->network_header;
1476 skb_reset_network_header(skb); 1476 skb_reset_network_header(skb);
1477 iph = ip_hdr(skb); 1477 iph = ip_hdr(skb);
1478 1478
1479 iph->version = 4; 1479 iph->version = 4;
1480 iph->tos = old_iph->tos; 1480 iph->tos = old_iph->tos;
1481 iph->ttl = old_iph->ttl; 1481 iph->ttl = old_iph->ttl;
1482 iph->frag_off = 0; 1482 iph->frag_off = 0;
1483 iph->daddr = daddr; 1483 iph->daddr = daddr;
1484 iph->saddr = saddr; 1484 iph->saddr = saddr;
1485 iph->protocol = IPPROTO_IPIP; 1485 iph->protocol = IPPROTO_IPIP;
1486 iph->ihl = 5; 1486 iph->ihl = 5;
1487 iph->tot_len = htons(skb->len); 1487 iph->tot_len = htons(skb->len);
1488 ip_select_ident(iph, skb_dst(skb), NULL); 1488 ip_select_ident(iph, skb_dst(skb), NULL);
1489 ip_send_check(iph); 1489 ip_send_check(iph);
1490 1490
1491 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 1491 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1492 nf_reset(skb); 1492 nf_reset(skb);
1493 } 1493 }
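
[Editor's note] ip_encap() above prepends a bare 20-byte outer IPv4 header (ihl = 5, protocol = IPPROTO_IPIP) and lets ip_send_check() fill in the header checksum. A standalone sketch of that checksum arithmetic (RFC 1071 one's-complement sum; helper name hypothetical):

#include <stdint.h>

/* One's-complement sum over an IPv4 header whose checksum field has
 * been pre-zeroed, as ip_send_check() computes it; 'words' is ihl * 2
 * (number of 16-bit words, 10 for ihl == 5). */
static uint16_t iph_csum_sketch(const uint16_t *p, int words)
{
        uint32_t sum = 0;

        while (words--)
                sum += *p++;
        sum = (sum & 0xffff) + (sum >> 16);     /* fold the carries... */
        sum = (sum & 0xffff) + (sum >> 16);     /* ...twice is enough */
        return (uint16_t)~sum;
}
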
1494 1494
1495 static inline int ipmr_forward_finish(struct sk_buff *skb) 1495 static inline int ipmr_forward_finish(struct sk_buff *skb)
1496 { 1496 {
1497 struct ip_options * opt = &(IPCB(skb)->opt); 1497 struct ip_options * opt = &(IPCB(skb)->opt);
1498 1498
1499 IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS); 1499 IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
1500 1500
1501 if (unlikely(opt->optlen)) 1501 if (unlikely(opt->optlen))
1502 ip_forward_options(skb); 1502 ip_forward_options(skb);
1503 1503
1504 return dst_output(skb); 1504 return dst_output(skb);
1505 } 1505 }
1506 1506
1507 /* 1507 /*
1508 * Processing handlers for ipmr_forward 1508 * Processing handlers for ipmr_forward
1509 */ 1509 */
1510 1510
1511 static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, 1511 static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1512 struct sk_buff *skb, struct mfc_cache *c, int vifi) 1512 struct sk_buff *skb, struct mfc_cache *c, int vifi)
1513 { 1513 {
1514 const struct iphdr *iph = ip_hdr(skb); 1514 const struct iphdr *iph = ip_hdr(skb);
1515 struct vif_device *vif = &mrt->vif_table[vifi]; 1515 struct vif_device *vif = &mrt->vif_table[vifi];
1516 struct net_device *dev; 1516 struct net_device *dev;
1517 struct rtable *rt; 1517 struct rtable *rt;
1518 int encap = 0; 1518 int encap = 0;
1519 1519
1520 if (vif->dev == NULL) 1520 if (vif->dev == NULL)
1521 goto out_free; 1521 goto out_free;
1522 1522
1523 #ifdef CONFIG_IP_PIMSM 1523 #ifdef CONFIG_IP_PIMSM
1524 if (vif->flags & VIFF_REGISTER) { 1524 if (vif->flags & VIFF_REGISTER) {
1525 vif->pkt_out++; 1525 vif->pkt_out++;
1526 vif->bytes_out += skb->len; 1526 vif->bytes_out += skb->len;
1527 vif->dev->stats.tx_bytes += skb->len; 1527 vif->dev->stats.tx_bytes += skb->len;
1528 vif->dev->stats.tx_packets++; 1528 vif->dev->stats.tx_packets++;
1529 ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT); 1529 ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
1530 goto out_free; 1530 goto out_free;
1531 } 1531 }
1532 #endif 1532 #endif
1533 1533
1534 if (vif->flags&VIFF_TUNNEL) { 1534 if (vif->flags&VIFF_TUNNEL) {
1535 struct flowi fl = { .oif = vif->link, 1535 struct flowi fl = { .oif = vif->link,
1536 .nl_u = { .ip4_u = 1536 .nl_u = { .ip4_u =
1537 { .daddr = vif->remote, 1537 { .daddr = vif->remote,
1538 .saddr = vif->local, 1538 .saddr = vif->local,
1539 .tos = RT_TOS(iph->tos) } }, 1539 .tos = RT_TOS(iph->tos) } },
1540 .proto = IPPROTO_IPIP }; 1540 .proto = IPPROTO_IPIP };
1541 if (ip_route_output_key(net, &rt, &fl)) 1541 if (ip_route_output_key(net, &rt, &fl))
1542 goto out_free; 1542 goto out_free;
1543 encap = sizeof(struct iphdr); 1543 encap = sizeof(struct iphdr);
1544 } else { 1544 } else {
1545 struct flowi fl = { .oif = vif->link, 1545 struct flowi fl = { .oif = vif->link,
1546 .nl_u = { .ip4_u = 1546 .nl_u = { .ip4_u =
1547 { .daddr = iph->daddr, 1547 { .daddr = iph->daddr,
1548 .tos = RT_TOS(iph->tos) } }, 1548 .tos = RT_TOS(iph->tos) } },
1549 .proto = IPPROTO_IPIP }; 1549 .proto = IPPROTO_IPIP };
1550 if (ip_route_output_key(net, &rt, &fl)) 1550 if (ip_route_output_key(net, &rt, &fl))
1551 goto out_free; 1551 goto out_free;
1552 } 1552 }
1553 1553
1554 dev = rt->u.dst.dev; 1554 dev = rt->u.dst.dev;
1555 1555
1556 if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) { 1556 if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
1557 /* Do not fragment multicasts. Alas, IPv4 does not 1557 /* Do not fragment multicasts. Alas, IPv4 does not
1558 allow us to send ICMP here, so such packets will 1558 allow us to send ICMP here, so such packets will
1559 disappear into a black hole. 1559 disappear into a black hole.
1560 */ 1560 */
1561 1561
1562 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS); 1562 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
1563 ip_rt_put(rt); 1563 ip_rt_put(rt);
1564 goto out_free; 1564 goto out_free;
1565 } 1565 }
1566 1566
1567 encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len; 1567 encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;
1568 1568
1569 if (skb_cow(skb, encap)) { 1569 if (skb_cow(skb, encap)) {
1570 ip_rt_put(rt); 1570 ip_rt_put(rt);
1571 goto out_free; 1571 goto out_free;
1572 } 1572 }
1573 1573
1574 vif->pkt_out++; 1574 vif->pkt_out++;
1575 vif->bytes_out += skb->len; 1575 vif->bytes_out += skb->len;
1576 1576
1577 skb_dst_drop(skb); 1577 skb_dst_drop(skb);
1578 skb_dst_set(skb, &rt->u.dst); 1578 skb_dst_set(skb, &rt->u.dst);
1579 ip_decrease_ttl(ip_hdr(skb)); 1579 ip_decrease_ttl(ip_hdr(skb));
1580 1580
1581 /* FIXME: forward and output firewalls used to be called here. 1581 /* FIXME: forward and output firewalls used to be called here.
1582 * What do we do with netfilter? -- RR */ 1582 * What do we do with netfilter? -- RR */
1583 if (vif->flags & VIFF_TUNNEL) { 1583 if (vif->flags & VIFF_TUNNEL) {
1584 ip_encap(skb, vif->local, vif->remote); 1584 ip_encap(skb, vif->local, vif->remote);
1585 /* FIXME: extra output firewall step used to be here. --RR */ 1585 /* FIXME: extra output firewall step used to be here. --RR */
1586 vif->dev->stats.tx_packets++; 1586 vif->dev->stats.tx_packets++;
1587 vif->dev->stats.tx_bytes += skb->len; 1587 vif->dev->stats.tx_bytes += skb->len;
1588 } 1588 }
1589 1589
1590 IPCB(skb)->flags |= IPSKB_FORWARDED; 1590 IPCB(skb)->flags |= IPSKB_FORWARDED;
1591 1591
1592 /* 1592 /*
1593 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets locally 1593 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets locally
1594 * not only before forwarding, but also after forwarding on all output 1594 * not only before forwarding, but also after forwarding on all output
1595 * interfaces. Clearly, if the mrouter runs a multicast 1595 * interfaces. Clearly, if the mrouter runs a multicast
1596 * program, that program should receive packets regardless of the 1596 * program, that program should receive packets regardless of the
1597 * interface it joined on. 1597 * interface it joined on.
1598 * If we do not arrange this, the program will have to join on all 1598 * If we do not arrange this, the program will have to join on all
1599 * interfaces. On the other hand, a multihomed host (or router, but 1599 * interfaces. On the other hand, a multihomed host (or router, but
1600 * not an mrouter) cannot join on more than one interface - that would 1600 * not an mrouter) cannot join on more than one interface - that would
1601 * result in receiving duplicate packets. 1601 * result in receiving duplicate packets.
1602 */ 1602 */
1603 NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev, dev, 1603 NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev, dev,
1604 ipmr_forward_finish); 1604 ipmr_forward_finish);
1605 return; 1605 return;
1606 1606
1607 out_free: 1607 out_free:
1608 kfree_skb(skb); 1608 kfree_skb(skb);
1609 return; 1609 return;
1610 } 1610 }
1611 1611
1612 static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev) 1612 static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
1613 { 1613 {
1614 int ct; 1614 int ct;
1615 1615
1616 for (ct = mrt->maxvif-1; ct >= 0; ct--) { 1616 for (ct = mrt->maxvif-1; ct >= 0; ct--) {
1617 if (mrt->vif_table[ct].dev == dev) 1617 if (mrt->vif_table[ct].dev == dev)
1618 break; 1618 break;
1619 } 1619 }
1620 return ct; 1620 return ct;
1621 } 1621 }
1622 1622
1623 /* "local" means that we should preserve one skb (for local delivery) */ 1623 /* "local" means that we should preserve one skb (for local delivery) */
1624 1624
1625 static int ip_mr_forward(struct net *net, struct mr_table *mrt, 1625 static int ip_mr_forward(struct net *net, struct mr_table *mrt,
1626 struct sk_buff *skb, struct mfc_cache *cache, 1626 struct sk_buff *skb, struct mfc_cache *cache,
1627 int local) 1627 int local)
1628 { 1628 {
1629 int psend = -1; 1629 int psend = -1;
1630 int vif, ct; 1630 int vif, ct;
1631 1631
1632 vif = cache->mfc_parent; 1632 vif = cache->mfc_parent;
1633 cache->mfc_un.res.pkt++; 1633 cache->mfc_un.res.pkt++;
1634 cache->mfc_un.res.bytes += skb->len; 1634 cache->mfc_un.res.bytes += skb->len;
1635 1635
1636 /* 1636 /*
1637 * Wrong interface: drop packet and (maybe) send PIM assert. 1637 * Wrong interface: drop packet and (maybe) send PIM assert.
1638 */ 1638 */
1639 if (mrt->vif_table[vif].dev != skb->dev) { 1639 if (mrt->vif_table[vif].dev != skb->dev) {
1640 int true_vifi; 1640 int true_vifi;
1641 1641
1642 if (skb_rtable(skb)->fl.iif == 0) { 1642 if (skb_rtable(skb)->fl.iif == 0) {
1643 /* It is our own packet, looped back. 1643 /* It is our own packet, looped back.
1644 Very complicated situation... 1644 Very complicated situation...
1645 1645
1646 The best workaround until routing daemons are 1646 The best workaround until routing daemons are
1647 fixed is not to redistribute a packet if it was 1647 fixed is not to redistribute a packet if it was
1648 sent through the wrong interface. It means that 1648 sent through the wrong interface. It means that
1649 multicast applications WILL NOT work for 1649 multicast applications WILL NOT work for
1650 (S,G) entries whose default multicast route points 1650 (S,G) entries whose default multicast route points
1651 to the wrong oif. In any case, it is not a good 1651 to the wrong oif. In any case, it is not a good
1652 idea to run multicast applications on a router. 1652 idea to run multicast applications on a router.
1653 */ 1653 */
1654 goto dont_forward; 1654 goto dont_forward;
1655 } 1655 }
1656 1656
1657 cache->mfc_un.res.wrong_if++; 1657 cache->mfc_un.res.wrong_if++;
1658 true_vifi = ipmr_find_vif(mrt, skb->dev); 1658 true_vifi = ipmr_find_vif(mrt, skb->dev);
1659 1659
1660 if (true_vifi >= 0 && mrt->mroute_do_assert && 1660 if (true_vifi >= 0 && mrt->mroute_do_assert &&
1661 /* PIM-SM uses asserts when switching from RPT to SPT, 1661 /* PIM-SM uses asserts when switching from RPT to SPT,
1662 so we cannot check that the packet arrived on an oif. 1662 so we cannot check that the packet arrived on an oif.
1663 That is bad, but otherwise we would need to move a pretty 1663 That is bad, but otherwise we would need to move a pretty
1664 large chunk of pimd into the kernel. Ough... --ANK 1664 large chunk of pimd into the kernel. Ough... --ANK
1665 */ 1665 */
1666 (mrt->mroute_do_pim || 1666 (mrt->mroute_do_pim ||
1667 cache->mfc_un.res.ttls[true_vifi] < 255) && 1667 cache->mfc_un.res.ttls[true_vifi] < 255) &&
1668 time_after(jiffies, 1668 time_after(jiffies,
1669 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) { 1669 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1670 cache->mfc_un.res.last_assert = jiffies; 1670 cache->mfc_un.res.last_assert = jiffies;
1671 ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF); 1671 ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
1672 } 1672 }
1673 goto dont_forward; 1673 goto dont_forward;
1674 } 1674 }
1675 1675
1676 mrt->vif_table[vif].pkt_in++; 1676 mrt->vif_table[vif].pkt_in++;
1677 mrt->vif_table[vif].bytes_in += skb->len; 1677 mrt->vif_table[vif].bytes_in += skb->len;
1678 1678
1679 /* 1679 /*
1680 * Forward the frame 1680 * Forward the frame
1681 */ 1681 */
1682 for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) { 1682 for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
1683 if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) { 1683 if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
1684 if (psend != -1) { 1684 if (psend != -1) {
1685 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 1685 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1686 if (skb2) 1686 if (skb2)
1687 ipmr_queue_xmit(net, mrt, skb2, cache, 1687 ipmr_queue_xmit(net, mrt, skb2, cache,
1688 psend); 1688 psend);
1689 } 1689 }
1690 psend = ct; 1690 psend = ct;
1691 } 1691 }
1692 } 1692 }
1693 if (psend != -1) { 1693 if (psend != -1) {
1694 if (local) { 1694 if (local) {
1695 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 1695 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1696 if (skb2) 1696 if (skb2)
1697 ipmr_queue_xmit(net, mrt, skb2, cache, psend); 1697 ipmr_queue_xmit(net, mrt, skb2, cache, psend);
1698 } else { 1698 } else {
1699 ipmr_queue_xmit(net, mrt, skb, cache, psend); 1699 ipmr_queue_xmit(net, mrt, skb, cache, psend);
1700 return 0; 1700 return 0;
1701 } 1701 }
1702 } 1702 }
1703 1703
1704 dont_forward: 1704 dont_forward:
1705 if (!local) 1705 if (!local)
1706 kfree_skb(skb); 1706 kfree_skb(skb);
1707 return 0; 1707 return 0;
1708 } 1708 }
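
[Editor's note] The backwards walk in ip_mr_forward() is a clone-minimizing fan-out: every eligible vif except the one found last gets a clone, and that last one consumes the original skb, so exactly N-1 clones are made for N outputs. A self-contained sketch of the pattern (buf/clone_buf/xmit are hypothetical stand-ins):

struct buf;
extern struct buf *clone_buf(struct buf *b);
extern void xmit(struct buf *b, int vif);

/* Send 'b' on every vif whose TTL threshold the packet beats,
 * cloning for all matches except the one discovered last. */
void fan_out(struct buf *b, int nvifs, const unsigned char *ttls, int ttl)
{
        int ct, psend = -1;

        for (ct = nvifs - 1; ct >= 0; ct--) {
                if (ttl > ttls[ct]) {           /* 255 = never forward */
                        if (psend != -1)        /* earlier match pending */
                                xmit(clone_buf(b), psend);
                        psend = ct;             /* defer: might be last */
                }
        }
        if (psend != -1)
                xmit(b, psend);                 /* original goes out once */
}
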
1709 1709
1710 1710
1711 /* 1711 /*
1712 * Multicast packets for forwarding arrive here 1712 * Multicast packets for forwarding arrive here
1713 */ 1713 */
1714 1714
1715 int ip_mr_input(struct sk_buff *skb) 1715 int ip_mr_input(struct sk_buff *skb)
1716 { 1716 {
1717 struct mfc_cache *cache; 1717 struct mfc_cache *cache;
1718 struct net *net = dev_net(skb->dev); 1718 struct net *net = dev_net(skb->dev);
1719 int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL; 1719 int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
1720 struct mr_table *mrt; 1720 struct mr_table *mrt;
1721 int err; 1721 int err;
1722 1722
1723 /* Packet is looped back after forwarding; it should not be 1723 /* Packet is looped back after forwarding; it should not be
1724 forwarded a second time, but it can still be delivered locally. 1724 forwarded a second time, but it can still be delivered locally.
1725 */ 1725 */
1726 if (IPCB(skb)->flags&IPSKB_FORWARDED) 1726 if (IPCB(skb)->flags&IPSKB_FORWARDED)
1727 goto dont_forward; 1727 goto dont_forward;
1728 1728
1729 err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt); 1729 err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt);
1730 if (err < 0) 1730 if (err < 0)
1731 return err; 1731 return err;
1732 1732
1733 if (!local) { 1733 if (!local) {
1734 if (IPCB(skb)->opt.router_alert) { 1734 if (IPCB(skb)->opt.router_alert) {
1735 if (ip_call_ra_chain(skb)) 1735 if (ip_call_ra_chain(skb))
1736 return 0; 1736 return 0;
1737 } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){ 1737 } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
1738 /* IGMPv1 (and broken IGMPv2 implementations such as 1738 /* IGMPv1 (and broken IGMPv2 implementations such as
1739 Cisco IOS <= 11.2(8)) do not put the router alert 1739 Cisco IOS <= 11.2(8)) do not put the router alert
1740 option into IGMP packets destined to routable 1740 option into IGMP packets destined to routable
1741 groups. This is very bad, because it means 1741 groups. This is very bad, because it means
1742 that we can forward NO IGMP messages. 1742 that we can forward NO IGMP messages.
1743 */ 1743 */
1744 read_lock(&mrt_lock); 1744 read_lock(&mrt_lock);
1745 if (mrt->mroute_sk) { 1745 if (mrt->mroute_sk) {
1746 nf_reset(skb); 1746 nf_reset(skb);
1747 raw_rcv(mrt->mroute_sk, skb); 1747 raw_rcv(mrt->mroute_sk, skb);
1748 read_unlock(&mrt_lock); 1748 read_unlock(&mrt_lock);
1749 return 0; 1749 return 0;
1750 } 1750 }
1751 read_unlock(&mrt_lock); 1751 read_unlock(&mrt_lock);
1752 } 1752 }
1753 } 1753 }
1754 1754
1755 read_lock(&mrt_lock); 1755 read_lock(&mrt_lock);
1756 cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr); 1756 cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
1757 1757
1758 /* 1758 /*
1759 * No usable cache entry 1759 * No usable cache entry
1760 */ 1760 */
1761 if (cache == NULL) { 1761 if (cache == NULL) {
1762 int vif; 1762 int vif;
1763 1763
1764 if (local) { 1764 if (local) {
1765 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 1765 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1766 ip_local_deliver(skb); 1766 ip_local_deliver(skb);
1767 if (skb2 == NULL) { 1767 if (skb2 == NULL) {
1768 read_unlock(&mrt_lock); 1768 read_unlock(&mrt_lock);
1769 return -ENOBUFS; 1769 return -ENOBUFS;
1770 } 1770 }
1771 skb = skb2; 1771 skb = skb2;
1772 } 1772 }
1773 1773
1774 vif = ipmr_find_vif(mrt, skb->dev); 1774 vif = ipmr_find_vif(mrt, skb->dev);
1775 if (vif >= 0) { 1775 if (vif >= 0) {
1776 int err2 = ipmr_cache_unresolved(mrt, vif, skb); 1776 int err2 = ipmr_cache_unresolved(mrt, vif, skb);
1777 read_unlock(&mrt_lock); 1777 read_unlock(&mrt_lock);
1778 1778
1779 return err2; 1779 return err2;
1780 } 1780 }
1781 read_unlock(&mrt_lock); 1781 read_unlock(&mrt_lock);
1782 kfree_skb(skb); 1782 kfree_skb(skb);
1783 return -ENODEV; 1783 return -ENODEV;
1784 } 1784 }
1785 1785
1786 ip_mr_forward(net, mrt, skb, cache, local); 1786 ip_mr_forward(net, mrt, skb, cache, local);
1787 1787
1788 read_unlock(&mrt_lock); 1788 read_unlock(&mrt_lock);
1789 1789
1790 if (local) 1790 if (local)
1791 return ip_local_deliver(skb); 1791 return ip_local_deliver(skb);
1792 1792
1793 return 0; 1793 return 0;
1794 1794
1795 dont_forward: 1795 dont_forward:
1796 if (local) 1796 if (local)
1797 return ip_local_deliver(skb); 1797 return ip_local_deliver(skb);
1798 kfree_skb(skb); 1798 kfree_skb(skb);
1799 return 0; 1799 return 0;
1800 } 1800 }
1801 1801
1802 #ifdef CONFIG_IP_PIMSM 1802 #ifdef CONFIG_IP_PIMSM
1803 static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb, 1803 static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
1804 unsigned int pimlen) 1804 unsigned int pimlen)
1805 { 1805 {
1806 struct net_device *reg_dev = NULL; 1806 struct net_device *reg_dev = NULL;
1807 struct iphdr *encap; 1807 struct iphdr *encap;
1808 1808
1809 encap = (struct iphdr *)(skb_transport_header(skb) + pimlen); 1809 encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
1810 /* 1810 /*
1811 Check that: 1811 Check that:
1812 a. packet is really destined to a multicast group 1812 a. packet is really destined to a multicast group
1813 b. packet is not a NULL-REGISTER 1813 b. packet is not a NULL-REGISTER
1814 c. packet is not truncated 1814 c. packet is not truncated
1815 */ 1815 */
1816 if (!ipv4_is_multicast(encap->daddr) || 1816 if (!ipv4_is_multicast(encap->daddr) ||
1817 encap->tot_len == 0 || 1817 encap->tot_len == 0 ||
1818 ntohs(encap->tot_len) + pimlen > skb->len) 1818 ntohs(encap->tot_len) + pimlen > skb->len)
1819 return 1; 1819 return 1;
1820 1820
1821 read_lock(&mrt_lock); 1821 read_lock(&mrt_lock);
1822 if (mrt->mroute_reg_vif_num >= 0) 1822 if (mrt->mroute_reg_vif_num >= 0)
1823 reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev; 1823 reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
1824 if (reg_dev) 1824 if (reg_dev)
1825 dev_hold(reg_dev); 1825 dev_hold(reg_dev);
1826 read_unlock(&mrt_lock); 1826 read_unlock(&mrt_lock);
1827 1827
1828 if (reg_dev == NULL) 1828 if (reg_dev == NULL)
1829 return 1; 1829 return 1;
1830 1830
1831 skb->mac_header = skb->network_header; 1831 skb->mac_header = skb->network_header;
1832 skb_pull(skb, (u8*)encap - skb->data); 1832 skb_pull(skb, (u8*)encap - skb->data);
1833 skb_reset_network_header(skb); 1833 skb_reset_network_header(skb);
1834 skb->dev = reg_dev;
1835 skb->protocol = htons(ETH_P_IP); 1834 skb->protocol = htons(ETH_P_IP);
1836 skb->ip_summed = 0; 1835 skb->ip_summed = 0;
1837 skb->pkt_type = PACKET_HOST; 1836 skb->pkt_type = PACKET_HOST;
1838 skb_dst_drop(skb); 1837
1839 reg_dev->stats.rx_bytes += skb->len; 1838 skb_tunnel_rx(skb, reg_dev);
1840 reg_dev->stats.rx_packets++; 1839
1841 nf_reset(skb);
1842 netif_rx(skb); 1840 netif_rx(skb);
1843 dev_put(reg_dev); 1841 dev_put(reg_dev);
1844 1842
1845 return 0; 1843 return 0;
1846 } 1844 }
1847 #endif 1845 #endif
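
[Editor's note] The hunk above is the point of this commit: the five removed lines on the left (skb->dev assignment, dst drop, rx byte/packet accounting, nf_reset) collapse into one skb_tunnel_rx() call. Reconstructed from this hunk and the commit description (the exact member order in net/dst.h may differ), the helper amounts to:

/* Sketch of the new helper: reset tunnel-crossing state so the packet
 * is re-received cleanly on 'dev', and clear rxhash so packet steering
 * recomputes it for the decapsulated flow. */
static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev)
{
        skb->dev = dev;
        skb->rxhash = 0;
        /* TODO : stats should be SMP safe */
        dev->stats.rx_packets++;
        dev->stats.rx_bytes += skb->len;
        skb_dst_drop(skb);
        nf_reset(skb);
}
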
1848 1846
1849 #ifdef CONFIG_IP_PIMSM_V1 1847 #ifdef CONFIG_IP_PIMSM_V1
1850 /* 1848 /*
1851 * Handle IGMP messages of PIMv1 1849 * Handle IGMP messages of PIMv1
1852 */ 1850 */
1853 1851
1854 int pim_rcv_v1(struct sk_buff * skb) 1852 int pim_rcv_v1(struct sk_buff * skb)
1855 { 1853 {
1856 struct igmphdr *pim; 1854 struct igmphdr *pim;
1857 struct net *net = dev_net(skb->dev); 1855 struct net *net = dev_net(skb->dev);
1858 struct mr_table *mrt; 1856 struct mr_table *mrt;
1859 1857
1860 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) 1858 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1861 goto drop; 1859 goto drop;
1862 1860
1863 pim = igmp_hdr(skb); 1861 pim = igmp_hdr(skb);
1864 1862
1865 if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0) 1863 if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
1866 goto drop; 1864 goto drop;
1867 1865
1868 if (!mrt->mroute_do_pim || 1866 if (!mrt->mroute_do_pim ||
1869 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) 1867 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1870 goto drop; 1868 goto drop;
1871 1869
1872 if (__pim_rcv(mrt, skb, sizeof(*pim))) { 1870 if (__pim_rcv(mrt, skb, sizeof(*pim))) {
1873 drop: 1871 drop:
1874 kfree_skb(skb); 1872 kfree_skb(skb);
1875 } 1873 }
1876 return 0; 1874 return 0;
1877 } 1875 }
1878 #endif 1876 #endif
1879 1877
1880 #ifdef CONFIG_IP_PIMSM_V2 1878 #ifdef CONFIG_IP_PIMSM_V2
1881 static int pim_rcv(struct sk_buff * skb) 1879 static int pim_rcv(struct sk_buff * skb)
1882 { 1880 {
1883 struct pimreghdr *pim; 1881 struct pimreghdr *pim;
1884 struct net *net = dev_net(skb->dev); 1882 struct net *net = dev_net(skb->dev);
1885 struct mr_table *mrt; 1883 struct mr_table *mrt;
1886 1884
1887 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) 1885 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1888 goto drop; 1886 goto drop;
1889 1887
1890 pim = (struct pimreghdr *)skb_transport_header(skb); 1888 pim = (struct pimreghdr *)skb_transport_header(skb);
1891 if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) || 1889 if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
1892 (pim->flags&PIM_NULL_REGISTER) || 1890 (pim->flags&PIM_NULL_REGISTER) ||
1893 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 && 1891 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
1894 csum_fold(skb_checksum(skb, 0, skb->len, 0)))) 1892 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
1895 goto drop; 1893 goto drop;
1896 1894
1897 if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0) 1895 if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
1898 goto drop; 1896 goto drop;
1899 1897
1900 if (__pim_rcv(mrt, skb, sizeof(*pim))) { 1898 if (__pim_rcv(mrt, skb, sizeof(*pim))) {
1901 drop: 1899 drop:
1902 kfree_skb(skb); 1900 kfree_skb(skb);
1903 } 1901 }
1904 return 0; 1902 return 0;
1905 } 1903 }
1906 #endif 1904 #endif
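
[Editor's note] The type check in pim_rcv() packs the PIM version into the high nibble and the message type into the low nibble of a single octet. A compile-time sketch of the accepted value (constants mirrored from <linux/pim.h>; PIMv2 Register is message type 1):

#define PIM_VERSION     2
#define PIM_REGISTER    1

/* pim->type must equal (version << 4) | type, i.e. 0x21 for a
 * PIMv2 Register message. */
_Static_assert(((PIM_VERSION << 4) | PIM_REGISTER) == 0x21,
               "PIMv2 Register carries type octet 0x21");
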
1907 1905
1908 static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, 1906 static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
1909 struct mfc_cache *c, struct rtmsg *rtm) 1907 struct mfc_cache *c, struct rtmsg *rtm)
1910 { 1908 {
1911 int ct; 1909 int ct;
1912 struct rtnexthop *nhp; 1910 struct rtnexthop *nhp;
1913 u8 *b = skb_tail_pointer(skb); 1911 u8 *b = skb_tail_pointer(skb);
1914 struct rtattr *mp_head; 1912 struct rtattr *mp_head;
1915 1913
1916 /* If cache is unresolved, don't try to parse IIF and OIF */ 1914 /* If cache is unresolved, don't try to parse IIF and OIF */
1917 if (c->mfc_parent > MAXVIFS) 1915 if (c->mfc_parent > MAXVIFS)
1918 return -ENOENT; 1916 return -ENOENT;
1919 1917
1920 if (VIF_EXISTS(mrt, c->mfc_parent)) 1918 if (VIF_EXISTS(mrt, c->mfc_parent))
1921 RTA_PUT(skb, RTA_IIF, 4, &mrt->vif_table[c->mfc_parent].dev->ifindex); 1919 RTA_PUT(skb, RTA_IIF, 4, &mrt->vif_table[c->mfc_parent].dev->ifindex);
1922 1920
1923 mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0)); 1921 mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1924 1922
1925 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { 1923 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1926 if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) { 1924 if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
1927 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4)) 1925 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1928 goto rtattr_failure; 1926 goto rtattr_failure;
1929 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); 1927 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1930 nhp->rtnh_flags = 0; 1928 nhp->rtnh_flags = 0;
1931 nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; 1929 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1932 nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex; 1930 nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
1933 nhp->rtnh_len = sizeof(*nhp); 1931 nhp->rtnh_len = sizeof(*nhp);
1934 } 1932 }
1935 } 1933 }
1936 mp_head->rta_type = RTA_MULTIPATH; 1934 mp_head->rta_type = RTA_MULTIPATH;
1937 mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head; 1935 mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
1938 rtm->rtm_type = RTN_MULTICAST; 1936 rtm->rtm_type = RTN_MULTICAST;
1939 return 1; 1937 return 1;
1940 1938
1941 rtattr_failure: 1939 rtattr_failure:
1942 nlmsg_trim(skb, b); 1940 nlmsg_trim(skb, b);
1943 return -EMSGSIZE; 1941 return -EMSGSIZE;
1944 } 1942 }
1945 1943
1946 int ipmr_get_route(struct net *net, 1944 int ipmr_get_route(struct net *net,
1947 struct sk_buff *skb, struct rtmsg *rtm, int nowait) 1945 struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1948 { 1946 {
1949 int err; 1947 int err;
1950 struct mr_table *mrt; 1948 struct mr_table *mrt;
1951 struct mfc_cache *cache; 1949 struct mfc_cache *cache;
1952 struct rtable *rt = skb_rtable(skb); 1950 struct rtable *rt = skb_rtable(skb);
1953 1951
1954 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); 1952 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
1955 if (mrt == NULL) 1953 if (mrt == NULL)
1956 return -ENOENT; 1954 return -ENOENT;
1957 1955
1958 read_lock(&mrt_lock); 1956 read_lock(&mrt_lock);
1959 cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst); 1957 cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst);
1960 1958
1961 if (cache == NULL) { 1959 if (cache == NULL) {
1962 struct sk_buff *skb2; 1960 struct sk_buff *skb2;
1963 struct iphdr *iph; 1961 struct iphdr *iph;
1964 struct net_device *dev; 1962 struct net_device *dev;
1965 int vif; 1963 int vif;
1966 1964
1967 if (nowait) { 1965 if (nowait) {
1968 read_unlock(&mrt_lock); 1966 read_unlock(&mrt_lock);
1969 return -EAGAIN; 1967 return -EAGAIN;
1970 } 1968 }
1971 1969
1972 dev = skb->dev; 1970 dev = skb->dev;
1973 if (dev == NULL || (vif = ipmr_find_vif(mrt, dev)) < 0) { 1971 if (dev == NULL || (vif = ipmr_find_vif(mrt, dev)) < 0) {
1974 read_unlock(&mrt_lock); 1972 read_unlock(&mrt_lock);
1975 return -ENODEV; 1973 return -ENODEV;
1976 } 1974 }
1977 skb2 = skb_clone(skb, GFP_ATOMIC); 1975 skb2 = skb_clone(skb, GFP_ATOMIC);
1978 if (!skb2) { 1976 if (!skb2) {
1979 read_unlock(&mrt_lock); 1977 read_unlock(&mrt_lock);
1980 return -ENOMEM; 1978 return -ENOMEM;
1981 } 1979 }
1982 1980
1983 skb_push(skb2, sizeof(struct iphdr)); 1981 skb_push(skb2, sizeof(struct iphdr));
1984 skb_reset_network_header(skb2); 1982 skb_reset_network_header(skb2);
1985 iph = ip_hdr(skb2); 1983 iph = ip_hdr(skb2);
1986 iph->ihl = sizeof(struct iphdr) >> 2; 1984 iph->ihl = sizeof(struct iphdr) >> 2;
1987 iph->saddr = rt->rt_src; 1985 iph->saddr = rt->rt_src;
1988 iph->daddr = rt->rt_dst; 1986 iph->daddr = rt->rt_dst;
1989 iph->version = 0; 1987 iph->version = 0;
1990 err = ipmr_cache_unresolved(mrt, vif, skb2); 1988 err = ipmr_cache_unresolved(mrt, vif, skb2);
1991 read_unlock(&mrt_lock); 1989 read_unlock(&mrt_lock);
1992 return err; 1990 return err;
1993 } 1991 }
1994 1992
1995 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY)) 1993 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1996 cache->mfc_flags |= MFC_NOTIFY; 1994 cache->mfc_flags |= MFC_NOTIFY;
1997 err = __ipmr_fill_mroute(mrt, skb, cache, rtm); 1995 err = __ipmr_fill_mroute(mrt, skb, cache, rtm);
1998 read_unlock(&mrt_lock); 1996 read_unlock(&mrt_lock);
1999 return err; 1997 return err;
2000 } 1998 }
2001 1999
2002 static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, 2000 static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2003 u32 pid, u32 seq, struct mfc_cache *c) 2001 u32 pid, u32 seq, struct mfc_cache *c)
2004 { 2002 {
2005 struct nlmsghdr *nlh; 2003 struct nlmsghdr *nlh;
2006 struct rtmsg *rtm; 2004 struct rtmsg *rtm;
2007 2005
2008 nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI); 2006 nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
2009 if (nlh == NULL) 2007 if (nlh == NULL)
2010 return -EMSGSIZE; 2008 return -EMSGSIZE;
2011 2009
2012 rtm = nlmsg_data(nlh); 2010 rtm = nlmsg_data(nlh);
2013 rtm->rtm_family = RTNL_FAMILY_IPMR; 2011 rtm->rtm_family = RTNL_FAMILY_IPMR;
2014 rtm->rtm_dst_len = 32; 2012 rtm->rtm_dst_len = 32;
2015 rtm->rtm_src_len = 32; 2013 rtm->rtm_src_len = 32;
2016 rtm->rtm_tos = 0; 2014 rtm->rtm_tos = 0;
2017 rtm->rtm_table = mrt->id; 2015 rtm->rtm_table = mrt->id;
2018 NLA_PUT_U32(skb, RTA_TABLE, mrt->id); 2016 NLA_PUT_U32(skb, RTA_TABLE, mrt->id);
2019 rtm->rtm_type = RTN_MULTICAST; 2017 rtm->rtm_type = RTN_MULTICAST;
2020 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 2018 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2021 rtm->rtm_protocol = RTPROT_UNSPEC; 2019 rtm->rtm_protocol = RTPROT_UNSPEC;
2022 rtm->rtm_flags = 0; 2020 rtm->rtm_flags = 0;
2023 2021
2024 NLA_PUT_BE32(skb, RTA_SRC, c->mfc_origin); 2022 NLA_PUT_BE32(skb, RTA_SRC, c->mfc_origin);
2025 NLA_PUT_BE32(skb, RTA_DST, c->mfc_mcastgrp); 2023 NLA_PUT_BE32(skb, RTA_DST, c->mfc_mcastgrp);
2026 2024
2027 if (__ipmr_fill_mroute(mrt, skb, c, rtm) < 0) 2025 if (__ipmr_fill_mroute(mrt, skb, c, rtm) < 0)
2028 goto nla_put_failure; 2026 goto nla_put_failure;
2029 2027
2030 return nlmsg_end(skb, nlh); 2028 return nlmsg_end(skb, nlh);
2031 2029
2032 nla_put_failure: 2030 nla_put_failure:
2033 nlmsg_cancel(skb, nlh); 2031 nlmsg_cancel(skb, nlh);
2034 return -EMSGSIZE; 2032 return -EMSGSIZE;
2035 } 2033 }
2036 2034
2037 static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) 2035 static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2038 { 2036 {
2039 struct net *net = sock_net(skb->sk); 2037 struct net *net = sock_net(skb->sk);
2040 struct mr_table *mrt; 2038 struct mr_table *mrt;
2041 struct mfc_cache *mfc; 2039 struct mfc_cache *mfc;
2042 unsigned int t = 0, s_t; 2040 unsigned int t = 0, s_t;
2043 unsigned int h = 0, s_h; 2041 unsigned int h = 0, s_h;
2044 unsigned int e = 0, s_e; 2042 unsigned int e = 0, s_e;
2045 2043
2046 s_t = cb->args[0]; 2044 s_t = cb->args[0];
2047 s_h = cb->args[1]; 2045 s_h = cb->args[1];
2048 s_e = cb->args[2]; 2046 s_e = cb->args[2];
2049 2047
2050 read_lock(&mrt_lock); 2048 read_lock(&mrt_lock);
2051 ipmr_for_each_table(mrt, net) { 2049 ipmr_for_each_table(mrt, net) {
2052 if (t < s_t) 2050 if (t < s_t)
2053 goto next_table; 2051 goto next_table;
2054 if (t > s_t) 2052 if (t > s_t)
2055 s_h = 0; 2053 s_h = 0;
2056 for (h = s_h; h < MFC_LINES; h++) { 2054 for (h = s_h; h < MFC_LINES; h++) {
2057 list_for_each_entry(mfc, &mrt->mfc_cache_array[h], list) { 2055 list_for_each_entry(mfc, &mrt->mfc_cache_array[h], list) {
2058 if (e < s_e) 2056 if (e < s_e)
2059 goto next_entry; 2057 goto next_entry;
2060 if (ipmr_fill_mroute(mrt, skb, 2058 if (ipmr_fill_mroute(mrt, skb,
2061 NETLINK_CB(cb->skb).pid, 2059 NETLINK_CB(cb->skb).pid,
2062 cb->nlh->nlmsg_seq, 2060 cb->nlh->nlmsg_seq,
2063 mfc) < 0) 2061 mfc) < 0)
2064 goto done; 2062 goto done;
2065 next_entry: 2063 next_entry:
2066 e++; 2064 e++;
2067 } 2065 }
2068 e = s_e = 0; 2066 e = s_e = 0;
2069 } 2067 }
2070 s_h = 0; 2068 s_h = 0;
2071 next_table: 2069 next_table:
2072 t++; 2070 t++;
2073 } 2071 }
2074 done: 2072 done:
2075 read_unlock(&mrt_lock); 2073 read_unlock(&mrt_lock);
2076 2074
2077 cb->args[2] = e; 2075 cb->args[2] = e;
2078 cb->args[1] = h; 2076 cb->args[1] = h;
2079 cb->args[0] = t; 2077 cb->args[0] = t;
2080 2078
2081 return skb->len; 2079 return skb->len;
2082 } 2080 }
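
[Editor's note] ipmr_rtm_dumproute() is re-entered once per recvmsg() until the dump is exhausted; cb->args[0..2] persist the (table, bucket, entry) cursor across calls. A runnable toy model of that resume protocol (dimensions and names invented for illustration):

#include <stdio.h>

struct pos { int h, e; };                       /* mirrors cb->args[1..2] */

static int items[3][4] = { {1,2,3,4}, {5,6,7,8}, {9,10,11,12} };

/* Emit at most 'room' items, then save the cursor - just like the dump
 * callback bailing out when the netlink skb fills up. */
static int dump_pass(struct pos *p, int room)
{
        int n = 0;

        for (; p->h < 3; p->h++, p->e = 0)
                for (; p->e < 4; p->e++) {
                        if (n == room)
                                return n;       /* "skb full": resume here */
                        printf("%d ", items[p->h][p->e]);
                        n++;
                }
        return n;
}

int main(void)
{
        struct pos p = { 0, 0 };

        while (dump_pass(&p, 5) == 5)           /* short pass ends the dump */
                printf("| ");
        printf("\n");
        return 0;
}
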
2083 2081
2084 #ifdef CONFIG_PROC_FS 2082 #ifdef CONFIG_PROC_FS
2085 /* 2083 /*
2086 * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif 2084 * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
2087 */ 2085 */
2088 struct ipmr_vif_iter { 2086 struct ipmr_vif_iter {
2089 struct seq_net_private p; 2087 struct seq_net_private p;
2090 struct mr_table *mrt; 2088 struct mr_table *mrt;
2091 int ct; 2089 int ct;
2092 }; 2090 };
2093 2091
2094 static struct vif_device *ipmr_vif_seq_idx(struct net *net, 2092 static struct vif_device *ipmr_vif_seq_idx(struct net *net,
2095 struct ipmr_vif_iter *iter, 2093 struct ipmr_vif_iter *iter,
2096 loff_t pos) 2094 loff_t pos)
2097 { 2095 {
2098 struct mr_table *mrt = iter->mrt; 2096 struct mr_table *mrt = iter->mrt;
2099 2097
2100 for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) { 2098 for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
2101 if (!VIF_EXISTS(mrt, iter->ct)) 2099 if (!VIF_EXISTS(mrt, iter->ct))
2102 continue; 2100 continue;
2103 if (pos-- == 0) 2101 if (pos-- == 0)
2104 return &mrt->vif_table[iter->ct]; 2102 return &mrt->vif_table[iter->ct];
2105 } 2103 }
2106 return NULL; 2104 return NULL;
2107 } 2105 }
2108 2106
2109 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos) 2107 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
2110 __acquires(mrt_lock) 2108 __acquires(mrt_lock)
2111 { 2109 {
2112 struct ipmr_vif_iter *iter = seq->private; 2110 struct ipmr_vif_iter *iter = seq->private;
2113 struct net *net = seq_file_net(seq); 2111 struct net *net = seq_file_net(seq);
2114 struct mr_table *mrt; 2112 struct mr_table *mrt;
2115 2113
2116 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); 2114 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
2117 if (mrt == NULL) 2115 if (mrt == NULL)
2118 return ERR_PTR(-ENOENT); 2116 return ERR_PTR(-ENOENT);
2119 2117
2120 iter->mrt = mrt; 2118 iter->mrt = mrt;
2121 2119
2122 read_lock(&mrt_lock); 2120 read_lock(&mrt_lock);
2123 return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1) 2121 return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
2124 : SEQ_START_TOKEN; 2122 : SEQ_START_TOKEN;
2125 } 2123 }
2126 2124
2127 static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2125 static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2128 { 2126 {
2129 struct ipmr_vif_iter *iter = seq->private; 2127 struct ipmr_vif_iter *iter = seq->private;
2130 struct net *net = seq_file_net(seq); 2128 struct net *net = seq_file_net(seq);
2131 struct mr_table *mrt = iter->mrt; 2129 struct mr_table *mrt = iter->mrt;
2132 2130
2133 ++*pos; 2131 ++*pos;
2134 if (v == SEQ_START_TOKEN) 2132 if (v == SEQ_START_TOKEN)
2135 return ipmr_vif_seq_idx(net, iter, 0); 2133 return ipmr_vif_seq_idx(net, iter, 0);
2136 2134
2137 while (++iter->ct < mrt->maxvif) { 2135 while (++iter->ct < mrt->maxvif) {
2138 if (!VIF_EXISTS(mrt, iter->ct)) 2136 if (!VIF_EXISTS(mrt, iter->ct))
2139 continue; 2137 continue;
2140 return &mrt->vif_table[iter->ct]; 2138 return &mrt->vif_table[iter->ct];
2141 } 2139 }
2142 return NULL; 2140 return NULL;
2143 } 2141 }
2144 2142
2145 static void ipmr_vif_seq_stop(struct seq_file *seq, void *v) 2143 static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
2146 __releases(mrt_lock) 2144 __releases(mrt_lock)
2147 { 2145 {
2148 read_unlock(&mrt_lock); 2146 read_unlock(&mrt_lock);
2149 } 2147 }
2150 2148
2151 static int ipmr_vif_seq_show(struct seq_file *seq, void *v) 2149 static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
2152 { 2150 {
2153 struct ipmr_vif_iter *iter = seq->private; 2151 struct ipmr_vif_iter *iter = seq->private;
2154 struct mr_table *mrt = iter->mrt; 2152 struct mr_table *mrt = iter->mrt;
2155 2153
2156 if (v == SEQ_START_TOKEN) { 2154 if (v == SEQ_START_TOKEN) {
2157 seq_puts(seq, 2155 seq_puts(seq,
2158 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n"); 2156 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
2159 } else { 2157 } else {
2160 const struct vif_device *vif = v; 2158 const struct vif_device *vif = v;
2161 const char *name = vif->dev ? vif->dev->name : "none"; 2159 const char *name = vif->dev ? vif->dev->name : "none";
2162 2160
2163 seq_printf(seq, 2161 seq_printf(seq,
2164 "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n", 2162 "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
2165 vif - mrt->vif_table, 2163 vif - mrt->vif_table,
2166 name, vif->bytes_in, vif->pkt_in, 2164 name, vif->bytes_in, vif->pkt_in,
2167 vif->bytes_out, vif->pkt_out, 2165 vif->bytes_out, vif->pkt_out,
2168 vif->flags, vif->local, vif->remote); 2166 vif->flags, vif->local, vif->remote);
2169 } 2167 }
2170 return 0; 2168 return 0;
2171 } 2169 }
2172 2170
2173 static const struct seq_operations ipmr_vif_seq_ops = { 2171 static const struct seq_operations ipmr_vif_seq_ops = {
2174 .start = ipmr_vif_seq_start, 2172 .start = ipmr_vif_seq_start,
2175 .next = ipmr_vif_seq_next, 2173 .next = ipmr_vif_seq_next,
2176 .stop = ipmr_vif_seq_stop, 2174 .stop = ipmr_vif_seq_stop,
2177 .show = ipmr_vif_seq_show, 2175 .show = ipmr_vif_seq_show,
2178 }; 2176 };
2179 2177
2180 static int ipmr_vif_open(struct inode *inode, struct file *file) 2178 static int ipmr_vif_open(struct inode *inode, struct file *file)
2181 { 2179 {
2182 return seq_open_net(inode, file, &ipmr_vif_seq_ops, 2180 return seq_open_net(inode, file, &ipmr_vif_seq_ops,
2183 sizeof(struct ipmr_vif_iter)); 2181 sizeof(struct ipmr_vif_iter));
2184 } 2182 }
2185 2183
2186 static const struct file_operations ipmr_vif_fops = { 2184 static const struct file_operations ipmr_vif_fops = {
2187 .owner = THIS_MODULE, 2185 .owner = THIS_MODULE,
2188 .open = ipmr_vif_open, 2186 .open = ipmr_vif_open,
2189 .read = seq_read, 2187 .read = seq_read,
2190 .llseek = seq_lseek, 2188 .llseek = seq_lseek,
2191 .release = seq_release_net, 2189 .release = seq_release_net,
2192 }; 2190 };
2193 2191
2194 struct ipmr_mfc_iter { 2192 struct ipmr_mfc_iter {
2195 struct seq_net_private p; 2193 struct seq_net_private p;
2196 struct mr_table *mrt; 2194 struct mr_table *mrt;
2197 struct list_head *cache; 2195 struct list_head *cache;
2198 int ct; 2196 int ct;
2199 }; 2197 };
2200 2198
2201 2199
2202 static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net, 2200 static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
2203 struct ipmr_mfc_iter *it, loff_t pos) 2201 struct ipmr_mfc_iter *it, loff_t pos)
2204 { 2202 {
2205 struct mr_table *mrt = it->mrt; 2203 struct mr_table *mrt = it->mrt;
2206 struct mfc_cache *mfc; 2204 struct mfc_cache *mfc;
2207 2205
2208 read_lock(&mrt_lock); 2206 read_lock(&mrt_lock);
2209 for (it->ct = 0; it->ct < MFC_LINES; it->ct++) { 2207 for (it->ct = 0; it->ct < MFC_LINES; it->ct++) {
2210 it->cache = &mrt->mfc_cache_array[it->ct]; 2208 it->cache = &mrt->mfc_cache_array[it->ct];
2211 list_for_each_entry(mfc, it->cache, list) 2209 list_for_each_entry(mfc, it->cache, list)
2212 if (pos-- == 0) 2210 if (pos-- == 0)
2213 return mfc; 2211 return mfc;
2214 } 2212 }
2215 read_unlock(&mrt_lock); 2213 read_unlock(&mrt_lock);
2216 2214
2217 spin_lock_bh(&mfc_unres_lock); 2215 spin_lock_bh(&mfc_unres_lock);
2218 it->cache = &mrt->mfc_unres_queue; 2216 it->cache = &mrt->mfc_unres_queue;
2219 list_for_each_entry(mfc, it->cache, list) 2217 list_for_each_entry(mfc, it->cache, list)
2220 if (pos-- == 0) 2218 if (pos-- == 0)
2221 return mfc; 2219 return mfc;
2222 spin_unlock_bh(&mfc_unres_lock); 2220 spin_unlock_bh(&mfc_unres_lock);
2223 2221
2224 it->cache = NULL; 2222 it->cache = NULL;
2225 return NULL; 2223 return NULL;
2226 } 2224 }
2227 2225
2228 2226
2229 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) 2227 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
2230 { 2228 {
2231 struct ipmr_mfc_iter *it = seq->private; 2229 struct ipmr_mfc_iter *it = seq->private;
2232 struct net *net = seq_file_net(seq); 2230 struct net *net = seq_file_net(seq);
2233 struct mr_table *mrt; 2231 struct mr_table *mrt;
2234 2232
2235 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); 2233 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
2236 if (mrt == NULL) 2234 if (mrt == NULL)
2237 return ERR_PTR(-ENOENT); 2235 return ERR_PTR(-ENOENT);
2238 2236
2239 it->mrt = mrt; 2237 it->mrt = mrt;
2240 it->cache = NULL; 2238 it->cache = NULL;
2241 it->ct = 0; 2239 it->ct = 0;
2242 return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1) 2240 return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
2243 : SEQ_START_TOKEN; 2241 : SEQ_START_TOKEN;
2244 } 2242 }
2245 2243
2246 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2244 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2247 { 2245 {
2248 struct mfc_cache *mfc = v; 2246 struct mfc_cache *mfc = v;
2249 struct ipmr_mfc_iter *it = seq->private; 2247 struct ipmr_mfc_iter *it = seq->private;
2250 struct net *net = seq_file_net(seq); 2248 struct net *net = seq_file_net(seq);
2251 struct mr_table *mrt = it->mrt; 2249 struct mr_table *mrt = it->mrt;
2252 2250
2253 ++*pos; 2251 ++*pos;
2254 2252
2255 if (v == SEQ_START_TOKEN) 2253 if (v == SEQ_START_TOKEN)
2256 return ipmr_mfc_seq_idx(net, seq->private, 0); 2254 return ipmr_mfc_seq_idx(net, seq->private, 0);
2257 2255
2258 if (mfc->list.next != it->cache) 2256 if (mfc->list.next != it->cache)
2259 return list_entry(mfc->list.next, struct mfc_cache, list); 2257 return list_entry(mfc->list.next, struct mfc_cache, list);
2260 2258
2261 if (it->cache == &mrt->mfc_unres_queue) 2259 if (it->cache == &mrt->mfc_unres_queue)
2262 goto end_of_list; 2260 goto end_of_list;
2263 2261
2264 BUG_ON(it->cache != &mrt->mfc_cache_array[it->ct]); 2262 BUG_ON(it->cache != &mrt->mfc_cache_array[it->ct]);
2265 2263
2266 while (++it->ct < MFC_LINES) { 2264 while (++it->ct < MFC_LINES) {
2267 it->cache = &mrt->mfc_cache_array[it->ct]; 2265 it->cache = &mrt->mfc_cache_array[it->ct];
2268 if (list_empty(it->cache)) 2266 if (list_empty(it->cache))
2269 continue; 2267 continue;
2270 return list_first_entry(it->cache, struct mfc_cache, list); 2268 return list_first_entry(it->cache, struct mfc_cache, list);
2271 } 2269 }
2272 2270
2273 /* exhausted cache_array, show unresolved */ 2271 /* exhausted cache_array, show unresolved */
2274 read_unlock(&mrt_lock); 2272 read_unlock(&mrt_lock);
2275 it->cache = &mrt->mfc_unres_queue; 2273 it->cache = &mrt->mfc_unres_queue;
2276 it->ct = 0; 2274 it->ct = 0;
2277 2275
2278 spin_lock_bh(&mfc_unres_lock); 2276 spin_lock_bh(&mfc_unres_lock);
2279 if (!list_empty(it->cache)) 2277 if (!list_empty(it->cache))
2280 return list_first_entry(it->cache, struct mfc_cache, list); 2278 return list_first_entry(it->cache, struct mfc_cache, list);
2281 2279
2282 end_of_list: 2280 end_of_list:
2283 spin_unlock_bh(&mfc_unres_lock); 2281 spin_unlock_bh(&mfc_unres_lock);
2284 it->cache = NULL; 2282 it->cache = NULL;
2285 2283
2286 return NULL; 2284 return NULL;
2287 } 2285 }
2288 2286
2289 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v) 2287 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
2290 { 2288 {
2291 struct ipmr_mfc_iter *it = seq->private; 2289 struct ipmr_mfc_iter *it = seq->private;
2292 struct mr_table *mrt = it->mrt; 2290 struct mr_table *mrt = it->mrt;
2293 2291
2294 if (it->cache == &mrt->mfc_unres_queue) 2292 if (it->cache == &mrt->mfc_unres_queue)
2295 spin_unlock_bh(&mfc_unres_lock); 2293 spin_unlock_bh(&mfc_unres_lock);
2296 else if (it->cache == &mrt->mfc_cache_array[it->ct]) 2294 else if (it->cache == &mrt->mfc_cache_array[it->ct])
2297 read_unlock(&mrt_lock); 2295 read_unlock(&mrt_lock);
2298 } 2296 }
2299 2297
2300 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) 2298 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
2301 { 2299 {
2302 int n; 2300 int n;
2303 2301
2304 if (v == SEQ_START_TOKEN) { 2302 if (v == SEQ_START_TOKEN) {
2305 seq_puts(seq, 2303 seq_puts(seq,
2306 "Group Origin Iif Pkts Bytes Wrong Oifs\n"); 2304 "Group Origin Iif Pkts Bytes Wrong Oifs\n");
2307 } else { 2305 } else {
2308 const struct mfc_cache *mfc = v; 2306 const struct mfc_cache *mfc = v;
2309 const struct ipmr_mfc_iter *it = seq->private; 2307 const struct ipmr_mfc_iter *it = seq->private;
2310 const struct mr_table *mrt = it->mrt; 2308 const struct mr_table *mrt = it->mrt;
2311 2309
2312 seq_printf(seq, "%08X %08X %-3hd", 2310 seq_printf(seq, "%08X %08X %-3hd",
2313 (__force u32) mfc->mfc_mcastgrp, 2311 (__force u32) mfc->mfc_mcastgrp,
2314 (__force u32) mfc->mfc_origin, 2312 (__force u32) mfc->mfc_origin,
2315 mfc->mfc_parent); 2313 mfc->mfc_parent);
2316 2314
2317 if (it->cache != &mrt->mfc_unres_queue) { 2315 if (it->cache != &mrt->mfc_unres_queue) {
2318 seq_printf(seq, " %8lu %8lu %8lu", 2316 seq_printf(seq, " %8lu %8lu %8lu",
2319 mfc->mfc_un.res.pkt, 2317 mfc->mfc_un.res.pkt,
2320 mfc->mfc_un.res.bytes, 2318 mfc->mfc_un.res.bytes,
2321 mfc->mfc_un.res.wrong_if); 2319 mfc->mfc_un.res.wrong_if);
2322 for (n = mfc->mfc_un.res.minvif; 2320 for (n = mfc->mfc_un.res.minvif;
2323 n < mfc->mfc_un.res.maxvif; n++ ) { 2321 n < mfc->mfc_un.res.maxvif; n++ ) {
2324 if (VIF_EXISTS(mrt, n) && 2322 if (VIF_EXISTS(mrt, n) &&
2325 mfc->mfc_un.res.ttls[n] < 255) 2323 mfc->mfc_un.res.ttls[n] < 255)
2326 seq_printf(seq, 2324 seq_printf(seq,
2327 " %2d:%-3d", 2325 " %2d:%-3d",
2328 n, mfc->mfc_un.res.ttls[n]); 2326 n, mfc->mfc_un.res.ttls[n]);
2329 } 2327 }
2330 } else { 2328 } else {
2331 /* unresolved mfc_caches don't contain 2329 /* unresolved mfc_caches don't contain
2332 * pkt, bytes and wrong_if values 2330 * pkt, bytes and wrong_if values
2333 */ 2331 */
2334 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul); 2332 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
2335 } 2333 }
2336 seq_putc(seq, '\n'); 2334 seq_putc(seq, '\n');
2337 } 2335 }
2338 return 0; 2336 return 0;
2339 } 2337 }
2340 2338
2341 static const struct seq_operations ipmr_mfc_seq_ops = { 2339 static const struct seq_operations ipmr_mfc_seq_ops = {
2342 .start = ipmr_mfc_seq_start, 2340 .start = ipmr_mfc_seq_start,
2343 .next = ipmr_mfc_seq_next, 2341 .next = ipmr_mfc_seq_next,
2344 .stop = ipmr_mfc_seq_stop, 2342 .stop = ipmr_mfc_seq_stop,
2345 .show = ipmr_mfc_seq_show, 2343 .show = ipmr_mfc_seq_show,
2346 }; 2344 };
2347 2345
2348 static int ipmr_mfc_open(struct inode *inode, struct file *file) 2346 static int ipmr_mfc_open(struct inode *inode, struct file *file)
2349 { 2347 {
2350 return seq_open_net(inode, file, &ipmr_mfc_seq_ops, 2348 return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
2351 sizeof(struct ipmr_mfc_iter)); 2349 sizeof(struct ipmr_mfc_iter));
2352 } 2350 }
2353 2351
2354 static const struct file_operations ipmr_mfc_fops = { 2352 static const struct file_operations ipmr_mfc_fops = {
2355 .owner = THIS_MODULE, 2353 .owner = THIS_MODULE,
2356 .open = ipmr_mfc_open, 2354 .open = ipmr_mfc_open,
2357 .read = seq_read, 2355 .read = seq_read,
2358 .llseek = seq_lseek, 2356 .llseek = seq_lseek,
2359 .release = seq_release_net, 2357 .release = seq_release_net,
2360 }; 2358 };
2361 #endif 2359 #endif
2362 2360
2363 #ifdef CONFIG_IP_PIMSM_V2 2361 #ifdef CONFIG_IP_PIMSM_V2
2364 static const struct net_protocol pim_protocol = { 2362 static const struct net_protocol pim_protocol = {
2365 .handler = pim_rcv, 2363 .handler = pim_rcv,
2366 .netns_ok = 1, 2364 .netns_ok = 1,
2367 }; 2365 };
2368 #endif 2366 #endif
2369 2367
2370 2368
2371 /* 2369 /*
2372 * Setup for IP multicast routing 2370 * Setup for IP multicast routing
2373 */ 2371 */
2374 static int __net_init ipmr_net_init(struct net *net) 2372 static int __net_init ipmr_net_init(struct net *net)
2375 { 2373 {
2376 int err; 2374 int err;
2377 2375
2378 err = ipmr_rules_init(net); 2376 err = ipmr_rules_init(net);
2379 if (err < 0) 2377 if (err < 0)
2380 goto fail; 2378 goto fail;
2381 2379
2382 #ifdef CONFIG_PROC_FS 2380 #ifdef CONFIG_PROC_FS
2383 err = -ENOMEM; 2381 err = -ENOMEM;
2384 if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops)) 2382 if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
2385 goto proc_vif_fail; 2383 goto proc_vif_fail;
2386 if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops)) 2384 if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
2387 goto proc_cache_fail; 2385 goto proc_cache_fail;
2388 #endif 2386 #endif
2389 return 0; 2387 return 0;
2390 2388
2391 #ifdef CONFIG_PROC_FS 2389 #ifdef CONFIG_PROC_FS
2392 proc_cache_fail: 2390 proc_cache_fail:
2393 proc_net_remove(net, "ip_mr_vif"); 2391 proc_net_remove(net, "ip_mr_vif");
2394 proc_vif_fail: 2392 proc_vif_fail:
2395 ipmr_rules_exit(net); 2393 ipmr_rules_exit(net);
2396 #endif 2394 #endif
2397 fail: 2395 fail:
2398 return err; 2396 return err;
2399 } 2397 }
2400 2398
2401 static void __net_exit ipmr_net_exit(struct net *net) 2399 static void __net_exit ipmr_net_exit(struct net *net)
2402 { 2400 {
2403 #ifdef CONFIG_PROC_FS 2401 #ifdef CONFIG_PROC_FS
2404 proc_net_remove(net, "ip_mr_cache"); 2402 proc_net_remove(net, "ip_mr_cache");
2405 proc_net_remove(net, "ip_mr_vif"); 2403 proc_net_remove(net, "ip_mr_vif");
2406 #endif 2404 #endif
2407 ipmr_rules_exit(net); 2405 ipmr_rules_exit(net);
2408 } 2406 }
2409 2407
2410 static struct pernet_operations ipmr_net_ops = { 2408 static struct pernet_operations ipmr_net_ops = {
2411 .init = ipmr_net_init, 2409 .init = ipmr_net_init,
2412 .exit = ipmr_net_exit, 2410 .exit = ipmr_net_exit,
2413 }; 2411 };
2414 2412
2415 int __init ip_mr_init(void) 2413 int __init ip_mr_init(void)
2416 { 2414 {
2417 int err; 2415 int err;
2418 2416
2419 mrt_cachep = kmem_cache_create("ip_mrt_cache", 2417 mrt_cachep = kmem_cache_create("ip_mrt_cache",
2420 sizeof(struct mfc_cache), 2418 sizeof(struct mfc_cache),
2421 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, 2419 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2422 NULL); 2420 NULL);
2423 if (!mrt_cachep) 2421 if (!mrt_cachep)
2424 return -ENOMEM; 2422 return -ENOMEM;
2425 2423
2426 err = register_pernet_subsys(&ipmr_net_ops); 2424 err = register_pernet_subsys(&ipmr_net_ops);
2427 if (err) 2425 if (err)
2428 goto reg_pernet_fail; 2426 goto reg_pernet_fail;
2429 2427
2430 err = register_netdevice_notifier(&ip_mr_notifier); 2428 err = register_netdevice_notifier(&ip_mr_notifier);
2431 if (err) 2429 if (err)
2432 goto reg_notif_fail; 2430 goto reg_notif_fail;
2433 #ifdef CONFIG_IP_PIMSM_V2 2431 #ifdef CONFIG_IP_PIMSM_V2
2434 if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) { 2432 if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
2435 printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n"); 2433 printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
2436 err = -EAGAIN; 2434 err = -EAGAIN;
2437 goto add_proto_fail; 2435 goto add_proto_fail;
2438 } 2436 }
2439 #endif 2437 #endif
2440 rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, NULL, ipmr_rtm_dumproute); 2438 rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, NULL, ipmr_rtm_dumproute);
2441 return 0; 2439 return 0;
2442 2440
2443 #ifdef CONFIG_IP_PIMSM_V2 2441 #ifdef CONFIG_IP_PIMSM_V2
2444 add_proto_fail: 2442 add_proto_fail:
2445 unregister_netdevice_notifier(&ip_mr_notifier); 2443 unregister_netdevice_notifier(&ip_mr_notifier);
2446 #endif 2444 #endif
2447 reg_notif_fail: 2445 reg_notif_fail:
2448 unregister_pernet_subsys(&ipmr_net_ops); 2446 unregister_pernet_subsys(&ipmr_net_ops);
2449 reg_pernet_fail: 2447 reg_pernet_fail:
2450 kmem_cache_destroy(mrt_cachep); 2448 kmem_cache_destroy(mrt_cachep);
2451 return err; 2449 return err;
2452 } 2450 }
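
[Editor's note] ip_mr_init() uses the kernel's standard unwind-in-reverse idiom: each failure label releases exactly what was acquired before it, in the opposite order. A skeletal sketch of the shape (acquire/release names hypothetical):

extern int acquire_a(void), acquire_b(void);    /* hypothetical setup steps */
extern void release_a(void);

int setup(void)
{
        int err;

        err = acquire_a();
        if (err)
                goto fail_a;                    /* nothing to undo yet */
        err = acquire_b();
        if (err)
                goto fail_b;                    /* undo a, then bail */
        return 0;

fail_b:
        release_a();
fail_a:
        return err;
}
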
2453 2451
net/ipv6/ip6_tunnel.c
1 /* 1 /*
2 * IPv6 tunneling device 2 * IPv6 tunneling device
3 * Linux INET6 implementation 3 * Linux INET6 implementation
4 * 4 *
5 * Authors: 5 * Authors:
6 * Ville Nuorvala <vnuorval@tcs.hut.fi> 6 * Ville Nuorvala <vnuorval@tcs.hut.fi>
7 * Yasuyuki Kozakai <kozakai@linux-ipv6.org> 7 * Yasuyuki Kozakai <kozakai@linux-ipv6.org>
8 * 8 *
9 * Based on: 9 * Based on:
10 * linux/net/ipv6/sit.c and linux/net/ipv4/ipip.c 10 * linux/net/ipv6/sit.c and linux/net/ipv4/ipip.c
11 * 11 *
12 * RFC 2473 12 * RFC 2473
13 * 13 *
14 * This program is free software; you can redistribute it and/or 14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License 15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version 16 * as published by the Free Software Foundation; either version
17 * 2 of the License, or (at your option) any later version. 17 * 2 of the License, or (at your option) any later version.
18 * 18 *
19 */ 19 */
20 20
21 #include <linux/module.h> 21 #include <linux/module.h>
22 #include <linux/capability.h> 22 #include <linux/capability.h>
23 #include <linux/errno.h> 23 #include <linux/errno.h>
24 #include <linux/types.h> 24 #include <linux/types.h>
25 #include <linux/sockios.h> 25 #include <linux/sockios.h>
26 #include <linux/icmp.h> 26 #include <linux/icmp.h>
27 #include <linux/if.h> 27 #include <linux/if.h>
28 #include <linux/in.h> 28 #include <linux/in.h>
29 #include <linux/ip.h> 29 #include <linux/ip.h>
30 #include <linux/if_tunnel.h> 30 #include <linux/if_tunnel.h>
31 #include <linux/net.h> 31 #include <linux/net.h>
32 #include <linux/in6.h> 32 #include <linux/in6.h>
33 #include <linux/netdevice.h> 33 #include <linux/netdevice.h>
34 #include <linux/if_arp.h> 34 #include <linux/if_arp.h>
35 #include <linux/icmpv6.h> 35 #include <linux/icmpv6.h>
36 #include <linux/init.h> 36 #include <linux/init.h>
37 #include <linux/route.h> 37 #include <linux/route.h>
38 #include <linux/rtnetlink.h> 38 #include <linux/rtnetlink.h>
39 #include <linux/netfilter_ipv6.h> 39 #include <linux/netfilter_ipv6.h>
40 #include <linux/slab.h> 40 #include <linux/slab.h>
41 41
42 #include <asm/uaccess.h> 42 #include <asm/uaccess.h>
43 #include <asm/atomic.h> 43 #include <asm/atomic.h>
44 44
45 #include <net/icmp.h> 45 #include <net/icmp.h>
46 #include <net/ip.h> 46 #include <net/ip.h>
47 #include <net/ipv6.h> 47 #include <net/ipv6.h>
48 #include <net/ip6_route.h> 48 #include <net/ip6_route.h>
49 #include <net/addrconf.h> 49 #include <net/addrconf.h>
50 #include <net/ip6_tunnel.h> 50 #include <net/ip6_tunnel.h>
51 #include <net/xfrm.h> 51 #include <net/xfrm.h>
52 #include <net/dsfield.h> 52 #include <net/dsfield.h>
53 #include <net/inet_ecn.h> 53 #include <net/inet_ecn.h>
54 #include <net/net_namespace.h> 54 #include <net/net_namespace.h>
55 #include <net/netns/generic.h> 55 #include <net/netns/generic.h>
56 56
57 MODULE_AUTHOR("Ville Nuorvala"); 57 MODULE_AUTHOR("Ville Nuorvala");
58 MODULE_DESCRIPTION("IPv6 tunneling device"); 58 MODULE_DESCRIPTION("IPv6 tunneling device");
59 MODULE_LICENSE("GPL"); 59 MODULE_LICENSE("GPL");
60 60
61 #define IPV6_TLV_TEL_DST_SIZE 8 61 #define IPV6_TLV_TEL_DST_SIZE 8
62 62
63 #ifdef IP6_TNL_DEBUG 63 #ifdef IP6_TNL_DEBUG
64 #define IP6_TNL_TRACE(x...) printk(KERN_DEBUG "%s:" x "\n", __func__) 64 #define IP6_TNL_TRACE(x...) printk(KERN_DEBUG "%s:" x "\n", __func__)
65 #else 65 #else
66 #define IP6_TNL_TRACE(x...) do {;} while(0) 66 #define IP6_TNL_TRACE(x...) do {;} while(0)
67 #endif 67 #endif
68 68
69 #define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK) 69 #define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK)
70 #define IPV6_TCLASS_SHIFT 20 70 #define IPV6_TCLASS_SHIFT 20
71 71
72 #define HASH_SIZE 32 72 #define HASH_SIZE 32
73 73
74 #define HASH(addr) ((__force u32)((addr)->s6_addr32[0] ^ (addr)->s6_addr32[1] ^ \ 74 #define HASH(addr) ((__force u32)((addr)->s6_addr32[0] ^ (addr)->s6_addr32[1] ^ \
75 (addr)->s6_addr32[2] ^ (addr)->s6_addr32[3]) & \ 75 (addr)->s6_addr32[2] ^ (addr)->s6_addr32[3]) & \
76 (HASH_SIZE - 1)) 76 (HASH_SIZE - 1))
77 77
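The HASH() macro above XOR-folds the four 32-bit words of an IPv6 address and masks the result to the table size; since XOR is order-independent, HASH(remote) ^ HASH(local) (used below) folds both end-points into a single bucket index. A minimal userspace sketch of the same fold (demo values only, not kernel code; the kernel operates on the big-endian s6_addr32[] words directly):

    #include <stdint.h>
    #include <stdio.h>

    #define HASH_SIZE 32

    /* XOR-fold four 32-bit address words, mask to HASH_SIZE - 1,
     * mirroring the HASH() macro above. */
    static uint32_t tnl_hash(const uint32_t a[4])
    {
        return (a[0] ^ a[1] ^ a[2] ^ a[3]) & (HASH_SIZE - 1);
    }

    int main(void)
    {
        uint32_t addr[4] = { 0x20010db8, 0, 0, 1 };     /* 2001:db8::1 */

        printf("bucket %u\n", tnl_hash(addr));  /* (0x20010db8 ^ 1) & 31 = 25 */
        return 0;
    }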
78 static void ip6_tnl_dev_init(struct net_device *dev); 78 static void ip6_tnl_dev_init(struct net_device *dev);
79 static void ip6_tnl_dev_setup(struct net_device *dev); 79 static void ip6_tnl_dev_setup(struct net_device *dev);
80 80
81 static int ip6_tnl_net_id __read_mostly; 81 static int ip6_tnl_net_id __read_mostly;
82 struct ip6_tnl_net { 82 struct ip6_tnl_net {
83 /* the IPv6 tunnel fallback device */ 83 /* the IPv6 tunnel fallback device */
84 struct net_device *fb_tnl_dev; 84 struct net_device *fb_tnl_dev;
85 /* lists for storing tunnels in use */ 85 /* lists for storing tunnels in use */
86 struct ip6_tnl *tnls_r_l[HASH_SIZE]; 86 struct ip6_tnl *tnls_r_l[HASH_SIZE];
87 struct ip6_tnl *tnls_wc[1]; 87 struct ip6_tnl *tnls_wc[1];
88 struct ip6_tnl **tnls[2]; 88 struct ip6_tnl **tnls[2];
89 }; 89 };
90 90
91 /* 91 /*
92 * Locking: hash tables are protected by RCU and a spinlock 92 * Locking: hash tables are protected by RCU and a spinlock
93 */ 93 */
94 static DEFINE_SPINLOCK(ip6_tnl_lock); 94 static DEFINE_SPINLOCK(ip6_tnl_lock);
95 95
96 static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t) 96 static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t)
97 { 97 {
98 struct dst_entry *dst = t->dst_cache; 98 struct dst_entry *dst = t->dst_cache;
99 99
100 if (dst && dst->obsolete && 100 if (dst && dst->obsolete &&
101 dst->ops->check(dst, t->dst_cookie) == NULL) { 101 dst->ops->check(dst, t->dst_cookie) == NULL) {
102 t->dst_cache = NULL; 102 t->dst_cache = NULL;
103 dst_release(dst); 103 dst_release(dst);
104 return NULL; 104 return NULL;
105 } 105 }
106 106
107 return dst; 107 return dst;
108 } 108 }
109 109
110 static inline void ip6_tnl_dst_reset(struct ip6_tnl *t) 110 static inline void ip6_tnl_dst_reset(struct ip6_tnl *t)
111 { 111 {
112 dst_release(t->dst_cache); 112 dst_release(t->dst_cache);
113 t->dst_cache = NULL; 113 t->dst_cache = NULL;
114 } 114 }
115 115
116 static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst) 116 static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
117 { 117 {
118 struct rt6_info *rt = (struct rt6_info *) dst; 118 struct rt6_info *rt = (struct rt6_info *) dst;
119 t->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; 119 t->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
120 dst_release(t->dst_cache); 120 dst_release(t->dst_cache);
121 t->dst_cache = dst; 121 t->dst_cache = dst;
122 } 122 }
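Taken together, ip6_tnl_dst_check(), ip6_tnl_dst_reset() and ip6_tnl_dst_store() implement a one-entry per-tunnel route cache, validated against the fib serial-number cookie. The transmit path below uses them in a check/lookup/store cycle; a condensed sketch of that lifecycle (names as in this file, error handling elided):

    struct dst_entry *dst = ip6_tnl_dst_check(t);   /* NULL if absent or stale */

    if (dst)
        dst_hold(dst);                              /* reuse the cached route */
    else
        dst = ip6_route_output(net, NULL, fl);      /* fresh routing lookup */

    /* ... encapsulate and transmit via dst ... */

    ip6_tnl_dst_store(t, dst);                      /* cache it for the next packet */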
123 123
124 /** 124 /**
125 * ip6_tnl_lookup - fetch tunnel matching the end-point addresses 125 * ip6_tnl_lookup - fetch tunnel matching the end-point addresses
126 * @remote: the address of the tunnel exit-point 126 * @remote: the address of the tunnel exit-point
127 * @local: the address of the tunnel entry-point 127 * @local: the address of the tunnel entry-point
128 * 128 *
129 * Return: 129 * Return:
130 * tunnel matching given end-points if found, 130 * tunnel matching given end-points if found,
131 * else fallback tunnel if its device is up, 131 * else fallback tunnel if its device is up,
132 * else %NULL 132 * else %NULL
133 **/ 133 **/
134 134
135 #define for_each_ip6_tunnel_rcu(start) \ 135 #define for_each_ip6_tunnel_rcu(start) \
136 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) 136 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
137 137
138 static struct ip6_tnl * 138 static struct ip6_tnl *
139 ip6_tnl_lookup(struct net *net, struct in6_addr *remote, struct in6_addr *local) 139 ip6_tnl_lookup(struct net *net, struct in6_addr *remote, struct in6_addr *local)
140 { 140 {
141 unsigned h0 = HASH(remote); 141 unsigned h0 = HASH(remote);
142 unsigned h1 = HASH(local); 142 unsigned h1 = HASH(local);
143 struct ip6_tnl *t; 143 struct ip6_tnl *t;
144 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); 144 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
145 145
146 for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[h0 ^ h1]) { 146 for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[h0 ^ h1]) {
147 if (ipv6_addr_equal(local, &t->parms.laddr) && 147 if (ipv6_addr_equal(local, &t->parms.laddr) &&
148 ipv6_addr_equal(remote, &t->parms.raddr) && 148 ipv6_addr_equal(remote, &t->parms.raddr) &&
149 (t->dev->flags & IFF_UP)) 149 (t->dev->flags & IFF_UP))
150 return t; 150 return t;
151 } 151 }
152 t = rcu_dereference(ip6n->tnls_wc[0]); 152 t = rcu_dereference(ip6n->tnls_wc[0]);
153 if (t && (t->dev->flags & IFF_UP)) 153 if (t && (t->dev->flags & IFF_UP))
154 return t; 154 return t;
155 155
156 return NULL; 156 return NULL;
157 } 157 }
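ip6_tnl_lookup() first scans the bucket of exact (remote, local) matches and only then falls back to the wildcard tunnel, so a configured tunnel always wins over the fallback device. Callers must hold rcu_read_lock() across the call and across any use of the result, as the receive and error paths below do:

    rcu_read_lock();
    t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr);
    if (t) {
        /* ... t is only guaranteed live inside this section ... */
    }
    rcu_read_unlock();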
158 158
159 /** 159 /**
160 * ip6_tnl_bucket - get head of list matching given tunnel parameters 160 * ip6_tnl_bucket - get head of list matching given tunnel parameters
161 * @p: parameters containing tunnel end-points 161 * @p: parameters containing tunnel end-points
162 * 162 *
163 * Description: 163 * Description:
164 * ip6_tnl_bucket() returns the head of the list matching the 164 * ip6_tnl_bucket() returns the head of the list matching the
165 * &struct in6_addr entries laddr and raddr in @p. 165 * &struct in6_addr entries laddr and raddr in @p.
166 * 166 *
167 * Return: head of IPv6 tunnel list 167 * Return: head of IPv6 tunnel list
168 **/ 168 **/
169 169
170 static struct ip6_tnl ** 170 static struct ip6_tnl **
171 ip6_tnl_bucket(struct ip6_tnl_net *ip6n, struct ip6_tnl_parm *p) 171 ip6_tnl_bucket(struct ip6_tnl_net *ip6n, struct ip6_tnl_parm *p)
172 { 172 {
173 struct in6_addr *remote = &p->raddr; 173 struct in6_addr *remote = &p->raddr;
174 struct in6_addr *local = &p->laddr; 174 struct in6_addr *local = &p->laddr;
175 unsigned h = 0; 175 unsigned h = 0;
176 int prio = 0; 176 int prio = 0;
177 177
178 if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) { 178 if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) {
179 prio = 1; 179 prio = 1;
180 h = HASH(remote) ^ HASH(local); 180 h = HASH(remote) ^ HASH(local);
181 } 181 }
182 return &ip6n->tnls[prio][h]; 182 return &ip6n->tnls[prio][h];
183 } 183 }
184 184
185 /** 185 /**
186 * ip6_tnl_link - add tunnel to hash table 186 * ip6_tnl_link - add tunnel to hash table
187 * @t: tunnel to be added 187 * @t: tunnel to be added
188 **/ 188 **/
189 189
190 static void 190 static void
191 ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t) 191 ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
192 { 192 {
193 struct ip6_tnl **tp = ip6_tnl_bucket(ip6n, &t->parms); 193 struct ip6_tnl **tp = ip6_tnl_bucket(ip6n, &t->parms);
194 194
195 spin_lock_bh(&ip6_tnl_lock); 195 spin_lock_bh(&ip6_tnl_lock);
196 t->next = *tp; 196 t->next = *tp;
197 rcu_assign_pointer(*tp, t); 197 rcu_assign_pointer(*tp, t);
198 spin_unlock_bh(&ip6_tnl_lock); 198 spin_unlock_bh(&ip6_tnl_lock);
199 } 199 }
200 200
201 /** 201 /**
202 * ip6_tnl_unlink - remove tunnel from hash table 202 * ip6_tnl_unlink - remove tunnel from hash table
203 * @t: tunnel to be removed 203 * @t: tunnel to be removed
204 **/ 204 **/
205 205
206 static void 206 static void
207 ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t) 207 ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
208 { 208 {
209 struct ip6_tnl **tp; 209 struct ip6_tnl **tp;
210 210
211 for (tp = ip6_tnl_bucket(ip6n, &t->parms); *tp; tp = &(*tp)->next) { 211 for (tp = ip6_tnl_bucket(ip6n, &t->parms); *tp; tp = &(*tp)->next) {
212 if (t == *tp) { 212 if (t == *tp) {
213 spin_lock_bh(&ip6_tnl_lock); 213 spin_lock_bh(&ip6_tnl_lock);
214 *tp = t->next; 214 *tp = t->next;
215 spin_unlock_bh(&ip6_tnl_lock); 215 spin_unlock_bh(&ip6_tnl_lock);
216 break; 216 break;
217 } 217 }
218 } 218 }
219 } 219 }
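Note the publish order in ip6_tnl_link() above: t->next is written before rcu_assign_pointer() makes t visible, so a concurrent lockless for_each_ip6_tunnel_rcu() walk can never observe a node with an uninitialized next pointer; the spinlock only serializes writers against each other.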
220 220
221 /** 221 /**
222 * ip6_tnl_create() - create a new tunnel 222 * ip6_tnl_create() - create a new tunnel
223 * @p: tunnel parameters 223 * @p: tunnel parameters
224 * @net: network namespace the tunnel belongs to 224 * @net: network namespace the tunnel belongs to
225 * 225 *
226 * Description: 226 * Description:
227 * Create tunnel matching given parameters. 227 * Create tunnel matching given parameters.
228 * 228 *
229 * Return: 229 * Return:
230 * created tunnel or NULL 230 * created tunnel or NULL
231 **/ 231 **/
232 232
233 static struct ip6_tnl *ip6_tnl_create(struct net *net, struct ip6_tnl_parm *p) 233 static struct ip6_tnl *ip6_tnl_create(struct net *net, struct ip6_tnl_parm *p)
234 { 234 {
235 struct net_device *dev; 235 struct net_device *dev;
236 struct ip6_tnl *t; 236 struct ip6_tnl *t;
237 char name[IFNAMSIZ]; 237 char name[IFNAMSIZ];
238 int err; 238 int err;
239 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); 239 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
240 240
241 if (p->name[0]) 241 if (p->name[0])
242 strlcpy(name, p->name, IFNAMSIZ); 242 strlcpy(name, p->name, IFNAMSIZ);
243 else 243 else
244 sprintf(name, "ip6tnl%%d"); 244 sprintf(name, "ip6tnl%%d");
245 245
246 dev = alloc_netdev(sizeof (*t), name, ip6_tnl_dev_setup); 246 dev = alloc_netdev(sizeof (*t), name, ip6_tnl_dev_setup);
247 if (dev == NULL) 247 if (dev == NULL)
248 goto failed; 248 goto failed;
249 249
250 dev_net_set(dev, net); 250 dev_net_set(dev, net);
251 251
252 if (strchr(name, '%')) { 252 if (strchr(name, '%')) {
253 if (dev_alloc_name(dev, name) < 0) 253 if (dev_alloc_name(dev, name) < 0)
254 goto failed_free; 254 goto failed_free;
255 } 255 }
256 256
257 t = netdev_priv(dev); 257 t = netdev_priv(dev);
258 t->parms = *p; 258 t->parms = *p;
259 ip6_tnl_dev_init(dev); 259 ip6_tnl_dev_init(dev);
260 260
261 if ((err = register_netdevice(dev)) < 0) 261 if ((err = register_netdevice(dev)) < 0)
262 goto failed_free; 262 goto failed_free;
263 263
264 dev_hold(dev); 264 dev_hold(dev);
265 ip6_tnl_link(ip6n, t); 265 ip6_tnl_link(ip6n, t);
266 return t; 266 return t;
267 267
268 failed_free: 268 failed_free:
269 free_netdev(dev); 269 free_netdev(dev);
270 failed: 270 failed:
271 return NULL; 271 return NULL;
272 } 272 }
273 273
274 /** 274 /**
275 * ip6_tnl_locate - find or create tunnel matching given parameters 275 * ip6_tnl_locate - find or create tunnel matching given parameters
276 * @p: tunnel parameters 276 * @p: tunnel parameters
277 * @create: != 0 if allowed to create a new tunnel when no match is found 277 * @create: != 0 if allowed to create a new tunnel when no match is found
278 * 278 *
279 * Description: 279 * Description:
280 * ip6_tnl_locate() first tries to locate an existing tunnel 280 * ip6_tnl_locate() first tries to locate an existing tunnel
281 * based on @p. If this is unsuccessful, but @create is set, a new 281 * based on @p. If this is unsuccessful, but @create is set, a new
282 * tunnel device is created and registered for use. 282 * tunnel device is created and registered for use.
283 * 283 *
284 * Return: 284 * Return:
285 * matching tunnel or NULL 285 * matching tunnel or NULL
286 **/ 286 **/
287 287
288 static struct ip6_tnl *ip6_tnl_locate(struct net *net, 288 static struct ip6_tnl *ip6_tnl_locate(struct net *net,
289 struct ip6_tnl_parm *p, int create) 289 struct ip6_tnl_parm *p, int create)
290 { 290 {
291 struct in6_addr *remote = &p->raddr; 291 struct in6_addr *remote = &p->raddr;
292 struct in6_addr *local = &p->laddr; 292 struct in6_addr *local = &p->laddr;
293 struct ip6_tnl *t; 293 struct ip6_tnl *t;
294 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); 294 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
295 295
296 for (t = *ip6_tnl_bucket(ip6n, p); t; t = t->next) { 296 for (t = *ip6_tnl_bucket(ip6n, p); t; t = t->next) {
297 if (ipv6_addr_equal(local, &t->parms.laddr) && 297 if (ipv6_addr_equal(local, &t->parms.laddr) &&
298 ipv6_addr_equal(remote, &t->parms.raddr)) 298 ipv6_addr_equal(remote, &t->parms.raddr))
299 return t; 299 return t;
300 } 300 }
301 if (!create) 301 if (!create)
302 return NULL; 302 return NULL;
303 return ip6_tnl_create(net, p); 303 return ip6_tnl_create(net, p);
304 } 304 }
305 305
306 /** 306 /**
307 * ip6_tnl_dev_uninit - tunnel device uninitializer 307 * ip6_tnl_dev_uninit - tunnel device uninitializer
308 * @dev: the device to be destroyed 308 * @dev: the device to be destroyed
309 * 309 *
310 * Description: 310 * Description:
311 * ip6_tnl_dev_uninit() removes tunnel from its list 311 * ip6_tnl_dev_uninit() removes tunnel from its list
312 **/ 312 **/
313 313
314 static void 314 static void
315 ip6_tnl_dev_uninit(struct net_device *dev) 315 ip6_tnl_dev_uninit(struct net_device *dev)
316 { 316 {
317 struct ip6_tnl *t = netdev_priv(dev); 317 struct ip6_tnl *t = netdev_priv(dev);
318 struct net *net = dev_net(dev); 318 struct net *net = dev_net(dev);
319 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); 319 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
320 320
321 if (dev == ip6n->fb_tnl_dev) { 321 if (dev == ip6n->fb_tnl_dev) {
322 spin_lock_bh(&ip6_tnl_lock); 322 spin_lock_bh(&ip6_tnl_lock);
323 ip6n->tnls_wc[0] = NULL; 323 ip6n->tnls_wc[0] = NULL;
324 spin_unlock_bh(&ip6_tnl_lock); 324 spin_unlock_bh(&ip6_tnl_lock);
325 } else { 325 } else {
326 ip6_tnl_unlink(ip6n, t); 326 ip6_tnl_unlink(ip6n, t);
327 } 327 }
328 ip6_tnl_dst_reset(t); 328 ip6_tnl_dst_reset(t);
329 dev_put(dev); 329 dev_put(dev);
330 } 330 }
331 331
332 /** 332 /**
333 * parse_tlv_tnl_enc_lim - handle encapsulation limit option 333 * parse_tlv_tnl_enc_lim - handle encapsulation limit option
334 * @skb: received socket buffer 334 * @skb: received socket buffer
335 * 335 *
336 * Return: 336 * Return:
337 * 0 if no encapsulation limit option is found, 337 * 0 if no encapsulation limit option is found,
338 * else the index of the option within the packet 338 * else the index of the option within the packet
339 **/ 339 **/
340 340
341 static __u16 341 static __u16
342 parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 * raw) 342 parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 * raw)
343 { 343 {
344 struct ipv6hdr *ipv6h = (struct ipv6hdr *) raw; 344 struct ipv6hdr *ipv6h = (struct ipv6hdr *) raw;
345 __u8 nexthdr = ipv6h->nexthdr; 345 __u8 nexthdr = ipv6h->nexthdr;
346 __u16 off = sizeof (*ipv6h); 346 __u16 off = sizeof (*ipv6h);
347 347
348 while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) { 348 while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) {
349 __u16 optlen = 0; 349 __u16 optlen = 0;
350 struct ipv6_opt_hdr *hdr; 350 struct ipv6_opt_hdr *hdr;
351 if (raw + off + sizeof (*hdr) > skb->data && 351 if (raw + off + sizeof (*hdr) > skb->data &&
352 !pskb_may_pull(skb, raw - skb->data + off + sizeof (*hdr))) 352 !pskb_may_pull(skb, raw - skb->data + off + sizeof (*hdr)))
353 break; 353 break;
354 354
355 hdr = (struct ipv6_opt_hdr *) (raw + off); 355 hdr = (struct ipv6_opt_hdr *) (raw + off);
356 if (nexthdr == NEXTHDR_FRAGMENT) { 356 if (nexthdr == NEXTHDR_FRAGMENT) {
357 struct frag_hdr *frag_hdr = (struct frag_hdr *) hdr; 357 struct frag_hdr *frag_hdr = (struct frag_hdr *) hdr;
358 if (frag_hdr->frag_off) 358 if (frag_hdr->frag_off)
359 break; 359 break;
360 optlen = 8; 360 optlen = 8;
361 } else if (nexthdr == NEXTHDR_AUTH) { 361 } else if (nexthdr == NEXTHDR_AUTH) {
362 optlen = (hdr->hdrlen + 2) << 2; 362 optlen = (hdr->hdrlen + 2) << 2;
363 } else { 363 } else {
364 optlen = ipv6_optlen(hdr); 364 optlen = ipv6_optlen(hdr);
365 } 365 }
366 if (nexthdr == NEXTHDR_DEST) { 366 if (nexthdr == NEXTHDR_DEST) {
367 __u16 i = off + 2; 367 __u16 i = off + 2;
368 while (1) { 368 while (1) {
369 struct ipv6_tlv_tnl_enc_lim *tel; 369 struct ipv6_tlv_tnl_enc_lim *tel;
370 370
371 /* No more room for encapsulation limit */ 371 /* No more room for encapsulation limit */
372 if (i + sizeof (*tel) > off + optlen) 372 if (i + sizeof (*tel) > off + optlen)
373 break; 373 break;
374 374
375 tel = (struct ipv6_tlv_tnl_enc_lim *) &raw[i]; 375 tel = (struct ipv6_tlv_tnl_enc_lim *) &raw[i];
376 /* return index of option if found and valid */ 376 /* return index of option if found and valid */
377 if (tel->type == IPV6_TLV_TNL_ENCAP_LIMIT && 377 if (tel->type == IPV6_TLV_TNL_ENCAP_LIMIT &&
378 tel->length == 1) 378 tel->length == 1)
379 return i; 379 return i;
380 /* else jump to next option */ 380 /* else jump to next option */
381 if (tel->type) 381 if (tel->type)
382 i += tel->length + 2; 382 i += tel->length + 2;
383 else 383 else
384 i++; 384 i++;
385 } 385 }
386 } 386 }
387 nexthdr = hdr->nexthdr; 387 nexthdr = hdr->nexthdr;
388 off += optlen; 388 off += optlen;
389 } 389 }
390 return 0; 390 return 0;
391 } 391 }
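A worked example of the walk above, assuming the tunneled packet's first extension header is the 8-byte destination-options header that init_tel_txopt() below constructs:

    /* [ IPv6 header: 40 bytes ][ dst-opts: nh, hdrlen=0, TLVs... ]
     *
     * off starts at 40 (sizeof(struct ipv6hdr)); nexthdr is
     * NEXTHDR_DEST, so the TLV scan begins at i = off + 2 = 42,
     * finds type IPV6_TLV_TNL_ENCAP_LIMIT with length 1 there, and
     * returns 42.  A return of 0 means "no limit option found".
     */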
392 392
393 /** 393 /**
394 * ip6_tnl_err - tunnel error handler 394 * ip6_tnl_err - tunnel error handler
395 * 395 *
396 * Description: 396 * Description:
397 * ip6_tnl_err() should handle errors in the tunnel according 397 * ip6_tnl_err() should handle errors in the tunnel according
398 * to the specifications in RFC 2473. 398 * to the specifications in RFC 2473.
399 **/ 399 **/
400 400
401 static int 401 static int
402 ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, 402 ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
403 u8 *type, u8 *code, int *msg, __u32 *info, int offset) 403 u8 *type, u8 *code, int *msg, __u32 *info, int offset)
404 { 404 {
405 struct ipv6hdr *ipv6h = (struct ipv6hdr *) skb->data; 405 struct ipv6hdr *ipv6h = (struct ipv6hdr *) skb->data;
406 struct ip6_tnl *t; 406 struct ip6_tnl *t;
407 int rel_msg = 0; 407 int rel_msg = 0;
408 u8 rel_type = ICMPV6_DEST_UNREACH; 408 u8 rel_type = ICMPV6_DEST_UNREACH;
409 u8 rel_code = ICMPV6_ADDR_UNREACH; 409 u8 rel_code = ICMPV6_ADDR_UNREACH;
410 __u32 rel_info = 0; 410 __u32 rel_info = 0;
411 __u16 len; 411 __u16 len;
412 int err = -ENOENT; 412 int err = -ENOENT;
413 413
414 /* If the packet doesn't contain the original IPv6 header we are 414 /* If the packet doesn't contain the original IPv6 header we are
415 in trouble since we might need the source address for further 415 in trouble since we might need the source address for further
416 processing of the error. */ 416 processing of the error. */
417 417
418 rcu_read_lock(); 418 rcu_read_lock();
419 if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->daddr, 419 if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->daddr,
420 &ipv6h->saddr)) == NULL) 420 &ipv6h->saddr)) == NULL)
421 goto out; 421 goto out;
422 422
423 if (t->parms.proto != ipproto && t->parms.proto != 0) 423 if (t->parms.proto != ipproto && t->parms.proto != 0)
424 goto out; 424 goto out;
425 425
426 err = 0; 426 err = 0;
427 427
428 switch (*type) { 428 switch (*type) {
429 __u32 teli; 429 __u32 teli;
430 struct ipv6_tlv_tnl_enc_lim *tel; 430 struct ipv6_tlv_tnl_enc_lim *tel;
431 __u32 mtu; 431 __u32 mtu;
432 case ICMPV6_DEST_UNREACH: 432 case ICMPV6_DEST_UNREACH:
433 if (net_ratelimit()) 433 if (net_ratelimit())
434 printk(KERN_WARNING 434 printk(KERN_WARNING
435 "%s: Path to destination invalid " 435 "%s: Path to destination invalid "
436 "or inactive!\n", t->parms.name); 436 "or inactive!\n", t->parms.name);
437 rel_msg = 1; 437 rel_msg = 1;
438 break; 438 break;
439 case ICMPV6_TIME_EXCEED: 439 case ICMPV6_TIME_EXCEED:
440 if ((*code) == ICMPV6_EXC_HOPLIMIT) { 440 if ((*code) == ICMPV6_EXC_HOPLIMIT) {
441 if (net_ratelimit()) 441 if (net_ratelimit())
442 printk(KERN_WARNING 442 printk(KERN_WARNING
443 "%s: Too small hop limit or " 443 "%s: Too small hop limit or "
444 "routing loop in tunnel!\n", 444 "routing loop in tunnel!\n",
445 t->parms.name); 445 t->parms.name);
446 rel_msg = 1; 446 rel_msg = 1;
447 } 447 }
448 break; 448 break;
449 case ICMPV6_PARAMPROB: 449 case ICMPV6_PARAMPROB:
450 teli = 0; 450 teli = 0;
451 if ((*code) == ICMPV6_HDR_FIELD) 451 if ((*code) == ICMPV6_HDR_FIELD)
452 teli = parse_tlv_tnl_enc_lim(skb, skb->data); 452 teli = parse_tlv_tnl_enc_lim(skb, skb->data);
453 453
454 if (teli && teli == *info - 2) { 454 if (teli && teli == *info - 2) {
455 tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli]; 455 tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
456 if (tel->encap_limit == 0) { 456 if (tel->encap_limit == 0) {
457 if (net_ratelimit()) 457 if (net_ratelimit())
458 printk(KERN_WARNING 458 printk(KERN_WARNING
459 "%s: Too small encapsulation " 459 "%s: Too small encapsulation "
460 "limit or routing loop in " 460 "limit or routing loop in "
461 "tunnel!\n", t->parms.name); 461 "tunnel!\n", t->parms.name);
462 rel_msg = 1; 462 rel_msg = 1;
463 } 463 }
464 } else if (net_ratelimit()) { 464 } else if (net_ratelimit()) {
465 printk(KERN_WARNING 465 printk(KERN_WARNING
466 "%s: Recipient unable to parse tunneled " 466 "%s: Recipient unable to parse tunneled "
467 "packet!\n ", t->parms.name); 467 "packet!\n ", t->parms.name);
468 } 468 }
469 break; 469 break;
470 case ICMPV6_PKT_TOOBIG: 470 case ICMPV6_PKT_TOOBIG:
471 mtu = *info - offset; 471 mtu = *info - offset;
472 if (mtu < IPV6_MIN_MTU) 472 if (mtu < IPV6_MIN_MTU)
473 mtu = IPV6_MIN_MTU; 473 mtu = IPV6_MIN_MTU;
474 t->dev->mtu = mtu; 474 t->dev->mtu = mtu;
475 475
476 if ((len = sizeof (*ipv6h) + ntohs(ipv6h->payload_len)) > mtu) { 476 if ((len = sizeof (*ipv6h) + ntohs(ipv6h->payload_len)) > mtu) {
477 rel_type = ICMPV6_PKT_TOOBIG; 477 rel_type = ICMPV6_PKT_TOOBIG;
478 rel_code = 0; 478 rel_code = 0;
479 rel_info = mtu; 479 rel_info = mtu;
480 rel_msg = 1; 480 rel_msg = 1;
481 } 481 }
482 break; 482 break;
483 } 483 }
484 484
485 *type = rel_type; 485 *type = rel_type;
486 *code = rel_code; 486 *code = rel_code;
487 *info = rel_info; 487 *info = rel_info;
488 *msg = rel_msg; 488 *msg = rel_msg;
489 489
490 out: 490 out:
491 rcu_read_unlock(); 491 rcu_read_unlock();
492 return err; 492 return err;
493 } 493 }
494 494
495 static int 495 static int
496 ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, 496 ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
497 u8 type, u8 code, int offset, __be32 info) 497 u8 type, u8 code, int offset, __be32 info)
498 { 498 {
499 int rel_msg = 0; 499 int rel_msg = 0;
500 u8 rel_type = type; 500 u8 rel_type = type;
501 u8 rel_code = code; 501 u8 rel_code = code;
502 __u32 rel_info = ntohl(info); 502 __u32 rel_info = ntohl(info);
503 int err; 503 int err;
504 struct sk_buff *skb2; 504 struct sk_buff *skb2;
505 struct iphdr *eiph; 505 struct iphdr *eiph;
506 struct flowi fl; 506 struct flowi fl;
507 struct rtable *rt; 507 struct rtable *rt;
508 508
509 err = ip6_tnl_err(skb, IPPROTO_IPIP, opt, &rel_type, &rel_code, 509 err = ip6_tnl_err(skb, IPPROTO_IPIP, opt, &rel_type, &rel_code,
510 &rel_msg, &rel_info, offset); 510 &rel_msg, &rel_info, offset);
511 if (err < 0) 511 if (err < 0)
512 return err; 512 return err;
513 513
514 if (rel_msg == 0) 514 if (rel_msg == 0)
515 return 0; 515 return 0;
516 516
517 switch (rel_type) { 517 switch (rel_type) {
518 case ICMPV6_DEST_UNREACH: 518 case ICMPV6_DEST_UNREACH:
519 if (rel_code != ICMPV6_ADDR_UNREACH) 519 if (rel_code != ICMPV6_ADDR_UNREACH)
520 return 0; 520 return 0;
521 rel_type = ICMP_DEST_UNREACH; 521 rel_type = ICMP_DEST_UNREACH;
522 rel_code = ICMP_HOST_UNREACH; 522 rel_code = ICMP_HOST_UNREACH;
523 break; 523 break;
524 case ICMPV6_PKT_TOOBIG: 524 case ICMPV6_PKT_TOOBIG:
525 if (rel_code != 0) 525 if (rel_code != 0)
526 return 0; 526 return 0;
527 rel_type = ICMP_DEST_UNREACH; 527 rel_type = ICMP_DEST_UNREACH;
528 rel_code = ICMP_FRAG_NEEDED; 528 rel_code = ICMP_FRAG_NEEDED;
529 break; 529 break;
530 default: 530 default:
531 return 0; 531 return 0;
532 } 532 }
533 533
534 if (!pskb_may_pull(skb, offset + sizeof(struct iphdr))) 534 if (!pskb_may_pull(skb, offset + sizeof(struct iphdr)))
535 return 0; 535 return 0;
536 536
537 skb2 = skb_clone(skb, GFP_ATOMIC); 537 skb2 = skb_clone(skb, GFP_ATOMIC);
538 if (!skb2) 538 if (!skb2)
539 return 0; 539 return 0;
540 540
541 skb_dst_drop(skb2); 541 skb_dst_drop(skb2);
542 542
543 skb_pull(skb2, offset); 543 skb_pull(skb2, offset);
544 skb_reset_network_header(skb2); 544 skb_reset_network_header(skb2);
545 eiph = ip_hdr(skb2); 545 eiph = ip_hdr(skb2);
546 546
547 /* Try to guess incoming interface */ 547 /* Try to guess incoming interface */
548 memset(&fl, 0, sizeof(fl)); 548 memset(&fl, 0, sizeof(fl));
549 fl.fl4_dst = eiph->saddr; 549 fl.fl4_dst = eiph->saddr;
550 fl.fl4_tos = RT_TOS(eiph->tos); 550 fl.fl4_tos = RT_TOS(eiph->tos);
551 fl.proto = IPPROTO_IPIP; 551 fl.proto = IPPROTO_IPIP;
552 if (ip_route_output_key(dev_net(skb->dev), &rt, &fl)) 552 if (ip_route_output_key(dev_net(skb->dev), &rt, &fl))
553 goto out; 553 goto out;
554 554
555 skb2->dev = rt->u.dst.dev; 555 skb2->dev = rt->u.dst.dev;
556 556
557 /* route "incoming" packet */ 557 /* route "incoming" packet */
558 if (rt->rt_flags & RTCF_LOCAL) { 558 if (rt->rt_flags & RTCF_LOCAL) {
559 ip_rt_put(rt); 559 ip_rt_put(rt);
560 rt = NULL; 560 rt = NULL;
561 fl.fl4_dst = eiph->daddr; 561 fl.fl4_dst = eiph->daddr;
562 fl.fl4_src = eiph->saddr; 562 fl.fl4_src = eiph->saddr;
563 fl.fl4_tos = eiph->tos; 563 fl.fl4_tos = eiph->tos;
564 if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) || 564 if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) ||
565 rt->u.dst.dev->type != ARPHRD_TUNNEL) { 565 rt->u.dst.dev->type != ARPHRD_TUNNEL) {
566 ip_rt_put(rt); 566 ip_rt_put(rt);
567 goto out; 567 goto out;
568 } 568 }
569 skb_dst_set(skb2, (struct dst_entry *)rt); 569 skb_dst_set(skb2, (struct dst_entry *)rt);
570 } else { 570 } else {
571 ip_rt_put(rt); 571 ip_rt_put(rt);
572 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, 572 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos,
573 skb2->dev) || 573 skb2->dev) ||
574 skb_dst(skb2)->dev->type != ARPHRD_TUNNEL) 574 skb_dst(skb2)->dev->type != ARPHRD_TUNNEL)
575 goto out; 575 goto out;
576 } 576 }
577 577
578 /* change mtu on this route */ 578 /* change mtu on this route */
579 if (rel_type == ICMP_DEST_UNREACH && rel_code == ICMP_FRAG_NEEDED) { 579 if (rel_type == ICMP_DEST_UNREACH && rel_code == ICMP_FRAG_NEEDED) {
580 if (rel_info > dst_mtu(skb_dst(skb2))) 580 if (rel_info > dst_mtu(skb_dst(skb2)))
581 goto out; 581 goto out;
582 582
583 skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), rel_info); 583 skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), rel_info);
584 } 584 }
585 585
586 icmp_send(skb2, rel_type, rel_code, htonl(rel_info)); 586 icmp_send(skb2, rel_type, rel_code, htonl(rel_info));
587 587
588 out: 588 out:
589 kfree_skb(skb2); 589 kfree_skb(skb2);
590 return 0; 590 return 0;
591 } 591 }
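For the ip4ip6 case the relayed error must be rewritten from ICMPv6 into ICMPv4 before icmp_send(); the switch above boils down to this mapping (a summary, not a kernel table):

    /*  inner ICMPv6 (type / code)            relayed ICMPv4 (type / code)
     *  ICMPV6_DEST_UNREACH / ADDR_UNREACH -> ICMP_DEST_UNREACH / HOST_UNREACH
     *  ICMPV6_PKT_TOOBIG   / 0            -> ICMP_DEST_UNREACH / FRAG_NEEDED
     *  anything else                      -> no error is relayed
     */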
592 592
593 static int 593 static int
594 ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, 594 ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
595 u8 type, u8 code, int offset, __be32 info) 595 u8 type, u8 code, int offset, __be32 info)
596 { 596 {
597 int rel_msg = 0; 597 int rel_msg = 0;
598 u8 rel_type = type; 598 u8 rel_type = type;
599 u8 rel_code = code; 599 u8 rel_code = code;
600 __u32 rel_info = ntohl(info); 600 __u32 rel_info = ntohl(info);
601 int err; 601 int err;
602 602
603 err = ip6_tnl_err(skb, IPPROTO_IPV6, opt, &rel_type, &rel_code, 603 err = ip6_tnl_err(skb, IPPROTO_IPV6, opt, &rel_type, &rel_code,
604 &rel_msg, &rel_info, offset); 604 &rel_msg, &rel_info, offset);
605 if (err < 0) 605 if (err < 0)
606 return err; 606 return err;
607 607
608 if (rel_msg && pskb_may_pull(skb, offset + sizeof(struct ipv6hdr))) { 608 if (rel_msg && pskb_may_pull(skb, offset + sizeof(struct ipv6hdr))) {
609 struct rt6_info *rt; 609 struct rt6_info *rt;
610 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 610 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
611 611
612 if (!skb2) 612 if (!skb2)
613 return 0; 613 return 0;
614 614
615 skb_dst_drop(skb2); 615 skb_dst_drop(skb2);
616 skb_pull(skb2, offset); 616 skb_pull(skb2, offset);
617 skb_reset_network_header(skb2); 617 skb_reset_network_header(skb2);
618 618
619 /* Try to guess incoming interface */ 619 /* Try to guess incoming interface */
620 rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, 620 rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr,
621 NULL, 0, 0); 621 NULL, 0, 0);
622 622
623 if (rt && rt->rt6i_dev) 623 if (rt && rt->rt6i_dev)
624 skb2->dev = rt->rt6i_dev; 624 skb2->dev = rt->rt6i_dev;
625 625
626 icmpv6_send(skb2, rel_type, rel_code, rel_info); 626 icmpv6_send(skb2, rel_type, rel_code, rel_info);
627 627
628 if (rt) 628 if (rt)
629 dst_release(&rt->u.dst); 629 dst_release(&rt->u.dst);
630 630
631 kfree_skb(skb2); 631 kfree_skb(skb2);
632 } 632 }
633 633
634 return 0; 634 return 0;
635 } 635 }
636 636
637 static void ip4ip6_dscp_ecn_decapsulate(struct ip6_tnl *t, 637 static void ip4ip6_dscp_ecn_decapsulate(struct ip6_tnl *t,
638 struct ipv6hdr *ipv6h, 638 struct ipv6hdr *ipv6h,
639 struct sk_buff *skb) 639 struct sk_buff *skb)
640 { 640 {
641 __u8 dsfield = ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK; 641 __u8 dsfield = ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK;
642 642
643 if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY) 643 if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
644 ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, dsfield); 644 ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, dsfield);
645 645
646 if (INET_ECN_is_ce(dsfield)) 646 if (INET_ECN_is_ce(dsfield))
647 IP_ECN_set_ce(ip_hdr(skb)); 647 IP_ECN_set_ce(ip_hdr(skb));
648 } 648 }
649 649
650 static void ip6ip6_dscp_ecn_decapsulate(struct ip6_tnl *t, 650 static void ip6ip6_dscp_ecn_decapsulate(struct ip6_tnl *t,
651 struct ipv6hdr *ipv6h, 651 struct ipv6hdr *ipv6h,
652 struct sk_buff *skb) 652 struct sk_buff *skb)
653 { 653 {
654 if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY) 654 if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
655 ipv6_copy_dscp(ipv6_get_dsfield(ipv6h), ipv6_hdr(skb)); 655 ipv6_copy_dscp(ipv6_get_dsfield(ipv6h), ipv6_hdr(skb));
656 656
657 if (INET_ECN_is_ce(ipv6_get_dsfield(ipv6h))) 657 if (INET_ECN_is_ce(ipv6_get_dsfield(ipv6h)))
658 IP6_ECN_set_ce(ipv6_hdr(skb)); 658 IP6_ECN_set_ce(ipv6_hdr(skb));
659 } 659 }
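Both decapsulate helpers propagate the outer header's markings inward: with IP6_TNL_F_RCV_DSCP_COPY set, the outer DSCP overwrites the inner one, and an outer CE mark is copied in so congestion signaled on the tunnel path survives decapsulation. A worked example for the IPv4-in-IPv6 case (illustrative values):

    /* outer traffic class 0x03 -> INET_ECN_is_ce() is true;
     * inner iph->tos 0xBA (EF + ECT(0)) -> IP_ECN_set_ce() yields
     * 0xBB (EF + CE).  A Not-ECT inner packet is left unmarked by
     * IP_ECN_set_ce(), per the usual ECN rules. */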
660 660
661 /* called with rcu_read_lock() */ 661 /* called with rcu_read_lock() */
662 static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t) 662 static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t)
663 { 663 {
664 struct ip6_tnl_parm *p = &t->parms; 664 struct ip6_tnl_parm *p = &t->parms;
665 int ret = 0; 665 int ret = 0;
666 struct net *net = dev_net(t->dev); 666 struct net *net = dev_net(t->dev);
667 667
668 if (p->flags & IP6_TNL_F_CAP_RCV) { 668 if (p->flags & IP6_TNL_F_CAP_RCV) {
669 struct net_device *ldev = NULL; 669 struct net_device *ldev = NULL;
670 670
671 if (p->link) 671 if (p->link)
672 ldev = dev_get_by_index_rcu(net, p->link); 672 ldev = dev_get_by_index_rcu(net, p->link);
673 673
674 if ((ipv6_addr_is_multicast(&p->laddr) || 674 if ((ipv6_addr_is_multicast(&p->laddr) ||
675 likely(ipv6_chk_addr(net, &p->laddr, ldev, 0))) && 675 likely(ipv6_chk_addr(net, &p->laddr, ldev, 0))) &&
676 likely(!ipv6_chk_addr(net, &p->raddr, NULL, 0))) 676 likely(!ipv6_chk_addr(net, &p->raddr, NULL, 0)))
677 ret = 1; 677 ret = 1;
678 678
679 } 679 }
680 return ret; 680 return ret;
681 } 681 }
682 682
683 /** 683 /**
684 * ip6_tnl_rcv - decapsulate IPv6 packet and retransmit it locally 684 * ip6_tnl_rcv - decapsulate IPv6 packet and retransmit it locally
685 * @skb: received socket buffer 685 * @skb: received socket buffer
686 * @protocol: ethernet protocol ID 686 * @protocol: ethernet protocol ID
687 * @dscp_ecn_decapsulate: the function to decapsulate DSCP code and ECN 687 * @dscp_ecn_decapsulate: the function to decapsulate DSCP code and ECN
688 * 688 *
689 * Return: 0 689 * Return: 0
690 **/ 690 **/
691 691
692 static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol, 692 static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
693 __u8 ipproto, 693 __u8 ipproto,
694 void (*dscp_ecn_decapsulate)(struct ip6_tnl *t, 694 void (*dscp_ecn_decapsulate)(struct ip6_tnl *t,
695 struct ipv6hdr *ipv6h, 695 struct ipv6hdr *ipv6h,
696 struct sk_buff *skb)) 696 struct sk_buff *skb))
697 { 697 {
698 struct ip6_tnl *t; 698 struct ip6_tnl *t;
699 struct ipv6hdr *ipv6h = ipv6_hdr(skb); 699 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
700 700
701 rcu_read_lock(); 701 rcu_read_lock();
702 702
703 if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, 703 if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr,
704 &ipv6h->daddr)) != NULL) { 704 &ipv6h->daddr)) != NULL) {
705 if (t->parms.proto != ipproto && t->parms.proto != 0) { 705 if (t->parms.proto != ipproto && t->parms.proto != 0) {
706 rcu_read_unlock(); 706 rcu_read_unlock();
707 goto discard; 707 goto discard;
708 } 708 }
709 709
710 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { 710 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
711 rcu_read_unlock(); 711 rcu_read_unlock();
712 goto discard; 712 goto discard;
713 } 713 }
714 714
715 if (!ip6_tnl_rcv_ctl(t)) { 715 if (!ip6_tnl_rcv_ctl(t)) {
716 t->dev->stats.rx_dropped++; 716 t->dev->stats.rx_dropped++;
717 rcu_read_unlock(); 717 rcu_read_unlock();
718 goto discard; 718 goto discard;
719 } 719 }
720 secpath_reset(skb); 720 secpath_reset(skb);
721 skb->mac_header = skb->network_header; 721 skb->mac_header = skb->network_header;
722 skb_reset_network_header(skb); 722 skb_reset_network_header(skb);
723 skb->protocol = htons(protocol); 723 skb->protocol = htons(protocol);
724 skb->pkt_type = PACKET_HOST; 724 skb->pkt_type = PACKET_HOST;
725 memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); 725 memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
726 skb->dev = t->dev;
727 skb_dst_drop(skb);
728 nf_reset(skb);
729 726
730 dscp_ecn_decapsulate(t, ipv6h, skb); 727 skb_tunnel_rx(skb, t->dev);
731 728
732 t->dev->stats.rx_packets++; 729 dscp_ecn_decapsulate(t, ipv6h, skb);
733 t->dev->stats.rx_bytes += skb->len;
734 netif_rx(skb); 730 netif_rx(skb);
735 rcu_read_unlock(); 731 rcu_read_unlock();
736 return 0; 732 return 0;
737 } 733 }
738 rcu_read_unlock(); 734 rcu_read_unlock();
739 return 1; 735 return 1;
740 736
741 discard: 737 discard:
742 kfree_skb(skb); 738 kfree_skb(skb);
743 return 0; 739 return 0;
744 } 740 }
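This hunk is the point of the commit: the open-coded receive bookkeeping (the skb->dev assignment, skb_dst_drop(), nf_reset() and the rx_packets/rx_bytes accounting removed above) collapses into a single skb_tunnel_rx() call, which additionally clears skb->rxhash so packet steering recomputes the hash for the decapsulated flow instead of reusing the outer header's. Reconstructed from the removed lines plus the rxhash reset described in the commit message, the helper amounts to the following (the authoritative definition is the net/dst.h hunk of this commit):

    static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev)
    {
        dev->stats.rx_packets++;            /* was t->dev->stats.rx_packets++   */
        dev->stats.rx_bytes += skb->len;    /* was t->dev->stats.rx_bytes += .. */
        skb->rxhash = 0;                    /* force rxhash recomputation       */
        skb_dst_drop(skb);                  /* was open-coded in each tunnel    */
        nf_reset(skb);
        skb->dev = dev;
    }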
745 741
746 static int ip4ip6_rcv(struct sk_buff *skb) 742 static int ip4ip6_rcv(struct sk_buff *skb)
747 { 743 {
748 return ip6_tnl_rcv(skb, ETH_P_IP, IPPROTO_IPIP, 744 return ip6_tnl_rcv(skb, ETH_P_IP, IPPROTO_IPIP,
749 ip4ip6_dscp_ecn_decapsulate); 745 ip4ip6_dscp_ecn_decapsulate);
750 } 746 }
751 747
752 static int ip6ip6_rcv(struct sk_buff *skb) 748 static int ip6ip6_rcv(struct sk_buff *skb)
753 { 749 {
754 return ip6_tnl_rcv(skb, ETH_P_IPV6, IPPROTO_IPV6, 750 return ip6_tnl_rcv(skb, ETH_P_IPV6, IPPROTO_IPV6,
755 ip6ip6_dscp_ecn_decapsulate); 751 ip6ip6_dscp_ecn_decapsulate);
756 } 752 }
757 753
758 struct ipv6_tel_txoption { 754 struct ipv6_tel_txoption {
759 struct ipv6_txoptions ops; 755 struct ipv6_txoptions ops;
760 __u8 dst_opt[8]; 756 __u8 dst_opt[8];
761 }; 757 };
762 758
763 static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit) 759 static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit)
764 { 760 {
765 memset(opt, 0, sizeof(struct ipv6_tel_txoption)); 761 memset(opt, 0, sizeof(struct ipv6_tel_txoption));
766 762
767 opt->dst_opt[2] = IPV6_TLV_TNL_ENCAP_LIMIT; 763 opt->dst_opt[2] = IPV6_TLV_TNL_ENCAP_LIMIT;
768 opt->dst_opt[3] = 1; 764 opt->dst_opt[3] = 1;
769 opt->dst_opt[4] = encap_limit; 765 opt->dst_opt[4] = encap_limit;
770 opt->dst_opt[5] = IPV6_TLV_PADN; 766 opt->dst_opt[5] = IPV6_TLV_PADN;
771 opt->dst_opt[6] = 1; 767 opt->dst_opt[6] = 1;
772 768
773 opt->ops.dst0opt = (struct ipv6_opt_hdr *) opt->dst_opt; 769 opt->ops.dst0opt = (struct ipv6_opt_hdr *) opt->dst_opt;
774 opt->ops.opt_nflen = 8; 770 opt->ops.opt_nflen = 8;
775 } 771 }
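init_tel_txopt() hand-rolls the 8-byte destination-options header carrying the RFC 2473 encapsulation-limit option; dst_opt[0] (next header) is left zero for ipv6_push_nfrag_opts() to fill, and dst_opt[1] (hdrlen) correctly stays 0 since the header is exactly 8 octets. Annotated, the buffer looks like this:

    __u8 dst_opt[8] = {
        0,              /* [0] next header, filled when the header is pushed */
        0,              /* [1] hdrlen: 8 octets total -> (8 / 8) - 1 = 0     */
        4,              /* [2] IPV6_TLV_TNL_ENCAP_LIMIT                      */
        1,              /* [3] option length: 1                              */
        encap_limit,    /* [4] the limit value itself                        */
        1,              /* [5] IPV6_TLV_PADN                                 */
        1,              /* [6] PadN length: 1                                */
        0,              /* [7] PadN padding byte                             */
    };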
776 772
777 /** 773 /**
778 * ip6_tnl_addr_conflict - compare packet addresses to tunnel's own 774 * ip6_tnl_addr_conflict - compare packet addresses to tunnel's own
779 * @t: the outgoing tunnel device 775 * @t: the outgoing tunnel device
780 * @hdr: IPv6 header from the incoming packet 776 * @hdr: IPv6 header from the incoming packet
781 * 777 *
782 * Description: 778 * Description:
783 * Avoid trivial tunneling loop by checking that tunnel exit-point 779 * Avoid trivial tunneling loop by checking that tunnel exit-point
784 * doesn't match source of incoming packet. 780 * doesn't match source of incoming packet.
785 * 781 *
786 * Return: 782 * Return:
787 * 1 if conflict, 783 * 1 if conflict,
788 * 0 else 784 * 0 else
789 **/ 785 **/
790 786
791 static inline int 787 static inline int
792 ip6_tnl_addr_conflict(struct ip6_tnl *t, struct ipv6hdr *hdr) 788 ip6_tnl_addr_conflict(struct ip6_tnl *t, struct ipv6hdr *hdr)
793 { 789 {
794 return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr); 790 return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
795 } 791 }
796 792
797 static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t) 793 static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
798 { 794 {
799 struct ip6_tnl_parm *p = &t->parms; 795 struct ip6_tnl_parm *p = &t->parms;
800 int ret = 0; 796 int ret = 0;
801 struct net *net = dev_net(t->dev); 797 struct net *net = dev_net(t->dev);
802 798
803 if (p->flags & IP6_TNL_F_CAP_XMIT) { 799 if (p->flags & IP6_TNL_F_CAP_XMIT) {
804 struct net_device *ldev = NULL; 800 struct net_device *ldev = NULL;
805 801
806 rcu_read_lock(); 802 rcu_read_lock();
807 if (p->link) 803 if (p->link)
808 ldev = dev_get_by_index_rcu(net, p->link); 804 ldev = dev_get_by_index_rcu(net, p->link);
809 805
810 if (unlikely(!ipv6_chk_addr(net, &p->laddr, ldev, 0))) 806 if (unlikely(!ipv6_chk_addr(net, &p->laddr, ldev, 0)))
811 printk(KERN_WARNING 807 printk(KERN_WARNING
812 "%s xmit: Local address not yet configured!\n", 808 "%s xmit: Local address not yet configured!\n",
813 p->name); 809 p->name);
814 else if (!ipv6_addr_is_multicast(&p->raddr) && 810 else if (!ipv6_addr_is_multicast(&p->raddr) &&
815 unlikely(ipv6_chk_addr(net, &p->raddr, NULL, 0))) 811 unlikely(ipv6_chk_addr(net, &p->raddr, NULL, 0)))
816 printk(KERN_WARNING 812 printk(KERN_WARNING
817 "%s xmit: Routing loop! " 813 "%s xmit: Routing loop! "
818 "Remote address found on this node!\n", 814 "Remote address found on this node!\n",
819 p->name); 815 p->name);
820 else 816 else
821 ret = 1; 817 ret = 1;
822 rcu_read_unlock(); 818 rcu_read_unlock();
823 } 819 }
824 return ret; 820 return ret;
825 } 821 }
826 /** 822 /**
827 * ip6_tnl_xmit2 - encapsulate packet and send 823 * ip6_tnl_xmit2 - encapsulate packet and send
828 * @skb: the outgoing socket buffer 824 * @skb: the outgoing socket buffer
829 * @dev: the outgoing tunnel device 825 * @dev: the outgoing tunnel device
830 * @dsfield: dscp code for outer header 826 * @dsfield: dscp code for outer header
831 * @fl: flow of tunneled packet 827 * @fl: flow of tunneled packet
832 * @encap_limit: encapsulation limit 828 * @encap_limit: encapsulation limit
833 * @pmtu: Path MTU is stored if packet is too big 829 * @pmtu: Path MTU is stored if packet is too big
834 * 830 *
835 * Description: 831 * Description:
836 * Build new header and do some sanity checks on the packet before sending 832 * Build new header and do some sanity checks on the packet before sending
837 * it. 833 * it.
838 * 834 *
839 * Return: 835 * Return:
840 * 0 on success 836 * 0 on success
841 * -1 on failure 837 * -1 on failure
842 * %-EMSGSIZE if the message is too big; the MTU is returned via @pmtu. 838 * %-EMSGSIZE if the message is too big; the MTU is returned via @pmtu.
843 **/ 839 **/
844 840
845 static int ip6_tnl_xmit2(struct sk_buff *skb, 841 static int ip6_tnl_xmit2(struct sk_buff *skb,
846 struct net_device *dev, 842 struct net_device *dev,
847 __u8 dsfield, 843 __u8 dsfield,
848 struct flowi *fl, 844 struct flowi *fl,
849 int encap_limit, 845 int encap_limit,
850 __u32 *pmtu) 846 __u32 *pmtu)
851 { 847 {
852 struct net *net = dev_net(dev); 848 struct net *net = dev_net(dev);
853 struct ip6_tnl *t = netdev_priv(dev); 849 struct ip6_tnl *t = netdev_priv(dev);
854 struct net_device_stats *stats = &t->dev->stats; 850 struct net_device_stats *stats = &t->dev->stats;
855 struct ipv6hdr *ipv6h = ipv6_hdr(skb); 851 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
856 struct ipv6_tel_txoption opt; 852 struct ipv6_tel_txoption opt;
857 struct dst_entry *dst; 853 struct dst_entry *dst;
858 struct net_device *tdev; 854 struct net_device *tdev;
859 int mtu; 855 int mtu;
860 unsigned int max_headroom = sizeof(struct ipv6hdr); 856 unsigned int max_headroom = sizeof(struct ipv6hdr);
861 u8 proto; 857 u8 proto;
862 int err = -1; 858 int err = -1;
863 int pkt_len; 859 int pkt_len;
864 860
865 if ((dst = ip6_tnl_dst_check(t)) != NULL) 861 if ((dst = ip6_tnl_dst_check(t)) != NULL)
866 dst_hold(dst); 862 dst_hold(dst);
867 else { 863 else {
868 dst = ip6_route_output(net, NULL, fl); 864 dst = ip6_route_output(net, NULL, fl);
869 865
870 if (dst->error || xfrm_lookup(net, &dst, fl, NULL, 0) < 0) 866 if (dst->error || xfrm_lookup(net, &dst, fl, NULL, 0) < 0)
871 goto tx_err_link_failure; 867 goto tx_err_link_failure;
872 } 868 }
873 869
874 tdev = dst->dev; 870 tdev = dst->dev;
875 871
876 if (tdev == dev) { 872 if (tdev == dev) {
877 stats->collisions++; 873 stats->collisions++;
878 if (net_ratelimit()) 874 if (net_ratelimit())
879 printk(KERN_WARNING 875 printk(KERN_WARNING
880 "%s: Local routing loop detected!\n", 876 "%s: Local routing loop detected!\n",
881 t->parms.name); 877 t->parms.name);
882 goto tx_err_dst_release; 878 goto tx_err_dst_release;
883 } 879 }
884 mtu = dst_mtu(dst) - sizeof (*ipv6h); 880 mtu = dst_mtu(dst) - sizeof (*ipv6h);
885 if (encap_limit >= 0) { 881 if (encap_limit >= 0) {
886 max_headroom += 8; 882 max_headroom += 8;
887 mtu -= 8; 883 mtu -= 8;
888 } 884 }
889 if (mtu < IPV6_MIN_MTU) 885 if (mtu < IPV6_MIN_MTU)
890 mtu = IPV6_MIN_MTU; 886 mtu = IPV6_MIN_MTU;
891 if (skb_dst(skb)) 887 if (skb_dst(skb))
892 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); 888 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
893 if (skb->len > mtu) { 889 if (skb->len > mtu) {
894 *pmtu = mtu; 890 *pmtu = mtu;
895 err = -EMSGSIZE; 891 err = -EMSGSIZE;
896 goto tx_err_dst_release; 892 goto tx_err_dst_release;
897 } 893 }
898 894
899 /* 895 /*
900 * Okay, now see if we can stuff it in the buffer as-is. 896 * Okay, now see if we can stuff it in the buffer as-is.
901 */ 897 */
902 max_headroom += LL_RESERVED_SPACE(tdev); 898 max_headroom += LL_RESERVED_SPACE(tdev);
903 899
904 if (skb_headroom(skb) < max_headroom || skb_shared(skb) || 900 if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
905 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { 901 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
906 struct sk_buff *new_skb; 902 struct sk_buff *new_skb;
907 903
908 if (!(new_skb = skb_realloc_headroom(skb, max_headroom))) 904 if (!(new_skb = skb_realloc_headroom(skb, max_headroom)))
909 goto tx_err_dst_release; 905 goto tx_err_dst_release;
910 906
911 if (skb->sk) 907 if (skb->sk)
912 skb_set_owner_w(new_skb, skb->sk); 908 skb_set_owner_w(new_skb, skb->sk);
913 kfree_skb(skb); 909 kfree_skb(skb);
914 skb = new_skb; 910 skb = new_skb;
915 } 911 }
916 skb_dst_drop(skb); 912 skb_dst_drop(skb);
917 skb_dst_set(skb, dst_clone(dst)); 913 skb_dst_set(skb, dst_clone(dst));
918 914
919 skb->transport_header = skb->network_header; 915 skb->transport_header = skb->network_header;
920 916
921 proto = fl->proto; 917 proto = fl->proto;
922 if (encap_limit >= 0) { 918 if (encap_limit >= 0) {
923 init_tel_txopt(&opt, encap_limit); 919 init_tel_txopt(&opt, encap_limit);
924 ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL); 920 ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL);
925 } 921 }
926 skb_push(skb, sizeof(struct ipv6hdr)); 922 skb_push(skb, sizeof(struct ipv6hdr));
927 skb_reset_network_header(skb); 923 skb_reset_network_header(skb);
928 ipv6h = ipv6_hdr(skb); 924 ipv6h = ipv6_hdr(skb);
929 *(__be32*)ipv6h = fl->fl6_flowlabel | htonl(0x60000000); 925 *(__be32*)ipv6h = fl->fl6_flowlabel | htonl(0x60000000);
930 dsfield = INET_ECN_encapsulate(0, dsfield); 926 dsfield = INET_ECN_encapsulate(0, dsfield);
931 ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield); 927 ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield);
932 ipv6h->hop_limit = t->parms.hop_limit; 928 ipv6h->hop_limit = t->parms.hop_limit;
933 ipv6h->nexthdr = proto; 929 ipv6h->nexthdr = proto;
934 ipv6_addr_copy(&ipv6h->saddr, &fl->fl6_src); 930 ipv6_addr_copy(&ipv6h->saddr, &fl->fl6_src);
935 ipv6_addr_copy(&ipv6h->daddr, &fl->fl6_dst); 931 ipv6_addr_copy(&ipv6h->daddr, &fl->fl6_dst);
936 nf_reset(skb); 932 nf_reset(skb);
937 pkt_len = skb->len; 933 pkt_len = skb->len;
938 err = ip6_local_out(skb); 934 err = ip6_local_out(skb);
939 935
940 if (net_xmit_eval(err) == 0) { 936 if (net_xmit_eval(err) == 0) {
941 stats->tx_bytes += pkt_len; 937 stats->tx_bytes += pkt_len;
942 stats->tx_packets++; 938 stats->tx_packets++;
943 } else { 939 } else {
944 stats->tx_errors++; 940 stats->tx_errors++;
945 stats->tx_aborted_errors++; 941 stats->tx_aborted_errors++;
946 } 942 }
947 ip6_tnl_dst_store(t, dst); 943 ip6_tnl_dst_store(t, dst);
948 return 0; 944 return 0;
949 tx_err_link_failure: 945 tx_err_link_failure:
950 stats->tx_carrier_errors++; 946 stats->tx_carrier_errors++;
951 dst_link_failure(skb); 947 dst_link_failure(skb);
952 tx_err_dst_release: 948 tx_err_dst_release:
953 dst_release(dst); 949 dst_release(dst);
954 return err; 950 return err;
955 } 951 }
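A worked sizing example for the checks above, assuming a 1500-byte MTU on the underlying route and an encapsulation limit in force:

    /* mtu         = dst_mtu(dst) - sizeof(struct ipv6hdr)  = 1500 - 40 = 1460
     * encap limit : max_headroom += 8, mtu -= 8            -> mtu = 1452
     * (mtu is clamped up to IPV6_MIN_MTU = 1280 if the result is smaller)
     *
     * A 1453-byte skb therefore fails with -EMSGSIZE and *pmtu = 1452,
     * which the callers turn into ICMP_FRAG_NEEDED / ICMPV6_PKT_TOOBIG.
     */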
956 952
957 static inline int 953 static inline int
958 ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) 954 ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
959 { 955 {
960 struct ip6_tnl *t = netdev_priv(dev); 956 struct ip6_tnl *t = netdev_priv(dev);
961 struct iphdr *iph = ip_hdr(skb); 957 struct iphdr *iph = ip_hdr(skb);
962 int encap_limit = -1; 958 int encap_limit = -1;
963 struct flowi fl; 959 struct flowi fl;
964 __u8 dsfield; 960 __u8 dsfield;
965 __u32 mtu; 961 __u32 mtu;
966 int err; 962 int err;
967 963
968 if ((t->parms.proto != IPPROTO_IPIP && t->parms.proto != 0) || 964 if ((t->parms.proto != IPPROTO_IPIP && t->parms.proto != 0) ||
969 !ip6_tnl_xmit_ctl(t)) 965 !ip6_tnl_xmit_ctl(t))
970 return -1; 966 return -1;
971 967
972 if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) 968 if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
973 encap_limit = t->parms.encap_limit; 969 encap_limit = t->parms.encap_limit;
974 970
975 memcpy(&fl, &t->fl, sizeof (fl)); 971 memcpy(&fl, &t->fl, sizeof (fl));
976 fl.proto = IPPROTO_IPIP; 972 fl.proto = IPPROTO_IPIP;
977 973
978 dsfield = ipv4_get_dsfield(iph); 974 dsfield = ipv4_get_dsfield(iph);
979 975
980 if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)) 976 if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS))
981 fl.fl6_flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT) 977 fl.fl6_flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT)
982 & IPV6_TCLASS_MASK; 978 & IPV6_TCLASS_MASK;
983 979
984 err = ip6_tnl_xmit2(skb, dev, dsfield, &fl, encap_limit, &mtu); 980 err = ip6_tnl_xmit2(skb, dev, dsfield, &fl, encap_limit, &mtu);
985 if (err != 0) { 981 if (err != 0) {
986 /* XXX: send ICMP error even if DF is not set. */ 982 /* XXX: send ICMP error even if DF is not set. */
987 if (err == -EMSGSIZE) 983 if (err == -EMSGSIZE)
988 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, 984 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
989 htonl(mtu)); 985 htonl(mtu));
990 return -1; 986 return -1;
991 } 987 }
992 988
993 return 0; 989 return 0;
994 } 990 }
995 991
996 static inline int 992 static inline int
997 ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) 993 ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
998 { 994 {
999 struct ip6_tnl *t = netdev_priv(dev); 995 struct ip6_tnl *t = netdev_priv(dev);
1000 struct ipv6hdr *ipv6h = ipv6_hdr(skb); 996 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
1001 int encap_limit = -1; 997 int encap_limit = -1;
1002 __u16 offset; 998 __u16 offset;
1003 struct flowi fl; 999 struct flowi fl;
1004 __u8 dsfield; 1000 __u8 dsfield;
1005 __u32 mtu; 1001 __u32 mtu;
1006 int err; 1002 int err;
1007 1003
1008 if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) || 1004 if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) ||
1009 !ip6_tnl_xmit_ctl(t) || ip6_tnl_addr_conflict(t, ipv6h)) 1005 !ip6_tnl_xmit_ctl(t) || ip6_tnl_addr_conflict(t, ipv6h))
1010 return -1; 1006 return -1;
1011 1007
1012 offset = parse_tlv_tnl_enc_lim(skb, skb_network_header(skb)); 1008 offset = parse_tlv_tnl_enc_lim(skb, skb_network_header(skb));
1013 if (offset > 0) { 1009 if (offset > 0) {
1014 struct ipv6_tlv_tnl_enc_lim *tel; 1010 struct ipv6_tlv_tnl_enc_lim *tel;
1015 tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset]; 1011 tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
1016 if (tel->encap_limit == 0) { 1012 if (tel->encap_limit == 0) {
1017 icmpv6_send(skb, ICMPV6_PARAMPROB, 1013 icmpv6_send(skb, ICMPV6_PARAMPROB,
1018 ICMPV6_HDR_FIELD, offset + 2); 1014 ICMPV6_HDR_FIELD, offset + 2);
1019 return -1; 1015 return -1;
1020 } 1016 }
1021 encap_limit = tel->encap_limit - 1; 1017 encap_limit = tel->encap_limit - 1;
1022 } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) 1018 } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
1023 encap_limit = t->parms.encap_limit; 1019 encap_limit = t->parms.encap_limit;
1024 1020
1025 memcpy(&fl, &t->fl, sizeof (fl)); 1021 memcpy(&fl, &t->fl, sizeof (fl));
1026 fl.proto = IPPROTO_IPV6; 1022 fl.proto = IPPROTO_IPV6;
1027 1023
1028 dsfield = ipv6_get_dsfield(ipv6h); 1024 dsfield = ipv6_get_dsfield(ipv6h);
1029 if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)) 1025 if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS))
1030 fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK); 1026 fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK);
1031 if ((t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)) 1027 if ((t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL))
1032 fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK); 1028 fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK);
1033 1029
1034 err = ip6_tnl_xmit2(skb, dev, dsfield, &fl, encap_limit, &mtu); 1030 err = ip6_tnl_xmit2(skb, dev, dsfield, &fl, encap_limit, &mtu);
1035 if (err != 0) { 1031 if (err != 0) {
1036 if (err == -EMSGSIZE) 1032 if (err == -EMSGSIZE)
1037 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 1033 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1038 return -1; 1034 return -1;
1039 } 1035 }
1040 1036
1041 return 0; 1037 return 0;
1042 } 1038 }
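Both xmit wrappers optionally fold the inner header's class-of-service bits into the outer flowi: IPV6_TCLASS_MASK and IPV6_TCLASS_SHIFT (defined near the top of this file) carve the traffic class out of the 32-bit flow word, whose layout is version(4) | traffic class(8) | flow label(20). For example, an inner IPv4 tos of 0xB8 (EF) reaches the outer header via:

    fl.fl6_flowlabel |= htonl((__u32)0xB8 << IPV6_TCLASS_SHIFT) & IPV6_TCLASS_MASK;
    /* 0xB8 << 20 = 0x0B800000: bits 27-20 of the flow word, i.e. the
     * outer traffic-class field; the low 20 flow-label bits are untouched. */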
1043 1039
1044 static netdev_tx_t 1040 static netdev_tx_t
1045 ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) 1041 ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
1046 { 1042 {
1047 struct ip6_tnl *t = netdev_priv(dev); 1043 struct ip6_tnl *t = netdev_priv(dev);
1048 struct net_device_stats *stats = &t->dev->stats; 1044 struct net_device_stats *stats = &t->dev->stats;
1049 int ret; 1045 int ret;
1050 1046
1051 switch (skb->protocol) { 1047 switch (skb->protocol) {
1052 case htons(ETH_P_IP): 1048 case htons(ETH_P_IP):
1053 ret = ip4ip6_tnl_xmit(skb, dev); 1049 ret = ip4ip6_tnl_xmit(skb, dev);
1054 break; 1050 break;
1055 case htons(ETH_P_IPV6): 1051 case htons(ETH_P_IPV6):
1056 ret = ip6ip6_tnl_xmit(skb, dev); 1052 ret = ip6ip6_tnl_xmit(skb, dev);
1057 break; 1053 break;
1058 default: 1054 default:
1059 goto tx_err; 1055 goto tx_err;
1060 } 1056 }
1061 1057
1062 if (ret < 0) 1058 if (ret < 0)
1063 goto tx_err; 1059 goto tx_err;
1064 1060
1065 return NETDEV_TX_OK; 1061 return NETDEV_TX_OK;
1066 1062
1067 tx_err: 1063 tx_err:
1068 stats->tx_errors++; 1064 stats->tx_errors++;
1069 stats->tx_dropped++; 1065 stats->tx_dropped++;
1070 kfree_skb(skb); 1066 kfree_skb(skb);
1071 return NETDEV_TX_OK; 1067 return NETDEV_TX_OK;
1072 } 1068 }
1073 1069
1074 static void ip6_tnl_set_cap(struct ip6_tnl *t) 1070 static void ip6_tnl_set_cap(struct ip6_tnl *t)
1075 { 1071 {
1076 struct ip6_tnl_parm *p = &t->parms; 1072 struct ip6_tnl_parm *p = &t->parms;
1077 int ltype = ipv6_addr_type(&p->laddr); 1073 int ltype = ipv6_addr_type(&p->laddr);
1078 int rtype = ipv6_addr_type(&p->raddr); 1074 int rtype = ipv6_addr_type(&p->raddr);
1079 1075
1080 p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV); 1076 p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV);
1081 1077
1082 if (ltype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) && 1078 if (ltype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
1083 rtype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) && 1079 rtype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
1084 !((ltype|rtype) & IPV6_ADDR_LOOPBACK) && 1080 !((ltype|rtype) & IPV6_ADDR_LOOPBACK) &&
1085 (!((ltype|rtype) & IPV6_ADDR_LINKLOCAL) || p->link)) { 1081 (!((ltype|rtype) & IPV6_ADDR_LINKLOCAL) || p->link)) {
1086 if (ltype&IPV6_ADDR_UNICAST) 1082 if (ltype&IPV6_ADDR_UNICAST)
1087 p->flags |= IP6_TNL_F_CAP_XMIT; 1083 p->flags |= IP6_TNL_F_CAP_XMIT;
1088 if (rtype&IPV6_ADDR_UNICAST) 1084 if (rtype&IPV6_ADDR_UNICAST)
1089 p->flags |= IP6_TNL_F_CAP_RCV; 1085 p->flags |= IP6_TNL_F_CAP_RCV;
1090 } 1086 }
1091 } 1087 }
1092 1088
1093 static void ip6_tnl_link_config(struct ip6_tnl *t) 1089 static void ip6_tnl_link_config(struct ip6_tnl *t)
1094 { 1090 {
1095 struct net_device *dev = t->dev; 1091 struct net_device *dev = t->dev;
1096 struct ip6_tnl_parm *p = &t->parms; 1092 struct ip6_tnl_parm *p = &t->parms;
1097 struct flowi *fl = &t->fl; 1093 struct flowi *fl = &t->fl;
1098 1094
1099 memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); 1095 memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
1100 memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr)); 1096 memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
1101 1097
1102 /* Set up flowi template */ 1098 /* Set up flowi template */
1103 ipv6_addr_copy(&fl->fl6_src, &p->laddr); 1099 ipv6_addr_copy(&fl->fl6_src, &p->laddr);
1104 ipv6_addr_copy(&fl->fl6_dst, &p->raddr); 1100 ipv6_addr_copy(&fl->fl6_dst, &p->raddr);
1105 fl->oif = p->link; 1101 fl->oif = p->link;
1106 fl->fl6_flowlabel = 0; 1102 fl->fl6_flowlabel = 0;
1107 1103
1108 if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS)) 1104 if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS))
1109 fl->fl6_flowlabel |= IPV6_TCLASS_MASK & p->flowinfo; 1105 fl->fl6_flowlabel |= IPV6_TCLASS_MASK & p->flowinfo;
1110 if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL)) 1106 if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL))
1111 fl->fl6_flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo; 1107 fl->fl6_flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo;
1112 1108
1113 ip6_tnl_set_cap(t); 1109 ip6_tnl_set_cap(t);
1114 1110
1115 if (p->flags&IP6_TNL_F_CAP_XMIT && p->flags&IP6_TNL_F_CAP_RCV) 1111 if (p->flags&IP6_TNL_F_CAP_XMIT && p->flags&IP6_TNL_F_CAP_RCV)
1116 dev->flags |= IFF_POINTOPOINT; 1112 dev->flags |= IFF_POINTOPOINT;
1117 else 1113 else
1118 dev->flags &= ~IFF_POINTOPOINT; 1114 dev->flags &= ~IFF_POINTOPOINT;
1119 1115
1120 dev->iflink = p->link; 1116 dev->iflink = p->link;
1121 1117
1122 if (p->flags & IP6_TNL_F_CAP_XMIT) { 1118 if (p->flags & IP6_TNL_F_CAP_XMIT) {
1123 int strict = (ipv6_addr_type(&p->raddr) & 1119 int strict = (ipv6_addr_type(&p->raddr) &
1124 (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL)); 1120 (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL));
1125 1121
1126 struct rt6_info *rt = rt6_lookup(dev_net(dev), 1122 struct rt6_info *rt = rt6_lookup(dev_net(dev),
1127 &p->raddr, &p->laddr, 1123 &p->raddr, &p->laddr,
1128 p->link, strict); 1124 p->link, strict);
1129 1125
1130 if (rt == NULL) 1126 if (rt == NULL)
1131 return; 1127 return;
1132 1128
1133 if (rt->rt6i_dev) { 1129 if (rt->rt6i_dev) {
1134 dev->hard_header_len = rt->rt6i_dev->hard_header_len + 1130 dev->hard_header_len = rt->rt6i_dev->hard_header_len +
1135 sizeof (struct ipv6hdr); 1131 sizeof (struct ipv6hdr);
1136 1132
1137 dev->mtu = rt->rt6i_dev->mtu - sizeof (struct ipv6hdr); 1133 dev->mtu = rt->rt6i_dev->mtu - sizeof (struct ipv6hdr);
1138 1134
1139 if (dev->mtu < IPV6_MIN_MTU) 1135 if (dev->mtu < IPV6_MIN_MTU)
1140 dev->mtu = IPV6_MIN_MTU; 1136 dev->mtu = IPV6_MIN_MTU;
1141 } 1137 }
1142 dst_release(&rt->u.dst); 1138 dst_release(&rt->u.dst);
1143 } 1139 }
1144 } 1140 }
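
For concreteness on the MTU computation in ip6_tnl_link_config() above: with the tunnel routed over a standard 1500-byte Ethernet underlay, dev->mtu becomes 1500 - 40 (sizeof(struct ipv6hdr)) = 1460, and any result below IPV6_MIN_MTU (1280) is clamped up to 1280.
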
1145 1141
1146 /** 1142 /**
1147 * ip6_tnl_change - update the tunnel parameters 1143 * ip6_tnl_change - update the tunnel parameters
1148 * @t: tunnel to be changed 1144 * @t: tunnel to be changed
1149 * @p: tunnel configuration parameters 1145 * @p: tunnel configuration parameters
1150 * 1146 *
1151 * Description: 1147 * Description:
1152 * ip6_tnl_change() updates the tunnel parameters 1148 * ip6_tnl_change() updates the tunnel parameters
1153 **/ 1149 **/
1154 1150
1155 static int 1151 static int
1156 ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p) 1152 ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p)
1157 { 1153 {
1158 ipv6_addr_copy(&t->parms.laddr, &p->laddr); 1154 ipv6_addr_copy(&t->parms.laddr, &p->laddr);
1159 ipv6_addr_copy(&t->parms.raddr, &p->raddr); 1155 ipv6_addr_copy(&t->parms.raddr, &p->raddr);
1160 t->parms.flags = p->flags; 1156 t->parms.flags = p->flags;
1161 t->parms.hop_limit = p->hop_limit; 1157 t->parms.hop_limit = p->hop_limit;
1162 t->parms.encap_limit = p->encap_limit; 1158 t->parms.encap_limit = p->encap_limit;
1163 t->parms.flowinfo = p->flowinfo; 1159 t->parms.flowinfo = p->flowinfo;
1164 t->parms.link = p->link; 1160 t->parms.link = p->link;
1165 t->parms.proto = p->proto; 1161 t->parms.proto = p->proto;
1166 ip6_tnl_dst_reset(t); 1162 ip6_tnl_dst_reset(t);
1167 ip6_tnl_link_config(t); 1163 ip6_tnl_link_config(t);
1168 return 0; 1164 return 0;
1169 } 1165 }
1170 1166
1171 /** 1167 /**
1172 * ip6_tnl_ioctl - configure ipv6 tunnels from userspace 1168 * ip6_tnl_ioctl - configure ipv6 tunnels from userspace
1173 * @dev: virtual device associated with tunnel 1169 * @dev: virtual device associated with tunnel
1174 * @ifr: parameters passed from userspace 1170 * @ifr: parameters passed from userspace
1175 * @cmd: command to be performed 1171 * @cmd: command to be performed
1176 * 1172 *
1177 * Description: 1173 * Description:
1178 * ip6_tnl_ioctl() is used for managing IPv6 tunnels 1174 * ip6_tnl_ioctl() is used for managing IPv6 tunnels
1179 * from userspace. 1175 * from userspace.
1180 * 1176 *
1181 * The possible commands are the following: 1177 * The possible commands are the following:
1182 * %SIOCGETTUNNEL: get tunnel parameters for device 1178 * %SIOCGETTUNNEL: get tunnel parameters for device
1183 * %SIOCADDTUNNEL: add tunnel matching given tunnel parameters 1179 * %SIOCADDTUNNEL: add tunnel matching given tunnel parameters
1184 * %SIOCCHGTUNNEL: change tunnel parameters to those given 1180 * %SIOCCHGTUNNEL: change tunnel parameters to those given
1185 * %SIOCDELTUNNEL: delete tunnel 1181 * %SIOCDELTUNNEL: delete tunnel
1186 * 1182 *
1187 * The fallback device "ip6tnl0", created during module 1183 * The fallback device "ip6tnl0", created during module
1188 * initialization, can be used for creating other tunnel devices. 1184 * initialization, can be used for creating other tunnel devices.
1189 * 1185 *
1190 * Return: 1186 * Return:
1191 * 0 on success, 1187 * 0 on success,
1192 * %-EFAULT if unable to copy data to or from userspace, 1188 * %-EFAULT if unable to copy data to or from userspace,
1193 * %-EPERM if the current process lacks %CAP_NET_ADMIN, 1189 * %-EPERM if the current process lacks %CAP_NET_ADMIN,
1194 * %-EINVAL if passed tunnel parameters are invalid, 1190 * %-EINVAL if passed tunnel parameters are invalid,
1195 * %-EEXIST if changing a tunnel's parameters would cause a conflict, 1191 * %-EEXIST if changing a tunnel's parameters would cause a conflict,
1196 * %-ENODEV if attempting to change or delete a nonexistent device 1192 * %-ENODEV if attempting to change or delete a nonexistent device
1197 **/ 1193 **/
1198 1194
1199 static int 1195 static int
1200 ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) 1196 ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1201 { 1197 {
1202 int err = 0; 1198 int err = 0;
1203 struct ip6_tnl_parm p; 1199 struct ip6_tnl_parm p;
1204 struct ip6_tnl *t = NULL; 1200 struct ip6_tnl *t = NULL;
1205 struct net *net = dev_net(dev); 1201 struct net *net = dev_net(dev);
1206 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); 1202 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
1207 1203
1208 switch (cmd) { 1204 switch (cmd) {
1209 case SIOCGETTUNNEL: 1205 case SIOCGETTUNNEL:
1210 if (dev == ip6n->fb_tnl_dev) { 1206 if (dev == ip6n->fb_tnl_dev) {
1211 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) { 1207 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) {
1212 err = -EFAULT; 1208 err = -EFAULT;
1213 break; 1209 break;
1214 } 1210 }
1215 t = ip6_tnl_locate(net, &p, 0); 1211 t = ip6_tnl_locate(net, &p, 0);
1216 } 1212 }
1217 if (t == NULL) 1213 if (t == NULL)
1218 t = netdev_priv(dev); 1214 t = netdev_priv(dev);
1219 memcpy(&p, &t->parms, sizeof (p)); 1215 memcpy(&p, &t->parms, sizeof (p));
1220 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof (p))) { 1216 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof (p))) {
1221 err = -EFAULT; 1217 err = -EFAULT;
1222 } 1218 }
1223 break; 1219 break;
1224 case SIOCADDTUNNEL: 1220 case SIOCADDTUNNEL:
1225 case SIOCCHGTUNNEL: 1221 case SIOCCHGTUNNEL:
1226 err = -EPERM; 1222 err = -EPERM;
1227 if (!capable(CAP_NET_ADMIN)) 1223 if (!capable(CAP_NET_ADMIN))
1228 break; 1224 break;
1229 err = -EFAULT; 1225 err = -EFAULT;
1230 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) 1226 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p)))
1231 break; 1227 break;
1232 err = -EINVAL; 1228 err = -EINVAL;
1233 if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP && 1229 if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP &&
1234 p.proto != 0) 1230 p.proto != 0)
1235 break; 1231 break;
1236 t = ip6_tnl_locate(net, &p, cmd == SIOCADDTUNNEL); 1232 t = ip6_tnl_locate(net, &p, cmd == SIOCADDTUNNEL);
1237 if (dev != ip6n->fb_tnl_dev && cmd == SIOCCHGTUNNEL) { 1233 if (dev != ip6n->fb_tnl_dev && cmd == SIOCCHGTUNNEL) {
1238 if (t != NULL) { 1234 if (t != NULL) {
1239 if (t->dev != dev) { 1235 if (t->dev != dev) {
1240 err = -EEXIST; 1236 err = -EEXIST;
1241 break; 1237 break;
1242 } 1238 }
1243 } else 1239 } else
1244 t = netdev_priv(dev); 1240 t = netdev_priv(dev);
1245 1241
1246 ip6_tnl_unlink(ip6n, t); 1242 ip6_tnl_unlink(ip6n, t);
1247 err = ip6_tnl_change(t, &p); 1243 err = ip6_tnl_change(t, &p);
1248 ip6_tnl_link(ip6n, t); 1244 ip6_tnl_link(ip6n, t);
1249 netdev_state_change(dev); 1245 netdev_state_change(dev);
1250 } 1246 }
1251 if (t) { 1247 if (t) {
1252 err = 0; 1248 err = 0;
1253 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof (p))) 1249 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof (p)))
1254 err = -EFAULT; 1250 err = -EFAULT;
1255 1251
1256 } else 1252 } else
1257 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); 1253 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1258 break; 1254 break;
1259 case SIOCDELTUNNEL: 1255 case SIOCDELTUNNEL:
1260 err = -EPERM; 1256 err = -EPERM;
1261 if (!capable(CAP_NET_ADMIN)) 1257 if (!capable(CAP_NET_ADMIN))
1262 break; 1258 break;
1263 1259
1264 if (dev == ip6n->fb_tnl_dev) { 1260 if (dev == ip6n->fb_tnl_dev) {
1265 err = -EFAULT; 1261 err = -EFAULT;
1266 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) 1262 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p)))
1267 break; 1263 break;
1268 err = -ENOENT; 1264 err = -ENOENT;
1269 if ((t = ip6_tnl_locate(net, &p, 0)) == NULL) 1265 if ((t = ip6_tnl_locate(net, &p, 0)) == NULL)
1270 break; 1266 break;
1271 err = -EPERM; 1267 err = -EPERM;
1272 if (t->dev == ip6n->fb_tnl_dev) 1268 if (t->dev == ip6n->fb_tnl_dev)
1273 break; 1269 break;
1274 dev = t->dev; 1270 dev = t->dev;
1275 } 1271 }
1276 err = 0; 1272 err = 0;
1277 unregister_netdevice(dev); 1273 unregister_netdevice(dev);
1278 break; 1274 break;
1279 default: 1275 default:
1280 err = -EINVAL; 1276 err = -EINVAL;
1281 } 1277 }
1282 return err; 1278 return err;
1283 } 1279 }
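
As a brief illustration of the ioctl interface documented above, the userspace sketch below queries the fallback device with SIOCGETTUNNEL. It is not part of this commit; the struct and ioctl names are the standard uapi ones (struct ip6_tnl_parm from <linux/ip6_tunnel.h>, SIOCGETTUNNEL from <linux/if_tunnel.h>), and error handling is abbreviated:

	#include <stdio.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <sys/socket.h>
	#include <net/if.h>
	#include <linux/if_tunnel.h>	/* SIOCGETTUNNEL */
	#include <linux/ip6_tunnel.h>	/* struct ip6_tnl_parm */

	int main(void)
	{
		struct ip6_tnl_parm p;
		struct ifreq ifr;
		int fd = socket(AF_INET6, SOCK_DGRAM, 0);

		memset(&p, 0, sizeof(p));
		memset(&ifr, 0, sizeof(ifr));
		strncpy(ifr.ifr_name, "ip6tnl0", IFNAMSIZ - 1);
		/* the handler above copies the parms through this pointer */
		ifr.ifr_ifru.ifru_data = (void *)&p;

		if (ioctl(fd, SIOCGETTUNNEL, &ifr) < 0) {
			perror("SIOCGETTUNNEL");
			return 1;
		}
		printf("tunnel %s: proto %d hop_limit %d\n",
		       p.name, p.proto, p.hop_limit);
		return 0;
	}

A SIOCADDTUNNEL or SIOCCHGTUNNEL call follows the same pattern but, per the handler above, requires CAP_NET_ADMIN and a proto of IPPROTO_IPV6, IPPROTO_IPIP or 0.
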
1284 1280
1285 /** 1281 /**
1286 * ip6_tnl_change_mtu - change mtu manually for tunnel device 1282 * ip6_tnl_change_mtu - change mtu manually for tunnel device
1287 * @dev: virtual device associated with tunnel 1283 * @dev: virtual device associated with tunnel
1288 * @new_mtu: the new mtu 1284 * @new_mtu: the new mtu
1289 * 1285 *
1290 * Return: 1286 * Return:
1291 * 0 on success, 1287 * 0 on success,
1292 * %-EINVAL if mtu too small 1288 * %-EINVAL if mtu too small
1293 **/ 1289 **/
1294 1290
1295 static int 1291 static int
1296 ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) 1292 ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
1297 { 1293 {
1298 if (new_mtu < IPV6_MIN_MTU) { 1294 if (new_mtu < IPV6_MIN_MTU) {
1299 return -EINVAL; 1295 return -EINVAL;
1300 } 1296 }
1301 dev->mtu = new_mtu; 1297 dev->mtu = new_mtu;
1302 return 0; 1298 return 0;
1303 } 1299 }
1304 1300
1305 1301
1306 static const struct net_device_ops ip6_tnl_netdev_ops = { 1302 static const struct net_device_ops ip6_tnl_netdev_ops = {
1307 .ndo_uninit = ip6_tnl_dev_uninit, 1303 .ndo_uninit = ip6_tnl_dev_uninit,
1308 .ndo_start_xmit = ip6_tnl_xmit, 1304 .ndo_start_xmit = ip6_tnl_xmit,
1309 .ndo_do_ioctl = ip6_tnl_ioctl, 1305 .ndo_do_ioctl = ip6_tnl_ioctl,
1310 .ndo_change_mtu = ip6_tnl_change_mtu, 1306 .ndo_change_mtu = ip6_tnl_change_mtu,
1311 }; 1307 };
1312 1308
1313 /** 1309 /**
1314 * ip6_tnl_dev_setup - setup virtual tunnel device 1310 * ip6_tnl_dev_setup - setup virtual tunnel device
1315 * @dev: virtual device associated with tunnel 1311 * @dev: virtual device associated with tunnel
1316 * 1312 *
1317 * Description: 1313 * Description:
1318 * Initialize function pointers and device parameters 1314 * Initialize function pointers and device parameters
1319 **/ 1315 **/
1320 1316
1321 static void ip6_tnl_dev_setup(struct net_device *dev) 1317 static void ip6_tnl_dev_setup(struct net_device *dev)
1322 { 1318 {
1323 dev->netdev_ops = &ip6_tnl_netdev_ops; 1319 dev->netdev_ops = &ip6_tnl_netdev_ops;
1324 dev->destructor = free_netdev; 1320 dev->destructor = free_netdev;
1325 1321
1326 dev->type = ARPHRD_TUNNEL6; 1322 dev->type = ARPHRD_TUNNEL6;
1327 dev->hard_header_len = LL_MAX_HEADER + sizeof (struct ipv6hdr); 1323 dev->hard_header_len = LL_MAX_HEADER + sizeof (struct ipv6hdr);
1328 dev->mtu = ETH_DATA_LEN - sizeof (struct ipv6hdr); 1324 dev->mtu = ETH_DATA_LEN - sizeof (struct ipv6hdr);
1329 dev->flags |= IFF_NOARP; 1325 dev->flags |= IFF_NOARP;
1330 dev->addr_len = sizeof(struct in6_addr); 1326 dev->addr_len = sizeof(struct in6_addr);
1331 dev->features |= NETIF_F_NETNS_LOCAL; 1327 dev->features |= NETIF_F_NETNS_LOCAL;
1332 } 1328 }
1333 1329
1334 1330
1335 /** 1331 /**
1336 * ip6_tnl_dev_init_gen - general initializer for all tunnel devices 1332 * ip6_tnl_dev_init_gen - general initializer for all tunnel devices
1337 * @dev: virtual device associated with tunnel 1333 * @dev: virtual device associated with tunnel
1338 **/ 1334 **/
1339 1335
1340 static inline void 1336 static inline void
1341 ip6_tnl_dev_init_gen(struct net_device *dev) 1337 ip6_tnl_dev_init_gen(struct net_device *dev)
1342 { 1338 {
1343 struct ip6_tnl *t = netdev_priv(dev); 1339 struct ip6_tnl *t = netdev_priv(dev);
1344 t->dev = dev; 1340 t->dev = dev;
1345 strcpy(t->parms.name, dev->name); 1341 strcpy(t->parms.name, dev->name);
1346 } 1342 }
1347 1343
1348 /** 1344 /**
1349 * ip6_tnl_dev_init - initializer for all non fallback tunnel devices 1345 * ip6_tnl_dev_init - initializer for all non fallback tunnel devices
1350 * @dev: virtual device associated with tunnel 1346 * @dev: virtual device associated with tunnel
1351 **/ 1347 **/
1352 1348
1353 static void ip6_tnl_dev_init(struct net_device *dev) 1349 static void ip6_tnl_dev_init(struct net_device *dev)
1354 { 1350 {
1355 struct ip6_tnl *t = netdev_priv(dev); 1351 struct ip6_tnl *t = netdev_priv(dev);
1356 ip6_tnl_dev_init_gen(dev); 1352 ip6_tnl_dev_init_gen(dev);
1357 ip6_tnl_link_config(t); 1353 ip6_tnl_link_config(t);
1358 } 1354 }
1359 1355
1360 /** 1356 /**
1361 * ip6_fb_tnl_dev_init - initializer for fallback tunnel device 1357 * ip6_fb_tnl_dev_init - initializer for fallback tunnel device
1362 * @dev: fallback device 1358 * @dev: fallback device
1363 * 1359 *
1365 **/ 1361 **/
1366 1362
1367 static void __net_init ip6_fb_tnl_dev_init(struct net_device *dev) 1363 static void __net_init ip6_fb_tnl_dev_init(struct net_device *dev)
1368 { 1364 {
1369 struct ip6_tnl *t = netdev_priv(dev); 1365 struct ip6_tnl *t = netdev_priv(dev);
1370 struct net *net = dev_net(dev); 1366 struct net *net = dev_net(dev);
1371 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); 1367 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
1372 1368
1373 ip6_tnl_dev_init_gen(dev); 1369 ip6_tnl_dev_init_gen(dev);
1374 t->parms.proto = IPPROTO_IPV6; 1370 t->parms.proto = IPPROTO_IPV6;
1375 dev_hold(dev); 1371 dev_hold(dev);
1376 ip6n->tnls_wc[0] = t; 1372 ip6n->tnls_wc[0] = t;
1377 } 1373 }
1378 1374
1379 static struct xfrm6_tunnel ip4ip6_handler = { 1375 static struct xfrm6_tunnel ip4ip6_handler = {
1380 .handler = ip4ip6_rcv, 1376 .handler = ip4ip6_rcv,
1381 .err_handler = ip4ip6_err, 1377 .err_handler = ip4ip6_err,
1382 .priority = 1, 1378 .priority = 1,
1383 }; 1379 };
1384 1380
1385 static struct xfrm6_tunnel ip6ip6_handler = { 1381 static struct xfrm6_tunnel ip6ip6_handler = {
1386 .handler = ip6ip6_rcv, 1382 .handler = ip6ip6_rcv,
1387 .err_handler = ip6ip6_err, 1383 .err_handler = ip6ip6_err,
1388 .priority = 1, 1384 .priority = 1,
1389 }; 1385 };
1390 1386
1391 static void __net_exit ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n) 1387 static void __net_exit ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n)
1392 { 1388 {
1393 int h; 1389 int h;
1394 struct ip6_tnl *t; 1390 struct ip6_tnl *t;
1395 LIST_HEAD(list); 1391 LIST_HEAD(list);
1396 1392
1397 for (h = 0; h < HASH_SIZE; h++) { 1393 for (h = 0; h < HASH_SIZE; h++) {
1398 t = ip6n->tnls_r_l[h]; 1394 t = ip6n->tnls_r_l[h];
1399 while (t != NULL) { 1395 while (t != NULL) {
1400 unregister_netdevice_queue(t->dev, &list); 1396 unregister_netdevice_queue(t->dev, &list);
1401 t = t->next; 1397 t = t->next;
1402 } 1398 }
1403 } 1399 }
1404 1400
1405 t = ip6n->tnls_wc[0]; 1401 t = ip6n->tnls_wc[0];
1406 unregister_netdevice_queue(t->dev, &list); 1402 unregister_netdevice_queue(t->dev, &list);
1407 unregister_netdevice_many(&list); 1403 unregister_netdevice_many(&list);
1408 } 1404 }
1409 1405
1410 static int __net_init ip6_tnl_init_net(struct net *net) 1406 static int __net_init ip6_tnl_init_net(struct net *net)
1411 { 1407 {
1412 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); 1408 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
1413 int err; 1409 int err;
1414 1410
1415 ip6n->tnls[0] = ip6n->tnls_wc; 1411 ip6n->tnls[0] = ip6n->tnls_wc;
1416 ip6n->tnls[1] = ip6n->tnls_r_l; 1412 ip6n->tnls[1] = ip6n->tnls_r_l;
1417 1413
1418 err = -ENOMEM; 1414 err = -ENOMEM;
1419 ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0", 1415 ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0",
1420 ip6_tnl_dev_setup); 1416 ip6_tnl_dev_setup);
1421 1417
1422 if (!ip6n->fb_tnl_dev) 1418 if (!ip6n->fb_tnl_dev)
1423 goto err_alloc_dev; 1419 goto err_alloc_dev;
1424 dev_net_set(ip6n->fb_tnl_dev, net); 1420 dev_net_set(ip6n->fb_tnl_dev, net);
1425 1421
1426 ip6_fb_tnl_dev_init(ip6n->fb_tnl_dev); 1422 ip6_fb_tnl_dev_init(ip6n->fb_tnl_dev);
1427 1423
1428 err = register_netdev(ip6n->fb_tnl_dev); 1424 err = register_netdev(ip6n->fb_tnl_dev);
1429 if (err < 0) 1425 if (err < 0)
1430 goto err_register; 1426 goto err_register;
1431 return 0; 1427 return 0;
1432 1428
1433 err_register: 1429 err_register:
1434 free_netdev(ip6n->fb_tnl_dev); 1430 free_netdev(ip6n->fb_tnl_dev);
1435 err_alloc_dev: 1431 err_alloc_dev:
1436 return err; 1432 return err;
1437 } 1433 }
1438 1434
1439 static void __net_exit ip6_tnl_exit_net(struct net *net) 1435 static void __net_exit ip6_tnl_exit_net(struct net *net)
1440 { 1436 {
1441 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); 1437 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
1442 1438
1443 rtnl_lock(); 1439 rtnl_lock();
1444 ip6_tnl_destroy_tunnels(ip6n); 1440 ip6_tnl_destroy_tunnels(ip6n);
1445 rtnl_unlock(); 1441 rtnl_unlock();
1446 } 1442 }
1447 1443
1448 static struct pernet_operations ip6_tnl_net_ops = { 1444 static struct pernet_operations ip6_tnl_net_ops = {
1449 .init = ip6_tnl_init_net, 1445 .init = ip6_tnl_init_net,
1450 .exit = ip6_tnl_exit_net, 1446 .exit = ip6_tnl_exit_net,
1451 .id = &ip6_tnl_net_id, 1447 .id = &ip6_tnl_net_id,
1452 .size = sizeof(struct ip6_tnl_net), 1448 .size = sizeof(struct ip6_tnl_net),
1453 }; 1449 };
1454 1450
1455 /** 1451 /**
1456 * ip6_tunnel_init - register protocol and reserve needed resources 1452 * ip6_tunnel_init - register protocol and reserve needed resources
1457 * 1453 *
1458 * Return: 0 on success 1454 * Return: 0 on success
1459 **/ 1455 **/
1460 1456
1461 static int __init ip6_tunnel_init(void) 1457 static int __init ip6_tunnel_init(void)
1462 { 1458 {
1463 int err; 1459 int err;
1464 1460
1465 err = register_pernet_device(&ip6_tnl_net_ops); 1461 err = register_pernet_device(&ip6_tnl_net_ops);
1466 if (err < 0) 1462 if (err < 0)
1467 goto out_pernet; 1463 goto out_pernet;
1468 1464
1469 err = xfrm6_tunnel_register(&ip4ip6_handler, AF_INET); 1465 err = xfrm6_tunnel_register(&ip4ip6_handler, AF_INET);
1470 if (err < 0) { 1466 if (err < 0) {
1471 printk(KERN_ERR "ip6_tunnel init: can't register ip4ip6\n"); 1467 printk(KERN_ERR "ip6_tunnel init: can't register ip4ip6\n");
1472 goto out_ip4ip6; 1468 goto out_ip4ip6;
1473 } 1469 }
1474 1470
1475 err = xfrm6_tunnel_register(&ip6ip6_handler, AF_INET6); 1471 err = xfrm6_tunnel_register(&ip6ip6_handler, AF_INET6);
1476 if (err < 0) { 1472 if (err < 0) {
1477 printk(KERN_ERR "ip6_tunnel init: can't register ip6ip6\n"); 1473 printk(KERN_ERR "ip6_tunnel init: can't register ip6ip6\n");
1478 goto out_ip6ip6; 1474 goto out_ip6ip6;
1479 } 1475 }
1480 1476
1481 return 0; 1477 return 0;
1482 1478
1483 out_ip6ip6: 1479 out_ip6ip6:
1484 xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET); 1480 xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET);
1485 out_ip4ip6: 1481 out_ip4ip6:
1486 unregister_pernet_device(&ip6_tnl_net_ops); 1482 unregister_pernet_device(&ip6_tnl_net_ops);
1487 out_pernet: 1483 out_pernet:
1488 return err; 1484 return err;
1489 } 1485 }
1490 1486
1491 /** 1487 /**
1492 * ip6_tunnel_cleanup - free resources and unregister protocol 1488 * ip6_tunnel_cleanup - free resources and unregister protocol
1493 **/ 1489 **/
1494 1490
1495 static void __exit ip6_tunnel_cleanup(void) 1491 static void __exit ip6_tunnel_cleanup(void)
1496 { 1492 {
1497 if (xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET)) 1493 if (xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET))
1498 printk(KERN_INFO "ip6_tunnel close: can't deregister ip4ip6\n"); 1494 printk(KERN_INFO "ip6_tunnel close: can't deregister ip4ip6\n");
1499 1495
1500 if (xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6)) 1496 if (xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6))
1501 printk(KERN_INFO "ip6_tunnel close: can't deregister ip6ip6\n"); 1497 printk(KERN_INFO "ip6_tunnel close: can't deregister ip6ip6\n");
1502 1498
1503 unregister_pernet_device(&ip6_tnl_net_ops); 1499 unregister_pernet_device(&ip6_tnl_net_ops);
1504 } 1500 }
1505 1501
1506 module_init(ip6_tunnel_init); 1502 module_init(ip6_tunnel_init);
1507 module_exit(ip6_tunnel_cleanup); 1503 module_exit(ip6_tunnel_cleanup);
1508 1504
1 /* 1 /*
2 * Linux IPv6 multicast routing support for BSD pim6sd 2 * Linux IPv6 multicast routing support for BSD pim6sd
3 * Based on net/ipv4/ipmr.c. 3 * Based on net/ipv4/ipmr.c.
4 * 4 *
5 * (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr> 5 * (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
6 * LSIIT Laboratory, Strasbourg, France 6 * LSIIT Laboratory, Strasbourg, France
7 * (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com> 7 * (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
8 * 6WIND, Paris, France 8 * 6WIND, Paris, France
9 * Copyright (C)2007,2008 USAGI/WIDE Project 9 * Copyright (C)2007,2008 USAGI/WIDE Project
10 * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org> 10 * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
11 * 11 *
12 * This program is free software; you can redistribute it and/or 12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License 13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version 14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version. 15 * 2 of the License, or (at your option) any later version.
16 * 16 *
17 */ 17 */
18 18
19 #include <asm/system.h> 19 #include <asm/system.h>
20 #include <asm/uaccess.h> 20 #include <asm/uaccess.h>
21 #include <linux/types.h> 21 #include <linux/types.h>
22 #include <linux/sched.h> 22 #include <linux/sched.h>
23 #include <linux/errno.h> 23 #include <linux/errno.h>
24 #include <linux/timer.h> 24 #include <linux/timer.h>
25 #include <linux/mm.h> 25 #include <linux/mm.h>
26 #include <linux/kernel.h> 26 #include <linux/kernel.h>
27 #include <linux/fcntl.h> 27 #include <linux/fcntl.h>
28 #include <linux/stat.h> 28 #include <linux/stat.h>
29 #include <linux/socket.h> 29 #include <linux/socket.h>
30 #include <linux/inet.h> 30 #include <linux/inet.h>
31 #include <linux/netdevice.h> 31 #include <linux/netdevice.h>
32 #include <linux/inetdevice.h> 32 #include <linux/inetdevice.h>
33 #include <linux/proc_fs.h> 33 #include <linux/proc_fs.h>
34 #include <linux/seq_file.h> 34 #include <linux/seq_file.h>
35 #include <linux/init.h> 35 #include <linux/init.h>
36 #include <linux/slab.h> 36 #include <linux/slab.h>
37 #include <net/protocol.h> 37 #include <net/protocol.h>
38 #include <linux/skbuff.h> 38 #include <linux/skbuff.h>
39 #include <net/sock.h> 39 #include <net/sock.h>
40 #include <net/raw.h> 40 #include <net/raw.h>
41 #include <linux/notifier.h> 41 #include <linux/notifier.h>
42 #include <linux/if_arp.h> 42 #include <linux/if_arp.h>
43 #include <net/checksum.h> 43 #include <net/checksum.h>
44 #include <net/netlink.h> 44 #include <net/netlink.h>
45 #include <net/fib_rules.h> 45 #include <net/fib_rules.h>
46 46
47 #include <net/ipv6.h> 47 #include <net/ipv6.h>
48 #include <net/ip6_route.h> 48 #include <net/ip6_route.h>
49 #include <linux/mroute6.h> 49 #include <linux/mroute6.h>
50 #include <linux/pim.h> 50 #include <linux/pim.h>
51 #include <net/addrconf.h> 51 #include <net/addrconf.h>
52 #include <linux/netfilter_ipv6.h> 52 #include <linux/netfilter_ipv6.h>
53 #include <net/ip6_checksum.h> 53 #include <net/ip6_checksum.h>
54 54
55 struct mr6_table { 55 struct mr6_table {
56 struct list_head list; 56 struct list_head list;
57 #ifdef CONFIG_NET_NS 57 #ifdef CONFIG_NET_NS
58 struct net *net; 58 struct net *net;
59 #endif 59 #endif
60 u32 id; 60 u32 id;
61 struct sock *mroute6_sk; 61 struct sock *mroute6_sk;
62 struct timer_list ipmr_expire_timer; 62 struct timer_list ipmr_expire_timer;
63 struct list_head mfc6_unres_queue; 63 struct list_head mfc6_unres_queue;
64 struct list_head mfc6_cache_array[MFC6_LINES]; 64 struct list_head mfc6_cache_array[MFC6_LINES];
65 struct mif_device vif6_table[MAXMIFS]; 65 struct mif_device vif6_table[MAXMIFS];
66 int maxvif; 66 int maxvif;
67 atomic_t cache_resolve_queue_len; 67 atomic_t cache_resolve_queue_len;
68 int mroute_do_assert; 68 int mroute_do_assert;
69 int mroute_do_pim; 69 int mroute_do_pim;
70 #ifdef CONFIG_IPV6_PIMSM_V2 70 #ifdef CONFIG_IPV6_PIMSM_V2
71 int mroute_reg_vif_num; 71 int mroute_reg_vif_num;
72 #endif 72 #endif
73 }; 73 };
74 74
75 struct ip6mr_rule { 75 struct ip6mr_rule {
76 struct fib_rule common; 76 struct fib_rule common;
77 }; 77 };
78 78
79 struct ip6mr_result { 79 struct ip6mr_result {
80 struct mr6_table *mrt; 80 struct mr6_table *mrt;
81 }; 81 };
82 82
83 /* Big lock, protecting the vif table, mrt cache and mroute socket state. 83 /* Big lock, protecting the vif table, mrt cache and mroute socket state.
84 Note that changes are serialized via rtnl_lock. 84 Note that changes are serialized via rtnl_lock.
85 */ 85 */
86 86
87 static DEFINE_RWLOCK(mrt_lock); 87 static DEFINE_RWLOCK(mrt_lock);
88 88
89 /* 89 /*
90 * Multicast router control variables 90 * Multicast router control variables
91 */ 91 */
92 92
93 #define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL) 93 #define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)
94 94
95 /* Special spinlock for queue of unresolved entries */ 95 /* Special spinlock for queue of unresolved entries */
96 static DEFINE_SPINLOCK(mfc_unres_lock); 96 static DEFINE_SPINLOCK(mfc_unres_lock);
97 97
98 /* We return to Alan's original scheme. The hash table of resolved 98 /* We return to Alan's original scheme. The hash table of resolved
99 entries is changed only in process context and is protected 99 entries is changed only in process context and is protected
100 by the weak lock mrt_lock. The queue of unresolved entries is 100 by the weak lock mrt_lock. The queue of unresolved entries is
101 protected by the strong spinlock mfc_unres_lock. 101 protected by the strong spinlock mfc_unres_lock.
102 102
103 This way the data path is entirely free of exclusive locks. 103 This way the data path is entirely free of exclusive locks.
104 */ 104 */
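
A condensed sketch of what that discipline looks like at the call sites later in this file (illustrative only, not code from this commit):

	/* Packet path, softirq context: resolved entries are read under
	 * the "weak" rwlock, so many CPUs can forward concurrently. */
	read_lock(&mrt_lock);
	/* ... look up the mfc6_cache in mrt->mfc6_cache_array[] ... */
	read_unlock(&mrt_lock);

	/* Control path, process context (already serialized by rtnl_lock):
	 * the rare writers take the exclusive side with BHs disabled. */
	write_lock_bh(&mrt_lock);
	/* ... insert or delete a resolved cache entry ... */
	write_unlock_bh(&mrt_lock);

	/* The unresolved queue is also touched from the packet path, so it
	 * needs the "strong" BH-safe spinlock. */
	spin_lock_bh(&mfc_unres_lock);
	/* ... queue the skb on an unresolved entry ... */
	spin_unlock_bh(&mfc_unres_lock);
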
105 105
106 static struct kmem_cache *mrt_cachep __read_mostly; 106 static struct kmem_cache *mrt_cachep __read_mostly;
107 107
108 static struct mr6_table *ip6mr_new_table(struct net *net, u32 id); 108 static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
109 static void ip6mr_free_table(struct mr6_table *mrt); 109 static void ip6mr_free_table(struct mr6_table *mrt);
110 110
111 static int ip6_mr_forward(struct net *net, struct mr6_table *mrt, 111 static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
112 struct sk_buff *skb, struct mfc6_cache *cache); 112 struct sk_buff *skb, struct mfc6_cache *cache);
113 static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt, 113 static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
114 mifi_t mifi, int assert); 114 mifi_t mifi, int assert);
115 static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, 115 static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
116 struct mfc6_cache *c, struct rtmsg *rtm); 116 struct mfc6_cache *c, struct rtmsg *rtm);
117 static int ip6mr_rtm_dumproute(struct sk_buff *skb, 117 static int ip6mr_rtm_dumproute(struct sk_buff *skb,
118 struct netlink_callback *cb); 118 struct netlink_callback *cb);
119 static void mroute_clean_tables(struct mr6_table *mrt); 119 static void mroute_clean_tables(struct mr6_table *mrt);
120 static void ipmr_expire_process(unsigned long arg); 120 static void ipmr_expire_process(unsigned long arg);
121 121
122 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES 122 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
123 #define ip6mr_for_each_table(mrt, met) \ 123 #define ip6mr_for_each_table(mrt, met) \
124 list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list) 124 list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
125 125
126 static struct mr6_table *ip6mr_get_table(struct net *net, u32 id) 126 static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
127 { 127 {
128 struct mr6_table *mrt; 128 struct mr6_table *mrt;
129 129
130 ip6mr_for_each_table(mrt, net) { 130 ip6mr_for_each_table(mrt, net) {
131 if (mrt->id == id) 131 if (mrt->id == id)
132 return mrt; 132 return mrt;
133 } 133 }
134 return NULL; 134 return NULL;
135 } 135 }
136 136
137 static int ip6mr_fib_lookup(struct net *net, struct flowi *flp, 137 static int ip6mr_fib_lookup(struct net *net, struct flowi *flp,
138 struct mr6_table **mrt) 138 struct mr6_table **mrt)
139 { 139 {
140 struct ip6mr_result res; 140 struct ip6mr_result res;
141 struct fib_lookup_arg arg = { .result = &res, }; 141 struct fib_lookup_arg arg = { .result = &res, };
142 int err; 142 int err;
143 143
144 err = fib_rules_lookup(net->ipv6.mr6_rules_ops, flp, 0, &arg); 144 err = fib_rules_lookup(net->ipv6.mr6_rules_ops, flp, 0, &arg);
145 if (err < 0) 145 if (err < 0)
146 return err; 146 return err;
147 *mrt = res.mrt; 147 *mrt = res.mrt;
148 return 0; 148 return 0;
149 } 149 }
150 150
151 static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp, 151 static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
152 int flags, struct fib_lookup_arg *arg) 152 int flags, struct fib_lookup_arg *arg)
153 { 153 {
154 struct ip6mr_result *res = arg->result; 154 struct ip6mr_result *res = arg->result;
155 struct mr6_table *mrt; 155 struct mr6_table *mrt;
156 156
157 switch (rule->action) { 157 switch (rule->action) {
158 case FR_ACT_TO_TBL: 158 case FR_ACT_TO_TBL:
159 break; 159 break;
160 case FR_ACT_UNREACHABLE: 160 case FR_ACT_UNREACHABLE:
161 return -ENETUNREACH; 161 return -ENETUNREACH;
162 case FR_ACT_PROHIBIT: 162 case FR_ACT_PROHIBIT:
163 return -EACCES; 163 return -EACCES;
164 case FR_ACT_BLACKHOLE: 164 case FR_ACT_BLACKHOLE:
165 default: 165 default:
166 return -EINVAL; 166 return -EINVAL;
167 } 167 }
168 168
169 mrt = ip6mr_get_table(rule->fr_net, rule->table); 169 mrt = ip6mr_get_table(rule->fr_net, rule->table);
170 if (mrt == NULL) 170 if (mrt == NULL)
171 return -EAGAIN; 171 return -EAGAIN;
172 res->mrt = mrt; 172 res->mrt = mrt;
173 return 0; 173 return 0;
174 } 174 }
175 175
176 static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags) 176 static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
177 { 177 {
178 return 1; 178 return 1;
179 } 179 }
180 180
181 static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = { 181 static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
182 FRA_GENERIC_POLICY, 182 FRA_GENERIC_POLICY,
183 }; 183 };
184 184
185 static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb, 185 static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
186 struct fib_rule_hdr *frh, struct nlattr **tb) 186 struct fib_rule_hdr *frh, struct nlattr **tb)
187 { 187 {
188 return 0; 188 return 0;
189 } 189 }
190 190
191 static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, 191 static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
192 struct nlattr **tb) 192 struct nlattr **tb)
193 { 193 {
194 return 1; 194 return 1;
195 } 195 }
196 196
197 static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb, 197 static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
198 struct fib_rule_hdr *frh) 198 struct fib_rule_hdr *frh)
199 { 199 {
200 frh->dst_len = 0; 200 frh->dst_len = 0;
201 frh->src_len = 0; 201 frh->src_len = 0;
202 frh->tos = 0; 202 frh->tos = 0;
203 return 0; 203 return 0;
204 } 204 }
205 205
206 static const struct fib_rules_ops __net_initdata ip6mr_rules_ops_template = { 206 static const struct fib_rules_ops __net_initdata ip6mr_rules_ops_template = {
207 .family = RTNL_FAMILY_IP6MR, 207 .family = RTNL_FAMILY_IP6MR,
208 .rule_size = sizeof(struct ip6mr_rule), 208 .rule_size = sizeof(struct ip6mr_rule),
209 .addr_size = sizeof(struct in6_addr), 209 .addr_size = sizeof(struct in6_addr),
210 .action = ip6mr_rule_action, 210 .action = ip6mr_rule_action,
211 .match = ip6mr_rule_match, 211 .match = ip6mr_rule_match,
212 .configure = ip6mr_rule_configure, 212 .configure = ip6mr_rule_configure,
213 .compare = ip6mr_rule_compare, 213 .compare = ip6mr_rule_compare,
214 .default_pref = fib_default_rule_pref, 214 .default_pref = fib_default_rule_pref,
215 .fill = ip6mr_rule_fill, 215 .fill = ip6mr_rule_fill,
216 .nlgroup = RTNLGRP_IPV6_RULE, 216 .nlgroup = RTNLGRP_IPV6_RULE,
217 .policy = ip6mr_rule_policy, 217 .policy = ip6mr_rule_policy,
218 .owner = THIS_MODULE, 218 .owner = THIS_MODULE,
219 }; 219 };
220 220
221 static int __net_init ip6mr_rules_init(struct net *net) 221 static int __net_init ip6mr_rules_init(struct net *net)
222 { 222 {
223 struct fib_rules_ops *ops; 223 struct fib_rules_ops *ops;
224 struct mr6_table *mrt; 224 struct mr6_table *mrt;
225 int err; 225 int err;
226 226
227 ops = fib_rules_register(&ip6mr_rules_ops_template, net); 227 ops = fib_rules_register(&ip6mr_rules_ops_template, net);
228 if (IS_ERR(ops)) 228 if (IS_ERR(ops))
229 return PTR_ERR(ops); 229 return PTR_ERR(ops);
230 230
231 INIT_LIST_HEAD(&net->ipv6.mr6_tables); 231 INIT_LIST_HEAD(&net->ipv6.mr6_tables);
232 232
233 mrt = ip6mr_new_table(net, RT6_TABLE_DFLT); 233 mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
234 if (mrt == NULL) { 234 if (mrt == NULL) {
235 err = -ENOMEM; 235 err = -ENOMEM;
236 goto err1; 236 goto err1;
237 } 237 }
238 238
239 err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0); 239 err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
240 if (err < 0) 240 if (err < 0)
241 goto err2; 241 goto err2;
242 242
243 net->ipv6.mr6_rules_ops = ops; 243 net->ipv6.mr6_rules_ops = ops;
244 return 0; 244 return 0;
245 245
246 err2: 246 err2:
247 kfree(mrt); 247 kfree(mrt);
248 err1: 248 err1:
249 fib_rules_unregister(ops); 249 fib_rules_unregister(ops);
250 return err; 250 return err;
251 } 251 }
252 252
253 static void __net_exit ip6mr_rules_exit(struct net *net) 253 static void __net_exit ip6mr_rules_exit(struct net *net)
254 { 254 {
255 struct mr6_table *mrt, *next; 255 struct mr6_table *mrt, *next;
256 256
257 list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) 257 list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list)
258 ip6mr_free_table(mrt); 258 ip6mr_free_table(mrt);
259 fib_rules_unregister(net->ipv6.mr6_rules_ops); 259 fib_rules_unregister(net->ipv6.mr6_rules_ops);
260 } 260 }
261 #else 261 #else
262 #define ip6mr_for_each_table(mrt, net) \ 262 #define ip6mr_for_each_table(mrt, net) \
263 for (mrt = net->ipv6.mrt6; mrt; mrt = NULL) 263 for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
264 264
265 static struct mr6_table *ip6mr_get_table(struct net *net, u32 id) 265 static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
266 { 266 {
267 return net->ipv6.mrt6; 267 return net->ipv6.mrt6;
268 } 268 }
269 269
270 static int ip6mr_fib_lookup(struct net *net, struct flowi *flp, 270 static int ip6mr_fib_lookup(struct net *net, struct flowi *flp,
271 struct mr6_table **mrt) 271 struct mr6_table **mrt)
272 { 272 {
273 *mrt = net->ipv6.mrt6; 273 *mrt = net->ipv6.mrt6;
274 return 0; 274 return 0;
275 } 275 }
276 276
277 static int __net_init ip6mr_rules_init(struct net *net) 277 static int __net_init ip6mr_rules_init(struct net *net)
278 { 278 {
279 net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT); 279 net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
280 return net->ipv6.mrt6 ? 0 : -ENOMEM; 280 return net->ipv6.mrt6 ? 0 : -ENOMEM;
281 } 281 }
282 282
283 static void __net_exit ip6mr_rules_exit(struct net *net) 283 static void __net_exit ip6mr_rules_exit(struct net *net)
284 { 284 {
285 ip6mr_free_table(net->ipv6.mrt6); 285 ip6mr_free_table(net->ipv6.mrt6);
286 } 286 }
287 #endif 287 #endif
288 288
289 static struct mr6_table *ip6mr_new_table(struct net *net, u32 id) 289 static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
290 { 290 {
291 struct mr6_table *mrt; 291 struct mr6_table *mrt;
292 unsigned int i; 292 unsigned int i;
293 293
294 mrt = ip6mr_get_table(net, id); 294 mrt = ip6mr_get_table(net, id);
295 if (mrt != NULL) 295 if (mrt != NULL)
296 return mrt; 296 return mrt;
297 297
298 mrt = kzalloc(sizeof(*mrt), GFP_KERNEL); 298 mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
299 if (mrt == NULL) 299 if (mrt == NULL)
300 return NULL; 300 return NULL;
301 mrt->id = id; 301 mrt->id = id;
302 write_pnet(&mrt->net, net); 302 write_pnet(&mrt->net, net);
303 303
304 /* Forwarding cache */ 304 /* Forwarding cache */
305 for (i = 0; i < MFC6_LINES; i++) 305 for (i = 0; i < MFC6_LINES; i++)
306 INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]); 306 INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);
307 307
308 INIT_LIST_HEAD(&mrt->mfc6_unres_queue); 308 INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
309 309
310 setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process, 310 setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
311 (unsigned long)mrt); 311 (unsigned long)mrt);
312 312
313 #ifdef CONFIG_IPV6_PIMSM_V2 313 #ifdef CONFIG_IPV6_PIMSM_V2
314 mrt->mroute_reg_vif_num = -1; 314 mrt->mroute_reg_vif_num = -1;
315 #endif 315 #endif
316 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES 316 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
317 list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables); 317 list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
318 #endif 318 #endif
319 return mrt; 319 return mrt;
320 } 320 }
321 321
322 static void ip6mr_free_table(struct mr6_table *mrt) 322 static void ip6mr_free_table(struct mr6_table *mrt)
323 { 323 {
324 del_timer(&mrt->ipmr_expire_timer); 324 del_timer(&mrt->ipmr_expire_timer);
325 mroute_clean_tables(mrt); 325 mroute_clean_tables(mrt);
326 kfree(mrt); 326 kfree(mrt);
327 } 327 }
328 328
329 #ifdef CONFIG_PROC_FS 329 #ifdef CONFIG_PROC_FS
330 330
331 struct ipmr_mfc_iter { 331 struct ipmr_mfc_iter {
332 struct seq_net_private p; 332 struct seq_net_private p;
333 struct mr6_table *mrt; 333 struct mr6_table *mrt;
334 struct list_head *cache; 334 struct list_head *cache;
335 int ct; 335 int ct;
336 }; 336 };
337 337
338 338
339 static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net, 339 static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
340 struct ipmr_mfc_iter *it, loff_t pos) 340 struct ipmr_mfc_iter *it, loff_t pos)
341 { 341 {
342 struct mr6_table *mrt = it->mrt; 342 struct mr6_table *mrt = it->mrt;
343 struct mfc6_cache *mfc; 343 struct mfc6_cache *mfc;
344 344
345 read_lock(&mrt_lock); 345 read_lock(&mrt_lock);
346 for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) { 346 for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
347 it->cache = &mrt->mfc6_cache_array[it->ct]; 347 it->cache = &mrt->mfc6_cache_array[it->ct];
348 list_for_each_entry(mfc, it->cache, list) 348 list_for_each_entry(mfc, it->cache, list)
349 if (pos-- == 0) 349 if (pos-- == 0)
350 return mfc; 350 return mfc;
351 } 351 }
352 read_unlock(&mrt_lock); 352 read_unlock(&mrt_lock);
353 353
354 spin_lock_bh(&mfc_unres_lock); 354 spin_lock_bh(&mfc_unres_lock);
355 it->cache = &mrt->mfc6_unres_queue; 355 it->cache = &mrt->mfc6_unres_queue;
356 list_for_each_entry(mfc, it->cache, list) 356 list_for_each_entry(mfc, it->cache, list)
357 if (pos-- == 0) 357 if (pos-- == 0)
358 return mfc; 358 return mfc;
359 spin_unlock_bh(&mfc_unres_lock); 359 spin_unlock_bh(&mfc_unres_lock);
360 360
361 it->cache = NULL; 361 it->cache = NULL;
362 return NULL; 362 return NULL;
363 } 363 }
364 364
365 /* 365 /*
366 * The /proc interfaces to multicast routing: /proc/ip6_mr_cache and /proc/ip6_mr_vif 366 * The /proc interfaces to multicast routing: /proc/ip6_mr_cache and /proc/ip6_mr_vif
367 */ 367 */
368 368
369 struct ipmr_vif_iter { 369 struct ipmr_vif_iter {
370 struct seq_net_private p; 370 struct seq_net_private p;
371 struct mr6_table *mrt; 371 struct mr6_table *mrt;
372 int ct; 372 int ct;
373 }; 373 };
374 374
375 static struct mif_device *ip6mr_vif_seq_idx(struct net *net, 375 static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
376 struct ipmr_vif_iter *iter, 376 struct ipmr_vif_iter *iter,
377 loff_t pos) 377 loff_t pos)
378 { 378 {
379 struct mr6_table *mrt = iter->mrt; 379 struct mr6_table *mrt = iter->mrt;
380 380
381 for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) { 381 for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
382 if (!MIF_EXISTS(mrt, iter->ct)) 382 if (!MIF_EXISTS(mrt, iter->ct))
383 continue; 383 continue;
384 if (pos-- == 0) 384 if (pos-- == 0)
385 return &mrt->vif6_table[iter->ct]; 385 return &mrt->vif6_table[iter->ct];
386 } 386 }
387 return NULL; 387 return NULL;
388 } 388 }
389 389
390 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos) 390 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
391 __acquires(mrt_lock) 391 __acquires(mrt_lock)
392 { 392 {
393 struct ipmr_vif_iter *iter = seq->private; 393 struct ipmr_vif_iter *iter = seq->private;
394 struct net *net = seq_file_net(seq); 394 struct net *net = seq_file_net(seq);
395 struct mr6_table *mrt; 395 struct mr6_table *mrt;
396 396
397 mrt = ip6mr_get_table(net, RT6_TABLE_DFLT); 397 mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
398 if (mrt == NULL) 398 if (mrt == NULL)
399 return ERR_PTR(-ENOENT); 399 return ERR_PTR(-ENOENT);
400 400
401 iter->mrt = mrt; 401 iter->mrt = mrt;
402 402
403 read_lock(&mrt_lock); 403 read_lock(&mrt_lock);
404 return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1) 404 return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
405 : SEQ_START_TOKEN; 405 : SEQ_START_TOKEN;
406 } 406 }
407 407
408 static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos) 408 static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
409 { 409 {
410 struct ipmr_vif_iter *iter = seq->private; 410 struct ipmr_vif_iter *iter = seq->private;
411 struct net *net = seq_file_net(seq); 411 struct net *net = seq_file_net(seq);
412 struct mr6_table *mrt = iter->mrt; 412 struct mr6_table *mrt = iter->mrt;
413 413
414 ++*pos; 414 ++*pos;
415 if (v == SEQ_START_TOKEN) 415 if (v == SEQ_START_TOKEN)
416 return ip6mr_vif_seq_idx(net, iter, 0); 416 return ip6mr_vif_seq_idx(net, iter, 0);
417 417
418 while (++iter->ct < mrt->maxvif) { 418 while (++iter->ct < mrt->maxvif) {
419 if (!MIF_EXISTS(mrt, iter->ct)) 419 if (!MIF_EXISTS(mrt, iter->ct))
420 continue; 420 continue;
421 return &mrt->vif6_table[iter->ct]; 421 return &mrt->vif6_table[iter->ct];
422 } 422 }
423 return NULL; 423 return NULL;
424 } 424 }
425 425
426 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v) 426 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
427 __releases(mrt_lock) 427 __releases(mrt_lock)
428 { 428 {
429 read_unlock(&mrt_lock); 429 read_unlock(&mrt_lock);
430 } 430 }
431 431
432 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v) 432 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
433 { 433 {
434 struct ipmr_vif_iter *iter = seq->private; 434 struct ipmr_vif_iter *iter = seq->private;
435 struct mr6_table *mrt = iter->mrt; 435 struct mr6_table *mrt = iter->mrt;
436 436
437 if (v == SEQ_START_TOKEN) { 437 if (v == SEQ_START_TOKEN) {
438 seq_puts(seq, 438 seq_puts(seq,
439 "Interface BytesIn PktsIn BytesOut PktsOut Flags\n"); 439 "Interface BytesIn PktsIn BytesOut PktsOut Flags\n");
440 } else { 440 } else {
441 const struct mif_device *vif = v; 441 const struct mif_device *vif = v;
442 const char *name = vif->dev ? vif->dev->name : "none"; 442 const char *name = vif->dev ? vif->dev->name : "none";
443 443
444 seq_printf(seq, 444 seq_printf(seq,
445 "%2td %-10s %8ld %7ld %8ld %7ld %05X\n", 445 "%2td %-10s %8ld %7ld %8ld %7ld %05X\n",
446 vif - mrt->vif6_table, 446 vif - mrt->vif6_table,
447 name, vif->bytes_in, vif->pkt_in, 447 name, vif->bytes_in, vif->pkt_in,
448 vif->bytes_out, vif->pkt_out, 448 vif->bytes_out, vif->pkt_out,
449 vif->flags); 449 vif->flags);
450 } 450 }
451 return 0; 451 return 0;
452 } 452 }
453 453
454 static const struct seq_operations ip6mr_vif_seq_ops = { 454 static const struct seq_operations ip6mr_vif_seq_ops = {
455 .start = ip6mr_vif_seq_start, 455 .start = ip6mr_vif_seq_start,
456 .next = ip6mr_vif_seq_next, 456 .next = ip6mr_vif_seq_next,
457 .stop = ip6mr_vif_seq_stop, 457 .stop = ip6mr_vif_seq_stop,
458 .show = ip6mr_vif_seq_show, 458 .show = ip6mr_vif_seq_show,
459 }; 459 };
460 460
461 static int ip6mr_vif_open(struct inode *inode, struct file *file) 461 static int ip6mr_vif_open(struct inode *inode, struct file *file)
462 { 462 {
463 return seq_open_net(inode, file, &ip6mr_vif_seq_ops, 463 return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
464 sizeof(struct ipmr_vif_iter)); 464 sizeof(struct ipmr_vif_iter));
465 } 465 }
466 466
467 static const struct file_operations ip6mr_vif_fops = { 467 static const struct file_operations ip6mr_vif_fops = {
468 .owner = THIS_MODULE, 468 .owner = THIS_MODULE,
469 .open = ip6mr_vif_open, 469 .open = ip6mr_vif_open,
470 .read = seq_read, 470 .read = seq_read,
471 .llseek = seq_lseek, 471 .llseek = seq_lseek,
472 .release = seq_release_net, 472 .release = seq_release_net,
473 }; 473 };
474 474
475 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) 475 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
476 { 476 {
477 struct ipmr_mfc_iter *it = seq->private; 477 struct ipmr_mfc_iter *it = seq->private;
478 struct net *net = seq_file_net(seq); 478 struct net *net = seq_file_net(seq);
479 struct mr6_table *mrt; 479 struct mr6_table *mrt;
480 480
481 mrt = ip6mr_get_table(net, RT6_TABLE_DFLT); 481 mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
482 if (mrt == NULL) 482 if (mrt == NULL)
483 return ERR_PTR(-ENOENT); 483 return ERR_PTR(-ENOENT);
484 484
485 it->mrt = mrt; 485 it->mrt = mrt;
486 return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1) 486 return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
487 : SEQ_START_TOKEN; 487 : SEQ_START_TOKEN;
488 } 488 }
489 489
490 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) 490 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
491 { 491 {
492 struct mfc6_cache *mfc = v; 492 struct mfc6_cache *mfc = v;
493 struct ipmr_mfc_iter *it = seq->private; 493 struct ipmr_mfc_iter *it = seq->private;
494 struct net *net = seq_file_net(seq); 494 struct net *net = seq_file_net(seq);
495 struct mr6_table *mrt = it->mrt; 495 struct mr6_table *mrt = it->mrt;
496 496
497 ++*pos; 497 ++*pos;
498 498
499 if (v == SEQ_START_TOKEN) 499 if (v == SEQ_START_TOKEN)
500 return ipmr_mfc_seq_idx(net, seq->private, 0); 500 return ipmr_mfc_seq_idx(net, seq->private, 0);
501 501
502 if (mfc->list.next != it->cache) 502 if (mfc->list.next != it->cache)
503 return list_entry(mfc->list.next, struct mfc6_cache, list); 503 return list_entry(mfc->list.next, struct mfc6_cache, list);
504 504
505 if (it->cache == &mrt->mfc6_unres_queue) 505 if (it->cache == &mrt->mfc6_unres_queue)
506 goto end_of_list; 506 goto end_of_list;
507 507
508 BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]); 508 BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);
509 509
510 while (++it->ct < MFC6_LINES) { 510 while (++it->ct < MFC6_LINES) {
511 it->cache = &mrt->mfc6_cache_array[it->ct]; 511 it->cache = &mrt->mfc6_cache_array[it->ct];
512 if (list_empty(it->cache)) 512 if (list_empty(it->cache))
513 continue; 513 continue;
514 return list_first_entry(it->cache, struct mfc6_cache, list); 514 return list_first_entry(it->cache, struct mfc6_cache, list);
515 } 515 }
516 516
517 /* exhausted cache_array, show unresolved */ 517 /* exhausted cache_array, show unresolved */
518 read_unlock(&mrt_lock); 518 read_unlock(&mrt_lock);
519 it->cache = &mrt->mfc6_unres_queue; 519 it->cache = &mrt->mfc6_unres_queue;
520 it->ct = 0; 520 it->ct = 0;
521 521
522 spin_lock_bh(&mfc_unres_lock); 522 spin_lock_bh(&mfc_unres_lock);
523 if (!list_empty(it->cache)) 523 if (!list_empty(it->cache))
524 return list_first_entry(it->cache, struct mfc6_cache, list); 524 return list_first_entry(it->cache, struct mfc6_cache, list);
525 525
526 end_of_list: 526 end_of_list:
527 spin_unlock_bh(&mfc_unres_lock); 527 spin_unlock_bh(&mfc_unres_lock);
528 it->cache = NULL; 528 it->cache = NULL;
529 529
530 return NULL; 530 return NULL;
531 } 531 }
532 532
533 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v) 533 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
534 { 534 {
535 struct ipmr_mfc_iter *it = seq->private; 535 struct ipmr_mfc_iter *it = seq->private;
536 struct mr6_table *mrt = it->mrt; 536 struct mr6_table *mrt = it->mrt;
537 537
538 if (it->cache == &mrt->mfc6_unres_queue) 538 if (it->cache == &mrt->mfc6_unres_queue)
539 spin_unlock_bh(&mfc_unres_lock); 539 spin_unlock_bh(&mfc_unres_lock);
540 else if (it->cache == mrt->mfc6_cache_array) 540 else if (it->cache == mrt->mfc6_cache_array)
541 read_unlock(&mrt_lock); 541 read_unlock(&mrt_lock);
542 } 542 }
543 543
544 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) 544 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
545 { 545 {
546 int n; 546 int n;
547 547
548 if (v == SEQ_START_TOKEN) { 548 if (v == SEQ_START_TOKEN) {
549 seq_puts(seq, 549 seq_puts(seq,
550 "Group " 550 "Group "
551 "Origin " 551 "Origin "
552 "Iif Pkts Bytes Wrong Oifs\n"); 552 "Iif Pkts Bytes Wrong Oifs\n");
553 } else { 553 } else {
554 const struct mfc6_cache *mfc = v; 554 const struct mfc6_cache *mfc = v;
555 const struct ipmr_mfc_iter *it = seq->private; 555 const struct ipmr_mfc_iter *it = seq->private;
556 struct mr6_table *mrt = it->mrt; 556 struct mr6_table *mrt = it->mrt;
557 557
558 seq_printf(seq, "%pI6 %pI6 %-3hd", 558 seq_printf(seq, "%pI6 %pI6 %-3hd",
559 &mfc->mf6c_mcastgrp, &mfc->mf6c_origin, 559 &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
560 mfc->mf6c_parent); 560 mfc->mf6c_parent);
561 561
562 if (it->cache != &mrt->mfc6_unres_queue) { 562 if (it->cache != &mrt->mfc6_unres_queue) {
563 seq_printf(seq, " %8lu %8lu %8lu", 563 seq_printf(seq, " %8lu %8lu %8lu",
564 mfc->mfc_un.res.pkt, 564 mfc->mfc_un.res.pkt,
565 mfc->mfc_un.res.bytes, 565 mfc->mfc_un.res.bytes,
566 mfc->mfc_un.res.wrong_if); 566 mfc->mfc_un.res.wrong_if);
567 for (n = mfc->mfc_un.res.minvif; 567 for (n = mfc->mfc_un.res.minvif;
568 n < mfc->mfc_un.res.maxvif; n++) { 568 n < mfc->mfc_un.res.maxvif; n++) {
569 if (MIF_EXISTS(mrt, n) && 569 if (MIF_EXISTS(mrt, n) &&
570 mfc->mfc_un.res.ttls[n] < 255) 570 mfc->mfc_un.res.ttls[n] < 255)
571 seq_printf(seq, 571 seq_printf(seq,
572 " %2d:%-3d", 572 " %2d:%-3d",
573 n, mfc->mfc_un.res.ttls[n]); 573 n, mfc->mfc_un.res.ttls[n]);
574 } 574 }
575 } else { 575 } else {
576 /* unresolved mfc_caches don't contain 576 /* unresolved mfc_caches don't contain
577 * pkt, bytes and wrong_if values 577 * pkt, bytes and wrong_if values
578 */ 578 */
579 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul); 579 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
580 } 580 }
581 seq_putc(seq, '\n'); 581 seq_putc(seq, '\n');
582 } 582 }
583 return 0; 583 return 0;
584 } 584 }
585 585
586 static const struct seq_operations ipmr_mfc_seq_ops = { 586 static const struct seq_operations ipmr_mfc_seq_ops = {
587 .start = ipmr_mfc_seq_start, 587 .start = ipmr_mfc_seq_start,
588 .next = ipmr_mfc_seq_next, 588 .next = ipmr_mfc_seq_next,
589 .stop = ipmr_mfc_seq_stop, 589 .stop = ipmr_mfc_seq_stop,
590 .show = ipmr_mfc_seq_show, 590 .show = ipmr_mfc_seq_show,
591 }; 591 };
592 592
593 static int ipmr_mfc_open(struct inode *inode, struct file *file) 593 static int ipmr_mfc_open(struct inode *inode, struct file *file)
594 { 594 {
595 return seq_open_net(inode, file, &ipmr_mfc_seq_ops, 595 return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
596 sizeof(struct ipmr_mfc_iter)); 596 sizeof(struct ipmr_mfc_iter));
597 } 597 }
598 598
599 static const struct file_operations ip6mr_mfc_fops = { 599 static const struct file_operations ip6mr_mfc_fops = {
600 .owner = THIS_MODULE, 600 .owner = THIS_MODULE,
601 .open = ipmr_mfc_open, 601 .open = ipmr_mfc_open,
602 .read = seq_read, 602 .read = seq_read,
603 .llseek = seq_lseek, 603 .llseek = seq_lseek,
604 .release = seq_release_net, 604 .release = seq_release_net,
605 }; 605 };
606 #endif 606 #endif
607 607
608 #ifdef CONFIG_IPV6_PIMSM_V2 608 #ifdef CONFIG_IPV6_PIMSM_V2
609 609
610 static int pim6_rcv(struct sk_buff *skb) 610 static int pim6_rcv(struct sk_buff *skb)
611 { 611 {
612 struct pimreghdr *pim; 612 struct pimreghdr *pim;
613 struct ipv6hdr *encap; 613 struct ipv6hdr *encap;
614 struct net_device *reg_dev = NULL; 614 struct net_device *reg_dev = NULL;
615 struct net *net = dev_net(skb->dev); 615 struct net *net = dev_net(skb->dev);
616 struct mr6_table *mrt; 616 struct mr6_table *mrt;
617 struct flowi fl = { 617 struct flowi fl = {
618 .iif = skb->dev->ifindex, 618 .iif = skb->dev->ifindex,
619 .mark = skb->mark, 619 .mark = skb->mark,
620 }; 620 };
621 int reg_vif_num; 621 int reg_vif_num;
622 622
623 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap))) 623 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
624 goto drop; 624 goto drop;
625 625
626 pim = (struct pimreghdr *)skb_transport_header(skb); 626 pim = (struct pimreghdr *)skb_transport_header(skb);
627 if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) || 627 if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
628 (pim->flags & PIM_NULL_REGISTER) || 628 (pim->flags & PIM_NULL_REGISTER) ||
629 (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, 629 (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
630 sizeof(*pim), IPPROTO_PIM, 630 sizeof(*pim), IPPROTO_PIM,
631 csum_partial((void *)pim, sizeof(*pim), 0)) && 631 csum_partial((void *)pim, sizeof(*pim), 0)) &&
632 csum_fold(skb_checksum(skb, 0, skb->len, 0)))) 632 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
633 goto drop; 633 goto drop;
634 634
635 /* check if the inner packet is destined to a multicast group */ 635 /* check if the inner packet is destined to a multicast group */
636 encap = (struct ipv6hdr *)(skb_transport_header(skb) + 636 encap = (struct ipv6hdr *)(skb_transport_header(skb) +
637 sizeof(*pim)); 637 sizeof(*pim));
638 638
639 if (!ipv6_addr_is_multicast(&encap->daddr) || 639 if (!ipv6_addr_is_multicast(&encap->daddr) ||
640 encap->payload_len == 0 || 640 encap->payload_len == 0 ||
641 ntohs(encap->payload_len) + sizeof(*pim) > skb->len) 641 ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
642 goto drop; 642 goto drop;
643 643
644 if (ip6mr_fib_lookup(net, &fl, &mrt) < 0) 644 if (ip6mr_fib_lookup(net, &fl, &mrt) < 0)
645 goto drop; 645 goto drop;
646 reg_vif_num = mrt->mroute_reg_vif_num; 646 reg_vif_num = mrt->mroute_reg_vif_num;
647 647
648 read_lock(&mrt_lock); 648 read_lock(&mrt_lock);
649 if (reg_vif_num >= 0) 649 if (reg_vif_num >= 0)
650 reg_dev = mrt->vif6_table[reg_vif_num].dev; 650 reg_dev = mrt->vif6_table[reg_vif_num].dev;
651 if (reg_dev) 651 if (reg_dev)
652 dev_hold(reg_dev); 652 dev_hold(reg_dev);
653 read_unlock(&mrt_lock); 653 read_unlock(&mrt_lock);
654 654
655 if (reg_dev == NULL) 655 if (reg_dev == NULL)
656 goto drop; 656 goto drop;
657 657
658 skb->mac_header = skb->network_header; 658 skb->mac_header = skb->network_header;
659 skb_pull(skb, (u8 *)encap - skb->data); 659 skb_pull(skb, (u8 *)encap - skb->data);
660 skb_reset_network_header(skb); 660 skb_reset_network_header(skb);
661 skb->dev = reg_dev;
662 skb->protocol = htons(ETH_P_IPV6); 661 skb->protocol = htons(ETH_P_IPV6);
663 skb->ip_summed = 0; 662 skb->ip_summed = 0;
664 skb->pkt_type = PACKET_HOST; 663 skb->pkt_type = PACKET_HOST;
665 skb_dst_drop(skb); 664
666 reg_dev->stats.rx_bytes += skb->len; 665 skb_tunnel_rx(skb, reg_dev);
667 reg_dev->stats.rx_packets++; 666
668 nf_reset(skb);
669 netif_rx(skb); 667 netif_rx(skb);
670 dev_put(reg_dev); 668 dev_put(reg_dev);
671 return 0; 669 return 0;
672 drop: 670 drop:
673 kfree_skb(skb); 671 kfree_skb(skb);
674 return 0; 672 return 0;
675 } 673 }
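
The hunk above is where this commit lands in ip6mr: the open-coded decapsulation bookkeeping in pim6_rcv() (the skb->dev assignment, skb_dst_drop(), the rx byte/packet counters and nf_reset()) collapses into the new skb_tunnel_rx() helper. Judging from the lines removed here and the helper's stated purpose, its body in net/dst.h amounts to roughly the following sketch; the authoritative definition is in the net/dst.h portion of this diff:

	static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev)
	{
		skb->dev = dev;			/* packet now belongs to the tunnel device */
		dev->stats.rx_packets++;	/* account the decapsulated packet */
		dev->stats.rx_bytes += skb->len;
		skb->rxhash = 0;		/* clear the stale outer-header hash so
						 * packet steering is recomputed */
		skb_dst_drop(skb);		/* the outer route no longer applies */
		nf_reset(skb);			/* forget netfilter state from the outer path */
	}

Factoring this out keeps every tunnel receive path consistent, and any future tweak to the decapsulation bookkeeping then lands in one place instead of in each caller.
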
676 674
677 static const struct inet6_protocol pim6_protocol = { 675 static const struct inet6_protocol pim6_protocol = {
678 .handler = pim6_rcv, 676 .handler = pim6_rcv,
679 }; 677 };
680 678
681 /* Service routines creating virtual interfaces: PIMREG */ 679 /* Service routines creating virtual interfaces: PIMREG */
682 680
683 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, 681 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
684 struct net_device *dev) 682 struct net_device *dev)
685 { 683 {
686 struct net *net = dev_net(dev); 684 struct net *net = dev_net(dev);
687 struct mr6_table *mrt; 685 struct mr6_table *mrt;
688 struct flowi fl = { 686 struct flowi fl = {
689 .oif = dev->ifindex, 687 .oif = dev->ifindex,
690 .iif = skb->skb_iif, 688 .iif = skb->skb_iif,
691 .mark = skb->mark, 689 .mark = skb->mark,
692 }; 690 };
693 int err; 691 int err;
694 692
695 err = ip6mr_fib_lookup(net, &fl, &mrt); 693 err = ip6mr_fib_lookup(net, &fl, &mrt);
696 if (err < 0) 694 if (err < 0)
697 return err; 695 return err;
698 696
699 read_lock(&mrt_lock); 697 read_lock(&mrt_lock);
700 dev->stats.tx_bytes += skb->len; 698 dev->stats.tx_bytes += skb->len;
701 dev->stats.tx_packets++; 699 dev->stats.tx_packets++;
702 ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT); 700 ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
703 read_unlock(&mrt_lock); 701 read_unlock(&mrt_lock);
704 kfree_skb(skb); 702 kfree_skb(skb);
705 return NETDEV_TX_OK; 703 return NETDEV_TX_OK;
706 } 704 }
707 705
708 static const struct net_device_ops reg_vif_netdev_ops = { 706 static const struct net_device_ops reg_vif_netdev_ops = {
709 .ndo_start_xmit = reg_vif_xmit, 707 .ndo_start_xmit = reg_vif_xmit,
710 }; 708 };
711 709
712 static void reg_vif_setup(struct net_device *dev) 710 static void reg_vif_setup(struct net_device *dev)
713 { 711 {
714 dev->type = ARPHRD_PIMREG; 712 dev->type = ARPHRD_PIMREG;
715 dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8; 713 dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8;
716 dev->flags = IFF_NOARP; 714 dev->flags = IFF_NOARP;
717 dev->netdev_ops = &reg_vif_netdev_ops; 715 dev->netdev_ops = &reg_vif_netdev_ops;
718 dev->destructor = free_netdev; 716 dev->destructor = free_netdev;
719 dev->features |= NETIF_F_NETNS_LOCAL; 717 dev->features |= NETIF_F_NETNS_LOCAL;
720 } 718 }
721 719
722 static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt) 720 static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
723 { 721 {
724 struct net_device *dev; 722 struct net_device *dev;
725 char name[IFNAMSIZ]; 723 char name[IFNAMSIZ];
726 724
727 if (mrt->id == RT6_TABLE_DFLT) 725 if (mrt->id == RT6_TABLE_DFLT)
728 sprintf(name, "pim6reg"); 726 sprintf(name, "pim6reg");
729 else 727 else
730 sprintf(name, "pim6reg%u", mrt->id); 728 sprintf(name, "pim6reg%u", mrt->id);
731 729
732 dev = alloc_netdev(0, name, reg_vif_setup); 730 dev = alloc_netdev(0, name, reg_vif_setup);
733 if (dev == NULL) 731 if (dev == NULL)
734 return NULL; 732 return NULL;
735 733
736 dev_net_set(dev, net); 734 dev_net_set(dev, net);
737 735
738 if (register_netdevice(dev)) { 736 if (register_netdevice(dev)) {
739 free_netdev(dev); 737 free_netdev(dev);
740 return NULL; 738 return NULL;
741 } 739 }
742 dev->iflink = 0; 740 dev->iflink = 0;
743 741
744 if (dev_open(dev)) 742 if (dev_open(dev))
745 goto failure; 743 goto failure;
746 744
747 dev_hold(dev); 745 dev_hold(dev);
748 return dev; 746 return dev;
749 747
750 failure: 748 failure:
751 /* allow the register to be completed before unregistering. */ 749 /* allow the register to be completed before unregistering. */
752 rtnl_unlock(); 750 rtnl_unlock();
753 rtnl_lock(); 751 rtnl_lock();
754 752
755 unregister_netdevice(dev); 753 unregister_netdevice(dev);
756 return NULL; 754 return NULL;
757 } 755 }
758 #endif 756 #endif
759 757
760 /* 758 /*
761 * Delete a VIF entry 759 * Delete a VIF entry
762 */ 760 */
763 761
764 static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head) 762 static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
765 { 763 {
766 struct mif_device *v; 764 struct mif_device *v;
767 struct net_device *dev; 765 struct net_device *dev;
768 struct inet6_dev *in6_dev; 766 struct inet6_dev *in6_dev;
769 767
770 if (vifi < 0 || vifi >= mrt->maxvif) 768 if (vifi < 0 || vifi >= mrt->maxvif)
771 return -EADDRNOTAVAIL; 769 return -EADDRNOTAVAIL;
772 770
773 v = &mrt->vif6_table[vifi]; 771 v = &mrt->vif6_table[vifi];
774 772
775 write_lock_bh(&mrt_lock); 773 write_lock_bh(&mrt_lock);
776 dev = v->dev; 774 dev = v->dev;
777 v->dev = NULL; 775 v->dev = NULL;
778 776
779 if (!dev) { 777 if (!dev) {
780 write_unlock_bh(&mrt_lock); 778 write_unlock_bh(&mrt_lock);
781 return -EADDRNOTAVAIL; 779 return -EADDRNOTAVAIL;
782 } 780 }
783 781
784 #ifdef CONFIG_IPV6_PIMSM_V2 782 #ifdef CONFIG_IPV6_PIMSM_V2
785 if (vifi == mrt->mroute_reg_vif_num) 783 if (vifi == mrt->mroute_reg_vif_num)
786 mrt->mroute_reg_vif_num = -1; 784 mrt->mroute_reg_vif_num = -1;
787 #endif 785 #endif
788 786
789 if (vifi + 1 == mrt->maxvif) { 787 if (vifi + 1 == mrt->maxvif) {
790 int tmp; 788 int tmp;
791 for (tmp = vifi - 1; tmp >= 0; tmp--) { 789 for (tmp = vifi - 1; tmp >= 0; tmp--) {
792 if (MIF_EXISTS(mrt, tmp)) 790 if (MIF_EXISTS(mrt, tmp))
793 break; 791 break;
794 } 792 }
795 mrt->maxvif = tmp + 1; 793 mrt->maxvif = tmp + 1;
796 } 794 }
797 795
798 write_unlock_bh(&mrt_lock); 796 write_unlock_bh(&mrt_lock);
799 797
800 dev_set_allmulti(dev, -1); 798 dev_set_allmulti(dev, -1);
801 799
802 in6_dev = __in6_dev_get(dev); 800 in6_dev = __in6_dev_get(dev);
803 if (in6_dev) 801 if (in6_dev)
804 in6_dev->cnf.mc_forwarding--; 802 in6_dev->cnf.mc_forwarding--;
805 803
806 if (v->flags & MIFF_REGISTER) 804 if (v->flags & MIFF_REGISTER)
807 unregister_netdevice_queue(dev, head); 805 unregister_netdevice_queue(dev, head);
808 806
809 dev_put(dev); 807 dev_put(dev);
810 return 0; 808 return 0;
811 } 809 }
812 810
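[Editor's note] mif6_delete() and the scan loops in this file rely on the MIF_EXISTS() test, whose definition lies outside this hunk. Since the function above marks a slot dead by nulling v->dev under mrt_lock, a faithful reconstruction (hypothetical, not quoted from the header) would be:

	/* Hypothetical reconstruction: a mif slot is live iff a device
	 * is still attached to it. */
	#define MIF_EXISTS(mrt, idx)	((mrt)->vif6_table[idx].dev != NULL)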
813 static inline void ip6mr_cache_free(struct mfc6_cache *c) 811 static inline void ip6mr_cache_free(struct mfc6_cache *c)
814 { 812 {
815 kmem_cache_free(mrt_cachep, c); 813 kmem_cache_free(mrt_cachep, c);
816 } 814 }
817 815
818 /* Destroy an unresolved cache entry, killing queued skbs 816 /* Destroy an unresolved cache entry, killing queued skbs
819 and reporting error to netlink readers. 817 and reporting error to netlink readers.
820 */ 818 */
821 819
822 static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c) 820 static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
823 { 821 {
824 struct net *net = read_pnet(&mrt->net); 822 struct net *net = read_pnet(&mrt->net);
825 struct sk_buff *skb; 823 struct sk_buff *skb;
826 824
827 atomic_dec(&mrt->cache_resolve_queue_len); 825 atomic_dec(&mrt->cache_resolve_queue_len);
828 826
829 while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) { 827 while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
830 if (ipv6_hdr(skb)->version == 0) { 828 if (ipv6_hdr(skb)->version == 0) {
831 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr)); 829 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
832 nlh->nlmsg_type = NLMSG_ERROR; 830 nlh->nlmsg_type = NLMSG_ERROR;
833 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); 831 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
834 skb_trim(skb, nlh->nlmsg_len); 832 skb_trim(skb, nlh->nlmsg_len);
835 ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT; 833 ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
836 rtnl_unicast(skb, net, NETLINK_CB(skb).pid); 834 rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
837 } else 835 } else
838 kfree_skb(skb); 836 kfree_skb(skb);
839 } 837 }
840 838
841 ip6mr_cache_free(c); 839 ip6mr_cache_free(c);
842 } 840 }
843 841
844 842
845 /* Timer process for all the unresolved queue. */ 843 /* Timer process for all the unresolved queue. */
846 844
847 static void ipmr_do_expire_process(struct mr6_table *mrt) 845 static void ipmr_do_expire_process(struct mr6_table *mrt)
848 { 846 {
849 unsigned long now = jiffies; 847 unsigned long now = jiffies;
850 unsigned long expires = 10 * HZ; 848 unsigned long expires = 10 * HZ;
851 struct mfc6_cache *c, *next; 849 struct mfc6_cache *c, *next;
852 850
853 list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) { 851 list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
854 if (time_after(c->mfc_un.unres.expires, now)) { 852 if (time_after(c->mfc_un.unres.expires, now)) {
855 /* not yet... */ 853 /* not yet... */
856 unsigned long interval = c->mfc_un.unres.expires - now; 854 unsigned long interval = c->mfc_un.unres.expires - now;
857 if (interval < expires) 855 if (interval < expires)
858 expires = interval; 856 expires = interval;
859 continue; 857 continue;
860 } 858 }
861 859
862 list_del(&c->list); 860 list_del(&c->list);
863 ip6mr_destroy_unres(mrt, c); 861 ip6mr_destroy_unres(mrt, c);
864 } 862 }
865 863
866 if (!list_empty(&mrt->mfc6_unres_queue)) 864 if (!list_empty(&mrt->mfc6_unres_queue))
867 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires); 865 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
868 } 866 }
869 867
870 static void ipmr_expire_process(unsigned long arg) 868 static void ipmr_expire_process(unsigned long arg)
871 { 869 {
872 struct mr6_table *mrt = (struct mr6_table *)arg; 870 struct mr6_table *mrt = (struct mr6_table *)arg;
873 871
874 if (!spin_trylock(&mfc_unres_lock)) { 872 if (!spin_trylock(&mfc_unres_lock)) {
875 mod_timer(&mrt->ipmr_expire_timer, jiffies + 1); 873 mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
876 return; 874 return;
877 } 875 }
878 876
879 if (!list_empty(&mrt->mfc6_unres_queue)) 877 if (!list_empty(&mrt->mfc6_unres_queue))
880 ipmr_do_expire_process(mrt); 878 ipmr_do_expire_process(mrt);
881 879
882 spin_unlock(&mfc_unres_lock); 880 spin_unlock(&mfc_unres_lock);
883 } 881 }
884 882
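[Editor's note] ipmr_expire_process() receives the table through the timer's opaque argument, so the per-table timer must be bound when the mr6_table is created (in ip6mr_new_table(), outside this hunk). A sketch of the expected wiring, assuming the standard setup_timer() idiom of this kernel generation:

	/* Hypothetical excerpt from table creation: arm the unresolved-queue
	 * expiry timer with the table itself as the callback argument. */
	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
		    (unsigned long)mrt);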
885 /* Fill the oifs list. Called with mrt_lock held for writing. */ 883 /* Fill the oifs list. Called with mrt_lock held for writing. */
886 884
887 static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache, 885 static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
888 unsigned char *ttls) 886 unsigned char *ttls)
889 { 887 {
890 int vifi; 888 int vifi;
891 889
892 cache->mfc_un.res.minvif = MAXMIFS; 890 cache->mfc_un.res.minvif = MAXMIFS;
893 cache->mfc_un.res.maxvif = 0; 891 cache->mfc_un.res.maxvif = 0;
894 memset(cache->mfc_un.res.ttls, 255, MAXMIFS); 892 memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
895 893
896 for (vifi = 0; vifi < mrt->maxvif; vifi++) { 894 for (vifi = 0; vifi < mrt->maxvif; vifi++) {
897 if (MIF_EXISTS(mrt, vifi) && 895 if (MIF_EXISTS(mrt, vifi) &&
898 ttls[vifi] && ttls[vifi] < 255) { 896 ttls[vifi] && ttls[vifi] < 255) {
899 cache->mfc_un.res.ttls[vifi] = ttls[vifi]; 897 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
900 if (cache->mfc_un.res.minvif > vifi) 898 if (cache->mfc_un.res.minvif > vifi)
901 cache->mfc_un.res.minvif = vifi; 899 cache->mfc_un.res.minvif = vifi;
902 if (cache->mfc_un.res.maxvif <= vifi) 900 if (cache->mfc_un.res.maxvif <= vifi)
903 cache->mfc_un.res.maxvif = vifi + 1; 901 cache->mfc_un.res.maxvif = vifi + 1;
904 } 902 }
905 } 903 }
906 } 904 }
907 905
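[Editor's note] The minvif/maxvif bounds computed above let ip6_mr_forward() scan only the live span of the TTL array. A standalone illustration of the same scan (userspace C; the MIF_EXISTS() check is omitted, and MAXMIFS is 32 in the kernel headers, though any bound works for the demo):

	#include <stdio.h>
	#include <string.h>

	#define MAXMIFS 32

	int main(void)
	{
		unsigned char ttls[MAXMIFS];
		int vifi, minvif = MAXMIFS, maxvif = 0;

		memset(ttls, 255, MAXMIFS);	/* 255 = do not forward */
		ttls[2] = 1;			/* forward on mif 2 if hop limit > 1 */
		ttls[5] = 3;			/* forward on mif 5 if hop limit > 3 */

		for (vifi = 0; vifi < MAXMIFS; vifi++) {
			if (ttls[vifi] && ttls[vifi] < 255) {
				if (minvif > vifi)
					minvif = vifi;
				if (maxvif <= vifi)
					maxvif = vifi + 1;
			}
		}
		printf("minvif=%d maxvif=%d\n", minvif, maxvif);	/* 2 and 6 */
		return 0;
	}

The forwarding loop then iterates vifi from minvif to maxvif - 1 and skips everything outside that span.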
908 static int mif6_add(struct net *net, struct mr6_table *mrt, 906 static int mif6_add(struct net *net, struct mr6_table *mrt,
909 struct mif6ctl *vifc, int mrtsock) 907 struct mif6ctl *vifc, int mrtsock)
910 { 908 {
911 int vifi = vifc->mif6c_mifi; 909 int vifi = vifc->mif6c_mifi;
912 struct mif_device *v = &mrt->vif6_table[vifi]; 910 struct mif_device *v = &mrt->vif6_table[vifi];
913 struct net_device *dev; 911 struct net_device *dev;
914 struct inet6_dev *in6_dev; 912 struct inet6_dev *in6_dev;
915 int err; 913 int err;
916 914
917 /* Is vif busy ? */ 915 /* Is vif busy ? */
918 if (MIF_EXISTS(mrt, vifi)) 916 if (MIF_EXISTS(mrt, vifi))
919 return -EADDRINUSE; 917 return -EADDRINUSE;
920 918
921 switch (vifc->mif6c_flags) { 919 switch (vifc->mif6c_flags) {
922 #ifdef CONFIG_IPV6_PIMSM_V2 920 #ifdef CONFIG_IPV6_PIMSM_V2
923 case MIFF_REGISTER: 921 case MIFF_REGISTER:
924 /* 922 /*
925 * Special Purpose VIF in PIM 923 * Special Purpose VIF in PIM
926 * All the packets will be sent to the daemon 924 * All the packets will be sent to the daemon
927 */ 925 */
928 if (mrt->mroute_reg_vif_num >= 0) 926 if (mrt->mroute_reg_vif_num >= 0)
929 return -EADDRINUSE; 927 return -EADDRINUSE;
930 dev = ip6mr_reg_vif(net, mrt); 928 dev = ip6mr_reg_vif(net, mrt);
931 if (!dev) 929 if (!dev)
932 return -ENOBUFS; 930 return -ENOBUFS;
933 err = dev_set_allmulti(dev, 1); 931 err = dev_set_allmulti(dev, 1);
934 if (err) { 932 if (err) {
935 unregister_netdevice(dev); 933 unregister_netdevice(dev);
936 dev_put(dev); 934 dev_put(dev);
937 return err; 935 return err;
938 } 936 }
939 break; 937 break;
940 #endif 938 #endif
941 case 0: 939 case 0:
942 dev = dev_get_by_index(net, vifc->mif6c_pifi); 940 dev = dev_get_by_index(net, vifc->mif6c_pifi);
943 if (!dev) 941 if (!dev)
944 return -EADDRNOTAVAIL; 942 return -EADDRNOTAVAIL;
945 err = dev_set_allmulti(dev, 1); 943 err = dev_set_allmulti(dev, 1);
946 if (err) { 944 if (err) {
947 dev_put(dev); 945 dev_put(dev);
948 return err; 946 return err;
949 } 947 }
950 break; 948 break;
951 default: 949 default:
952 return -EINVAL; 950 return -EINVAL;
953 } 951 }
954 952
955 in6_dev = __in6_dev_get(dev); 953 in6_dev = __in6_dev_get(dev);
956 if (in6_dev) 954 if (in6_dev)
957 in6_dev->cnf.mc_forwarding++; 955 in6_dev->cnf.mc_forwarding++;
958 956
959 /* 957 /*
960 * Fill in the VIF structures 958 * Fill in the VIF structures
961 */ 959 */
962 v->rate_limit = vifc->vifc_rate_limit; 960 v->rate_limit = vifc->vifc_rate_limit;
963 v->flags = vifc->mif6c_flags; 961 v->flags = vifc->mif6c_flags;
964 if (!mrtsock) 962 if (!mrtsock)
965 v->flags |= VIFF_STATIC; 963 v->flags |= VIFF_STATIC;
966 v->threshold = vifc->vifc_threshold; 964 v->threshold = vifc->vifc_threshold;
967 v->bytes_in = 0; 965 v->bytes_in = 0;
968 v->bytes_out = 0; 966 v->bytes_out = 0;
969 v->pkt_in = 0; 967 v->pkt_in = 0;
970 v->pkt_out = 0; 968 v->pkt_out = 0;
971 v->link = dev->ifindex; 969 v->link = dev->ifindex;
972 if (v->flags & MIFF_REGISTER) 970 if (v->flags & MIFF_REGISTER)
973 v->link = dev->iflink; 971 v->link = dev->iflink;
974 972
975 /* And finish update writing critical data */ 973 /* And finish update writing critical data */
976 write_lock_bh(&mrt_lock); 974 write_lock_bh(&mrt_lock);
977 v->dev = dev; 975 v->dev = dev;
978 #ifdef CONFIG_IPV6_PIMSM_V2 976 #ifdef CONFIG_IPV6_PIMSM_V2
979 if (v->flags & MIFF_REGISTER) 977 if (v->flags & MIFF_REGISTER)
980 mrt->mroute_reg_vif_num = vifi; 978 mrt->mroute_reg_vif_num = vifi;
981 #endif 979 #endif
982 if (vifi + 1 > mrt->maxvif) 980 if (vifi + 1 > mrt->maxvif)
983 mrt->maxvif = vifi + 1; 981 mrt->maxvif = vifi + 1;
984 write_unlock_bh(&mrt_lock); 982 write_unlock_bh(&mrt_lock);
985 return 0; 983 return 0;
986 } 984 }
987 985
988 static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt, 986 static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
989 struct in6_addr *origin, 987 struct in6_addr *origin,
990 struct in6_addr *mcastgrp) 988 struct in6_addr *mcastgrp)
991 { 989 {
992 int line = MFC6_HASH(mcastgrp, origin); 990 int line = MFC6_HASH(mcastgrp, origin);
993 struct mfc6_cache *c; 991 struct mfc6_cache *c;
994 992
995 list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) { 993 list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
996 if (ipv6_addr_equal(&c->mf6c_origin, origin) && 994 if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
997 ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) 995 ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
998 return c; 996 return c;
999 } 997 }
1000 return NULL; 998 return NULL;
1001 } 999 }
1002 1000
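[Editor's note] ip6mr_cache_find() and the MFC add/delete paths all hash with MFC6_HASH(), which is defined in the mroute6 headers rather than in this file. A representative definition, assuming the usual XOR-and-modulo fold over one 32-bit word of each address (the constant 64 and the exact fold are assumptions; only the MFC6_LINES name is confirmed by the wipe loop later in this file):

	#define MFC6_LINES	64
	#define MFC6_HASH(a, g)	(((__force u32)(a)->s6_addr32[0] ^ \
				  (__force u32)(g)->s6_addr32[0]) % MFC6_LINES)

The XOR makes the bucket choice independent of argument order, so origin and group can be folded either way around.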
1003 /* 1001 /*
1004 * Allocate a multicast cache entry 1002 * Allocate a multicast cache entry
1005 */ 1003 */
1006 static struct mfc6_cache *ip6mr_cache_alloc(void) 1004 static struct mfc6_cache *ip6mr_cache_alloc(void)
1007 { 1005 {
1008 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); 1006 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
1009 if (c == NULL) 1007 if (c == NULL)
1010 return NULL; 1008 return NULL;
1011 c->mfc_un.res.minvif = MAXMIFS; 1009 c->mfc_un.res.minvif = MAXMIFS;
1012 return c; 1010 return c;
1013 } 1011 }
1014 1012
1015 static struct mfc6_cache *ip6mr_cache_alloc_unres(void) 1013 static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
1016 { 1014 {
1017 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); 1015 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
1018 if (c == NULL) 1016 if (c == NULL)
1019 return NULL; 1017 return NULL;
1020 skb_queue_head_init(&c->mfc_un.unres.unresolved); 1018 skb_queue_head_init(&c->mfc_un.unres.unresolved);
1021 c->mfc_un.unres.expires = jiffies + 10 * HZ; 1019 c->mfc_un.unres.expires = jiffies + 10 * HZ;
1022 return c; 1020 return c;
1023 } 1021 }
1024 1022
1025 /* 1023 /*
1026 * A cache entry has moved from the unresolved queue to the resolved state 1024 * A cache entry has moved from the unresolved queue to the resolved state
1027 */ 1025 */
1028 1026
1029 static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt, 1027 static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
1030 struct mfc6_cache *uc, struct mfc6_cache *c) 1028 struct mfc6_cache *uc, struct mfc6_cache *c)
1031 { 1029 {
1032 struct sk_buff *skb; 1030 struct sk_buff *skb;
1033 1031
1034 /* 1032 /*
1035 * Play the pending entries through our router 1033 * Play the pending entries through our router
1036 */ 1034 */
1037 1035
1038 while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) { 1036 while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
1039 if (ipv6_hdr(skb)->version == 0) { 1037 if (ipv6_hdr(skb)->version == 0) {
1040 int err; 1038 int err;
1041 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr)); 1039 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
1042 1040
1043 if (__ip6mr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) { 1041 if (__ip6mr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
1044 nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh; 1042 nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
1045 } else { 1043 } else {
1046 nlh->nlmsg_type = NLMSG_ERROR; 1044 nlh->nlmsg_type = NLMSG_ERROR;
1047 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); 1045 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
1048 skb_trim(skb, nlh->nlmsg_len); 1046 skb_trim(skb, nlh->nlmsg_len);
1049 ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE; 1047 ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
1050 } 1048 }
1051 err = rtnl_unicast(skb, net, NETLINK_CB(skb).pid); 1049 err = rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
1052 } else 1050 } else
1053 ip6_mr_forward(net, mrt, skb, c); 1051 ip6_mr_forward(net, mrt, skb, c);
1054 } 1052 }
1055 } 1053 }
1056 1054
1057 /* 1055 /*
1058 * Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd 1056 * Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
1059 * expects the following bizarre scheme. 1057 * expects the following bizarre scheme.
1060 * 1058 *
1061 * Called under mrt_lock. 1059 * Called under mrt_lock.
1062 */ 1060 */
1063 1061
1064 static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt, 1062 static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
1065 mifi_t mifi, int assert) 1063 mifi_t mifi, int assert)
1066 { 1064 {
1067 struct sk_buff *skb; 1065 struct sk_buff *skb;
1068 struct mrt6msg *msg; 1066 struct mrt6msg *msg;
1069 int ret; 1067 int ret;
1070 1068
1071 #ifdef CONFIG_IPV6_PIMSM_V2 1069 #ifdef CONFIG_IPV6_PIMSM_V2
1072 if (assert == MRT6MSG_WHOLEPKT) 1070 if (assert == MRT6MSG_WHOLEPKT)
1073 skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt) 1071 skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
1074 +sizeof(*msg)); 1072 +sizeof(*msg));
1075 else 1073 else
1076 #endif 1074 #endif
1077 skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC); 1075 skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
1078 1076
1079 if (!skb) 1077 if (!skb)
1080 return -ENOBUFS; 1078 return -ENOBUFS;
1081 1079
1082 /* I suppose that internal messages 1080 /* I suppose that internal messages
1083 * do not require checksums */ 1081 * do not require checksums */
1084 1082
1085 skb->ip_summed = CHECKSUM_UNNECESSARY; 1083 skb->ip_summed = CHECKSUM_UNNECESSARY;
1086 1084
1087 #ifdef CONFIG_IPV6_PIMSM_V2 1085 #ifdef CONFIG_IPV6_PIMSM_V2
1088 if (assert == MRT6MSG_WHOLEPKT) { 1086 if (assert == MRT6MSG_WHOLEPKT) {
1089 /* Ugly, but we have no choice with this interface. 1087 /* Ugly, but we have no choice with this interface.
1090 Duplicate old header, fix length etc. 1088 Duplicate old header, fix length etc.
1091 And all this only to mangle msg->im6_msgtype and 1089 And all this only to mangle msg->im6_msgtype and
1092 to set msg->im6_mbz to "mbz" :-) 1090 to set msg->im6_mbz to "mbz" :-)
1093 */ 1091 */
1094 skb_push(skb, -skb_network_offset(pkt)); 1092 skb_push(skb, -skb_network_offset(pkt));
1095 1093
1096 skb_push(skb, sizeof(*msg)); 1094 skb_push(skb, sizeof(*msg));
1097 skb_reset_transport_header(skb); 1095 skb_reset_transport_header(skb);
1098 msg = (struct mrt6msg *)skb_transport_header(skb); 1096 msg = (struct mrt6msg *)skb_transport_header(skb);
1099 msg->im6_mbz = 0; 1097 msg->im6_mbz = 0;
1100 msg->im6_msgtype = MRT6MSG_WHOLEPKT; 1098 msg->im6_msgtype = MRT6MSG_WHOLEPKT;
1101 msg->im6_mif = mrt->mroute_reg_vif_num; 1099 msg->im6_mif = mrt->mroute_reg_vif_num;
1102 msg->im6_pad = 0; 1100 msg->im6_pad = 0;
1103 ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr); 1101 ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
1104 ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr); 1102 ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
1105 1103
1106 skb->ip_summed = CHECKSUM_UNNECESSARY; 1104 skb->ip_summed = CHECKSUM_UNNECESSARY;
1107 } else 1105 } else
1108 #endif 1106 #endif
1109 { 1107 {
1110 /* 1108 /*
1111 * Copy the IP header 1109 * Copy the IP header
1112 */ 1110 */
1113 1111
1114 skb_put(skb, sizeof(struct ipv6hdr)); 1112 skb_put(skb, sizeof(struct ipv6hdr));
1115 skb_reset_network_header(skb); 1113 skb_reset_network_header(skb);
1116 skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr)); 1114 skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
1117 1115
1118 /* 1116 /*
1119 * Add our header 1117 * Add our header
1120 */ 1118 */
1121 skb_put(skb, sizeof(*msg)); 1119 skb_put(skb, sizeof(*msg));
1122 skb_reset_transport_header(skb); 1120 skb_reset_transport_header(skb);
1123 msg = (struct mrt6msg *)skb_transport_header(skb); 1121 msg = (struct mrt6msg *)skb_transport_header(skb);
1124 1122
1125 msg->im6_mbz = 0; 1123 msg->im6_mbz = 0;
1126 msg->im6_msgtype = assert; 1124 msg->im6_msgtype = assert;
1127 msg->im6_mif = mifi; 1125 msg->im6_mif = mifi;
1128 msg->im6_pad = 0; 1126 msg->im6_pad = 0;
1129 ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr); 1127 ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
1130 ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr); 1128 ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
1131 1129
1132 skb_dst_set(skb, dst_clone(skb_dst(pkt))); 1130 skb_dst_set(skb, dst_clone(skb_dst(pkt)));
1133 skb->ip_summed = CHECKSUM_UNNECESSARY; 1131 skb->ip_summed = CHECKSUM_UNNECESSARY;
1134 } 1132 }
1135 1133
1136 if (mrt->mroute6_sk == NULL) { 1134 if (mrt->mroute6_sk == NULL) {
1137 kfree_skb(skb); 1135 kfree_skb(skb);
1138 return -EINVAL; 1136 return -EINVAL;
1139 } 1137 }
1140 1138
1141 /* 1139 /*
1142 * Deliver to user space multicast routing algorithms 1140 * Deliver to user space multicast routing algorithms
1143 */ 1141 */
1144 ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb); 1142 ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
1145 if (ret < 0) { 1143 if (ret < 0) {
1146 if (net_ratelimit()) 1144 if (net_ratelimit())
1147 printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n"); 1145 printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
1148 kfree_skb(skb); 1146 kfree_skb(skb);
1149 } 1147 }
1150 1148
1151 return ret; 1149 return ret;
1152 } 1150 }
1153 1151
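[Editor's note] On the other side of sock_queue_rcv_skb(), the daemon reads these reports off the same raw ICMPv6 socket it used for MRT6_INIT. Note how skb_reset_transport_header() is applied at the current data pointer in both branches above: in the copy branch the mrt6msg lands on top of the just-copied IPv6 header (both are 40 bytes), and in the WHOLEPKT branch it is pushed in front of the whole packet, so either way userspace sees the report at offset zero. A hedged consumer sketch (struct mrt6msg and the MRT6MSG_* constants come from <linux/mroute6.h>; drain_reports and fd are illustrative names):

	#include <unistd.h>
	#include <linux/mroute6.h>

	static void drain_reports(int fd)
	{
		union {
			struct mrt6msg msg;
			char raw[8192];
		} buf;
		ssize_t n;

		while ((n = read(fd, &buf, sizeof(buf))) >= (ssize_t)sizeof(buf.msg)) {
			if (buf.msg.im6_mbz != 0)
				continue;	/* ordinary ICMPv6, not an upcall */
			switch (buf.msg.im6_msgtype) {
			case MRT6MSG_NOCACHE:	/* resolve, then MRT6_ADD_MFC */
			case MRT6MSG_WRONGMIF:	/* assert processing */
			case MRT6MSG_WHOLEPKT:	/* register path: packet follows */
				/* ... feed the daemon's state machine ... */
				break;
			}
		}
	}

The im6_mbz byte doubles as the discriminator: a real ICMPv6 message carries a non-zero type in that position.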
1154 /* 1152 /*
1155 * Queue a packet for resolution, attaching it to a locked unresolved cache entry. 1153 * Queue a packet for resolution, attaching it to a locked unresolved cache entry.
1156 */ 1154 */
1157 1155
1158 static int 1156 static int
1159 ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb) 1157 ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
1160 { 1158 {
1161 bool found = false; 1159 bool found = false;
1162 int err; 1160 int err;
1163 struct mfc6_cache *c; 1161 struct mfc6_cache *c;
1164 1162
1165 spin_lock_bh(&mfc_unres_lock); 1163 spin_lock_bh(&mfc_unres_lock);
1166 list_for_each_entry(c, &mrt->mfc6_unres_queue, list) { 1164 list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
1167 if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) && 1165 if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
1168 ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) { 1166 ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
1169 found = true; 1167 found = true;
1170 break; 1168 break;
1171 } 1169 }
1172 } 1170 }
1173 1171
1174 if (!found) { 1172 if (!found) {
1175 /* 1173 /*
1176 * Create a new entry if allowable 1174 * Create a new entry if allowable
1177 */ 1175 */
1178 1176
1179 if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 || 1177 if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
1180 (c = ip6mr_cache_alloc_unres()) == NULL) { 1178 (c = ip6mr_cache_alloc_unres()) == NULL) {
1181 spin_unlock_bh(&mfc_unres_lock); 1179 spin_unlock_bh(&mfc_unres_lock);
1182 1180
1183 kfree_skb(skb); 1181 kfree_skb(skb);
1184 return -ENOBUFS; 1182 return -ENOBUFS;
1185 } 1183 }
1186 1184
1187 /* 1185 /*
1188 * Fill in the new cache entry 1186 * Fill in the new cache entry
1189 */ 1187 */
1190 c->mf6c_parent = -1; 1188 c->mf6c_parent = -1;
1191 c->mf6c_origin = ipv6_hdr(skb)->saddr; 1189 c->mf6c_origin = ipv6_hdr(skb)->saddr;
1192 c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr; 1190 c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
1193 1191
1194 /* 1192 /*
1195 * Reflect first query at pim6sd 1193 * Reflect first query at pim6sd
1196 */ 1194 */
1197 err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE); 1195 err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
1198 if (err < 0) { 1196 if (err < 0) {
1199 /* If the report failed throw the cache entry 1197 /* If the report failed throw the cache entry
1200 out - Brad Parker 1198 out - Brad Parker
1201 */ 1199 */
1202 spin_unlock_bh(&mfc_unres_lock); 1200 spin_unlock_bh(&mfc_unres_lock);
1203 1201
1204 ip6mr_cache_free(c); 1202 ip6mr_cache_free(c);
1205 kfree_skb(skb); 1203 kfree_skb(skb);
1206 return err; 1204 return err;
1207 } 1205 }
1208 1206
1209 atomic_inc(&mrt->cache_resolve_queue_len); 1207 atomic_inc(&mrt->cache_resolve_queue_len);
1210 list_add(&c->list, &mrt->mfc6_unres_queue); 1208 list_add(&c->list, &mrt->mfc6_unres_queue);
1211 1209
1212 ipmr_do_expire_process(mrt); 1210 ipmr_do_expire_process(mrt);
1213 } 1211 }
1214 1212
1215 /* 1213 /*
1216 * See if we can append the packet 1214 * See if we can append the packet
1217 */ 1215 */
1218 if (c->mfc_un.unres.unresolved.qlen > 3) { 1216 if (c->mfc_un.unres.unresolved.qlen > 3) {
1219 kfree_skb(skb); 1217 kfree_skb(skb);
1220 err = -ENOBUFS; 1218 err = -ENOBUFS;
1221 } else { 1219 } else {
1222 skb_queue_tail(&c->mfc_un.unres.unresolved, skb); 1220 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
1223 err = 0; 1221 err = 0;
1224 } 1222 }
1225 1223
1226 spin_unlock_bh(&mfc_unres_lock); 1224 spin_unlock_bh(&mfc_unres_lock);
1227 return err; 1225 return err;
1228 } 1226 }
1229 1227
1230 /* 1228 /*
1231 * MFC6 cache manipulation by user space 1229 * MFC6 cache manipulation by user space
1232 */ 1230 */
1233 1231
1234 static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc) 1232 static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc)
1235 { 1233 {
1236 int line; 1234 int line;
1237 struct mfc6_cache *c, *next; 1235 struct mfc6_cache *c, *next;
1238 1236
1239 line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr); 1237 line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1240 1238
1241 list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) { 1239 list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
1242 if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) && 1240 if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1243 ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) { 1241 ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
1244 write_lock_bh(&mrt_lock); 1242 write_lock_bh(&mrt_lock);
1245 list_del(&c->list); 1243 list_del(&c->list);
1246 write_unlock_bh(&mrt_lock); 1244 write_unlock_bh(&mrt_lock);
1247 1245
1248 ip6mr_cache_free(c); 1246 ip6mr_cache_free(c);
1249 return 0; 1247 return 0;
1250 } 1248 }
1251 } 1249 }
1252 return -ENOENT; 1250 return -ENOENT;
1253 } 1251 }
1254 1252
1255 static int ip6mr_device_event(struct notifier_block *this, 1253 static int ip6mr_device_event(struct notifier_block *this,
1256 unsigned long event, void *ptr) 1254 unsigned long event, void *ptr)
1257 { 1255 {
1258 struct net_device *dev = ptr; 1256 struct net_device *dev = ptr;
1259 struct net *net = dev_net(dev); 1257 struct net *net = dev_net(dev);
1260 struct mr6_table *mrt; 1258 struct mr6_table *mrt;
1261 struct mif_device *v; 1259 struct mif_device *v;
1262 int ct; 1260 int ct;
1263 LIST_HEAD(list); 1261 LIST_HEAD(list);
1264 1262
1265 if (event != NETDEV_UNREGISTER) 1263 if (event != NETDEV_UNREGISTER)
1266 return NOTIFY_DONE; 1264 return NOTIFY_DONE;
1267 1265
1268 ip6mr_for_each_table(mrt, net) { 1266 ip6mr_for_each_table(mrt, net) {
1269 v = &mrt->vif6_table[0]; 1267 v = &mrt->vif6_table[0];
1270 for (ct = 0; ct < mrt->maxvif; ct++, v++) { 1268 for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1271 if (v->dev == dev) 1269 if (v->dev == dev)
1272 mif6_delete(mrt, ct, &list); 1270 mif6_delete(mrt, ct, &list);
1273 } 1271 }
1274 } 1272 }
1275 unregister_netdevice_many(&list); 1273 unregister_netdevice_many(&list);
1276 1274
1277 return NOTIFY_DONE; 1275 return NOTIFY_DONE;
1278 } 1276 }
1279 1277
1280 static struct notifier_block ip6_mr_notifier = { 1278 static struct notifier_block ip6_mr_notifier = {
1281 .notifier_call = ip6mr_device_event 1279 .notifier_call = ip6mr_device_event
1282 }; 1280 };
1283 1281
1284 /* 1282 /*
1285 * Setup for IP multicast routing 1283 * Setup for IP multicast routing
1286 */ 1284 */
1287 1285
1288 static int __net_init ip6mr_net_init(struct net *net) 1286 static int __net_init ip6mr_net_init(struct net *net)
1289 { 1287 {
1290 int err; 1288 int err;
1291 1289
1292 err = ip6mr_rules_init(net); 1290 err = ip6mr_rules_init(net);
1293 if (err < 0) 1291 if (err < 0)
1294 goto fail; 1292 goto fail;
1295 1293
1296 #ifdef CONFIG_PROC_FS 1294 #ifdef CONFIG_PROC_FS
1297 err = -ENOMEM; 1295 err = -ENOMEM;
1298 if (!proc_net_fops_create(net, "ip6_mr_vif", 0, &ip6mr_vif_fops)) 1296 if (!proc_net_fops_create(net, "ip6_mr_vif", 0, &ip6mr_vif_fops))
1299 goto proc_vif_fail; 1297 goto proc_vif_fail;
1300 if (!proc_net_fops_create(net, "ip6_mr_cache", 0, &ip6mr_mfc_fops)) 1298 if (!proc_net_fops_create(net, "ip6_mr_cache", 0, &ip6mr_mfc_fops))
1301 goto proc_cache_fail; 1299 goto proc_cache_fail;
1302 #endif 1300 #endif
1303 1301
1304 return 0; 1302 return 0;
1305 1303
1306 #ifdef CONFIG_PROC_FS 1304 #ifdef CONFIG_PROC_FS
1307 proc_cache_fail: 1305 proc_cache_fail:
1308 proc_net_remove(net, "ip6_mr_vif"); 1306 proc_net_remove(net, "ip6_mr_vif");
1309 proc_vif_fail: 1307 proc_vif_fail:
1310 ip6mr_rules_exit(net); 1308 ip6mr_rules_exit(net);
1311 #endif 1309 #endif
1312 fail: 1310 fail:
1313 return err; 1311 return err;
1314 } 1312 }
1315 1313
1316 static void __net_exit ip6mr_net_exit(struct net *net) 1314 static void __net_exit ip6mr_net_exit(struct net *net)
1317 { 1315 {
1318 #ifdef CONFIG_PROC_FS 1316 #ifdef CONFIG_PROC_FS
1319 proc_net_remove(net, "ip6_mr_cache"); 1317 proc_net_remove(net, "ip6_mr_cache");
1320 proc_net_remove(net, "ip6_mr_vif"); 1318 proc_net_remove(net, "ip6_mr_vif");
1321 #endif 1319 #endif
1322 ip6mr_rules_exit(net); 1320 ip6mr_rules_exit(net);
1323 } 1321 }
1324 1322
1325 static struct pernet_operations ip6mr_net_ops = { 1323 static struct pernet_operations ip6mr_net_ops = {
1326 .init = ip6mr_net_init, 1324 .init = ip6mr_net_init,
1327 .exit = ip6mr_net_exit, 1325 .exit = ip6mr_net_exit,
1328 }; 1326 };
1329 1327
1330 int __init ip6_mr_init(void) 1328 int __init ip6_mr_init(void)
1331 { 1329 {
1332 int err; 1330 int err;
1333 1331
1334 mrt_cachep = kmem_cache_create("ip6_mrt_cache", 1332 mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1335 sizeof(struct mfc6_cache), 1333 sizeof(struct mfc6_cache),
1336 0, SLAB_HWCACHE_ALIGN, 1334 0, SLAB_HWCACHE_ALIGN,
1337 NULL); 1335 NULL);
1338 if (!mrt_cachep) 1336 if (!mrt_cachep)
1339 return -ENOMEM; 1337 return -ENOMEM;
1340 1338
1341 err = register_pernet_subsys(&ip6mr_net_ops); 1339 err = register_pernet_subsys(&ip6mr_net_ops);
1342 if (err) 1340 if (err)
1343 goto reg_pernet_fail; 1341 goto reg_pernet_fail;
1344 1342
1345 err = register_netdevice_notifier(&ip6_mr_notifier); 1343 err = register_netdevice_notifier(&ip6_mr_notifier);
1346 if (err) 1344 if (err)
1347 goto reg_notif_fail; 1345 goto reg_notif_fail;
1348 #ifdef CONFIG_IPV6_PIMSM_V2 1346 #ifdef CONFIG_IPV6_PIMSM_V2
1349 if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) { 1347 if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1350 printk(KERN_ERR "ip6_mr_init: can't add PIM protocol\n"); 1348 printk(KERN_ERR "ip6_mr_init: can't add PIM protocol\n");
1351 err = -EAGAIN; 1349 err = -EAGAIN;
1352 goto add_proto_fail; 1350 goto add_proto_fail;
1353 } 1351 }
1354 #endif 1352 #endif
1355 rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL, ip6mr_rtm_dumproute); 1353 rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL, ip6mr_rtm_dumproute);
1356 return 0; 1354 return 0;
1357 #ifdef CONFIG_IPV6_PIMSM_V2 1355 #ifdef CONFIG_IPV6_PIMSM_V2
1358 add_proto_fail: 1356 add_proto_fail:
1359 unregister_netdevice_notifier(&ip6_mr_notifier); 1357 unregister_netdevice_notifier(&ip6_mr_notifier);
1360 #endif 1358 #endif
1361 reg_notif_fail: 1359 reg_notif_fail:
1362 unregister_pernet_subsys(&ip6mr_net_ops); 1360 unregister_pernet_subsys(&ip6mr_net_ops);
1363 reg_pernet_fail: 1361 reg_pernet_fail:
1364 kmem_cache_destroy(mrt_cachep); 1362 kmem_cache_destroy(mrt_cachep);
1365 return err; 1363 return err;
1366 } 1364 }
1367 1365
1368 void ip6_mr_cleanup(void) 1366 void ip6_mr_cleanup(void)
1369 { 1367 {
1370 unregister_netdevice_notifier(&ip6_mr_notifier); 1368 unregister_netdevice_notifier(&ip6_mr_notifier);
1371 unregister_pernet_subsys(&ip6mr_net_ops); 1369 unregister_pernet_subsys(&ip6mr_net_ops);
1372 kmem_cache_destroy(mrt_cachep); 1370 kmem_cache_destroy(mrt_cachep);
1373 } 1371 }
1374 1372
1375 static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt, 1373 static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
1376 struct mf6cctl *mfc, int mrtsock) 1374 struct mf6cctl *mfc, int mrtsock)
1377 { 1375 {
1378 bool found = false; 1376 bool found = false;
1379 int line; 1377 int line;
1380 struct mfc6_cache *uc, *c; 1378 struct mfc6_cache *uc, *c;
1381 unsigned char ttls[MAXMIFS]; 1379 unsigned char ttls[MAXMIFS];
1382 int i; 1380 int i;
1383 1381
1384 if (mfc->mf6cc_parent >= MAXMIFS) 1382 if (mfc->mf6cc_parent >= MAXMIFS)
1385 return -ENFILE; 1383 return -ENFILE;
1386 1384
1387 memset(ttls, 255, MAXMIFS); 1385 memset(ttls, 255, MAXMIFS);
1388 for (i = 0; i < MAXMIFS; i++) { 1386 for (i = 0; i < MAXMIFS; i++) {
1389 if (IF_ISSET(i, &mfc->mf6cc_ifset)) 1387 if (IF_ISSET(i, &mfc->mf6cc_ifset))
1390 ttls[i] = 1; 1388 ttls[i] = 1;
1391 1389
1392 } 1390 }
1393 1391
1394 line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr); 1392 line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1395 1393
1396 list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) { 1394 list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1397 if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) && 1395 if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1398 ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) { 1396 ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
1399 found = true; 1397 found = true;
1400 break; 1398 break;
1401 } 1399 }
1402 } 1400 }
1403 1401
1404 if (found) { 1402 if (found) {
1405 write_lock_bh(&mrt_lock); 1403 write_lock_bh(&mrt_lock);
1406 c->mf6c_parent = mfc->mf6cc_parent; 1404 c->mf6c_parent = mfc->mf6cc_parent;
1407 ip6mr_update_thresholds(mrt, c, ttls); 1405 ip6mr_update_thresholds(mrt, c, ttls);
1408 if (!mrtsock) 1406 if (!mrtsock)
1409 c->mfc_flags |= MFC_STATIC; 1407 c->mfc_flags |= MFC_STATIC;
1410 write_unlock_bh(&mrt_lock); 1408 write_unlock_bh(&mrt_lock);
1411 return 0; 1409 return 0;
1412 } 1410 }
1413 1411
1414 if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr)) 1412 if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1415 return -EINVAL; 1413 return -EINVAL;
1416 1414
1417 c = ip6mr_cache_alloc(); 1415 c = ip6mr_cache_alloc();
1418 if (c == NULL) 1416 if (c == NULL)
1419 return -ENOMEM; 1417 return -ENOMEM;
1420 1418
1421 c->mf6c_origin = mfc->mf6cc_origin.sin6_addr; 1419 c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1422 c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr; 1420 c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1423 c->mf6c_parent = mfc->mf6cc_parent; 1421 c->mf6c_parent = mfc->mf6cc_parent;
1424 ip6mr_update_thresholds(mrt, c, ttls); 1422 ip6mr_update_thresholds(mrt, c, ttls);
1425 if (!mrtsock) 1423 if (!mrtsock)
1426 c->mfc_flags |= MFC_STATIC; 1424 c->mfc_flags |= MFC_STATIC;
1427 1425
1428 write_lock_bh(&mrt_lock); 1426 write_lock_bh(&mrt_lock);
1429 list_add(&c->list, &mrt->mfc6_cache_array[line]); 1427 list_add(&c->list, &mrt->mfc6_cache_array[line]);
1430 write_unlock_bh(&mrt_lock); 1428 write_unlock_bh(&mrt_lock);
1431 1429
1432 /* 1430 /*
1433 * Check to see if we resolved a queued list. If so we 1431 * Check to see if we resolved a queued list. If so we
1434 * need to send on the frames and tidy up. 1432 * need to send on the frames and tidy up.
1435 */ 1433 */
1436 found = false; 1434 found = false;
1437 spin_lock_bh(&mfc_unres_lock); 1435 spin_lock_bh(&mfc_unres_lock);
1438 list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) { 1436 list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
1439 if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) && 1437 if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1440 ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) { 1438 ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1441 list_del(&uc->list); 1439 list_del(&uc->list);
1442 atomic_dec(&mrt->cache_resolve_queue_len); 1440 atomic_dec(&mrt->cache_resolve_queue_len);
1443 found = true; 1441 found = true;
1444 break; 1442 break;
1445 } 1443 }
1446 } 1444 }
1447 if (list_empty(&mrt->mfc6_unres_queue)) 1445 if (list_empty(&mrt->mfc6_unres_queue))
1448 del_timer(&mrt->ipmr_expire_timer); 1446 del_timer(&mrt->ipmr_expire_timer);
1449 spin_unlock_bh(&mfc_unres_lock); 1447 spin_unlock_bh(&mfc_unres_lock);
1450 1448
1451 if (found) { 1449 if (found) {
1452 ip6mr_cache_resolve(net, mrt, uc, c); 1450 ip6mr_cache_resolve(net, mrt, uc, c);
1453 ip6mr_cache_free(uc); 1451 ip6mr_cache_free(uc);
1454 } 1452 }
1455 return 0; 1453 return 0;
1456 } 1454 }
1457 1455
1458 /* 1456 /*
1459 * Close the multicast socket, and clear the vif tables etc 1457 * Close the multicast socket, and clear the vif tables etc
1460 */ 1458 */
1461 1459
1462 static void mroute_clean_tables(struct mr6_table *mrt) 1460 static void mroute_clean_tables(struct mr6_table *mrt)
1463 { 1461 {
1464 int i; 1462 int i;
1465 LIST_HEAD(list); 1463 LIST_HEAD(list);
1466 struct mfc6_cache *c, *next; 1464 struct mfc6_cache *c, *next;
1467 1465
1468 /* 1466 /*
1469 * Shut down all active vif entries 1467 * Shut down all active vif entries
1470 */ 1468 */
1471 for (i = 0; i < mrt->maxvif; i++) { 1469 for (i = 0; i < mrt->maxvif; i++) {
1472 if (!(mrt->vif6_table[i].flags & VIFF_STATIC)) 1470 if (!(mrt->vif6_table[i].flags & VIFF_STATIC))
1473 mif6_delete(mrt, i, &list); 1471 mif6_delete(mrt, i, &list);
1474 } 1472 }
1475 unregister_netdevice_many(&list); 1473 unregister_netdevice_many(&list);
1476 1474
1477 /* 1475 /*
1478 * Wipe the cache 1476 * Wipe the cache
1479 */ 1477 */
1480 for (i = 0; i < MFC6_LINES; i++) { 1478 for (i = 0; i < MFC6_LINES; i++) {
1481 list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) { 1479 list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
1482 if (c->mfc_flags & MFC_STATIC) 1480 if (c->mfc_flags & MFC_STATIC)
1483 continue; 1481 continue;
1484 write_lock_bh(&mrt_lock); 1482 write_lock_bh(&mrt_lock);
1485 list_del(&c->list); 1483 list_del(&c->list);
1486 write_unlock_bh(&mrt_lock); 1484 write_unlock_bh(&mrt_lock);
1487 1485
1488 ip6mr_cache_free(c); 1486 ip6mr_cache_free(c);
1489 } 1487 }
1490 } 1488 }
1491 1489
1492 if (atomic_read(&mrt->cache_resolve_queue_len) != 0) { 1490 if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1493 spin_lock_bh(&mfc_unres_lock); 1491 spin_lock_bh(&mfc_unres_lock);
1494 list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) { 1492 list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
1495 list_del(&c->list); 1493 list_del(&c->list);
1496 ip6mr_destroy_unres(mrt, c); 1494 ip6mr_destroy_unres(mrt, c);
1497 } 1495 }
1498 spin_unlock_bh(&mfc_unres_lock); 1496 spin_unlock_bh(&mfc_unres_lock);
1499 } 1497 }
1500 } 1498 }
1501 1499
1502 static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk) 1500 static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
1503 { 1501 {
1504 int err = 0; 1502 int err = 0;
1505 struct net *net = sock_net(sk); 1503 struct net *net = sock_net(sk);
1506 1504
1507 rtnl_lock(); 1505 rtnl_lock();
1508 write_lock_bh(&mrt_lock); 1506 write_lock_bh(&mrt_lock);
1509 if (likely(mrt->mroute6_sk == NULL)) { 1507 if (likely(mrt->mroute6_sk == NULL)) {
1510 mrt->mroute6_sk = sk; 1508 mrt->mroute6_sk = sk;
1511 net->ipv6.devconf_all->mc_forwarding++; 1509 net->ipv6.devconf_all->mc_forwarding++;
1512 } 1510 }
1513 else 1511 else
1514 err = -EADDRINUSE; 1512 err = -EADDRINUSE;
1515 write_unlock_bh(&mrt_lock); 1513 write_unlock_bh(&mrt_lock);
1516 1514
1517 rtnl_unlock(); 1515 rtnl_unlock();
1518 1516
1519 return err; 1517 return err;
1520 } 1518 }
1521 1519
1522 int ip6mr_sk_done(struct sock *sk) 1520 int ip6mr_sk_done(struct sock *sk)
1523 { 1521 {
1524 int err = -EACCES; 1522 int err = -EACCES;
1525 struct net *net = sock_net(sk); 1523 struct net *net = sock_net(sk);
1526 struct mr6_table *mrt; 1524 struct mr6_table *mrt;
1527 1525
1528 rtnl_lock(); 1526 rtnl_lock();
1529 ip6mr_for_each_table(mrt, net) { 1527 ip6mr_for_each_table(mrt, net) {
1530 if (sk == mrt->mroute6_sk) { 1528 if (sk == mrt->mroute6_sk) {
1531 write_lock_bh(&mrt_lock); 1529 write_lock_bh(&mrt_lock);
1532 mrt->mroute6_sk = NULL; 1530 mrt->mroute6_sk = NULL;
1533 net->ipv6.devconf_all->mc_forwarding--; 1531 net->ipv6.devconf_all->mc_forwarding--;
1534 write_unlock_bh(&mrt_lock); 1532 write_unlock_bh(&mrt_lock);
1535 1533
1536 mroute_clean_tables(mrt); 1534 mroute_clean_tables(mrt);
1537 err = 0; 1535 err = 0;
1538 break; 1536 break;
1539 } 1537 }
1540 } 1538 }
1541 rtnl_unlock(); 1539 rtnl_unlock();
1542 1540
1543 return err; 1541 return err;
1544 } 1542 }
1545 1543
1546 struct sock *mroute6_socket(struct net *net, struct sk_buff *skb) 1544 struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
1547 { 1545 {
1548 struct mr6_table *mrt; 1546 struct mr6_table *mrt;
1549 struct flowi fl = { 1547 struct flowi fl = {
1550 .iif = skb->skb_iif, 1548 .iif = skb->skb_iif,
1551 .oif = skb->dev->ifindex, 1549 .oif = skb->dev->ifindex,
1552 .mark = skb->mark, 1550 .mark = skb->mark,
1553 }; 1551 };
1554 1552
1555 if (ip6mr_fib_lookup(net, &fl, &mrt) < 0) 1553 if (ip6mr_fib_lookup(net, &fl, &mrt) < 0)
1556 return NULL; 1554 return NULL;
1557 1555
1558 return mrt->mroute6_sk; 1556 return mrt->mroute6_sk;
1559 } 1557 }
1560 1558
1561 /* 1559 /*
1562 * Socket options and virtual interface manipulation. The whole 1560 * Socket options and virtual interface manipulation. The whole
1563 * virtual interface system is a complete heap, but unfortunately 1561 * virtual interface system is a complete heap, but unfortunately
1564 * that's how BSD mrouted happens to think. Maybe one day with a proper 1562 * that's how BSD mrouted happens to think. Maybe one day with a proper
1565 * MOSPF/PIM router set up we can clean this up. 1563 * MOSPF/PIM router set up we can clean this up.
1566 */ 1564 */
1567 1565
1568 int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen) 1566 int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1569 { 1567 {
1570 int ret; 1568 int ret;
1571 struct mif6ctl vif; 1569 struct mif6ctl vif;
1572 struct mf6cctl mfc; 1570 struct mf6cctl mfc;
1573 mifi_t mifi; 1571 mifi_t mifi;
1574 struct net *net = sock_net(sk); 1572 struct net *net = sock_net(sk);
1575 struct mr6_table *mrt; 1573 struct mr6_table *mrt;
1576 1574
1577 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); 1575 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1578 if (mrt == NULL) 1576 if (mrt == NULL)
1579 return -ENOENT; 1577 return -ENOENT;
1580 1578
1581 if (optname != MRT6_INIT) { 1579 if (optname != MRT6_INIT) {
1582 if (sk != mrt->mroute6_sk && !capable(CAP_NET_ADMIN)) 1580 if (sk != mrt->mroute6_sk && !capable(CAP_NET_ADMIN))
1583 return -EACCES; 1581 return -EACCES;
1584 } 1582 }
1585 1583
1586 switch (optname) { 1584 switch (optname) {
1587 case MRT6_INIT: 1585 case MRT6_INIT:
1588 if (sk->sk_type != SOCK_RAW || 1586 if (sk->sk_type != SOCK_RAW ||
1589 inet_sk(sk)->inet_num != IPPROTO_ICMPV6) 1587 inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1590 return -EOPNOTSUPP; 1588 return -EOPNOTSUPP;
1591 if (optlen < sizeof(int)) 1589 if (optlen < sizeof(int))
1592 return -EINVAL; 1590 return -EINVAL;
1593 1591
1594 return ip6mr_sk_init(mrt, sk); 1592 return ip6mr_sk_init(mrt, sk);
1595 1593
1596 case MRT6_DONE: 1594 case MRT6_DONE:
1597 return ip6mr_sk_done(sk); 1595 return ip6mr_sk_done(sk);
1598 1596
1599 case MRT6_ADD_MIF: 1597 case MRT6_ADD_MIF:
1600 if (optlen < sizeof(vif)) 1598 if (optlen < sizeof(vif))
1601 return -EINVAL; 1599 return -EINVAL;
1602 if (copy_from_user(&vif, optval, sizeof(vif))) 1600 if (copy_from_user(&vif, optval, sizeof(vif)))
1603 return -EFAULT; 1601 return -EFAULT;
1604 if (vif.mif6c_mifi >= MAXMIFS) 1602 if (vif.mif6c_mifi >= MAXMIFS)
1605 return -ENFILE; 1603 return -ENFILE;
1606 rtnl_lock(); 1604 rtnl_lock();
1607 ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk); 1605 ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
1608 rtnl_unlock(); 1606 rtnl_unlock();
1609 return ret; 1607 return ret;
1610 1608
1611 case MRT6_DEL_MIF: 1609 case MRT6_DEL_MIF:
1612 if (optlen < sizeof(mifi_t)) 1610 if (optlen < sizeof(mifi_t))
1613 return -EINVAL; 1611 return -EINVAL;
1614 if (copy_from_user(&mifi, optval, sizeof(mifi_t))) 1612 if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1615 return -EFAULT; 1613 return -EFAULT;
1616 rtnl_lock(); 1614 rtnl_lock();
1617 ret = mif6_delete(mrt, mifi, NULL); 1615 ret = mif6_delete(mrt, mifi, NULL);
1618 rtnl_unlock(); 1616 rtnl_unlock();
1619 return ret; 1617 return ret;
1620 1618
1621 /* 1619 /*
1622 * Manipulate the forwarding caches. These live 1620 * Manipulate the forwarding caches. These live
1623 * in a sort of kernel/user symbiosis. 1621 * in a sort of kernel/user symbiosis.
1624 */ 1622 */
1625 case MRT6_ADD_MFC: 1623 case MRT6_ADD_MFC:
1626 case MRT6_DEL_MFC: 1624 case MRT6_DEL_MFC:
1627 if (optlen < sizeof(mfc)) 1625 if (optlen < sizeof(mfc))
1628 return -EINVAL; 1626 return -EINVAL;
1629 if (copy_from_user(&mfc, optval, sizeof(mfc))) 1627 if (copy_from_user(&mfc, optval, sizeof(mfc)))
1630 return -EFAULT; 1628 return -EFAULT;
1631 rtnl_lock(); 1629 rtnl_lock();
1632 if (optname == MRT6_DEL_MFC) 1630 if (optname == MRT6_DEL_MFC)
1633 ret = ip6mr_mfc_delete(mrt, &mfc); 1631 ret = ip6mr_mfc_delete(mrt, &mfc);
1634 else 1632 else
1635 ret = ip6mr_mfc_add(net, mrt, &mfc, sk == mrt->mroute6_sk); 1633 ret = ip6mr_mfc_add(net, mrt, &mfc, sk == mrt->mroute6_sk);
1636 rtnl_unlock(); 1634 rtnl_unlock();
1637 return ret; 1635 return ret;
1638 1636
1639 /* 1637 /*
1640 * Control PIM assert (to activate pim will activate assert) 1638 * Control PIM assert (to activate pim will activate assert)
1641 */ 1639 */
1642 case MRT6_ASSERT: 1640 case MRT6_ASSERT:
1643 { 1641 {
1644 int v; 1642 int v;
1645 if (get_user(v, (int __user *)optval)) 1643 if (get_user(v, (int __user *)optval))
1646 return -EFAULT; 1644 return -EFAULT;
1647 mrt->mroute_do_assert = !!v; 1645 mrt->mroute_do_assert = !!v;
1648 return 0; 1646 return 0;
1649 } 1647 }
1650 1648
1651 #ifdef CONFIG_IPV6_PIMSM_V2 1649 #ifdef CONFIG_IPV6_PIMSM_V2
1652 case MRT6_PIM: 1650 case MRT6_PIM:
1653 { 1651 {
1654 int v; 1652 int v;
1655 if (get_user(v, (int __user *)optval)) 1653 if (get_user(v, (int __user *)optval))
1656 return -EFAULT; 1654 return -EFAULT;
1657 v = !!v; 1655 v = !!v;
1658 rtnl_lock(); 1656 rtnl_lock();
1659 ret = 0; 1657 ret = 0;
1660 if (v != mrt->mroute_do_pim) { 1658 if (v != mrt->mroute_do_pim) {
1661 mrt->mroute_do_pim = v; 1659 mrt->mroute_do_pim = v;
1662 mrt->mroute_do_assert = v; 1660 mrt->mroute_do_assert = v;
1663 } 1661 }
1664 rtnl_unlock(); 1662 rtnl_unlock();
1665 return ret; 1663 return ret;
1666 } 1664 }
1667 1665
1668 #endif 1666 #endif
1669 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES 1667 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
1670 case MRT6_TABLE: 1668 case MRT6_TABLE:
1671 { 1669 {
1672 u32 v; 1670 u32 v;
1673 1671
1674 if (optlen != sizeof(u32)) 1672 if (optlen != sizeof(u32))
1675 return -EINVAL; 1673 return -EINVAL;
1676 if (get_user(v, (u32 __user *)optval)) 1674 if (get_user(v, (u32 __user *)optval))
1677 return -EFAULT; 1675 return -EFAULT;
1678 if (sk == mrt->mroute6_sk) 1676 if (sk == mrt->mroute6_sk)
1679 return -EBUSY; 1677 return -EBUSY;
1680 1678
1681 rtnl_lock(); 1679 rtnl_lock();
1682 ret = 0; 1680 ret = 0;
1683 if (!ip6mr_new_table(net, v)) 1681 if (!ip6mr_new_table(net, v))
1684 ret = -ENOMEM; 1682 ret = -ENOMEM;
1685 raw6_sk(sk)->ip6mr_table = v; 1683 raw6_sk(sk)->ip6mr_table = v;
1686 rtnl_unlock(); 1684 rtnl_unlock();
1687 return ret; 1685 return ret;
1688 } 1686 }
1689 #endif 1687 #endif
1690 /* 1688 /*
1691 * Spurious command, or MRT6_VERSION which you cannot 1689 * Spurious command, or MRT6_VERSION which you cannot
1692 * set. 1690 * set.
1693 */ 1691 */
1694 default: 1692 default:
1695 return -ENOPROTOOPT; 1693 return -ENOPROTOOPT;
1696 } 1694 }
1697 } 1695 }
1698 1696
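[Editor's note] The option set above maps one-to-one onto a routing daemon's startup sequence. A minimal self-contained sketch (userspace C; the interface index 2 and the single mif are illustrative, and error handling is abbreviated):

	#include <stdio.h>
	#include <string.h>
	#include <sys/socket.h>
	#include <netinet/in.h>
	#include <linux/mroute6.h>

	int main(void)
	{
		int fd, one = 1;
		struct mif6ctl mc;

		/* Must be raw ICMPv6 - exactly what MRT6_INIT enforces above. */
		fd = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
		if (fd < 0 || setsockopt(fd, IPPROTO_IPV6, MRT6_INIT,
					 &one, sizeof(one)) < 0) {
			perror("MRT6_INIT");
			return 1;
		}

		memset(&mc, 0, sizeof(mc));
		mc.mif6c_mifi = 0;	/* mif slot, must be < MAXMIFS */
		mc.mif6c_pifi = 2;	/* ifindex of a physical interface */
		if (setsockopt(fd, IPPROTO_IPV6, MRT6_ADD_MIF,
			       &mc, sizeof(mc)) < 0)
			perror("MRT6_ADD_MIF");
		return 0;
	}

Teardown is symmetric: MRT6_DONE, or simply closing the socket, which reaches ip6mr_sk_done() the same way.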
1699 /* 1697 /*
1700 * Getsock opt support for the multicast routing system. 1698 * Getsock opt support for the multicast routing system.
1701 */ 1699 */
1702 1700
1703 int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, 1701 int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1704 int __user *optlen) 1702 int __user *optlen)
1705 { 1703 {
1706 int olr; 1704 int olr;
1707 int val; 1705 int val;
1708 struct net *net = sock_net(sk); 1706 struct net *net = sock_net(sk);
1709 struct mr6_table *mrt; 1707 struct mr6_table *mrt;
1710 1708
1711 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); 1709 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1712 if (mrt == NULL) 1710 if (mrt == NULL)
1713 return -ENOENT; 1711 return -ENOENT;
1714 1712
1715 switch (optname) { 1713 switch (optname) {
1716 case MRT6_VERSION: 1714 case MRT6_VERSION:
1717 val = 0x0305; 1715 val = 0x0305;
1718 break; 1716 break;
1719 #ifdef CONFIG_IPV6_PIMSM_V2 1717 #ifdef CONFIG_IPV6_PIMSM_V2
1720 case MRT6_PIM: 1718 case MRT6_PIM:
1721 val = mrt->mroute_do_pim; 1719 val = mrt->mroute_do_pim;
1722 break; 1720 break;
1723 #endif 1721 #endif
1724 case MRT6_ASSERT: 1722 case MRT6_ASSERT:
1725 val = mrt->mroute_do_assert; 1723 val = mrt->mroute_do_assert;
1726 break; 1724 break;
1727 default: 1725 default:
1728 return -ENOPROTOOPT; 1726 return -ENOPROTOOPT;
1729 } 1727 }
1730 1728
1731 if (get_user(olr, optlen)) 1729 if (get_user(olr, optlen))
1732 return -EFAULT; 1730 return -EFAULT;
1733 1731
1734 olr = min_t(int, olr, sizeof(int)); 1732 olr = min_t(int, olr, sizeof(int));
1735 if (olr < 0) 1733 if (olr < 0)
1736 return -EINVAL; 1734 return -EINVAL;
1737 1735
1738 if (put_user(olr, optlen)) 1736 if (put_user(olr, optlen))
1739 return -EFAULT; 1737 return -EFAULT;
1740 if (copy_to_user(optval, &val, olr)) 1738 if (copy_to_user(optval, &val, olr))
1741 return -EFAULT; 1739 return -EFAULT;
1742 return 0; 1740 return 0;
1743 } 1741 }
1744 1742
1745 /* 1743 /*
1746 * The IP multicast ioctl support routines. 1744 * The IP multicast ioctl support routines.
1747 */ 1745 */
1748 1746
1749 int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg) 1747 int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1750 { 1748 {
1751 struct sioc_sg_req6 sr; 1749 struct sioc_sg_req6 sr;
1752 struct sioc_mif_req6 vr; 1750 struct sioc_mif_req6 vr;
1753 struct mif_device *vif; 1751 struct mif_device *vif;
1754 struct mfc6_cache *c; 1752 struct mfc6_cache *c;
1755 struct net *net = sock_net(sk); 1753 struct net *net = sock_net(sk);
1756 struct mr6_table *mrt; 1754 struct mr6_table *mrt;
1757 1755
1758 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); 1756 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1759 if (mrt == NULL) 1757 if (mrt == NULL)
1760 return -ENOENT; 1758 return -ENOENT;
1761 1759
1762 switch (cmd) { 1760 switch (cmd) {
1763 case SIOCGETMIFCNT_IN6: 1761 case SIOCGETMIFCNT_IN6:
1764 if (copy_from_user(&vr, arg, sizeof(vr))) 1762 if (copy_from_user(&vr, arg, sizeof(vr)))
1765 return -EFAULT; 1763 return -EFAULT;
1766 if (vr.mifi >= mrt->maxvif) 1764 if (vr.mifi >= mrt->maxvif)
1767 return -EINVAL; 1765 return -EINVAL;
1768 read_lock(&mrt_lock); 1766 read_lock(&mrt_lock);
1769 vif = &mrt->vif6_table[vr.mifi]; 1767 vif = &mrt->vif6_table[vr.mifi];
1770 if (MIF_EXISTS(mrt, vr.mifi)) { 1768 if (MIF_EXISTS(mrt, vr.mifi)) {
1771 vr.icount = vif->pkt_in; 1769 vr.icount = vif->pkt_in;
1772 vr.ocount = vif->pkt_out; 1770 vr.ocount = vif->pkt_out;
1773 vr.ibytes = vif->bytes_in; 1771 vr.ibytes = vif->bytes_in;
1774 vr.obytes = vif->bytes_out; 1772 vr.obytes = vif->bytes_out;
1775 read_unlock(&mrt_lock); 1773 read_unlock(&mrt_lock);
1776 1774
1777 if (copy_to_user(arg, &vr, sizeof(vr))) 1775 if (copy_to_user(arg, &vr, sizeof(vr)))
1778 return -EFAULT; 1776 return -EFAULT;
1779 return 0; 1777 return 0;
1780 } 1778 }
1781 read_unlock(&mrt_lock); 1779 read_unlock(&mrt_lock);
1782 return -EADDRNOTAVAIL; 1780 return -EADDRNOTAVAIL;
1783 case SIOCGETSGCNT_IN6: 1781 case SIOCGETSGCNT_IN6:
1784 if (copy_from_user(&sr, arg, sizeof(sr))) 1782 if (copy_from_user(&sr, arg, sizeof(sr)))
1785 return -EFAULT; 1783 return -EFAULT;
1786 1784
1787 read_lock(&mrt_lock); 1785 read_lock(&mrt_lock);
1788 c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr); 1786 c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1789 if (c) { 1787 if (c) {
1790 sr.pktcnt = c->mfc_un.res.pkt; 1788 sr.pktcnt = c->mfc_un.res.pkt;
1791 sr.bytecnt = c->mfc_un.res.bytes; 1789 sr.bytecnt = c->mfc_un.res.bytes;
1792 sr.wrong_if = c->mfc_un.res.wrong_if; 1790 sr.wrong_if = c->mfc_un.res.wrong_if;
1793 read_unlock(&mrt_lock); 1791 read_unlock(&mrt_lock);
1794 1792
1795 if (copy_to_user(arg, &sr, sizeof(sr))) 1793 if (copy_to_user(arg, &sr, sizeof(sr)))
1796 return -EFAULT; 1794 return -EFAULT;
1797 return 0; 1795 return 0;
1798 } 1796 }
1799 read_unlock(&mrt_lock); 1797 read_unlock(&mrt_lock);
1800 return -EADDRNOTAVAIL; 1798 return -EADDRNOTAVAIL;
1801 default: 1799 default:
1802 return -ENOIOCTLCMD; 1800 return -ENOIOCTLCMD;
1803 } 1801 }
1804 } 1802 }
1805 1803
1806 1804
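[Editor's note] The counter ioctls are how a daemon polls per-mif and per-(S,G) statistics between upcalls. A sketch of the (S,G) query, reusing the mrouting socket from the previous example (print_sg_counters is an illustrative name; the struct fields match the copies performed above):

	#include <stdio.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <netinet/in.h>
	#include <linux/mroute6.h>

	static void print_sg_counters(int fd, const struct sockaddr_in6 *src,
				      const struct sockaddr_in6 *grp)
	{
		struct sioc_sg_req6 sr;

		memset(&sr, 0, sizeof(sr));
		sr.src = *src;
		sr.grp = *grp;
		if (ioctl(fd, SIOCGETSGCNT_IN6, &sr) < 0) {
			perror("SIOCGETSGCNT_IN6");	/* no such (S,G) entry */
			return;
		}
		printf("pkt=%lu bytes=%lu wrong_if=%lu\n",
		       sr.pktcnt, sr.bytecnt, sr.wrong_if);
	}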
1807 static inline int ip6mr_forward2_finish(struct sk_buff *skb) 1805 static inline int ip6mr_forward2_finish(struct sk_buff *skb)
1808 { 1806 {
1809 IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)), 1807 IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
1810 IPSTATS_MIB_OUTFORWDATAGRAMS); 1808 IPSTATS_MIB_OUTFORWDATAGRAMS);
1811 return dst_output(skb); 1809 return dst_output(skb);
1812 } 1810 }
1813 1811
1814 /* 1812 /*
1815 * Processing handlers for ip6mr_forward 1813 * Processing handlers for ip6mr_forward
1816 */ 1814 */
1817 1815
1818 static int ip6mr_forward2(struct net *net, struct mr6_table *mrt, 1816 static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
1819 struct sk_buff *skb, struct mfc6_cache *c, int vifi) 1817 struct sk_buff *skb, struct mfc6_cache *c, int vifi)
1820 { 1818 {
1821 struct ipv6hdr *ipv6h; 1819 struct ipv6hdr *ipv6h;
1822 struct mif_device *vif = &mrt->vif6_table[vifi]; 1820 struct mif_device *vif = &mrt->vif6_table[vifi];
1823 struct net_device *dev; 1821 struct net_device *dev;
1824 struct dst_entry *dst; 1822 struct dst_entry *dst;
1825 struct flowi fl; 1823 struct flowi fl;
1826 1824
1827 if (vif->dev == NULL) 1825 if (vif->dev == NULL)
1828 goto out_free; 1826 goto out_free;
1829 1827
1830 #ifdef CONFIG_IPV6_PIMSM_V2 1828 #ifdef CONFIG_IPV6_PIMSM_V2
1831 if (vif->flags & MIFF_REGISTER) { 1829 if (vif->flags & MIFF_REGISTER) {
1832 vif->pkt_out++; 1830 vif->pkt_out++;
1833 vif->bytes_out += skb->len; 1831 vif->bytes_out += skb->len;
1834 vif->dev->stats.tx_bytes += skb->len; 1832 vif->dev->stats.tx_bytes += skb->len;
1835 vif->dev->stats.tx_packets++; 1833 vif->dev->stats.tx_packets++;
1836 ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT); 1834 ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
1837 goto out_free; 1835 goto out_free;
1838 } 1836 }
1839 #endif 1837 #endif
1840 1838
1841 ipv6h = ipv6_hdr(skb); 1839 ipv6h = ipv6_hdr(skb);
1842 1840
1843 fl = (struct flowi) { 1841 fl = (struct flowi) {
1844 .oif = vif->link, 1842 .oif = vif->link,
1845 .nl_u = { .ip6_u = 1843 .nl_u = { .ip6_u =
1846 { .daddr = ipv6h->daddr, } 1844 { .daddr = ipv6h->daddr, }
1847 } 1845 }
1848 }; 1846 };
1849 1847
1850 dst = ip6_route_output(net, NULL, &fl); 1848 dst = ip6_route_output(net, NULL, &fl);
1851 if (!dst) 1849 if (!dst)
1852 goto out_free; 1850 goto out_free;
1853 1851
1854 skb_dst_drop(skb); 1852 skb_dst_drop(skb);
1855 skb_dst_set(skb, dst); 1853 skb_dst_set(skb, dst);
1856 1854
1857 /* 1855 /*
1858 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets locally 1856 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets locally
1859 * not only before forwarding, but also after forwarding on all output 1857 * not only before forwarding, but also after forwarding on all output
1860 * interfaces. Clearly, if the mrouter runs a multicasting 1858 * interfaces. Clearly, if the mrouter runs a multicasting
1861 * program, that program should receive packets regardless of which 1859 * program, that program should receive packets regardless of which
1862 * interface it joined on. 1860 * interface it joined on.
1863 * Otherwise, the program would have to join on all 1861 * Otherwise, the program would have to join on all
1864 * interfaces. On the other hand, a multihomed host (or router, but 1862 * interfaces. On the other hand, a multihomed host (or router, but
1865 * not an mrouter) cannot join on more than one interface - it would 1863 * not an mrouter) cannot join on more than one interface - it would
1866 * result in receiving multiple copies of each packet. 1864 * result in receiving multiple copies of each packet.
1867 */ 1865 */
1868 dev = vif->dev; 1866 dev = vif->dev;
1869 skb->dev = dev; 1867 skb->dev = dev;
1870 vif->pkt_out++; 1868 vif->pkt_out++;
1871 vif->bytes_out += skb->len; 1869 vif->bytes_out += skb->len;
1872 1870
1873 /* We are about to write */ 1871 /* We are about to write */
1874 /* XXX: extension headers? */ 1872 /* XXX: extension headers? */
1875 if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev))) 1873 if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
1876 goto out_free; 1874 goto out_free;
1877 1875
1878 ipv6h = ipv6_hdr(skb); 1876 ipv6h = ipv6_hdr(skb);
1879 ipv6h->hop_limit--; 1877 ipv6h->hop_limit--;
1880 1878
1881 IP6CB(skb)->flags |= IP6SKB_FORWARDED; 1879 IP6CB(skb)->flags |= IP6SKB_FORWARDED;
1882 1880
1883 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dev, 1881 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dev,
1884 ip6mr_forward2_finish); 1882 ip6mr_forward2_finish);
1885 1883
1886 out_free: 1884 out_free:
1887 kfree_skb(skb); 1885 kfree_skb(skb);
1888 return 0; 1886 return 0;
1889 } 1887 }
1890 1888
1891 static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev) 1889 static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
1892 { 1890 {
1893 int ct; 1891 int ct;
1894 1892
1895 for (ct = mrt->maxvif - 1; ct >= 0; ct--) { 1893 for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
1896 if (mrt->vif6_table[ct].dev == dev) 1894 if (mrt->vif6_table[ct].dev == dev)
1897 break; 1895 break;
1898 } 1896 }
1899 return ct; 1897 return ct;
1900 } 1898 }
1901 1899
1902 static int ip6_mr_forward(struct net *net, struct mr6_table *mrt, 1900 static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
1903 struct sk_buff *skb, struct mfc6_cache *cache) 1901 struct sk_buff *skb, struct mfc6_cache *cache)
1904 { 1902 {
1905 int psend = -1; 1903 int psend = -1;
1906 int vif, ct; 1904 int vif, ct;
1907 1905
1908 vif = cache->mf6c_parent; 1906 vif = cache->mf6c_parent;
1909 cache->mfc_un.res.pkt++; 1907 cache->mfc_un.res.pkt++;
1910 cache->mfc_un.res.bytes += skb->len; 1908 cache->mfc_un.res.bytes += skb->len;
1911 1909
1912 /* 1910 /*
1913 * Wrong interface: drop packet and (maybe) send PIM assert. 1911 * Wrong interface: drop packet and (maybe) send PIM assert.
1914 */ 1912 */
1915 if (mrt->vif6_table[vif].dev != skb->dev) { 1913 if (mrt->vif6_table[vif].dev != skb->dev) {
1916 int true_vifi; 1914 int true_vifi;
1917 1915
1918 cache->mfc_un.res.wrong_if++; 1916 cache->mfc_un.res.wrong_if++;
1919 true_vifi = ip6mr_find_vif(mrt, skb->dev); 1917 true_vifi = ip6mr_find_vif(mrt, skb->dev);
1920 1918
1921 if (true_vifi >= 0 && mrt->mroute_do_assert && 1919 if (true_vifi >= 0 && mrt->mroute_do_assert &&
1922 /* pimsm uses asserts when switching from RPT to SPT, 1920 /* pimsm uses asserts when switching from RPT to SPT,
1923 so we cannot check that the packet arrived on an oif. 1921 so we cannot check that the packet arrived on an oif.
1924 That is bad, but otherwise we would need to move a pretty 1922 That is bad, but otherwise we would need to move a pretty
1925 large chunk of pimd into the kernel. Ough... --ANK 1923 large chunk of pimd into the kernel. Ough... --ANK
1926 */ 1924 */
1927 (mrt->mroute_do_pim || 1925 (mrt->mroute_do_pim ||
1928 cache->mfc_un.res.ttls[true_vifi] < 255) && 1926 cache->mfc_un.res.ttls[true_vifi] < 255) &&
1929 time_after(jiffies, 1927 time_after(jiffies,
1930 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) { 1928 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1931 cache->mfc_un.res.last_assert = jiffies; 1929 cache->mfc_un.res.last_assert = jiffies;
1932 ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF); 1930 ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
1933 } 1931 }
1934 goto dont_forward; 1932 goto dont_forward;
1935 } 1933 }
1936 1934
1937 mrt->vif6_table[vif].pkt_in++; 1935 mrt->vif6_table[vif].pkt_in++;
1938 mrt->vif6_table[vif].bytes_in += skb->len; 1936 mrt->vif6_table[vif].bytes_in += skb->len;
1939 1937
1940 /* 1938 /*
1941 * Forward the frame 1939 * Forward the frame
1942 */ 1940 */
1943 for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) { 1941 for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
1944 if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) { 1942 if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
1945 if (psend != -1) { 1943 if (psend != -1) {
1946 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 1944 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1947 if (skb2) 1945 if (skb2)
1948 ip6mr_forward2(net, mrt, skb2, cache, psend); 1946 ip6mr_forward2(net, mrt, skb2, cache, psend);
1949 } 1947 }
1950 psend = ct; 1948 psend = ct;
1951 } 1949 }
1952 } 1950 }
1953 if (psend != -1) { 1951 if (psend != -1) {
1954 ip6mr_forward2(net, mrt, skb, cache, psend); 1952 ip6mr_forward2(net, mrt, skb, cache, psend);
1955 return 0; 1953 return 0;
1956 } 1954 }
1957 1955
1958 dont_forward: 1956 dont_forward:
1959 kfree_skb(skb); 1957 kfree_skb(skb);
1960 return 0; 1958 return 0;
1961 } 1959 }
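
Note the defer-by-one structure of the forwarding loop above: each matching interface is remembered in psend and only transmitted on the next match, so every output except the last gets a clone and the final one consumes the original skb, saving one skb_clone() per forwarded packet. Stripped of the multicast specifics, the idiom looks like this (should_send() and xmit_one() are hypothetical stand-ins):

    /* Defer-by-one fanout: clone for every output but the last (sketch). */
    static void fanout(struct sk_buff *skb, int n_out)
    {
            int i, pending = -1;    /* output whose transmit is deferred */

            for (i = 0; i < n_out; i++) {
                    if (should_send(i)) {
                            if (pending != -1) {
                                    struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                                    if (skb2)
                                            xmit_one(skb2, pending);
                            }
                            pending = i;    /* defer: may turn out to be last */
                    }
            }
            if (pending != -1)
                    xmit_one(skb, pending); /* original skb goes out, no clone */
            else
                    kfree_skb(skb);         /* no output matched */
    }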
1962 1960
1963 1961
1964 /* 1962 /*
1965 * Multicast packets for forwarding arrive here 1963 * Multicast packets for forwarding arrive here
1966 */ 1964 */
1967 1965
1968 int ip6_mr_input(struct sk_buff *skb) 1966 int ip6_mr_input(struct sk_buff *skb)
1969 { 1967 {
1970 struct mfc6_cache *cache; 1968 struct mfc6_cache *cache;
1971 struct net *net = dev_net(skb->dev); 1969 struct net *net = dev_net(skb->dev);
1972 struct mr6_table *mrt; 1970 struct mr6_table *mrt;
1973 struct flowi fl = { 1971 struct flowi fl = {
1974 .iif = skb->dev->ifindex, 1972 .iif = skb->dev->ifindex,
1975 .mark = skb->mark, 1973 .mark = skb->mark,
1976 }; 1974 };
1977 int err; 1975 int err;
1978 1976
1979 err = ip6mr_fib_lookup(net, &fl, &mrt); 1977 err = ip6mr_fib_lookup(net, &fl, &mrt);
1980 if (err < 0) 1978 if (err < 0)
1981 return err; 1979 return err;
1982 1980
1983 read_lock(&mrt_lock); 1981 read_lock(&mrt_lock);
1984 cache = ip6mr_cache_find(mrt, 1982 cache = ip6mr_cache_find(mrt,
1985 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr); 1983 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
1986 1984
1987 /* 1985 /*
1988 * No usable cache entry 1986 * No usable cache entry
1989 */ 1987 */
1990 if (cache == NULL) { 1988 if (cache == NULL) {
1991 int vif; 1989 int vif;
1992 1990
1993 vif = ip6mr_find_vif(mrt, skb->dev); 1991 vif = ip6mr_find_vif(mrt, skb->dev);
1994 if (vif >= 0) { 1992 if (vif >= 0) {
1995 int err = ip6mr_cache_unresolved(mrt, vif, skb); 1993 int err = ip6mr_cache_unresolved(mrt, vif, skb);
1996 read_unlock(&mrt_lock); 1994 read_unlock(&mrt_lock);
1997 1995
1998 return err; 1996 return err;
1999 } 1997 }
2000 read_unlock(&mrt_lock); 1998 read_unlock(&mrt_lock);
2001 kfree_skb(skb); 1999 kfree_skb(skb);
2002 return -ENODEV; 2000 return -ENODEV;
2003 } 2001 }
2004 2002
2005 ip6_mr_forward(net, mrt, skb, cache); 2003 ip6_mr_forward(net, mrt, skb, cache);
2006 2004
2007 read_unlock(&mrt_lock); 2005 read_unlock(&mrt_lock);
2008 2006
2009 return 0; 2007 return 0;
2010 } 2008 }
2011 2009
2012 2010
2013 static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, 2011 static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2014 struct mfc6_cache *c, struct rtmsg *rtm) 2012 struct mfc6_cache *c, struct rtmsg *rtm)
2015 { 2013 {
2016 int ct; 2014 int ct;
2017 struct rtnexthop *nhp; 2015 struct rtnexthop *nhp;
2018 u8 *b = skb_tail_pointer(skb); 2016 u8 *b = skb_tail_pointer(skb);
2019 struct rtattr *mp_head; 2017 struct rtattr *mp_head;
2020 2018
2021 /* If cache is unresolved, don't try to parse IIF and OIF */ 2019 /* If cache is unresolved, don't try to parse IIF and OIF */
2022 if (c->mf6c_parent > MAXMIFS) 2020 if (c->mf6c_parent > MAXMIFS)
2023 return -ENOENT; 2021 return -ENOENT;
2024 2022
2025 if (MIF_EXISTS(mrt, c->mf6c_parent)) 2023 if (MIF_EXISTS(mrt, c->mf6c_parent))
2026 RTA_PUT(skb, RTA_IIF, 4, &mrt->vif6_table[c->mf6c_parent].dev->ifindex); 2024 RTA_PUT(skb, RTA_IIF, 4, &mrt->vif6_table[c->mf6c_parent].dev->ifindex);
2027 2025
2028 mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0)); 2026 mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
2029 2027
2030 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { 2028 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
2031 if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) { 2029 if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
2032 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4)) 2030 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
2033 goto rtattr_failure; 2031 goto rtattr_failure;
2034 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); 2032 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
2035 nhp->rtnh_flags = 0; 2033 nhp->rtnh_flags = 0;
2036 nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; 2034 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
2037 nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex; 2035 nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
2038 nhp->rtnh_len = sizeof(*nhp); 2036 nhp->rtnh_len = sizeof(*nhp);
2039 } 2037 }
2040 } 2038 }
2041 mp_head->rta_type = RTA_MULTIPATH; 2039 mp_head->rta_type = RTA_MULTIPATH;
2042 mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head; 2040 mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
2043 rtm->rtm_type = RTN_MULTICAST; 2041 rtm->rtm_type = RTN_MULTICAST;
2044 return 1; 2042 return 1;
2045 2043
2046 rtattr_failure: 2044 rtattr_failure:
2047 nlmsg_trim(skb, b); 2045 nlmsg_trim(skb, b);
2048 return -EMSGSIZE; 2046 return -EMSGSIZE;
2049 } 2047 }
2050 2048
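__ip6mr_fill_mroute() encodes the outgoing interface set as a single RTA_MULTIPATH attribute holding one struct rtnexthop per interface, reusing rtnh_hops to carry the TTL threshold. A consumer of the dump would walk it with the standard rtnexthop iteration macros; a hedged userspace sketch, assuming the caller has already located the RTA_MULTIPATH attribute in an RTM_NEWROUTE message:

    #include <stdio.h>
    #include <linux/rtnetlink.h>

    static void print_mroute_oifs(struct rtattr *mp)
    {
            struct rtnexthop *nhp = RTA_DATA(mp);
            int len = RTA_PAYLOAD(mp);

            /* rtnh_len covers each entry; RTNH_OK guards against truncation */
            for (; RTNH_OK(nhp, len);
                 len -= NLMSG_ALIGN(nhp->rtnh_len), nhp = RTNH_NEXT(nhp))
                    printf("oif %d, ttl threshold %u\n",
                           nhp->rtnh_ifindex, nhp->rtnh_hops);
    }
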
2051 int ip6mr_get_route(struct net *net, 2049 int ip6mr_get_route(struct net *net,
2052 struct sk_buff *skb, struct rtmsg *rtm, int nowait) 2050 struct sk_buff *skb, struct rtmsg *rtm, int nowait)
2053 { 2051 {
2054 int err; 2052 int err;
2055 struct mr6_table *mrt; 2053 struct mr6_table *mrt;
2056 struct mfc6_cache *cache; 2054 struct mfc6_cache *cache;
2057 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); 2055 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
2058 2056
2059 mrt = ip6mr_get_table(net, RT6_TABLE_DFLT); 2057 mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
2060 if (mrt == NULL) 2058 if (mrt == NULL)
2061 return -ENOENT; 2059 return -ENOENT;
2062 2060
2063 read_lock(&mrt_lock); 2061 read_lock(&mrt_lock);
2064 cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr); 2062 cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
2065 2063
2066 if (!cache) { 2064 if (!cache) {
2067 struct sk_buff *skb2; 2065 struct sk_buff *skb2;
2068 struct ipv6hdr *iph; 2066 struct ipv6hdr *iph;
2069 struct net_device *dev; 2067 struct net_device *dev;
2070 int vif; 2068 int vif;
2071 2069
2072 if (nowait) { 2070 if (nowait) {
2073 read_unlock(&mrt_lock); 2071 read_unlock(&mrt_lock);
2074 return -EAGAIN; 2072 return -EAGAIN;
2075 } 2073 }
2076 2074
2077 dev = skb->dev; 2075 dev = skb->dev;
2078 if (dev == NULL || (vif = ip6mr_find_vif(mrt, dev)) < 0) { 2076 if (dev == NULL || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2079 read_unlock(&mrt_lock); 2077 read_unlock(&mrt_lock);
2080 return -ENODEV; 2078 return -ENODEV;
2081 } 2079 }
2082 2080
2083 /* really correct? */ 2081 /* really correct? */
2084 skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC); 2082 skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2085 if (!skb2) { 2083 if (!skb2) {
2086 read_unlock(&mrt_lock); 2084 read_unlock(&mrt_lock);
2087 return -ENOMEM; 2085 return -ENOMEM;
2088 } 2086 }
2089 2087
2090 skb_reset_transport_header(skb2); 2088 skb_reset_transport_header(skb2);
2091 2089
2092 skb_put(skb2, sizeof(struct ipv6hdr)); 2090 skb_put(skb2, sizeof(struct ipv6hdr));
2093 skb_reset_network_header(skb2); 2091 skb_reset_network_header(skb2);
2094 2092
2095 iph = ipv6_hdr(skb2); 2093 iph = ipv6_hdr(skb2);
2096 iph->version = 0; 2094 iph->version = 0;
2097 iph->priority = 0; 2095 iph->priority = 0;
2098 iph->flow_lbl[0] = 0; 2096 iph->flow_lbl[0] = 0;
2099 iph->flow_lbl[1] = 0; 2097 iph->flow_lbl[1] = 0;
2100 iph->flow_lbl[2] = 0; 2098 iph->flow_lbl[2] = 0;
2101 iph->payload_len = 0; 2099 iph->payload_len = 0;
2102 iph->nexthdr = IPPROTO_NONE; 2100 iph->nexthdr = IPPROTO_NONE;
2103 iph->hop_limit = 0; 2101 iph->hop_limit = 0;
2104 ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr); 2102 ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr);
2105 ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr); 2103 ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr);
2106 2104
2107 err = ip6mr_cache_unresolved(mrt, vif, skb2); 2105 err = ip6mr_cache_unresolved(mrt, vif, skb2);
2108 read_unlock(&mrt_lock); 2106 read_unlock(&mrt_lock);
2109 2107
2110 return err; 2108 return err;
2111 } 2109 }
2112 2110
2113 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY)) 2111 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
2114 cache->mfc_flags |= MFC_NOTIFY; 2112 cache->mfc_flags |= MFC_NOTIFY;
2115 2113
2116 err = __ip6mr_fill_mroute(mrt, skb, cache, rtm); 2114 err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
2117 read_unlock(&mrt_lock); 2115 read_unlock(&mrt_lock);
2118 return err; 2116 return err;
2119 } 2117 }
2120 2118
2121 static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, 2119 static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2122 u32 pid, u32 seq, struct mfc6_cache *c) 2120 u32 pid, u32 seq, struct mfc6_cache *c)
2123 { 2121 {
2124 struct nlmsghdr *nlh; 2122 struct nlmsghdr *nlh;
2125 struct rtmsg *rtm; 2123 struct rtmsg *rtm;
2126 2124
2127 nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI); 2125 nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
2128 if (nlh == NULL) 2126 if (nlh == NULL)
2129 return -EMSGSIZE; 2127 return -EMSGSIZE;
2130 2128
2131 rtm = nlmsg_data(nlh); 2129 rtm = nlmsg_data(nlh);
2132 rtm->rtm_family = RTNL_FAMILY_IP6MR; 2130 rtm->rtm_family = RTNL_FAMILY_IP6MR;
2133 rtm->rtm_dst_len = 128; 2131 rtm->rtm_dst_len = 128;
2134 rtm->rtm_src_len = 128; 2132 rtm->rtm_src_len = 128;
2135 rtm->rtm_tos = 0; 2133 rtm->rtm_tos = 0;
2136 rtm->rtm_table = mrt->id; 2134 rtm->rtm_table = mrt->id;
2137 NLA_PUT_U32(skb, RTA_TABLE, mrt->id); 2135 NLA_PUT_U32(skb, RTA_TABLE, mrt->id);
2138 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 2136 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2139 rtm->rtm_protocol = RTPROT_UNSPEC; 2137 rtm->rtm_protocol = RTPROT_UNSPEC;
2140 rtm->rtm_flags = 0; 2138 rtm->rtm_flags = 0;
2141 2139
2142 NLA_PUT(skb, RTA_SRC, 16, &c->mf6c_origin); 2140 NLA_PUT(skb, RTA_SRC, 16, &c->mf6c_origin);
2143 NLA_PUT(skb, RTA_DST, 16, &c->mf6c_mcastgrp); 2141 NLA_PUT(skb, RTA_DST, 16, &c->mf6c_mcastgrp);
2144 2142
2145 if (__ip6mr_fill_mroute(mrt, skb, c, rtm) < 0) 2143 if (__ip6mr_fill_mroute(mrt, skb, c, rtm) < 0)
2146 goto nla_put_failure; 2144 goto nla_put_failure;
2147 2145
2148 return nlmsg_end(skb, nlh); 2146 return nlmsg_end(skb, nlh);
2149 2147
2150 nla_put_failure: 2148 nla_put_failure:
2151 nlmsg_cancel(skb, nlh); 2149 nlmsg_cancel(skb, nlh);
2152 return -EMSGSIZE; 2150 return -EMSGSIZE;
2153 } 2151 }
2154 2152
2155 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) 2153 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2156 { 2154 {
2157 struct net *net = sock_net(skb->sk); 2155 struct net *net = sock_net(skb->sk);
2158 struct mr6_table *mrt; 2156 struct mr6_table *mrt;
2159 struct mfc6_cache *mfc; 2157 struct mfc6_cache *mfc;
2160 unsigned int t = 0, s_t; 2158 unsigned int t = 0, s_t;
2161 unsigned int h = 0, s_h; 2159 unsigned int h = 0, s_h;
2162 unsigned int e = 0, s_e; 2160 unsigned int e = 0, s_e;
2163 2161
2164 s_t = cb->args[0]; 2162 s_t = cb->args[0];
2165 s_h = cb->args[1]; 2163 s_h = cb->args[1];
2166 s_e = cb->args[2]; 2164 s_e = cb->args[2];
2167 2165
2168 read_lock(&mrt_lock); 2166 read_lock(&mrt_lock);
2169 ip6mr_for_each_table(mrt, net) { 2167 ip6mr_for_each_table(mrt, net) {
2170 if (t < s_t) 2168 if (t < s_t)
2171 goto next_table; 2169 goto next_table;
2172 if (t > s_t) 2170 if (t > s_t)
2173 s_h = 0; 2171 s_h = 0;
2174 for (h = s_h; h < MFC6_LINES; h++) { 2172 for (h = s_h; h < MFC6_LINES; h++) {
2175 list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) { 2173 list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
2176 if (e < s_e) 2174 if (e < s_e)
2177 goto next_entry; 2175 goto next_entry;
2178 if (ip6mr_fill_mroute(mrt, skb, 2176 if (ip6mr_fill_mroute(mrt, skb,
2179 NETLINK_CB(cb->skb).pid, 2177 NETLINK_CB(cb->skb).pid,
2180 cb->nlh->nlmsg_seq, 2178 cb->nlh->nlmsg_seq,
2181 mfc) < 0) 2179 mfc) < 0)
2182 goto done; 2180 goto done;
2183 next_entry: 2181 next_entry:
2184 e++; 2182 e++;
2185 } 2183 }
2186 e = s_e = 0; 2184 e = s_e = 0;
2187 } 2185 }
2188 s_h = 0; 2186 s_h = 0;
2189 next_table: 2187 next_table:
2190 t++; 2188 t++;
2191 } 2189 }
2192 done: 2190 done:
2193 read_unlock(&mrt_lock); 2191 read_unlock(&mrt_lock);
2194 2192
2195 cb->args[2] = e; 2193 cb->args[2] = e;
2196 cb->args[1] = h; 2194 cb->args[1] = h;
2197 cb->args[0] = t; 2195 cb->args[0] = t;
2198 2196
2199 return skb->len; 2197 return skb->len;
2200 } 2198 }
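
The dump walks tables, hash lines, and entries in a fixed order and parks its position in cb->args[0..2] whenever the skb fills up, so the next netlink read resumes exactly where the previous one stopped. From userspace the whole exchange starts with one RTM_GETROUTE dump request; a sketch, assuming the RTNL_FAMILY_IP6MR pseudo-family used by the fill routine, with error handling trimmed:

    #include <string.h>
    #include <sys/socket.h>
    #include <linux/netlink.h>
    #include <linux/rtnetlink.h>

    static int request_ip6mr_dump(void)
    {
            int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
            struct {
                    struct nlmsghdr nlh;
                    struct rtmsg rtm;
            } req;

            memset(&req, 0, sizeof(req));
            req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
            req.nlh.nlmsg_type = RTM_GETROUTE;
            req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
            req.rtm.rtm_family = RTNL_FAMILY_IP6MR; /* multicast pseudo-family */

            /* Caller then recv()s in a loop until NLMSG_DONE (sketch only) */
            return send(fd, &req, req.nlh.nlmsg_len, 0);
    }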
2201 2199
1 /* 1 /*
2 * IPv6 over IPv4 tunnel device - Simple Internet Transition (SIT) 2 * IPv6 over IPv4 tunnel device - Simple Internet Transition (SIT)
3 * Linux INET6 implementation 3 * Linux INET6 implementation
4 * 4 *
5 * Authors: 5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt> 6 * Pedro Roque <roque@di.fc.ul.pt>
7 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> 7 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
8 * 8 *
9 * This program is free software; you can redistribute it and/or 9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License 10 * modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation; either version 11 * as published by the Free Software Foundation; either version
12 * 2 of the License, or (at your option) any later version. 12 * 2 of the License, or (at your option) any later version.
13 * 13 *
14 * Changes: 14 * Changes:
15 * Roger Venning <r.venning@telstra.com>: 6to4 support 15 * Roger Venning <r.venning@telstra.com>: 6to4 support
16 * Nate Thompson <nate@thebog.net>: 6to4 support 16 * Nate Thompson <nate@thebog.net>: 6to4 support
17 * Fred Templin <fred.l.templin@boeing.com>: isatap support 17 * Fred Templin <fred.l.templin@boeing.com>: isatap support
18 */ 18 */
19 19
20 #include <linux/module.h> 20 #include <linux/module.h>
21 #include <linux/capability.h> 21 #include <linux/capability.h>
22 #include <linux/errno.h> 22 #include <linux/errno.h>
23 #include <linux/types.h> 23 #include <linux/types.h>
24 #include <linux/socket.h> 24 #include <linux/socket.h>
25 #include <linux/sockios.h> 25 #include <linux/sockios.h>
26 #include <linux/net.h> 26 #include <linux/net.h>
27 #include <linux/in6.h> 27 #include <linux/in6.h>
28 #include <linux/netdevice.h> 28 #include <linux/netdevice.h>
29 #include <linux/if_arp.h> 29 #include <linux/if_arp.h>
30 #include <linux/icmp.h> 30 #include <linux/icmp.h>
31 #include <linux/slab.h> 31 #include <linux/slab.h>
32 #include <asm/uaccess.h> 32 #include <asm/uaccess.h>
33 #include <linux/init.h> 33 #include <linux/init.h>
34 #include <linux/netfilter_ipv4.h> 34 #include <linux/netfilter_ipv4.h>
35 #include <linux/if_ether.h> 35 #include <linux/if_ether.h>
36 36
37 #include <net/sock.h> 37 #include <net/sock.h>
38 #include <net/snmp.h> 38 #include <net/snmp.h>
39 39
40 #include <net/ipv6.h> 40 #include <net/ipv6.h>
41 #include <net/protocol.h> 41 #include <net/protocol.h>
42 #include <net/transp_v6.h> 42 #include <net/transp_v6.h>
43 #include <net/ip6_fib.h> 43 #include <net/ip6_fib.h>
44 #include <net/ip6_route.h> 44 #include <net/ip6_route.h>
45 #include <net/ndisc.h> 45 #include <net/ndisc.h>
46 #include <net/addrconf.h> 46 #include <net/addrconf.h>
47 #include <net/ip.h> 47 #include <net/ip.h>
48 #include <net/udp.h> 48 #include <net/udp.h>
49 #include <net/icmp.h> 49 #include <net/icmp.h>
50 #include <net/ipip.h> 50 #include <net/ipip.h>
51 #include <net/inet_ecn.h> 51 #include <net/inet_ecn.h>
52 #include <net/xfrm.h> 52 #include <net/xfrm.h>
53 #include <net/dsfield.h> 53 #include <net/dsfield.h>
54 #include <net/net_namespace.h> 54 #include <net/net_namespace.h>
55 #include <net/netns/generic.h> 55 #include <net/netns/generic.h>
56 56
57 /* 57 /*
58 This version of net/ipv6/sit.c was cloned from net/ipv4/ip_gre.c 58 This version of net/ipv6/sit.c was cloned from net/ipv4/ip_gre.c
59 59
60 For comments look at net/ipv4/ip_gre.c --ANK 60 For comments look at net/ipv4/ip_gre.c --ANK
61 */ 61 */
62 62
63 #define HASH_SIZE 16 63 #define HASH_SIZE 16
64 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) 64 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
65 65
66 static void ipip6_tunnel_init(struct net_device *dev); 66 static void ipip6_tunnel_init(struct net_device *dev);
67 static void ipip6_tunnel_setup(struct net_device *dev); 67 static void ipip6_tunnel_setup(struct net_device *dev);
68 68
69 static int sit_net_id __read_mostly; 69 static int sit_net_id __read_mostly;
70 struct sit_net { 70 struct sit_net {
71 struct ip_tunnel *tunnels_r_l[HASH_SIZE]; 71 struct ip_tunnel *tunnels_r_l[HASH_SIZE];
72 struct ip_tunnel *tunnels_r[HASH_SIZE]; 72 struct ip_tunnel *tunnels_r[HASH_SIZE];
73 struct ip_tunnel *tunnels_l[HASH_SIZE]; 73 struct ip_tunnel *tunnels_l[HASH_SIZE];
74 struct ip_tunnel *tunnels_wc[1]; 74 struct ip_tunnel *tunnels_wc[1];
75 struct ip_tunnel **tunnels[4]; 75 struct ip_tunnel **tunnels[4];
76 76
77 struct net_device *fb_tunnel_dev; 77 struct net_device *fb_tunnel_dev;
78 }; 78 };
79 79
80 /* 80 /*
81 * Locking : hash tables are protected by RCU and a spinlock 81 * Locking : hash tables are protected by RCU and a spinlock
82 */ 82 */
83 static DEFINE_SPINLOCK(ipip6_lock); 83 static DEFINE_SPINLOCK(ipip6_lock);
84 84
85 #define for_each_ip_tunnel_rcu(start) \ 85 #define for_each_ip_tunnel_rcu(start) \
86 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) 86 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
87 87
88 /* 88 /*
89 * Must be invoked with rcu_read_lock 89 * Must be invoked with rcu_read_lock
90 */ 90 */
91 static struct ip_tunnel * ipip6_tunnel_lookup(struct net *net, 91 static struct ip_tunnel * ipip6_tunnel_lookup(struct net *net,
92 struct net_device *dev, __be32 remote, __be32 local) 92 struct net_device *dev, __be32 remote, __be32 local)
93 { 93 {
94 unsigned h0 = HASH(remote); 94 unsigned h0 = HASH(remote);
95 unsigned h1 = HASH(local); 95 unsigned h1 = HASH(local);
96 struct ip_tunnel *t; 96 struct ip_tunnel *t;
97 struct sit_net *sitn = net_generic(net, sit_net_id); 97 struct sit_net *sitn = net_generic(net, sit_net_id);
98 98
99 for_each_ip_tunnel_rcu(sitn->tunnels_r_l[h0 ^ h1]) { 99 for_each_ip_tunnel_rcu(sitn->tunnels_r_l[h0 ^ h1]) {
100 if (local == t->parms.iph.saddr && 100 if (local == t->parms.iph.saddr &&
101 remote == t->parms.iph.daddr && 101 remote == t->parms.iph.daddr &&
102 (!dev || !t->parms.link || dev->iflink == t->parms.link) && 102 (!dev || !t->parms.link || dev->iflink == t->parms.link) &&
103 (t->dev->flags & IFF_UP)) 103 (t->dev->flags & IFF_UP))
104 return t; 104 return t;
105 } 105 }
106 for_each_ip_tunnel_rcu(sitn->tunnels_r[h0]) { 106 for_each_ip_tunnel_rcu(sitn->tunnels_r[h0]) {
107 if (remote == t->parms.iph.daddr && 107 if (remote == t->parms.iph.daddr &&
108 (!dev || !t->parms.link || dev->iflink == t->parms.link) && 108 (!dev || !t->parms.link || dev->iflink == t->parms.link) &&
109 (t->dev->flags & IFF_UP)) 109 (t->dev->flags & IFF_UP))
110 return t; 110 return t;
111 } 111 }
112 for_each_ip_tunnel_rcu(sitn->tunnels_l[h1]) { 112 for_each_ip_tunnel_rcu(sitn->tunnels_l[h1]) {
113 if (local == t->parms.iph.saddr && 113 if (local == t->parms.iph.saddr &&
114 (!dev || !t->parms.link || dev->iflink == t->parms.link) && 114 (!dev || !t->parms.link || dev->iflink == t->parms.link) &&
115 (t->dev->flags & IFF_UP)) 115 (t->dev->flags & IFF_UP))
116 return t; 116 return t;
117 } 117 }
118 t = rcu_dereference(sitn->tunnels_wc[0]); 118 t = rcu_dereference(sitn->tunnels_wc[0]);
119 if ((t != NULL) && (t->dev->flags & IFF_UP)) 119 if ((t != NULL) && (t->dev->flags & IFF_UP))
120 return t; 120 return t;
121 return NULL; 121 return NULL;
122 } 122 }
123 123
124 static struct ip_tunnel **__ipip6_bucket(struct sit_net *sitn, 124 static struct ip_tunnel **__ipip6_bucket(struct sit_net *sitn,
125 struct ip_tunnel_parm *parms) 125 struct ip_tunnel_parm *parms)
126 { 126 {
127 __be32 remote = parms->iph.daddr; 127 __be32 remote = parms->iph.daddr;
128 __be32 local = parms->iph.saddr; 128 __be32 local = parms->iph.saddr;
129 unsigned h = 0; 129 unsigned h = 0;
130 int prio = 0; 130 int prio = 0;
131 131
132 if (remote) { 132 if (remote) {
133 prio |= 2; 133 prio |= 2;
134 h ^= HASH(remote); 134 h ^= HASH(remote);
135 } 135 }
136 if (local) { 136 if (local) {
137 prio |= 1; 137 prio |= 1;
138 h ^= HASH(local); 138 h ^= HASH(local);
139 } 139 }
140 return &sitn->tunnels[prio][h]; 140 return &sitn->tunnels[prio][h];
141 } 141 }
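
__ipip6_bucket() packs "which endpoints are specified" into a two-bit priority that doubles as the index into sitn->tunnels[], so the four arrays declared in struct sit_net line up with the probe order of ipip6_tunnel_lookup(), most specific first:

    remote  local  prio  bucket
     set     set    3    tunnels_r_l[HASH(remote) ^ HASH(local)]
     set      -     2    tunnels_r[HASH(remote)]
      -      set    1    tunnels_l[HASH(local)]
      -       -     0    tunnels_wc[0]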
142 142
143 static inline struct ip_tunnel **ipip6_bucket(struct sit_net *sitn, 143 static inline struct ip_tunnel **ipip6_bucket(struct sit_net *sitn,
144 struct ip_tunnel *t) 144 struct ip_tunnel *t)
145 { 145 {
146 return __ipip6_bucket(sitn, &t->parms); 146 return __ipip6_bucket(sitn, &t->parms);
147 } 147 }
148 148
149 static void ipip6_tunnel_unlink(struct sit_net *sitn, struct ip_tunnel *t) 149 static void ipip6_tunnel_unlink(struct sit_net *sitn, struct ip_tunnel *t)
150 { 150 {
151 struct ip_tunnel **tp; 151 struct ip_tunnel **tp;
152 152
153 for (tp = ipip6_bucket(sitn, t); *tp; tp = &(*tp)->next) { 153 for (tp = ipip6_bucket(sitn, t); *tp; tp = &(*tp)->next) {
154 if (t == *tp) { 154 if (t == *tp) {
155 spin_lock_bh(&ipip6_lock); 155 spin_lock_bh(&ipip6_lock);
156 *tp = t->next; 156 *tp = t->next;
157 spin_unlock_bh(&ipip6_lock); 157 spin_unlock_bh(&ipip6_lock);
158 break; 158 break;
159 } 159 }
160 } 160 }
161 } 161 }
162 162
163 static void ipip6_tunnel_link(struct sit_net *sitn, struct ip_tunnel *t) 163 static void ipip6_tunnel_link(struct sit_net *sitn, struct ip_tunnel *t)
164 { 164 {
165 struct ip_tunnel **tp = ipip6_bucket(sitn, t); 165 struct ip_tunnel **tp = ipip6_bucket(sitn, t);
166 166
167 spin_lock_bh(&ipip6_lock); 167 spin_lock_bh(&ipip6_lock);
168 t->next = *tp; 168 t->next = *tp;
169 rcu_assign_pointer(*tp, t); 169 rcu_assign_pointer(*tp, t);
170 spin_unlock_bh(&ipip6_lock); 170 spin_unlock_bh(&ipip6_lock);
171 } 171 }
172 172
173 static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn) 173 static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn)
174 { 174 {
175 #ifdef CONFIG_IPV6_SIT_6RD 175 #ifdef CONFIG_IPV6_SIT_6RD
176 struct ip_tunnel *t = netdev_priv(dev); 176 struct ip_tunnel *t = netdev_priv(dev);
177 177
178 if (t->dev == sitn->fb_tunnel_dev) { 178 if (t->dev == sitn->fb_tunnel_dev) {
179 ipv6_addr_set(&t->ip6rd.prefix, htonl(0x20020000), 0, 0, 0); 179 ipv6_addr_set(&t->ip6rd.prefix, htonl(0x20020000), 0, 0, 0);
180 t->ip6rd.relay_prefix = 0; 180 t->ip6rd.relay_prefix = 0;
181 t->ip6rd.prefixlen = 16; 181 t->ip6rd.prefixlen = 16;
182 t->ip6rd.relay_prefixlen = 0; 182 t->ip6rd.relay_prefixlen = 0;
183 } else { 183 } else {
184 struct ip_tunnel *t0 = netdev_priv(sitn->fb_tunnel_dev); 184 struct ip_tunnel *t0 = netdev_priv(sitn->fb_tunnel_dev);
185 memcpy(&t->ip6rd, &t0->ip6rd, sizeof(t->ip6rd)); 185 memcpy(&t->ip6rd, &t0->ip6rd, sizeof(t->ip6rd));
186 } 186 }
187 #endif 187 #endif
188 } 188 }
189 189
190 static struct ip_tunnel * ipip6_tunnel_locate(struct net *net, 190 static struct ip_tunnel * ipip6_tunnel_locate(struct net *net,
191 struct ip_tunnel_parm *parms, int create) 191 struct ip_tunnel_parm *parms, int create)
192 { 192 {
193 __be32 remote = parms->iph.daddr; 193 __be32 remote = parms->iph.daddr;
194 __be32 local = parms->iph.saddr; 194 __be32 local = parms->iph.saddr;
195 struct ip_tunnel *t, **tp, *nt; 195 struct ip_tunnel *t, **tp, *nt;
196 struct net_device *dev; 196 struct net_device *dev;
197 char name[IFNAMSIZ]; 197 char name[IFNAMSIZ];
198 struct sit_net *sitn = net_generic(net, sit_net_id); 198 struct sit_net *sitn = net_generic(net, sit_net_id);
199 199
200 for (tp = __ipip6_bucket(sitn, parms); (t = *tp) != NULL; tp = &t->next) { 200 for (tp = __ipip6_bucket(sitn, parms); (t = *tp) != NULL; tp = &t->next) {
201 if (local == t->parms.iph.saddr && 201 if (local == t->parms.iph.saddr &&
202 remote == t->parms.iph.daddr && 202 remote == t->parms.iph.daddr &&
203 parms->link == t->parms.link) { 203 parms->link == t->parms.link) {
204 if (create) 204 if (create)
205 return NULL; 205 return NULL;
206 else 206 else
207 return t; 207 return t;
208 } 208 }
209 } 209 }
210 if (!create) 210 if (!create)
211 goto failed; 211 goto failed;
212 212
213 if (parms->name[0]) 213 if (parms->name[0])
214 strlcpy(name, parms->name, IFNAMSIZ); 214 strlcpy(name, parms->name, IFNAMSIZ);
215 else 215 else
216 sprintf(name, "sit%%d"); 216 sprintf(name, "sit%%d");
217 217
218 dev = alloc_netdev(sizeof(*t), name, ipip6_tunnel_setup); 218 dev = alloc_netdev(sizeof(*t), name, ipip6_tunnel_setup);
219 if (dev == NULL) 219 if (dev == NULL)
220 return NULL; 220 return NULL;
221 221
222 dev_net_set(dev, net); 222 dev_net_set(dev, net);
223 223
224 if (strchr(name, '%')) { 224 if (strchr(name, '%')) {
225 if (dev_alloc_name(dev, name) < 0) 225 if (dev_alloc_name(dev, name) < 0)
226 goto failed_free; 226 goto failed_free;
227 } 227 }
228 228
229 nt = netdev_priv(dev); 229 nt = netdev_priv(dev);
230 230
231 nt->parms = *parms; 231 nt->parms = *parms;
232 ipip6_tunnel_init(dev); 232 ipip6_tunnel_init(dev);
233 ipip6_tunnel_clone_6rd(dev, sitn); 233 ipip6_tunnel_clone_6rd(dev, sitn);
234 234
235 if (parms->i_flags & SIT_ISATAP) 235 if (parms->i_flags & SIT_ISATAP)
236 dev->priv_flags |= IFF_ISATAP; 236 dev->priv_flags |= IFF_ISATAP;
237 237
238 if (register_netdevice(dev) < 0) 238 if (register_netdevice(dev) < 0)
239 goto failed_free; 239 goto failed_free;
240 240
241 dev_hold(dev); 241 dev_hold(dev);
242 242
243 ipip6_tunnel_link(sitn, nt); 243 ipip6_tunnel_link(sitn, nt);
244 return nt; 244 return nt;
245 245
246 failed_free: 246 failed_free:
247 free_netdev(dev); 247 free_netdev(dev);
248 failed: 248 failed:
249 return NULL; 249 return NULL;
250 } 250 }
251 251
252 static DEFINE_SPINLOCK(ipip6_prl_lock); 252 static DEFINE_SPINLOCK(ipip6_prl_lock);
253 253
254 #define for_each_prl_rcu(start) \ 254 #define for_each_prl_rcu(start) \
255 for (prl = rcu_dereference(start); \ 255 for (prl = rcu_dereference(start); \
256 prl; \ 256 prl; \
257 prl = rcu_dereference(prl->next)) 257 prl = rcu_dereference(prl->next))
258 258
259 static struct ip_tunnel_prl_entry * 259 static struct ip_tunnel_prl_entry *
260 __ipip6_tunnel_locate_prl(struct ip_tunnel *t, __be32 addr) 260 __ipip6_tunnel_locate_prl(struct ip_tunnel *t, __be32 addr)
261 { 261 {
262 struct ip_tunnel_prl_entry *prl; 262 struct ip_tunnel_prl_entry *prl;
263 263
264 for_each_prl_rcu(t->prl) 264 for_each_prl_rcu(t->prl)
265 if (prl->addr == addr) 265 if (prl->addr == addr)
266 break; 266 break;
267 return prl; 267 return prl;
268 268
269 } 269 }
270 270
271 static int ipip6_tunnel_get_prl(struct ip_tunnel *t, 271 static int ipip6_tunnel_get_prl(struct ip_tunnel *t,
272 struct ip_tunnel_prl __user *a) 272 struct ip_tunnel_prl __user *a)
273 { 273 {
274 struct ip_tunnel_prl kprl, *kp; 274 struct ip_tunnel_prl kprl, *kp;
275 struct ip_tunnel_prl_entry *prl; 275 struct ip_tunnel_prl_entry *prl;
276 unsigned int cmax, c = 0, ca, len; 276 unsigned int cmax, c = 0, ca, len;
277 int ret = 0; 277 int ret = 0;
278 278
279 if (copy_from_user(&kprl, a, sizeof(kprl))) 279 if (copy_from_user(&kprl, a, sizeof(kprl)))
280 return -EFAULT; 280 return -EFAULT;
281 cmax = kprl.datalen / sizeof(kprl); 281 cmax = kprl.datalen / sizeof(kprl);
282 if (cmax > 1 && kprl.addr != htonl(INADDR_ANY)) 282 if (cmax > 1 && kprl.addr != htonl(INADDR_ANY))
283 cmax = 1; 283 cmax = 1;
284 284
285 /* For simple GET or for root users, 285 /* For simple GET or for root users,
286 * we try harder to allocate. 286 * we try harder to allocate.
287 */ 287 */
288 kp = (cmax <= 1 || capable(CAP_NET_ADMIN)) ? 288 kp = (cmax <= 1 || capable(CAP_NET_ADMIN)) ?
289 kcalloc(cmax, sizeof(*kp), GFP_KERNEL) : 289 kcalloc(cmax, sizeof(*kp), GFP_KERNEL) :
290 NULL; 290 NULL;
291 291
292 rcu_read_lock(); 292 rcu_read_lock();
293 293
294 ca = t->prl_count < cmax ? t->prl_count : cmax; 294 ca = t->prl_count < cmax ? t->prl_count : cmax;
295 295
296 if (!kp) { 296 if (!kp) {
297 /* We don't try hard to allocate much memory for 297 /* We don't try hard to allocate much memory for
298 * non-root users. 298 * non-root users.
299 * For root users, retry allocating enough memory for 299 * For root users, retry allocating enough memory for
300 * the answer. 300 * the answer.
301 */ 301 */
302 kp = kcalloc(ca, sizeof(*kp), GFP_ATOMIC); 302 kp = kcalloc(ca, sizeof(*kp), GFP_ATOMIC);
303 if (!kp) { 303 if (!kp) {
304 ret = -ENOMEM; 304 ret = -ENOMEM;
305 goto out; 305 goto out;
306 } 306 }
307 } 307 }
308 308
309 c = 0; 309 c = 0;
310 for_each_prl_rcu(t->prl) { 310 for_each_prl_rcu(t->prl) {
311 if (c >= cmax) 311 if (c >= cmax)
312 break; 312 break;
313 if (kprl.addr != htonl(INADDR_ANY) && prl->addr != kprl.addr) 313 if (kprl.addr != htonl(INADDR_ANY) && prl->addr != kprl.addr)
314 continue; 314 continue;
315 kp[c].addr = prl->addr; 315 kp[c].addr = prl->addr;
316 kp[c].flags = prl->flags; 316 kp[c].flags = prl->flags;
317 c++; 317 c++;
318 if (kprl.addr != htonl(INADDR_ANY)) 318 if (kprl.addr != htonl(INADDR_ANY))
319 break; 319 break;
320 } 320 }
321 out: 321 out:
322 rcu_read_unlock(); 322 rcu_read_unlock();
323 323
324 len = sizeof(*kp) * c; 324 len = sizeof(*kp) * c;
325 ret = 0; 325 ret = 0;
326 if ((len && copy_to_user(a + 1, kp, len)) || put_user(len, &a->datalen)) 326 if ((len && copy_to_user(a + 1, kp, len)) || put_user(len, &a->datalen))
327 ret = -EFAULT; 327 ret = -EFAULT;
328 328
329 kfree(kp); 329 kfree(kp);
330 330
331 return ret; 331 return ret;
332 } 332 }
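
The allocation dance above is a standard way to copy an RCU-protected list out to userspace: allocate generously with GFP_KERNEL while sleeping is still legal, fall back to a right-sized GFP_ATOMIC allocation once inside rcu_read_lock() (where sleeping is forbidden), and defer the fault-prone copy_to_user() until after rcu_read_unlock(). Condensed, with hypothetical walk_list(), max_entries, cur_entries and uptr placeholders:

    struct entry *buf;
    int n = 0, ret = 0;

    buf = kcalloc(max_entries, sizeof(*buf), GFP_KERNEL);   /* may sleep: OK here */
    rcu_read_lock();
    if (!buf)       /* retry smaller, atomically: no sleeping under RCU */
            buf = kcalloc(cur_entries, sizeof(*buf), GFP_ATOMIC);
    if (buf)
            n = walk_list(buf);     /* snapshot the list under RCU */
    rcu_read_unlock();
    if (!buf)
            ret = -ENOMEM;
    else if (copy_to_user(uptr, buf, n * sizeof(*buf)))     /* may fault: after unlock */
            ret = -EFAULT;
    kfree(buf);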
333 333
334 static int 334 static int
335 ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg) 335 ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg)
336 { 336 {
337 struct ip_tunnel_prl_entry *p; 337 struct ip_tunnel_prl_entry *p;
338 int err = 0; 338 int err = 0;
339 339
340 if (a->addr == htonl(INADDR_ANY)) 340 if (a->addr == htonl(INADDR_ANY))
341 return -EINVAL; 341 return -EINVAL;
342 342
343 spin_lock(&ipip6_prl_lock); 343 spin_lock(&ipip6_prl_lock);
344 344
345 for (p = t->prl; p; p = p->next) { 345 for (p = t->prl; p; p = p->next) {
346 if (p->addr == a->addr) { 346 if (p->addr == a->addr) {
347 if (chg) { 347 if (chg) {
348 p->flags = a->flags; 348 p->flags = a->flags;
349 goto out; 349 goto out;
350 } 350 }
351 err = -EEXIST; 351 err = -EEXIST;
352 goto out; 352 goto out;
353 } 353 }
354 } 354 }
355 355
356 if (chg) { 356 if (chg) {
357 err = -ENXIO; 357 err = -ENXIO;
358 goto out; 358 goto out;
359 } 359 }
360 360
361 p = kzalloc(sizeof(struct ip_tunnel_prl_entry), GFP_KERNEL); 361 p = kzalloc(sizeof(struct ip_tunnel_prl_entry), GFP_KERNEL);
362 if (!p) { 362 if (!p) {
363 err = -ENOBUFS; 363 err = -ENOBUFS;
364 goto out; 364 goto out;
365 } 365 }
366 366
367 p->next = t->prl; 367 p->next = t->prl;
368 p->addr = a->addr; 368 p->addr = a->addr;
369 p->flags = a->flags; 369 p->flags = a->flags;
370 t->prl_count++; 370 t->prl_count++;
371 rcu_assign_pointer(t->prl, p); 371 rcu_assign_pointer(t->prl, p);
372 out: 372 out:
373 spin_unlock(&ipip6_prl_lock); 373 spin_unlock(&ipip6_prl_lock);
374 return err; 374 return err;
375 } 375 }
376 376
377 static void prl_entry_destroy_rcu(struct rcu_head *head) 377 static void prl_entry_destroy_rcu(struct rcu_head *head)
378 { 378 {
379 kfree(container_of(head, struct ip_tunnel_prl_entry, rcu_head)); 379 kfree(container_of(head, struct ip_tunnel_prl_entry, rcu_head));
380 } 380 }
381 381
382 static void prl_list_destroy_rcu(struct rcu_head *head) 382 static void prl_list_destroy_rcu(struct rcu_head *head)
383 { 383 {
384 struct ip_tunnel_prl_entry *p, *n; 384 struct ip_tunnel_prl_entry *p, *n;
385 385
386 p = container_of(head, struct ip_tunnel_prl_entry, rcu_head); 386 p = container_of(head, struct ip_tunnel_prl_entry, rcu_head);
387 do { 387 do {
388 n = p->next; 388 n = p->next;
389 kfree(p); 389 kfree(p);
390 p = n; 390 p = n;
391 } while (p); 391 } while (p);
392 } 392 }
393 393
394 static int 394 static int
395 ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a) 395 ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a)
396 { 396 {
397 struct ip_tunnel_prl_entry *x, **p; 397 struct ip_tunnel_prl_entry *x, **p;
398 int err = 0; 398 int err = 0;
399 399
400 spin_lock(&ipip6_prl_lock); 400 spin_lock(&ipip6_prl_lock);
401 401
402 if (a && a->addr != htonl(INADDR_ANY)) { 402 if (a && a->addr != htonl(INADDR_ANY)) {
403 for (p = &t->prl; *p; p = &(*p)->next) { 403 for (p = &t->prl; *p; p = &(*p)->next) {
404 if ((*p)->addr == a->addr) { 404 if ((*p)->addr == a->addr) {
405 x = *p; 405 x = *p;
406 *p = x->next; 406 *p = x->next;
407 call_rcu(&x->rcu_head, prl_entry_destroy_rcu); 407 call_rcu(&x->rcu_head, prl_entry_destroy_rcu);
408 t->prl_count--; 408 t->prl_count--;
409 goto out; 409 goto out;
410 } 410 }
411 } 411 }
412 err = -ENXIO; 412 err = -ENXIO;
413 } else { 413 } else {
414 if (t->prl) { 414 if (t->prl) {
415 t->prl_count = 0; 415 t->prl_count = 0;
416 x = t->prl; 416 x = t->prl;
417 call_rcu(&x->rcu_head, prl_list_destroy_rcu); 417 call_rcu(&x->rcu_head, prl_list_destroy_rcu);
418 t->prl = NULL; 418 t->prl = NULL;
419 } 419 }
420 } 420 }
421 out: 421 out:
422 spin_unlock(&ipip6_prl_lock); 422 spin_unlock(&ipip6_prl_lock);
423 return err; 423 return err;
424 } 424 }
425 425
426 static int 426 static int
427 isatap_chksrc(struct sk_buff *skb, struct iphdr *iph, struct ip_tunnel *t) 427 isatap_chksrc(struct sk_buff *skb, struct iphdr *iph, struct ip_tunnel *t)
428 { 428 {
429 struct ip_tunnel_prl_entry *p; 429 struct ip_tunnel_prl_entry *p;
430 int ok = 1; 430 int ok = 1;
431 431
432 rcu_read_lock(); 432 rcu_read_lock();
433 p = __ipip6_tunnel_locate_prl(t, iph->saddr); 433 p = __ipip6_tunnel_locate_prl(t, iph->saddr);
434 if (p) { 434 if (p) {
435 if (p->flags & PRL_DEFAULT) 435 if (p->flags & PRL_DEFAULT)
436 skb->ndisc_nodetype = NDISC_NODETYPE_DEFAULT; 436 skb->ndisc_nodetype = NDISC_NODETYPE_DEFAULT;
437 else 437 else
438 skb->ndisc_nodetype = NDISC_NODETYPE_NODEFAULT; 438 skb->ndisc_nodetype = NDISC_NODETYPE_NODEFAULT;
439 } else { 439 } else {
440 struct in6_addr *addr6 = &ipv6_hdr(skb)->saddr; 440 struct in6_addr *addr6 = &ipv6_hdr(skb)->saddr;
441 if (ipv6_addr_is_isatap(addr6) && 441 if (ipv6_addr_is_isatap(addr6) &&
442 (addr6->s6_addr32[3] == iph->saddr) && 442 (addr6->s6_addr32[3] == iph->saddr) &&
443 ipv6_chk_prefix(addr6, t->dev)) 443 ipv6_chk_prefix(addr6, t->dev))
444 skb->ndisc_nodetype = NDISC_NODETYPE_HOST; 444 skb->ndisc_nodetype = NDISC_NODETYPE_HOST;
445 else 445 else
446 ok = 0; 446 ok = 0;
447 } 447 }
448 rcu_read_unlock(); 448 rcu_read_unlock();
449 return ok; 449 return ok;
450 } 450 }
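
For a source not on the PRL, the packet is accepted only when the inner IPv6 source is the sender's own ISATAP address: ipv6_addr_is_isatap() tests that the interface identifier carries the [02]00:5efe signature (the u/l bit is masked off), and the s6_addr32[3] comparison then ties the embedded IPv4 address to the outer header's saddr. A userspace rendering of the signature test, assuming glibc exposes s6_addr32:

    #include <stdbool.h>
    #include <arpa/inet.h>

    /* Word 2 of the interface ID must be 0[02]00:5efe;
     * word 3 then holds the embedded IPv4 address. */
    static bool addr_is_isatap(const struct in6_addr *a)
    {
            return (a->s6_addr32[2] | htonl(0x02000000)) == htonl(0x02005EFE);
    }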
451 451
452 static void ipip6_tunnel_uninit(struct net_device *dev) 452 static void ipip6_tunnel_uninit(struct net_device *dev)
453 { 453 {
454 struct net *net = dev_net(dev); 454 struct net *net = dev_net(dev);
455 struct sit_net *sitn = net_generic(net, sit_net_id); 455 struct sit_net *sitn = net_generic(net, sit_net_id);
456 456
457 if (dev == sitn->fb_tunnel_dev) { 457 if (dev == sitn->fb_tunnel_dev) {
458 spin_lock_bh(&ipip6_lock); 458 spin_lock_bh(&ipip6_lock);
459 sitn->tunnels_wc[0] = NULL; 459 sitn->tunnels_wc[0] = NULL;
460 spin_unlock_bh(&ipip6_lock); 460 spin_unlock_bh(&ipip6_lock);
461 dev_put(dev); 461 dev_put(dev);
462 } else { 462 } else {
463 ipip6_tunnel_unlink(sitn, netdev_priv(dev)); 463 ipip6_tunnel_unlink(sitn, netdev_priv(dev));
464 ipip6_tunnel_del_prl(netdev_priv(dev), NULL); 464 ipip6_tunnel_del_prl(netdev_priv(dev), NULL);
465 dev_put(dev); 465 dev_put(dev);
466 } 466 }
467 } 467 }
468 468
469 469
470 static int ipip6_err(struct sk_buff *skb, u32 info) 470 static int ipip6_err(struct sk_buff *skb, u32 info)
471 { 471 {
472 472
473 /* All the routers (except for Linux) return only 473 /* All the routers (except for Linux) return only
474 8 bytes of packet payload. This means that precise relaying of 474 8 bytes of packet payload. This means that precise relaying of
475 ICMP in the real Internet is absolutely infeasible. 475 ICMP in the real Internet is absolutely infeasible.
476 */ 476 */
477 struct iphdr *iph = (struct iphdr*)skb->data; 477 struct iphdr *iph = (struct iphdr*)skb->data;
478 const int type = icmp_hdr(skb)->type; 478 const int type = icmp_hdr(skb)->type;
479 const int code = icmp_hdr(skb)->code; 479 const int code = icmp_hdr(skb)->code;
480 struct ip_tunnel *t; 480 struct ip_tunnel *t;
481 int err; 481 int err;
482 482
483 switch (type) { 483 switch (type) {
484 default: 484 default:
485 case ICMP_PARAMETERPROB: 485 case ICMP_PARAMETERPROB:
486 return 0; 486 return 0;
487 487
488 case ICMP_DEST_UNREACH: 488 case ICMP_DEST_UNREACH:
489 switch (code) { 489 switch (code) {
490 case ICMP_SR_FAILED: 490 case ICMP_SR_FAILED:
491 case ICMP_PORT_UNREACH: 491 case ICMP_PORT_UNREACH:
492 /* Impossible event. */ 492 /* Impossible event. */
493 return 0; 493 return 0;
494 case ICMP_FRAG_NEEDED: 494 case ICMP_FRAG_NEEDED:
495 /* Soft state for pmtu is maintained by IP core. */ 495 /* Soft state for pmtu is maintained by IP core. */
496 return 0; 496 return 0;
497 default: 497 default:
498 /* All others are translated to HOST_UNREACH. 498 /* All others are translated to HOST_UNREACH.
499 rfc2003 contains "deep thoughts" about NET_UNREACH, 499 rfc2003 contains "deep thoughts" about NET_UNREACH,
500 I believe they are just ether pollution. --ANK 500 I believe they are just ether pollution. --ANK
501 */ 501 */
502 break; 502 break;
503 } 503 }
504 break; 504 break;
505 case ICMP_TIME_EXCEEDED: 505 case ICMP_TIME_EXCEEDED:
506 if (code != ICMP_EXC_TTL) 506 if (code != ICMP_EXC_TTL)
507 return 0; 507 return 0;
508 break; 508 break;
509 } 509 }
510 510
511 err = -ENOENT; 511 err = -ENOENT;
512 512
513 rcu_read_lock(); 513 rcu_read_lock();
514 t = ipip6_tunnel_lookup(dev_net(skb->dev), 514 t = ipip6_tunnel_lookup(dev_net(skb->dev),
515 skb->dev, 515 skb->dev,
516 iph->daddr, 516 iph->daddr,
517 iph->saddr); 517 iph->saddr);
518 if (t == NULL || t->parms.iph.daddr == 0) 518 if (t == NULL || t->parms.iph.daddr == 0)
519 goto out; 519 goto out;
520 520
521 err = 0; 521 err = 0;
522 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) 522 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
523 goto out; 523 goto out;
524 524
525 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO)) 525 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
526 t->err_count++; 526 t->err_count++;
527 else 527 else
528 t->err_count = 1; 528 t->err_count = 1;
529 t->err_time = jiffies; 529 t->err_time = jiffies;
530 out: 530 out:
531 rcu_read_unlock(); 531 rcu_read_unlock();
532 return err; 532 return err;
533 } 533 }
534 534
535 static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) 535 static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
536 { 536 {
537 if (INET_ECN_is_ce(iph->tos)) 537 if (INET_ECN_is_ce(iph->tos))
538 IP6_ECN_set_ce(ipv6_hdr(skb)); 538 IP6_ECN_set_ce(ipv6_hdr(skb));
539 } 539 }
540 540
541 static int ipip6_rcv(struct sk_buff *skb) 541 static int ipip6_rcv(struct sk_buff *skb)
542 { 542 {
543 struct iphdr *iph; 543 struct iphdr *iph;
544 struct ip_tunnel *tunnel; 544 struct ip_tunnel *tunnel;
545 545
546 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) 546 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
547 goto out; 547 goto out;
548 548
549 iph = ip_hdr(skb); 549 iph = ip_hdr(skb);
550 550
551 rcu_read_lock(); 551 rcu_read_lock();
552 tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, 552 tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
553 iph->saddr, iph->daddr); 553 iph->saddr, iph->daddr);
554 if (tunnel != NULL) { 554 if (tunnel != NULL) {
555 secpath_reset(skb); 555 secpath_reset(skb);
556 skb->mac_header = skb->network_header; 556 skb->mac_header = skb->network_header;
557 skb_reset_network_header(skb); 557 skb_reset_network_header(skb);
558 IPCB(skb)->flags = 0; 558 IPCB(skb)->flags = 0;
559 skb->protocol = htons(ETH_P_IPV6); 559 skb->protocol = htons(ETH_P_IPV6);
560 skb->pkt_type = PACKET_HOST; 560 skb->pkt_type = PACKET_HOST;
561 561
562 if ((tunnel->dev->priv_flags & IFF_ISATAP) && 562 if ((tunnel->dev->priv_flags & IFF_ISATAP) &&
563 !isatap_chksrc(skb, iph, tunnel)) { 563 !isatap_chksrc(skb, iph, tunnel)) {
564 tunnel->dev->stats.rx_errors++; 564 tunnel->dev->stats.rx_errors++;
565 rcu_read_unlock(); 565 rcu_read_unlock();
566 kfree_skb(skb); 566 kfree_skb(skb);
567 return 0; 567 return 0;
568 } 568 }
569 tunnel->dev->stats.rx_packets++; 569
570 tunnel->dev->stats.rx_bytes += skb->len; 570 skb_tunnel_rx(skb, tunnel->dev);
571 skb->dev = tunnel->dev; 571
572 skb_dst_drop(skb);
573 nf_reset(skb);
574 ipip6_ecn_decapsulate(iph, skb); 572 ipip6_ecn_decapsulate(iph, skb);
575 netif_rx(skb); 573 netif_rx(skb);
576 rcu_read_unlock(); 574 rcu_read_unlock();
577 return 0; 575 return 0;
578 } 576 }
579 577
580 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); 578 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
581 rcu_read_unlock(); 579 rcu_read_unlock();
582 out: 580 out:
583 kfree_skb(skb); 581 kfree_skb(skb);
584 return 0; 582 return 0;
585 } 583 }
586 584
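This hunk is the commit's payoff: the open-coded receive-path steps on the left (rx packet/byte accounting, reassigning skb->dev, skb_dst_drop(), nf_reset()) collapse into one skb_tunnel_rx() call, which additionally zeroes skb->rxhash so receive packet steering rehashes the decapsulated flow instead of reusing the outer header's hash. Reconstructed from the removed lines and the commit message, the helper amounts to roughly the following (the authoritative definition is the one this commit adds to include/net/dst.h):

    static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev)
    {
            skb->dev = dev;
            /* TODO : stats should be SMP safe */
            dev->stats.rx_packets++;
            dev->stats.rx_bytes += skb->len;
            skb->rxhash = 0;        /* force steering to rehash the inner flow */
            skb_dst_drop(skb);      /* the outer route is meaningless after decap */
            nf_reset(skb);
    }
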
587 /* 585 /*
588 * Returns the embedded IPv4 address if the IPv6 address 586 * Returns the embedded IPv4 address if the IPv6 address
589 * comes from 6rd / 6to4 (RFC 3056) addr space. 587 * comes from 6rd / 6to4 (RFC 3056) addr space.
590 */ 588 */
591 static inline 589 static inline
592 __be32 try_6rd(struct in6_addr *v6dst, struct ip_tunnel *tunnel) 590 __be32 try_6rd(struct in6_addr *v6dst, struct ip_tunnel *tunnel)
593 { 591 {
594 __be32 dst = 0; 592 __be32 dst = 0;
595 593
596 #ifdef CONFIG_IPV6_SIT_6RD 594 #ifdef CONFIG_IPV6_SIT_6RD
597 if (ipv6_prefix_equal(v6dst, &tunnel->ip6rd.prefix, 595 if (ipv6_prefix_equal(v6dst, &tunnel->ip6rd.prefix,
598 tunnel->ip6rd.prefixlen)) { 596 tunnel->ip6rd.prefixlen)) {
599 unsigned pbw0, pbi0; 597 unsigned pbw0, pbi0;
600 int pbi1; 598 int pbi1;
601 u32 d; 599 u32 d;
602 600
603 pbw0 = tunnel->ip6rd.prefixlen >> 5; 601 pbw0 = tunnel->ip6rd.prefixlen >> 5;
604 pbi0 = tunnel->ip6rd.prefixlen & 0x1f; 602 pbi0 = tunnel->ip6rd.prefixlen & 0x1f;
605 603
606 d = (ntohl(v6dst->s6_addr32[pbw0]) << pbi0) >> 604 d = (ntohl(v6dst->s6_addr32[pbw0]) << pbi0) >>
607 tunnel->ip6rd.relay_prefixlen; 605 tunnel->ip6rd.relay_prefixlen;
608 606
609 pbi1 = pbi0 - tunnel->ip6rd.relay_prefixlen; 607 pbi1 = pbi0 - tunnel->ip6rd.relay_prefixlen;
610 if (pbi1 > 0) 608 if (pbi1 > 0)
611 d |= ntohl(v6dst->s6_addr32[pbw0 + 1]) >> 609 d |= ntohl(v6dst->s6_addr32[pbw0 + 1]) >>
612 (32 - pbi1); 610 (32 - pbi1);
613 611
614 dst = tunnel->ip6rd.relay_prefix | htonl(d); 612 dst = tunnel->ip6rd.relay_prefix | htonl(d);
615 } 613 }
616 #else 614 #else
617 if (v6dst->s6_addr16[0] == htons(0x2002)) { 615 if (v6dst->s6_addr16[0] == htons(0x2002)) {
618 /* 6to4 v6 addr has 16 bits prefix, 32 v4addr, 16 SLA, ... */ 616 /* 6to4 v6 addr has 16 bits prefix, 32 v4addr, 16 SLA, ... */
619 memcpy(&dst, &v6dst->s6_addr16[1], 4); 617 memcpy(&dst, &v6dst->s6_addr16[1], 4);
620 } 618 }
621 #endif 619 #endif
622 return dst; 620 return dst;
623 } 621 }
624 622
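The shifts above locate the delegated bits inside the destination: pbw0 is the 32-bit word holding the first bit past the 6rd prefix, pbi0 the bit offset within that word, and the pbi1 branch splices in bits from the following word when the embedded IPv4 address straddles a word boundary. A userspace self-test of the same arithmetic for the plain 6to4 case (prefix 2002::/16, relay prefix length 0), assuming glibc's s6_addr32:

    #include <stdio.h>
    #include <stdint.h>
    #include <arpa/inet.h>

    int main(void)
    {
            struct in6_addr v6dst;
            unsigned prefixlen = 16, relay_prefixlen = 0;
            unsigned pbw0 = prefixlen >> 5;         /* word containing bit 16: word 0 */
            unsigned pbi0 = prefixlen & 0x1f;       /* bit offset inside it: 16 */
            int pbi1 = pbi0 - relay_prefixlen;
            uint32_t d;
            struct in_addr v4;

            inet_pton(AF_INET6, "2002:c000:204::1", &v6dst); /* 6to4 for 192.0.2.4 */

            d = (ntohl(v6dst.s6_addr32[pbw0]) << pbi0) >> relay_prefixlen;
            if (pbi1 > 0)   /* IPv4 bits continue in the next word */
                    d |= ntohl(v6dst.s6_addr32[pbw0 + 1]) >> (32 - pbi1);

            v4.s_addr = htonl(d);                   /* relay_prefix is 0 here */
            printf("embedded IPv4: %s\n", inet_ntoa(v4));   /* prints 192.0.2.4 */
            return 0;
    }
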
625 /* 623 /*
626 * This function assumes it is being called from dev_queue_xmit() 624 * This function assumes it is being called from dev_queue_xmit()
627 * and that skb is filled properly by that function. 625 * and that skb is filled properly by that function.
628 */ 626 */
629 627
630 static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, 628 static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
631 struct net_device *dev) 629 struct net_device *dev)
632 { 630 {
633 struct ip_tunnel *tunnel = netdev_priv(dev); 631 struct ip_tunnel *tunnel = netdev_priv(dev);
634 struct net_device_stats *stats = &dev->stats; 632 struct net_device_stats *stats = &dev->stats;
635 struct netdev_queue *txq = netdev_get_tx_queue(dev, 0); 633 struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
636 struct iphdr *tiph = &tunnel->parms.iph; 634 struct iphdr *tiph = &tunnel->parms.iph;
637 struct ipv6hdr *iph6 = ipv6_hdr(skb); 635 struct ipv6hdr *iph6 = ipv6_hdr(skb);
638 u8 tos = tunnel->parms.iph.tos; 636 u8 tos = tunnel->parms.iph.tos;
639 __be16 df = tiph->frag_off; 637 __be16 df = tiph->frag_off;
640 struct rtable *rt; /* Route to the other host */ 638 struct rtable *rt; /* Route to the other host */
641 struct net_device *tdev; /* Device to other host */ 639 struct net_device *tdev; /* Device to other host */
642 struct iphdr *iph; /* Our new IP header */ 640 struct iphdr *iph; /* Our new IP header */
643 unsigned int max_headroom; /* The extra header space needed */ 641 unsigned int max_headroom; /* The extra header space needed */
644 __be32 dst = tiph->daddr; 642 __be32 dst = tiph->daddr;
645 int mtu; 643 int mtu;
646 struct in6_addr *addr6; 644 struct in6_addr *addr6;
647 int addr_type; 645 int addr_type;
648 646
649 if (skb->protocol != htons(ETH_P_IPV6)) 647 if (skb->protocol != htons(ETH_P_IPV6))
650 goto tx_error; 648 goto tx_error;
651 649
652 /* ISATAP (RFC4214) - must come before 6to4 */ 650 /* ISATAP (RFC4214) - must come before 6to4 */
653 if (dev->priv_flags & IFF_ISATAP) { 651 if (dev->priv_flags & IFF_ISATAP) {
654 struct neighbour *neigh = NULL; 652 struct neighbour *neigh = NULL;
655 653
656 if (skb_dst(skb)) 654 if (skb_dst(skb))
657 neigh = skb_dst(skb)->neighbour; 655 neigh = skb_dst(skb)->neighbour;
658 656
659 if (neigh == NULL) { 657 if (neigh == NULL) {
660 if (net_ratelimit()) 658 if (net_ratelimit())
661 printk(KERN_DEBUG "sit: nexthop == NULL\n"); 659 printk(KERN_DEBUG "sit: nexthop == NULL\n");
662 goto tx_error; 660 goto tx_error;
		}

		addr6 = (struct in6_addr *)&neigh->primary_key;
		addr_type = ipv6_addr_type(addr6);

		if ((addr_type & IPV6_ADDR_UNICAST) &&
		    ipv6_addr_is_isatap(addr6))
			dst = addr6->s6_addr32[3];
		else
			goto tx_error;
	}

	if (!dst)
		dst = try_6rd(&iph6->daddr, tunnel);

	if (!dst) {
		struct neighbour *neigh = NULL;

		if (skb_dst(skb))
			neigh = skb_dst(skb)->neighbour;

		if (neigh == NULL) {
			if (net_ratelimit())
				printk(KERN_DEBUG "sit: nexthop == NULL\n");
			goto tx_error;
		}

		addr6 = (struct in6_addr *)&neigh->primary_key;
		addr_type = ipv6_addr_type(addr6);

		if (addr_type == IPV6_ADDR_ANY) {
			addr6 = &ipv6_hdr(skb)->daddr;
			addr_type = ipv6_addr_type(addr6);
		}

		if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
			goto tx_error_icmp;

		dst = addr6->s6_addr32[3];
	}

	{
		struct flowi fl = { .nl_u = { .ip4_u =
					      { .daddr = dst,
						.saddr = tiph->saddr,
						.tos = RT_TOS(tos) } },
				    .oif = tunnel->parms.link,
				    .proto = IPPROTO_IPV6 };
		if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
			stats->tx_carrier_errors++;
			goto tx_error_icmp;
		}
	}
	if (rt->rt_type != RTN_UNICAST) {
		ip_rt_put(rt);
		stats->tx_carrier_errors++;
		goto tx_error_icmp;
	}
	tdev = rt->u.dst.dev;

	if (tdev == dev) {
		ip_rt_put(rt);
		stats->collisions++;
		goto tx_error;
	}

	if (df) {
		mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);

		if (mtu < 68) {
			stats->collisions++;
			ip_rt_put(rt);
			goto tx_error;
		}

		if (mtu < IPV6_MIN_MTU) {
			mtu = IPV6_MIN_MTU;
			df = 0;
		}

		if (tunnel->parms.iph.daddr && skb_dst(skb))
			skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);

		if (skb->len > mtu) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			ip_rt_put(rt);
			goto tx_error;
		}
	}

	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);

	if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
		if (!new_skb) {
			ip_rt_put(rt);
			txq->tx_dropped++;
			dev_kfree_skb(skb);
			return NETDEV_TX_OK;
		}
		if (skb->sk)
			skb_set_owner_w(new_skb, skb->sk);
		dev_kfree_skb(skb);
		skb = new_skb;
		iph6 = ipv6_hdr(skb);
	}

	skb->transport_header = skb->network_header;
	skb_push(skb, sizeof(struct iphdr));
	skb_reset_network_header(skb);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	IPCB(skb)->flags = 0;
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->u.dst);

	/*
	 * Push down and install the IPIP header.
	 */

	iph = ip_hdr(skb);
	iph->version = 4;
	iph->ihl = sizeof(struct iphdr) >> 2;
	iph->frag_off = df;
	iph->protocol = IPPROTO_IPV6;
	iph->tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6));
	iph->daddr = rt->rt_dst;
	iph->saddr = rt->rt_src;

	if ((iph->ttl = tiph->ttl) == 0)
		iph->ttl = iph6->hop_limit;

	nf_reset(skb);

	IPTUNNEL_XMIT();
	return NETDEV_TX_OK;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	stats->tx_errors++;
	dev_kfree_skb(skb);
	return NETDEV_TX_OK;
}
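Worth calling out: the transmit path above scrubs per-skb state by hand before encapsulating (the IPCB() memset, skb_dst_drop(), nf_reset()). On the decap side that scrubbing now lives in the new skb_tunnel_rx() helper, which receive handlers such as ipip6_rcv() call instead of open-coding it. A sketch of the helper's likely shape, assuming 2.6.34-era sk_buff and net_device fields; the authoritative body is in the net/dst.h hunk of this commit:

	static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev)
	{
		/* rx accounting against the tunnel netdevice */
		dev->stats.rx_packets++;
		dev->stats.rx_bytes += skb->len;
		skb->rxhash = 0;	/* clear so packet steering rehashes on
					 * the inner headers */
		skb_set_dev(skb, dev);	/* retarget the skb at the tunnel device */
		skb_dst_drop(skb);
		nf_reset(skb);
	}

A decap handler then reduces to roughly skb_tunnel_rx(skb, tunnel->dev) followed by netif_rx().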
static void ipip6_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel;
	struct iphdr *iph;

	tunnel = netdev_priv(dev);
	iph = &tunnel->parms.iph;

	if (iph->daddr) {
		struct flowi fl = { .nl_u = { .ip4_u =
					      { .daddr = iph->daddr,
						.saddr = iph->saddr,
						.tos = RT_TOS(iph->tos) } },
				    .oif = tunnel->parms.link,
				    .proto = IPPROTO_IPV6 };
		struct rtable *rt;
		if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
			tdev = rt->u.dst.dev;
			ip_rt_put(rt);
		}
		dev->flags |= IFF_POINTOPOINT;
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);

	if (tdev) {
		dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
		dev->mtu = tdev->mtu - sizeof(struct iphdr);
		if (dev->mtu < IPV6_MIN_MTU)
			dev->mtu = IPV6_MIN_MTU;
	}
	dev->iflink = tunnel->parms.link;
}
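A concrete reading of the MTU math above: bound to an Ethernet underlay, the tunnel gets dev->mtu = 1500 - sizeof(struct iphdr) = 1480, reserving room for the outer IPv4 header, while the clamp guarantees the device never advertises less than IPV6_MIN_MTU (1280), the minimum link MTU IPv6 requires.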
static int
ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip_tunnel_parm p;
	struct ip_tunnel_prl prl;
	struct ip_tunnel *t;
	struct net *net = dev_net(dev);
	struct sit_net *sitn = net_generic(net, sit_net_id);
#ifdef CONFIG_IPV6_SIT_6RD
	struct ip_tunnel_6rd ip6rd;
#endif

	switch (cmd) {
	case SIOCGETTUNNEL:
#ifdef CONFIG_IPV6_SIT_6RD
	case SIOCGET6RD:
#endif
		t = NULL;
		if (dev == sitn->fb_tunnel_dev) {
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
				err = -EFAULT;
				break;
			}
			t = ipip6_tunnel_locate(net, &p, 0);
		}
		if (t == NULL)
			t = netdev_priv(dev);

		err = -EFAULT;
		if (cmd == SIOCGETTUNNEL) {
			memcpy(&p, &t->parms, sizeof(p));
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &p,
					 sizeof(p)))
				goto done;
#ifdef CONFIG_IPV6_SIT_6RD
		} else {
			ipv6_addr_copy(&ip6rd.prefix, &t->ip6rd.prefix);
			ip6rd.relay_prefix = t->ip6rd.relay_prefix;
			ip6rd.prefixlen = t->ip6rd.prefixlen;
			ip6rd.relay_prefixlen = t->ip6rd.relay_prefixlen;
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &ip6rd,
					 sizeof(ip6rd)))
				goto done;
#endif
		}
		err = 0;
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
			goto done;

		err = -EINVAL;
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPV6 ||
		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
			goto done;
		if (p.iph.ttl)
			p.iph.frag_off |= htons(IP_DF);

		t = ipip6_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);

		if (dev != sitn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
				    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
					err = -EINVAL;
					break;
				}
				t = netdev_priv(dev);
				ipip6_tunnel_unlink(sitn, t);
				t->parms.iph.saddr = p.iph.saddr;
				t->parms.iph.daddr = p.iph.daddr;
				memcpy(dev->dev_addr, &p.iph.saddr, 4);
				memcpy(dev->broadcast, &p.iph.daddr, 4);
				ipip6_tunnel_link(sitn, t);
				netdev_state_change(dev);
			}
		}

		if (t) {
			err = 0;
			if (cmd == SIOCCHGTUNNEL) {
				t->parms.iph.ttl = p.iph.ttl;
				t->parms.iph.tos = p.iph.tos;
				if (t->parms.link != p.link) {
					t->parms.link = p.link;
					ipip6_tunnel_bind_dev(dev);
					netdev_state_change(dev);
				}
			}
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
				err = -EFAULT;
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		if (dev == sitn->fb_tunnel_dev) {
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
				goto done;
			err = -ENOENT;
			if ((t = ipip6_tunnel_locate(net, &p, 0)) == NULL)
				goto done;
			err = -EPERM;
			if (t == netdev_priv(sitn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	case SIOCGETPRL:
		err = -EINVAL;
		if (dev == sitn->fb_tunnel_dev)
			goto done;
		err = -ENOENT;
		if (!(t = netdev_priv(dev)))
			goto done;
		err = ipip6_tunnel_get_prl(t, ifr->ifr_ifru.ifru_data);
		break;

	case SIOCADDPRL:
	case SIOCDELPRL:
	case SIOCCHGPRL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;
		err = -EINVAL;
		if (dev == sitn->fb_tunnel_dev)
			goto done;
		err = -EFAULT;
		if (copy_from_user(&prl, ifr->ifr_ifru.ifru_data, sizeof(prl)))
			goto done;
		err = -ENOENT;
		if (!(t = netdev_priv(dev)))
			goto done;

		switch (cmd) {
		case SIOCDELPRL:
			err = ipip6_tunnel_del_prl(t, &prl);
			break;
		case SIOCADDPRL:
		case SIOCCHGPRL:
			err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL);
			break;
		}
		netdev_state_change(dev);
		break;

#ifdef CONFIG_IPV6_SIT_6RD
	case SIOCADD6RD:
	case SIOCCHG6RD:
	case SIOCDEL6RD:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		err = -EFAULT;
		if (copy_from_user(&ip6rd, ifr->ifr_ifru.ifru_data,
				   sizeof(ip6rd)))
			goto done;

		t = netdev_priv(dev);

		if (cmd != SIOCDEL6RD) {
			struct in6_addr prefix;
			__be32 relay_prefix;

			err = -EINVAL;
			if (ip6rd.relay_prefixlen > 32 ||
			    ip6rd.prefixlen + (32 - ip6rd.relay_prefixlen) > 64)
				goto done;

			ipv6_addr_prefix(&prefix, &ip6rd.prefix,
					 ip6rd.prefixlen);
			if (!ipv6_addr_equal(&prefix, &ip6rd.prefix))
				goto done;
			if (ip6rd.relay_prefixlen)
				relay_prefix = ip6rd.relay_prefix &
					       htonl(0xffffffffUL <<
						     (32 - ip6rd.relay_prefixlen));
			else
				relay_prefix = 0;
			if (relay_prefix != ip6rd.relay_prefix)
				goto done;

			ipv6_addr_copy(&t->ip6rd.prefix, &prefix);
			t->ip6rd.relay_prefix = relay_prefix;
			t->ip6rd.prefixlen = ip6rd.prefixlen;
			t->ip6rd.relay_prefixlen = ip6rd.relay_prefixlen;
		} else
			ipip6_tunnel_clone_6rd(dev, sitn);

		err = 0;
		break;
#endif

	default:
		err = -EINVAL;
	}

done:
	return err;
}
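For orientation, this private ioctl is the interface that iproute2's "ip tunnel" command drives. A minimal userspace sketch of reading a sit device's parameters; the device name "sit1" is hypothetical and error handling is mostly elided:

	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <sys/socket.h>
	#include <arpa/inet.h>
	#include <linux/if.h>		/* struct ifreq, IFNAMSIZ */
	#include <linux/ip.h>
	#include <linux/if_tunnel.h>	/* struct ip_tunnel_parm, SIOCGETTUNNEL */

	int main(void)
	{
		struct ip_tunnel_parm p;
		struct ifreq ifr;
		int fd = socket(AF_INET, SOCK_DGRAM, 0);

		memset(&p, 0, sizeof(p));
		memset(&ifr, 0, sizeof(ifr));
		strncpy(ifr.ifr_name, "sit1", IFNAMSIZ);	/* hypothetical device */
		ifr.ifr_ifru.ifru_data = (void *)&p;	/* kernel copies parms back here */

		if (fd >= 0 && ioctl(fd, SIOCGETTUNNEL, &ifr) == 0)
			printf("%s: remote %s\n", p.name,
			       inet_ntoa(*(struct in_addr *)&p.iph.daddr));
		close(fd);
		return 0;
	}

Note that SIOCGETTUNNEL is the one tunnel command above with no capable(CAP_NET_ADMIN) gate, matching its read-only semantics.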
static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	if (new_mtu < IPV6_MIN_MTU || new_mtu > 0xFFF8 - sizeof(struct iphdr))
		return -EINVAL;
	dev->mtu = new_mtu;
	return 0;
}

static const struct net_device_ops ipip6_netdev_ops = {
	.ndo_uninit = ipip6_tunnel_uninit,
	.ndo_start_xmit = ipip6_tunnel_xmit,
	.ndo_do_ioctl = ipip6_tunnel_ioctl,
	.ndo_change_mtu = ipip6_tunnel_change_mtu,
};

static void ipip6_tunnel_setup(struct net_device *dev)
{
	dev->netdev_ops = &ipip6_netdev_ops;
	dev->destructor = free_netdev;

	dev->type = ARPHRD_SIT;
	dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr);
	dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr);
	dev->flags = IFF_NOARP;
	dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
	dev->iflink = 0;
	dev->addr_len = 4;
	dev->features |= NETIF_F_NETNS_LOCAL;
}

static void ipip6_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);

	ipip6_tunnel_bind_dev(dev);
}

static void __net_init ipip6_fb_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	struct net *net = dev_net(dev);
	struct sit_net *sitn = net_generic(net, sit_net_id);

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	iph->version = 4;
	iph->protocol = IPPROTO_IPV6;
	iph->ihl = 5;
	iph->ttl = 64;

	dev_hold(dev);
	sitn->tunnels_wc[0] = tunnel;
}

static struct xfrm_tunnel sit_handler = {
	.handler = ipip6_rcv,
	.err_handler = ipip6_err,
	.priority = 1,
};

static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_head *head)
{
	int prio;

	for (prio = 1; prio < 4; prio++) {
		int h;
		for (h = 0; h < HASH_SIZE; h++) {
			struct ip_tunnel *t = sitn->tunnels[prio][h];

			while (t != NULL) {
				unregister_netdevice_queue(t->dev, head);
				t = t->next;
			}
		}
	}
}
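Note that sit_destroy_tunnels() deliberately starts at prio 1: bucket 0 is tunnels_wc, the wildcard chain where the per-namespace fallback device (sit0) lives, and sit_exit_net() below queues that device for unregistration explicitly.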
static int __net_init sit_init_net(struct net *net)
{
	struct sit_net *sitn = net_generic(net, sit_net_id);
	int err;

	sitn->tunnels[0] = sitn->tunnels_wc;
	sitn->tunnels[1] = sitn->tunnels_l;
	sitn->tunnels[2] = sitn->tunnels_r;
	sitn->tunnels[3] = sitn->tunnels_r_l;

	sitn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "sit0",
					   ipip6_tunnel_setup);
	if (!sitn->fb_tunnel_dev) {
		err = -ENOMEM;
		goto err_alloc_dev;
	}
	dev_net_set(sitn->fb_tunnel_dev, net);

	ipip6_fb_tunnel_init(sitn->fb_tunnel_dev);
	ipip6_tunnel_clone_6rd(sitn->fb_tunnel_dev, sitn);

	if ((err = register_netdev(sitn->fb_tunnel_dev)))
		goto err_reg_dev;

	return 0;

err_reg_dev:
	dev_put(sitn->fb_tunnel_dev);
	free_netdev(sitn->fb_tunnel_dev);
err_alloc_dev:
	return err;
}
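The unwind order on the error path above is deliberate: ipip6_fb_tunnel_init() takes an extra reference with dev_hold(), so a register_netdev() failure must drop it with dev_put() before free_netdev() releases the device.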
static void __net_exit sit_exit_net(struct net *net)
{
	struct sit_net *sitn = net_generic(net, sit_net_id);
	LIST_HEAD(list);

	rtnl_lock();
	sit_destroy_tunnels(sitn, &list);
	unregister_netdevice_queue(sitn->fb_tunnel_dev, &list);
	unregister_netdevice_many(&list);
	rtnl_unlock();
}

static struct pernet_operations sit_net_ops = {
	.init = sit_init_net,
	.exit = sit_exit_net,
	.id = &sit_net_id,
	.size = sizeof(struct sit_net),
};

static void __exit sit_cleanup(void)
{
	xfrm4_tunnel_deregister(&sit_handler, AF_INET6);

	unregister_pernet_device(&sit_net_ops);
	rcu_barrier(); /* Wait for completion of call_rcu()'s */
}

static int __init sit_init(void)
{
	int err;

	printk(KERN_INFO "IPv6 over IPv4 tunneling driver\n");

	err = register_pernet_device(&sit_net_ops);
	if (err < 0)
		return err;
	err = xfrm4_tunnel_register(&sit_handler, AF_INET6);
	if (err < 0) {
		unregister_pernet_device(&sit_net_ops);
		printk(KERN_INFO "sit init: Can't add protocol\n");
	}
	return err;
}

module_init(sit_init);
module_exit(sit_cleanup);
MODULE_LICENSE("GPL");
MODULE_ALIAS("sit0");