Commit d19d56ddc88e7895429ef118db9c83c7bbe3ce6a
Committed by David S. Miller
1 parent de213e5eed
Exists in master and in 39 other branches
net: Introduce skb_tunnel_rx() helper
skb rxhash should be cleared when a skb is handled by a tunnel before being delivered again, so that correct packet steering can take place.

There are other cleanups and accounting that we can factorize in a new helper, skb_tunnel_rx().

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Showing 7 changed files with 34 additions and 34 deletions
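Before the diff, a hedged sketch of the receive-path pattern the new helper enables. Everything here except skb_tunnel_rx() itself is a hypothetical stand-in (my_tunnel_rcv, the tunnel variable), not code from this commit:

    /* Hypothetical tunnel receive handler, assuming an IPv4 payload.
     * After the outer headers are stripped, the packet re-enters the
     * stack via netif_rx(); skb_tunnel_rx() centralizes the device
     * stats update, the rxhash reset and the dst/netfilter cleanups
     * that each tunnel driver previously open-coded.
     */
    static int my_tunnel_rcv(struct sk_buff *skb, struct ip_tunnel *tunnel)
    {
            secpath_reset(skb);
            skb->protocol = htons(ETH_P_IP);  /* inner protocol */
            skb->pkt_type = PACKET_HOST;
            skb_tunnel_rx(skb, tunnel->dev);  /* the new helper */
            netif_rx(skb);
            return 0;
    }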
include/net/dst.h
1 | /* | 1 | /* |
2 | * net/dst.h Protocol independent destination cache definitions. | 2 | * net/dst.h Protocol independent destination cache definitions. |
3 | * | 3 | * |
4 | * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> | 4 | * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> |
5 | * | 5 | * |
6 | */ | 6 | */ |
7 | 7 | ||
8 | #ifndef _NET_DST_H | 8 | #ifndef _NET_DST_H |
9 | #define _NET_DST_H | 9 | #define _NET_DST_H |
10 | 10 | ||
11 | #include <net/dst_ops.h> | 11 | #include <net/dst_ops.h> |
12 | #include <linux/netdevice.h> | 12 | #include <linux/netdevice.h> |
13 | #include <linux/rtnetlink.h> | 13 | #include <linux/rtnetlink.h> |
14 | #include <linux/rcupdate.h> | 14 | #include <linux/rcupdate.h> |
15 | #include <linux/jiffies.h> | 15 | #include <linux/jiffies.h> |
16 | #include <net/neighbour.h> | 16 | #include <net/neighbour.h> |
17 | #include <asm/processor.h> | 17 | #include <asm/processor.h> |
18 | 18 | ||
19 | /* | 19 | /* |
20 | * 0 - no debugging messages | 20 | * 0 - no debugging messages |
21 | * 1 - rare events and bugs (default) | 21 | * 1 - rare events and bugs (default) |
22 | * 2 - trace mode. | 22 | * 2 - trace mode. |
23 | */ | 23 | */ |
24 | #define RT_CACHE_DEBUG 0 | 24 | #define RT_CACHE_DEBUG 0 |
25 | 25 | ||
26 | #define DST_GC_MIN (HZ/10) | 26 | #define DST_GC_MIN (HZ/10) |
27 | #define DST_GC_INC (HZ/2) | 27 | #define DST_GC_INC (HZ/2) |
28 | #define DST_GC_MAX (120*HZ) | 28 | #define DST_GC_MAX (120*HZ) |
29 | 29 | ||
30 | /* Each dst_entry has reference count and sits in some parent list(s). | 30 | /* Each dst_entry has reference count and sits in some parent list(s). |
31 | * When it is removed from parent list, it is "freed" (dst_free). | 31 | * When it is removed from parent list, it is "freed" (dst_free). |
32 | * After this it enters dead state (dst->obsolete > 0) and if its refcnt | 32 | * After this it enters dead state (dst->obsolete > 0) and if its refcnt |
33 | * is zero, it can be destroyed immediately, otherwise it is added | 33 | * is zero, it can be destroyed immediately, otherwise it is added |
34 | * to gc list and garbage collector periodically checks the refcnt. | 34 | * to gc list and garbage collector periodically checks the refcnt. |
35 | */ | 35 | */ |
36 | 36 | ||
37 | struct sk_buff; | 37 | struct sk_buff; |
38 | 38 | ||
39 | struct dst_entry { | 39 | struct dst_entry { |
40 | struct rcu_head rcu_head; | 40 | struct rcu_head rcu_head; |
41 | struct dst_entry *child; | 41 | struct dst_entry *child; |
42 | struct net_device *dev; | 42 | struct net_device *dev; |
43 | short error; | 43 | short error; |
44 | short obsolete; | 44 | short obsolete; |
45 | int flags; | 45 | int flags; |
46 | #define DST_HOST 1 | 46 | #define DST_HOST 1 |
47 | #define DST_NOXFRM 2 | 47 | #define DST_NOXFRM 2 |
48 | #define DST_NOPOLICY 4 | 48 | #define DST_NOPOLICY 4 |
49 | #define DST_NOHASH 8 | 49 | #define DST_NOHASH 8 |
50 | unsigned long expires; | 50 | unsigned long expires; |
51 | 51 | ||
52 | unsigned short header_len; /* more space at head required */ | 52 | unsigned short header_len; /* more space at head required */ |
53 | unsigned short trailer_len; /* space to reserve at tail */ | 53 | unsigned short trailer_len; /* space to reserve at tail */ |
54 | 54 | ||
55 | unsigned int rate_tokens; | 55 | unsigned int rate_tokens; |
56 | unsigned long rate_last; /* rate limiting for ICMP */ | 56 | unsigned long rate_last; /* rate limiting for ICMP */ |
57 | 57 | ||
58 | struct dst_entry *path; | 58 | struct dst_entry *path; |
59 | 59 | ||
60 | struct neighbour *neighbour; | 60 | struct neighbour *neighbour; |
61 | struct hh_cache *hh; | 61 | struct hh_cache *hh; |
62 | #ifdef CONFIG_XFRM | 62 | #ifdef CONFIG_XFRM |
63 | struct xfrm_state *xfrm; | 63 | struct xfrm_state *xfrm; |
64 | #else | 64 | #else |
65 | void *__pad1; | 65 | void *__pad1; |
66 | #endif | 66 | #endif |
67 | int (*input)(struct sk_buff*); | 67 | int (*input)(struct sk_buff*); |
68 | int (*output)(struct sk_buff*); | 68 | int (*output)(struct sk_buff*); |
69 | 69 | ||
70 | struct dst_ops *ops; | 70 | struct dst_ops *ops; |
71 | 71 | ||
72 | u32 metrics[RTAX_MAX]; | 72 | u32 metrics[RTAX_MAX]; |
73 | 73 | ||
74 | #ifdef CONFIG_NET_CLS_ROUTE | 74 | #ifdef CONFIG_NET_CLS_ROUTE |
75 | __u32 tclassid; | 75 | __u32 tclassid; |
76 | #else | 76 | #else |
77 | __u32 __pad2; | 77 | __u32 __pad2; |
78 | #endif | 78 | #endif |
79 | 79 | ||
80 | 80 | ||
81 | /* | 81 | /* |
82 | * Align __refcnt to a 64-byte boundary | 82 | * Align __refcnt to a 64-byte boundary |
83 | * (L1_CACHE_SIZE would be too much) | 83 | * (L1_CACHE_SIZE would be too much) |
84 | */ | 84 | */ |
85 | #ifdef CONFIG_64BIT | 85 | #ifdef CONFIG_64BIT |
86 | long __pad_to_align_refcnt[1]; | 86 | long __pad_to_align_refcnt[1]; |
87 | #endif | 87 | #endif |
88 | /* | 88 | /* |
89 | * __refcnt wants to be on a different cache line from | 89 | * __refcnt wants to be on a different cache line from |
90 | * input/output/ops or performance tanks badly | 90 | * input/output/ops or performance tanks badly |
91 | */ | 91 | */ |
92 | atomic_t __refcnt; /* client references */ | 92 | atomic_t __refcnt; /* client references */ |
93 | int __use; | 93 | int __use; |
94 | unsigned long lastuse; | 94 | unsigned long lastuse; |
95 | union { | 95 | union { |
96 | struct dst_entry *next; | 96 | struct dst_entry *next; |
97 | struct rtable *rt_next; | 97 | struct rtable *rt_next; |
98 | struct rt6_info *rt6_next; | 98 | struct rt6_info *rt6_next; |
99 | struct dn_route *dn_next; | 99 | struct dn_route *dn_next; |
100 | }; | 100 | }; |
101 | }; | 101 | }; |
102 | 102 | ||
103 | #ifdef __KERNEL__ | 103 | #ifdef __KERNEL__ |
104 | 104 | ||
105 | static inline u32 | 105 | static inline u32 |
106 | dst_metric(const struct dst_entry *dst, int metric) | 106 | dst_metric(const struct dst_entry *dst, int metric) |
107 | { | 107 | { |
108 | return dst->metrics[metric-1]; | 108 | return dst->metrics[metric-1]; |
109 | } | 109 | } |
110 | 110 | ||
111 | static inline u32 | 111 | static inline u32 |
112 | dst_feature(const struct dst_entry *dst, u32 feature) | 112 | dst_feature(const struct dst_entry *dst, u32 feature) |
113 | { | 113 | { |
114 | return dst_metric(dst, RTAX_FEATURES) & feature; | 114 | return dst_metric(dst, RTAX_FEATURES) & feature; |
115 | } | 115 | } |
116 | 116 | ||
117 | static inline u32 dst_mtu(const struct dst_entry *dst) | 117 | static inline u32 dst_mtu(const struct dst_entry *dst) |
118 | { | 118 | { |
119 | u32 mtu = dst_metric(dst, RTAX_MTU); | 119 | u32 mtu = dst_metric(dst, RTAX_MTU); |
120 | /* | 120 | /* |
121 | * Alexey put it here, so ask him about it :) | 121 | * Alexey put it here, so ask him about it :) |
122 | */ | 122 | */ |
123 | barrier(); | 123 | barrier(); |
124 | return mtu; | 124 | return mtu; |
125 | } | 125 | } |
126 | 126 | ||
127 | /* RTT metrics are stored in milliseconds for user ABI, but used as jiffies */ | 127 | /* RTT metrics are stored in milliseconds for user ABI, but used as jiffies */ |
128 | static inline unsigned long dst_metric_rtt(const struct dst_entry *dst, int metric) | 128 | static inline unsigned long dst_metric_rtt(const struct dst_entry *dst, int metric) |
129 | { | 129 | { |
130 | return msecs_to_jiffies(dst_metric(dst, metric)); | 130 | return msecs_to_jiffies(dst_metric(dst, metric)); |
131 | } | 131 | } |
132 | 132 | ||
133 | static inline void set_dst_metric_rtt(struct dst_entry *dst, int metric, | 133 | static inline void set_dst_metric_rtt(struct dst_entry *dst, int metric, |
134 | unsigned long rtt) | 134 | unsigned long rtt) |
135 | { | 135 | { |
136 | dst->metrics[metric-1] = jiffies_to_msecs(rtt); | 136 | dst->metrics[metric-1] = jiffies_to_msecs(rtt); |
137 | } | 137 | } |
138 | 138 | ||
139 | static inline u32 | 139 | static inline u32 |
140 | dst_allfrag(const struct dst_entry *dst) | 140 | dst_allfrag(const struct dst_entry *dst) |
141 | { | 141 | { |
142 | int ret = dst_feature(dst, RTAX_FEATURE_ALLFRAG); | 142 | int ret = dst_feature(dst, RTAX_FEATURE_ALLFRAG); |
143 | /* Yes, _exactly_. This is paranoia. */ | 143 | /* Yes, _exactly_. This is paranoia. */ |
144 | barrier(); | 144 | barrier(); |
145 | return ret; | 145 | return ret; |
146 | } | 146 | } |
147 | 147 | ||
148 | static inline int | 148 | static inline int |
149 | dst_metric_locked(struct dst_entry *dst, int metric) | 149 | dst_metric_locked(struct dst_entry *dst, int metric) |
150 | { | 150 | { |
151 | return dst_metric(dst, RTAX_LOCK) & (1<<metric); | 151 | return dst_metric(dst, RTAX_LOCK) & (1<<metric); |
152 | } | 152 | } |
153 | 153 | ||
154 | static inline void dst_hold(struct dst_entry * dst) | 154 | static inline void dst_hold(struct dst_entry * dst) |
155 | { | 155 | { |
156 | /* | 156 | /* |
157 | * If your kernel compilation stops here, please check | 157 | * If your kernel compilation stops here, please check |
158 | * __pad_to_align_refcnt declaration in struct dst_entry | 158 | * __pad_to_align_refcnt declaration in struct dst_entry |
159 | */ | 159 | */ |
160 | BUILD_BUG_ON(offsetof(struct dst_entry, __refcnt) & 63); | 160 | BUILD_BUG_ON(offsetof(struct dst_entry, __refcnt) & 63); |
161 | atomic_inc(&dst->__refcnt); | 161 | atomic_inc(&dst->__refcnt); |
162 | } | 162 | } |
163 | 163 | ||
164 | static inline void dst_use(struct dst_entry *dst, unsigned long time) | 164 | static inline void dst_use(struct dst_entry *dst, unsigned long time) |
165 | { | 165 | { |
166 | dst_hold(dst); | 166 | dst_hold(dst); |
167 | dst->__use++; | 167 | dst->__use++; |
168 | dst->lastuse = time; | 168 | dst->lastuse = time; |
169 | } | 169 | } |
170 | 170 | ||
171 | static inline void dst_use_noref(struct dst_entry *dst, unsigned long time) | 171 | static inline void dst_use_noref(struct dst_entry *dst, unsigned long time) |
172 | { | 172 | { |
173 | dst->__use++; | 173 | dst->__use++; |
174 | dst->lastuse = time; | 174 | dst->lastuse = time; |
175 | } | 175 | } |
176 | 176 | ||
177 | static inline | 177 | static inline |
178 | struct dst_entry * dst_clone(struct dst_entry * dst) | 178 | struct dst_entry * dst_clone(struct dst_entry * dst) |
179 | { | 179 | { |
180 | if (dst) | 180 | if (dst) |
181 | atomic_inc(&dst->__refcnt); | 181 | atomic_inc(&dst->__refcnt); |
182 | return dst; | 182 | return dst; |
183 | } | 183 | } |
184 | 184 | ||
185 | extern void dst_release(struct dst_entry *dst); | 185 | extern void dst_release(struct dst_entry *dst); |
186 | 186 | ||
187 | static inline void refdst_drop(unsigned long refdst) | 187 | static inline void refdst_drop(unsigned long refdst) |
188 | { | 188 | { |
189 | if (!(refdst & SKB_DST_NOREF)) | 189 | if (!(refdst & SKB_DST_NOREF)) |
190 | dst_release((struct dst_entry *)(refdst & SKB_DST_PTRMASK)); | 190 | dst_release((struct dst_entry *)(refdst & SKB_DST_PTRMASK)); |
191 | } | 191 | } |
192 | 192 | ||
193 | /** | 193 | /** |
194 | * skb_dst_drop - drops skb dst | 194 | * skb_dst_drop - drops skb dst |
195 | * @skb: buffer | 195 | * @skb: buffer |
196 | * | 196 | * |
197 | * Drops dst reference count if a reference was taken. | 197 | * Drops dst reference count if a reference was taken. |
198 | */ | 198 | */ |
199 | static inline void skb_dst_drop(struct sk_buff *skb) | 199 | static inline void skb_dst_drop(struct sk_buff *skb) |
200 | { | 200 | { |
201 | if (skb->_skb_refdst) { | 201 | if (skb->_skb_refdst) { |
202 | refdst_drop(skb->_skb_refdst); | 202 | refdst_drop(skb->_skb_refdst); |
203 | skb->_skb_refdst = 0UL; | 203 | skb->_skb_refdst = 0UL; |
204 | } | 204 | } |
205 | } | 205 | } |
206 | 206 | ||
207 | static inline void skb_dst_copy(struct sk_buff *nskb, const struct sk_buff *oskb) | 207 | static inline void skb_dst_copy(struct sk_buff *nskb, const struct sk_buff *oskb) |
208 | { | 208 | { |
209 | nskb->_skb_refdst = oskb->_skb_refdst; | 209 | nskb->_skb_refdst = oskb->_skb_refdst; |
210 | if (!(nskb->_skb_refdst & SKB_DST_NOREF)) | 210 | if (!(nskb->_skb_refdst & SKB_DST_NOREF)) |
211 | dst_clone(skb_dst(nskb)); | 211 | dst_clone(skb_dst(nskb)); |
212 | } | 212 | } |
213 | 213 | ||
214 | /** | 214 | /** |
215 | * skb_dst_force - makes sure skb dst is refcounted | 215 | * skb_dst_force - makes sure skb dst is refcounted |
216 | * @skb: buffer | 216 | * @skb: buffer |
217 | * | 217 | * |
218 | * If dst is not yet refcounted, let's do it | 218 | * If dst is not yet refcounted, let's do it |
219 | */ | 219 | */ |
220 | static inline void skb_dst_force(struct sk_buff *skb) | 220 | static inline void skb_dst_force(struct sk_buff *skb) |
221 | { | 221 | { |
222 | if (skb_dst_is_noref(skb)) { | 222 | if (skb_dst_is_noref(skb)) { |
223 | WARN_ON(!rcu_read_lock_held()); | 223 | WARN_ON(!rcu_read_lock_held()); |
224 | skb->_skb_refdst &= ~SKB_DST_NOREF; | 224 | skb->_skb_refdst &= ~SKB_DST_NOREF; |
225 | dst_clone(skb_dst(skb)); | 225 | dst_clone(skb_dst(skb)); |
226 | } | 226 | } |
227 | } | 227 | } |
228 | 228 | ||
229 | |||
230 | /** | ||
231 | * skb_tunnel_rx - prepare skb for rx reinsert | ||
232 | * @skb: buffer | ||
233 | * @dev: tunnel device | ||
234 | * | ||
235 | * After decapsulation, the packet is going to re-enter (netif_rx()) our stack, | ||
236 | * so perform some cleanups and accounting. | ||
237 | */ | ||
238 | static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev) | ||
239 | { | ||
240 | skb->dev = dev; | ||
241 | /* TODO : stats should be SMP safe */ | ||
242 | dev->stats.rx_packets++; | ||
243 | dev->stats.rx_bytes += skb->len; | ||
244 | skb->rxhash = 0; | ||
245 | skb_dst_drop(skb); | ||
246 | nf_reset(skb); | ||
247 | } | ||
248 | |||
229 | /* Children define the path of the packet through the | 249 | /* Children define the path of the packet through the |
230 | * Linux networking. Thus, destinations are stackable. | 250 | * Linux networking. Thus, destinations are stackable. |
231 | */ | 251 | */ |
232 | 252 | ||
233 | static inline struct dst_entry *dst_pop(struct dst_entry *dst) | 253 | static inline struct dst_entry *dst_pop(struct dst_entry *dst) |
234 | { | 254 | { |
235 | struct dst_entry *child = dst_clone(dst->child); | 255 | struct dst_entry *child = dst_clone(dst->child); |
236 | 256 | ||
237 | dst_release(dst); | 257 | dst_release(dst); |
238 | return child; | 258 | return child; |
239 | } | 259 | } |
240 | 260 | ||
241 | extern int dst_discard(struct sk_buff *skb); | 261 | extern int dst_discard(struct sk_buff *skb); |
242 | extern void * dst_alloc(struct dst_ops * ops); | 262 | extern void * dst_alloc(struct dst_ops * ops); |
243 | extern void __dst_free(struct dst_entry * dst); | 263 | extern void __dst_free(struct dst_entry * dst); |
244 | extern struct dst_entry *dst_destroy(struct dst_entry * dst); | 264 | extern struct dst_entry *dst_destroy(struct dst_entry * dst); |
245 | 265 | ||
246 | static inline void dst_free(struct dst_entry * dst) | 266 | static inline void dst_free(struct dst_entry * dst) |
247 | { | 267 | { |
248 | if (dst->obsolete > 1) | 268 | if (dst->obsolete > 1) |
249 | return; | 269 | return; |
250 | if (!atomic_read(&dst->__refcnt)) { | 270 | if (!atomic_read(&dst->__refcnt)) { |
251 | dst = dst_destroy(dst); | 271 | dst = dst_destroy(dst); |
252 | if (!dst) | 272 | if (!dst) |
253 | return; | 273 | return; |
254 | } | 274 | } |
255 | __dst_free(dst); | 275 | __dst_free(dst); |
256 | } | 276 | } |
257 | 277 | ||
258 | static inline void dst_rcu_free(struct rcu_head *head) | 278 | static inline void dst_rcu_free(struct rcu_head *head) |
259 | { | 279 | { |
260 | struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head); | 280 | struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head); |
261 | dst_free(dst); | 281 | dst_free(dst); |
262 | } | 282 | } |
263 | 283 | ||
264 | static inline void dst_confirm(struct dst_entry *dst) | 284 | static inline void dst_confirm(struct dst_entry *dst) |
265 | { | 285 | { |
266 | if (dst) | 286 | if (dst) |
267 | neigh_confirm(dst->neighbour); | 287 | neigh_confirm(dst->neighbour); |
268 | } | 288 | } |
269 | 289 | ||
270 | static inline void dst_link_failure(struct sk_buff *skb) | 290 | static inline void dst_link_failure(struct sk_buff *skb) |
271 | { | 291 | { |
272 | struct dst_entry *dst = skb_dst(skb); | 292 | struct dst_entry *dst = skb_dst(skb); |
273 | if (dst && dst->ops && dst->ops->link_failure) | 293 | if (dst && dst->ops && dst->ops->link_failure) |
274 | dst->ops->link_failure(skb); | 294 | dst->ops->link_failure(skb); |
275 | } | 295 | } |
276 | 296 | ||
277 | static inline void dst_set_expires(struct dst_entry *dst, int timeout) | 297 | static inline void dst_set_expires(struct dst_entry *dst, int timeout) |
278 | { | 298 | { |
279 | unsigned long expires = jiffies + timeout; | 299 | unsigned long expires = jiffies + timeout; |
280 | 300 | ||
281 | if (expires == 0) | 301 | if (expires == 0) |
282 | expires = 1; | 302 | expires = 1; |
283 | 303 | ||
284 | if (dst->expires == 0 || time_before(expires, dst->expires)) | 304 | if (dst->expires == 0 || time_before(expires, dst->expires)) |
285 | dst->expires = expires; | 305 | dst->expires = expires; |
286 | } | 306 | } |
287 | 307 | ||
288 | /* Output packet to network from transport. */ | 308 | /* Output packet to network from transport. */ |
289 | static inline int dst_output(struct sk_buff *skb) | 309 | static inline int dst_output(struct sk_buff *skb) |
290 | { | 310 | { |
291 | return skb_dst(skb)->output(skb); | 311 | return skb_dst(skb)->output(skb); |
292 | } | 312 | } |
293 | 313 | ||
294 | /* Input packet from network to transport. */ | 314 | /* Input packet from network to transport. */ |
295 | static inline int dst_input(struct sk_buff *skb) | 315 | static inline int dst_input(struct sk_buff *skb) |
296 | { | 316 | { |
297 | return skb_dst(skb)->input(skb); | 317 | return skb_dst(skb)->input(skb); |
298 | } | 318 | } |
299 | 319 | ||
300 | static inline struct dst_entry *dst_check(struct dst_entry *dst, u32 cookie) | 320 | static inline struct dst_entry *dst_check(struct dst_entry *dst, u32 cookie) |
301 | { | 321 | { |
302 | if (dst->obsolete) | 322 | if (dst->obsolete) |
303 | dst = dst->ops->check(dst, cookie); | 323 | dst = dst->ops->check(dst, cookie); |
304 | return dst; | 324 | return dst; |
305 | } | 325 | } |
306 | 326 | ||
307 | extern void dst_init(void); | 327 | extern void dst_init(void); |
308 | 328 | ||
309 | /* Flags for xfrm_lookup flags argument. */ | 329 | /* Flags for xfrm_lookup flags argument. */ |
310 | enum { | 330 | enum { |
311 | XFRM_LOOKUP_WAIT = 1 << 0, | 331 | XFRM_LOOKUP_WAIT = 1 << 0, |
312 | XFRM_LOOKUP_ICMP = 1 << 1, | 332 | XFRM_LOOKUP_ICMP = 1 << 1, |
313 | }; | 333 | }; |
314 | 334 | ||
315 | struct flowi; | 335 | struct flowi; |
316 | #ifndef CONFIG_XFRM | 336 | #ifndef CONFIG_XFRM |
317 | static inline int xfrm_lookup(struct net *net, struct dst_entry **dst_p, | 337 | static inline int xfrm_lookup(struct net *net, struct dst_entry **dst_p, |
318 | struct flowi *fl, struct sock *sk, int flags) | 338 | struct flowi *fl, struct sock *sk, int flags) |
319 | { | 339 | { |
320 | return 0; | 340 | return 0; |
321 | } | 341 | } |
322 | static inline int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, | 342 | static inline int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, |
323 | struct flowi *fl, struct sock *sk, int flags) | 343 | struct flowi *fl, struct sock *sk, int flags) |
324 | { | 344 | { |
325 | return 0; | 345 | return 0; |
326 | } | 346 | } |
327 | #else | 347 | #else |
328 | extern int xfrm_lookup(struct net *net, struct dst_entry **dst_p, | 348 | extern int xfrm_lookup(struct net *net, struct dst_entry **dst_p, |
329 | struct flowi *fl, struct sock *sk, int flags); | 349 | struct flowi *fl, struct sock *sk, int flags); |
330 | extern int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, | 350 | extern int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, |
331 | struct flowi *fl, struct sock *sk, int flags); | 351 | struct flowi *fl, struct sock *sk, int flags); |
332 | #endif | 352 | #endif |
333 | #endif | 353 | #endif |
334 | 354 | ||
335 | #endif /* _NET_DST_H */ | 355 | #endif /* _NET_DST_H */ |
336 | 356 |
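The remaining hunks convert each tunnel driver to the helper. Sketched from the helper body above rather than quoted from any specific hunk, the per-driver conversion looks roughly like this:

    /* before: open-coded in ipip, gre, sit, ... (illustrative) */
    skb->dev = tunnel->dev;
    tunnel->dev->stats.rx_packets++;
    tunnel->dev->stats.rx_bytes += skb->len;
    skb_dst_drop(skb);
    nf_reset(skb);
    netif_rx(skb);

    /* after: one call, which also clears the stale skb->rxhash */
    skb_tunnel_rx(skb, tunnel->dev);
    netif_rx(skb);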
net/ipv4/ip_gre.c
1 | /* | 1 | /* |
2 | * Linux NET3: GRE over IP protocol decoder. | 2 | * Linux NET3: GRE over IP protocol decoder. |
3 | * | 3 | * |
4 | * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru) | 4 | * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru) |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or | 6 | * This program is free software; you can redistribute it and/or |
7 | * modify it under the terms of the GNU General Public License | 7 | * modify it under the terms of the GNU General Public License |
8 | * as published by the Free Software Foundation; either version | 8 | * as published by the Free Software Foundation; either version |
9 | * 2 of the License, or (at your option) any later version. | 9 | * 2 of the License, or (at your option) any later version. |
10 | * | 10 | * |
11 | */ | 11 | */ |
12 | 12 | ||
13 | #include <linux/capability.h> | 13 | #include <linux/capability.h> |
14 | #include <linux/module.h> | 14 | #include <linux/module.h> |
15 | #include <linux/types.h> | 15 | #include <linux/types.h> |
16 | #include <linux/kernel.h> | 16 | #include <linux/kernel.h> |
17 | #include <linux/slab.h> | 17 | #include <linux/slab.h> |
18 | #include <asm/uaccess.h> | 18 | #include <asm/uaccess.h> |
19 | #include <linux/skbuff.h> | 19 | #include <linux/skbuff.h> |
20 | #include <linux/netdevice.h> | 20 | #include <linux/netdevice.h> |
21 | #include <linux/in.h> | 21 | #include <linux/in.h> |
22 | #include <linux/tcp.h> | 22 | #include <linux/tcp.h> |
23 | #include <linux/udp.h> | 23 | #include <linux/udp.h> |
24 | #include <linux/if_arp.h> | 24 | #include <linux/if_arp.h> |
25 | #include <linux/mroute.h> | 25 | #include <linux/mroute.h> |
26 | #include <linux/init.h> | 26 | #include <linux/init.h> |
27 | #include <linux/in6.h> | 27 | #include <linux/in6.h> |
28 | #include <linux/inetdevice.h> | 28 | #include <linux/inetdevice.h> |
29 | #include <linux/igmp.h> | 29 | #include <linux/igmp.h> |
30 | #include <linux/netfilter_ipv4.h> | 30 | #include <linux/netfilter_ipv4.h> |
31 | #include <linux/etherdevice.h> | 31 | #include <linux/etherdevice.h> |
32 | #include <linux/if_ether.h> | 32 | #include <linux/if_ether.h> |
33 | 33 | ||
34 | #include <net/sock.h> | 34 | #include <net/sock.h> |
35 | #include <net/ip.h> | 35 | #include <net/ip.h> |
36 | #include <net/icmp.h> | 36 | #include <net/icmp.h> |
37 | #include <net/protocol.h> | 37 | #include <net/protocol.h> |
38 | #include <net/ipip.h> | 38 | #include <net/ipip.h> |
39 | #include <net/arp.h> | 39 | #include <net/arp.h> |
40 | #include <net/checksum.h> | 40 | #include <net/checksum.h> |
41 | #include <net/dsfield.h> | 41 | #include <net/dsfield.h> |
42 | #include <net/inet_ecn.h> | 42 | #include <net/inet_ecn.h> |
43 | #include <net/xfrm.h> | 43 | #include <net/xfrm.h> |
44 | #include <net/net_namespace.h> | 44 | #include <net/net_namespace.h> |
45 | #include <net/netns/generic.h> | 45 | #include <net/netns/generic.h> |
46 | #include <net/rtnetlink.h> | 46 | #include <net/rtnetlink.h> |
47 | 47 | ||
48 | #ifdef CONFIG_IPV6 | 48 | #ifdef CONFIG_IPV6 |
49 | #include <net/ipv6.h> | 49 | #include <net/ipv6.h> |
50 | #include <net/ip6_fib.h> | 50 | #include <net/ip6_fib.h> |
51 | #include <net/ip6_route.h> | 51 | #include <net/ip6_route.h> |
52 | #endif | 52 | #endif |
53 | 53 | ||
54 | /* | 54 | /* |
55 | Problems & solutions | 55 | Problems & solutions |
56 | -------------------- | 56 | -------------------- |
57 | 57 | ||
58 | 1. The most important issue is detecting local dead loops. | 58 | 1. The most important issue is detecting local dead loops. |
59 | They would cause complete host lockup in transmit, which | 59 | They would cause complete host lockup in transmit, which |
60 | would be "resolved" by stack overflow or, if queueing is enabled, | 60 | would be "resolved" by stack overflow or, if queueing is enabled, |
61 | with infinite looping in net_bh. | 61 | with infinite looping in net_bh. |
62 | 62 | ||
63 | We cannot track such dead loops during route installation, | 63 | We cannot track such dead loops during route installation, |
64 | it is an infeasible task. The most general solution would be | 64 | it is an infeasible task. The most general solution would be |
65 | to keep an skb->encapsulation counter (a sort of local ttl), | 65 | to keep an skb->encapsulation counter (a sort of local ttl), |
66 | and silently drop the packet when it expires. It is the best | 66 | and silently drop the packet when it expires. It is the best |
67 | solution, but it supposes maintaining a new variable in ALL | 67 | solution, but it supposes maintaining a new variable in ALL |
68 | skbs, even if no tunneling is used. | 68 | skbs, even if no tunneling is used. |
69 | 69 | ||
70 | Current solution: HARD_TX_LOCK lock breaks dead loops. | 70 | Current solution: HARD_TX_LOCK lock breaks dead loops. |
71 | 71 | ||
72 | 72 | ||
73 | 73 | ||
74 | 2. Networking dead loops would not kill routers, but would really | 74 | 2. Networking dead loops would not kill routers, but would really |
75 | kill the network. The IP hop limit plays the role of "t->recursion" in this case, | 75 | kill the network. The IP hop limit plays the role of "t->recursion" in this case, |
76 | if we copy it from the packet being encapsulated to the upper header. | 76 | if we copy it from the packet being encapsulated to the upper header. |
77 | It is a very good solution, but it introduces two problems: | 77 | It is a very good solution, but it introduces two problems: |
78 | 78 | ||
79 | - Routing protocols, using packets with ttl=1 (OSPF, RIP2), | 79 | - Routing protocols, using packets with ttl=1 (OSPF, RIP2), |
80 | do not work over tunnels. | 80 | do not work over tunnels. |
81 | - traceroute does not work. I planned to relay ICMP from the tunnel, | 81 | - traceroute does not work. I planned to relay ICMP from the tunnel, |
82 | so that this problem would be solved and traceroute output | 82 | so that this problem would be solved and traceroute output |
83 | would be even more informative. This idea appeared to be wrong: | 83 | would be even more informative. This idea appeared to be wrong: |
84 | only Linux complies with rfc1812 now (yes, guys, Linux is the only | 84 | only Linux complies with rfc1812 now (yes, guys, Linux is the only |
85 | true router now :-)), all routers (at least, in neighbourhood of mine) | 85 | true router now :-)), all routers (at least, in neighbourhood of mine) |
86 | return only 8 bytes of payload. It is the end. | 86 | return only 8 bytes of payload. It is the end. |
87 | 87 | ||
88 | Hence, if we want OSPF to work or traceroute to say something reasonable, | 88 | Hence, if we want OSPF to work or traceroute to say something reasonable, |
89 | we should search for another solution. | 89 | we should search for another solution. |
90 | 90 | ||
91 | One of them is to parse the packet, trying to detect inner encapsulation | 91 | One of them is to parse the packet, trying to detect inner encapsulation |
92 | made by our node. It is difficult or even impossible, especially | 92 | made by our node. It is difficult or even impossible, especially |
93 | taking into account fragmentation. To be short, it is not a solution at all. | 93 | taking into account fragmentation. To be short, it is not a solution at all. |
94 | 94 | ||
95 | Current solution: The solution was UNEXPECTEDLY SIMPLE. | 95 | Current solution: The solution was UNEXPECTEDLY SIMPLE. |
96 | We force the DF flag on tunnels with a preconfigured hop limit, | 96 | We force the DF flag on tunnels with a preconfigured hop limit, |
97 | that is ALL. :-) Well, it does not remove the problem completely, | 97 | that is ALL. :-) Well, it does not remove the problem completely, |
98 | but exponential growth of network traffic is changed to linear | 98 | but exponential growth of network traffic is changed to linear |
99 | (branches that exceed pmtu are pruned) and tunnel mtu | 99 | (branches that exceed pmtu are pruned) and tunnel mtu |
100 | quickly degrades to a value <68, where looping stops. | 100 | quickly degrades to a value <68, where looping stops. |
101 | Yes, it is not good if there exists a router in the loop, | 101 | Yes, it is not good if there exists a router in the loop, |
102 | which does not force DF, even when encapsulating packets have DF set. | 102 | which does not force DF, even when encapsulating packets have DF set. |
103 | But it is not our problem! Nobody could accuse us, we did | 103 | But it is not our problem! Nobody could accuse us, we did |
104 | all that we could. Even if it was your gated that injected | 104 | all that we could. Even if it was your gated that injected |
105 | a fatal route into the network, even if it were you who configured | 105 | a fatal route into the network, even if it were you who configured |
106 | a fatal static route: you are innocent. :-) | 106 | a fatal static route: you are innocent. :-) |
107 | 107 | ||
108 | 108 | ||
109 | 109 | ||
110 | 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain | 110 | 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain |
111 | practically identical code. It would be good to glue them | 111 | practically identical code. It would be good to glue them |
112 | together, but it is not very evident how to make them modular. | 112 | together, but it is not very evident how to make them modular. |
113 | sit is an integral part of IPv6; ipip and gre are naturally modular. | 113 | sit is an integral part of IPv6; ipip and gre are naturally modular. |
114 | We could extract common parts (hash table, ioctl etc) | 114 | We could extract common parts (hash table, ioctl etc) |
115 | to a separate module (ip_tunnel.c). | 115 | to a separate module (ip_tunnel.c). |
116 | 116 | ||
117 | Alexey Kuznetsov. | 117 | Alexey Kuznetsov. |
118 | */ | 118 | */ |
119 | 119 | ||
120 | static struct rtnl_link_ops ipgre_link_ops __read_mostly; | 120 | static struct rtnl_link_ops ipgre_link_ops __read_mostly; |
121 | static int ipgre_tunnel_init(struct net_device *dev); | 121 | static int ipgre_tunnel_init(struct net_device *dev); |
122 | static void ipgre_tunnel_setup(struct net_device *dev); | 122 | static void ipgre_tunnel_setup(struct net_device *dev); |
123 | static int ipgre_tunnel_bind_dev(struct net_device *dev); | 123 | static int ipgre_tunnel_bind_dev(struct net_device *dev); |
124 | 124 | ||
125 | /* Fallback tunnel: no source, no destination, no key, no options */ | 125 | /* Fallback tunnel: no source, no destination, no key, no options */ |
126 | 126 | ||
127 | #define HASH_SIZE 16 | 127 | #define HASH_SIZE 16 |
128 | 128 | ||
129 | static int ipgre_net_id __read_mostly; | 129 | static int ipgre_net_id __read_mostly; |
130 | struct ipgre_net { | 130 | struct ipgre_net { |
131 | struct ip_tunnel *tunnels[4][HASH_SIZE]; | 131 | struct ip_tunnel *tunnels[4][HASH_SIZE]; |
132 | 132 | ||
133 | struct net_device *fb_tunnel_dev; | 133 | struct net_device *fb_tunnel_dev; |
134 | }; | 134 | }; |
135 | 135 | ||
136 | /* Tunnel hash table */ | 136 | /* Tunnel hash table */ |
137 | 137 | ||
138 | /* | 138 | /* |
139 | 4 hash tables: | 139 | 4 hash tables: |
140 | 140 | ||
141 | 3: (remote,local) | 141 | 3: (remote,local) |
142 | 2: (remote,*) | 142 | 2: (remote,*) |
143 | 1: (*,local) | 143 | 1: (*,local) |
144 | 0: (*,*) | 144 | 0: (*,*) |
145 | 145 | ||
146 | We require an exact key match, i.e. if a key is present in the packet | 146 | We require an exact key match, i.e. if a key is present in the packet |
147 | it will match only a tunnel with the same key; if it is not present, | 147 | it will match only a tunnel with the same key; if it is not present, |
148 | it will match only a keyless tunnel. | 148 | it will match only a keyless tunnel. |
149 | 149 | ||
150 | All keyless packets, if not matching configured keyless tunnels, | 150 | All keyless packets, if not matching configured keyless tunnels, |
151 | will match the fallback tunnel. | 151 | will match the fallback tunnel. |
152 | */ | 152 | */ |
153 | 153 | ||
154 | #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) | 154 | #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) |
155 | 155 | ||
156 | #define tunnels_r_l tunnels[3] | 156 | #define tunnels_r_l tunnels[3] |
157 | #define tunnels_r tunnels[2] | 157 | #define tunnels_r tunnels[2] |
158 | #define tunnels_l tunnels[1] | 158 | #define tunnels_l tunnels[1] |
159 | #define tunnels_wc tunnels[0] | 159 | #define tunnels_wc tunnels[0] |
160 | /* | 160 | /* |
161 | * Locking : hash tables are protected by RCU and a spinlock | 161 | * Locking : hash tables are protected by RCU and a spinlock |
162 | */ | 162 | */ |
163 | static DEFINE_SPINLOCK(ipgre_lock); | 163 | static DEFINE_SPINLOCK(ipgre_lock); |
164 | 164 | ||
165 | #define for_each_ip_tunnel_rcu(start) \ | 165 | #define for_each_ip_tunnel_rcu(start) \ |
166 | for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) | 166 | for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) |
167 | 167 | ||
168 | /* Given src, dst and key, find the appropriate tunnel for input. */ | 168 | /* Given src, dst and key, find the appropriate tunnel for input. */ |
169 | 169 | ||
170 | static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev, | 170 | static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev, |
171 | __be32 remote, __be32 local, | 171 | __be32 remote, __be32 local, |
172 | __be32 key, __be16 gre_proto) | 172 | __be32 key, __be16 gre_proto) |
173 | { | 173 | { |
174 | struct net *net = dev_net(dev); | 174 | struct net *net = dev_net(dev); |
175 | int link = dev->ifindex; | 175 | int link = dev->ifindex; |
176 | unsigned h0 = HASH(remote); | 176 | unsigned h0 = HASH(remote); |
177 | unsigned h1 = HASH(key); | 177 | unsigned h1 = HASH(key); |
178 | struct ip_tunnel *t, *cand = NULL; | 178 | struct ip_tunnel *t, *cand = NULL; |
179 | struct ipgre_net *ign = net_generic(net, ipgre_net_id); | 179 | struct ipgre_net *ign = net_generic(net, ipgre_net_id); |
180 | int dev_type = (gre_proto == htons(ETH_P_TEB)) ? | 180 | int dev_type = (gre_proto == htons(ETH_P_TEB)) ? |
181 | ARPHRD_ETHER : ARPHRD_IPGRE; | 181 | ARPHRD_ETHER : ARPHRD_IPGRE; |
182 | int score, cand_score = 4; | 182 | int score, cand_score = 4; |
183 | 183 | ||
184 | for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) { | 184 | for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) { |
185 | if (local != t->parms.iph.saddr || | 185 | if (local != t->parms.iph.saddr || |
186 | remote != t->parms.iph.daddr || | 186 | remote != t->parms.iph.daddr || |
187 | key != t->parms.i_key || | 187 | key != t->parms.i_key || |
188 | !(t->dev->flags & IFF_UP)) | 188 | !(t->dev->flags & IFF_UP)) |
189 | continue; | 189 | continue; |
190 | 190 | ||
191 | if (t->dev->type != ARPHRD_IPGRE && | 191 | if (t->dev->type != ARPHRD_IPGRE && |
192 | t->dev->type != dev_type) | 192 | t->dev->type != dev_type) |
193 | continue; | 193 | continue; |
194 | 194 | ||
195 | score = 0; | 195 | score = 0; |
196 | if (t->parms.link != link) | 196 | if (t->parms.link != link) |
197 | score |= 1; | 197 | score |= 1; |
198 | if (t->dev->type != dev_type) | 198 | if (t->dev->type != dev_type) |
199 | score |= 2; | 199 | score |= 2; |
200 | if (score == 0) | 200 | if (score == 0) |
201 | return t; | 201 | return t; |
202 | 202 | ||
203 | if (score < cand_score) { | 203 | if (score < cand_score) { |
204 | cand = t; | 204 | cand = t; |
205 | cand_score = score; | 205 | cand_score = score; |
206 | } | 206 | } |
207 | } | 207 | } |
208 | 208 | ||
209 | for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) { | 209 | for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) { |
210 | if (remote != t->parms.iph.daddr || | 210 | if (remote != t->parms.iph.daddr || |
211 | key != t->parms.i_key || | 211 | key != t->parms.i_key || |
212 | !(t->dev->flags & IFF_UP)) | 212 | !(t->dev->flags & IFF_UP)) |
213 | continue; | 213 | continue; |
214 | 214 | ||
215 | if (t->dev->type != ARPHRD_IPGRE && | 215 | if (t->dev->type != ARPHRD_IPGRE && |
216 | t->dev->type != dev_type) | 216 | t->dev->type != dev_type) |
217 | continue; | 217 | continue; |
218 | 218 | ||
219 | score = 0; | 219 | score = 0; |
220 | if (t->parms.link != link) | 220 | if (t->parms.link != link) |
221 | score |= 1; | 221 | score |= 1; |
222 | if (t->dev->type != dev_type) | 222 | if (t->dev->type != dev_type) |
223 | score |= 2; | 223 | score |= 2; |
224 | if (score == 0) | 224 | if (score == 0) |
225 | return t; | 225 | return t; |
226 | 226 | ||
227 | if (score < cand_score) { | 227 | if (score < cand_score) { |
228 | cand = t; | 228 | cand = t; |
229 | cand_score = score; | 229 | cand_score = score; |
230 | } | 230 | } |
231 | } | 231 | } |
232 | 232 | ||
233 | for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) { | 233 | for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) { |
234 | if ((local != t->parms.iph.saddr && | 234 | if ((local != t->parms.iph.saddr && |
235 | (local != t->parms.iph.daddr || | 235 | (local != t->parms.iph.daddr || |
236 | !ipv4_is_multicast(local))) || | 236 | !ipv4_is_multicast(local))) || |
237 | key != t->parms.i_key || | 237 | key != t->parms.i_key || |
238 | !(t->dev->flags & IFF_UP)) | 238 | !(t->dev->flags & IFF_UP)) |
239 | continue; | 239 | continue; |
240 | 240 | ||
241 | if (t->dev->type != ARPHRD_IPGRE && | 241 | if (t->dev->type != ARPHRD_IPGRE && |
242 | t->dev->type != dev_type) | 242 | t->dev->type != dev_type) |
243 | continue; | 243 | continue; |
244 | 244 | ||
245 | score = 0; | 245 | score = 0; |
246 | if (t->parms.link != link) | 246 | if (t->parms.link != link) |
247 | score |= 1; | 247 | score |= 1; |
248 | if (t->dev->type != dev_type) | 248 | if (t->dev->type != dev_type) |
249 | score |= 2; | 249 | score |= 2; |
250 | if (score == 0) | 250 | if (score == 0) |
251 | return t; | 251 | return t; |
252 | 252 | ||
253 | if (score < cand_score) { | 253 | if (score < cand_score) { |
254 | cand = t; | 254 | cand = t; |
255 | cand_score = score; | 255 | cand_score = score; |
256 | } | 256 | } |
257 | } | 257 | } |
258 | 258 | ||
259 | for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) { | 259 | for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) { |
260 | if (t->parms.i_key != key || | 260 | if (t->parms.i_key != key || |
261 | !(t->dev->flags & IFF_UP)) | 261 | !(t->dev->flags & IFF_UP)) |
262 | continue; | 262 | continue; |
263 | 263 | ||
264 | if (t->dev->type != ARPHRD_IPGRE && | 264 | if (t->dev->type != ARPHRD_IPGRE && |
265 | t->dev->type != dev_type) | 265 | t->dev->type != dev_type) |
266 | continue; | 266 | continue; |
267 | 267 | ||
268 | score = 0; | 268 | score = 0; |
269 | if (t->parms.link != link) | 269 | if (t->parms.link != link) |
270 | score |= 1; | 270 | score |= 1; |
271 | if (t->dev->type != dev_type) | 271 | if (t->dev->type != dev_type) |
272 | score |= 2; | 272 | score |= 2; |
273 | if (score == 0) | 273 | if (score == 0) |
274 | return t; | 274 | return t; |
275 | 275 | ||
276 | if (score < cand_score) { | 276 | if (score < cand_score) { |
277 | cand = t; | 277 | cand = t; |
278 | cand_score = score; | 278 | cand_score = score; |
279 | } | 279 | } |
280 | } | 280 | } |
281 | 281 | ||
282 | if (cand != NULL) | 282 | if (cand != NULL) |
283 | return cand; | 283 | return cand; |
284 | 284 | ||
285 | dev = ign->fb_tunnel_dev; | 285 | dev = ign->fb_tunnel_dev; |
286 | if (dev->flags & IFF_UP) | 286 | if (dev->flags & IFF_UP) |
287 | return netdev_priv(dev); | 287 | return netdev_priv(dev); |
288 | 288 | ||
289 | return NULL; | 289 | return NULL; |
290 | } | 290 | } |
291 | 291 | ||
292 | static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign, | 292 | static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign, |
293 | struct ip_tunnel_parm *parms) | 293 | struct ip_tunnel_parm *parms) |
294 | { | 294 | { |
295 | __be32 remote = parms->iph.daddr; | 295 | __be32 remote = parms->iph.daddr; |
296 | __be32 local = parms->iph.saddr; | 296 | __be32 local = parms->iph.saddr; |
297 | __be32 key = parms->i_key; | 297 | __be32 key = parms->i_key; |
298 | unsigned h = HASH(key); | 298 | unsigned h = HASH(key); |
299 | int prio = 0; | 299 | int prio = 0; |
300 | 300 | ||
301 | if (local) | 301 | if (local) |
302 | prio |= 1; | 302 | prio |= 1; |
303 | if (remote && !ipv4_is_multicast(remote)) { | 303 | if (remote && !ipv4_is_multicast(remote)) { |
304 | prio |= 2; | 304 | prio |= 2; |
305 | h ^= HASH(remote); | 305 | h ^= HASH(remote); |
306 | } | 306 | } |
307 | 307 | ||
308 | return &ign->tunnels[prio][h]; | 308 | return &ign->tunnels[prio][h]; |
309 | } | 309 | } |
310 | 310 | ||
311 | static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign, | 311 | static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign, |
312 | struct ip_tunnel *t) | 312 | struct ip_tunnel *t) |
313 | { | 313 | { |
314 | return __ipgre_bucket(ign, &t->parms); | 314 | return __ipgre_bucket(ign, &t->parms); |
315 | } | 315 | } |
316 | 316 | ||
317 | static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t) | 317 | static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t) |
318 | { | 318 | { |
319 | struct ip_tunnel **tp = ipgre_bucket(ign, t); | 319 | struct ip_tunnel **tp = ipgre_bucket(ign, t); |
320 | 320 | ||
321 | spin_lock_bh(&ipgre_lock); | 321 | spin_lock_bh(&ipgre_lock); |
322 | t->next = *tp; | 322 | t->next = *tp; |
323 | rcu_assign_pointer(*tp, t); | 323 | rcu_assign_pointer(*tp, t); |
324 | spin_unlock_bh(&ipgre_lock); | 324 | spin_unlock_bh(&ipgre_lock); |
325 | } | 325 | } |
326 | 326 | ||
327 | static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t) | 327 | static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t) |
328 | { | 328 | { |
329 | struct ip_tunnel **tp; | 329 | struct ip_tunnel **tp; |
330 | 330 | ||
331 | for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) { | 331 | for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) { |
332 | if (t == *tp) { | 332 | if (t == *tp) { |
333 | spin_lock_bh(&ipgre_lock); | 333 | spin_lock_bh(&ipgre_lock); |
334 | *tp = t->next; | 334 | *tp = t->next; |
335 | spin_unlock_bh(&ipgre_lock); | 335 | spin_unlock_bh(&ipgre_lock); |
336 | break; | 336 | break; |
337 | } | 337 | } |
338 | } | 338 | } |
339 | } | 339 | } |
340 | 340 | ||
341 | static struct ip_tunnel *ipgre_tunnel_find(struct net *net, | 341 | static struct ip_tunnel *ipgre_tunnel_find(struct net *net, |
342 | struct ip_tunnel_parm *parms, | 342 | struct ip_tunnel_parm *parms, |
343 | int type) | 343 | int type) |
344 | { | 344 | { |
345 | __be32 remote = parms->iph.daddr; | 345 | __be32 remote = parms->iph.daddr; |
346 | __be32 local = parms->iph.saddr; | 346 | __be32 local = parms->iph.saddr; |
347 | __be32 key = parms->i_key; | 347 | __be32 key = parms->i_key; |
348 | int link = parms->link; | 348 | int link = parms->link; |
349 | struct ip_tunnel *t, **tp; | 349 | struct ip_tunnel *t, **tp; |
350 | struct ipgre_net *ign = net_generic(net, ipgre_net_id); | 350 | struct ipgre_net *ign = net_generic(net, ipgre_net_id); |
351 | 351 | ||
352 | for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next) | 352 | for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next) |
353 | if (local == t->parms.iph.saddr && | 353 | if (local == t->parms.iph.saddr && |
354 | remote == t->parms.iph.daddr && | 354 | remote == t->parms.iph.daddr && |
355 | key == t->parms.i_key && | 355 | key == t->parms.i_key && |
356 | link == t->parms.link && | 356 | link == t->parms.link && |
357 | type == t->dev->type) | 357 | type == t->dev->type) |
358 | break; | 358 | break; |
359 | 359 | ||
360 | return t; | 360 | return t; |
361 | } | 361 | } |
362 | 362 | ||
363 | static struct ip_tunnel * ipgre_tunnel_locate(struct net *net, | 363 | static struct ip_tunnel * ipgre_tunnel_locate(struct net *net, |
364 | struct ip_tunnel_parm *parms, int create) | 364 | struct ip_tunnel_parm *parms, int create) |
365 | { | 365 | { |
366 | struct ip_tunnel *t, *nt; | 366 | struct ip_tunnel *t, *nt; |
367 | struct net_device *dev; | 367 | struct net_device *dev; |
368 | char name[IFNAMSIZ]; | 368 | char name[IFNAMSIZ]; |
369 | struct ipgre_net *ign = net_generic(net, ipgre_net_id); | 369 | struct ipgre_net *ign = net_generic(net, ipgre_net_id); |
370 | 370 | ||
371 | t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE); | 371 | t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE); |
372 | if (t || !create) | 372 | if (t || !create) |
373 | return t; | 373 | return t; |
374 | 374 | ||
375 | if (parms->name[0]) | 375 | if (parms->name[0]) |
376 | strlcpy(name, parms->name, IFNAMSIZ); | 376 | strlcpy(name, parms->name, IFNAMSIZ); |
377 | else | 377 | else |
378 | sprintf(name, "gre%%d"); | 378 | sprintf(name, "gre%%d"); |
379 | 379 | ||
380 | dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup); | 380 | dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup); |
381 | if (!dev) | 381 | if (!dev) |
382 | return NULL; | 382 | return NULL; |
383 | 383 | ||
384 | dev_net_set(dev, net); | 384 | dev_net_set(dev, net); |
385 | 385 | ||
386 | if (strchr(name, '%')) { | 386 | if (strchr(name, '%')) { |
387 | if (dev_alloc_name(dev, name) < 0) | 387 | if (dev_alloc_name(dev, name) < 0) |
388 | goto failed_free; | 388 | goto failed_free; |
389 | } | 389 | } |
390 | 390 | ||
391 | nt = netdev_priv(dev); | 391 | nt = netdev_priv(dev); |
392 | nt->parms = *parms; | 392 | nt->parms = *parms; |
393 | dev->rtnl_link_ops = &ipgre_link_ops; | 393 | dev->rtnl_link_ops = &ipgre_link_ops; |
394 | 394 | ||
395 | dev->mtu = ipgre_tunnel_bind_dev(dev); | 395 | dev->mtu = ipgre_tunnel_bind_dev(dev); |
396 | 396 | ||
397 | if (register_netdevice(dev) < 0) | 397 | if (register_netdevice(dev) < 0) |
398 | goto failed_free; | 398 | goto failed_free; |
399 | 399 | ||
400 | dev_hold(dev); | 400 | dev_hold(dev); |
401 | ipgre_tunnel_link(ign, nt); | 401 | ipgre_tunnel_link(ign, nt); |
402 | return nt; | 402 | return nt; |
403 | 403 | ||
404 | failed_free: | 404 | failed_free: |
405 | free_netdev(dev); | 405 | free_netdev(dev); |
406 | return NULL; | 406 | return NULL; |
407 | } | 407 | } |
408 | 408 | ||
409 | static void ipgre_tunnel_uninit(struct net_device *dev) | 409 | static void ipgre_tunnel_uninit(struct net_device *dev) |
410 | { | 410 | { |
411 | struct net *net = dev_net(dev); | 411 | struct net *net = dev_net(dev); |
412 | struct ipgre_net *ign = net_generic(net, ipgre_net_id); | 412 | struct ipgre_net *ign = net_generic(net, ipgre_net_id); |
413 | 413 | ||
414 | ipgre_tunnel_unlink(ign, netdev_priv(dev)); | 414 | ipgre_tunnel_unlink(ign, netdev_priv(dev)); |
415 | dev_put(dev); | 415 | dev_put(dev); |
416 | } | 416 | } |
417 | 417 | ||
418 | 418 | ||
419 | static void ipgre_err(struct sk_buff *skb, u32 info) | 419 | static void ipgre_err(struct sk_buff *skb, u32 info) |
420 | { | 420 | { |
421 | 421 | ||
422 | /* All the routers (except for Linux) return only | 422 | /* All the routers (except for Linux) return only |
423 | 8 bytes of packet payload. It means that precise relaying of | 423 | 8 bytes of packet payload. It means that precise relaying of |
424 | ICMP in the real Internet is absolutely infeasible. | 424 | ICMP in the real Internet is absolutely infeasible. |
425 | 425 | ||
426 | Moreover, Cisco "wise men" put the GRE key in the third word | 426 | Moreover, Cisco "wise men" put the GRE key in the third word |
427 | in the GRE header. It makes it impossible to maintain even soft state for keyed | 427 | in the GRE header. It makes it impossible to maintain even soft state for keyed |
428 | GRE tunnels with checksum enabled. Tell them "thank you". | 428 | GRE tunnels with checksum enabled. Tell them "thank you". |
429 | 429 | ||
430 | Well, I wonder: rfc1812 was written by a Cisco employee, | 430 | Well, I wonder: rfc1812 was written by a Cisco employee, |
431 | so why the hell do these idiots break standards established | 431 | so why the hell do these idiots break standards established |
432 | by themselves??? | 432 | by themselves??? |
433 | */ | 433 | */ |
434 | 434 | ||
435 | struct iphdr *iph = (struct iphdr *)skb->data; | 435 | struct iphdr *iph = (struct iphdr *)skb->data; |
436 | __be16 *p = (__be16*)(skb->data+(iph->ihl<<2)); | 436 | __be16 *p = (__be16*)(skb->data+(iph->ihl<<2)); |
437 | int grehlen = (iph->ihl<<2) + 4; | 437 | int grehlen = (iph->ihl<<2) + 4; |
438 | const int type = icmp_hdr(skb)->type; | 438 | const int type = icmp_hdr(skb)->type; |
439 | const int code = icmp_hdr(skb)->code; | 439 | const int code = icmp_hdr(skb)->code; |
440 | struct ip_tunnel *t; | 440 | struct ip_tunnel *t; |
441 | __be16 flags; | 441 | __be16 flags; |
442 | 442 | ||
443 | flags = p[0]; | 443 | flags = p[0]; |
444 | if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) { | 444 | if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) { |
445 | if (flags&(GRE_VERSION|GRE_ROUTING)) | 445 | if (flags&(GRE_VERSION|GRE_ROUTING)) |
446 | return; | 446 | return; |
447 | if (flags&GRE_KEY) { | 447 | if (flags&GRE_KEY) { |
448 | grehlen += 4; | 448 | grehlen += 4; |
449 | if (flags&GRE_CSUM) | 449 | if (flags&GRE_CSUM) |
450 | grehlen += 4; | 450 | grehlen += 4; |
451 | } | 451 | } |
452 | } | 452 | } |
453 | 453 | ||
454 | /* If only 8 bytes returned, keyed message will be dropped here */ | 454 | /* If only 8 bytes returned, keyed message will be dropped here */ |
455 | if (skb_headlen(skb) < grehlen) | 455 | if (skb_headlen(skb) < grehlen) |
456 | return; | 456 | return; |
457 | 457 | ||
458 | switch (type) { | 458 | switch (type) { |
459 | default: | 459 | default: |
460 | case ICMP_PARAMETERPROB: | 460 | case ICMP_PARAMETERPROB: |
461 | return; | 461 | return; |
462 | 462 | ||
463 | case ICMP_DEST_UNREACH: | 463 | case ICMP_DEST_UNREACH: |
464 | switch (code) { | 464 | switch (code) { |
465 | case ICMP_SR_FAILED: | 465 | case ICMP_SR_FAILED: |
466 | case ICMP_PORT_UNREACH: | 466 | case ICMP_PORT_UNREACH: |
467 | /* Impossible event. */ | 467 | /* Impossible event. */ |
468 | return; | 468 | return; |
469 | case ICMP_FRAG_NEEDED: | 469 | case ICMP_FRAG_NEEDED: |
470 | /* Soft state for pmtu is maintained by IP core. */ | 470 | /* Soft state for pmtu is maintained by IP core. */ |
471 | return; | 471 | return; |
472 | default: | 472 | default: |
473 | /* All others are translated to HOST_UNREACH. | 473 | /* All others are translated to HOST_UNREACH. |
474 | rfc2003 contains "deep thoughts" about NET_UNREACH, | 474 | rfc2003 contains "deep thoughts" about NET_UNREACH, |
475 | I believe they are just ether pollution. --ANK | 475 | I believe they are just ether pollution. --ANK |
476 | */ | 476 | */ |
477 | break; | 477 | break; |
478 | } | 478 | } |
479 | break; | 479 | break; |
480 | case ICMP_TIME_EXCEEDED: | 480 | case ICMP_TIME_EXCEEDED: |
481 | if (code != ICMP_EXC_TTL) | 481 | if (code != ICMP_EXC_TTL) |
482 | return; | 482 | return; |
483 | break; | 483 | break; |
484 | } | 484 | } |
485 | 485 | ||
486 | rcu_read_lock(); | 486 | rcu_read_lock(); |
487 | t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr, | 487 | t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr, |
488 | flags & GRE_KEY ? | 488 | flags & GRE_KEY ? |
489 | *(((__be32 *)p) + (grehlen / 4) - 1) : 0, | 489 | *(((__be32 *)p) + (grehlen / 4) - 1) : 0, |
490 | p[1]); | 490 | p[1]); |
491 | if (t == NULL || t->parms.iph.daddr == 0 || | 491 | if (t == NULL || t->parms.iph.daddr == 0 || |
492 | ipv4_is_multicast(t->parms.iph.daddr)) | 492 | ipv4_is_multicast(t->parms.iph.daddr)) |
493 | goto out; | 493 | goto out; |
494 | 494 | ||
495 | if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) | 495 | if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) |
496 | goto out; | 496 | goto out; |
497 | 497 | ||
498 | if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO)) | 498 | if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO)) |
499 | t->err_count++; | 499 | t->err_count++; |
500 | else | 500 | else |
501 | t->err_count = 1; | 501 | t->err_count = 1; |
502 | t->err_time = jiffies; | 502 | t->err_time = jiffies; |
503 | out: | 503 | out: |
504 | rcu_read_unlock(); | 504 | rcu_read_unlock(); |
505 | return; | 505 | return; |
506 | } | 506 | } |
507 | 507 | ||
508 | static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) | 508 | static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) |
509 | { | 509 | { |
510 | if (INET_ECN_is_ce(iph->tos)) { | 510 | if (INET_ECN_is_ce(iph->tos)) { |
511 | if (skb->protocol == htons(ETH_P_IP)) { | 511 | if (skb->protocol == htons(ETH_P_IP)) { |
512 | IP_ECN_set_ce(ip_hdr(skb)); | 512 | IP_ECN_set_ce(ip_hdr(skb)); |
513 | } else if (skb->protocol == htons(ETH_P_IPV6)) { | 513 | } else if (skb->protocol == htons(ETH_P_IPV6)) { |
514 | IP6_ECN_set_ce(ipv6_hdr(skb)); | 514 | IP6_ECN_set_ce(ipv6_hdr(skb)); |
515 | } | 515 | } |
516 | } | 516 | } |
517 | } | 517 | } |
518 | 518 | ||
519 | static inline u8 | 519 | static inline u8 |
520 | ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb) | 520 | ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb) |
521 | { | 521 | { |
522 | u8 inner = 0; | 522 | u8 inner = 0; |
523 | if (skb->protocol == htons(ETH_P_IP)) | 523 | if (skb->protocol == htons(ETH_P_IP)) |
524 | inner = old_iph->tos; | 524 | inner = old_iph->tos; |
525 | else if (skb->protocol == htons(ETH_P_IPV6)) | 525 | else if (skb->protocol == htons(ETH_P_IPV6)) |
526 | inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph); | 526 | inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph); |
527 | return INET_ECN_encapsulate(tos, inner); | 527 | return INET_ECN_encapsulate(tos, inner); |
528 | } | 528 | } |
529 | 529 | ||
530 | static int ipgre_rcv(struct sk_buff *skb) | 530 | static int ipgre_rcv(struct sk_buff *skb) |
531 | { | 531 | { |
532 | struct iphdr *iph; | 532 | struct iphdr *iph; |
533 | u8 *h; | 533 | u8 *h; |
534 | __be16 flags; | 534 | __be16 flags; |
535 | __sum16 csum = 0; | 535 | __sum16 csum = 0; |
536 | __be32 key = 0; | 536 | __be32 key = 0; |
537 | u32 seqno = 0; | 537 | u32 seqno = 0; |
538 | struct ip_tunnel *tunnel; | 538 | struct ip_tunnel *tunnel; |
539 | int offset = 4; | 539 | int offset = 4; |
540 | __be16 gre_proto; | 540 | __be16 gre_proto; |
541 | unsigned int len; | ||
542 | 541 | ||
543 | if (!pskb_may_pull(skb, 16)) | 542 | if (!pskb_may_pull(skb, 16)) |
544 | goto drop_nolock; | 543 | goto drop_nolock; |
545 | 544 | ||
546 | iph = ip_hdr(skb); | 545 | iph = ip_hdr(skb); |
547 | h = skb->data; | 546 | h = skb->data; |
548 | flags = *(__be16*)h; | 547 | flags = *(__be16*)h; |
549 | 548 | ||
550 | if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) { | 549 | if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) { |
551 | /* - Version must be 0. | 550 | /* - Version must be 0. |
552 | - We do not support routing headers. | 551 | - We do not support routing headers. |
553 | */ | 552 | */ |
554 | if (flags&(GRE_VERSION|GRE_ROUTING)) | 553 | if (flags&(GRE_VERSION|GRE_ROUTING)) |
555 | goto drop_nolock; | 554 | goto drop_nolock; |
556 | 555 | ||
557 | if (flags&GRE_CSUM) { | 556 | if (flags&GRE_CSUM) { |
558 | switch (skb->ip_summed) { | 557 | switch (skb->ip_summed) { |
559 | case CHECKSUM_COMPLETE: | 558 | case CHECKSUM_COMPLETE: |
560 | csum = csum_fold(skb->csum); | 559 | csum = csum_fold(skb->csum); |
561 | if (!csum) | 560 | if (!csum) |
562 | break; | 561 | break; |
563 | /* fall through */ | 562 | /* fall through */ |
564 | case CHECKSUM_NONE: | 563 | case CHECKSUM_NONE: |
565 | skb->csum = 0; | 564 | skb->csum = 0; |
566 | csum = __skb_checksum_complete(skb); | 565 | csum = __skb_checksum_complete(skb); |
567 | skb->ip_summed = CHECKSUM_COMPLETE; | 566 | skb->ip_summed = CHECKSUM_COMPLETE; |
568 | } | 567 | } |
569 | offset += 4; | 568 | offset += 4; |
570 | } | 569 | } |
571 | if (flags&GRE_KEY) { | 570 | if (flags&GRE_KEY) { |
572 | key = *(__be32*)(h + offset); | 571 | key = *(__be32*)(h + offset); |
573 | offset += 4; | 572 | offset += 4; |
574 | } | 573 | } |
575 | if (flags&GRE_SEQ) { | 574 | if (flags&GRE_SEQ) { |
576 | seqno = ntohl(*(__be32*)(h + offset)); | 575 | seqno = ntohl(*(__be32*)(h + offset)); |
577 | offset += 4; | 576 | offset += 4; |
578 | } | 577 | } |
579 | } | 578 | } |
580 | 579 | ||
581 | gre_proto = *(__be16 *)(h + 2); | 580 | gre_proto = *(__be16 *)(h + 2); |
582 | 581 | ||
583 | rcu_read_lock(); | 582 | rcu_read_lock(); |
584 | if ((tunnel = ipgre_tunnel_lookup(skb->dev, | 583 | if ((tunnel = ipgre_tunnel_lookup(skb->dev, |
585 | iph->saddr, iph->daddr, key, | 584 | iph->saddr, iph->daddr, key, |
586 | gre_proto))) { | 585 | gre_proto))) { |
587 | struct net_device_stats *stats = &tunnel->dev->stats; | 586 | struct net_device_stats *stats = &tunnel->dev->stats; |
588 | 587 | ||
589 | secpath_reset(skb); | 588 | secpath_reset(skb); |
590 | 589 | ||
591 | skb->protocol = gre_proto; | 590 | skb->protocol = gre_proto; |
592 | /* WCCP version 1 and 2 protocol decoding. | 591 | /* WCCP version 1 and 2 protocol decoding. |
593 | * - Change protocol to IP | 592 | * - Change protocol to IP |
594 | * - When dealing with WCCPv2, skip extra 4 bytes in GRE header | 593 | * - When dealing with WCCPv2, skip extra 4 bytes in GRE header |
595 | */ | 594 | */ |
596 | if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) { | 595 | if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) { |
597 | skb->protocol = htons(ETH_P_IP); | 596 | skb->protocol = htons(ETH_P_IP); |
598 | if ((*(h + offset) & 0xF0) != 0x40) | 597 | if ((*(h + offset) & 0xF0) != 0x40) |
599 | offset += 4; | 598 | offset += 4; |
600 | } | 599 | } |
601 | 600 | ||
602 | skb->mac_header = skb->network_header; | 601 | skb->mac_header = skb->network_header; |
603 | __pskb_pull(skb, offset); | 602 | __pskb_pull(skb, offset); |
604 | skb_postpull_rcsum(skb, skb_transport_header(skb), offset); | 603 | skb_postpull_rcsum(skb, skb_transport_header(skb), offset); |
605 | skb->pkt_type = PACKET_HOST; | 604 | skb->pkt_type = PACKET_HOST; |
606 | #ifdef CONFIG_NET_IPGRE_BROADCAST | 605 | #ifdef CONFIG_NET_IPGRE_BROADCAST |
607 | if (ipv4_is_multicast(iph->daddr)) { | 606 | if (ipv4_is_multicast(iph->daddr)) { |
608 | /* Looped back packet, drop it! */ | 607 | /* Looped back packet, drop it! */ |
609 | if (skb_rtable(skb)->fl.iif == 0) | 608 | if (skb_rtable(skb)->fl.iif == 0) |
610 | goto drop; | 609 | goto drop; |
611 | stats->multicast++; | 610 | stats->multicast++; |
612 | skb->pkt_type = PACKET_BROADCAST; | 611 | skb->pkt_type = PACKET_BROADCAST; |
613 | } | 612 | } |
614 | #endif | 613 | #endif |
615 | 614 | ||
616 | if (((flags&GRE_CSUM) && csum) || | 615 | if (((flags&GRE_CSUM) && csum) || |
617 | (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) { | 616 | (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) { |
618 | stats->rx_crc_errors++; | 617 | stats->rx_crc_errors++; |
619 | stats->rx_errors++; | 618 | stats->rx_errors++; |
620 | goto drop; | 619 | goto drop; |
621 | } | 620 | } |
622 | if (tunnel->parms.i_flags&GRE_SEQ) { | 621 | if (tunnel->parms.i_flags&GRE_SEQ) { |
623 | if (!(flags&GRE_SEQ) || | 622 | if (!(flags&GRE_SEQ) || |
624 | (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) { | 623 | (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) { |
625 | stats->rx_fifo_errors++; | 624 | stats->rx_fifo_errors++; |
626 | stats->rx_errors++; | 625 | stats->rx_errors++; |
627 | goto drop; | 626 | goto drop; |
628 | } | 627 | } |
629 | tunnel->i_seqno = seqno + 1; | 628 | tunnel->i_seqno = seqno + 1; |
630 | } | 629 | } |
631 | 630 | ||
632 | len = skb->len; | ||
633 | |||
634 | /* Warning: All skb pointers will be invalidated! */ | 631 | /* Warning: All skb pointers will be invalidated! */ |
635 | if (tunnel->dev->type == ARPHRD_ETHER) { | 632 | if (tunnel->dev->type == ARPHRD_ETHER) { |
636 | if (!pskb_may_pull(skb, ETH_HLEN)) { | 633 | if (!pskb_may_pull(skb, ETH_HLEN)) { |
637 | stats->rx_length_errors++; | 634 | stats->rx_length_errors++; |
638 | stats->rx_errors++; | 635 | stats->rx_errors++; |
639 | goto drop; | 636 | goto drop; |
640 | } | 637 | } |
641 | 638 | ||
642 | iph = ip_hdr(skb); | 639 | iph = ip_hdr(skb); |
643 | skb->protocol = eth_type_trans(skb, tunnel->dev); | 640 | skb->protocol = eth_type_trans(skb, tunnel->dev); |
644 | skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); | 641 | skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); |
645 | } | 642 | } |
646 | 643 | ||
647 | stats->rx_packets++; | 644 | skb_tunnel_rx(skb, tunnel->dev); |
648 | stats->rx_bytes += len; | ||
649 | skb->dev = tunnel->dev; | ||
650 | skb_dst_drop(skb); | ||
651 | nf_reset(skb); | ||
652 | 645 | ||
653 | skb_reset_network_header(skb); | 646 | skb_reset_network_header(skb); |
654 | ipgre_ecn_decapsulate(iph, skb); | 647 | ipgre_ecn_decapsulate(iph, skb); |
655 | 648 | ||
656 | netif_rx(skb); | 649 | netif_rx(skb); |
657 | rcu_read_unlock(); | 650 | rcu_read_unlock(); |
658 | return(0); | 651 | return(0); |
659 | } | 652 | } |
660 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); | 653 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); |
661 | 654 | ||
662 | drop: | 655 | drop: |
663 | rcu_read_unlock(); | 656 | rcu_read_unlock(); |
664 | drop_nolock: | 657 | drop_nolock: |
665 | kfree_skb(skb); | 658 | kfree_skb(skb); |
666 | return(0); | 659 | return(0); |
667 | } | 660 | } |
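The receive path above now funnels the per-tunnel bookkeeping through skb_tunnel_rx() instead of open-coding it: the deleted left-hand lines show what got factored out (rx_packets/rx_bytes accounting, reassigning skb->dev to the tunnel device, dropping the stale dst, resetting netfilter state), and the local len snapshot became unnecessary along with them. A minimal sketch of such a helper, reconstructed from that deleted sequence plus a cleared receive hash so packet steering is recomputed for the decapsulated flow (illustrative only; the field name rxhash is an assumption here):

	static inline void skb_tunnel_rx_sketch(struct sk_buff *skb,
						struct net_device *dev)
	{
		dev->stats.rx_packets++;
		dev->stats.rx_bytes += skb->len;
		skb->rxhash = 0;	/* stale hash belongs to the outer flow */
		skb->dev = dev;		/* deliver on the tunnel device */
		skb_dst_drop(skb);	/* outer route no longer applies */
		nf_reset(skb);		/* drop conntrack state of the outer packet */
	}

One visible consequence of the diff: skb->len is now sampled after the ARPHRD_ETHER branch has pulled the inner Ethernet header, whereas the old code captured len before it, so rx_bytes on Ethernet-mode (gretap) tunnels now excludes the inner Ethernet header.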
668 | 661 | ||
669 | static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) | 662 | static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) |
670 | { | 663 | { |
671 | struct ip_tunnel *tunnel = netdev_priv(dev); | 664 | struct ip_tunnel *tunnel = netdev_priv(dev); |
672 | struct net_device_stats *stats = &dev->stats; | 665 | struct net_device_stats *stats = &dev->stats; |
673 | struct netdev_queue *txq = netdev_get_tx_queue(dev, 0); | 666 | struct netdev_queue *txq = netdev_get_tx_queue(dev, 0); |
674 | struct iphdr *old_iph = ip_hdr(skb); | 667 | struct iphdr *old_iph = ip_hdr(skb); |
675 | struct iphdr *tiph; | 668 | struct iphdr *tiph; |
676 | u8 tos; | 669 | u8 tos; |
677 | __be16 df; | 670 | __be16 df; |
678 | struct rtable *rt; /* Route to the other host */ | 671 | struct rtable *rt; /* Route to the other host */ |
679 | struct net_device *tdev; /* Device to other host */ | 672 | struct net_device *tdev; /* Device to other host */ |
680 | struct iphdr *iph; /* Our new IP header */ | 673 | struct iphdr *iph; /* Our new IP header */ |
681 | unsigned int max_headroom; /* The extra header space needed */ | 674 | unsigned int max_headroom; /* The extra header space needed */ |
682 | int gre_hlen; | 675 | int gre_hlen; |
683 | __be32 dst; | 676 | __be32 dst; |
684 | int mtu; | 677 | int mtu; |
685 | 678 | ||
686 | if (dev->type == ARPHRD_ETHER) | 679 | if (dev->type == ARPHRD_ETHER) |
687 | IPCB(skb)->flags = 0; | 680 | IPCB(skb)->flags = 0; |
688 | 681 | ||
689 | if (dev->header_ops && dev->type == ARPHRD_IPGRE) { | 682 | if (dev->header_ops && dev->type == ARPHRD_IPGRE) { |
690 | gre_hlen = 0; | 683 | gre_hlen = 0; |
691 | tiph = (struct iphdr *)skb->data; | 684 | tiph = (struct iphdr *)skb->data; |
692 | } else { | 685 | } else { |
693 | gre_hlen = tunnel->hlen; | 686 | gre_hlen = tunnel->hlen; |
694 | tiph = &tunnel->parms.iph; | 687 | tiph = &tunnel->parms.iph; |
695 | } | 688 | } |
696 | 689 | ||
697 | if ((dst = tiph->daddr) == 0) { | 690 | if ((dst = tiph->daddr) == 0) { |
698 | /* NBMA tunnel */ | 691 | /* NBMA tunnel */ |
699 | 692 | ||
700 | if (skb_dst(skb) == NULL) { | 693 | if (skb_dst(skb) == NULL) { |
701 | stats->tx_fifo_errors++; | 694 | stats->tx_fifo_errors++; |
702 | goto tx_error; | 695 | goto tx_error; |
703 | } | 696 | } |
704 | 697 | ||
705 | if (skb->protocol == htons(ETH_P_IP)) { | 698 | if (skb->protocol == htons(ETH_P_IP)) { |
706 | rt = skb_rtable(skb); | 699 | rt = skb_rtable(skb); |
707 | if ((dst = rt->rt_gateway) == 0) | 700 | if ((dst = rt->rt_gateway) == 0) |
708 | goto tx_error_icmp; | 701 | goto tx_error_icmp; |
709 | } | 702 | } |
710 | #ifdef CONFIG_IPV6 | 703 | #ifdef CONFIG_IPV6 |
711 | else if (skb->protocol == htons(ETH_P_IPV6)) { | 704 | else if (skb->protocol == htons(ETH_P_IPV6)) { |
712 | struct in6_addr *addr6; | 705 | struct in6_addr *addr6; |
713 | int addr_type; | 706 | int addr_type; |
714 | struct neighbour *neigh = skb_dst(skb)->neighbour; | 707 | struct neighbour *neigh = skb_dst(skb)->neighbour; |
715 | 708 | ||
716 | if (neigh == NULL) | 709 | if (neigh == NULL) |
717 | goto tx_error; | 710 | goto tx_error; |
718 | 711 | ||
719 | addr6 = (struct in6_addr *)&neigh->primary_key; | 712 | addr6 = (struct in6_addr *)&neigh->primary_key; |
720 | addr_type = ipv6_addr_type(addr6); | 713 | addr_type = ipv6_addr_type(addr6); |
721 | 714 | ||
722 | if (addr_type == IPV6_ADDR_ANY) { | 715 | if (addr_type == IPV6_ADDR_ANY) { |
723 | addr6 = &ipv6_hdr(skb)->daddr; | 716 | addr6 = &ipv6_hdr(skb)->daddr; |
724 | addr_type = ipv6_addr_type(addr6); | 717 | addr_type = ipv6_addr_type(addr6); |
725 | } | 718 | } |
726 | 719 | ||
727 | if ((addr_type & IPV6_ADDR_COMPATv4) == 0) | 720 | if ((addr_type & IPV6_ADDR_COMPATv4) == 0) |
728 | goto tx_error_icmp; | 721 | goto tx_error_icmp; |
729 | 722 | ||
730 | dst = addr6->s6_addr32[3]; | 723 | dst = addr6->s6_addr32[3]; |
731 | } | 724 | } |
732 | #endif | 725 | #endif |
733 | else | 726 | else |
734 | goto tx_error; | 727 | goto tx_error; |
735 | } | 728 | } |
736 | 729 | ||
737 | tos = tiph->tos; | 730 | tos = tiph->tos; |
738 | if (tos == 1) { | 731 | if (tos == 1) { |
739 | tos = 0; | 732 | tos = 0; |
740 | if (skb->protocol == htons(ETH_P_IP)) | 733 | if (skb->protocol == htons(ETH_P_IP)) |
741 | tos = old_iph->tos; | 734 | tos = old_iph->tos; |
742 | } | 735 | } |
743 | 736 | ||
744 | { | 737 | { |
745 | struct flowi fl = { .oif = tunnel->parms.link, | 738 | struct flowi fl = { .oif = tunnel->parms.link, |
746 | .nl_u = { .ip4_u = | 739 | .nl_u = { .ip4_u = |
747 | { .daddr = dst, | 740 | { .daddr = dst, |
748 | .saddr = tiph->saddr, | 741 | .saddr = tiph->saddr, |
749 | .tos = RT_TOS(tos) } }, | 742 | .tos = RT_TOS(tos) } }, |
750 | .proto = IPPROTO_GRE }; | 743 | .proto = IPPROTO_GRE }; |
751 | if (ip_route_output_key(dev_net(dev), &rt, &fl)) { | 744 | if (ip_route_output_key(dev_net(dev), &rt, &fl)) { |
752 | stats->tx_carrier_errors++; | 745 | stats->tx_carrier_errors++; |
753 | goto tx_error; | 746 | goto tx_error; |
754 | } | 747 | } |
755 | } | 748 | } |
756 | tdev = rt->u.dst.dev; | 749 | tdev = rt->u.dst.dev; |
757 | 750 | ||
758 | if (tdev == dev) { | 751 | if (tdev == dev) { |
759 | ip_rt_put(rt); | 752 | ip_rt_put(rt); |
760 | stats->collisions++; | 753 | stats->collisions++; |
761 | goto tx_error; | 754 | goto tx_error; |
762 | } | 755 | } |
763 | 756 | ||
764 | df = tiph->frag_off; | 757 | df = tiph->frag_off; |
765 | if (df) | 758 | if (df) |
766 | mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen; | 759 | mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen; |
767 | else | 760 | else |
768 | mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; | 761 | mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; |
769 | 762 | ||
770 | if (skb_dst(skb)) | 763 | if (skb_dst(skb)) |
771 | skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); | 764 | skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); |
772 | 765 | ||
773 | if (skb->protocol == htons(ETH_P_IP)) { | 766 | if (skb->protocol == htons(ETH_P_IP)) { |
774 | df |= (old_iph->frag_off&htons(IP_DF)); | 767 | df |= (old_iph->frag_off&htons(IP_DF)); |
775 | 768 | ||
776 | if ((old_iph->frag_off&htons(IP_DF)) && | 769 | if ((old_iph->frag_off&htons(IP_DF)) && |
777 | mtu < ntohs(old_iph->tot_len)) { | 770 | mtu < ntohs(old_iph->tot_len)) { |
778 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); | 771 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); |
779 | ip_rt_put(rt); | 772 | ip_rt_put(rt); |
780 | goto tx_error; | 773 | goto tx_error; |
781 | } | 774 | } |
782 | } | 775 | } |
783 | #ifdef CONFIG_IPV6 | 776 | #ifdef CONFIG_IPV6 |
784 | else if (skb->protocol == htons(ETH_P_IPV6)) { | 777 | else if (skb->protocol == htons(ETH_P_IPV6)) { |
785 | struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb); | 778 | struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb); |
786 | 779 | ||
787 | if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) { | 780 | if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) { |
788 | if ((tunnel->parms.iph.daddr && | 781 | if ((tunnel->parms.iph.daddr && |
789 | !ipv4_is_multicast(tunnel->parms.iph.daddr)) || | 782 | !ipv4_is_multicast(tunnel->parms.iph.daddr)) || |
790 | rt6->rt6i_dst.plen == 128) { | 783 | rt6->rt6i_dst.plen == 128) { |
791 | rt6->rt6i_flags |= RTF_MODIFIED; | 784 | rt6->rt6i_flags |= RTF_MODIFIED; |
792 | skb_dst(skb)->metrics[RTAX_MTU-1] = mtu; | 785 | skb_dst(skb)->metrics[RTAX_MTU-1] = mtu; |
793 | } | 786 | } |
794 | } | 787 | } |
795 | 788 | ||
796 | if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) { | 789 | if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) { |
797 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); | 790 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); |
798 | ip_rt_put(rt); | 791 | ip_rt_put(rt); |
799 | goto tx_error; | 792 | goto tx_error; |
800 | } | 793 | } |
801 | } | 794 | } |
802 | #endif | 795 | #endif |
803 | 796 | ||
804 | if (tunnel->err_count > 0) { | 797 | if (tunnel->err_count > 0) { |
805 | if (time_before(jiffies, | 798 | if (time_before(jiffies, |
806 | tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { | 799 | tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { |
807 | tunnel->err_count--; | 800 | tunnel->err_count--; |
808 | 801 | ||
809 | dst_link_failure(skb); | 802 | dst_link_failure(skb); |
810 | } else | 803 | } else |
811 | tunnel->err_count = 0; | 804 | tunnel->err_count = 0; |
812 | } | 805 | } |
813 | 806 | ||
814 | max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->u.dst.header_len; | 807 | max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->u.dst.header_len; |
815 | 808 | ||
816 | if (skb_headroom(skb) < max_headroom || skb_shared(skb)|| | 809 | if (skb_headroom(skb) < max_headroom || skb_shared(skb)|| |
817 | (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { | 810 | (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { |
818 | struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); | 811 | struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); |
819 | if (max_headroom > dev->needed_headroom) | 812 | if (max_headroom > dev->needed_headroom) |
820 | dev->needed_headroom = max_headroom; | 813 | dev->needed_headroom = max_headroom; |
821 | if (!new_skb) { | 814 | if (!new_skb) { |
822 | ip_rt_put(rt); | 815 | ip_rt_put(rt); |
823 | txq->tx_dropped++; | 816 | txq->tx_dropped++; |
824 | dev_kfree_skb(skb); | 817 | dev_kfree_skb(skb); |
825 | return NETDEV_TX_OK; | 818 | return NETDEV_TX_OK; |
826 | } | 819 | } |
827 | if (skb->sk) | 820 | if (skb->sk) |
828 | skb_set_owner_w(new_skb, skb->sk); | 821 | skb_set_owner_w(new_skb, skb->sk); |
829 | dev_kfree_skb(skb); | 822 | dev_kfree_skb(skb); |
830 | skb = new_skb; | 823 | skb = new_skb; |
831 | old_iph = ip_hdr(skb); | 824 | old_iph = ip_hdr(skb); |
832 | } | 825 | } |
833 | 826 | ||
834 | skb_reset_transport_header(skb); | 827 | skb_reset_transport_header(skb); |
835 | skb_push(skb, gre_hlen); | 828 | skb_push(skb, gre_hlen); |
836 | skb_reset_network_header(skb); | 829 | skb_reset_network_header(skb); |
837 | memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); | 830 | memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); |
838 | IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | | 831 | IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | |
839 | IPSKB_REROUTED); | 832 | IPSKB_REROUTED); |
840 | skb_dst_drop(skb); | 833 | skb_dst_drop(skb); |
841 | skb_dst_set(skb, &rt->u.dst); | 834 | skb_dst_set(skb, &rt->u.dst); |
842 | 835 | ||
843 | /* | 836 | /* |
844 | * Push down and install the outer IP header. | 837 | * Push down and install the outer IP header. |
845 | */ | 838 | */ |
846 | 839 | ||
847 | iph = ip_hdr(skb); | 840 | iph = ip_hdr(skb); |
848 | iph->version = 4; | 841 | iph->version = 4; |
849 | iph->ihl = sizeof(struct iphdr) >> 2; | 842 | iph->ihl = sizeof(struct iphdr) >> 2; |
850 | iph->frag_off = df; | 843 | iph->frag_off = df; |
851 | iph->protocol = IPPROTO_GRE; | 844 | iph->protocol = IPPROTO_GRE; |
852 | iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb); | 845 | iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb); |
853 | iph->daddr = rt->rt_dst; | 846 | iph->daddr = rt->rt_dst; |
854 | iph->saddr = rt->rt_src; | 847 | iph->saddr = rt->rt_src; |
855 | 848 | ||
856 | if ((iph->ttl = tiph->ttl) == 0) { | 849 | if ((iph->ttl = tiph->ttl) == 0) { |
857 | if (skb->protocol == htons(ETH_P_IP)) | 850 | if (skb->protocol == htons(ETH_P_IP)) |
858 | iph->ttl = old_iph->ttl; | 851 | iph->ttl = old_iph->ttl; |
859 | #ifdef CONFIG_IPV6 | 852 | #ifdef CONFIG_IPV6 |
860 | else if (skb->protocol == htons(ETH_P_IPV6)) | 853 | else if (skb->protocol == htons(ETH_P_IPV6)) |
861 | iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit; | 854 | iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit; |
862 | #endif | 855 | #endif |
863 | else | 856 | else |
864 | iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT); | 857 | iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT); |
865 | } | 858 | } |
866 | 859 | ||
867 | ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags; | 860 | ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags; |
868 | ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ? | 861 | ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ? |
869 | htons(ETH_P_TEB) : skb->protocol; | 862 | htons(ETH_P_TEB) : skb->protocol; |
870 | 863 | ||
871 | if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) { | 864 | if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) { |
872 | __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4); | 865 | __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4); |
873 | 866 | ||
874 | if (tunnel->parms.o_flags&GRE_SEQ) { | 867 | if (tunnel->parms.o_flags&GRE_SEQ) { |
875 | ++tunnel->o_seqno; | 868 | ++tunnel->o_seqno; |
876 | *ptr = htonl(tunnel->o_seqno); | 869 | *ptr = htonl(tunnel->o_seqno); |
877 | ptr--; | 870 | ptr--; |
878 | } | 871 | } |
879 | if (tunnel->parms.o_flags&GRE_KEY) { | 872 | if (tunnel->parms.o_flags&GRE_KEY) { |
880 | *ptr = tunnel->parms.o_key; | 873 | *ptr = tunnel->parms.o_key; |
881 | ptr--; | 874 | ptr--; |
882 | } | 875 | } |
883 | if (tunnel->parms.o_flags&GRE_CSUM) { | 876 | if (tunnel->parms.o_flags&GRE_CSUM) { |
884 | *ptr = 0; | 877 | *ptr = 0; |
885 | *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr)); | 878 | *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr)); |
886 | } | 879 | } |
887 | } | 880 | } |
888 | 881 | ||
889 | nf_reset(skb); | 882 | nf_reset(skb); |
890 | 883 | ||
891 | IPTUNNEL_XMIT(); | 884 | IPTUNNEL_XMIT(); |
892 | return NETDEV_TX_OK; | 885 | return NETDEV_TX_OK; |
893 | 886 | ||
894 | tx_error_icmp: | 887 | tx_error_icmp: |
895 | dst_link_failure(skb); | 888 | dst_link_failure(skb); |
896 | 889 | ||
897 | tx_error: | 890 | tx_error: |
898 | stats->tx_errors++; | 891 | stats->tx_errors++; |
899 | dev_kfree_skb(skb); | 892 | dev_kfree_skb(skb); |
900 | return NETDEV_TX_OK; | 893 | return NETDEV_TX_OK; |
901 | } | 894 | } |
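The option block near the end of ipgre_tunnel_xmit() fills the optional GRE fields back to front: ptr starts at the last 32-bit word of the header, so the sequence number lands at the tail, the key in front of it, and the checksum slot directly after the 4-byte base header, matching the GRE field order of checksum, then key, then sequence number. A sketch of the resulting on-the-wire layout when all three o_flags are set (illustrative struct only; the driver builds this with pointer arithmetic, not a struct):

	struct gre_hdr_full_sketch {
		__be16	flags;		/* tunnel->parms.o_flags */
		__be16	proto;		/* ETH_P_TEB for gretap, else skb->protocol */
		__sum16	csum;		/* ip_compute_csum() over GRE header + payload */
		__be16	reserved;	/* zeroed by the "*ptr = 0" store */
		__be32	key;		/* tunnel->parms.o_key */
		__be32	seq;		/* htonl(++tunnel->o_seqno) */
	};				/* 16 bytes of GRE; tunnel->hlen = 20 + 16 = 36 */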
902 | 895 | ||
903 | static int ipgre_tunnel_bind_dev(struct net_device *dev) | 896 | static int ipgre_tunnel_bind_dev(struct net_device *dev) |
904 | { | 897 | { |
905 | struct net_device *tdev = NULL; | 898 | struct net_device *tdev = NULL; |
906 | struct ip_tunnel *tunnel; | 899 | struct ip_tunnel *tunnel; |
907 | struct iphdr *iph; | 900 | struct iphdr *iph; |
908 | int hlen = LL_MAX_HEADER; | 901 | int hlen = LL_MAX_HEADER; |
909 | int mtu = ETH_DATA_LEN; | 902 | int mtu = ETH_DATA_LEN; |
910 | int addend = sizeof(struct iphdr) + 4; | 903 | int addend = sizeof(struct iphdr) + 4; |
911 | 904 | ||
912 | tunnel = netdev_priv(dev); | 905 | tunnel = netdev_priv(dev); |
913 | iph = &tunnel->parms.iph; | 906 | iph = &tunnel->parms.iph; |
914 | 907 | ||
915 | /* Guess the output device to choose a reasonable mtu and needed_headroom */ | 908 | /* Guess the output device to choose a reasonable mtu and needed_headroom */ |
916 | 909 | ||
917 | if (iph->daddr) { | 910 | if (iph->daddr) { |
918 | struct flowi fl = { .oif = tunnel->parms.link, | 911 | struct flowi fl = { .oif = tunnel->parms.link, |
919 | .nl_u = { .ip4_u = | 912 | .nl_u = { .ip4_u = |
920 | { .daddr = iph->daddr, | 913 | { .daddr = iph->daddr, |
921 | .saddr = iph->saddr, | 914 | .saddr = iph->saddr, |
922 | .tos = RT_TOS(iph->tos) } }, | 915 | .tos = RT_TOS(iph->tos) } }, |
923 | .proto = IPPROTO_GRE }; | 916 | .proto = IPPROTO_GRE }; |
924 | struct rtable *rt; | 917 | struct rtable *rt; |
925 | if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { | 918 | if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { |
926 | tdev = rt->u.dst.dev; | 919 | tdev = rt->u.dst.dev; |
927 | ip_rt_put(rt); | 920 | ip_rt_put(rt); |
928 | } | 921 | } |
929 | 922 | ||
930 | if (dev->type != ARPHRD_ETHER) | 923 | if (dev->type != ARPHRD_ETHER) |
931 | dev->flags |= IFF_POINTOPOINT; | 924 | dev->flags |= IFF_POINTOPOINT; |
932 | } | 925 | } |
933 | 926 | ||
934 | if (!tdev && tunnel->parms.link) | 927 | if (!tdev && tunnel->parms.link) |
935 | tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link); | 928 | tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link); |
936 | 929 | ||
937 | if (tdev) { | 930 | if (tdev) { |
938 | hlen = tdev->hard_header_len + tdev->needed_headroom; | 931 | hlen = tdev->hard_header_len + tdev->needed_headroom; |
939 | mtu = tdev->mtu; | 932 | mtu = tdev->mtu; |
940 | } | 933 | } |
941 | dev->iflink = tunnel->parms.link; | 934 | dev->iflink = tunnel->parms.link; |
942 | 935 | ||
943 | /* Precalculate GRE options length */ | 936 | /* Precalculate GRE options length */ |
944 | if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) { | 937 | if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) { |
945 | if (tunnel->parms.o_flags&GRE_CSUM) | 938 | if (tunnel->parms.o_flags&GRE_CSUM) |
946 | addend += 4; | 939 | addend += 4; |
947 | if (tunnel->parms.o_flags&GRE_KEY) | 940 | if (tunnel->parms.o_flags&GRE_KEY) |
948 | addend += 4; | 941 | addend += 4; |
949 | if (tunnel->parms.o_flags&GRE_SEQ) | 942 | if (tunnel->parms.o_flags&GRE_SEQ) |
950 | addend += 4; | 943 | addend += 4; |
951 | } | 944 | } |
952 | dev->needed_headroom = addend + hlen; | 945 | dev->needed_headroom = addend + hlen; |
953 | mtu -= dev->hard_header_len + addend; | 946 | mtu -= dev->hard_header_len + addend; |
954 | 947 | ||
955 | if (mtu < 68) | 948 | if (mtu < 68) |
956 | mtu = 68; | 949 | mtu = 68; |
957 | 950 | ||
958 | tunnel->hlen = addend; | 951 | tunnel->hlen = addend; |
959 | 952 | ||
960 | return mtu; | 953 | return mtu; |
961 | } | 954 | } |
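ipgre_tunnel_bind_dev() derives the device MTU from the underlay: every enabled option adds 4 bytes on top of the fixed outer-IP-plus-base-GRE cost, and the result is clamped at the 68-byte IPv4 minimum. A condensed restatement of that arithmetic, with a worked number assuming an all-options tunnel over a 1500-byte underlay and a hard_header_len of 0, as on a plain gre device (illustrative function name):

	static int gre_addend_sketch(__be16 o_flags)
	{
		int addend = sizeof(struct iphdr) + 4;	/* 20 + 4 = 24 */

		if (o_flags & GRE_CSUM)
			addend += 4;
		if (o_flags & GRE_KEY)
			addend += 4;
		if (o_flags & GRE_SEQ)
			addend += 4;
		return addend;	/* 36 with csum, key and seq all enabled */
	}

	/* e.g. mtu = 1500 - 0 (hard_header_len) - 36 = 1464 */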
962 | 955 | ||
963 | static int | 956 | static int |
964 | ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) | 957 | ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) |
965 | { | 958 | { |
966 | int err = 0; | 959 | int err = 0; |
967 | struct ip_tunnel_parm p; | 960 | struct ip_tunnel_parm p; |
968 | struct ip_tunnel *t; | 961 | struct ip_tunnel *t; |
969 | struct net *net = dev_net(dev); | 962 | struct net *net = dev_net(dev); |
970 | struct ipgre_net *ign = net_generic(net, ipgre_net_id); | 963 | struct ipgre_net *ign = net_generic(net, ipgre_net_id); |
971 | 964 | ||
972 | switch (cmd) { | 965 | switch (cmd) { |
973 | case SIOCGETTUNNEL: | 966 | case SIOCGETTUNNEL: |
974 | t = NULL; | 967 | t = NULL; |
975 | if (dev == ign->fb_tunnel_dev) { | 968 | if (dev == ign->fb_tunnel_dev) { |
976 | if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { | 969 | if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { |
977 | err = -EFAULT; | 970 | err = -EFAULT; |
978 | break; | 971 | break; |
979 | } | 972 | } |
980 | t = ipgre_tunnel_locate(net, &p, 0); | 973 | t = ipgre_tunnel_locate(net, &p, 0); |
981 | } | 974 | } |
982 | if (t == NULL) | 975 | if (t == NULL) |
983 | t = netdev_priv(dev); | 976 | t = netdev_priv(dev); |
984 | memcpy(&p, &t->parms, sizeof(p)); | 977 | memcpy(&p, &t->parms, sizeof(p)); |
985 | if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) | 978 | if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) |
986 | err = -EFAULT; | 979 | err = -EFAULT; |
987 | break; | 980 | break; |
988 | 981 | ||
989 | case SIOCADDTUNNEL: | 982 | case SIOCADDTUNNEL: |
990 | case SIOCCHGTUNNEL: | 983 | case SIOCCHGTUNNEL: |
991 | err = -EPERM; | 984 | err = -EPERM; |
992 | if (!capable(CAP_NET_ADMIN)) | 985 | if (!capable(CAP_NET_ADMIN)) |
993 | goto done; | 986 | goto done; |
994 | 987 | ||
995 | err = -EFAULT; | 988 | err = -EFAULT; |
996 | if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) | 989 | if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) |
997 | goto done; | 990 | goto done; |
998 | 991 | ||
999 | err = -EINVAL; | 992 | err = -EINVAL; |
1000 | if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE || | 993 | if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE || |
1001 | p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) || | 994 | p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) || |
1002 | ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))) | 995 | ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))) |
1003 | goto done; | 996 | goto done; |
1004 | if (p.iph.ttl) | 997 | if (p.iph.ttl) |
1005 | p.iph.frag_off |= htons(IP_DF); | 998 | p.iph.frag_off |= htons(IP_DF); |
1006 | 999 | ||
1007 | if (!(p.i_flags&GRE_KEY)) | 1000 | if (!(p.i_flags&GRE_KEY)) |
1008 | p.i_key = 0; | 1001 | p.i_key = 0; |
1009 | if (!(p.o_flags&GRE_KEY)) | 1002 | if (!(p.o_flags&GRE_KEY)) |
1010 | p.o_key = 0; | 1003 | p.o_key = 0; |
1011 | 1004 | ||
1012 | t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL); | 1005 | t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL); |
1013 | 1006 | ||
1014 | if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { | 1007 | if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { |
1015 | if (t != NULL) { | 1008 | if (t != NULL) { |
1016 | if (t->dev != dev) { | 1009 | if (t->dev != dev) { |
1017 | err = -EEXIST; | 1010 | err = -EEXIST; |
1018 | break; | 1011 | break; |
1019 | } | 1012 | } |
1020 | } else { | 1013 | } else { |
1021 | unsigned nflags = 0; | 1014 | unsigned nflags = 0; |
1022 | 1015 | ||
1023 | t = netdev_priv(dev); | 1016 | t = netdev_priv(dev); |
1024 | 1017 | ||
1025 | if (ipv4_is_multicast(p.iph.daddr)) | 1018 | if (ipv4_is_multicast(p.iph.daddr)) |
1026 | nflags = IFF_BROADCAST; | 1019 | nflags = IFF_BROADCAST; |
1027 | else if (p.iph.daddr) | 1020 | else if (p.iph.daddr) |
1028 | nflags = IFF_POINTOPOINT; | 1021 | nflags = IFF_POINTOPOINT; |
1029 | 1022 | ||
1030 | if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) { | 1023 | if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) { |
1031 | err = -EINVAL; | 1024 | err = -EINVAL; |
1032 | break; | 1025 | break; |
1033 | } | 1026 | } |
1034 | ipgre_tunnel_unlink(ign, t); | 1027 | ipgre_tunnel_unlink(ign, t); |
1035 | t->parms.iph.saddr = p.iph.saddr; | 1028 | t->parms.iph.saddr = p.iph.saddr; |
1036 | t->parms.iph.daddr = p.iph.daddr; | 1029 | t->parms.iph.daddr = p.iph.daddr; |
1037 | t->parms.i_key = p.i_key; | 1030 | t->parms.i_key = p.i_key; |
1038 | t->parms.o_key = p.o_key; | 1031 | t->parms.o_key = p.o_key; |
1039 | memcpy(dev->dev_addr, &p.iph.saddr, 4); | 1032 | memcpy(dev->dev_addr, &p.iph.saddr, 4); |
1040 | memcpy(dev->broadcast, &p.iph.daddr, 4); | 1033 | memcpy(dev->broadcast, &p.iph.daddr, 4); |
1041 | ipgre_tunnel_link(ign, t); | 1034 | ipgre_tunnel_link(ign, t); |
1042 | netdev_state_change(dev); | 1035 | netdev_state_change(dev); |
1043 | } | 1036 | } |
1044 | } | 1037 | } |
1045 | 1038 | ||
1046 | if (t) { | 1039 | if (t) { |
1047 | err = 0; | 1040 | err = 0; |
1048 | if (cmd == SIOCCHGTUNNEL) { | 1041 | if (cmd == SIOCCHGTUNNEL) { |
1049 | t->parms.iph.ttl = p.iph.ttl; | 1042 | t->parms.iph.ttl = p.iph.ttl; |
1050 | t->parms.iph.tos = p.iph.tos; | 1043 | t->parms.iph.tos = p.iph.tos; |
1051 | t->parms.iph.frag_off = p.iph.frag_off; | 1044 | t->parms.iph.frag_off = p.iph.frag_off; |
1052 | if (t->parms.link != p.link) { | 1045 | if (t->parms.link != p.link) { |
1053 | t->parms.link = p.link; | 1046 | t->parms.link = p.link; |
1054 | dev->mtu = ipgre_tunnel_bind_dev(dev); | 1047 | dev->mtu = ipgre_tunnel_bind_dev(dev); |
1055 | netdev_state_change(dev); | 1048 | netdev_state_change(dev); |
1056 | } | 1049 | } |
1057 | } | 1050 | } |
1058 | if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) | 1051 | if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) |
1059 | err = -EFAULT; | 1052 | err = -EFAULT; |
1060 | } else | 1053 | } else |
1061 | err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); | 1054 | err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); |
1062 | break; | 1055 | break; |
1063 | 1056 | ||
1064 | case SIOCDELTUNNEL: | 1057 | case SIOCDELTUNNEL: |
1065 | err = -EPERM; | 1058 | err = -EPERM; |
1066 | if (!capable(CAP_NET_ADMIN)) | 1059 | if (!capable(CAP_NET_ADMIN)) |
1067 | goto done; | 1060 | goto done; |
1068 | 1061 | ||
1069 | if (dev == ign->fb_tunnel_dev) { | 1062 | if (dev == ign->fb_tunnel_dev) { |
1070 | err = -EFAULT; | 1063 | err = -EFAULT; |
1071 | if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) | 1064 | if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) |
1072 | goto done; | 1065 | goto done; |
1073 | err = -ENOENT; | 1066 | err = -ENOENT; |
1074 | if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL) | 1067 | if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL) |
1075 | goto done; | 1068 | goto done; |
1076 | err = -EPERM; | 1069 | err = -EPERM; |
1077 | if (t == netdev_priv(ign->fb_tunnel_dev)) | 1070 | if (t == netdev_priv(ign->fb_tunnel_dev)) |
1078 | goto done; | 1071 | goto done; |
1079 | dev = t->dev; | 1072 | dev = t->dev; |
1080 | } | 1073 | } |
1081 | unregister_netdevice(dev); | 1074 | unregister_netdevice(dev); |
1082 | err = 0; | 1075 | err = 0; |
1083 | break; | 1076 | break; |
1084 | 1077 | ||
1085 | default: | 1078 | default: |
1086 | err = -EINVAL; | 1079 | err = -EINVAL; |
1087 | } | 1080 | } |
1088 | 1081 | ||
1089 | done: | 1082 | done: |
1090 | return err; | 1083 | return err; |
1091 | } | 1084 | } |
1092 | 1085 | ||
1093 | static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu) | 1086 | static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu) |
1094 | { | 1087 | { |
1095 | struct ip_tunnel *tunnel = netdev_priv(dev); | 1088 | struct ip_tunnel *tunnel = netdev_priv(dev); |
1096 | if (new_mtu < 68 || | 1089 | if (new_mtu < 68 || |
1097 | new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen) | 1090 | new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen) |
1098 | return -EINVAL; | 1091 | return -EINVAL; |
1099 | dev->mtu = new_mtu; | 1092 | dev->mtu = new_mtu; |
1100 | return 0; | 1093 | return 0; |
1101 | } | 1094 | } |
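The MTU setter above bounds new_mtu between the 68-byte IPv4 minimum and 0xFFF8 minus the device's own header cost; 0xFFF8 (65528) is presumably the largest 8-byte-aligned payload that still fits the 16-bit tot_len of the outer IPv4 header. A worked check under those assumptions (hypothetical helper, plain keyless tunnel with hlen = 24 and hard_header_len = 0):

	static bool gre_mtu_valid_sketch(int new_mtu, int hard_header_len, int hlen)
	{
		return new_mtu >= 68 &&
		       new_mtu <= 0xFFF8 - hard_header_len - hlen;
	}

	/* gre_mtu_valid_sketch(65504, 0, 24) -> true; 65505 -> false */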
1102 | 1095 | ||
1103 | /* Nice toy. Unfortunately, useless in real life :-) | 1096 | /* Nice toy. Unfortunately, useless in real life :-) |
1104 | It allows one to construct a virtual multiprotocol broadcast "LAN" | 1097 | It allows one to construct a virtual multiprotocol broadcast "LAN" |
1105 | over the Internet, provided multicast routing is tuned. | 1098 | over the Internet, provided multicast routing is tuned. |
1106 | 1099 | ||
1107 | 1100 | ||
1108 | I have no idea whether this bicycle was invented before me, | 1101 | I have no idea whether this bicycle was invented before me, |
1109 | so I had to set ARPHRD_IPGRE to a random value. | 1102 | so I had to set ARPHRD_IPGRE to a random value. |
1110 | I have the impression that Cisco could make something similar, | 1103 | I have the impression that Cisco could make something similar, |
1111 | but this feature is apparently missing in IOS<=11.2(8). | 1104 | but this feature is apparently missing in IOS<=11.2(8). |
1112 | 1105 | ||
1113 | I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks | 1106 | I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks |
1114 | with broadcast 224.66.66.66. If you have access to mbone, play with me :-) | 1107 | with broadcast 224.66.66.66. If you have access to mbone, play with me :-) |
1115 | 1108 | ||
1116 | ping -t 255 224.66.66.66 | 1109 | ping -t 255 224.66.66.66 |
1117 | 1110 | ||
1118 | If nobody answers, mbone does not work. | 1111 | If nobody answers, mbone does not work. |
1119 | 1112 | ||
1120 | ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255 | 1113 | ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255 |
1121 | ip addr add 10.66.66.<somewhat>/24 dev Universe | 1114 | ip addr add 10.66.66.<somewhat>/24 dev Universe |
1122 | ifconfig Universe up | 1115 | ifconfig Universe up |
1123 | ifconfig Universe add fe80::<Your_real_addr>/10 | 1116 | ifconfig Universe add fe80::<Your_real_addr>/10 |
1124 | ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96 | 1117 | ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96 |
1125 | ftp 10.66.66.66 | 1118 | ftp 10.66.66.66 |
1126 | ... | 1119 | ... |
1127 | ftp fec0:6666:6666::193.233.7.65 | 1120 | ftp fec0:6666:6666::193.233.7.65 |
1128 | ... | 1121 | ... |
1129 | 1122 | ||
1130 | */ | 1123 | */ |
1131 | 1124 | ||
1132 | static int ipgre_header(struct sk_buff *skb, struct net_device *dev, | 1125 | static int ipgre_header(struct sk_buff *skb, struct net_device *dev, |
1133 | unsigned short type, | 1126 | unsigned short type, |
1134 | const void *daddr, const void *saddr, unsigned len) | 1127 | const void *daddr, const void *saddr, unsigned len) |
1135 | { | 1128 | { |
1136 | struct ip_tunnel *t = netdev_priv(dev); | 1129 | struct ip_tunnel *t = netdev_priv(dev); |
1137 | struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen); | 1130 | struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen); |
1138 | __be16 *p = (__be16*)(iph+1); | 1131 | __be16 *p = (__be16*)(iph+1); |
1139 | 1132 | ||
1140 | memcpy(iph, &t->parms.iph, sizeof(struct iphdr)); | 1133 | memcpy(iph, &t->parms.iph, sizeof(struct iphdr)); |
1141 | p[0] = t->parms.o_flags; | 1134 | p[0] = t->parms.o_flags; |
1142 | p[1] = htons(type); | 1135 | p[1] = htons(type); |
1143 | 1136 | ||
1144 | /* | 1137 | /* |
1145 | * Set the source hardware address. | 1138 | * Set the source hardware address. |
1146 | */ | 1139 | */ |
1147 | 1140 | ||
1148 | if (saddr) | 1141 | if (saddr) |
1149 | memcpy(&iph->saddr, saddr, 4); | 1142 | memcpy(&iph->saddr, saddr, 4); |
1150 | if (daddr) | 1143 | if (daddr) |
1151 | memcpy(&iph->daddr, daddr, 4); | 1144 | memcpy(&iph->daddr, daddr, 4); |
1152 | if (iph->daddr) | 1145 | if (iph->daddr) |
1153 | return t->hlen; | 1146 | return t->hlen; |
1154 | 1147 | ||
1155 | return -t->hlen; | 1148 | return -t->hlen; |
1156 | } | 1149 | } |
1157 | 1150 | ||
1158 | static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr) | 1151 | static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr) |
1159 | { | 1152 | { |
1160 | struct iphdr *iph = (struct iphdr *) skb_mac_header(skb); | 1153 | struct iphdr *iph = (struct iphdr *) skb_mac_header(skb); |
1161 | memcpy(haddr, &iph->saddr, 4); | 1154 | memcpy(haddr, &iph->saddr, 4); |
1162 | return 4; | 1155 | return 4; |
1163 | } | 1156 | } |
1164 | 1157 | ||
1165 | static const struct header_ops ipgre_header_ops = { | 1158 | static const struct header_ops ipgre_header_ops = { |
1166 | .create = ipgre_header, | 1159 | .create = ipgre_header, |
1167 | .parse = ipgre_header_parse, | 1160 | .parse = ipgre_header_parse, |
1168 | }; | 1161 | }; |
1169 | 1162 | ||
1170 | #ifdef CONFIG_NET_IPGRE_BROADCAST | 1163 | #ifdef CONFIG_NET_IPGRE_BROADCAST |
1171 | static int ipgre_open(struct net_device *dev) | 1164 | static int ipgre_open(struct net_device *dev) |
1172 | { | 1165 | { |
1173 | struct ip_tunnel *t = netdev_priv(dev); | 1166 | struct ip_tunnel *t = netdev_priv(dev); |
1174 | 1167 | ||
1175 | if (ipv4_is_multicast(t->parms.iph.daddr)) { | 1168 | if (ipv4_is_multicast(t->parms.iph.daddr)) { |
1176 | struct flowi fl = { .oif = t->parms.link, | 1169 | struct flowi fl = { .oif = t->parms.link, |
1177 | .nl_u = { .ip4_u = | 1170 | .nl_u = { .ip4_u = |
1178 | { .daddr = t->parms.iph.daddr, | 1171 | { .daddr = t->parms.iph.daddr, |
1179 | .saddr = t->parms.iph.saddr, | 1172 | .saddr = t->parms.iph.saddr, |
1180 | .tos = RT_TOS(t->parms.iph.tos) } }, | 1173 | .tos = RT_TOS(t->parms.iph.tos) } }, |
1181 | .proto = IPPROTO_GRE }; | 1174 | .proto = IPPROTO_GRE }; |
1182 | struct rtable *rt; | 1175 | struct rtable *rt; |
1183 | if (ip_route_output_key(dev_net(dev), &rt, &fl)) | 1176 | if (ip_route_output_key(dev_net(dev), &rt, &fl)) |
1184 | return -EADDRNOTAVAIL; | 1177 | return -EADDRNOTAVAIL; |
1185 | dev = rt->u.dst.dev; | 1178 | dev = rt->u.dst.dev; |
1186 | ip_rt_put(rt); | 1179 | ip_rt_put(rt); |
1187 | if (__in_dev_get_rtnl(dev) == NULL) | 1180 | if (__in_dev_get_rtnl(dev) == NULL) |
1188 | return -EADDRNOTAVAIL; | 1181 | return -EADDRNOTAVAIL; |
1189 | t->mlink = dev->ifindex; | 1182 | t->mlink = dev->ifindex; |
1190 | ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr); | 1183 | ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr); |
1191 | } | 1184 | } |
1192 | return 0; | 1185 | return 0; |
1193 | } | 1186 | } |
1194 | 1187 | ||
1195 | static int ipgre_close(struct net_device *dev) | 1188 | static int ipgre_close(struct net_device *dev) |
1196 | { | 1189 | { |
1197 | struct ip_tunnel *t = netdev_priv(dev); | 1190 | struct ip_tunnel *t = netdev_priv(dev); |
1198 | 1191 | ||
1199 | if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) { | 1192 | if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) { |
1200 | struct in_device *in_dev; | 1193 | struct in_device *in_dev; |
1201 | in_dev = inetdev_by_index(dev_net(dev), t->mlink); | 1194 | in_dev = inetdev_by_index(dev_net(dev), t->mlink); |
1202 | if (in_dev) { | 1195 | if (in_dev) { |
1203 | ip_mc_dec_group(in_dev, t->parms.iph.daddr); | 1196 | ip_mc_dec_group(in_dev, t->parms.iph.daddr); |
1204 | in_dev_put(in_dev); | 1197 | in_dev_put(in_dev); |
1205 | } | 1198 | } |
1206 | } | 1199 | } |
1207 | return 0; | 1200 | return 0; |
1208 | } | 1201 | } |
1209 | 1202 | ||
1210 | #endif | 1203 | #endif |
1211 | 1204 | ||
1212 | static const struct net_device_ops ipgre_netdev_ops = { | 1205 | static const struct net_device_ops ipgre_netdev_ops = { |
1213 | .ndo_init = ipgre_tunnel_init, | 1206 | .ndo_init = ipgre_tunnel_init, |
1214 | .ndo_uninit = ipgre_tunnel_uninit, | 1207 | .ndo_uninit = ipgre_tunnel_uninit, |
1215 | #ifdef CONFIG_NET_IPGRE_BROADCAST | 1208 | #ifdef CONFIG_NET_IPGRE_BROADCAST |
1216 | .ndo_open = ipgre_open, | 1209 | .ndo_open = ipgre_open, |
1217 | .ndo_stop = ipgre_close, | 1210 | .ndo_stop = ipgre_close, |
1218 | #endif | 1211 | #endif |
1219 | .ndo_start_xmit = ipgre_tunnel_xmit, | 1212 | .ndo_start_xmit = ipgre_tunnel_xmit, |
1220 | .ndo_do_ioctl = ipgre_tunnel_ioctl, | 1213 | .ndo_do_ioctl = ipgre_tunnel_ioctl, |
1221 | .ndo_change_mtu = ipgre_tunnel_change_mtu, | 1214 | .ndo_change_mtu = ipgre_tunnel_change_mtu, |
1222 | }; | 1215 | }; |
1223 | 1216 | ||
1224 | static void ipgre_tunnel_setup(struct net_device *dev) | 1217 | static void ipgre_tunnel_setup(struct net_device *dev) |
1225 | { | 1218 | { |
1226 | dev->netdev_ops = &ipgre_netdev_ops; | 1219 | dev->netdev_ops = &ipgre_netdev_ops; |
1227 | dev->destructor = free_netdev; | 1220 | dev->destructor = free_netdev; |
1228 | 1221 | ||
1229 | dev->type = ARPHRD_IPGRE; | 1222 | dev->type = ARPHRD_IPGRE; |
1230 | dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4; | 1223 | dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4; |
1231 | dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4; | 1224 | dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4; |
1232 | dev->flags = IFF_NOARP; | 1225 | dev->flags = IFF_NOARP; |
1233 | dev->iflink = 0; | 1226 | dev->iflink = 0; |
1234 | dev->addr_len = 4; | 1227 | dev->addr_len = 4; |
1235 | dev->features |= NETIF_F_NETNS_LOCAL; | 1228 | dev->features |= NETIF_F_NETNS_LOCAL; |
1236 | dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; | 1229 | dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; |
1237 | } | 1230 | } |
1238 | 1231 | ||
1239 | static int ipgre_tunnel_init(struct net_device *dev) | 1232 | static int ipgre_tunnel_init(struct net_device *dev) |
1240 | { | 1233 | { |
1241 | struct ip_tunnel *tunnel; | 1234 | struct ip_tunnel *tunnel; |
1242 | struct iphdr *iph; | 1235 | struct iphdr *iph; |
1243 | 1236 | ||
1244 | tunnel = netdev_priv(dev); | 1237 | tunnel = netdev_priv(dev); |
1245 | iph = &tunnel->parms.iph; | 1238 | iph = &tunnel->parms.iph; |
1246 | 1239 | ||
1247 | tunnel->dev = dev; | 1240 | tunnel->dev = dev; |
1248 | strcpy(tunnel->parms.name, dev->name); | 1241 | strcpy(tunnel->parms.name, dev->name); |
1249 | 1242 | ||
1250 | memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); | 1243 | memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); |
1251 | memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); | 1244 | memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); |
1252 | 1245 | ||
1253 | if (iph->daddr) { | 1246 | if (iph->daddr) { |
1254 | #ifdef CONFIG_NET_IPGRE_BROADCAST | 1247 | #ifdef CONFIG_NET_IPGRE_BROADCAST |
1255 | if (ipv4_is_multicast(iph->daddr)) { | 1248 | if (ipv4_is_multicast(iph->daddr)) { |
1256 | if (!iph->saddr) | 1249 | if (!iph->saddr) |
1257 | return -EINVAL; | 1250 | return -EINVAL; |
1258 | dev->flags = IFF_BROADCAST; | 1251 | dev->flags = IFF_BROADCAST; |
1259 | dev->header_ops = &ipgre_header_ops; | 1252 | dev->header_ops = &ipgre_header_ops; |
1260 | } | 1253 | } |
1261 | #endif | 1254 | #endif |
1262 | } else | 1255 | } else |
1263 | dev->header_ops = &ipgre_header_ops; | 1256 | dev->header_ops = &ipgre_header_ops; |
1264 | 1257 | ||
1265 | return 0; | 1258 | return 0; |
1266 | } | 1259 | } |
1267 | 1260 | ||
1268 | static void ipgre_fb_tunnel_init(struct net_device *dev) | 1261 | static void ipgre_fb_tunnel_init(struct net_device *dev) |
1269 | { | 1262 | { |
1270 | struct ip_tunnel *tunnel = netdev_priv(dev); | 1263 | struct ip_tunnel *tunnel = netdev_priv(dev); |
1271 | struct iphdr *iph = &tunnel->parms.iph; | 1264 | struct iphdr *iph = &tunnel->parms.iph; |
1272 | struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id); | 1265 | struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id); |
1273 | 1266 | ||
1274 | tunnel->dev = dev; | 1267 | tunnel->dev = dev; |
1275 | strcpy(tunnel->parms.name, dev->name); | 1268 | strcpy(tunnel->parms.name, dev->name); |
1276 | 1269 | ||
1277 | iph->version = 4; | 1270 | iph->version = 4; |
1278 | iph->protocol = IPPROTO_GRE; | 1271 | iph->protocol = IPPROTO_GRE; |
1279 | iph->ihl = 5; | 1272 | iph->ihl = 5; |
1280 | tunnel->hlen = sizeof(struct iphdr) + 4; | 1273 | tunnel->hlen = sizeof(struct iphdr) + 4; |
1281 | 1274 | ||
1282 | dev_hold(dev); | 1275 | dev_hold(dev); |
1283 | ign->tunnels_wc[0] = tunnel; | 1276 | ign->tunnels_wc[0] = tunnel; |
1284 | } | 1277 | } |
1285 | 1278 | ||
1286 | 1279 | ||
1287 | static const struct net_protocol ipgre_protocol = { | 1280 | static const struct net_protocol ipgre_protocol = { |
1288 | .handler = ipgre_rcv, | 1281 | .handler = ipgre_rcv, |
1289 | .err_handler = ipgre_err, | 1282 | .err_handler = ipgre_err, |
1290 | .netns_ok = 1, | 1283 | .netns_ok = 1, |
1291 | }; | 1284 | }; |
1292 | 1285 | ||
1293 | static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head) | 1286 | static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head) |
1294 | { | 1287 | { |
1295 | int prio; | 1288 | int prio; |
1296 | 1289 | ||
1297 | for (prio = 0; prio < 4; prio++) { | 1290 | for (prio = 0; prio < 4; prio++) { |
1298 | int h; | 1291 | int h; |
1299 | for (h = 0; h < HASH_SIZE; h++) { | 1292 | for (h = 0; h < HASH_SIZE; h++) { |
1300 | struct ip_tunnel *t = ign->tunnels[prio][h]; | 1293 | struct ip_tunnel *t = ign->tunnels[prio][h]; |
1301 | 1294 | ||
1302 | while (t != NULL) { | 1295 | while (t != NULL) { |
1303 | unregister_netdevice_queue(t->dev, head); | 1296 | unregister_netdevice_queue(t->dev, head); |
1304 | t = t->next; | 1297 | t = t->next; |
1305 | } | 1298 | } |
1306 | } | 1299 | } |
1307 | } | 1300 | } |
1308 | } | 1301 | } |
1309 | 1302 | ||
1310 | static int __net_init ipgre_init_net(struct net *net) | 1303 | static int __net_init ipgre_init_net(struct net *net) |
1311 | { | 1304 | { |
1312 | struct ipgre_net *ign = net_generic(net, ipgre_net_id); | 1305 | struct ipgre_net *ign = net_generic(net, ipgre_net_id); |
1313 | int err; | 1306 | int err; |
1314 | 1307 | ||
1315 | ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0", | 1308 | ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0", |
1316 | ipgre_tunnel_setup); | 1309 | ipgre_tunnel_setup); |
1317 | if (!ign->fb_tunnel_dev) { | 1310 | if (!ign->fb_tunnel_dev) { |
1318 | err = -ENOMEM; | 1311 | err = -ENOMEM; |
1319 | goto err_alloc_dev; | 1312 | goto err_alloc_dev; |
1320 | } | 1313 | } |
1321 | dev_net_set(ign->fb_tunnel_dev, net); | 1314 | dev_net_set(ign->fb_tunnel_dev, net); |
1322 | 1315 | ||
1323 | ipgre_fb_tunnel_init(ign->fb_tunnel_dev); | 1316 | ipgre_fb_tunnel_init(ign->fb_tunnel_dev); |
1324 | ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops; | 1317 | ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops; |
1325 | 1318 | ||
1326 | if ((err = register_netdev(ign->fb_tunnel_dev))) | 1319 | if ((err = register_netdev(ign->fb_tunnel_dev))) |
1327 | goto err_reg_dev; | 1320 | goto err_reg_dev; |
1328 | 1321 | ||
1329 | return 0; | 1322 | return 0; |
1330 | 1323 | ||
1331 | err_reg_dev: | 1324 | err_reg_dev: |
1332 | free_netdev(ign->fb_tunnel_dev); | 1325 | free_netdev(ign->fb_tunnel_dev); |
1333 | err_alloc_dev: | 1326 | err_alloc_dev: |
1334 | return err; | 1327 | return err; |
1335 | } | 1328 | } |
1336 | 1329 | ||
1337 | static void __net_exit ipgre_exit_net(struct net *net) | 1330 | static void __net_exit ipgre_exit_net(struct net *net) |
1338 | { | 1331 | { |
1339 | struct ipgre_net *ign; | 1332 | struct ipgre_net *ign; |
1340 | LIST_HEAD(list); | 1333 | LIST_HEAD(list); |
1341 | 1334 | ||
1342 | ign = net_generic(net, ipgre_net_id); | 1335 | ign = net_generic(net, ipgre_net_id); |
1343 | rtnl_lock(); | 1336 | rtnl_lock(); |
1344 | ipgre_destroy_tunnels(ign, &list); | 1337 | ipgre_destroy_tunnels(ign, &list); |
1345 | unregister_netdevice_many(&list); | 1338 | unregister_netdevice_many(&list); |
1346 | rtnl_unlock(); | 1339 | rtnl_unlock(); |
1347 | } | 1340 | } |
1348 | 1341 | ||
1349 | static struct pernet_operations ipgre_net_ops = { | 1342 | static struct pernet_operations ipgre_net_ops = { |
1350 | .init = ipgre_init_net, | 1343 | .init = ipgre_init_net, |
1351 | .exit = ipgre_exit_net, | 1344 | .exit = ipgre_exit_net, |
1352 | .id = &ipgre_net_id, | 1345 | .id = &ipgre_net_id, |
1353 | .size = sizeof(struct ipgre_net), | 1346 | .size = sizeof(struct ipgre_net), |
1354 | }; | 1347 | }; |
1355 | 1348 | ||
1356 | static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[]) | 1349 | static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[]) |
1357 | { | 1350 | { |
1358 | __be16 flags; | 1351 | __be16 flags; |
1359 | 1352 | ||
1360 | if (!data) | 1353 | if (!data) |
1361 | return 0; | 1354 | return 0; |
1362 | 1355 | ||
1363 | flags = 0; | 1356 | flags = 0; |
1364 | if (data[IFLA_GRE_IFLAGS]) | 1357 | if (data[IFLA_GRE_IFLAGS]) |
1365 | flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]); | 1358 | flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]); |
1366 | if (data[IFLA_GRE_OFLAGS]) | 1359 | if (data[IFLA_GRE_OFLAGS]) |
1367 | flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]); | 1360 | flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]); |
1368 | if (flags & (GRE_VERSION|GRE_ROUTING)) | 1361 | if (flags & (GRE_VERSION|GRE_ROUTING)) |
1369 | return -EINVAL; | 1362 | return -EINVAL; |
1370 | 1363 | ||
1371 | return 0; | 1364 | return 0; |
1372 | } | 1365 | } |
1373 | 1366 | ||
1374 | static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[]) | 1367 | static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[]) |
1375 | { | 1368 | { |
1376 | __be32 daddr; | 1369 | __be32 daddr; |
1377 | 1370 | ||
1378 | if (tb[IFLA_ADDRESS]) { | 1371 | if (tb[IFLA_ADDRESS]) { |
1379 | if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) | 1372 | if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) |
1380 | return -EINVAL; | 1373 | return -EINVAL; |
1381 | if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) | 1374 | if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) |
1382 | return -EADDRNOTAVAIL; | 1375 | return -EADDRNOTAVAIL; |
1383 | } | 1376 | } |
1384 | 1377 | ||
1385 | if (!data) | 1378 | if (!data) |
1386 | goto out; | 1379 | goto out; |
1387 | 1380 | ||
1388 | if (data[IFLA_GRE_REMOTE]) { | 1381 | if (data[IFLA_GRE_REMOTE]) { |
1389 | memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4); | 1382 | memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4); |
1390 | if (!daddr) | 1383 | if (!daddr) |
1391 | return -EINVAL; | 1384 | return -EINVAL; |
1392 | } | 1385 | } |
1393 | 1386 | ||
1394 | out: | 1387 | out: |
1395 | return ipgre_tunnel_validate(tb, data); | 1388 | return ipgre_tunnel_validate(tb, data); |
1396 | } | 1389 | } |
1397 | 1390 | ||
1398 | static void ipgre_netlink_parms(struct nlattr *data[], | 1391 | static void ipgre_netlink_parms(struct nlattr *data[], |
1399 | struct ip_tunnel_parm *parms) | 1392 | struct ip_tunnel_parm *parms) |
1400 | { | 1393 | { |
1401 | memset(parms, 0, sizeof(*parms)); | 1394 | memset(parms, 0, sizeof(*parms)); |
1402 | 1395 | ||
1403 | parms->iph.protocol = IPPROTO_GRE; | 1396 | parms->iph.protocol = IPPROTO_GRE; |
1404 | 1397 | ||
1405 | if (!data) | 1398 | if (!data) |
1406 | return; | 1399 | return; |
1407 | 1400 | ||
1408 | if (data[IFLA_GRE_LINK]) | 1401 | if (data[IFLA_GRE_LINK]) |
1409 | parms->link = nla_get_u32(data[IFLA_GRE_LINK]); | 1402 | parms->link = nla_get_u32(data[IFLA_GRE_LINK]); |
1410 | 1403 | ||
1411 | if (data[IFLA_GRE_IFLAGS]) | 1404 | if (data[IFLA_GRE_IFLAGS]) |
1412 | parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]); | 1405 | parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]); |
1413 | 1406 | ||
1414 | if (data[IFLA_GRE_OFLAGS]) | 1407 | if (data[IFLA_GRE_OFLAGS]) |
1415 | parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]); | 1408 | parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]); |
1416 | 1409 | ||
1417 | if (data[IFLA_GRE_IKEY]) | 1410 | if (data[IFLA_GRE_IKEY]) |
1418 | parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]); | 1411 | parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]); |
1419 | 1412 | ||
1420 | if (data[IFLA_GRE_OKEY]) | 1413 | if (data[IFLA_GRE_OKEY]) |
1421 | parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]); | 1414 | parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]); |
1422 | 1415 | ||
1423 | if (data[IFLA_GRE_LOCAL]) | 1416 | if (data[IFLA_GRE_LOCAL]) |
1424 | parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]); | 1417 | parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]); |
1425 | 1418 | ||
1426 | if (data[IFLA_GRE_REMOTE]) | 1419 | if (data[IFLA_GRE_REMOTE]) |
1427 | parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]); | 1420 | parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]); |
1428 | 1421 | ||
1429 | if (data[IFLA_GRE_TTL]) | 1422 | if (data[IFLA_GRE_TTL]) |
1430 | parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]); | 1423 | parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]); |
1431 | 1424 | ||
1432 | if (data[IFLA_GRE_TOS]) | 1425 | if (data[IFLA_GRE_TOS]) |
1433 | parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]); | 1426 | parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]); |
1434 | 1427 | ||
1435 | if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) | 1428 | if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) |
1436 | parms->iph.frag_off = htons(IP_DF); | 1429 | parms->iph.frag_off = htons(IP_DF); |
1437 | } | 1430 | } |
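Note the default in the last branch of ipgre_netlink_parms(): DF is set on the outer header unless IFLA_GRE_PMTUDISC is present and explicitly zero, i.e. path-MTU discovery is on by default for netlink-created tunnels, which is the behaviour ip link add ... type gre ... inherits. A minimal restatement of that predicate (illustrative function name only):

	static bool gre_pmtudisc_on_sketch(const struct nlattr *pmtudisc)
	{
		/* absent attribute == enabled; only an explicit 0 clears DF */
		return !pmtudisc || nla_get_u8(pmtudisc);
	}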
1438 | 1431 | ||
1439 | static int ipgre_tap_init(struct net_device *dev) | 1432 | static int ipgre_tap_init(struct net_device *dev) |
1440 | { | 1433 | { |
1441 | struct ip_tunnel *tunnel; | 1434 | struct ip_tunnel *tunnel; |
1442 | 1435 | ||
1443 | tunnel = netdev_priv(dev); | 1436 | tunnel = netdev_priv(dev); |
1444 | 1437 | ||
1445 | tunnel->dev = dev; | 1438 | tunnel->dev = dev; |
1446 | strcpy(tunnel->parms.name, dev->name); | 1439 | strcpy(tunnel->parms.name, dev->name); |
1447 | 1440 | ||
1448 | ipgre_tunnel_bind_dev(dev); | 1441 | ipgre_tunnel_bind_dev(dev); |
1449 | 1442 | ||
1450 | return 0; | 1443 | return 0; |
1451 | } | 1444 | } |
1452 | 1445 | ||
1453 | static const struct net_device_ops ipgre_tap_netdev_ops = { | 1446 | static const struct net_device_ops ipgre_tap_netdev_ops = { |
1454 | .ndo_init = ipgre_tap_init, | 1447 | .ndo_init = ipgre_tap_init, |
1455 | .ndo_uninit = ipgre_tunnel_uninit, | 1448 | .ndo_uninit = ipgre_tunnel_uninit, |
1456 | .ndo_start_xmit = ipgre_tunnel_xmit, | 1449 | .ndo_start_xmit = ipgre_tunnel_xmit, |
1457 | .ndo_set_mac_address = eth_mac_addr, | 1450 | .ndo_set_mac_address = eth_mac_addr, |
1458 | .ndo_validate_addr = eth_validate_addr, | 1451 | .ndo_validate_addr = eth_validate_addr, |
1459 | .ndo_change_mtu = ipgre_tunnel_change_mtu, | 1452 | .ndo_change_mtu = ipgre_tunnel_change_mtu, |
1460 | }; | 1453 | }; |
1461 | 1454 | ||
1462 | static void ipgre_tap_setup(struct net_device *dev) | 1455 | static void ipgre_tap_setup(struct net_device *dev) |
1463 | { | 1456 | { |
1464 | 1457 | ||
1465 | ether_setup(dev); | 1458 | ether_setup(dev); |
1466 | 1459 | ||
1467 | dev->netdev_ops = &ipgre_tap_netdev_ops; | 1460 | dev->netdev_ops = &ipgre_tap_netdev_ops; |
1468 | dev->destructor = free_netdev; | 1461 | dev->destructor = free_netdev; |
1469 | 1462 | ||
1470 | dev->iflink = 0; | 1463 | dev->iflink = 0; |
1471 | dev->features |= NETIF_F_NETNS_LOCAL; | 1464 | dev->features |= NETIF_F_NETNS_LOCAL; |
1472 | } | 1465 | } |
1473 | 1466 | ||
1474 | static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], | 1467 | static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], |
1475 | struct nlattr *data[]) | 1468 | struct nlattr *data[]) |
1476 | { | 1469 | { |
1477 | struct ip_tunnel *nt; | 1470 | struct ip_tunnel *nt; |
1478 | struct net *net = dev_net(dev); | 1471 | struct net *net = dev_net(dev); |
1479 | struct ipgre_net *ign = net_generic(net, ipgre_net_id); | 1472 | struct ipgre_net *ign = net_generic(net, ipgre_net_id); |
1480 | int mtu; | 1473 | int mtu; |
1481 | int err; | 1474 | int err; |
1482 | 1475 | ||
1483 | nt = netdev_priv(dev); | 1476 | nt = netdev_priv(dev); |
1484 | ipgre_netlink_parms(data, &nt->parms); | 1477 | ipgre_netlink_parms(data, &nt->parms); |
1485 | 1478 | ||
1486 | if (ipgre_tunnel_find(net, &nt->parms, dev->type)) | 1479 | if (ipgre_tunnel_find(net, &nt->parms, dev->type)) |
1487 | return -EEXIST; | 1480 | return -EEXIST; |
1488 | 1481 | ||
1489 | if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS]) | 1482 | if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS]) |
1490 | random_ether_addr(dev->dev_addr); | 1483 | random_ether_addr(dev->dev_addr); |
1491 | 1484 | ||
1492 | mtu = ipgre_tunnel_bind_dev(dev); | 1485 | mtu = ipgre_tunnel_bind_dev(dev); |
1493 | if (!tb[IFLA_MTU]) | 1486 | if (!tb[IFLA_MTU]) |
1494 | dev->mtu = mtu; | 1487 | dev->mtu = mtu; |
1495 | 1488 | ||
1496 | err = register_netdevice(dev); | 1489 | err = register_netdevice(dev); |
1497 | if (err) | 1490 | if (err) |
1498 | goto out; | 1491 | goto out; |
1499 | 1492 | ||
1500 | dev_hold(dev); | 1493 | dev_hold(dev); |
1501 | ipgre_tunnel_link(ign, nt); | 1494 | ipgre_tunnel_link(ign, nt); |
1502 | 1495 | ||
1503 | out: | 1496 | out: |
1504 | return err; | 1497 | return err; |
1505 | } | 1498 | } |
1506 | 1499 | ||
1507 | static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[], | 1500 | static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[], |
1508 | struct nlattr *data[]) | 1501 | struct nlattr *data[]) |
1509 | { | 1502 | { |
1510 | struct ip_tunnel *t, *nt; | 1503 | struct ip_tunnel *t, *nt; |
1511 | struct net *net = dev_net(dev); | 1504 | struct net *net = dev_net(dev); |
1512 | struct ipgre_net *ign = net_generic(net, ipgre_net_id); | 1505 | struct ipgre_net *ign = net_generic(net, ipgre_net_id); |
1513 | struct ip_tunnel_parm p; | 1506 | struct ip_tunnel_parm p; |
1514 | int mtu; | 1507 | int mtu; |
1515 | 1508 | ||
1516 | if (dev == ign->fb_tunnel_dev) | 1509 | if (dev == ign->fb_tunnel_dev) |
1517 | return -EINVAL; | 1510 | return -EINVAL; |
1518 | 1511 | ||
1519 | nt = netdev_priv(dev); | 1512 | nt = netdev_priv(dev); |
1520 | ipgre_netlink_parms(data, &p); | 1513 | ipgre_netlink_parms(data, &p); |
1521 | 1514 | ||
1522 | t = ipgre_tunnel_locate(net, &p, 0); | 1515 | t = ipgre_tunnel_locate(net, &p, 0); |
1523 | 1516 | ||
1524 | if (t) { | 1517 | if (t) { |
1525 | if (t->dev != dev) | 1518 | if (t->dev != dev) |
1526 | return -EEXIST; | 1519 | return -EEXIST; |
1527 | } else { | 1520 | } else { |
1528 | t = nt; | 1521 | t = nt; |
1529 | 1522 | ||
1530 | if (dev->type != ARPHRD_ETHER) { | 1523 | if (dev->type != ARPHRD_ETHER) { |
1531 | unsigned nflags = 0; | 1524 | unsigned nflags = 0; |
1532 | 1525 | ||
1533 | if (ipv4_is_multicast(p.iph.daddr)) | 1526 | if (ipv4_is_multicast(p.iph.daddr)) |
1534 | nflags = IFF_BROADCAST; | 1527 | nflags = IFF_BROADCAST; |
1535 | else if (p.iph.daddr) | 1528 | else if (p.iph.daddr) |
1536 | nflags = IFF_POINTOPOINT; | 1529 | nflags = IFF_POINTOPOINT; |
1537 | 1530 | ||
1538 | if ((dev->flags ^ nflags) & | 1531 | if ((dev->flags ^ nflags) & |
1539 | (IFF_POINTOPOINT | IFF_BROADCAST)) | 1532 | (IFF_POINTOPOINT | IFF_BROADCAST)) |
1540 | return -EINVAL; | 1533 | return -EINVAL; |
1541 | } | 1534 | } |
1542 | 1535 | ||
1543 | ipgre_tunnel_unlink(ign, t); | 1536 | ipgre_tunnel_unlink(ign, t); |
1544 | t->parms.iph.saddr = p.iph.saddr; | 1537 | t->parms.iph.saddr = p.iph.saddr; |
1545 | t->parms.iph.daddr = p.iph.daddr; | 1538 | t->parms.iph.daddr = p.iph.daddr; |
1546 | t->parms.i_key = p.i_key; | 1539 | t->parms.i_key = p.i_key; |
1547 | if (dev->type != ARPHRD_ETHER) { | 1540 | if (dev->type != ARPHRD_ETHER) { |
1548 | memcpy(dev->dev_addr, &p.iph.saddr, 4); | 1541 | memcpy(dev->dev_addr, &p.iph.saddr, 4); |
1549 | memcpy(dev->broadcast, &p.iph.daddr, 4); | 1542 | memcpy(dev->broadcast, &p.iph.daddr, 4); |
1550 | } | 1543 | } |
1551 | ipgre_tunnel_link(ign, t); | 1544 | ipgre_tunnel_link(ign, t); |
1552 | netdev_state_change(dev); | 1545 | netdev_state_change(dev); |
1553 | } | 1546 | } |
1554 | 1547 | ||
1555 | t->parms.o_key = p.o_key; | 1548 | t->parms.o_key = p.o_key; |
1556 | t->parms.iph.ttl = p.iph.ttl; | 1549 | t->parms.iph.ttl = p.iph.ttl; |
1557 | t->parms.iph.tos = p.iph.tos; | 1550 | t->parms.iph.tos = p.iph.tos; |
1558 | t->parms.iph.frag_off = p.iph.frag_off; | 1551 | t->parms.iph.frag_off = p.iph.frag_off; |
1559 | 1552 | ||
1560 | if (t->parms.link != p.link) { | 1553 | if (t->parms.link != p.link) { |
1561 | t->parms.link = p.link; | 1554 | t->parms.link = p.link; |
1562 | mtu = ipgre_tunnel_bind_dev(dev); | 1555 | mtu = ipgre_tunnel_bind_dev(dev); |
1563 | if (!tb[IFLA_MTU]) | 1556 | if (!tb[IFLA_MTU]) |
1564 | dev->mtu = mtu; | 1557 | dev->mtu = mtu; |
1565 | netdev_state_change(dev); | 1558 | netdev_state_change(dev); |
1566 | } | 1559 | } |
1567 | 1560 | ||
1568 | return 0; | 1561 | return 0; |
1569 | } | 1562 | } |
1570 | 1563 | ||
1571 | static size_t ipgre_get_size(const struct net_device *dev) | 1564 | static size_t ipgre_get_size(const struct net_device *dev) |
1572 | { | 1565 | { |
1573 | return | 1566 | return |
1574 | /* IFLA_GRE_LINK */ | 1567 | /* IFLA_GRE_LINK */ |
1575 | nla_total_size(4) + | 1568 | nla_total_size(4) + |
1576 | /* IFLA_GRE_IFLAGS */ | 1569 | /* IFLA_GRE_IFLAGS */ |
1577 | nla_total_size(2) + | 1570 | nla_total_size(2) + |
1578 | /* IFLA_GRE_OFLAGS */ | 1571 | /* IFLA_GRE_OFLAGS */ |
1579 | nla_total_size(2) + | 1572 | nla_total_size(2) + |
1580 | /* IFLA_GRE_IKEY */ | 1573 | /* IFLA_GRE_IKEY */ |
1581 | nla_total_size(4) + | 1574 | nla_total_size(4) + |
1582 | /* IFLA_GRE_OKEY */ | 1575 | /* IFLA_GRE_OKEY */ |
1583 | nla_total_size(4) + | 1576 | nla_total_size(4) + |
1584 | /* IFLA_GRE_LOCAL */ | 1577 | /* IFLA_GRE_LOCAL */ |
1585 | nla_total_size(4) + | 1578 | nla_total_size(4) + |
1586 | /* IFLA_GRE_REMOTE */ | 1579 | /* IFLA_GRE_REMOTE */ |
1587 | nla_total_size(4) + | 1580 | nla_total_size(4) + |
1588 | /* IFLA_GRE_TTL */ | 1581 | /* IFLA_GRE_TTL */ |
1589 | nla_total_size(1) + | 1582 | nla_total_size(1) + |
1590 | /* IFLA_GRE_TOS */ | 1583 | /* IFLA_GRE_TOS */ |
1591 | nla_total_size(1) + | 1584 | nla_total_size(1) + |
1592 | /* IFLA_GRE_PMTUDISC */ | 1585 | /* IFLA_GRE_PMTUDISC */ |
1593 | nla_total_size(1) + | 1586 | nla_total_size(1) + |
1594 | 0; | 1587 | 0; |
1595 | } | 1588 | } |
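
ipgre_get_size() must return an upper bound on the netlink attribute space that ipgre_fill_info() will consume. Each nla_total_size(n) is a 4-byte attribute header plus the payload, rounded up to 4-byte alignment, so every attribute above occupies 8 bytes. A minimal standalone check (the alignment constants are standard netlink; the demo itself is mine):

	#include <stdio.h>

	#define NLA_ALIGNTO    4
	#define NLA_ALIGN(len) (((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1))
	#define NLA_HDRLEN     4 /* struct nlattr: u16 len + u16 type */

	static int nla_total_size(int payload)
	{
		return NLA_ALIGN(NLA_HDRLEN + payload);
	}

	int main(void)
	{
		/* payload sizes in the order used by ipgre_get_size() above */
		int payloads[] = { 4, 2, 2, 4, 4, 4, 4, 1, 1, 1 };
		unsigned int i;
		int total = 0;

		for (i = 0; i < sizeof(payloads) / sizeof(payloads[0]); i++)
			total += nla_total_size(payloads[i]);
		/* every attribute rounds up to 8 bytes -> 80 in total */
		printf("%d\n", total);
		return 0;
	}
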
1596 | 1589 | ||
1597 | static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev) | 1590 | static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev) |
1598 | { | 1591 | { |
1599 | struct ip_tunnel *t = netdev_priv(dev); | 1592 | struct ip_tunnel *t = netdev_priv(dev); |
1600 | struct ip_tunnel_parm *p = &t->parms; | 1593 | struct ip_tunnel_parm *p = &t->parms; |
1601 | 1594 | ||
1602 | NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link); | 1595 | NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link); |
1603 | NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags); | 1596 | NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags); |
1604 | NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags); | 1597 | NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags); |
1605 | NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key); | 1598 | NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key); |
1606 | NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key); | 1599 | NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key); |
1607 | NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr); | 1600 | NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr); |
1608 | NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr); | 1601 | NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr); |
1609 | NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl); | 1602 | NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl); |
1610 | NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos); | 1603 | NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos); |
1611 | NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF))); | 1604 | NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF))); |
1612 | 1605 | ||
1613 | return 0; | 1606 | return 0; |
1614 | 1607 | ||
1615 | nla_put_failure: | 1608 | nla_put_failure: |
1616 | return -EMSGSIZE; | 1609 | return -EMSGSIZE; |
1617 | } | 1610 | } |
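
The NLA_PUT_* calls look like statements but are macros that jump to the nla_put_failure label when the skb runs out of tailroom, which is why the function body needs no per-call error handling. Roughly, from include/net/netlink.h of this era (a paraphrase; consult the header for the exact definitions):

	#define NLA_PUT(skb, attrtype, attrlen, data)				\
		do {								\
			if (unlikely(nla_put(skb, attrtype, attrlen, data) < 0)) \
				goto nla_put_failure;				\
		} while (0)

	#define NLA_PUT_TYPE(skb, type, attrtype, value)			\
		do {								\
			type __tmp = value;					\
			NLA_PUT(skb, attrtype, sizeof(type), &__tmp);		\
		} while (0)
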
1618 | 1611 | ||
1619 | static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = { | 1612 | static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = { |
1620 | [IFLA_GRE_LINK] = { .type = NLA_U32 }, | 1613 | [IFLA_GRE_LINK] = { .type = NLA_U32 }, |
1621 | [IFLA_GRE_IFLAGS] = { .type = NLA_U16 }, | 1614 | [IFLA_GRE_IFLAGS] = { .type = NLA_U16 }, |
1622 | [IFLA_GRE_OFLAGS] = { .type = NLA_U16 }, | 1615 | [IFLA_GRE_OFLAGS] = { .type = NLA_U16 }, |
1623 | [IFLA_GRE_IKEY] = { .type = NLA_U32 }, | 1616 | [IFLA_GRE_IKEY] = { .type = NLA_U32 }, |
1624 | [IFLA_GRE_OKEY] = { .type = NLA_U32 }, | 1617 | [IFLA_GRE_OKEY] = { .type = NLA_U32 }, |
1625 | [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) }, | 1618 | [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) }, |
1626 | [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) }, | 1619 | [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) }, |
1627 | [IFLA_GRE_TTL] = { .type = NLA_U8 }, | 1620 | [IFLA_GRE_TTL] = { .type = NLA_U8 }, |
1628 | [IFLA_GRE_TOS] = { .type = NLA_U8 }, | 1621 | [IFLA_GRE_TOS] = { .type = NLA_U8 }, |
1629 | [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 }, | 1622 | [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 }, |
1630 | }; | 1623 | }; |
1631 | 1624 | ||
1632 | static struct rtnl_link_ops ipgre_link_ops __read_mostly = { | 1625 | static struct rtnl_link_ops ipgre_link_ops __read_mostly = { |
1633 | .kind = "gre", | 1626 | .kind = "gre", |
1634 | .maxtype = IFLA_GRE_MAX, | 1627 | .maxtype = IFLA_GRE_MAX, |
1635 | .policy = ipgre_policy, | 1628 | .policy = ipgre_policy, |
1636 | .priv_size = sizeof(struct ip_tunnel), | 1629 | .priv_size = sizeof(struct ip_tunnel), |
1637 | .setup = ipgre_tunnel_setup, | 1630 | .setup = ipgre_tunnel_setup, |
1638 | .validate = ipgre_tunnel_validate, | 1631 | .validate = ipgre_tunnel_validate, |
1639 | .newlink = ipgre_newlink, | 1632 | .newlink = ipgre_newlink, |
1640 | .changelink = ipgre_changelink, | 1633 | .changelink = ipgre_changelink, |
1641 | .get_size = ipgre_get_size, | 1634 | .get_size = ipgre_get_size, |
1642 | .fill_info = ipgre_fill_info, | 1635 | .fill_info = ipgre_fill_info, |
1643 | }; | 1636 | }; |
1644 | 1637 | ||
1645 | static struct rtnl_link_ops ipgre_tap_ops __read_mostly = { | 1638 | static struct rtnl_link_ops ipgre_tap_ops __read_mostly = { |
1646 | .kind = "gretap", | 1639 | .kind = "gretap", |
1647 | .maxtype = IFLA_GRE_MAX, | 1640 | .maxtype = IFLA_GRE_MAX, |
1648 | .policy = ipgre_policy, | 1641 | .policy = ipgre_policy, |
1649 | .priv_size = sizeof(struct ip_tunnel), | 1642 | .priv_size = sizeof(struct ip_tunnel), |
1650 | .setup = ipgre_tap_setup, | 1643 | .setup = ipgre_tap_setup, |
1651 | .validate = ipgre_tap_validate, | 1644 | .validate = ipgre_tap_validate, |
1652 | .newlink = ipgre_newlink, | 1645 | .newlink = ipgre_newlink, |
1653 | .changelink = ipgre_changelink, | 1646 | .changelink = ipgre_changelink, |
1654 | .get_size = ipgre_get_size, | 1647 | .get_size = ipgre_get_size, |
1655 | .fill_info = ipgre_fill_info, | 1648 | .fill_info = ipgre_fill_info, |
1656 | }; | 1649 | }; |
1657 | 1650 | ||
1658 | /* | 1651 | /* |
1659 | * And now the modules code and kernel interface. | 1652 | * And now the modules code and kernel interface. |
1660 | */ | 1653 | */ |
1661 | 1654 | ||
1662 | static int __init ipgre_init(void) | 1655 | static int __init ipgre_init(void) |
1663 | { | 1656 | { |
1664 | int err; | 1657 | int err; |
1665 | 1658 | ||
1666 | printk(KERN_INFO "GRE over IPv4 tunneling driver\n"); | 1659 | printk(KERN_INFO "GRE over IPv4 tunneling driver\n"); |
1667 | 1660 | ||
1668 | err = register_pernet_device(&ipgre_net_ops); | 1661 | err = register_pernet_device(&ipgre_net_ops); |
1669 | if (err < 0) | 1662 | if (err < 0) |
1670 | return err; | 1663 | return err; |
1671 | 1664 | ||
1672 | err = inet_add_protocol(&ipgre_protocol, IPPROTO_GRE); | 1665 | err = inet_add_protocol(&ipgre_protocol, IPPROTO_GRE); |
1673 | if (err < 0) { | 1666 | if (err < 0) { |
1674 | printk(KERN_INFO "ipgre init: can't add protocol\n"); | 1667 | printk(KERN_INFO "ipgre init: can't add protocol\n"); |
1675 | goto add_proto_failed; | 1668 | goto add_proto_failed; |
1676 | } | 1669 | } |
1677 | 1670 | ||
1678 | err = rtnl_link_register(&ipgre_link_ops); | 1671 | err = rtnl_link_register(&ipgre_link_ops); |
1679 | if (err < 0) | 1672 | if (err < 0) |
1680 | goto rtnl_link_failed; | 1673 | goto rtnl_link_failed; |
1681 | 1674 | ||
1682 | err = rtnl_link_register(&ipgre_tap_ops); | 1675 | err = rtnl_link_register(&ipgre_tap_ops); |
1683 | if (err < 0) | 1676 | if (err < 0) |
1684 | goto tap_ops_failed; | 1677 | goto tap_ops_failed; |
1685 | 1678 | ||
1686 | out: | 1679 | out: |
1687 | return err; | 1680 | return err; |
1688 | 1681 | ||
1689 | tap_ops_failed: | 1682 | tap_ops_failed: |
1690 | rtnl_link_unregister(&ipgre_link_ops); | 1683 | rtnl_link_unregister(&ipgre_link_ops); |
1691 | rtnl_link_failed: | 1684 | rtnl_link_failed: |
1692 | inet_del_protocol(&ipgre_protocol, IPPROTO_GRE); | 1685 | inet_del_protocol(&ipgre_protocol, IPPROTO_GRE); |
1693 | add_proto_failed: | 1686 | add_proto_failed: |
1694 | unregister_pernet_device(&ipgre_net_ops); | 1687 | unregister_pernet_device(&ipgre_net_ops); |
1695 | goto out; | 1688 | goto out; |
1696 | } | 1689 | } |
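
ipgre_init() uses the kernel's stacked-goto unwind idiom: each failure label undoes exactly the registrations that succeeded before it, in reverse order, and the single out: label serves both the success and failure paths. A standalone illustration of the shape (stub names are mine):

	#include <stdio.h>

	static int step(const char *name, int ok)
	{
		printf("%s %s\n", name, ok ? "ok" : "failed");
		return ok ? 0 : -1;
	}

	static int demo_init(void)
	{
		int err;

		err = step("register pernet", 1);
		if (err)
			return err;
		err = step("add protocol", 1);
		if (err)
			goto undo_pernet;
		err = step("register link ops", 0);	/* simulate failure */
		if (err)
			goto undo_proto;
		return 0;

	undo_proto:
		step("del protocol", 1);	/* undo in reverse order */
	undo_pernet:
		step("unregister pernet", 1);
		return err;
	}

	int main(void)
	{
		return demo_init() ? 1 : 0;
	}
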
1697 | 1690 | ||
1698 | static void __exit ipgre_fini(void) | 1691 | static void __exit ipgre_fini(void) |
1699 | { | 1692 | { |
1700 | rtnl_link_unregister(&ipgre_tap_ops); | 1693 | rtnl_link_unregister(&ipgre_tap_ops); |
1701 | rtnl_link_unregister(&ipgre_link_ops); | 1694 | rtnl_link_unregister(&ipgre_link_ops); |
1702 | if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) | 1695 | if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) |
1703 | printk(KERN_INFO "ipgre close: can't remove protocol\n"); | 1696 | printk(KERN_INFO "ipgre close: can't remove protocol\n"); |
1704 | unregister_pernet_device(&ipgre_net_ops); | 1697 | unregister_pernet_device(&ipgre_net_ops); |
1705 | } | 1698 | } |
1706 | 1699 | ||
1707 | module_init(ipgre_init); | 1700 | module_init(ipgre_init); |
1708 | module_exit(ipgre_fini); | 1701 | module_exit(ipgre_fini); |
1709 | MODULE_LICENSE("GPL"); | 1702 | MODULE_LICENSE("GPL"); |
1710 | MODULE_ALIAS_RTNL_LINK("gre"); | 1703 | MODULE_ALIAS_RTNL_LINK("gre"); |
1711 | MODULE_ALIAS_RTNL_LINK("gretap"); | 1704 | MODULE_ALIAS_RTNL_LINK("gretap"); |
1712 | 1705 |
net/ipv4/ipip.c
1 | /* | 1 | /* |
2 | * Linux NET3: IP/IP protocol decoder. | 2 | * Linux NET3: IP/IP protocol decoder. |
3 | * | 3 | * |
4 | * Authors: | 4 | * Authors: |
5 | * Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95 | 5 | * Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95 |
6 | * | 6 | * |
7 | * Fixes: | 7 | * Fixes: |
8 | * Alan Cox : Merged and made usable non modular (it's so tiny it's silly as | 8 | * Alan Cox : Merged and made usable non modular (it's so tiny it's silly as |
9 | * a module taking up 2 pages). | 9 | * a module taking up 2 pages). |
10 | * Alan Cox : Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph) | 10 | * Alan Cox : Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph) |
11 | * to keep ip_forward happy. | 11 | * to keep ip_forward happy. |
12 | * Alan Cox : More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8). | 12 | * Alan Cox : More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8). |
13 | * Kai Schulte : Fixed #defines for IP_FIREWALL->FIREWALL | 13 | * Kai Schulte : Fixed #defines for IP_FIREWALL->FIREWALL |
14 | * David Woodhouse : Perform some basic ICMP handling. | 14 | * David Woodhouse : Perform some basic ICMP handling. |
15 | * IPIP Routing without decapsulation. | 15 | * IPIP Routing without decapsulation. |
16 | * Carlos Picoto : GRE over IP support | 16 | * Carlos Picoto : GRE over IP support |
17 | * Alexey Kuznetsov: Reworked. Really, now it is a truncated version of ipv4/ip_gre.c. | 17 | * Alexey Kuznetsov: Reworked. Really, now it is a truncated version of ipv4/ip_gre.c. |
18 | * I do not want to merge them together. | 18 | * I do not want to merge them together. |
19 | * | 19 | * |
20 | * This program is free software; you can redistribute it and/or | 20 | * This program is free software; you can redistribute it and/or |
21 | * modify it under the terms of the GNU General Public License | 21 | * modify it under the terms of the GNU General Public License |
22 | * as published by the Free Software Foundation; either version | 22 | * as published by the Free Software Foundation; either version |
23 | * 2 of the License, or (at your option) any later version. | 23 | * 2 of the License, or (at your option) any later version. |
24 | * | 24 | * |
25 | */ | 25 | */ |
26 | 26 | ||
27 | /* tunnel.c: an IP tunnel driver | 27 | /* tunnel.c: an IP tunnel driver |
28 | 28 | ||
29 | The purpose of this driver is to provide an IP tunnel through | 29 | The purpose of this driver is to provide an IP tunnel through |
30 | which you can tunnel network traffic transparently across subnets. | 30 | which you can tunnel network traffic transparently across subnets. |
31 | 31 | ||
32 | This was written by looking at Nick Holloway's dummy driver. | 32 | This was written by looking at Nick Holloway's dummy driver. |
33 | Thanks for the great code! | 33 | Thanks for the great code! |
34 | 34 | ||
35 | -Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95 | 35 | -Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95 |
36 | 36 | ||
37 | Minor tweaks: | 37 | Minor tweaks: |
38 | Cleaned up the code a little and added some pre-1.3.0 tweaks. | 38 | Cleaned up the code a little and added some pre-1.3.0 tweaks. |
39 | dev->hard_header/hard_header_len changed to use no headers. | 39 | dev->hard_header/hard_header_len changed to use no headers. |
40 | Comments/bracketing tweaked. | 40 | Comments/bracketing tweaked. |
41 | Made the tunnels use dev->name not tunnel: when error reporting. | 41 | Made the tunnels use dev->name not tunnel: when error reporting. |
42 | Added tx_dropped stat | 42 | Added tx_dropped stat |
43 | 43 | ||
44 | -Alan Cox (alan@lxorguk.ukuu.org.uk) 21 March 95 | 44 | -Alan Cox (alan@lxorguk.ukuu.org.uk) 21 March 95 |
45 | 45 | ||
46 | Reworked: | 46 | Reworked: |
47 | Changed to tunnel to destination gateway in addition to the | 47 | Changed to tunnel to destination gateway in addition to the |
48 | tunnel's pointopoint address | 48 | tunnel's pointopoint address |
49 | Almost completely rewritten | 49 | Almost completely rewritten |
50 | Note: There is currently no firewall or ICMP handling done. | 50 | Note: There is currently no firewall or ICMP handling done. |
51 | 51 | ||
52 | -Sam Lantinga (slouken@cs.ucdavis.edu) 02/13/96 | 52 | -Sam Lantinga (slouken@cs.ucdavis.edu) 02/13/96 |
53 | 53 | ||
54 | */ | 54 | */ |
55 | 55 | ||
56 | /* Things I wish I had known when writing the tunnel driver: | 56 | /* Things I wish I had known when writing the tunnel driver: |
57 | 57 | ||
58 | When the tunnel_xmit() function is called, the skb contains the | 58 | When the tunnel_xmit() function is called, the skb contains the |
59 | packet to be sent (plus a great deal of extra info), and dev | 59 | packet to be sent (plus a great deal of extra info), and dev |
60 | contains the tunnel device that _we_ are. | 60 | contains the tunnel device that _we_ are. |
61 | 61 | ||
62 | When we are passed a packet, we are expected to fill in the | 62 | When we are passed a packet, we are expected to fill in the |
63 | source address with our source IP address. | 63 | source address with our source IP address. |
64 | 64 | ||
65 | What is the proper way to allocate, copy and free a buffer? | 65 | What is the proper way to allocate, copy and free a buffer? |
66 | After you allocate it, it is a "0 length" chunk of memory | 66 | After you allocate it, it is a "0 length" chunk of memory |
67 | starting at zero. If you want to add headers to the buffer | 67 | starting at zero. If you want to add headers to the buffer |
68 | later, you'll have to call "skb_reserve(skb, amount)" with | 68 | later, you'll have to call "skb_reserve(skb, amount)" with |
69 | the amount of memory you want reserved. Then, you call | 69 | the amount of memory you want reserved. Then, you call |
70 | "skb_put(skb, amount)" with the amount of space you want in | 70 | "skb_put(skb, amount)" with the amount of space you want in |
71 | the buffer. skb_put() returns a pointer to the top (#0) of | 71 | the buffer. skb_put() returns a pointer to the top (#0) of |
72 | that buffer. skb->len is set to the amount of space you have | 72 | that buffer. skb->len is set to the amount of space you have |
73 | "allocated" with skb_put(). You can then write up to skb->len | 73 | "allocated" with skb_put(). You can then write up to skb->len |
74 | bytes to that buffer. If you need more, you can call skb_put() | 74 | bytes to that buffer. If you need more, you can call skb_put() |
75 | again with the additional amount of space you need. You can | 75 | again with the additional amount of space you need. You can |
76 | find out how much more space you can allocate by calling | 76 | find out how much more space you can allocate by calling |
77 | "skb_tailroom(skb)". | 77 | "skb_tailroom(skb)". |
78 | Now, to add header space, call "skb_push(skb, header_len)". | 78 | Now, to add header space, call "skb_push(skb, header_len)". |
79 | This creates space at the beginning of the buffer and returns | 79 | This creates space at the beginning of the buffer and returns |
80 | a pointer to this new space. If later you need to strip a | 80 | a pointer to this new space. If later you need to strip a |
81 | header from a buffer, call "skb_pull(skb, header_len)". | 81 | header from a buffer, call "skb_pull(skb, header_len)". |
82 | skb_headroom() will return how much space is left at the top | 82 | skb_headroom() will return how much space is left at the top |
83 | of the buffer (before the main data). Remember, this headroom | 83 | of the buffer (before the main data). Remember, this headroom |
84 | space must be reserved before the skb_put() function is called. | 84 | space must be reserved before the skb_put() function is called. |
85 | */ | 85 | */ |
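
The comment above is a compact tour of the skb headroom/tailroom API; the pointer arithmetic behind it is simple enough to model in a few lines (a toy userspace model of mine, mirroring skb_reserve/skb_put/skb_push):

	#include <assert.h>
	#include <string.h>

	/* toy model of the sk_buff data area described above */
	struct fake_skb {
		unsigned char buf[256];
		unsigned char *data;	/* start of packet data */
		unsigned char *tail;	/* end of packet data   */
	};

	/* skb_reserve(): set aside headroom by moving data and tail forward */
	static void reserve(struct fake_skb *s, int len)
	{
		s->data += len;
		s->tail += len;
	}

	/* skb_put(): claim len bytes of tailroom, return where they start */
	static unsigned char *put(struct fake_skb *s, int len)
	{
		unsigned char *p = s->tail;
		s->tail += len;
		return p;
	}

	/* skb_push(): claim len bytes of headroom for a new header */
	static unsigned char *push(struct fake_skb *s, int len)
	{
		s->data -= len;
		return s->data;
	}

	int main(void)
	{
		struct fake_skb s;

		s.data = s.tail = s.buf;
		reserve(&s, 20);		/* room for the outer iphdr */
		memset(put(&s, 100), 0, 100);	/* 100 bytes of payload */
		push(&s, 20);			/* prepend the tunnel header */
		assert(s.data == s.buf);	/* headroom fully consumed */
		return 0;
	}
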
86 | 86 | ||
87 | /* | 87 | /* |
88 | This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c | 88 | This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c |
89 | 89 | ||
90 | For comments look at net/ipv4/ip_gre.c --ANK | 90 | For comments look at net/ipv4/ip_gre.c --ANK |
91 | */ | 91 | */ |
92 | 92 | ||
93 | 93 | ||
94 | #include <linux/capability.h> | 94 | #include <linux/capability.h> |
95 | #include <linux/module.h> | 95 | #include <linux/module.h> |
96 | #include <linux/types.h> | 96 | #include <linux/types.h> |
97 | #include <linux/kernel.h> | 97 | #include <linux/kernel.h> |
98 | #include <linux/slab.h> | 98 | #include <linux/slab.h> |
99 | #include <asm/uaccess.h> | 99 | #include <asm/uaccess.h> |
100 | #include <linux/skbuff.h> | 100 | #include <linux/skbuff.h> |
101 | #include <linux/netdevice.h> | 101 | #include <linux/netdevice.h> |
102 | #include <linux/in.h> | 102 | #include <linux/in.h> |
103 | #include <linux/tcp.h> | 103 | #include <linux/tcp.h> |
104 | #include <linux/udp.h> | 104 | #include <linux/udp.h> |
105 | #include <linux/if_arp.h> | 105 | #include <linux/if_arp.h> |
106 | #include <linux/mroute.h> | 106 | #include <linux/mroute.h> |
107 | #include <linux/init.h> | 107 | #include <linux/init.h> |
108 | #include <linux/netfilter_ipv4.h> | 108 | #include <linux/netfilter_ipv4.h> |
109 | #include <linux/if_ether.h> | 109 | #include <linux/if_ether.h> |
110 | 110 | ||
111 | #include <net/sock.h> | 111 | #include <net/sock.h> |
112 | #include <net/ip.h> | 112 | #include <net/ip.h> |
113 | #include <net/icmp.h> | 113 | #include <net/icmp.h> |
114 | #include <net/ipip.h> | 114 | #include <net/ipip.h> |
115 | #include <net/inet_ecn.h> | 115 | #include <net/inet_ecn.h> |
116 | #include <net/xfrm.h> | 116 | #include <net/xfrm.h> |
117 | #include <net/net_namespace.h> | 117 | #include <net/net_namespace.h> |
118 | #include <net/netns/generic.h> | 118 | #include <net/netns/generic.h> |
119 | 119 | ||
120 | #define HASH_SIZE 16 | 120 | #define HASH_SIZE 16 |
121 | #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) | 121 | #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) |
122 | 122 | ||
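
HASH() XORs the address with itself shifted right four bits and keeps the low nibble, i.e. it folds the two nibbles of the address's low byte into one of HASH_SIZE = 16 buckets. A standalone check (the addresses are arbitrary):

	#include <stdio.h>
	#include <stdint.h>

	#define HASH_SIZE 16
	#define HASH(addr) ((((uint32_t)(addr)) ^ (((uint32_t)(addr)) >> 4)) & 0xF)

	int main(void)
	{
		uint32_t addrs[] = { 0x0100000a, 0x0a00000a, 0xc0a80101 };
		unsigned int i;

		for (i = 0; i < sizeof(addrs) / sizeof(addrs[0]); i++)
			printf("0x%08x -> bucket %u of %d\n",
			       addrs[i], (unsigned)HASH(addrs[i]), HASH_SIZE);
		return 0;
	}
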
123 | static int ipip_net_id __read_mostly; | 123 | static int ipip_net_id __read_mostly; |
124 | struct ipip_net { | 124 | struct ipip_net { |
125 | struct ip_tunnel *tunnels_r_l[HASH_SIZE]; | 125 | struct ip_tunnel *tunnels_r_l[HASH_SIZE]; |
126 | struct ip_tunnel *tunnels_r[HASH_SIZE]; | 126 | struct ip_tunnel *tunnels_r[HASH_SIZE]; |
127 | struct ip_tunnel *tunnels_l[HASH_SIZE]; | 127 | struct ip_tunnel *tunnels_l[HASH_SIZE]; |
128 | struct ip_tunnel *tunnels_wc[1]; | 128 | struct ip_tunnel *tunnels_wc[1]; |
129 | struct ip_tunnel **tunnels[4]; | 129 | struct ip_tunnel **tunnels[4]; |
130 | 130 | ||
131 | struct net_device *fb_tunnel_dev; | 131 | struct net_device *fb_tunnel_dev; |
132 | }; | 132 | }; |
133 | 133 | ||
134 | static void ipip_tunnel_init(struct net_device *dev); | 134 | static void ipip_tunnel_init(struct net_device *dev); |
135 | static void ipip_tunnel_setup(struct net_device *dev); | 135 | static void ipip_tunnel_setup(struct net_device *dev); |
136 | 136 | ||
137 | /* | 137 | /* |
138 | * Locking : hash tables are protected by RCU and a spinlock | 138 | * Locking : hash tables are protected by RCU and a spinlock |
139 | */ | 139 | */ |
140 | static DEFINE_SPINLOCK(ipip_lock); | 140 | static DEFINE_SPINLOCK(ipip_lock); |
141 | 141 | ||
142 | #define for_each_ip_tunnel_rcu(start) \ | 142 | #define for_each_ip_tunnel_rcu(start) \ |
143 | for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) | 143 | for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) |
144 | 144 | ||
145 | static struct ip_tunnel * ipip_tunnel_lookup(struct net *net, | 145 | static struct ip_tunnel * ipip_tunnel_lookup(struct net *net, |
146 | __be32 remote, __be32 local) | 146 | __be32 remote, __be32 local) |
147 | { | 147 | { |
148 | unsigned h0 = HASH(remote); | 148 | unsigned h0 = HASH(remote); |
149 | unsigned h1 = HASH(local); | 149 | unsigned h1 = HASH(local); |
150 | struct ip_tunnel *t; | 150 | struct ip_tunnel *t; |
151 | struct ipip_net *ipn = net_generic(net, ipip_net_id); | 151 | struct ipip_net *ipn = net_generic(net, ipip_net_id); |
152 | 152 | ||
153 | for_each_ip_tunnel_rcu(ipn->tunnels_r_l[h0 ^ h1]) | 153 | for_each_ip_tunnel_rcu(ipn->tunnels_r_l[h0 ^ h1]) |
154 | if (local == t->parms.iph.saddr && | 154 | if (local == t->parms.iph.saddr && |
155 | remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) | 155 | remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) |
156 | return t; | 156 | return t; |
157 | 157 | ||
158 | for_each_ip_tunnel_rcu(ipn->tunnels_r[h0]) | 158 | for_each_ip_tunnel_rcu(ipn->tunnels_r[h0]) |
159 | if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) | 159 | if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) |
160 | return t; | 160 | return t; |
161 | 161 | ||
162 | for_each_ip_tunnel_rcu(ipn->tunnels_l[h1]) | 162 | for_each_ip_tunnel_rcu(ipn->tunnels_l[h1]) |
163 | if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP)) | 163 | if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP)) |
164 | return t; | 164 | return t; |
165 | 165 | ||
166 | t = rcu_dereference(ipn->tunnels_wc[0]); | 166 | t = rcu_dereference(ipn->tunnels_wc[0]); |
167 | if (t && (t->dev->flags&IFF_UP)) | 167 | if (t && (t->dev->flags&IFF_UP)) |
168 | return t; | 168 | return t; |
169 | return NULL; | 169 | return NULL; |
170 | } | 170 | } |
171 | 171 | ||
172 | static struct ip_tunnel **__ipip_bucket(struct ipip_net *ipn, | 172 | static struct ip_tunnel **__ipip_bucket(struct ipip_net *ipn, |
173 | struct ip_tunnel_parm *parms) | 173 | struct ip_tunnel_parm *parms) |
174 | { | 174 | { |
175 | __be32 remote = parms->iph.daddr; | 175 | __be32 remote = parms->iph.daddr; |
176 | __be32 local = parms->iph.saddr; | 176 | __be32 local = parms->iph.saddr; |
177 | unsigned h = 0; | 177 | unsigned h = 0; |
178 | int prio = 0; | 178 | int prio = 0; |
179 | 179 | ||
180 | if (remote) { | 180 | if (remote) { |
181 | prio |= 2; | 181 | prio |= 2; |
182 | h ^= HASH(remote); | 182 | h ^= HASH(remote); |
183 | } | 183 | } |
184 | if (local) { | 184 | if (local) { |
185 | prio |= 1; | 185 | prio |= 1; |
186 | h ^= HASH(local); | 186 | h ^= HASH(local); |
187 | } | 187 | } |
188 | return &ipn->tunnels[prio][h]; | 188 | return &ipn->tunnels[prio][h]; |
189 | } | 189 | } |
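
__ipip_bucket() encodes which endpoints a tunnel specifies as a 2-bit priority: bit 1 is set when a remote address is given, bit 0 when a local one is, selecting among the four hash tables declared in struct ipip_net above. A standalone enumeration of the mapping (the tunnels[] index order is assumed from ipip_init_net(), which lies beyond this hunk):

	#include <stdio.h>

	int main(void)
	{
		/* assumed wiring of ipn->tunnels[]; not shown in this hunk */
		static const char *table[4] = {
			"tunnels_wc",	/* prio 0: neither endpoint (wildcard) */
			"tunnels_l",	/* prio 1: local address only */
			"tunnels_r",	/* prio 2: remote address only */
			"tunnels_r_l",	/* prio 3: both endpoints set */
		};
		int remote, local;

		for (remote = 0; remote <= 1; remote++)
			for (local = 0; local <= 1; local++) {
				int prio = (remote ? 2 : 0) | (local ? 1 : 0);
				printf("remote=%d local=%d -> %s\n",
				       remote, local, table[prio]);
			}
		return 0;
	}

Note that ipip_tunnel_lookup() above probes the tables from most to least specific, so an exact saddr/daddr match always beats a partial or wildcard one.
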
190 | 190 | ||
191 | static inline struct ip_tunnel **ipip_bucket(struct ipip_net *ipn, | 191 | static inline struct ip_tunnel **ipip_bucket(struct ipip_net *ipn, |
192 | struct ip_tunnel *t) | 192 | struct ip_tunnel *t) |
193 | { | 193 | { |
194 | return __ipip_bucket(ipn, &t->parms); | 194 | return __ipip_bucket(ipn, &t->parms); |
195 | } | 195 | } |
196 | 196 | ||
197 | static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t) | 197 | static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t) |
198 | { | 198 | { |
199 | struct ip_tunnel **tp; | 199 | struct ip_tunnel **tp; |
200 | 200 | ||
201 | for (tp = ipip_bucket(ipn, t); *tp; tp = &(*tp)->next) { | 201 | for (tp = ipip_bucket(ipn, t); *tp; tp = &(*tp)->next) { |
202 | if (t == *tp) { | 202 | if (t == *tp) { |
203 | spin_lock_bh(&ipip_lock); | 203 | spin_lock_bh(&ipip_lock); |
204 | *tp = t->next; | 204 | *tp = t->next; |
205 | spin_unlock_bh(&ipip_lock); | 205 | spin_unlock_bh(&ipip_lock); |
206 | break; | 206 | break; |
207 | } | 207 | } |
208 | } | 208 | } |
209 | } | 209 | } |
210 | 210 | ||
211 | static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t) | 211 | static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t) |
212 | { | 212 | { |
213 | struct ip_tunnel **tp = ipip_bucket(ipn, t); | 213 | struct ip_tunnel **tp = ipip_bucket(ipn, t); |
214 | 214 | ||
215 | spin_lock_bh(&ipip_lock); | 215 | spin_lock_bh(&ipip_lock); |
216 | t->next = *tp; | 216 | t->next = *tp; |
217 | rcu_assign_pointer(*tp, t); | 217 | rcu_assign_pointer(*tp, t); |
218 | spin_unlock_bh(&ipip_lock); | 218 | spin_unlock_bh(&ipip_lock); |
219 | } | 219 | } |
220 | 220 | ||
221 | static struct ip_tunnel * ipip_tunnel_locate(struct net *net, | 221 | static struct ip_tunnel * ipip_tunnel_locate(struct net *net, |
222 | struct ip_tunnel_parm *parms, int create) | 222 | struct ip_tunnel_parm *parms, int create) |
223 | { | 223 | { |
224 | __be32 remote = parms->iph.daddr; | 224 | __be32 remote = parms->iph.daddr; |
225 | __be32 local = parms->iph.saddr; | 225 | __be32 local = parms->iph.saddr; |
226 | struct ip_tunnel *t, **tp, *nt; | 226 | struct ip_tunnel *t, **tp, *nt; |
227 | struct net_device *dev; | 227 | struct net_device *dev; |
228 | char name[IFNAMSIZ]; | 228 | char name[IFNAMSIZ]; |
229 | struct ipip_net *ipn = net_generic(net, ipip_net_id); | 229 | struct ipip_net *ipn = net_generic(net, ipip_net_id); |
230 | 230 | ||
231 | for (tp = __ipip_bucket(ipn, parms); (t = *tp) != NULL; tp = &t->next) { | 231 | for (tp = __ipip_bucket(ipn, parms); (t = *tp) != NULL; tp = &t->next) { |
232 | if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) | 232 | if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) |
233 | return t; | 233 | return t; |
234 | } | 234 | } |
235 | if (!create) | 235 | if (!create) |
236 | return NULL; | 236 | return NULL; |
237 | 237 | ||
238 | if (parms->name[0]) | 238 | if (parms->name[0]) |
239 | strlcpy(name, parms->name, IFNAMSIZ); | 239 | strlcpy(name, parms->name, IFNAMSIZ); |
240 | else | 240 | else |
241 | sprintf(name, "tunl%%d"); | 241 | sprintf(name, "tunl%%d"); |
242 | 242 | ||
243 | dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup); | 243 | dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup); |
244 | if (dev == NULL) | 244 | if (dev == NULL) |
245 | return NULL; | 245 | return NULL; |
246 | 246 | ||
247 | dev_net_set(dev, net); | 247 | dev_net_set(dev, net); |
248 | 248 | ||
249 | if (strchr(name, '%')) { | 249 | if (strchr(name, '%')) { |
250 | if (dev_alloc_name(dev, name) < 0) | 250 | if (dev_alloc_name(dev, name) < 0) |
251 | goto failed_free; | 251 | goto failed_free; |
252 | } | 252 | } |
253 | 253 | ||
254 | nt = netdev_priv(dev); | 254 | nt = netdev_priv(dev); |
255 | nt->parms = *parms; | 255 | nt->parms = *parms; |
256 | 256 | ||
257 | ipip_tunnel_init(dev); | 257 | ipip_tunnel_init(dev); |
258 | 258 | ||
259 | if (register_netdevice(dev) < 0) | 259 | if (register_netdevice(dev) < 0) |
260 | goto failed_free; | 260 | goto failed_free; |
261 | 261 | ||
262 | dev_hold(dev); | 262 | dev_hold(dev); |
263 | ipip_tunnel_link(ipn, nt); | 263 | ipip_tunnel_link(ipn, nt); |
264 | return nt; | 264 | return nt; |
265 | 265 | ||
266 | failed_free: | 266 | failed_free: |
267 | free_netdev(dev); | 267 | free_netdev(dev); |
268 | return NULL; | 268 | return NULL; |
269 | } | 269 | } |
270 | 270 | ||
271 | static void ipip_tunnel_uninit(struct net_device *dev) | 271 | static void ipip_tunnel_uninit(struct net_device *dev) |
272 | { | 272 | { |
273 | struct net *net = dev_net(dev); | 273 | struct net *net = dev_net(dev); |
274 | struct ipip_net *ipn = net_generic(net, ipip_net_id); | 274 | struct ipip_net *ipn = net_generic(net, ipip_net_id); |
275 | 275 | ||
276 | if (dev == ipn->fb_tunnel_dev) { | 276 | if (dev == ipn->fb_tunnel_dev) { |
277 | spin_lock_bh(&ipip_lock); | 277 | spin_lock_bh(&ipip_lock); |
278 | ipn->tunnels_wc[0] = NULL; | 278 | ipn->tunnels_wc[0] = NULL; |
279 | spin_unlock_bh(&ipip_lock); | 279 | spin_unlock_bh(&ipip_lock); |
280 | } else | 280 | } else |
281 | ipip_tunnel_unlink(ipn, netdev_priv(dev)); | 281 | ipip_tunnel_unlink(ipn, netdev_priv(dev)); |
282 | dev_put(dev); | 282 | dev_put(dev); |
283 | } | 283 | } |
284 | 284 | ||
285 | static int ipip_err(struct sk_buff *skb, u32 info) | 285 | static int ipip_err(struct sk_buff *skb, u32 info) |
286 | { | 286 | { |
287 | 287 | ||
288 | /* All routers (except for Linux) return only | 288 | /* All routers (except for Linux) return only |
289 | 8 bytes of packet payload. This means that precise relaying | 289 | 8 bytes of packet payload. This means that precise relaying |
290 | of ICMP in the real Internet is infeasible. | 290 | of ICMP in the real Internet is infeasible. |
291 | */ | 291 | */ |
292 | struct iphdr *iph = (struct iphdr *)skb->data; | 292 | struct iphdr *iph = (struct iphdr *)skb->data; |
293 | const int type = icmp_hdr(skb)->type; | 293 | const int type = icmp_hdr(skb)->type; |
294 | const int code = icmp_hdr(skb)->code; | 294 | const int code = icmp_hdr(skb)->code; |
295 | struct ip_tunnel *t; | 295 | struct ip_tunnel *t; |
296 | int err; | 296 | int err; |
297 | 297 | ||
298 | switch (type) { | 298 | switch (type) { |
299 | default: | 299 | default: |
300 | case ICMP_PARAMETERPROB: | 300 | case ICMP_PARAMETERPROB: |
301 | return 0; | 301 | return 0; |
302 | 302 | ||
303 | case ICMP_DEST_UNREACH: | 303 | case ICMP_DEST_UNREACH: |
304 | switch (code) { | 304 | switch (code) { |
305 | case ICMP_SR_FAILED: | 305 | case ICMP_SR_FAILED: |
306 | case ICMP_PORT_UNREACH: | 306 | case ICMP_PORT_UNREACH: |
307 | /* Impossible event. */ | 307 | /* Impossible event. */ |
308 | return 0; | 308 | return 0; |
309 | case ICMP_FRAG_NEEDED: | 309 | case ICMP_FRAG_NEEDED: |
310 | /* Soft state for pmtu is maintained by IP core. */ | 310 | /* Soft state for pmtu is maintained by IP core. */ |
311 | return 0; | 311 | return 0; |
312 | default: | 312 | default: |
313 | /* All others are translated to HOST_UNREACH. | 313 | /* All others are translated to HOST_UNREACH. |
314 | rfc2003 contains "deep thoughts" about NET_UNREACH, | 314 | rfc2003 contains "deep thoughts" about NET_UNREACH, |
315 | I believe they are just ether pollution. --ANK | 315 | I believe they are just ether pollution. --ANK |
316 | */ | 316 | */ |
317 | break; | 317 | break; |
318 | } | 318 | } |
319 | break; | 319 | break; |
320 | case ICMP_TIME_EXCEEDED: | 320 | case ICMP_TIME_EXCEEDED: |
321 | if (code != ICMP_EXC_TTL) | 321 | if (code != ICMP_EXC_TTL) |
322 | return 0; | 322 | return 0; |
323 | break; | 323 | break; |
324 | } | 324 | } |
325 | 325 | ||
326 | err = -ENOENT; | 326 | err = -ENOENT; |
327 | 327 | ||
328 | rcu_read_lock(); | 328 | rcu_read_lock(); |
329 | t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr); | 329 | t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr); |
330 | if (t == NULL || t->parms.iph.daddr == 0) | 330 | if (t == NULL || t->parms.iph.daddr == 0) |
331 | goto out; | 331 | goto out; |
332 | 332 | ||
333 | err = 0; | 333 | err = 0; |
334 | if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) | 334 | if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) |
335 | goto out; | 335 | goto out; |
336 | 336 | ||
337 | if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO)) | 337 | if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO)) |
338 | t->err_count++; | 338 | t->err_count++; |
339 | else | 339 | else |
340 | t->err_count = 1; | 340 | t->err_count = 1; |
341 | t->err_time = jiffies; | 341 | t->err_time = jiffies; |
342 | out: | 342 | out: |
343 | rcu_read_unlock(); | 343 | rcu_read_unlock(); |
344 | return err; | 344 | return err; |
345 | } | 345 | } |
346 | 346 | ||
347 | static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph, | 347 | static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph, |
348 | struct sk_buff *skb) | 348 | struct sk_buff *skb) |
349 | { | 349 | { |
350 | struct iphdr *inner_iph = ip_hdr(skb); | 350 | struct iphdr *inner_iph = ip_hdr(skb); |
351 | 351 | ||
352 | if (INET_ECN_is_ce(outer_iph->tos)) | 352 | if (INET_ECN_is_ce(outer_iph->tos)) |
353 | IP_ECN_set_ce(inner_iph); | 353 | IP_ECN_set_ce(inner_iph); |
354 | } | 354 | } |
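
ECN lives in the low two bits of the TOS byte; decapsulation propagates Congestion Experienced (CE, binary 11) from the outer header to the inner one so the congestion mark survives the tunnel. A standalone check of the bit test (codepoints per RFC 3168; the demo is mine):

	#include <stdio.h>
	#include <stdint.h>

	#define INET_ECN_MASK 3
	#define INET_ECN_CE   3	/* Congestion Experienced */

	static int is_ce(uint8_t tos)
	{
		return (tos & INET_ECN_MASK) == INET_ECN_CE;
	}

	int main(void)
	{
		uint8_t outer_tos = 0x03;	/* CE set by a congested router */
		uint8_t inner_tos = 0x02;	/* ECT(0) */

		if (is_ce(outer_tos))
			/* what IP_ECN_set_ce() amounts to; the kernel
			 * version also patches the IP header checksum */
			inner_tos |= INET_ECN_CE;
		printf("inner tos: 0x%02x\n", inner_tos);
		return 0;
	}
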
355 | 355 | ||
356 | static int ipip_rcv(struct sk_buff *skb) | 356 | static int ipip_rcv(struct sk_buff *skb) |
357 | { | 357 | { |
358 | struct ip_tunnel *tunnel; | 358 | struct ip_tunnel *tunnel; |
359 | const struct iphdr *iph = ip_hdr(skb); | 359 | const struct iphdr *iph = ip_hdr(skb); |
360 | 360 | ||
361 | rcu_read_lock(); | 361 | rcu_read_lock(); |
362 | if ((tunnel = ipip_tunnel_lookup(dev_net(skb->dev), | 362 | if ((tunnel = ipip_tunnel_lookup(dev_net(skb->dev), |
363 | iph->saddr, iph->daddr)) != NULL) { | 363 | iph->saddr, iph->daddr)) != NULL) { |
364 | if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { | 364 | if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { |
365 | rcu_read_unlock(); | 365 | rcu_read_unlock(); |
366 | kfree_skb(skb); | 366 | kfree_skb(skb); |
367 | return 0; | 367 | return 0; |
368 | } | 368 | } |
369 | 369 | ||
370 | secpath_reset(skb); | 370 | secpath_reset(skb); |
371 | 371 | ||
372 | skb->mac_header = skb->network_header; | 372 | skb->mac_header = skb->network_header; |
373 | skb_reset_network_header(skb); | 373 | skb_reset_network_header(skb); |
374 | skb->protocol = htons(ETH_P_IP); | 374 | skb->protocol = htons(ETH_P_IP); |
375 | skb->pkt_type = PACKET_HOST; | 375 | skb->pkt_type = PACKET_HOST; |
376 | 376 | ||
377 | tunnel->dev->stats.rx_packets++; | 377 | skb_tunnel_rx(skb, tunnel->dev); |
378 | tunnel->dev->stats.rx_bytes += skb->len; | 378 | |
379 | skb->dev = tunnel->dev; | ||
380 | skb_dst_drop(skb); | ||
381 | nf_reset(skb); | ||
382 | ipip_ecn_decapsulate(iph, skb); | 379 | ipip_ecn_decapsulate(iph, skb); |
383 | netif_rx(skb); | 380 | netif_rx(skb); |
384 | rcu_read_unlock(); | 381 | rcu_read_unlock(); |
385 | return 0; | 382 | return 0; |
386 | } | 383 | } |
387 | rcu_read_unlock(); | 384 | rcu_read_unlock(); |
388 | 385 | ||
389 | return -1; | 386 | return -1; |
390 | } | 387 | } |
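
This hunk is the heart of the commit: five lines of per-tunnel bookkeeping collapse into one call. Beyond factoring out the stats updates, device reassignment, dst drop and netfilter reset seen on the left, the new helper also zeroes skb->rxhash so receive steering is recomputed for the decapsulated inner flow. A sketch consistent with the removed lines (the authoritative definition is added to include/net/dst.h; consult that hunk for the exact body):

	static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev)
	{
		dev->stats.rx_packets++;	/* accounting, as before */
		dev->stats.rx_bytes += skb->len;
		skb->dev = dev;			/* now owned by the tunnel dev */
		skb->rxhash = 0;		/* force rxhash recomputation */
		skb_dst_drop(skb);		/* route again after decap */
		nf_reset(skb);			/* fresh netfilter state */
	}
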
391 | 388 | ||
392 | /* | 389 | /* |
393 | * This function assumes it is being called from dev_queue_xmit() | 390 | * This function assumes it is being called from dev_queue_xmit() |
394 | * and that skb is filled properly by that function. | 391 | * and that skb is filled properly by that function. |
395 | */ | 392 | */ |
396 | 393 | ||
397 | static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) | 394 | static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) |
398 | { | 395 | { |
399 | struct ip_tunnel *tunnel = netdev_priv(dev); | 396 | struct ip_tunnel *tunnel = netdev_priv(dev); |
400 | struct net_device_stats *stats = &dev->stats; | 397 | struct net_device_stats *stats = &dev->stats; |
401 | struct netdev_queue *txq = netdev_get_tx_queue(dev, 0); | 398 | struct netdev_queue *txq = netdev_get_tx_queue(dev, 0); |
402 | struct iphdr *tiph = &tunnel->parms.iph; | 399 | struct iphdr *tiph = &tunnel->parms.iph; |
403 | u8 tos = tunnel->parms.iph.tos; | 400 | u8 tos = tunnel->parms.iph.tos; |
404 | __be16 df = tiph->frag_off; | 401 | __be16 df = tiph->frag_off; |
405 | struct rtable *rt; /* Route to the other host */ | 402 | struct rtable *rt; /* Route to the other host */ |
406 | struct net_device *tdev; /* Device to other host */ | 403 | struct net_device *tdev; /* Device to other host */ |
407 | struct iphdr *old_iph = ip_hdr(skb); | 404 | struct iphdr *old_iph = ip_hdr(skb); |
408 | struct iphdr *iph; /* Our new IP header */ | 405 | struct iphdr *iph; /* Our new IP header */ |
409 | unsigned int max_headroom; /* The extra header space needed */ | 406 | unsigned int max_headroom; /* The extra header space needed */ |
410 | __be32 dst = tiph->daddr; | 407 | __be32 dst = tiph->daddr; |
411 | int mtu; | 408 | int mtu; |
412 | 409 | ||
413 | if (skb->protocol != htons(ETH_P_IP)) | 410 | if (skb->protocol != htons(ETH_P_IP)) |
414 | goto tx_error; | 411 | goto tx_error; |
415 | 412 | ||
416 | if (tos&1) | 413 | if (tos&1) |
417 | tos = old_iph->tos; | 414 | tos = old_iph->tos; |
418 | 415 | ||
419 | if (!dst) { | 416 | if (!dst) { |
420 | /* NBMA tunnel */ | 417 | /* NBMA tunnel */ |
421 | if ((rt = skb_rtable(skb)) == NULL) { | 418 | if ((rt = skb_rtable(skb)) == NULL) { |
422 | stats->tx_fifo_errors++; | 419 | stats->tx_fifo_errors++; |
423 | goto tx_error; | 420 | goto tx_error; |
424 | } | 421 | } |
425 | if ((dst = rt->rt_gateway) == 0) | 422 | if ((dst = rt->rt_gateway) == 0) |
426 | goto tx_error_icmp; | 423 | goto tx_error_icmp; |
427 | } | 424 | } |
428 | 425 | ||
429 | { | 426 | { |
430 | struct flowi fl = { .oif = tunnel->parms.link, | 427 | struct flowi fl = { .oif = tunnel->parms.link, |
431 | .nl_u = { .ip4_u = | 428 | .nl_u = { .ip4_u = |
432 | { .daddr = dst, | 429 | { .daddr = dst, |
433 | .saddr = tiph->saddr, | 430 | .saddr = tiph->saddr, |
434 | .tos = RT_TOS(tos) } }, | 431 | .tos = RT_TOS(tos) } }, |
435 | .proto = IPPROTO_IPIP }; | 432 | .proto = IPPROTO_IPIP }; |
436 | if (ip_route_output_key(dev_net(dev), &rt, &fl)) { | 433 | if (ip_route_output_key(dev_net(dev), &rt, &fl)) { |
437 | stats->tx_carrier_errors++; | 434 | stats->tx_carrier_errors++; |
438 | goto tx_error_icmp; | 435 | goto tx_error_icmp; |
439 | } | 436 | } |
440 | } | 437 | } |
441 | tdev = rt->u.dst.dev; | 438 | tdev = rt->u.dst.dev; |
442 | 439 | ||
443 | if (tdev == dev) { | 440 | if (tdev == dev) { |
444 | ip_rt_put(rt); | 441 | ip_rt_put(rt); |
445 | stats->collisions++; | 442 | stats->collisions++; |
446 | goto tx_error; | 443 | goto tx_error; |
447 | } | 444 | } |
448 | 445 | ||
449 | df |= old_iph->frag_off & htons(IP_DF); | 446 | df |= old_iph->frag_off & htons(IP_DF); |
450 | 447 | ||
451 | if (df) { | 448 | if (df) { |
452 | mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr); | 449 | mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr); |
453 | 450 | ||
454 | if (mtu < 68) { | 451 | if (mtu < 68) { |
455 | stats->collisions++; | 452 | stats->collisions++; |
456 | ip_rt_put(rt); | 453 | ip_rt_put(rt); |
457 | goto tx_error; | 454 | goto tx_error; |
458 | } | 455 | } |
459 | 456 | ||
460 | if (skb_dst(skb)) | 457 | if (skb_dst(skb)) |
461 | skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); | 458 | skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); |
462 | 459 | ||
463 | if ((old_iph->frag_off & htons(IP_DF)) && | 460 | if ((old_iph->frag_off & htons(IP_DF)) && |
464 | mtu < ntohs(old_iph->tot_len)) { | 461 | mtu < ntohs(old_iph->tot_len)) { |
465 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, | 462 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, |
466 | htonl(mtu)); | 463 | htonl(mtu)); |
467 | ip_rt_put(rt); | 464 | ip_rt_put(rt); |
468 | goto tx_error; | 465 | goto tx_error; |
469 | } | 466 | } |
470 | } | 467 | } |
471 | 468 | ||
472 | if (tunnel->err_count > 0) { | 469 | if (tunnel->err_count > 0) { |
473 | if (time_before(jiffies, | 470 | if (time_before(jiffies, |
474 | tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { | 471 | tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { |
475 | tunnel->err_count--; | 472 | tunnel->err_count--; |
476 | dst_link_failure(skb); | 473 | dst_link_failure(skb); |
477 | } else | 474 | } else |
478 | tunnel->err_count = 0; | 475 | tunnel->err_count = 0; |
479 | } | 476 | } |
480 | 477 | ||
481 | /* | 478 | /* |
482 | * Okay, now see if we can stuff it in the buffer as-is. | 479 | * Okay, now see if we can stuff it in the buffer as-is. |
483 | */ | 480 | */ |
484 | max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr)); | 481 | max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr)); |
485 | 482 | ||
486 | if (skb_headroom(skb) < max_headroom || skb_shared(skb) || | 483 | if (skb_headroom(skb) < max_headroom || skb_shared(skb) || |
487 | (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { | 484 | (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { |
488 | struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); | 485 | struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); |
489 | if (!new_skb) { | 486 | if (!new_skb) { |
490 | ip_rt_put(rt); | 487 | ip_rt_put(rt); |
491 | txq->tx_dropped++; | 488 | txq->tx_dropped++; |
492 | dev_kfree_skb(skb); | 489 | dev_kfree_skb(skb); |
493 | return NETDEV_TX_OK; | 490 | return NETDEV_TX_OK; |
494 | } | 491 | } |
495 | if (skb->sk) | 492 | if (skb->sk) |
496 | skb_set_owner_w(new_skb, skb->sk); | 493 | skb_set_owner_w(new_skb, skb->sk); |
497 | dev_kfree_skb(skb); | 494 | dev_kfree_skb(skb); |
498 | skb = new_skb; | 495 | skb = new_skb; |
499 | old_iph = ip_hdr(skb); | 496 | old_iph = ip_hdr(skb); |
500 | } | 497 | } |
501 | 498 | ||
502 | skb->transport_header = skb->network_header; | 499 | skb->transport_header = skb->network_header; |
503 | skb_push(skb, sizeof(struct iphdr)); | 500 | skb_push(skb, sizeof(struct iphdr)); |
504 | skb_reset_network_header(skb); | 501 | skb_reset_network_header(skb); |
505 | memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); | 502 | memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); |
506 | IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | | 503 | IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | |
507 | IPSKB_REROUTED); | 504 | IPSKB_REROUTED); |
508 | skb_dst_drop(skb); | 505 | skb_dst_drop(skb); |
509 | skb_dst_set(skb, &rt->u.dst); | 506 | skb_dst_set(skb, &rt->u.dst); |
510 | 507 | ||
511 | /* | 508 | /* |
512 | * Push down and install the IPIP header. | 509 | * Push down and install the IPIP header. |
513 | */ | 510 | */ |
514 | 511 | ||
515 | iph = ip_hdr(skb); | 512 | iph = ip_hdr(skb); |
516 | iph->version = 4; | 513 | iph->version = 4; |
517 | iph->ihl = sizeof(struct iphdr)>>2; | 514 | iph->ihl = sizeof(struct iphdr)>>2; |
518 | iph->frag_off = df; | 515 | iph->frag_off = df; |
519 | iph->protocol = IPPROTO_IPIP; | 516 | iph->protocol = IPPROTO_IPIP; |
520 | iph->tos = INET_ECN_encapsulate(tos, old_iph->tos); | 517 | iph->tos = INET_ECN_encapsulate(tos, old_iph->tos); |
521 | iph->daddr = rt->rt_dst; | 518 | iph->daddr = rt->rt_dst; |
522 | iph->saddr = rt->rt_src; | 519 | iph->saddr = rt->rt_src; |
523 | 520 | ||
524 | if ((iph->ttl = tiph->ttl) == 0) | 521 | if ((iph->ttl = tiph->ttl) == 0) |
525 | iph->ttl = old_iph->ttl; | 522 | iph->ttl = old_iph->ttl; |
526 | 523 | ||
527 | nf_reset(skb); | 524 | nf_reset(skb); |
528 | 525 | ||
529 | IPTUNNEL_XMIT(); | 526 | IPTUNNEL_XMIT(); |
530 | return NETDEV_TX_OK; | 527 | return NETDEV_TX_OK; |
531 | 528 | ||
532 | tx_error_icmp: | 529 | tx_error_icmp: |
533 | dst_link_failure(skb); | 530 | dst_link_failure(skb); |
534 | tx_error: | 531 | tx_error: |
535 | stats->tx_errors++; | 532 | stats->tx_errors++; |
536 | dev_kfree_skb(skb); | 533 | dev_kfree_skb(skb); |
537 | return NETDEV_TX_OK; | 534 | return NETDEV_TX_OK; |
538 | } | 535 | } |
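
The DF branch above implements path MTU for the tunnel: the usable inner MTU is the route's MTU minus the 20-byte outer IPv4 header, and an oversized DF packet is bounced with ICMP_FRAG_NEEDED carrying that MTU. A standalone check of the arithmetic:

	#include <stdio.h>

	#define OUTER_IPHDR 20	/* sizeof(struct iphdr), no options */

	int main(void)
	{
		int route_mtu = 1500;			 /* dst_mtu() of the path */
		int inner_mtu = route_mtu - OUTER_IPHDR; /* 1480 */
		int pkt_len = 1490, df = 1;

		if (inner_mtu < 68)		/* IPv4 minimum, as in the code */
			return 1;
		if (df && pkt_len > inner_mtu)
			printf("ICMP_FRAG_NEEDED, mtu=%d\n", inner_mtu);
		return 0;
	}
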
539 | 536 | ||
540 | static void ipip_tunnel_bind_dev(struct net_device *dev) | 537 | static void ipip_tunnel_bind_dev(struct net_device *dev) |
541 | { | 538 | { |
542 | struct net_device *tdev = NULL; | 539 | struct net_device *tdev = NULL; |
543 | struct ip_tunnel *tunnel; | 540 | struct ip_tunnel *tunnel; |
544 | struct iphdr *iph; | 541 | struct iphdr *iph; |
545 | 542 | ||
546 | tunnel = netdev_priv(dev); | 543 | tunnel = netdev_priv(dev); |
547 | iph = &tunnel->parms.iph; | 544 | iph = &tunnel->parms.iph; |
548 | 545 | ||
549 | if (iph->daddr) { | 546 | if (iph->daddr) { |
550 | struct flowi fl = { .oif = tunnel->parms.link, | 547 | struct flowi fl = { .oif = tunnel->parms.link, |
551 | .nl_u = { .ip4_u = | 548 | .nl_u = { .ip4_u = |
552 | { .daddr = iph->daddr, | 549 | { .daddr = iph->daddr, |
553 | .saddr = iph->saddr, | 550 | .saddr = iph->saddr, |
554 | .tos = RT_TOS(iph->tos) } }, | 551 | .tos = RT_TOS(iph->tos) } }, |
555 | .proto = IPPROTO_IPIP }; | 552 | .proto = IPPROTO_IPIP }; |
556 | struct rtable *rt; | 553 | struct rtable *rt; |
557 | if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { | 554 | if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { |
558 | tdev = rt->u.dst.dev; | 555 | tdev = rt->u.dst.dev; |
559 | ip_rt_put(rt); | 556 | ip_rt_put(rt); |
560 | } | 557 | } |
561 | dev->flags |= IFF_POINTOPOINT; | 558 | dev->flags |= IFF_POINTOPOINT; |
562 | } | 559 | } |
563 | 560 | ||
564 | if (!tdev && tunnel->parms.link) | 561 | if (!tdev && tunnel->parms.link) |
565 | tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link); | 562 | tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link); |
566 | 563 | ||
567 | if (tdev) { | 564 | if (tdev) { |
568 | dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); | 565 | dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); |
569 | dev->mtu = tdev->mtu - sizeof(struct iphdr); | 566 | dev->mtu = tdev->mtu - sizeof(struct iphdr); |
570 | } | 567 | } |
571 | dev->iflink = tunnel->parms.link; | 568 | dev->iflink = tunnel->parms.link; |
572 | } | 569 | } |
573 | 570 | ||
574 | static int | 571 | static int |
575 | ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) | 572 | ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) |
576 | { | 573 | { |
577 | int err = 0; | 574 | int err = 0; |
578 | struct ip_tunnel_parm p; | 575 | struct ip_tunnel_parm p; |
579 | struct ip_tunnel *t; | 576 | struct ip_tunnel *t; |
580 | struct net *net = dev_net(dev); | 577 | struct net *net = dev_net(dev); |
581 | struct ipip_net *ipn = net_generic(net, ipip_net_id); | 578 | struct ipip_net *ipn = net_generic(net, ipip_net_id); |
582 | 579 | ||
583 | switch (cmd) { | 580 | switch (cmd) { |
584 | case SIOCGETTUNNEL: | 581 | case SIOCGETTUNNEL: |
585 | t = NULL; | 582 | t = NULL; |
586 | if (dev == ipn->fb_tunnel_dev) { | 583 | if (dev == ipn->fb_tunnel_dev) { |
587 | if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { | 584 | if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { |
588 | err = -EFAULT; | 585 | err = -EFAULT; |
589 | break; | 586 | break; |
590 | } | 587 | } |
591 | t = ipip_tunnel_locate(net, &p, 0); | 588 | t = ipip_tunnel_locate(net, &p, 0); |
592 | } | 589 | } |
593 | if (t == NULL) | 590 | if (t == NULL) |
594 | t = netdev_priv(dev); | 591 | t = netdev_priv(dev); |
595 | memcpy(&p, &t->parms, sizeof(p)); | 592 | memcpy(&p, &t->parms, sizeof(p)); |
596 | if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) | 593 | if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) |
597 | err = -EFAULT; | 594 | err = -EFAULT; |
598 | break; | 595 | break; |
599 | 596 | ||
600 | case SIOCADDTUNNEL: | 597 | case SIOCADDTUNNEL: |
601 | case SIOCCHGTUNNEL: | 598 | case SIOCCHGTUNNEL: |
602 | err = -EPERM; | 599 | err = -EPERM; |
603 | if (!capable(CAP_NET_ADMIN)) | 600 | if (!capable(CAP_NET_ADMIN)) |
604 | goto done; | 601 | goto done; |
605 | 602 | ||
606 | err = -EFAULT; | 603 | err = -EFAULT; |
607 | if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) | 604 | if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) |
608 | goto done; | 605 | goto done; |
609 | 606 | ||
610 | err = -EINVAL; | 607 | err = -EINVAL; |
611 | if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP || | 608 | if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP || |
612 | p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF))) | 609 | p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF))) |
613 | goto done; | 610 | goto done; |
614 | if (p.iph.ttl) | 611 | if (p.iph.ttl) |
615 | p.iph.frag_off |= htons(IP_DF); | 612 | p.iph.frag_off |= htons(IP_DF); |
616 | 613 | ||
617 | t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL); | 614 | t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL); |
618 | 615 | ||
619 | if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { | 616 | if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { |
620 | if (t != NULL) { | 617 | if (t != NULL) { |
621 | if (t->dev != dev) { | 618 | if (t->dev != dev) { |
622 | err = -EEXIST; | 619 | err = -EEXIST; |
623 | break; | 620 | break; |
624 | } | 621 | } |
625 | } else { | 622 | } else { |
626 | if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) || | 623 | if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) || |
627 | (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) { | 624 | (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) { |
628 | err = -EINVAL; | 625 | err = -EINVAL; |
629 | break; | 626 | break; |
630 | } | 627 | } |
631 | t = netdev_priv(dev); | 628 | t = netdev_priv(dev); |
632 | ipip_tunnel_unlink(ipn, t); | 629 | ipip_tunnel_unlink(ipn, t); |
633 | t->parms.iph.saddr = p.iph.saddr; | 630 | t->parms.iph.saddr = p.iph.saddr; |
634 | t->parms.iph.daddr = p.iph.daddr; | 631 | t->parms.iph.daddr = p.iph.daddr; |
635 | memcpy(dev->dev_addr, &p.iph.saddr, 4); | 632 | memcpy(dev->dev_addr, &p.iph.saddr, 4); |
636 | memcpy(dev->broadcast, &p.iph.daddr, 4); | 633 | memcpy(dev->broadcast, &p.iph.daddr, 4); |
637 | ipip_tunnel_link(ipn, t); | 634 | ipip_tunnel_link(ipn, t); |
638 | netdev_state_change(dev); | 635 | netdev_state_change(dev); |
639 | } | 636 | } |
640 | } | 637 | } |
641 | 638 | ||
642 | if (t) { | 639 | if (t) { |
643 | err = 0; | 640 | err = 0; |
644 | if (cmd == SIOCCHGTUNNEL) { | 641 | if (cmd == SIOCCHGTUNNEL) { |
645 | t->parms.iph.ttl = p.iph.ttl; | 642 | t->parms.iph.ttl = p.iph.ttl; |
646 | t->parms.iph.tos = p.iph.tos; | 643 | t->parms.iph.tos = p.iph.tos; |
647 | t->parms.iph.frag_off = p.iph.frag_off; | 644 | t->parms.iph.frag_off = p.iph.frag_off; |
648 | if (t->parms.link != p.link) { | 645 | if (t->parms.link != p.link) { |
649 | t->parms.link = p.link; | 646 | t->parms.link = p.link; |
650 | ipip_tunnel_bind_dev(dev); | 647 | ipip_tunnel_bind_dev(dev); |
651 | netdev_state_change(dev); | 648 | netdev_state_change(dev); |
652 | } | 649 | } |
653 | } | 650 | } |
654 | if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) | 651 | if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) |
655 | err = -EFAULT; | 652 | err = -EFAULT; |
656 | } else | 653 | } else |
657 | err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); | 654 | err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); |
658 | break; | 655 | break; |
659 | 656 | ||
660 | case SIOCDELTUNNEL: | 657 | case SIOCDELTUNNEL: |
661 | err = -EPERM; | 658 | err = -EPERM; |
662 | if (!capable(CAP_NET_ADMIN)) | 659 | if (!capable(CAP_NET_ADMIN)) |
663 | goto done; | 660 | goto done; |
664 | 661 | ||
665 | if (dev == ipn->fb_tunnel_dev) { | 662 | if (dev == ipn->fb_tunnel_dev) { |
666 | err = -EFAULT; | 663 | err = -EFAULT; |
667 | if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) | 664 | if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) |
668 | goto done; | 665 | goto done; |
669 | err = -ENOENT; | 666 | err = -ENOENT; |
670 | if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL) | 667 | if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL) |
671 | goto done; | 668 | goto done; |
672 | err = -EPERM; | 669 | err = -EPERM; |
673 | if (t->dev == ipn->fb_tunnel_dev) | 670 | if (t->dev == ipn->fb_tunnel_dev) |
674 | goto done; | 671 | goto done; |
675 | dev = t->dev; | 672 | dev = t->dev; |
676 | } | 673 | } |
677 | unregister_netdevice(dev); | 674 | unregister_netdevice(dev); |
678 | err = 0; | 675 | err = 0; |
679 | break; | 676 | break; |
680 | 677 | ||
681 | default: | 678 | default: |
682 | err = -EINVAL; | 679 | err = -EINVAL; |
683 | } | 680 | } |
684 | 681 | ||
685 | done: | 682 | done: |
686 | return err; | 683 | return err; |
687 | } | 684 | } |
688 | 685 | ||
689 | static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu) | 686 | static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu) |
690 | { | 687 | { |
691 | if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr)) | 688 | if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr)) |
692 | return -EINVAL; | 689 | return -EINVAL; |
693 | dev->mtu = new_mtu; | 690 | dev->mtu = new_mtu; |
694 | return 0; | 691 | return 0; |
695 | } | 692 | } |
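
The bounds are IPv4's own: 68 bytes is the minimum MTU every host must handle (RFC 791), and 0xFFF8 is presumably the largest 16-bit total length rounded down to an 8-byte fragment boundary; subtracting the outer iphdr keeps the encapsulated datagram's length encodable. Quick arithmetic check:

	#include <stdio.h>

	int main(void)
	{
		int lo = 68;		/* RFC 791 minimum IPv4 MTU */
		int hi = 0xFFF8 - 20;	/* max 16-bit length, 8-byte aligned, */
					/* minus the outer IPv4 header */

		printf("valid ipip MTU range: %d..%d\n", lo, hi); /* 68..65508 */
		return 0;
	}
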
696 | 693 | ||
697 | static const struct net_device_ops ipip_netdev_ops = { | 694 | static const struct net_device_ops ipip_netdev_ops = { |
698 | .ndo_uninit = ipip_tunnel_uninit, | 695 | .ndo_uninit = ipip_tunnel_uninit, |
699 | .ndo_start_xmit = ipip_tunnel_xmit, | 696 | .ndo_start_xmit = ipip_tunnel_xmit, |
700 | .ndo_do_ioctl = ipip_tunnel_ioctl, | 697 | .ndo_do_ioctl = ipip_tunnel_ioctl, |
701 | .ndo_change_mtu = ipip_tunnel_change_mtu, | 698 | .ndo_change_mtu = ipip_tunnel_change_mtu, |
702 | 699 | ||
703 | }; | 700 | }; |
704 | 701 | ||
705 | static void ipip_tunnel_setup(struct net_device *dev) | 702 | static void ipip_tunnel_setup(struct net_device *dev) |
706 | { | 703 | { |
707 | dev->netdev_ops = &ipip_netdev_ops; | 704 | dev->netdev_ops = &ipip_netdev_ops; |
708 | dev->destructor = free_netdev; | 705 | dev->destructor = free_netdev; |
709 | 706 | ||
710 | dev->type = ARPHRD_TUNNEL; | 707 | dev->type = ARPHRD_TUNNEL; |
711 | dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr); | 708 | dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr); |
712 | dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr); | 709 | dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr); |
713 | dev->flags = IFF_NOARP; | 710 | dev->flags = IFF_NOARP; |
714 | dev->iflink = 0; | 711 | dev->iflink = 0; |
715 | dev->addr_len = 4; | 712 | dev->addr_len = 4; |
716 | dev->features |= NETIF_F_NETNS_LOCAL; | 713 | dev->features |= NETIF_F_NETNS_LOCAL; |
717 | dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; | 714 | dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; |
718 | } | 715 | } |
719 | 716 | ||
720 | static void ipip_tunnel_init(struct net_device *dev) | 717 | static void ipip_tunnel_init(struct net_device *dev) |
721 | { | 718 | { |
722 | struct ip_tunnel *tunnel = netdev_priv(dev); | 719 | struct ip_tunnel *tunnel = netdev_priv(dev); |
723 | 720 | ||
724 | tunnel->dev = dev; | 721 | tunnel->dev = dev; |
725 | strcpy(tunnel->parms.name, dev->name); | 722 | strcpy(tunnel->parms.name, dev->name); |
726 | 723 | ||
727 | memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); | 724 | memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); |
728 | memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); | 725 | memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); |
729 | 726 | ||
730 | ipip_tunnel_bind_dev(dev); | 727 | ipip_tunnel_bind_dev(dev); |
731 | } | 728 | } |
732 | 729 | ||
733 | static void __net_init ipip_fb_tunnel_init(struct net_device *dev) | 730 | static void __net_init ipip_fb_tunnel_init(struct net_device *dev) |
734 | { | 731 | { |
735 | struct ip_tunnel *tunnel = netdev_priv(dev); | 732 | struct ip_tunnel *tunnel = netdev_priv(dev); |
736 | struct iphdr *iph = &tunnel->parms.iph; | 733 | struct iphdr *iph = &tunnel->parms.iph; |
737 | struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id); | 734 | struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id); |
738 | 735 | ||
739 | tunnel->dev = dev; | 736 | tunnel->dev = dev; |
740 | strcpy(tunnel->parms.name, dev->name); | 737 | strcpy(tunnel->parms.name, dev->name); |
741 | 738 | ||
742 | iph->version = 4; | 739 | iph->version = 4; |
743 | iph->protocol = IPPROTO_IPIP; | 740 | iph->protocol = IPPROTO_IPIP; |
744 | iph->ihl = 5; | 741 | iph->ihl = 5; |
745 | 742 | ||
746 | dev_hold(dev); | 743 | dev_hold(dev); |
747 | ipn->tunnels_wc[0] = tunnel; | 744 | ipn->tunnels_wc[0] = tunnel; |
748 | } | 745 | } |
749 | 746 | ||
750 | static struct xfrm_tunnel ipip_handler = { | 747 | static struct xfrm_tunnel ipip_handler = { |
751 | .handler = ipip_rcv, | 748 | .handler = ipip_rcv, |
752 | .err_handler = ipip_err, | 749 | .err_handler = ipip_err, |
753 | .priority = 1, | 750 | .priority = 1, |
754 | }; | 751 | }; |
755 | 752 | ||
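
tunnel4 tries registered xfrm_tunnel handlers in ascending .priority order and stops at the first .handler that returns 0 (packet consumed), so ipip's priority 1 leaves room for hooks on either side. A speculative kernel-module sketch of a pass-through handler that would run before ipip_rcv; the snoop_* names are invented:

#include <linux/module.h>
#include <linux/skbuff.h>
#include <net/xfrm.h>

static int snoop_rcv(struct sk_buff *skb)
{
	return 1;	/* not consumed: fall through to ipip_rcv */
}

static int snoop_err(struct sk_buff *skb, u32 info)
{
	return 0;
}

static struct xfrm_tunnel snoop_handler = {
	.handler	= snoop_rcv,
	.err_handler	= snoop_err,
	.priority	= 0,	/* lower values are tried first */
};

static int __init snoop_init(void)
{
	return xfrm4_tunnel_register(&snoop_handler, AF_INET);
}

static void __exit snoop_exit(void)
{
	xfrm4_tunnel_deregister(&snoop_handler, AF_INET);
}

module_init(snoop_init);
module_exit(snoop_exit);
MODULE_LICENSE("GPL");
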
756 | static const char banner[] __initconst = | 753 | static const char banner[] __initconst = |
757 | KERN_INFO "IPv4 over IPv4 tunneling driver\n"; | 754 | KERN_INFO "IPv4 over IPv4 tunneling driver\n"; |
758 | 755 | ||
759 | static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head) | 756 | static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head) |
760 | { | 757 | { |
761 | int prio; | 758 | int prio; |
762 | 759 | ||
763 | for (prio = 1; prio < 4; prio++) { | 760 | for (prio = 1; prio < 4; prio++) { |
764 | int h; | 761 | int h; |
765 | for (h = 0; h < HASH_SIZE; h++) { | 762 | for (h = 0; h < HASH_SIZE; h++) { |
766 | struct ip_tunnel *t = ipn->tunnels[prio][h]; | 763 | struct ip_tunnel *t = ipn->tunnels[prio][h]; |
767 | 764 | ||
768 | while (t != NULL) { | 765 | while (t != NULL) { |
769 | unregister_netdevice_queue(t->dev, head); | 766 | unregister_netdevice_queue(t->dev, head); |
770 | t = t->next; | 767 | t = t->next; |
771 | } | 768 | } |
772 | } | 769 | } |
773 | } | 770 | } |
774 | } | 771 | } |
775 | 772 | ||
776 | static int __net_init ipip_init_net(struct net *net) | 773 | static int __net_init ipip_init_net(struct net *net) |
777 | { | 774 | { |
778 | struct ipip_net *ipn = net_generic(net, ipip_net_id); | 775 | struct ipip_net *ipn = net_generic(net, ipip_net_id); |
779 | int err; | 776 | int err; |
780 | 777 | ||
781 | ipn->tunnels[0] = ipn->tunnels_wc; | 778 | ipn->tunnels[0] = ipn->tunnels_wc; |
782 | ipn->tunnels[1] = ipn->tunnels_l; | 779 | ipn->tunnels[1] = ipn->tunnels_l; |
783 | ipn->tunnels[2] = ipn->tunnels_r; | 780 | ipn->tunnels[2] = ipn->tunnels_r; |
784 | ipn->tunnels[3] = ipn->tunnels_r_l; | 781 | ipn->tunnels[3] = ipn->tunnels_r_l; |
785 | 782 | ||
786 | ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), | 783 | ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), |
787 | "tunl0", | 784 | "tunl0", |
788 | ipip_tunnel_setup); | 785 | ipip_tunnel_setup); |
789 | if (!ipn->fb_tunnel_dev) { | 786 | if (!ipn->fb_tunnel_dev) { |
790 | err = -ENOMEM; | 787 | err = -ENOMEM; |
791 | goto err_alloc_dev; | 788 | goto err_alloc_dev; |
792 | } | 789 | } |
793 | dev_net_set(ipn->fb_tunnel_dev, net); | 790 | dev_net_set(ipn->fb_tunnel_dev, net); |
794 | 791 | ||
795 | ipip_fb_tunnel_init(ipn->fb_tunnel_dev); | 792 | ipip_fb_tunnel_init(ipn->fb_tunnel_dev); |
796 | 793 | ||
797 | if ((err = register_netdev(ipn->fb_tunnel_dev))) | 794 | if ((err = register_netdev(ipn->fb_tunnel_dev))) |
798 | goto err_reg_dev; | 795 | goto err_reg_dev; |
799 | 796 | ||
800 | return 0; | 797 | return 0; |
801 | 798 | ||
802 | err_reg_dev: | 799 | err_reg_dev: |
803 | free_netdev(ipn->fb_tunnel_dev); | 800 | free_netdev(ipn->fb_tunnel_dev); |
804 | err_alloc_dev: | 801 | err_alloc_dev: |
805 | /* nothing */ | 802 | /* nothing */ |
806 | return err; | 803 | return err; |
807 | } | 804 | } |
808 | 805 | ||
809 | static void __net_exit ipip_exit_net(struct net *net) | 806 | static void __net_exit ipip_exit_net(struct net *net) |
810 | { | 807 | { |
811 | struct ipip_net *ipn = net_generic(net, ipip_net_id); | 808 | struct ipip_net *ipn = net_generic(net, ipip_net_id); |
812 | LIST_HEAD(list); | 809 | LIST_HEAD(list); |
813 | 810 | ||
814 | rtnl_lock(); | 811 | rtnl_lock(); |
815 | ipip_destroy_tunnels(ipn, &list); | 812 | ipip_destroy_tunnels(ipn, &list); |
816 | unregister_netdevice_queue(ipn->fb_tunnel_dev, &list); | 813 | unregister_netdevice_queue(ipn->fb_tunnel_dev, &list); |
817 | unregister_netdevice_many(&list); | 814 | unregister_netdevice_many(&list); |
818 | rtnl_unlock(); | 815 | rtnl_unlock(); |
819 | } | 816 | } |
820 | 817 | ||
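
ipip_init_net()/ipip_exit_net() follow the standard pernet pattern: setting .id and .size makes the pernet core allocate zeroed per-namespace storage that net_generic() later retrieves. A minimal sketch under those assumptions; the demo_* names are invented:

#include <linux/module.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

static int demo_net_id;

struct demo_net {
	int packets_seen;		/* per-namespace state */
};

static int __net_init demo_init_net(struct net *net)
{
	struct demo_net *dn = net_generic(net, demo_net_id);

	dn->packets_seen = 0;		/* storage arrives zeroed; shown for clarity */
	return 0;
}

static void __net_exit demo_exit_net(struct net *net)
{
	/* the per-net storage itself is freed by the pernet core */
}

static struct pernet_operations demo_net_ops = {
	.init = demo_init_net,
	.exit = demo_exit_net,
	.id   = &demo_net_id,
	.size = sizeof(struct demo_net),
};

static int __init demo_init(void)
{
	return register_pernet_device(&demo_net_ops);
}

static void __exit demo_exit(void)
{
	unregister_pernet_device(&demo_net_ops);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
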
821 | static struct pernet_operations ipip_net_ops = { | 818 | static struct pernet_operations ipip_net_ops = { |
822 | .init = ipip_init_net, | 819 | .init = ipip_init_net, |
823 | .exit = ipip_exit_net, | 820 | .exit = ipip_exit_net, |
824 | .id = &ipip_net_id, | 821 | .id = &ipip_net_id, |
825 | .size = sizeof(struct ipip_net), | 822 | .size = sizeof(struct ipip_net), |
826 | }; | 823 | }; |
827 | 824 | ||
828 | static int __init ipip_init(void) | 825 | static int __init ipip_init(void) |
829 | { | 826 | { |
830 | int err; | 827 | int err; |
831 | 828 | ||
832 | printk(banner); | 829 | printk(banner); |
833 | 830 | ||
834 | err = register_pernet_device(&ipip_net_ops); | 831 | err = register_pernet_device(&ipip_net_ops); |
835 | if (err < 0) | 832 | if (err < 0) |
836 | return err; | 833 | return err; |
837 | err = xfrm4_tunnel_register(&ipip_handler, AF_INET); | 834 | err = xfrm4_tunnel_register(&ipip_handler, AF_INET); |
838 | if (err < 0) { | 835 | if (err < 0) { |
839 | unregister_pernet_device(&ipip_net_ops); | 836 | unregister_pernet_device(&ipip_net_ops); |
840 | printk(KERN_INFO "ipip init: can't register tunnel\n"); | 837 | printk(KERN_INFO "ipip init: can't register tunnel\n"); |
841 | } | 838 | } |
842 | return err; | 839 | return err; |
843 | } | 840 | } |
844 | 841 | ||
845 | static void __exit ipip_fini(void) | 842 | static void __exit ipip_fini(void) |
846 | { | 843 | { |
847 | if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET)) | 844 | if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET)) |
848 | printk(KERN_INFO "ipip close: can't deregister tunnel\n"); | 845 | printk(KERN_INFO "ipip close: can't deregister tunnel\n"); |
849 | 846 | ||
850 | unregister_pernet_device(&ipip_net_ops); | 847 | unregister_pernet_device(&ipip_net_ops); |
851 | } | 848 | } |
852 | 849 | ||
853 | module_init(ipip_init); | 850 | module_init(ipip_init); |
854 | module_exit(ipip_fini); | 851 | module_exit(ipip_fini); |
855 | MODULE_LICENSE("GPL"); | 852 | MODULE_LICENSE("GPL"); |
856 | 853 |
net/ipv4/ipmr.c
1 | /* | 1 | /* |
2 | * IP multicast routing support for mrouted 3.6/3.8 | 2 | * IP multicast routing support for mrouted 3.6/3.8 |
3 | * | 3 | * |
4 | * (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk> | 4 | * (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk> |
5 | * Linux Consultancy and Custom Driver Development | 5 | * Linux Consultancy and Custom Driver Development |
6 | * | 6 | * |
7 | * This program is free software; you can redistribute it and/or | 7 | * This program is free software; you can redistribute it and/or |
8 | * modify it under the terms of the GNU General Public License | 8 | * modify it under the terms of the GNU General Public License |
9 | * as published by the Free Software Foundation; either version | 9 | * as published by the Free Software Foundation; either version |
10 | * 2 of the License, or (at your option) any later version. | 10 | * 2 of the License, or (at your option) any later version. |
11 | * | 11 | * |
12 | * Fixes: | 12 | * Fixes: |
13 | * Michael Chastain : Incorrect size of copying. | 13 | * Michael Chastain : Incorrect size of copying. |
14 | * Alan Cox : Added the cache manager code | 14 | * Alan Cox : Added the cache manager code |
15 | * Alan Cox : Fixed the clone/copy bug and device race. | 15 | * Alan Cox : Fixed the clone/copy bug and device race. |
16 | * Mike McLagan : Routing by source | 16 | * Mike McLagan : Routing by source |
17 | * Malcolm Beattie : Buffer handling fixes. | 17 | * Malcolm Beattie : Buffer handling fixes. |
18 | * Alexey Kuznetsov : Double buffer free and other fixes. | 18 | * Alexey Kuznetsov : Double buffer free and other fixes. |
19 | * SVR Anand : Fixed several multicast bugs and problems. | 19 | * SVR Anand : Fixed several multicast bugs and problems. |
20 | * Alexey Kuznetsov : Status, optimisations and more. | 20 | * Alexey Kuznetsov : Status, optimisations and more. |
21 | * Brad Parker : Better behaviour on mrouted upcall | 21 | * Brad Parker : Better behaviour on mrouted upcall |
22 | * overflow. | 22 | * overflow. |
23 | * Carlos Picoto : PIMv1 Support | 23 | * Carlos Picoto : PIMv1 Support |
24 | * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header | 24 | * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header |
25 | * Relax this requirement to work with older peers. | 25 | * Relax this requirement to work with older peers. |
26 | * | 26 | * |
27 | */ | 27 | */ |
28 | 28 | ||
29 | #include <asm/system.h> | 29 | #include <asm/system.h> |
30 | #include <asm/uaccess.h> | 30 | #include <asm/uaccess.h> |
31 | #include <linux/types.h> | 31 | #include <linux/types.h> |
32 | #include <linux/capability.h> | 32 | #include <linux/capability.h> |
33 | #include <linux/errno.h> | 33 | #include <linux/errno.h> |
34 | #include <linux/timer.h> | 34 | #include <linux/timer.h> |
35 | #include <linux/mm.h> | 35 | #include <linux/mm.h> |
36 | #include <linux/kernel.h> | 36 | #include <linux/kernel.h> |
37 | #include <linux/fcntl.h> | 37 | #include <linux/fcntl.h> |
38 | #include <linux/stat.h> | 38 | #include <linux/stat.h> |
39 | #include <linux/socket.h> | 39 | #include <linux/socket.h> |
40 | #include <linux/in.h> | 40 | #include <linux/in.h> |
41 | #include <linux/inet.h> | 41 | #include <linux/inet.h> |
42 | #include <linux/netdevice.h> | 42 | #include <linux/netdevice.h> |
43 | #include <linux/inetdevice.h> | 43 | #include <linux/inetdevice.h> |
44 | #include <linux/igmp.h> | 44 | #include <linux/igmp.h> |
45 | #include <linux/proc_fs.h> | 45 | #include <linux/proc_fs.h> |
46 | #include <linux/seq_file.h> | 46 | #include <linux/seq_file.h> |
47 | #include <linux/mroute.h> | 47 | #include <linux/mroute.h> |
48 | #include <linux/init.h> | 48 | #include <linux/init.h> |
49 | #include <linux/if_ether.h> | 49 | #include <linux/if_ether.h> |
50 | #include <linux/slab.h> | 50 | #include <linux/slab.h> |
51 | #include <net/net_namespace.h> | 51 | #include <net/net_namespace.h> |
52 | #include <net/ip.h> | 52 | #include <net/ip.h> |
53 | #include <net/protocol.h> | 53 | #include <net/protocol.h> |
54 | #include <linux/skbuff.h> | 54 | #include <linux/skbuff.h> |
55 | #include <net/route.h> | 55 | #include <net/route.h> |
56 | #include <net/sock.h> | 56 | #include <net/sock.h> |
57 | #include <net/icmp.h> | 57 | #include <net/icmp.h> |
58 | #include <net/udp.h> | 58 | #include <net/udp.h> |
59 | #include <net/raw.h> | 59 | #include <net/raw.h> |
60 | #include <linux/notifier.h> | 60 | #include <linux/notifier.h> |
61 | #include <linux/if_arp.h> | 61 | #include <linux/if_arp.h> |
62 | #include <linux/netfilter_ipv4.h> | 62 | #include <linux/netfilter_ipv4.h> |
63 | #include <net/ipip.h> | 63 | #include <net/ipip.h> |
64 | #include <net/checksum.h> | 64 | #include <net/checksum.h> |
65 | #include <net/netlink.h> | 65 | #include <net/netlink.h> |
66 | #include <net/fib_rules.h> | 66 | #include <net/fib_rules.h> |
67 | 67 | ||
68 | #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) | 68 | #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) |
69 | #define CONFIG_IP_PIMSM 1 | 69 | #define CONFIG_IP_PIMSM 1 |
70 | #endif | 70 | #endif |
71 | 71 | ||
72 | struct mr_table { | 72 | struct mr_table { |
73 | struct list_head list; | 73 | struct list_head list; |
74 | #ifdef CONFIG_NET_NS | 74 | #ifdef CONFIG_NET_NS |
75 | struct net *net; | 75 | struct net *net; |
76 | #endif | 76 | #endif |
77 | u32 id; | 77 | u32 id; |
78 | struct sock *mroute_sk; | 78 | struct sock *mroute_sk; |
79 | struct timer_list ipmr_expire_timer; | 79 | struct timer_list ipmr_expire_timer; |
80 | struct list_head mfc_unres_queue; | 80 | struct list_head mfc_unres_queue; |
81 | struct list_head mfc_cache_array[MFC_LINES]; | 81 | struct list_head mfc_cache_array[MFC_LINES]; |
82 | struct vif_device vif_table[MAXVIFS]; | 82 | struct vif_device vif_table[MAXVIFS]; |
83 | int maxvif; | 83 | int maxvif; |
84 | atomic_t cache_resolve_queue_len; | 84 | atomic_t cache_resolve_queue_len; |
85 | int mroute_do_assert; | 85 | int mroute_do_assert; |
86 | int mroute_do_pim; | 86 | int mroute_do_pim; |
87 | #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) | 87 | #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) |
88 | int mroute_reg_vif_num; | 88 | int mroute_reg_vif_num; |
89 | #endif | 89 | #endif |
90 | }; | 90 | }; |
91 | 91 | ||
92 | struct ipmr_rule { | 92 | struct ipmr_rule { |
93 | struct fib_rule common; | 93 | struct fib_rule common; |
94 | }; | 94 | }; |
95 | 95 | ||
96 | struct ipmr_result { | 96 | struct ipmr_result { |
97 | struct mr_table *mrt; | 97 | struct mr_table *mrt; |
98 | }; | 98 | }; |
99 | 99 | ||
100 | /* Big lock, protecting vif table, mrt cache and mroute socket state. | 100 | /* Big lock, protecting vif table, mrt cache and mroute socket state. |
101 | Note that the changes are semaphored via rtnl_lock. | 101 | Note that the changes are semaphored via rtnl_lock. |
102 | */ | 102 | */ |
103 | 103 | ||
104 | static DEFINE_RWLOCK(mrt_lock); | 104 | static DEFINE_RWLOCK(mrt_lock); |
105 | 105 | ||
106 | /* | 106 | /* |
107 | * Multicast router control variables | 107 | * Multicast router control variables |
108 | */ | 108 | */ |
109 | 109 | ||
110 | #define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL) | 110 | #define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL) |
111 | 111 | ||
112 | /* Special spinlock for queue of unresolved entries */ | 112 | /* Special spinlock for queue of unresolved entries */ |
113 | static DEFINE_SPINLOCK(mfc_unres_lock); | 113 | static DEFINE_SPINLOCK(mfc_unres_lock); |
114 | 114 | ||
115 | /* We return to Alan's original scheme. Hash table of resolved | 115 | /* We return to Alan's original scheme. Hash table of resolved |
116 | entries is changed only in process context and protected | 116 | entries is changed only in process context and protected |
117 | with weak lock mrt_lock. Queue of unresolved entries is protected | 117 | with weak lock mrt_lock. Queue of unresolved entries is protected |
118 | with strong spinlock mfc_unres_lock. | 118 | with strong spinlock mfc_unres_lock. |
119 | 119 | ||
120 | In this case data path is free of exclusive locks at all. | 120 | In this case data path is free of exclusive locks at all. |
121 | */ | 121 | */ |
122 | 122 | ||
123 | static struct kmem_cache *mrt_cachep __read_mostly; | 123 | static struct kmem_cache *mrt_cachep __read_mostly; |
124 | 124 | ||
125 | static struct mr_table *ipmr_new_table(struct net *net, u32 id); | 125 | static struct mr_table *ipmr_new_table(struct net *net, u32 id); |
126 | static int ip_mr_forward(struct net *net, struct mr_table *mrt, | 126 | static int ip_mr_forward(struct net *net, struct mr_table *mrt, |
127 | struct sk_buff *skb, struct mfc_cache *cache, | 127 | struct sk_buff *skb, struct mfc_cache *cache, |
128 | int local); | 128 | int local); |
129 | static int ipmr_cache_report(struct mr_table *mrt, | 129 | static int ipmr_cache_report(struct mr_table *mrt, |
130 | struct sk_buff *pkt, vifi_t vifi, int assert); | 130 | struct sk_buff *pkt, vifi_t vifi, int assert); |
131 | static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, | 131 | static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, |
132 | struct mfc_cache *c, struct rtmsg *rtm); | 132 | struct mfc_cache *c, struct rtmsg *rtm); |
133 | static void ipmr_expire_process(unsigned long arg); | 133 | static void ipmr_expire_process(unsigned long arg); |
134 | 134 | ||
135 | #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES | 135 | #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES |
136 | #define ipmr_for_each_table(mrt, net) \ | 136 | #define ipmr_for_each_table(mrt, net) \ |
137 | list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list) | 137 | list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list) |
138 | 138 | ||
139 | static struct mr_table *ipmr_get_table(struct net *net, u32 id) | 139 | static struct mr_table *ipmr_get_table(struct net *net, u32 id) |
140 | { | 140 | { |
141 | struct mr_table *mrt; | 141 | struct mr_table *mrt; |
142 | 142 | ||
143 | ipmr_for_each_table(mrt, net) { | 143 | ipmr_for_each_table(mrt, net) { |
144 | if (mrt->id == id) | 144 | if (mrt->id == id) |
145 | return mrt; | 145 | return mrt; |
146 | } | 146 | } |
147 | return NULL; | 147 | return NULL; |
148 | } | 148 | } |
149 | 149 | ||
150 | static int ipmr_fib_lookup(struct net *net, struct flowi *flp, | 150 | static int ipmr_fib_lookup(struct net *net, struct flowi *flp, |
151 | struct mr_table **mrt) | 151 | struct mr_table **mrt) |
152 | { | 152 | { |
153 | struct ipmr_result res; | 153 | struct ipmr_result res; |
154 | struct fib_lookup_arg arg = { .result = &res, }; | 154 | struct fib_lookup_arg arg = { .result = &res, }; |
155 | int err; | 155 | int err; |
156 | 156 | ||
157 | err = fib_rules_lookup(net->ipv4.mr_rules_ops, flp, 0, &arg); | 157 | err = fib_rules_lookup(net->ipv4.mr_rules_ops, flp, 0, &arg); |
158 | if (err < 0) | 158 | if (err < 0) |
159 | return err; | 159 | return err; |
160 | *mrt = res.mrt; | 160 | *mrt = res.mrt; |
161 | return 0; | 161 | return 0; |
162 | } | 162 | } |
163 | 163 | ||
164 | static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp, | 164 | static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp, |
165 | int flags, struct fib_lookup_arg *arg) | 165 | int flags, struct fib_lookup_arg *arg) |
166 | { | 166 | { |
167 | struct ipmr_result *res = arg->result; | 167 | struct ipmr_result *res = arg->result; |
168 | struct mr_table *mrt; | 168 | struct mr_table *mrt; |
169 | 169 | ||
170 | switch (rule->action) { | 170 | switch (rule->action) { |
171 | case FR_ACT_TO_TBL: | 171 | case FR_ACT_TO_TBL: |
172 | break; | 172 | break; |
173 | case FR_ACT_UNREACHABLE: | 173 | case FR_ACT_UNREACHABLE: |
174 | return -ENETUNREACH; | 174 | return -ENETUNREACH; |
175 | case FR_ACT_PROHIBIT: | 175 | case FR_ACT_PROHIBIT: |
176 | return -EACCES; | 176 | return -EACCES; |
177 | case FR_ACT_BLACKHOLE: | 177 | case FR_ACT_BLACKHOLE: |
178 | default: | 178 | default: |
179 | return -EINVAL; | 179 | return -EINVAL; |
180 | } | 180 | } |
181 | 181 | ||
182 | mrt = ipmr_get_table(rule->fr_net, rule->table); | 182 | mrt = ipmr_get_table(rule->fr_net, rule->table); |
183 | if (mrt == NULL) | 183 | if (mrt == NULL) |
184 | return -EAGAIN; | 184 | return -EAGAIN; |
185 | res->mrt = mrt; | 185 | res->mrt = mrt; |
186 | return 0; | 186 | return 0; |
187 | } | 187 | } |
188 | 188 | ||
189 | static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) | 189 | static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) |
190 | { | 190 | { |
191 | return 1; | 191 | return 1; |
192 | } | 192 | } |
193 | 193 | ||
194 | static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = { | 194 | static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = { |
195 | FRA_GENERIC_POLICY, | 195 | FRA_GENERIC_POLICY, |
196 | }; | 196 | }; |
197 | 197 | ||
198 | static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb, | 198 | static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb, |
199 | struct fib_rule_hdr *frh, struct nlattr **tb) | 199 | struct fib_rule_hdr *frh, struct nlattr **tb) |
200 | { | 200 | { |
201 | return 0; | 201 | return 0; |
202 | } | 202 | } |
203 | 203 | ||
204 | static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, | 204 | static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, |
205 | struct nlattr **tb) | 205 | struct nlattr **tb) |
206 | { | 206 | { |
207 | return 1; | 207 | return 1; |
208 | } | 208 | } |
209 | 209 | ||
210 | static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb, | 210 | static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb, |
211 | struct fib_rule_hdr *frh) | 211 | struct fib_rule_hdr *frh) |
212 | { | 212 | { |
213 | frh->dst_len = 0; | 213 | frh->dst_len = 0; |
214 | frh->src_len = 0; | 214 | frh->src_len = 0; |
215 | frh->tos = 0; | 215 | frh->tos = 0; |
216 | return 0; | 216 | return 0; |
217 | } | 217 | } |
218 | 218 | ||
219 | static const struct fib_rules_ops __net_initdata ipmr_rules_ops_template = { | 219 | static const struct fib_rules_ops __net_initdata ipmr_rules_ops_template = { |
220 | .family = RTNL_FAMILY_IPMR, | 220 | .family = RTNL_FAMILY_IPMR, |
221 | .rule_size = sizeof(struct ipmr_rule), | 221 | .rule_size = sizeof(struct ipmr_rule), |
222 | .addr_size = sizeof(u32), | 222 | .addr_size = sizeof(u32), |
223 | .action = ipmr_rule_action, | 223 | .action = ipmr_rule_action, |
224 | .match = ipmr_rule_match, | 224 | .match = ipmr_rule_match, |
225 | .configure = ipmr_rule_configure, | 225 | .configure = ipmr_rule_configure, |
226 | .compare = ipmr_rule_compare, | 226 | .compare = ipmr_rule_compare, |
227 | .default_pref = fib_default_rule_pref, | 227 | .default_pref = fib_default_rule_pref, |
228 | .fill = ipmr_rule_fill, | 228 | .fill = ipmr_rule_fill, |
229 | .nlgroup = RTNLGRP_IPV4_RULE, | 229 | .nlgroup = RTNLGRP_IPV4_RULE, |
230 | .policy = ipmr_rule_policy, | 230 | .policy = ipmr_rule_policy, |
231 | .owner = THIS_MODULE, | 231 | .owner = THIS_MODULE, |
232 | }; | 232 | }; |
233 | 233 | ||
234 | static int __net_init ipmr_rules_init(struct net *net) | 234 | static int __net_init ipmr_rules_init(struct net *net) |
235 | { | 235 | { |
236 | struct fib_rules_ops *ops; | 236 | struct fib_rules_ops *ops; |
237 | struct mr_table *mrt; | 237 | struct mr_table *mrt; |
238 | int err; | 238 | int err; |
239 | 239 | ||
240 | ops = fib_rules_register(&ipmr_rules_ops_template, net); | 240 | ops = fib_rules_register(&ipmr_rules_ops_template, net); |
241 | if (IS_ERR(ops)) | 241 | if (IS_ERR(ops)) |
242 | return PTR_ERR(ops); | 242 | return PTR_ERR(ops); |
243 | 243 | ||
244 | INIT_LIST_HEAD(&net->ipv4.mr_tables); | 244 | INIT_LIST_HEAD(&net->ipv4.mr_tables); |
245 | 245 | ||
246 | mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); | 246 | mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); |
247 | if (mrt == NULL) { | 247 | if (mrt == NULL) { |
248 | err = -ENOMEM; | 248 | err = -ENOMEM; |
249 | goto err1; | 249 | goto err1; |
250 | } | 250 | } |
251 | 251 | ||
252 | err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0); | 252 | err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0); |
253 | if (err < 0) | 253 | if (err < 0) |
254 | goto err2; | 254 | goto err2; |
255 | 255 | ||
256 | net->ipv4.mr_rules_ops = ops; | 256 | net->ipv4.mr_rules_ops = ops; |
257 | return 0; | 257 | return 0; |
258 | 258 | ||
259 | err2: | 259 | err2: |
260 | kfree(mrt); | 260 | kfree(mrt); |
261 | err1: | 261 | err1: |
262 | fib_rules_unregister(ops); | 262 | fib_rules_unregister(ops); |
263 | return err; | 263 | return err; |
264 | } | 264 | } |
265 | 265 | ||
266 | static void __net_exit ipmr_rules_exit(struct net *net) | 266 | static void __net_exit ipmr_rules_exit(struct net *net) |
267 | { | 267 | { |
268 | struct mr_table *mrt, *next; | 268 | struct mr_table *mrt, *next; |
269 | 269 | ||
270 | list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) | 270 | list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) |
271 | kfree(mrt); | 271 | kfree(mrt); |
272 | fib_rules_unregister(net->ipv4.mr_rules_ops); | 272 | fib_rules_unregister(net->ipv4.mr_rules_ops); |
273 | } | 273 | } |
274 | #else | 274 | #else |
275 | #define ipmr_for_each_table(mrt, net) \ | 275 | #define ipmr_for_each_table(mrt, net) \ |
276 | for (mrt = net->ipv4.mrt; mrt; mrt = NULL) | 276 | for (mrt = net->ipv4.mrt; mrt; mrt = NULL) |
277 | 277 | ||
278 | static struct mr_table *ipmr_get_table(struct net *net, u32 id) | 278 | static struct mr_table *ipmr_get_table(struct net *net, u32 id) |
279 | { | 279 | { |
280 | return net->ipv4.mrt; | 280 | return net->ipv4.mrt; |
281 | } | 281 | } |
282 | 282 | ||
283 | static int ipmr_fib_lookup(struct net *net, struct flowi *flp, | 283 | static int ipmr_fib_lookup(struct net *net, struct flowi *flp, |
284 | struct mr_table **mrt) | 284 | struct mr_table **mrt) |
285 | { | 285 | { |
286 | *mrt = net->ipv4.mrt; | 286 | *mrt = net->ipv4.mrt; |
287 | return 0; | 287 | return 0; |
288 | } | 288 | } |
289 | 289 | ||
290 | static int __net_init ipmr_rules_init(struct net *net) | 290 | static int __net_init ipmr_rules_init(struct net *net) |
291 | { | 291 | { |
292 | net->ipv4.mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); | 292 | net->ipv4.mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); |
293 | return net->ipv4.mrt ? 0 : -ENOMEM; | 293 | return net->ipv4.mrt ? 0 : -ENOMEM; |
294 | } | 294 | } |
295 | 295 | ||
296 | static void __net_exit ipmr_rules_exit(struct net *net) | 296 | static void __net_exit ipmr_rules_exit(struct net *net) |
297 | { | 297 | { |
298 | kfree(net->ipv4.mrt); | 298 | kfree(net->ipv4.mrt); |
299 | } | 299 | } |
300 | #endif | 300 | #endif |
301 | 301 | ||
302 | static struct mr_table *ipmr_new_table(struct net *net, u32 id) | 302 | static struct mr_table *ipmr_new_table(struct net *net, u32 id) |
303 | { | 303 | { |
304 | struct mr_table *mrt; | 304 | struct mr_table *mrt; |
305 | unsigned int i; | 305 | unsigned int i; |
306 | 306 | ||
307 | mrt = ipmr_get_table(net, id); | 307 | mrt = ipmr_get_table(net, id); |
308 | if (mrt != NULL) | 308 | if (mrt != NULL) |
309 | return mrt; | 309 | return mrt; |
310 | 310 | ||
311 | mrt = kzalloc(sizeof(*mrt), GFP_KERNEL); | 311 | mrt = kzalloc(sizeof(*mrt), GFP_KERNEL); |
312 | if (mrt == NULL) | 312 | if (mrt == NULL) |
313 | return NULL; | 313 | return NULL; |
314 | write_pnet(&mrt->net, net); | 314 | write_pnet(&mrt->net, net); |
315 | mrt->id = id; | 315 | mrt->id = id; |
316 | 316 | ||
317 | /* Forwarding cache */ | 317 | /* Forwarding cache */ |
318 | for (i = 0; i < MFC_LINES; i++) | 318 | for (i = 0; i < MFC_LINES; i++) |
319 | INIT_LIST_HEAD(&mrt->mfc_cache_array[i]); | 319 | INIT_LIST_HEAD(&mrt->mfc_cache_array[i]); |
320 | 320 | ||
321 | INIT_LIST_HEAD(&mrt->mfc_unres_queue); | 321 | INIT_LIST_HEAD(&mrt->mfc_unres_queue); |
322 | 322 | ||
323 | setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process, | 323 | setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process, |
324 | (unsigned long)mrt); | 324 | (unsigned long)mrt); |
325 | 325 | ||
326 | #ifdef CONFIG_IP_PIMSM | 326 | #ifdef CONFIG_IP_PIMSM |
327 | mrt->mroute_reg_vif_num = -1; | 327 | mrt->mroute_reg_vif_num = -1; |
328 | #endif | 328 | #endif |
329 | #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES | 329 | #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES |
330 | list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables); | 330 | list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables); |
331 | #endif | 331 | #endif |
332 | return mrt; | 332 | return mrt; |
333 | } | 333 | } |
334 | 334 | ||
335 | /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */ | 335 | /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */ |
336 | 336 | ||
337 | static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v) | 337 | static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v) |
338 | { | 338 | { |
339 | struct net *net = dev_net(dev); | 339 | struct net *net = dev_net(dev); |
340 | 340 | ||
341 | dev_close(dev); | 341 | dev_close(dev); |
342 | 342 | ||
343 | dev = __dev_get_by_name(net, "tunl0"); | 343 | dev = __dev_get_by_name(net, "tunl0"); |
344 | if (dev) { | 344 | if (dev) { |
345 | const struct net_device_ops *ops = dev->netdev_ops; | 345 | const struct net_device_ops *ops = dev->netdev_ops; |
346 | struct ifreq ifr; | 346 | struct ifreq ifr; |
347 | struct ip_tunnel_parm p; | 347 | struct ip_tunnel_parm p; |
348 | 348 | ||
349 | memset(&p, 0, sizeof(p)); | 349 | memset(&p, 0, sizeof(p)); |
350 | p.iph.daddr = v->vifc_rmt_addr.s_addr; | 350 | p.iph.daddr = v->vifc_rmt_addr.s_addr; |
351 | p.iph.saddr = v->vifc_lcl_addr.s_addr; | 351 | p.iph.saddr = v->vifc_lcl_addr.s_addr; |
352 | p.iph.version = 4; | 352 | p.iph.version = 4; |
353 | p.iph.ihl = 5; | 353 | p.iph.ihl = 5; |
354 | p.iph.protocol = IPPROTO_IPIP; | 354 | p.iph.protocol = IPPROTO_IPIP; |
355 | sprintf(p.name, "dvmrp%d", v->vifc_vifi); | 355 | sprintf(p.name, "dvmrp%d", v->vifc_vifi); |
356 | ifr.ifr_ifru.ifru_data = (__force void __user *)&p; | 356 | ifr.ifr_ifru.ifru_data = (__force void __user *)&p; |
357 | 357 | ||
358 | if (ops->ndo_do_ioctl) { | 358 | if (ops->ndo_do_ioctl) { |
359 | mm_segment_t oldfs = get_fs(); | 359 | mm_segment_t oldfs = get_fs(); |
360 | 360 | ||
361 | set_fs(KERNEL_DS); | 361 | set_fs(KERNEL_DS); |
362 | ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL); | 362 | ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL); |
363 | set_fs(oldfs); | 363 | set_fs(oldfs); |
364 | } | 364 | } |
365 | } | 365 | } |
366 | } | 366 | } |
367 | 367 | ||
368 | static | 368 | static |
369 | struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v) | 369 | struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v) |
370 | { | 370 | { |
371 | struct net_device *dev; | 371 | struct net_device *dev; |
372 | 372 | ||
373 | dev = __dev_get_by_name(net, "tunl0"); | 373 | dev = __dev_get_by_name(net, "tunl0"); |
374 | 374 | ||
375 | if (dev) { | 375 | if (dev) { |
376 | const struct net_device_ops *ops = dev->netdev_ops; | 376 | const struct net_device_ops *ops = dev->netdev_ops; |
377 | int err; | 377 | int err; |
378 | struct ifreq ifr; | 378 | struct ifreq ifr; |
379 | struct ip_tunnel_parm p; | 379 | struct ip_tunnel_parm p; |
380 | struct in_device *in_dev; | 380 | struct in_device *in_dev; |
381 | 381 | ||
382 | memset(&p, 0, sizeof(p)); | 382 | memset(&p, 0, sizeof(p)); |
383 | p.iph.daddr = v->vifc_rmt_addr.s_addr; | 383 | p.iph.daddr = v->vifc_rmt_addr.s_addr; |
384 | p.iph.saddr = v->vifc_lcl_addr.s_addr; | 384 | p.iph.saddr = v->vifc_lcl_addr.s_addr; |
385 | p.iph.version = 4; | 385 | p.iph.version = 4; |
386 | p.iph.ihl = 5; | 386 | p.iph.ihl = 5; |
387 | p.iph.protocol = IPPROTO_IPIP; | 387 | p.iph.protocol = IPPROTO_IPIP; |
388 | sprintf(p.name, "dvmrp%d", v->vifc_vifi); | 388 | sprintf(p.name, "dvmrp%d", v->vifc_vifi); |
389 | ifr.ifr_ifru.ifru_data = (__force void __user *)&p; | 389 | ifr.ifr_ifru.ifru_data = (__force void __user *)&p; |
390 | 390 | ||
391 | if (ops->ndo_do_ioctl) { | 391 | if (ops->ndo_do_ioctl) { |
392 | mm_segment_t oldfs = get_fs(); | 392 | mm_segment_t oldfs = get_fs(); |
393 | 393 | ||
394 | set_fs(KERNEL_DS); | 394 | set_fs(KERNEL_DS); |
395 | err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL); | 395 | err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL); |
396 | set_fs(oldfs); | 396 | set_fs(oldfs); |
397 | } else | 397 | } else |
398 | err = -EOPNOTSUPP; | 398 | err = -EOPNOTSUPP; |
399 | 399 | ||
400 | dev = NULL; | 400 | dev = NULL; |
401 | 401 | ||
402 | if (err == 0 && | 402 | if (err == 0 && |
403 | (dev = __dev_get_by_name(net, p.name)) != NULL) { | 403 | (dev = __dev_get_by_name(net, p.name)) != NULL) { |
404 | dev->flags |= IFF_MULTICAST; | 404 | dev->flags |= IFF_MULTICAST; |
405 | 405 | ||
406 | in_dev = __in_dev_get_rtnl(dev); | 406 | in_dev = __in_dev_get_rtnl(dev); |
407 | if (in_dev == NULL) | 407 | if (in_dev == NULL) |
408 | goto failure; | 408 | goto failure; |
409 | 409 | ||
410 | ipv4_devconf_setall(in_dev); | 410 | ipv4_devconf_setall(in_dev); |
411 | IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0; | 411 | IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0; |
412 | 412 | ||
413 | if (dev_open(dev)) | 413 | if (dev_open(dev)) |
414 | goto failure; | 414 | goto failure; |
415 | dev_hold(dev); | 415 | dev_hold(dev); |
416 | } | 416 | } |
417 | } | 417 | } |
418 | return dev; | 418 | return dev; |
419 | 419 | ||
420 | failure: | 420 | failure: |
421 | /* allow the register to be completed before unregistering. */ | 421 | /* allow the register to be completed before unregistering. */ |
422 | rtnl_unlock(); | 422 | rtnl_unlock(); |
423 | rtnl_lock(); | 423 | rtnl_lock(); |
424 | 424 | ||
425 | unregister_netdevice(dev); | 425 | unregister_netdevice(dev); |
426 | return NULL; | 426 | return NULL; |
427 | } | 427 | } |
428 | 428 | ||
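
ipmr_del_tunnel() and ipmr_new_tunnel() repeat the same bracket around ndo_do_ioctl(): the tunnel driver's ioctl expects a __user pointer, so a kernel caller must temporarily widen the address limit for copy_from_user() to accept a kernel pointer. A sketch factoring out that pattern (kernel context assumed, mirroring the code above):

static int call_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	mm_segment_t oldfs;
	int err = -EOPNOTSUPP;

	if (ops->ndo_do_ioctl) {
		oldfs = get_fs();	/* save the current address limit */
		set_fs(KERNEL_DS);	/* let copy_from_user() take kernel pointers */
		err = ops->ndo_do_ioctl(dev, ifr, cmd);
		set_fs(oldfs);		/* always restore */
	}
	return err;
}
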
429 | #ifdef CONFIG_IP_PIMSM | 429 | #ifdef CONFIG_IP_PIMSM |
430 | 430 | ||
431 | static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) | 431 | static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) |
432 | { | 432 | { |
433 | struct net *net = dev_net(dev); | 433 | struct net *net = dev_net(dev); |
434 | struct mr_table *mrt; | 434 | struct mr_table *mrt; |
435 | struct flowi fl = { | 435 | struct flowi fl = { |
436 | .oif = dev->ifindex, | 436 | .oif = dev->ifindex, |
437 | .iif = skb->skb_iif, | 437 | .iif = skb->skb_iif, |
438 | .mark = skb->mark, | 438 | .mark = skb->mark, |
439 | }; | 439 | }; |
440 | int err; | 440 | int err; |
441 | 441 | ||
442 | err = ipmr_fib_lookup(net, &fl, &mrt); | 442 | err = ipmr_fib_lookup(net, &fl, &mrt); |
443 | if (err < 0) | 443 | if (err < 0) |
444 | return err; | 444 | return err; |
445 | 445 | ||
446 | read_lock(&mrt_lock); | 446 | read_lock(&mrt_lock); |
447 | dev->stats.tx_bytes += skb->len; | 447 | dev->stats.tx_bytes += skb->len; |
448 | dev->stats.tx_packets++; | 448 | dev->stats.tx_packets++; |
449 | ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT); | 449 | ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT); |
450 | read_unlock(&mrt_lock); | 450 | read_unlock(&mrt_lock); |
451 | kfree_skb(skb); | 451 | kfree_skb(skb); |
452 | return NETDEV_TX_OK; | 452 | return NETDEV_TX_OK; |
453 | } | 453 | } |
454 | 454 | ||
455 | static const struct net_device_ops reg_vif_netdev_ops = { | 455 | static const struct net_device_ops reg_vif_netdev_ops = { |
456 | .ndo_start_xmit = reg_vif_xmit, | 456 | .ndo_start_xmit = reg_vif_xmit, |
457 | }; | 457 | }; |
458 | 458 | ||
459 | static void reg_vif_setup(struct net_device *dev) | 459 | static void reg_vif_setup(struct net_device *dev) |
460 | { | 460 | { |
461 | dev->type = ARPHRD_PIMREG; | 461 | dev->type = ARPHRD_PIMREG; |
462 | dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8; | 462 | dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8; |
463 | dev->flags = IFF_NOARP; | 463 | dev->flags = IFF_NOARP; |
464 | dev->netdev_ops = ®_vif_netdev_ops, | 464 | dev->netdev_ops = ®_vif_netdev_ops, |
465 | dev->destructor = free_netdev; | 465 | dev->destructor = free_netdev; |
466 | dev->features |= NETIF_F_NETNS_LOCAL; | 466 | dev->features |= NETIF_F_NETNS_LOCAL; |
467 | } | 467 | } |
468 | 468 | ||
469 | static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) | 469 | static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) |
470 | { | 470 | { |
471 | struct net_device *dev; | 471 | struct net_device *dev; |
472 | struct in_device *in_dev; | 472 | struct in_device *in_dev; |
473 | char name[IFNAMSIZ]; | 473 | char name[IFNAMSIZ]; |
474 | 474 | ||
475 | if (mrt->id == RT_TABLE_DEFAULT) | 475 | if (mrt->id == RT_TABLE_DEFAULT) |
476 | sprintf(name, "pimreg"); | 476 | sprintf(name, "pimreg"); |
477 | else | 477 | else |
478 | sprintf(name, "pimreg%u", mrt->id); | 478 | sprintf(name, "pimreg%u", mrt->id); |
479 | 479 | ||
480 | dev = alloc_netdev(0, name, reg_vif_setup); | 480 | dev = alloc_netdev(0, name, reg_vif_setup); |
481 | 481 | ||
482 | if (dev == NULL) | 482 | if (dev == NULL) |
483 | return NULL; | 483 | return NULL; |
484 | 484 | ||
485 | dev_net_set(dev, net); | 485 | dev_net_set(dev, net); |
486 | 486 | ||
487 | if (register_netdevice(dev)) { | 487 | if (register_netdevice(dev)) { |
488 | free_netdev(dev); | 488 | free_netdev(dev); |
489 | return NULL; | 489 | return NULL; |
490 | } | 490 | } |
491 | dev->iflink = 0; | 491 | dev->iflink = 0; |
492 | 492 | ||
493 | rcu_read_lock(); | 493 | rcu_read_lock(); |
494 | if ((in_dev = __in_dev_get_rcu(dev)) == NULL) { | 494 | if ((in_dev = __in_dev_get_rcu(dev)) == NULL) { |
495 | rcu_read_unlock(); | 495 | rcu_read_unlock(); |
496 | goto failure; | 496 | goto failure; |
497 | } | 497 | } |
498 | 498 | ||
499 | ipv4_devconf_setall(in_dev); | 499 | ipv4_devconf_setall(in_dev); |
500 | IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0; | 500 | IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0; |
501 | rcu_read_unlock(); | 501 | rcu_read_unlock(); |
502 | 502 | ||
503 | if (dev_open(dev)) | 503 | if (dev_open(dev)) |
504 | goto failure; | 504 | goto failure; |
505 | 505 | ||
506 | dev_hold(dev); | 506 | dev_hold(dev); |
507 | 507 | ||
508 | return dev; | 508 | return dev; |
509 | 509 | ||
510 | failure: | 510 | failure: |
511 | /* allow the register to be completed before unregistering. */ | 511 | /* allow the register to be completed before unregistering. */ |
512 | rtnl_unlock(); | 512 | rtnl_unlock(); |
513 | rtnl_lock(); | 513 | rtnl_lock(); |
514 | 514 | ||
515 | unregister_netdevice(dev); | 515 | unregister_netdevice(dev); |
516 | return NULL; | 516 | return NULL; |
517 | } | 517 | } |
518 | #endif | 518 | #endif |
519 | 519 | ||
520 | /* | 520 | /* |
521 | * Delete a VIF entry | 521 | * Delete a VIF entry |
522 | * @notify: Set to 1 if the caller is a notifier_call | 522 | * @notify: Set to 1 if the caller is a notifier_call |
523 | */ | 523 | */ |
524 | 524 | ||
525 | static int vif_delete(struct mr_table *mrt, int vifi, int notify, | 525 | static int vif_delete(struct mr_table *mrt, int vifi, int notify, |
526 | struct list_head *head) | 526 | struct list_head *head) |
527 | { | 527 | { |
528 | struct vif_device *v; | 528 | struct vif_device *v; |
529 | struct net_device *dev; | 529 | struct net_device *dev; |
530 | struct in_device *in_dev; | 530 | struct in_device *in_dev; |
531 | 531 | ||
532 | if (vifi < 0 || vifi >= mrt->maxvif) | 532 | if (vifi < 0 || vifi >= mrt->maxvif) |
533 | return -EADDRNOTAVAIL; | 533 | return -EADDRNOTAVAIL; |
534 | 534 | ||
535 | v = &mrt->vif_table[vifi]; | 535 | v = &mrt->vif_table[vifi]; |
536 | 536 | ||
537 | write_lock_bh(&mrt_lock); | 537 | write_lock_bh(&mrt_lock); |
538 | dev = v->dev; | 538 | dev = v->dev; |
539 | v->dev = NULL; | 539 | v->dev = NULL; |
540 | 540 | ||
541 | if (!dev) { | 541 | if (!dev) { |
542 | write_unlock_bh(&mrt_lock); | 542 | write_unlock_bh(&mrt_lock); |
543 | return -EADDRNOTAVAIL; | 543 | return -EADDRNOTAVAIL; |
544 | } | 544 | } |
545 | 545 | ||
546 | #ifdef CONFIG_IP_PIMSM | 546 | #ifdef CONFIG_IP_PIMSM |
547 | if (vifi == mrt->mroute_reg_vif_num) | 547 | if (vifi == mrt->mroute_reg_vif_num) |
548 | mrt->mroute_reg_vif_num = -1; | 548 | mrt->mroute_reg_vif_num = -1; |
549 | #endif | 549 | #endif |
550 | 550 | ||
551 | if (vifi+1 == mrt->maxvif) { | 551 | if (vifi+1 == mrt->maxvif) { |
552 | int tmp; | 552 | int tmp; |
553 | for (tmp=vifi-1; tmp>=0; tmp--) { | 553 | for (tmp=vifi-1; tmp>=0; tmp--) { |
554 | if (VIF_EXISTS(mrt, tmp)) | 554 | if (VIF_EXISTS(mrt, tmp)) |
555 | break; | 555 | break; |
556 | } | 556 | } |
557 | mrt->maxvif = tmp+1; | 557 | mrt->maxvif = tmp+1; |
558 | } | 558 | } |
559 | 559 | ||
560 | write_unlock_bh(&mrt_lock); | 560 | write_unlock_bh(&mrt_lock); |
561 | 561 | ||
562 | dev_set_allmulti(dev, -1); | 562 | dev_set_allmulti(dev, -1); |
563 | 563 | ||
564 | if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) { | 564 | if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) { |
565 | IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--; | 565 | IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--; |
566 | ip_rt_multicast_event(in_dev); | 566 | ip_rt_multicast_event(in_dev); |
567 | } | 567 | } |
568 | 568 | ||
569 | if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify) | 569 | if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify) |
570 | unregister_netdevice_queue(dev, head); | 570 | unregister_netdevice_queue(dev, head); |
571 | 571 | ||
572 | dev_put(dev); | 572 | dev_put(dev); |
573 | return 0; | 573 | return 0; |
574 | } | 574 | } |
575 | 575 | ||
576 | static inline void ipmr_cache_free(struct mfc_cache *c) | 576 | static inline void ipmr_cache_free(struct mfc_cache *c) |
577 | { | 577 | { |
578 | kmem_cache_free(mrt_cachep, c); | 578 | kmem_cache_free(mrt_cachep, c); |
579 | } | 579 | } |
580 | 580 | ||
581 | /* Destroy an unresolved cache entry, killing queued skbs | 581 | /* Destroy an unresolved cache entry, killing queued skbs |
582 | and reporting error to netlink readers. | 582 | and reporting error to netlink readers. |
583 | */ | 583 | */ |
584 | 584 | ||
585 | static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c) | 585 | static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c) |
586 | { | 586 | { |
587 | struct net *net = read_pnet(&mrt->net); | 587 | struct net *net = read_pnet(&mrt->net); |
588 | struct sk_buff *skb; | 588 | struct sk_buff *skb; |
589 | struct nlmsgerr *e; | 589 | struct nlmsgerr *e; |
590 | 590 | ||
591 | atomic_dec(&mrt->cache_resolve_queue_len); | 591 | atomic_dec(&mrt->cache_resolve_queue_len); |
592 | 592 | ||
593 | while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) { | 593 | while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) { |
594 | if (ip_hdr(skb)->version == 0) { | 594 | if (ip_hdr(skb)->version == 0) { |
595 | struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); | 595 | struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); |
596 | nlh->nlmsg_type = NLMSG_ERROR; | 596 | nlh->nlmsg_type = NLMSG_ERROR; |
597 | nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); | 597 | nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); |
598 | skb_trim(skb, nlh->nlmsg_len); | 598 | skb_trim(skb, nlh->nlmsg_len); |
599 | e = NLMSG_DATA(nlh); | 599 | e = NLMSG_DATA(nlh); |
600 | e->error = -ETIMEDOUT; | 600 | e->error = -ETIMEDOUT; |
601 | memset(&e->msg, 0, sizeof(e->msg)); | 601 | memset(&e->msg, 0, sizeof(e->msg)); |
602 | 602 | ||
603 | rtnl_unicast(skb, net, NETLINK_CB(skb).pid); | 603 | rtnl_unicast(skb, net, NETLINK_CB(skb).pid); |
604 | } else | 604 | } else |
605 | kfree_skb(skb); | 605 | kfree_skb(skb); |
606 | } | 606 | } |
607 | 607 | ||
608 | ipmr_cache_free(c); | 608 | ipmr_cache_free(c); |
609 | } | 609 | } |
610 | 610 | ||
611 | 611 | ||
612 | /* Timer process for the unresolved queue. */ | 612 | /* Timer process for the unresolved queue. */ |
613 | 613 | ||
614 | static void ipmr_expire_process(unsigned long arg) | 614 | static void ipmr_expire_process(unsigned long arg) |
615 | { | 615 | { |
616 | struct mr_table *mrt = (struct mr_table *)arg; | 616 | struct mr_table *mrt = (struct mr_table *)arg; |
617 | unsigned long now; | 617 | unsigned long now; |
618 | unsigned long expires; | 618 | unsigned long expires; |
619 | struct mfc_cache *c, *next; | 619 | struct mfc_cache *c, *next; |
620 | 620 | ||
621 | if (!spin_trylock(&mfc_unres_lock)) { | 621 | if (!spin_trylock(&mfc_unres_lock)) { |
622 | mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10); | 622 | mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10); |
623 | return; | 623 | return; |
624 | } | 624 | } |
625 | 625 | ||
626 | if (list_empty(&mrt->mfc_unres_queue)) | 626 | if (list_empty(&mrt->mfc_unres_queue)) |
627 | goto out; | 627 | goto out; |
628 | 628 | ||
629 | now = jiffies; | 629 | now = jiffies; |
630 | expires = 10*HZ; | 630 | expires = 10*HZ; |
631 | 631 | ||
632 | list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) { | 632 | list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) { |
633 | if (time_after(c->mfc_un.unres.expires, now)) { | 633 | if (time_after(c->mfc_un.unres.expires, now)) { |
634 | unsigned long interval = c->mfc_un.unres.expires - now; | 634 | unsigned long interval = c->mfc_un.unres.expires - now; |
635 | if (interval < expires) | 635 | if (interval < expires) |
636 | expires = interval; | 636 | expires = interval; |
637 | continue; | 637 | continue; |
638 | } | 638 | } |
639 | 639 | ||
640 | list_del(&c->list); | 640 | list_del(&c->list); |
641 | ipmr_destroy_unres(mrt, c); | 641 | ipmr_destroy_unres(mrt, c); |
642 | } | 642 | } |
643 | 643 | ||
644 | if (!list_empty(&mrt->mfc_unres_queue)) | 644 | if (!list_empty(&mrt->mfc_unres_queue)) |
645 | mod_timer(&mrt->ipmr_expire_timer, jiffies + expires); | 645 | mod_timer(&mrt->ipmr_expire_timer, jiffies + expires); |
646 | 646 | ||
647 | out: | 647 | out: |
648 | spin_unlock(&mfc_unres_lock); | 648 | spin_unlock(&mfc_unres_lock); |
649 | } | 649 | } |
650 | 650 | ||
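
The expire timer runs in softirq context, so it must not spin on mfc_unres_lock; on contention it simply re-arms itself and retries roughly 100 ms later. The pattern, condensed from the function above:

static void expire_timer_fn(unsigned long arg)
{
	struct mr_table *mrt = (struct mr_table *)arg;

	if (!spin_trylock(&mfc_unres_lock)) {
		/* lock busy: back off for HZ/10 instead of spinning */
		mod_timer(&mrt->ipmr_expire_timer, jiffies + HZ / 10);
		return;
	}
	/* ... expire stale entries, tracking the nearest deadline ... */
	spin_unlock(&mfc_unres_lock);
}
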
651 | /* Fill oifs list. It is called under write locked mrt_lock. */ | 651 | /* Fill oifs list. It is called under write locked mrt_lock. */ |
652 | 652 | ||
653 | static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache, | 653 | static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache, |
654 | unsigned char *ttls) | 654 | unsigned char *ttls) |
655 | { | 655 | { |
656 | int vifi; | 656 | int vifi; |
657 | 657 | ||
658 | cache->mfc_un.res.minvif = MAXVIFS; | 658 | cache->mfc_un.res.minvif = MAXVIFS; |
659 | cache->mfc_un.res.maxvif = 0; | 659 | cache->mfc_un.res.maxvif = 0; |
660 | memset(cache->mfc_un.res.ttls, 255, MAXVIFS); | 660 | memset(cache->mfc_un.res.ttls, 255, MAXVIFS); |
661 | 661 | ||
662 | for (vifi = 0; vifi < mrt->maxvif; vifi++) { | 662 | for (vifi = 0; vifi < mrt->maxvif; vifi++) { |
663 | if (VIF_EXISTS(mrt, vifi) && | 663 | if (VIF_EXISTS(mrt, vifi) && |
664 | ttls[vifi] && ttls[vifi] < 255) { | 664 | ttls[vifi] && ttls[vifi] < 255) { |
665 | cache->mfc_un.res.ttls[vifi] = ttls[vifi]; | 665 | cache->mfc_un.res.ttls[vifi] = ttls[vifi]; |
666 | if (cache->mfc_un.res.minvif > vifi) | 666 | if (cache->mfc_un.res.minvif > vifi) |
667 | cache->mfc_un.res.minvif = vifi; | 667 | cache->mfc_un.res.minvif = vifi; |
668 | if (cache->mfc_un.res.maxvif <= vifi) | 668 | if (cache->mfc_un.res.maxvif <= vifi) |
669 | cache->mfc_un.res.maxvif = vifi + 1; | 669 | cache->mfc_un.res.maxvif = vifi + 1; |
670 | } | 670 | } |
671 | } | 671 | } |
672 | } | 672 | } |
673 | 673 | ||
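
ipmr_update_thresholds() consumes the per-vif TTL array the daemon installs: 0 and 255 both mean "never forward on this vif", and any other value is the threshold a packet's TTL must exceed to be forwarded there; minvif/maxvif then bound the forwarding loop. A hedged user-space sketch of the matching MRT_ADD_MFC call; the addresses, vif numbers, and header arrangement are assumptions:

#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <linux/mroute.h>

int add_route(int mrt_sock)
{
	struct mfcctl mfc;

	memset(&mfc, 0, sizeof(mfc));
	mfc.mfcc_origin.s_addr   = inet_addr("10.0.0.2");	/* source */
	mfc.mfcc_mcastgrp.s_addr = inet_addr("239.1.1.1");	/* group */
	mfc.mfcc_parent = 0;		/* incoming vif */
	mfc.mfcc_ttls[1] = 1;		/* vif 1: forward when TTL > 1 */
	mfc.mfcc_ttls[2] = 16;		/* vif 2: forward only when TTL > 16 */
	/* slots left at 0 are skipped, exactly like 255 */

	return setsockopt(mrt_sock, IPPROTO_IP, MRT_ADD_MFC,
			  &mfc, sizeof(mfc));
}
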
674 | static int vif_add(struct net *net, struct mr_table *mrt, | 674 | static int vif_add(struct net *net, struct mr_table *mrt, |
675 | struct vifctl *vifc, int mrtsock) | 675 | struct vifctl *vifc, int mrtsock) |
676 | { | 676 | { |
677 | int vifi = vifc->vifc_vifi; | 677 | int vifi = vifc->vifc_vifi; |
678 | struct vif_device *v = &mrt->vif_table[vifi]; | 678 | struct vif_device *v = &mrt->vif_table[vifi]; |
679 | struct net_device *dev; | 679 | struct net_device *dev; |
680 | struct in_device *in_dev; | 680 | struct in_device *in_dev; |
681 | int err; | 681 | int err; |
682 | 682 | ||
683 | /* Is vif busy? */ | 683 | /* Is vif busy? */ |
684 | if (VIF_EXISTS(mrt, vifi)) | 684 | if (VIF_EXISTS(mrt, vifi)) |
685 | return -EADDRINUSE; | 685 | return -EADDRINUSE; |
686 | 686 | ||
687 | switch (vifc->vifc_flags) { | 687 | switch (vifc->vifc_flags) { |
688 | #ifdef CONFIG_IP_PIMSM | 688 | #ifdef CONFIG_IP_PIMSM |
689 | case VIFF_REGISTER: | 689 | case VIFF_REGISTER: |
690 | /* | 690 | /* |
691 | * Special Purpose VIF in PIM | 691 | * Special Purpose VIF in PIM |
692 | * All the packets will be sent to the daemon | 692 | * All the packets will be sent to the daemon |
693 | */ | 693 | */ |
694 | if (mrt->mroute_reg_vif_num >= 0) | 694 | if (mrt->mroute_reg_vif_num >= 0) |
695 | return -EADDRINUSE; | 695 | return -EADDRINUSE; |
696 | dev = ipmr_reg_vif(net, mrt); | 696 | dev = ipmr_reg_vif(net, mrt); |
697 | if (!dev) | 697 | if (!dev) |
698 | return -ENOBUFS; | 698 | return -ENOBUFS; |
699 | err = dev_set_allmulti(dev, 1); | 699 | err = dev_set_allmulti(dev, 1); |
700 | if (err) { | 700 | if (err) { |
701 | unregister_netdevice(dev); | 701 | unregister_netdevice(dev); |
702 | dev_put(dev); | 702 | dev_put(dev); |
703 | return err; | 703 | return err; |
704 | } | 704 | } |
705 | break; | 705 | break; |
706 | #endif | 706 | #endif |
707 | case VIFF_TUNNEL: | 707 | case VIFF_TUNNEL: |
708 | dev = ipmr_new_tunnel(net, vifc); | 708 | dev = ipmr_new_tunnel(net, vifc); |
709 | if (!dev) | 709 | if (!dev) |
710 | return -ENOBUFS; | 710 | return -ENOBUFS; |
711 | err = dev_set_allmulti(dev, 1); | 711 | err = dev_set_allmulti(dev, 1); |
712 | if (err) { | 712 | if (err) { |
713 | ipmr_del_tunnel(dev, vifc); | 713 | ipmr_del_tunnel(dev, vifc); |
714 | dev_put(dev); | 714 | dev_put(dev); |
715 | return err; | 715 | return err; |
716 | } | 716 | } |
717 | break; | 717 | break; |
718 | 718 | ||
719 | case VIFF_USE_IFINDEX: | 719 | case VIFF_USE_IFINDEX: |
720 | case 0: | 720 | case 0: |
721 | if (vifc->vifc_flags == VIFF_USE_IFINDEX) { | 721 | if (vifc->vifc_flags == VIFF_USE_IFINDEX) { |
722 | dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex); | 722 | dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex); |
723 | if (dev && dev->ip_ptr == NULL) { | 723 | if (dev && dev->ip_ptr == NULL) { |
724 | dev_put(dev); | 724 | dev_put(dev); |
725 | return -EADDRNOTAVAIL; | 725 | return -EADDRNOTAVAIL; |
726 | } | 726 | } |
727 | } else | 727 | } else |
728 | dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr); | 728 | dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr); |
729 | 729 | ||
730 | if (!dev) | 730 | if (!dev) |
731 | return -EADDRNOTAVAIL; | 731 | return -EADDRNOTAVAIL; |
732 | err = dev_set_allmulti(dev, 1); | 732 | err = dev_set_allmulti(dev, 1); |
733 | if (err) { | 733 | if (err) { |
734 | dev_put(dev); | 734 | dev_put(dev); |
735 | return err; | 735 | return err; |
736 | } | 736 | } |
737 | break; | 737 | break; |
738 | default: | 738 | default: |
739 | return -EINVAL; | 739 | return -EINVAL; |
740 | } | 740 | } |
741 | 741 | ||
742 | if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) { | 742 | if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) { |
743 | dev_put(dev); | 743 | dev_put(dev); |
744 | return -EADDRNOTAVAIL; | 744 | return -EADDRNOTAVAIL; |
745 | } | 745 | } |
746 | IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++; | 746 | IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++; |
747 | ip_rt_multicast_event(in_dev); | 747 | ip_rt_multicast_event(in_dev); |
748 | 748 | ||
749 | /* | 749 | /* |
750 | * Fill in the VIF structures | 750 | * Fill in the VIF structures |
751 | */ | 751 | */ |
752 | v->rate_limit = vifc->vifc_rate_limit; | 752 | v->rate_limit = vifc->vifc_rate_limit; |
753 | v->local = vifc->vifc_lcl_addr.s_addr; | 753 | v->local = vifc->vifc_lcl_addr.s_addr; |
754 | v->remote = vifc->vifc_rmt_addr.s_addr; | 754 | v->remote = vifc->vifc_rmt_addr.s_addr; |
755 | v->flags = vifc->vifc_flags; | 755 | v->flags = vifc->vifc_flags; |
756 | if (!mrtsock) | 756 | if (!mrtsock) |
757 | v->flags |= VIFF_STATIC; | 757 | v->flags |= VIFF_STATIC; |
758 | v->threshold = vifc->vifc_threshold; | 758 | v->threshold = vifc->vifc_threshold; |
759 | v->bytes_in = 0; | 759 | v->bytes_in = 0; |
760 | v->bytes_out = 0; | 760 | v->bytes_out = 0; |
761 | v->pkt_in = 0; | 761 | v->pkt_in = 0; |
762 | v->pkt_out = 0; | 762 | v->pkt_out = 0; |
763 | v->link = dev->ifindex; | 763 | v->link = dev->ifindex; |
764 | if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER)) | 764 | if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER)) |
765 | v->link = dev->iflink; | 765 | v->link = dev->iflink; |
766 | 766 | ||
767 | /* And finish update writing critical data */ | 767 | /* And finish update writing critical data */ |
768 | write_lock_bh(&mrt_lock); | 768 | write_lock_bh(&mrt_lock); |
769 | v->dev = dev; | 769 | v->dev = dev; |
770 | #ifdef CONFIG_IP_PIMSM | 770 | #ifdef CONFIG_IP_PIMSM |
771 | if (v->flags&VIFF_REGISTER) | 771 | if (v->flags&VIFF_REGISTER) |
772 | mrt->mroute_reg_vif_num = vifi; | 772 | mrt->mroute_reg_vif_num = vifi; |
773 | #endif | 773 | #endif |
774 | if (vifi+1 > mrt->maxvif) | 774 | if (vifi+1 > mrt->maxvif) |
775 | mrt->maxvif = vifi+1; | 775 | mrt->maxvif = vifi+1; |
776 | write_unlock_bh(&mrt_lock); | 776 | write_unlock_bh(&mrt_lock); |
777 | return 0; | 777 | return 0; |
778 | } | 778 | } |
779 | 779 | ||
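
vif_add() is reached through MRT_ADD_VIF on the multicast-router control socket, which must first claim the routing role with MRT_INIT. A hedged user-space sketch; 192.0.2.1 is a placeholder local address and the calls need CAP_NET_ADMIN:

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <linux/mroute.h>

int main(void)
{
	int one = 1;
	struct vifctl vc;
	int s = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);

	if (s < 0) {
		perror("socket");
		return 1;
	}

	/* become the (single) multicast routing daemon for this netns */
	if (setsockopt(s, IPPROTO_IP, MRT_INIT, &one, sizeof(one)) < 0) {
		perror("MRT_INIT");
		return 1;
	}

	memset(&vc, 0, sizeof(vc));
	vc.vifc_vifi = 0;			/* slot in vif_table[] */
	vc.vifc_flags = 0;			/* plain interface: no tunnel, no register vif */
	vc.vifc_threshold = 1;			/* minimum TTL, see ipmr_update_thresholds() */
	vc.vifc_rate_limit = 0;
	vc.vifc_lcl_addr.s_addr = inet_addr("192.0.2.1");	/* placeholder */

	if (setsockopt(s, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc)) < 0)
		perror("MRT_ADD_VIF");

	close(s);	/* closing the socket tears the vif table down again */
	return 0;
}
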
780 | static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt, | 780 | static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt, |
781 | __be32 origin, | 781 | __be32 origin, |
782 | __be32 mcastgrp) | 782 | __be32 mcastgrp) |
783 | { | 783 | { |
784 | int line = MFC_HASH(mcastgrp, origin); | 784 | int line = MFC_HASH(mcastgrp, origin); |
785 | struct mfc_cache *c; | 785 | struct mfc_cache *c; |
786 | 786 | ||
787 | list_for_each_entry(c, &mrt->mfc_cache_array[line], list) { | 787 | list_for_each_entry(c, &mrt->mfc_cache_array[line], list) { |
788 | if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp) | 788 | if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp) |
789 | return c; | 789 | return c; |
790 | } | 790 | } |
791 | return NULL; | 791 | return NULL; |
792 | } | 792 | } |
793 | 793 | ||
794 | /* | 794 | /* |
795 | * Allocate a multicast cache entry | 795 | * Allocate a multicast cache entry |
796 | */ | 796 | */ |
797 | static struct mfc_cache *ipmr_cache_alloc(void) | 797 | static struct mfc_cache *ipmr_cache_alloc(void) |
798 | { | 798 | { |
799 | struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); | 799 | struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); |
800 | if (c == NULL) | 800 | if (c == NULL) |
801 | return NULL; | 801 | return NULL; |
802 | c->mfc_un.res.minvif = MAXVIFS; | 802 | c->mfc_un.res.minvif = MAXVIFS; |
803 | return c; | 803 | return c; |
804 | } | 804 | } |
805 | 805 | ||
806 | static struct mfc_cache *ipmr_cache_alloc_unres(void) | 806 | static struct mfc_cache *ipmr_cache_alloc_unres(void) |
807 | { | 807 | { |
808 | struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); | 808 | struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); |
809 | if (c == NULL) | 809 | if (c == NULL) |
810 | return NULL; | 810 | return NULL; |
811 | skb_queue_head_init(&c->mfc_un.unres.unresolved); | 811 | skb_queue_head_init(&c->mfc_un.unres.unresolved); |
812 | c->mfc_un.unres.expires = jiffies + 10*HZ; | 812 | c->mfc_un.unres.expires = jiffies + 10*HZ; |
813 | return c; | 813 | return c; |
814 | } | 814 | } |
815 | 815 | ||
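
The two allocators differ only in their GFP class: ipmr_cache_alloc() is called from process context (setsockopt) and may sleep, while ipmr_cache_alloc_unres() runs in the packet path and may not. Condensed:

static struct mfc_cache *cache_alloc_ctx(bool in_packet_path)
{
	/* the GFP class follows the calling context, not the object */
	gfp_t gfp = in_packet_path ? GFP_ATOMIC : GFP_KERNEL;

	return kmem_cache_zalloc(mrt_cachep, gfp);
}
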
816 | /* | 816 | /* |
817 | * A cache entry has gone into a resolved state from queued | 817 | * A cache entry has gone into a resolved state from queued |
818 | */ | 818 | */ |
819 | 819 | ||
820 | static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, | 820 | static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, |
821 | struct mfc_cache *uc, struct mfc_cache *c) | 821 | struct mfc_cache *uc, struct mfc_cache *c) |
822 | { | 822 | { |
823 | struct sk_buff *skb; | 823 | struct sk_buff *skb; |
824 | struct nlmsgerr *e; | 824 | struct nlmsgerr *e; |
825 | 825 | ||
826 | /* | 826 | /* |
827 | * Play the pending entries through our router | 827 | * Play the pending entries through our router |
828 | */ | 828 | */ |
829 | 829 | ||
830 | while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) { | 830 | while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) { |
831 | if (ip_hdr(skb)->version == 0) { | 831 | if (ip_hdr(skb)->version == 0) { |
832 | struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); | 832 | struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); |
833 | 833 | ||
834 | if (__ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) { | 834 | if (__ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) { |
835 | nlh->nlmsg_len = (skb_tail_pointer(skb) - | 835 | nlh->nlmsg_len = (skb_tail_pointer(skb) - |
836 | (u8 *)nlh); | 836 | (u8 *)nlh); |
837 | } else { | 837 | } else { |
838 | nlh->nlmsg_type = NLMSG_ERROR; | 838 | nlh->nlmsg_type = NLMSG_ERROR; |
839 | nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); | 839 | nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); |
840 | skb_trim(skb, nlh->nlmsg_len); | 840 | skb_trim(skb, nlh->nlmsg_len); |
841 | e = NLMSG_DATA(nlh); | 841 | e = NLMSG_DATA(nlh); |
842 | e->error = -EMSGSIZE; | 842 | e->error = -EMSGSIZE; |
843 | memset(&e->msg, 0, sizeof(e->msg)); | 843 | memset(&e->msg, 0, sizeof(e->msg)); |
844 | } | 844 | } |
845 | 845 | ||
846 | rtnl_unicast(skb, net, NETLINK_CB(skb).pid); | 846 | rtnl_unicast(skb, net, NETLINK_CB(skb).pid); |
847 | } else | 847 | } else |
848 | ip_mr_forward(net, mrt, skb, c, 0); | 848 | ip_mr_forward(net, mrt, skb, c, 0); |
849 | } | 849 | } |
850 | } | 850 | } |
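When __ipmr_fill_mroute() cannot fit the answer into a queued netlink skb, the code above rewrites the message in place into an NLMSG_ERROR reply carrying -EMSGSIZE. What userspace then parses is the standard netlink error record from linux/netlink.h:

	struct nlmsgerr {
		int		error;	/* negative errno; -EMSGSIZE here */
		struct nlmsghdr	msg;	/* header of the request that failed */
	};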
851 | 851 | ||
852 | /* | 852 | /* |
853 | * Bounce a cache query up to mrouted. We could use netlink for this but mrouted | 853 | * Bounce a cache query up to mrouted. We could use netlink for this but mrouted |
854 | * expects the following bizarre scheme. | 854 | * expects the following bizarre scheme. |
855 | * | 855 | * |
856 | * Called under mrt_lock. | 856 | * Called under mrt_lock. |
857 | */ | 857 | */ |
858 | 858 | ||
859 | static int ipmr_cache_report(struct mr_table *mrt, | 859 | static int ipmr_cache_report(struct mr_table *mrt, |
860 | struct sk_buff *pkt, vifi_t vifi, int assert) | 860 | struct sk_buff *pkt, vifi_t vifi, int assert) |
861 | { | 861 | { |
862 | struct sk_buff *skb; | 862 | struct sk_buff *skb; |
863 | const int ihl = ip_hdrlen(pkt); | 863 | const int ihl = ip_hdrlen(pkt); |
864 | struct igmphdr *igmp; | 864 | struct igmphdr *igmp; |
865 | struct igmpmsg *msg; | 865 | struct igmpmsg *msg; |
866 | int ret; | 866 | int ret; |
867 | 867 | ||
868 | #ifdef CONFIG_IP_PIMSM | 868 | #ifdef CONFIG_IP_PIMSM |
869 | if (assert == IGMPMSG_WHOLEPKT) | 869 | if (assert == IGMPMSG_WHOLEPKT) |
870 | skb = skb_realloc_headroom(pkt, sizeof(struct iphdr)); | 870 | skb = skb_realloc_headroom(pkt, sizeof(struct iphdr)); |
871 | else | 871 | else |
872 | #endif | 872 | #endif |
873 | skb = alloc_skb(128, GFP_ATOMIC); | 873 | skb = alloc_skb(128, GFP_ATOMIC); |
874 | 874 | ||
875 | if (!skb) | 875 | if (!skb) |
876 | return -ENOBUFS; | 876 | return -ENOBUFS; |
877 | 877 | ||
878 | #ifdef CONFIG_IP_PIMSM | 878 | #ifdef CONFIG_IP_PIMSM |
879 | if (assert == IGMPMSG_WHOLEPKT) { | 879 | if (assert == IGMPMSG_WHOLEPKT) { |
880 | /* Ugly, but we have no choice with this interface. | 880 | /* Ugly, but we have no choice with this interface. |
881 | Duplicate old header, fix ihl, length etc. | 881 | Duplicate old header, fix ihl, length etc. |
882 | And all this only to mangle msg->im_msgtype and | 882 | And all this only to mangle msg->im_msgtype and |
883 | to set msg->im_mbz to "mbz" :-) | 883 | to set msg->im_mbz to "mbz" :-) |
884 | */ | 884 | */ |
885 | skb_push(skb, sizeof(struct iphdr)); | 885 | skb_push(skb, sizeof(struct iphdr)); |
886 | skb_reset_network_header(skb); | 886 | skb_reset_network_header(skb); |
887 | skb_reset_transport_header(skb); | 887 | skb_reset_transport_header(skb); |
888 | msg = (struct igmpmsg *)skb_network_header(skb); | 888 | msg = (struct igmpmsg *)skb_network_header(skb); |
889 | memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr)); | 889 | memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr)); |
890 | msg->im_msgtype = IGMPMSG_WHOLEPKT; | 890 | msg->im_msgtype = IGMPMSG_WHOLEPKT; |
891 | msg->im_mbz = 0; | 891 | msg->im_mbz = 0; |
892 | msg->im_vif = mrt->mroute_reg_vif_num; | 892 | msg->im_vif = mrt->mroute_reg_vif_num; |
893 | ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2; | 893 | ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2; |
894 | ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) + | 894 | ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) + |
895 | sizeof(struct iphdr)); | 895 | sizeof(struct iphdr)); |
896 | } else | 896 | } else |
897 | #endif | 897 | #endif |
898 | { | 898 | { |
899 | 899 | ||
900 | /* | 900 | /* |
901 | * Copy the IP header | 901 | * Copy the IP header |
902 | */ | 902 | */ |
903 | 903 | ||
904 | skb->network_header = skb->tail; | 904 | skb->network_header = skb->tail; |
905 | skb_put(skb, ihl); | 905 | skb_put(skb, ihl); |
906 | skb_copy_to_linear_data(skb, pkt->data, ihl); | 906 | skb_copy_to_linear_data(skb, pkt->data, ihl); |
907 | ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */ | 907 | ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */ |
908 | msg = (struct igmpmsg *)skb_network_header(skb); | 908 | msg = (struct igmpmsg *)skb_network_header(skb); |
909 | msg->im_vif = vifi; | 909 | msg->im_vif = vifi; |
910 | skb_dst_set(skb, dst_clone(skb_dst(pkt))); | 910 | skb_dst_set(skb, dst_clone(skb_dst(pkt))); |
911 | 911 | ||
912 | /* | 912 | /* |
913 | * Add our header | 913 | * Add our header |
914 | */ | 914 | */ |
915 | 915 | ||
916 | igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr)); | 916 | igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr)); |
917 | igmp->type = | 917 | igmp->type = |
918 | msg->im_msgtype = assert; | 918 | msg->im_msgtype = assert; |
919 | igmp->code = 0; | 919 | igmp->code = 0; |
920 | ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */ | 920 | ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */ |
921 | skb->transport_header = skb->network_header; | 921 | skb->transport_header = skb->network_header; |
922 | } | 922 | } |
923 | 923 | ||
924 | if (mrt->mroute_sk == NULL) { | 924 | if (mrt->mroute_sk == NULL) { |
925 | kfree_skb(skb); | 925 | kfree_skb(skb); |
926 | return -EINVAL; | 926 | return -EINVAL; |
927 | } | 927 | } |
928 | 928 | ||
929 | /* | 929 | /* |
930 | * Deliver to mrouted | 930 | * Deliver to mrouted |
931 | */ | 931 | */ |
932 | ret = sock_queue_rcv_skb(mrt->mroute_sk, skb); | 932 | ret = sock_queue_rcv_skb(mrt->mroute_sk, skb); |
933 | if (ret < 0) { | 933 | if (ret < 0) { |
934 | if (net_ratelimit()) | 934 | if (net_ratelimit()) |
935 | printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n"); | 935 | printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n"); |
936 | kfree_skb(skb); | 936 | kfree_skb(skb); |
937 | } | 937 | } |
938 | 938 | ||
939 | return ret; | 939 | return ret; |
940 | } | 940 | } |
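The skb queued to mrouted here is read by the daemon as a struct igmpmsg overlaid on the IP header, which is why the code only needs to patch im_msgtype, im_mbz and im_vif. For reference, the layout from linux/mroute.h is roughly:

	struct igmpmsg {
		__u32		unused1, unused2;
		unsigned char	im_msgtype;	/* IGMPMSG_NOCACHE/_WRONGVIF/_WHOLEPKT */
		unsigned char	im_mbz;		/* must be zero */
		unsigned char	im_vif;		/* vif the packet arrived on */
		unsigned char	unused3;
		struct in_addr	im_src, im_dst;
	};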
941 | 941 | ||
942 | /* | 942 | /* |
943 | * Queue a packet for resolution. The packet is queued on a locked cache entry. | 943 | * Queue a packet for resolution. The packet is queued on a locked cache entry. |
944 | */ | 944 | */ |
945 | 945 | ||
946 | static int | 946 | static int |
947 | ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb) | 947 | ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb) |
948 | { | 948 | { |
949 | bool found = false; | 949 | bool found = false; |
950 | int err; | 950 | int err; |
951 | struct mfc_cache *c; | 951 | struct mfc_cache *c; |
952 | const struct iphdr *iph = ip_hdr(skb); | 952 | const struct iphdr *iph = ip_hdr(skb); |
953 | 953 | ||
954 | spin_lock_bh(&mfc_unres_lock); | 954 | spin_lock_bh(&mfc_unres_lock); |
955 | list_for_each_entry(c, &mrt->mfc_unres_queue, list) { | 955 | list_for_each_entry(c, &mrt->mfc_unres_queue, list) { |
956 | if (c->mfc_mcastgrp == iph->daddr && | 956 | if (c->mfc_mcastgrp == iph->daddr && |
957 | c->mfc_origin == iph->saddr) { | 957 | c->mfc_origin == iph->saddr) { |
958 | found = true; | 958 | found = true; |
959 | break; | 959 | break; |
960 | } | 960 | } |
961 | } | 961 | } |
962 | 962 | ||
963 | if (!found) { | 963 | if (!found) { |
964 | /* | 964 | /* |
965 | * Create a new entry if allowable | 965 | * Create a new entry if allowable |
966 | */ | 966 | */ |
967 | 967 | ||
968 | if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 || | 968 | if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 || |
969 | (c = ipmr_cache_alloc_unres()) == NULL) { | 969 | (c = ipmr_cache_alloc_unres()) == NULL) { |
970 | spin_unlock_bh(&mfc_unres_lock); | 970 | spin_unlock_bh(&mfc_unres_lock); |
971 | 971 | ||
972 | kfree_skb(skb); | 972 | kfree_skb(skb); |
973 | return -ENOBUFS; | 973 | return -ENOBUFS; |
974 | } | 974 | } |
975 | 975 | ||
976 | /* | 976 | /* |
977 | * Fill in the new cache entry | 977 | * Fill in the new cache entry |
978 | */ | 978 | */ |
979 | c->mfc_parent = -1; | 979 | c->mfc_parent = -1; |
980 | c->mfc_origin = iph->saddr; | 980 | c->mfc_origin = iph->saddr; |
981 | c->mfc_mcastgrp = iph->daddr; | 981 | c->mfc_mcastgrp = iph->daddr; |
982 | 982 | ||
983 | /* | 983 | /* |
984 | * Reflect first query at mrouted. | 984 | * Reflect first query at mrouted. |
985 | */ | 985 | */ |
986 | err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE); | 986 | err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE); |
987 | if (err < 0) { | 987 | if (err < 0) { |
988 | /* If the report failed, throw the cache entry | 988 | /* If the report failed, throw the cache entry |
989 | out - Brad Parker | 989 | out - Brad Parker |
990 | */ | 990 | */ |
991 | spin_unlock_bh(&mfc_unres_lock); | 991 | spin_unlock_bh(&mfc_unres_lock); |
992 | 992 | ||
993 | ipmr_cache_free(c); | 993 | ipmr_cache_free(c); |
994 | kfree_skb(skb); | 994 | kfree_skb(skb); |
995 | return err; | 995 | return err; |
996 | } | 996 | } |
997 | 997 | ||
998 | atomic_inc(&mrt->cache_resolve_queue_len); | 998 | atomic_inc(&mrt->cache_resolve_queue_len); |
999 | list_add(&c->list, &mrt->mfc_unres_queue); | 999 | list_add(&c->list, &mrt->mfc_unres_queue); |
1000 | 1000 | ||
1001 | if (atomic_read(&mrt->cache_resolve_queue_len) == 1) | 1001 | if (atomic_read(&mrt->cache_resolve_queue_len) == 1) |
1002 | mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires); | 1002 | mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires); |
1003 | } | 1003 | } |
1004 | 1004 | ||
1005 | /* | 1005 | /* |
1006 | * See if we can append the packet | 1006 | * See if we can append the packet |
1007 | */ | 1007 | */ |
1008 | if (c->mfc_un.unres.unresolved.qlen > 3) { | 1008 | if (c->mfc_un.unres.unresolved.qlen > 3) { |
1009 | kfree_skb(skb); | 1009 | kfree_skb(skb); |
1010 | err = -ENOBUFS; | 1010 | err = -ENOBUFS; |
1011 | } else { | 1011 | } else { |
1012 | skb_queue_tail(&c->mfc_un.unres.unresolved, skb); | 1012 | skb_queue_tail(&c->mfc_un.unres.unresolved, skb); |
1013 | err = 0; | 1013 | err = 0; |
1014 | } | 1014 | } |
1015 | 1015 | ||
1016 | spin_unlock_bh(&mfc_unres_lock); | 1016 | spin_unlock_bh(&mfc_unres_lock); |
1017 | return err; | 1017 | return err; |
1018 | } | 1018 | } |
1019 | 1019 | ||
1020 | /* | 1020 | /* |
1021 | * MFC cache manipulation by user space mroute daemon | 1021 | * MFC cache manipulation by user space mroute daemon |
1022 | */ | 1022 | */ |
1023 | 1023 | ||
1024 | static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc) | 1024 | static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc) |
1025 | { | 1025 | { |
1026 | int line; | 1026 | int line; |
1027 | struct mfc_cache *c, *next; | 1027 | struct mfc_cache *c, *next; |
1028 | 1028 | ||
1029 | line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); | 1029 | line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); |
1030 | 1030 | ||
1031 | list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) { | 1031 | list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) { |
1032 | if (c->mfc_origin == mfc->mfcc_origin.s_addr && | 1032 | if (c->mfc_origin == mfc->mfcc_origin.s_addr && |
1033 | c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) { | 1033 | c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) { |
1034 | write_lock_bh(&mrt_lock); | 1034 | write_lock_bh(&mrt_lock); |
1035 | list_del(&c->list); | 1035 | list_del(&c->list); |
1036 | write_unlock_bh(&mrt_lock); | 1036 | write_unlock_bh(&mrt_lock); |
1037 | 1037 | ||
1038 | ipmr_cache_free(c); | 1038 | ipmr_cache_free(c); |
1039 | return 0; | 1039 | return 0; |
1040 | } | 1040 | } |
1041 | } | 1041 | } |
1042 | return -ENOENT; | 1042 | return -ENOENT; |
1043 | } | 1043 | } |
1044 | 1044 | ||
1045 | static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, | 1045 | static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, |
1046 | struct mfcctl *mfc, int mrtsock) | 1046 | struct mfcctl *mfc, int mrtsock) |
1047 | { | 1047 | { |
1048 | bool found = false; | 1048 | bool found = false; |
1049 | int line; | 1049 | int line; |
1050 | struct mfc_cache *uc, *c; | 1050 | struct mfc_cache *uc, *c; |
1051 | 1051 | ||
1052 | if (mfc->mfcc_parent >= MAXVIFS) | 1052 | if (mfc->mfcc_parent >= MAXVIFS) |
1053 | return -ENFILE; | 1053 | return -ENFILE; |
1054 | 1054 | ||
1055 | line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); | 1055 | line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); |
1056 | 1056 | ||
1057 | list_for_each_entry(c, &mrt->mfc_cache_array[line], list) { | 1057 | list_for_each_entry(c, &mrt->mfc_cache_array[line], list) { |
1058 | if (c->mfc_origin == mfc->mfcc_origin.s_addr && | 1058 | if (c->mfc_origin == mfc->mfcc_origin.s_addr && |
1059 | c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) { | 1059 | c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) { |
1060 | found = true; | 1060 | found = true; |
1061 | break; | 1061 | break; |
1062 | } | 1062 | } |
1063 | } | 1063 | } |
1064 | 1064 | ||
1065 | if (found) { | 1065 | if (found) { |
1066 | write_lock_bh(&mrt_lock); | 1066 | write_lock_bh(&mrt_lock); |
1067 | c->mfc_parent = mfc->mfcc_parent; | 1067 | c->mfc_parent = mfc->mfcc_parent; |
1068 | ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls); | 1068 | ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls); |
1069 | if (!mrtsock) | 1069 | if (!mrtsock) |
1070 | c->mfc_flags |= MFC_STATIC; | 1070 | c->mfc_flags |= MFC_STATIC; |
1071 | write_unlock_bh(&mrt_lock); | 1071 | write_unlock_bh(&mrt_lock); |
1072 | return 0; | 1072 | return 0; |
1073 | } | 1073 | } |
1074 | 1074 | ||
1075 | if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr)) | 1075 | if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr)) |
1076 | return -EINVAL; | 1076 | return -EINVAL; |
1077 | 1077 | ||
1078 | c = ipmr_cache_alloc(); | 1078 | c = ipmr_cache_alloc(); |
1079 | if (c == NULL) | 1079 | if (c == NULL) |
1080 | return -ENOMEM; | 1080 | return -ENOMEM; |
1081 | 1081 | ||
1082 | c->mfc_origin = mfc->mfcc_origin.s_addr; | 1082 | c->mfc_origin = mfc->mfcc_origin.s_addr; |
1083 | c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr; | 1083 | c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr; |
1084 | c->mfc_parent = mfc->mfcc_parent; | 1084 | c->mfc_parent = mfc->mfcc_parent; |
1085 | ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls); | 1085 | ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls); |
1086 | if (!mrtsock) | 1086 | if (!mrtsock) |
1087 | c->mfc_flags |= MFC_STATIC; | 1087 | c->mfc_flags |= MFC_STATIC; |
1088 | 1088 | ||
1089 | write_lock_bh(&mrt_lock); | 1089 | write_lock_bh(&mrt_lock); |
1090 | list_add(&c->list, &mrt->mfc_cache_array[line]); | 1090 | list_add(&c->list, &mrt->mfc_cache_array[line]); |
1091 | write_unlock_bh(&mrt_lock); | 1091 | write_unlock_bh(&mrt_lock); |
1092 | 1092 | ||
1093 | /* | 1093 | /* |
1094 | * Check to see if we resolved a queued entry. If so, we | 1094 | * Check to see if we resolved a queued entry. If so, we |
1095 | * need to send the frames on and tidy up. | 1095 | * need to send the frames on and tidy up. |
1096 | */ | 1096 | */ |
1097 | found = false; | 1097 | found = false; |
1098 | spin_lock_bh(&mfc_unres_lock); | 1098 | spin_lock_bh(&mfc_unres_lock); |
1099 | list_for_each_entry(uc, &mrt->mfc_unres_queue, list) { | 1099 | list_for_each_entry(uc, &mrt->mfc_unres_queue, list) { |
1100 | if (uc->mfc_origin == c->mfc_origin && | 1100 | if (uc->mfc_origin == c->mfc_origin && |
1101 | uc->mfc_mcastgrp == c->mfc_mcastgrp) { | 1101 | uc->mfc_mcastgrp == c->mfc_mcastgrp) { |
1102 | list_del(&uc->list); | 1102 | list_del(&uc->list); |
1103 | atomic_dec(&mrt->cache_resolve_queue_len); | 1103 | atomic_dec(&mrt->cache_resolve_queue_len); |
1104 | found = true; | 1104 | found = true; |
1105 | break; | 1105 | break; |
1106 | } | 1106 | } |
1107 | } | 1107 | } |
1108 | if (list_empty(&mrt->mfc_unres_queue)) | 1108 | if (list_empty(&mrt->mfc_unres_queue)) |
1109 | del_timer(&mrt->ipmr_expire_timer); | 1109 | del_timer(&mrt->ipmr_expire_timer); |
1110 | spin_unlock_bh(&mfc_unres_lock); | 1110 | spin_unlock_bh(&mfc_unres_lock); |
1111 | 1111 | ||
1112 | if (found) { | 1112 | if (found) { |
1113 | ipmr_cache_resolve(net, mrt, uc, c); | 1113 | ipmr_cache_resolve(net, mrt, uc, c); |
1114 | ipmr_cache_free(uc); | 1114 | ipmr_cache_free(uc); |
1115 | } | 1115 | } |
1116 | return 0; | 1116 | return 0; |
1117 | } | 1117 | } |
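ipmr_mfc_add() is driven from userspace through MRT_ADD_MFC with a struct mfcctl. A minimal daemon-side sketch (the socket is assumed to be an already MRT_INIT'ed raw IGMP socket; addresses and vif numbers are illustrative only):

	struct mfcctl mc = {0};

	mc.mfcc_origin.s_addr   = inet_addr("192.0.2.1");	/* S */
	mc.mfcc_mcastgrp.s_addr = inet_addr("239.1.2.3");	/* G */
	mc.mfcc_parent  = 0;		/* expected input vif */
	mc.mfcc_ttls[1] = 1;		/* forward on vif 1 when ttl > 1 */

	if (setsockopt(mroute_sk, IPPROTO_IP, MRT_ADD_MFC, &mc, sizeof(mc)) < 0)
		perror("MRT_ADD_MFC");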
1118 | 1118 | ||
1119 | /* | 1119 | /* |
1120 | * Close the multicast socket, and clear the vif tables etc | 1120 | * Close the multicast socket, and clear the vif tables etc |
1121 | */ | 1121 | */ |
1122 | 1122 | ||
1123 | static void mroute_clean_tables(struct mr_table *mrt) | 1123 | static void mroute_clean_tables(struct mr_table *mrt) |
1124 | { | 1124 | { |
1125 | int i; | 1125 | int i; |
1126 | LIST_HEAD(list); | 1126 | LIST_HEAD(list); |
1127 | struct mfc_cache *c, *next; | 1127 | struct mfc_cache *c, *next; |
1128 | 1128 | ||
1129 | /* | 1129 | /* |
1130 | * Shut down all active vif entries | 1130 | * Shut down all active vif entries |
1131 | */ | 1131 | */ |
1132 | for (i = 0; i < mrt->maxvif; i++) { | 1132 | for (i = 0; i < mrt->maxvif; i++) { |
1133 | if (!(mrt->vif_table[i].flags&VIFF_STATIC)) | 1133 | if (!(mrt->vif_table[i].flags&VIFF_STATIC)) |
1134 | vif_delete(mrt, i, 0, &list); | 1134 | vif_delete(mrt, i, 0, &list); |
1135 | } | 1135 | } |
1136 | unregister_netdevice_many(&list); | 1136 | unregister_netdevice_many(&list); |
1137 | 1137 | ||
1138 | /* | 1138 | /* |
1139 | * Wipe the cache | 1139 | * Wipe the cache |
1140 | */ | 1140 | */ |
1141 | for (i = 0; i < MFC_LINES; i++) { | 1141 | for (i = 0; i < MFC_LINES; i++) { |
1142 | list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) { | 1142 | list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) { |
1143 | if (c->mfc_flags&MFC_STATIC) | 1143 | if (c->mfc_flags&MFC_STATIC) |
1144 | continue; | 1144 | continue; |
1145 | write_lock_bh(&mrt_lock); | 1145 | write_lock_bh(&mrt_lock); |
1146 | list_del(&c->list); | 1146 | list_del(&c->list); |
1147 | write_unlock_bh(&mrt_lock); | 1147 | write_unlock_bh(&mrt_lock); |
1148 | 1148 | ||
1149 | ipmr_cache_free(c); | 1149 | ipmr_cache_free(c); |
1150 | } | 1150 | } |
1151 | } | 1151 | } |
1152 | 1152 | ||
1153 | if (atomic_read(&mrt->cache_resolve_queue_len) != 0) { | 1153 | if (atomic_read(&mrt->cache_resolve_queue_len) != 0) { |
1154 | spin_lock_bh(&mfc_unres_lock); | 1154 | spin_lock_bh(&mfc_unres_lock); |
1155 | list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) { | 1155 | list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) { |
1156 | list_del(&c->list); | 1156 | list_del(&c->list); |
1157 | ipmr_destroy_unres(mrt, c); | 1157 | ipmr_destroy_unres(mrt, c); |
1158 | } | 1158 | } |
1159 | spin_unlock_bh(&mfc_unres_lock); | 1159 | spin_unlock_bh(&mfc_unres_lock); |
1160 | } | 1160 | } |
1161 | } | 1161 | } |
1162 | 1162 | ||
1163 | static void mrtsock_destruct(struct sock *sk) | 1163 | static void mrtsock_destruct(struct sock *sk) |
1164 | { | 1164 | { |
1165 | struct net *net = sock_net(sk); | 1165 | struct net *net = sock_net(sk); |
1166 | struct mr_table *mrt; | 1166 | struct mr_table *mrt; |
1167 | 1167 | ||
1168 | rtnl_lock(); | 1168 | rtnl_lock(); |
1169 | ipmr_for_each_table(mrt, net) { | 1169 | ipmr_for_each_table(mrt, net) { |
1170 | if (sk == mrt->mroute_sk) { | 1170 | if (sk == mrt->mroute_sk) { |
1171 | IPV4_DEVCONF_ALL(net, MC_FORWARDING)--; | 1171 | IPV4_DEVCONF_ALL(net, MC_FORWARDING)--; |
1172 | 1172 | ||
1173 | write_lock_bh(&mrt_lock); | 1173 | write_lock_bh(&mrt_lock); |
1174 | mrt->mroute_sk = NULL; | 1174 | mrt->mroute_sk = NULL; |
1175 | write_unlock_bh(&mrt_lock); | 1175 | write_unlock_bh(&mrt_lock); |
1176 | 1176 | ||
1177 | mroute_clean_tables(mrt); | 1177 | mroute_clean_tables(mrt); |
1178 | } | 1178 | } |
1179 | } | 1179 | } |
1180 | rtnl_unlock(); | 1180 | rtnl_unlock(); |
1181 | } | 1181 | } |
1182 | 1182 | ||
1183 | /* | 1183 | /* |
1184 | * Socket options and virtual interface manipulation. The whole | 1184 | * Socket options and virtual interface manipulation. The whole |
1185 | * virtual interface system is a complete heap, but unfortunately | 1185 | * virtual interface system is a complete heap, but unfortunately |
1186 | * that's how BSD mrouted happens to think. Maybe one day with a proper | 1186 | * that's how BSD mrouted happens to think. Maybe one day with a proper |
1187 | * MOSPF/PIM router set up we can clean this up. | 1187 | * MOSPF/PIM router set up we can clean this up. |
1188 | */ | 1188 | */ |
1189 | 1189 | ||
1190 | int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen) | 1190 | int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen) |
1191 | { | 1191 | { |
1192 | int ret; | 1192 | int ret; |
1193 | struct vifctl vif; | 1193 | struct vifctl vif; |
1194 | struct mfcctl mfc; | 1194 | struct mfcctl mfc; |
1195 | struct net *net = sock_net(sk); | 1195 | struct net *net = sock_net(sk); |
1196 | struct mr_table *mrt; | 1196 | struct mr_table *mrt; |
1197 | 1197 | ||
1198 | mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); | 1198 | mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); |
1199 | if (mrt == NULL) | 1199 | if (mrt == NULL) |
1200 | return -ENOENT; | 1200 | return -ENOENT; |
1201 | 1201 | ||
1202 | if (optname != MRT_INIT) { | 1202 | if (optname != MRT_INIT) { |
1203 | if (sk != mrt->mroute_sk && !capable(CAP_NET_ADMIN)) | 1203 | if (sk != mrt->mroute_sk && !capable(CAP_NET_ADMIN)) |
1204 | return -EACCES; | 1204 | return -EACCES; |
1205 | } | 1205 | } |
1206 | 1206 | ||
1207 | switch (optname) { | 1207 | switch (optname) { |
1208 | case MRT_INIT: | 1208 | case MRT_INIT: |
1209 | if (sk->sk_type != SOCK_RAW || | 1209 | if (sk->sk_type != SOCK_RAW || |
1210 | inet_sk(sk)->inet_num != IPPROTO_IGMP) | 1210 | inet_sk(sk)->inet_num != IPPROTO_IGMP) |
1211 | return -EOPNOTSUPP; | 1211 | return -EOPNOTSUPP; |
1212 | if (optlen != sizeof(int)) | 1212 | if (optlen != sizeof(int)) |
1213 | return -ENOPROTOOPT; | 1213 | return -ENOPROTOOPT; |
1214 | 1214 | ||
1215 | rtnl_lock(); | 1215 | rtnl_lock(); |
1216 | if (mrt->mroute_sk) { | 1216 | if (mrt->mroute_sk) { |
1217 | rtnl_unlock(); | 1217 | rtnl_unlock(); |
1218 | return -EADDRINUSE; | 1218 | return -EADDRINUSE; |
1219 | } | 1219 | } |
1220 | 1220 | ||
1221 | ret = ip_ra_control(sk, 1, mrtsock_destruct); | 1221 | ret = ip_ra_control(sk, 1, mrtsock_destruct); |
1222 | if (ret == 0) { | 1222 | if (ret == 0) { |
1223 | write_lock_bh(&mrt_lock); | 1223 | write_lock_bh(&mrt_lock); |
1224 | mrt->mroute_sk = sk; | 1224 | mrt->mroute_sk = sk; |
1225 | write_unlock_bh(&mrt_lock); | 1225 | write_unlock_bh(&mrt_lock); |
1226 | 1226 | ||
1227 | IPV4_DEVCONF_ALL(net, MC_FORWARDING)++; | 1227 | IPV4_DEVCONF_ALL(net, MC_FORWARDING)++; |
1228 | } | 1228 | } |
1229 | rtnl_unlock(); | 1229 | rtnl_unlock(); |
1230 | return ret; | 1230 | return ret; |
1231 | case MRT_DONE: | 1231 | case MRT_DONE: |
1232 | if (sk != mrt->mroute_sk) | 1232 | if (sk != mrt->mroute_sk) |
1233 | return -EACCES; | 1233 | return -EACCES; |
1234 | return ip_ra_control(sk, 0, NULL); | 1234 | return ip_ra_control(sk, 0, NULL); |
1235 | case MRT_ADD_VIF: | 1235 | case MRT_ADD_VIF: |
1236 | case MRT_DEL_VIF: | 1236 | case MRT_DEL_VIF: |
1237 | if (optlen != sizeof(vif)) | 1237 | if (optlen != sizeof(vif)) |
1238 | return -EINVAL; | 1238 | return -EINVAL; |
1239 | if (copy_from_user(&vif, optval, sizeof(vif))) | 1239 | if (copy_from_user(&vif, optval, sizeof(vif))) |
1240 | return -EFAULT; | 1240 | return -EFAULT; |
1241 | if (vif.vifc_vifi >= MAXVIFS) | 1241 | if (vif.vifc_vifi >= MAXVIFS) |
1242 | return -ENFILE; | 1242 | return -ENFILE; |
1243 | rtnl_lock(); | 1243 | rtnl_lock(); |
1244 | if (optname == MRT_ADD_VIF) { | 1244 | if (optname == MRT_ADD_VIF) { |
1245 | ret = vif_add(net, mrt, &vif, sk == mrt->mroute_sk); | 1245 | ret = vif_add(net, mrt, &vif, sk == mrt->mroute_sk); |
1246 | } else { | 1246 | } else { |
1247 | ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL); | 1247 | ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL); |
1248 | } | 1248 | } |
1249 | rtnl_unlock(); | 1249 | rtnl_unlock(); |
1250 | return ret; | 1250 | return ret; |
1251 | 1251 | ||
1252 | /* | 1252 | /* |
1253 | * Manipulate the forwarding caches. These live | 1253 | * Manipulate the forwarding caches. These live |
1254 | * in a sort of kernel/user symbiosis. | 1254 | * in a sort of kernel/user symbiosis. |
1255 | */ | 1255 | */ |
1256 | case MRT_ADD_MFC: | 1256 | case MRT_ADD_MFC: |
1257 | case MRT_DEL_MFC: | 1257 | case MRT_DEL_MFC: |
1258 | if (optlen != sizeof(mfc)) | 1258 | if (optlen != sizeof(mfc)) |
1259 | return -EINVAL; | 1259 | return -EINVAL; |
1260 | if (copy_from_user(&mfc, optval, sizeof(mfc))) | 1260 | if (copy_from_user(&mfc, optval, sizeof(mfc))) |
1261 | return -EFAULT; | 1261 | return -EFAULT; |
1262 | rtnl_lock(); | 1262 | rtnl_lock(); |
1263 | if (optname == MRT_DEL_MFC) | 1263 | if (optname == MRT_DEL_MFC) |
1264 | ret = ipmr_mfc_delete(mrt, &mfc); | 1264 | ret = ipmr_mfc_delete(mrt, &mfc); |
1265 | else | 1265 | else |
1266 | ret = ipmr_mfc_add(net, mrt, &mfc, sk == mrt->mroute_sk); | 1266 | ret = ipmr_mfc_add(net, mrt, &mfc, sk == mrt->mroute_sk); |
1267 | rtnl_unlock(); | 1267 | rtnl_unlock(); |
1268 | return ret; | 1268 | return ret; |
1269 | /* | 1269 | /* |
1270 | * Control PIM assert. | 1270 | * Control PIM assert. |
1271 | */ | 1271 | */ |
1272 | case MRT_ASSERT: | 1272 | case MRT_ASSERT: |
1273 | { | 1273 | { |
1274 | int v; | 1274 | int v; |
1275 | if (get_user(v, (int __user *)optval)) | 1275 | if (get_user(v, (int __user *)optval)) |
1276 | return -EFAULT; | 1276 | return -EFAULT; |
1277 | mrt->mroute_do_assert = (v) ? 1 : 0; | 1277 | mrt->mroute_do_assert = (v) ? 1 : 0; |
1278 | return 0; | 1278 | return 0; |
1279 | } | 1279 | } |
1280 | #ifdef CONFIG_IP_PIMSM | 1280 | #ifdef CONFIG_IP_PIMSM |
1281 | case MRT_PIM: | 1281 | case MRT_PIM: |
1282 | { | 1282 | { |
1283 | int v; | 1283 | int v; |
1284 | 1284 | ||
1285 | if (get_user(v, (int __user *)optval)) | 1285 | if (get_user(v, (int __user *)optval)) |
1286 | return -EFAULT; | 1286 | return -EFAULT; |
1287 | v = (v) ? 1 : 0; | 1287 | v = (v) ? 1 : 0; |
1288 | 1288 | ||
1289 | rtnl_lock(); | 1289 | rtnl_lock(); |
1290 | ret = 0; | 1290 | ret = 0; |
1291 | if (v != mrt->mroute_do_pim) { | 1291 | if (v != mrt->mroute_do_pim) { |
1292 | mrt->mroute_do_pim = v; | 1292 | mrt->mroute_do_pim = v; |
1293 | mrt->mroute_do_assert = v; | 1293 | mrt->mroute_do_assert = v; |
1294 | } | 1294 | } |
1295 | rtnl_unlock(); | 1295 | rtnl_unlock(); |
1296 | return ret; | 1296 | return ret; |
1297 | } | 1297 | } |
1298 | #endif | 1298 | #endif |
1299 | #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES | 1299 | #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES |
1300 | case MRT_TABLE: | 1300 | case MRT_TABLE: |
1301 | { | 1301 | { |
1302 | u32 v; | 1302 | u32 v; |
1303 | 1303 | ||
1304 | if (optlen != sizeof(u32)) | 1304 | if (optlen != sizeof(u32)) |
1305 | return -EINVAL; | 1305 | return -EINVAL; |
1306 | if (get_user(v, (u32 __user *)optval)) | 1306 | if (get_user(v, (u32 __user *)optval)) |
1307 | return -EFAULT; | 1307 | return -EFAULT; |
1308 | if (sk == mrt->mroute_sk) | 1308 | if (sk == mrt->mroute_sk) |
1309 | return -EBUSY; | 1309 | return -EBUSY; |
1310 | 1310 | ||
1311 | rtnl_lock(); | 1311 | rtnl_lock(); |
1312 | ret = 0; | 1312 | ret = 0; |
1313 | if (!ipmr_new_table(net, v)) | 1313 | if (!ipmr_new_table(net, v)) |
1314 | ret = -ENOMEM; | 1314 | ret = -ENOMEM; |
1315 | raw_sk(sk)->ipmr_table = v; | 1315 | raw_sk(sk)->ipmr_table = v; |
1316 | rtnl_unlock(); | 1316 | rtnl_unlock(); |
1317 | return ret; | 1317 | return ret; |
1318 | } | 1318 | } |
1319 | #endif | 1319 | #endif |
1320 | /* | 1320 | /* |
1321 | * Spurious command, or MRT_VERSION which you cannot | 1321 | * Spurious command, or MRT_VERSION which you cannot |
1322 | * set. | 1322 | * set. |
1323 | */ | 1323 | */ |
1324 | default: | 1324 | default: |
1325 | return -ENOPROTOOPT; | 1325 | return -ENOPROTOOPT; |
1326 | } | 1326 | } |
1327 | } | 1327 | } |
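The MRT_INIT path above is the daemon bootstrap: only a raw IGMP socket may become the mroute socket, and a second daemon gets -EADDRINUSE. A minimal userspace handshake matching those checks:

	int one = 1;
	int mroute_sk = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);

	if (mroute_sk < 0 ||
	    setsockopt(mroute_sk, IPPROTO_IP, MRT_INIT, &one, sizeof(one)) < 0)
		perror("MRT_INIT");	/* EADDRINUSE: table already owned */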
1328 | 1328 | ||
1329 | /* | 1329 | /* |
1330 | * Getsockopt support for the multicast routing system. | 1330 | * Getsockopt support for the multicast routing system. |
1331 | */ | 1331 | */ |
1332 | 1332 | ||
1333 | int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen) | 1333 | int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen) |
1334 | { | 1334 | { |
1335 | int olr; | 1335 | int olr; |
1336 | int val; | 1336 | int val; |
1337 | struct net *net = sock_net(sk); | 1337 | struct net *net = sock_net(sk); |
1338 | struct mr_table *mrt; | 1338 | struct mr_table *mrt; |
1339 | 1339 | ||
1340 | mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); | 1340 | mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); |
1341 | if (mrt == NULL) | 1341 | if (mrt == NULL) |
1342 | return -ENOENT; | 1342 | return -ENOENT; |
1343 | 1343 | ||
1344 | if (optname != MRT_VERSION && | 1344 | if (optname != MRT_VERSION && |
1345 | #ifdef CONFIG_IP_PIMSM | 1345 | #ifdef CONFIG_IP_PIMSM |
1346 | optname != MRT_PIM && | 1346 | optname != MRT_PIM && |
1347 | #endif | 1347 | #endif |
1348 | optname != MRT_ASSERT) | 1348 | optname != MRT_ASSERT) |
1349 | return -ENOPROTOOPT; | 1349 | return -ENOPROTOOPT; |
1350 | 1350 | ||
1351 | if (get_user(olr, optlen)) | 1351 | if (get_user(olr, optlen)) |
1352 | return -EFAULT; | 1352 | return -EFAULT; |
1353 | 1353 | ||
1354 | if (olr < 0) | 1354 | if (olr < 0) |
1355 | return -EINVAL; | 1355 | return -EINVAL; |
1356 | olr = min_t(unsigned int, olr, sizeof(int)); | 1356 | olr = min_t(unsigned int, olr, sizeof(int)); |
1357 | 1357 | ||
1358 | if (put_user(olr, optlen)) | 1358 | if (put_user(olr, optlen)) |
1359 | return -EFAULT; | 1359 | return -EFAULT; |
1360 | if (optname == MRT_VERSION) | 1360 | if (optname == MRT_VERSION) |
1361 | val = 0x0305; | 1361 | val = 0x0305; |
1362 | #ifdef CONFIG_IP_PIMSM | 1362 | #ifdef CONFIG_IP_PIMSM |
1363 | else if (optname == MRT_PIM) | 1363 | else if (optname == MRT_PIM) |
1364 | val = mrt->mroute_do_pim; | 1364 | val = mrt->mroute_do_pim; |
1365 | #endif | 1365 | #endif |
1366 | else | 1366 | else |
1367 | val = mrt->mroute_do_assert; | 1367 | val = mrt->mroute_do_assert; |
1368 | if (copy_to_user(optval, &val, olr)) | 1368 | if (copy_to_user(optval, &val, olr)) |
1369 | return -EFAULT; | 1369 | return -EFAULT; |
1370 | return 0; | 1370 | return 0; |
1371 | } | 1371 | } |
1372 | 1372 | ||
1373 | /* | 1373 | /* |
1374 | * The IP multicast ioctl support routines. | 1374 | * The IP multicast ioctl support routines. |
1375 | */ | 1375 | */ |
1376 | 1376 | ||
1377 | int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) | 1377 | int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) |
1378 | { | 1378 | { |
1379 | struct sioc_sg_req sr; | 1379 | struct sioc_sg_req sr; |
1380 | struct sioc_vif_req vr; | 1380 | struct sioc_vif_req vr; |
1381 | struct vif_device *vif; | 1381 | struct vif_device *vif; |
1382 | struct mfc_cache *c; | 1382 | struct mfc_cache *c; |
1383 | struct net *net = sock_net(sk); | 1383 | struct net *net = sock_net(sk); |
1384 | struct mr_table *mrt; | 1384 | struct mr_table *mrt; |
1385 | 1385 | ||
1386 | mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); | 1386 | mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); |
1387 | if (mrt == NULL) | 1387 | if (mrt == NULL) |
1388 | return -ENOENT; | 1388 | return -ENOENT; |
1389 | 1389 | ||
1390 | switch (cmd) { | 1390 | switch (cmd) { |
1391 | case SIOCGETVIFCNT: | 1391 | case SIOCGETVIFCNT: |
1392 | if (copy_from_user(&vr, arg, sizeof(vr))) | 1392 | if (copy_from_user(&vr, arg, sizeof(vr))) |
1393 | return -EFAULT; | 1393 | return -EFAULT; |
1394 | if (vr.vifi >= mrt->maxvif) | 1394 | if (vr.vifi >= mrt->maxvif) |
1395 | return -EINVAL; | 1395 | return -EINVAL; |
1396 | read_lock(&mrt_lock); | 1396 | read_lock(&mrt_lock); |
1397 | vif = &mrt->vif_table[vr.vifi]; | 1397 | vif = &mrt->vif_table[vr.vifi]; |
1398 | if (VIF_EXISTS(mrt, vr.vifi)) { | 1398 | if (VIF_EXISTS(mrt, vr.vifi)) { |
1399 | vr.icount = vif->pkt_in; | 1399 | vr.icount = vif->pkt_in; |
1400 | vr.ocount = vif->pkt_out; | 1400 | vr.ocount = vif->pkt_out; |
1401 | vr.ibytes = vif->bytes_in; | 1401 | vr.ibytes = vif->bytes_in; |
1402 | vr.obytes = vif->bytes_out; | 1402 | vr.obytes = vif->bytes_out; |
1403 | read_unlock(&mrt_lock); | 1403 | read_unlock(&mrt_lock); |
1404 | 1404 | ||
1405 | if (copy_to_user(arg, &vr, sizeof(vr))) | 1405 | if (copy_to_user(arg, &vr, sizeof(vr))) |
1406 | return -EFAULT; | 1406 | return -EFAULT; |
1407 | return 0; | 1407 | return 0; |
1408 | } | 1408 | } |
1409 | read_unlock(&mrt_lock); | 1409 | read_unlock(&mrt_lock); |
1410 | return -EADDRNOTAVAIL; | 1410 | return -EADDRNOTAVAIL; |
1411 | case SIOCGETSGCNT: | 1411 | case SIOCGETSGCNT: |
1412 | if (copy_from_user(&sr, arg, sizeof(sr))) | 1412 | if (copy_from_user(&sr, arg, sizeof(sr))) |
1413 | return -EFAULT; | 1413 | return -EFAULT; |
1414 | 1414 | ||
1415 | read_lock(&mrt_lock); | 1415 | read_lock(&mrt_lock); |
1416 | c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr); | 1416 | c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr); |
1417 | if (c) { | 1417 | if (c) { |
1418 | sr.pktcnt = c->mfc_un.res.pkt; | 1418 | sr.pktcnt = c->mfc_un.res.pkt; |
1419 | sr.bytecnt = c->mfc_un.res.bytes; | 1419 | sr.bytecnt = c->mfc_un.res.bytes; |
1420 | sr.wrong_if = c->mfc_un.res.wrong_if; | 1420 | sr.wrong_if = c->mfc_un.res.wrong_if; |
1421 | read_unlock(&mrt_lock); | 1421 | read_unlock(&mrt_lock); |
1422 | 1422 | ||
1423 | if (copy_to_user(arg, &sr, sizeof(sr))) | 1423 | if (copy_to_user(arg, &sr, sizeof(sr))) |
1424 | return -EFAULT; | 1424 | return -EFAULT; |
1425 | return 0; | 1425 | return 0; |
1426 | } | 1426 | } |
1427 | read_unlock(&mrt_lock); | 1427 | read_unlock(&mrt_lock); |
1428 | return -EADDRNOTAVAIL; | 1428 | return -EADDRNOTAVAIL; |
1429 | default: | 1429 | default: |
1430 | return -ENOIOCTLCMD; | 1430 | return -ENOIOCTLCMD; |
1431 | } | 1431 | } |
1432 | } | 1432 | } |
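SIOCGETVIFCNT copies the per-vif counters maintained by the forwarding path into a struct sioc_vif_req (linux/mroute.h). Read them back from userspace like so (the vif index is illustrative):

	struct sioc_vif_req vr = { .vifi = 0 };

	if (ioctl(mroute_sk, SIOCGETVIFCNT, &vr) == 0)
		printf("vif0: %lu/%lu pkts in/out, %lu/%lu bytes\n",
		       vr.icount, vr.ocount, vr.ibytes, vr.obytes);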
1433 | 1433 | ||
1434 | 1434 | ||
1435 | static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr) | 1435 | static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr) |
1436 | { | 1436 | { |
1437 | struct net_device *dev = ptr; | 1437 | struct net_device *dev = ptr; |
1438 | struct net *net = dev_net(dev); | 1438 | struct net *net = dev_net(dev); |
1439 | struct mr_table *mrt; | 1439 | struct mr_table *mrt; |
1440 | struct vif_device *v; | 1440 | struct vif_device *v; |
1441 | int ct; | 1441 | int ct; |
1442 | LIST_HEAD(list); | 1442 | LIST_HEAD(list); |
1443 | 1443 | ||
1444 | if (event != NETDEV_UNREGISTER) | 1444 | if (event != NETDEV_UNREGISTER) |
1445 | return NOTIFY_DONE; | 1445 | return NOTIFY_DONE; |
1446 | 1446 | ||
1447 | ipmr_for_each_table(mrt, net) { | 1447 | ipmr_for_each_table(mrt, net) { |
1448 | v = &mrt->vif_table[0]; | 1448 | v = &mrt->vif_table[0]; |
1449 | for (ct = 0; ct < mrt->maxvif; ct++, v++) { | 1449 | for (ct = 0; ct < mrt->maxvif; ct++, v++) { |
1450 | if (v->dev == dev) | 1450 | if (v->dev == dev) |
1451 | vif_delete(mrt, ct, 1, &list); | 1451 | vif_delete(mrt, ct, 1, &list); |
1452 | } | 1452 | } |
1453 | } | 1453 | } |
1454 | unregister_netdevice_many(&list); | 1454 | unregister_netdevice_many(&list); |
1455 | return NOTIFY_DONE; | 1455 | return NOTIFY_DONE; |
1456 | } | 1456 | } |
1457 | 1457 | ||
1458 | 1458 | ||
1459 | static struct notifier_block ip_mr_notifier = { | 1459 | static struct notifier_block ip_mr_notifier = { |
1460 | .notifier_call = ipmr_device_event, | 1460 | .notifier_call = ipmr_device_event, |
1461 | }; | 1461 | }; |
1462 | 1462 | ||
1463 | /* | 1463 | /* |
1464 | * Encapsulate a packet by attaching a valid IPIP header to it. | 1464 | * Encapsulate a packet by attaching a valid IPIP header to it. |
1465 | * This avoids tunnel drivers and other mess and gives us the speed so | 1465 | * This avoids tunnel drivers and other mess and gives us the speed so |
1466 | * important for multicast video. | 1466 | * important for multicast video. |
1467 | */ | 1467 | */ |
1468 | 1468 | ||
1469 | static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr) | 1469 | static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr) |
1470 | { | 1470 | { |
1471 | struct iphdr *iph; | 1471 | struct iphdr *iph; |
1472 | struct iphdr *old_iph = ip_hdr(skb); | 1472 | struct iphdr *old_iph = ip_hdr(skb); |
1473 | 1473 | ||
1474 | skb_push(skb, sizeof(struct iphdr)); | 1474 | skb_push(skb, sizeof(struct iphdr)); |
1475 | skb->transport_header = skb->network_header; | 1475 | skb->transport_header = skb->network_header; |
1476 | skb_reset_network_header(skb); | 1476 | skb_reset_network_header(skb); |
1477 | iph = ip_hdr(skb); | 1477 | iph = ip_hdr(skb); |
1478 | 1478 | ||
1479 | iph->version = 4; | 1479 | iph->version = 4; |
1480 | iph->tos = old_iph->tos; | 1480 | iph->tos = old_iph->tos; |
1481 | iph->ttl = old_iph->ttl; | 1481 | iph->ttl = old_iph->ttl; |
1482 | iph->frag_off = 0; | 1482 | iph->frag_off = 0; |
1483 | iph->daddr = daddr; | 1483 | iph->daddr = daddr; |
1484 | iph->saddr = saddr; | 1484 | iph->saddr = saddr; |
1485 | iph->protocol = IPPROTO_IPIP; | 1485 | iph->protocol = IPPROTO_IPIP; |
1486 | iph->ihl = 5; | 1486 | iph->ihl = 5; |
1487 | iph->tot_len = htons(skb->len); | 1487 | iph->tot_len = htons(skb->len); |
1488 | ip_select_ident(iph, skb_dst(skb), NULL); | 1488 | ip_select_ident(iph, skb_dst(skb), NULL); |
1489 | ip_send_check(iph); | 1489 | ip_send_check(iph); |
1490 | 1490 | ||
1491 | memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); | 1491 | memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); |
1492 | nf_reset(skb); | 1492 | nf_reset(skb); |
1493 | } | 1493 | } |
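After ip_encap() the skb carries two IP headers: the outer one is built from scratch with protocol IPPROTO_IPIP so the remote tunnel endpoint decapsulates it, while the inner header is left untouched. Schematically:

	/*
	 *   +-----------------------+------------------------+---------+
	 *   | outer iphdr           | inner (original) iphdr | payload |
	 *   | proto = IPPROTO_IPIP  | ttl/tos copied outward | ...     |
	 *   +-----------------------+------------------------+---------+
	 *
	 * skb->network_header now points at the outer header,
	 * skb->transport_header at the inner (original) one.
	 */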
1494 | 1494 | ||
1495 | static inline int ipmr_forward_finish(struct sk_buff *skb) | 1495 | static inline int ipmr_forward_finish(struct sk_buff *skb) |
1496 | { | 1496 | { |
1497 | struct ip_options *opt = &(IPCB(skb)->opt); | 1497 | struct ip_options *opt = &(IPCB(skb)->opt); |
1498 | 1498 | ||
1499 | IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS); | 1499 | IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS); |
1500 | 1500 | ||
1501 | if (unlikely(opt->optlen)) | 1501 | if (unlikely(opt->optlen)) |
1502 | ip_forward_options(skb); | 1502 | ip_forward_options(skb); |
1503 | 1503 | ||
1504 | return dst_output(skb); | 1504 | return dst_output(skb); |
1505 | } | 1505 | } |
1506 | 1506 | ||
1507 | /* | 1507 | /* |
1508 | * Processing handlers for ipmr_forward | 1508 | * Processing handlers for ipmr_forward |
1509 | */ | 1509 | */ |
1510 | 1510 | ||
1511 | static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, | 1511 | static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, |
1512 | struct sk_buff *skb, struct mfc_cache *c, int vifi) | 1512 | struct sk_buff *skb, struct mfc_cache *c, int vifi) |
1513 | { | 1513 | { |
1514 | const struct iphdr *iph = ip_hdr(skb); | 1514 | const struct iphdr *iph = ip_hdr(skb); |
1515 | struct vif_device *vif = &mrt->vif_table[vifi]; | 1515 | struct vif_device *vif = &mrt->vif_table[vifi]; |
1516 | struct net_device *dev; | 1516 | struct net_device *dev; |
1517 | struct rtable *rt; | 1517 | struct rtable *rt; |
1518 | int encap = 0; | 1518 | int encap = 0; |
1519 | 1519 | ||
1520 | if (vif->dev == NULL) | 1520 | if (vif->dev == NULL) |
1521 | goto out_free; | 1521 | goto out_free; |
1522 | 1522 | ||
1523 | #ifdef CONFIG_IP_PIMSM | 1523 | #ifdef CONFIG_IP_PIMSM |
1524 | if (vif->flags & VIFF_REGISTER) { | 1524 | if (vif->flags & VIFF_REGISTER) { |
1525 | vif->pkt_out++; | 1525 | vif->pkt_out++; |
1526 | vif->bytes_out += skb->len; | 1526 | vif->bytes_out += skb->len; |
1527 | vif->dev->stats.tx_bytes += skb->len; | 1527 | vif->dev->stats.tx_bytes += skb->len; |
1528 | vif->dev->stats.tx_packets++; | 1528 | vif->dev->stats.tx_packets++; |
1529 | ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT); | 1529 | ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT); |
1530 | goto out_free; | 1530 | goto out_free; |
1531 | } | 1531 | } |
1532 | #endif | 1532 | #endif |
1533 | 1533 | ||
1534 | if (vif->flags&VIFF_TUNNEL) { | 1534 | if (vif->flags&VIFF_TUNNEL) { |
1535 | struct flowi fl = { .oif = vif->link, | 1535 | struct flowi fl = { .oif = vif->link, |
1536 | .nl_u = { .ip4_u = | 1536 | .nl_u = { .ip4_u = |
1537 | { .daddr = vif->remote, | 1537 | { .daddr = vif->remote, |
1538 | .saddr = vif->local, | 1538 | .saddr = vif->local, |
1539 | .tos = RT_TOS(iph->tos) } }, | 1539 | .tos = RT_TOS(iph->tos) } }, |
1540 | .proto = IPPROTO_IPIP }; | 1540 | .proto = IPPROTO_IPIP }; |
1541 | if (ip_route_output_key(net, &rt, &fl)) | 1541 | if (ip_route_output_key(net, &rt, &fl)) |
1542 | goto out_free; | 1542 | goto out_free; |
1543 | encap = sizeof(struct iphdr); | 1543 | encap = sizeof(struct iphdr); |
1544 | } else { | 1544 | } else { |
1545 | struct flowi fl = { .oif = vif->link, | 1545 | struct flowi fl = { .oif = vif->link, |
1546 | .nl_u = { .ip4_u = | 1546 | .nl_u = { .ip4_u = |
1547 | { .daddr = iph->daddr, | 1547 | { .daddr = iph->daddr, |
1548 | .tos = RT_TOS(iph->tos) } }, | 1548 | .tos = RT_TOS(iph->tos) } }, |
1549 | .proto = IPPROTO_IPIP }; | 1549 | .proto = IPPROTO_IPIP }; |
1550 | if (ip_route_output_key(net, &rt, &fl)) | 1550 | if (ip_route_output_key(net, &rt, &fl)) |
1551 | goto out_free; | 1551 | goto out_free; |
1552 | } | 1552 | } |
1553 | 1553 | ||
1554 | dev = rt->u.dst.dev; | 1554 | dev = rt->u.dst.dev; |
1555 | 1555 | ||
1556 | if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) { | 1556 | if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) { |
1557 | /* Do not fragment multicasts. Alas, IPv4 does not | 1557 | /* Do not fragment multicasts. Alas, IPv4 does not |
1558 | allow us to send ICMP here, so the packets will | 1558 | allow us to send ICMP here, so the packets will |
1559 | disappear into a black hole. | 1559 | disappear into a black hole. |
1560 | */ | 1560 | */ |
1561 | 1561 | ||
1562 | IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS); | 1562 | IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS); |
1563 | ip_rt_put(rt); | 1563 | ip_rt_put(rt); |
1564 | goto out_free; | 1564 | goto out_free; |
1565 | } | 1565 | } |
1566 | 1566 | ||
1567 | encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len; | 1567 | encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len; |
1568 | 1568 | ||
1569 | if (skb_cow(skb, encap)) { | 1569 | if (skb_cow(skb, encap)) { |
1570 | ip_rt_put(rt); | 1570 | ip_rt_put(rt); |
1571 | goto out_free; | 1571 | goto out_free; |
1572 | } | 1572 | } |
1573 | 1573 | ||
1574 | vif->pkt_out++; | 1574 | vif->pkt_out++; |
1575 | vif->bytes_out += skb->len; | 1575 | vif->bytes_out += skb->len; |
1576 | 1576 | ||
1577 | skb_dst_drop(skb); | 1577 | skb_dst_drop(skb); |
1578 | skb_dst_set(skb, &rt->u.dst); | 1578 | skb_dst_set(skb, &rt->u.dst); |
1579 | ip_decrease_ttl(ip_hdr(skb)); | 1579 | ip_decrease_ttl(ip_hdr(skb)); |
1580 | 1580 | ||
1581 | /* FIXME: forward and output firewalls used to be called here. | 1581 | /* FIXME: forward and output firewalls used to be called here. |
1582 | * What do we do with netfilter? -- RR */ | 1582 | * What do we do with netfilter? -- RR */ |
1583 | if (vif->flags & VIFF_TUNNEL) { | 1583 | if (vif->flags & VIFF_TUNNEL) { |
1584 | ip_encap(skb, vif->local, vif->remote); | 1584 | ip_encap(skb, vif->local, vif->remote); |
1585 | /* FIXME: extra output firewall step used to be here. --RR */ | 1585 | /* FIXME: extra output firewall step used to be here. --RR */ |
1586 | vif->dev->stats.tx_packets++; | 1586 | vif->dev->stats.tx_packets++; |
1587 | vif->dev->stats.tx_bytes += skb->len; | 1587 | vif->dev->stats.tx_bytes += skb->len; |
1588 | } | 1588 | } |
1589 | 1589 | ||
1590 | IPCB(skb)->flags |= IPSKB_FORWARDED; | 1590 | IPCB(skb)->flags |= IPSKB_FORWARDED; |
1591 | 1591 | ||
1592 | /* | 1592 | /* |
1593 | * RFC 1584 teaches that a DVMRP/PIM router must deliver packets locally | 1593 | * RFC 1584 teaches that a DVMRP/PIM router must deliver packets locally |
1594 | * not only before forwarding, but also after forwarding on all output | 1594 | * not only before forwarding, but also after forwarding on all output |
1595 | * interfaces. Clearly, if the mrouter runs a multicast | 1595 | * interfaces. Clearly, if the mrouter runs a multicast |
1596 | * program, that program should receive packets regardless of which | 1596 | * program, that program should receive packets regardless of which |
1597 | * interface it joined on. | 1597 | * interface it joined on. |
1598 | * If we do not do this, the program would have to join on all | 1598 | * If we do not do this, the program would have to join on all |
1599 | * interfaces. On the other hand, a multihomed host (or router, but | 1599 | * interfaces. On the other hand, a multihomed host (or router, but |
1600 | * not mrouter) cannot join on more than one interface - that would | 1600 | * not mrouter) cannot join on more than one interface - that would |
1601 | * result in receiving duplicate packets. | 1601 | * result in receiving duplicate packets. |
1602 | */ | 1602 | */ |
1603 | NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev, dev, | 1603 | NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev, dev, |
1604 | ipmr_forward_finish); | 1604 | ipmr_forward_finish); |
1605 | return; | 1605 | return; |
1606 | 1606 | ||
1607 | out_free: | 1607 | out_free: |
1608 | kfree_skb(skb); | 1608 | kfree_skb(skb); |
1609 | return; | 1609 | return; |
1610 | } | 1610 | } |
1611 | 1611 | ||
1612 | static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev) | 1612 | static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev) |
1613 | { | 1613 | { |
1614 | int ct; | 1614 | int ct; |
1615 | 1615 | ||
1616 | for (ct = mrt->maxvif-1; ct >= 0; ct--) { | 1616 | for (ct = mrt->maxvif-1; ct >= 0; ct--) { |
1617 | if (mrt->vif_table[ct].dev == dev) | 1617 | if (mrt->vif_table[ct].dev == dev) |
1618 | break; | 1618 | break; |
1619 | } | 1619 | } |
1620 | return ct; | 1620 | return ct; |
1621 | } | 1621 | } |
1622 | 1622 | ||
1623 | /* "local" means that we should preserve one skb (for local delivery) */ | 1623 | /* "local" means that we should preserve one skb (for local delivery) */ |
1624 | 1624 | ||
1625 | static int ip_mr_forward(struct net *net, struct mr_table *mrt, | 1625 | static int ip_mr_forward(struct net *net, struct mr_table *mrt, |
1626 | struct sk_buff *skb, struct mfc_cache *cache, | 1626 | struct sk_buff *skb, struct mfc_cache *cache, |
1627 | int local) | 1627 | int local) |
1628 | { | 1628 | { |
1629 | int psend = -1; | 1629 | int psend = -1; |
1630 | int vif, ct; | 1630 | int vif, ct; |
1631 | 1631 | ||
1632 | vif = cache->mfc_parent; | 1632 | vif = cache->mfc_parent; |
1633 | cache->mfc_un.res.pkt++; | 1633 | cache->mfc_un.res.pkt++; |
1634 | cache->mfc_un.res.bytes += skb->len; | 1634 | cache->mfc_un.res.bytes += skb->len; |
1635 | 1635 | ||
1636 | /* | 1636 | /* |
1637 | * Wrong interface: drop packet and (maybe) send PIM assert. | 1637 | * Wrong interface: drop packet and (maybe) send PIM assert. |
1638 | */ | 1638 | */ |
1639 | if (mrt->vif_table[vif].dev != skb->dev) { | 1639 | if (mrt->vif_table[vif].dev != skb->dev) { |
1640 | int true_vifi; | 1640 | int true_vifi; |
1641 | 1641 | ||
1642 | if (skb_rtable(skb)->fl.iif == 0) { | 1642 | if (skb_rtable(skb)->fl.iif == 0) { |
1643 | /* It is our own packet, looped back. | 1643 | /* It is our own packet, looped back. |
1644 | Very complicated situation... | 1644 | Very complicated situation... |
1645 | 1645 | ||
1646 | The best workaround until the routing daemons are | 1646 | The best workaround until the routing daemons are |
1647 | fixed is not to redistribute a packet if it was | 1647 | fixed is not to redistribute a packet if it was |
1648 | sent through the wrong interface. This means that | 1648 | sent through the wrong interface. This means that |
1649 | multicast applications WILL NOT work for | 1649 | multicast applications WILL NOT work for |
1650 | (S,G) entries whose default multicast route points | 1650 | (S,G) entries whose default multicast route points |
1651 | to the wrong oif. In any case, it is not a good | 1651 | to the wrong oif. In any case, it is not a good |
1652 | idea to run multicast applications on a router. | 1652 | idea to run multicast applications on a router. |
1653 | */ | 1653 | */ |
1654 | goto dont_forward; | 1654 | goto dont_forward; |
1655 | } | 1655 | } |
1656 | 1656 | ||
1657 | cache->mfc_un.res.wrong_if++; | 1657 | cache->mfc_un.res.wrong_if++; |
1658 | true_vifi = ipmr_find_vif(mrt, skb->dev); | 1658 | true_vifi = ipmr_find_vif(mrt, skb->dev); |
1659 | 1659 | ||
1660 | if (true_vifi >= 0 && mrt->mroute_do_assert && | 1660 | if (true_vifi >= 0 && mrt->mroute_do_assert && |
1661 | /* PIM-SM uses asserts when switching from RPT to SPT, | 1661 | /* PIM-SM uses asserts when switching from RPT to SPT, |
1662 | so we cannot check that the packet arrived on an oif. | 1662 | so we cannot check that the packet arrived on an oif. |
1663 | That is bad, but otherwise we would need to move a pretty | 1663 | That is bad, but otherwise we would need to move a pretty |
1664 | large chunk of pimd into the kernel. Ough... --ANK | 1664 | large chunk of pimd into the kernel. Ough... --ANK |
1665 | */ | 1665 | */ |
1666 | (mrt->mroute_do_pim || | 1666 | (mrt->mroute_do_pim || |
1667 | cache->mfc_un.res.ttls[true_vifi] < 255) && | 1667 | cache->mfc_un.res.ttls[true_vifi] < 255) && |
1668 | time_after(jiffies, | 1668 | time_after(jiffies, |
1669 | cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) { | 1669 | cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) { |
1670 | cache->mfc_un.res.last_assert = jiffies; | 1670 | cache->mfc_un.res.last_assert = jiffies; |
1671 | ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF); | 1671 | ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF); |
1672 | } | 1672 | } |
1673 | goto dont_forward; | 1673 | goto dont_forward; |
1674 | } | 1674 | } |
1675 | 1675 | ||
1676 | mrt->vif_table[vif].pkt_in++; | 1676 | mrt->vif_table[vif].pkt_in++; |
1677 | mrt->vif_table[vif].bytes_in += skb->len; | 1677 | mrt->vif_table[vif].bytes_in += skb->len; |
1678 | 1678 | ||
1679 | /* | 1679 | /* |
1680 | * Forward the frame | 1680 | * Forward the frame |
1681 | */ | 1681 | */ |
1682 | for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) { | 1682 | for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) { |
1683 | if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) { | 1683 | if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) { |
1684 | if (psend != -1) { | 1684 | if (psend != -1) { |
1685 | struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); | 1685 | struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); |
1686 | if (skb2) | 1686 | if (skb2) |
1687 | ipmr_queue_xmit(net, mrt, skb2, cache, | 1687 | ipmr_queue_xmit(net, mrt, skb2, cache, |
1688 | psend); | 1688 | psend); |
1689 | } | 1689 | } |
1690 | psend = ct; | 1690 | psend = ct; |
1691 | } | 1691 | } |
1692 | } | 1692 | } |
1693 | if (psend != -1) { | 1693 | if (psend != -1) { |
1694 | if (local) { | 1694 | if (local) { |
1695 | struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); | 1695 | struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); |
1696 | if (skb2) | 1696 | if (skb2) |
1697 | ipmr_queue_xmit(net, mrt, skb2, cache, psend); | 1697 | ipmr_queue_xmit(net, mrt, skb2, cache, psend); |
1698 | } else { | 1698 | } else { |
1699 | ipmr_queue_xmit(net, mrt, skb, cache, psend); | 1699 | ipmr_queue_xmit(net, mrt, skb, cache, psend); |
1700 | return 0; | 1700 | return 0; |
1701 | } | 1701 | } |
1702 | } | 1702 | } |
1703 | 1703 | ||
1704 | dont_forward: | 1704 | dont_forward: |
1705 | if (!local) | 1705 | if (!local) |
1706 | kfree_skb(skb); | 1706 | kfree_skb(skb); |
1707 | return 0; | 1707 | return 0; |
1708 | } | 1708 | } |
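The forwarding loop treats cache->mfc_un.res.ttls[] as per-vif TTL thresholds: a copy goes to vif ct only when the packet's TTL is strictly greater than ttls[ct], so 255 effectively disables a vif for that (S,G) entry. A worked example with illustrative values:

	/* ttls[1] = 1, ttls[2] = 64, ttls[3] = 255, packet ttl = 32:
	 *   vif 1: 32 > 1   -> forwarded
	 *   vif 2: 32 > 64  -> suppressed (administrative scope boundary)
	 *   vif 3: 32 > 255 -> never forwarded
	 */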
1709 | 1709 | ||
1710 | 1710 | ||
1711 | /* | 1711 | /* |
1712 | * Multicast packets for forwarding arrive here | 1712 | * Multicast packets for forwarding arrive here |
1713 | */ | 1713 | */ |
1714 | 1714 | ||
1715 | int ip_mr_input(struct sk_buff *skb) | 1715 | int ip_mr_input(struct sk_buff *skb) |
1716 | { | 1716 | { |
1717 | struct mfc_cache *cache; | 1717 | struct mfc_cache *cache; |
1718 | struct net *net = dev_net(skb->dev); | 1718 | struct net *net = dev_net(skb->dev); |
1719 | int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL; | 1719 | int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL; |
1720 | struct mr_table *mrt; | 1720 | struct mr_table *mrt; |
1721 | int err; | 1721 | int err; |
1722 | 1722 | ||
1723 | /* The packet is looped back after forwarding; it must not be | 1723 | /* The packet is looped back after forwarding; it must not be |
1724 | forwarded a second time, but it can still be delivered locally. | 1724 | forwarded a second time, but it can still be delivered locally. |
1725 | */ | 1725 | */ |
1726 | if (IPCB(skb)->flags&IPSKB_FORWARDED) | 1726 | if (IPCB(skb)->flags&IPSKB_FORWARDED) |
1727 | goto dont_forward; | 1727 | goto dont_forward; |
1728 | 1728 | ||
1729 | err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt); | 1729 | err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt); |
1730 | if (err < 0) | 1730 | if (err < 0) |
1731 | return err; | 1731 | return err; |
1732 | 1732 | ||
1733 | if (!local) { | 1733 | if (!local) { |
1734 | if (IPCB(skb)->opt.router_alert) { | 1734 | if (IPCB(skb)->opt.router_alert) { |
1735 | if (ip_call_ra_chain(skb)) | 1735 | if (ip_call_ra_chain(skb)) |
1736 | return 0; | 1736 | return 0; |
1737 | } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) { | 1737 | } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) { |
1738 | /* IGMPv1 (and broken IGMPv2 implementations such as | 1738 | /* IGMPv1 (and broken IGMPv2 implementations such as |
1739 | Cisco IOS <= 11.2(8)) do not put the router alert | 1739 | Cisco IOS <= 11.2(8)) do not put the router alert |
1740 | option in IGMP packets destined for routable | 1740 | option in IGMP packets destined for routable |
1741 | groups. That is very bad, because it means | 1741 | groups. That is very bad, because it means |
1742 | that we can forward NO IGMP messages. | 1742 | that we can forward NO IGMP messages. |
1743 | */ | 1743 | */ |
1744 | read_lock(&mrt_lock); | 1744 | read_lock(&mrt_lock); |
1745 | if (mrt->mroute_sk) { | 1745 | if (mrt->mroute_sk) { |
1746 | nf_reset(skb); | 1746 | nf_reset(skb); |
1747 | raw_rcv(mrt->mroute_sk, skb); | 1747 | raw_rcv(mrt->mroute_sk, skb); |
1748 | read_unlock(&mrt_lock); | 1748 | read_unlock(&mrt_lock); |
1749 | return 0; | 1749 | return 0; |
1750 | } | 1750 | } |
1751 | read_unlock(&mrt_lock); | 1751 | read_unlock(&mrt_lock); |
1752 | } | 1752 | } |
1753 | } | 1753 | } |
1754 | 1754 | ||
1755 | read_lock(&mrt_lock); | 1755 | read_lock(&mrt_lock); |
1756 | cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr); | 1756 | cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr); |
1757 | 1757 | ||
1758 | /* | 1758 | /* |
1759 | * No usable cache entry | 1759 | * No usable cache entry |
1760 | */ | 1760 | */ |
1761 | if (cache == NULL) { | 1761 | if (cache == NULL) { |
1762 | int vif; | 1762 | int vif; |
1763 | 1763 | ||
1764 | if (local) { | 1764 | if (local) { |
1765 | struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); | 1765 | struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); |
1766 | ip_local_deliver(skb); | 1766 | ip_local_deliver(skb); |
1767 | if (skb2 == NULL) { | 1767 | if (skb2 == NULL) { |
1768 | read_unlock(&mrt_lock); | 1768 | read_unlock(&mrt_lock); |
1769 | return -ENOBUFS; | 1769 | return -ENOBUFS; |
1770 | } | 1770 | } |
1771 | skb = skb2; | 1771 | skb = skb2; |
1772 | } | 1772 | } |
1773 | 1773 | ||
1774 | vif = ipmr_find_vif(mrt, skb->dev); | 1774 | vif = ipmr_find_vif(mrt, skb->dev); |
1775 | if (vif >= 0) { | 1775 | if (vif >= 0) { |
1776 | int err2 = ipmr_cache_unresolved(mrt, vif, skb); | 1776 | int err2 = ipmr_cache_unresolved(mrt, vif, skb); |
1777 | read_unlock(&mrt_lock); | 1777 | read_unlock(&mrt_lock); |
1778 | 1778 | ||
1779 | return err2; | 1779 | return err2; |
1780 | } | 1780 | } |
1781 | read_unlock(&mrt_lock); | 1781 | read_unlock(&mrt_lock); |
1782 | kfree_skb(skb); | 1782 | kfree_skb(skb); |
1783 | return -ENODEV; | 1783 | return -ENODEV; |
1784 | } | 1784 | } |
1785 | 1785 | ||
1786 | ip_mr_forward(net, mrt, skb, cache, local); | 1786 | ip_mr_forward(net, mrt, skb, cache, local); |
1787 | 1787 | ||
1788 | read_unlock(&mrt_lock); | 1788 | read_unlock(&mrt_lock); |
1789 | 1789 | ||
1790 | if (local) | 1790 | if (local) |
1791 | return ip_local_deliver(skb); | 1791 | return ip_local_deliver(skb); |
1792 | 1792 | ||
1793 | return 0; | 1793 | return 0; |
1794 | 1794 | ||
1795 | dont_forward: | 1795 | dont_forward: |
1796 | if (local) | 1796 | if (local) |
1797 | return ip_local_deliver(skb); | 1797 | return ip_local_deliver(skb); |
1798 | kfree_skb(skb); | 1798 | kfree_skb(skb); |
1799 | return 0; | 1799 | return 0; |
1800 | } | 1800 | } |
1801 | 1801 | ||
1802 | #ifdef CONFIG_IP_PIMSM | 1802 | #ifdef CONFIG_IP_PIMSM |
1803 | static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb, | 1803 | static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb, |
1804 | unsigned int pimlen) | 1804 | unsigned int pimlen) |
1805 | { | 1805 | { |
1806 | struct net_device *reg_dev = NULL; | 1806 | struct net_device *reg_dev = NULL; |
1807 | struct iphdr *encap; | 1807 | struct iphdr *encap; |
1808 | 1808 | ||
1809 | encap = (struct iphdr *)(skb_transport_header(skb) + pimlen); | 1809 | encap = (struct iphdr *)(skb_transport_header(skb) + pimlen); |
1810 | /* | 1810 | /* |
1811 | Check that: | 1811 | Check that: |
1812 | a. packet is really destined for a multicast group | 1812 | a. packet is really destined for a multicast group |
1813 | b. packet is not a NULL-REGISTER | 1813 | b. packet is not a NULL-REGISTER |
1814 | c. packet is not truncated | 1814 | c. packet is not truncated |
1815 | */ | 1815 | */ |
1816 | if (!ipv4_is_multicast(encap->daddr) || | 1816 | if (!ipv4_is_multicast(encap->daddr) || |
1817 | encap->tot_len == 0 || | 1817 | encap->tot_len == 0 || |
1818 | ntohs(encap->tot_len) + pimlen > skb->len) | 1818 | ntohs(encap->tot_len) + pimlen > skb->len) |
1819 | return 1; | 1819 | return 1; |
1820 | 1820 | ||
1821 | read_lock(&mrt_lock); | 1821 | read_lock(&mrt_lock); |
1822 | if (mrt->mroute_reg_vif_num >= 0) | 1822 | if (mrt->mroute_reg_vif_num >= 0) |
1823 | reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev; | 1823 | reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev; |
1824 | if (reg_dev) | 1824 | if (reg_dev) |
1825 | dev_hold(reg_dev); | 1825 | dev_hold(reg_dev); |
1826 | read_unlock(&mrt_lock); | 1826 | read_unlock(&mrt_lock); |
1827 | 1827 | ||
1828 | if (reg_dev == NULL) | 1828 | if (reg_dev == NULL) |
1829 | return 1; | 1829 | return 1; |
1830 | 1830 | ||
1831 | skb->mac_header = skb->network_header; | 1831 | skb->mac_header = skb->network_header; |
1832 | skb_pull(skb, (u8*)encap - skb->data); | 1832 | skb_pull(skb, (u8*)encap - skb->data); |
1833 | skb_reset_network_header(skb); | 1833 | skb_reset_network_header(skb); |
1834 | skb->dev = reg_dev; | ||
1835 | skb->protocol = htons(ETH_P_IP); | 1834 | skb->protocol = htons(ETH_P_IP); |
1836 | skb->ip_summed = 0; | 1835 | skb->ip_summed = 0; |
1837 | skb->pkt_type = PACKET_HOST; | 1836 | skb->pkt_type = PACKET_HOST; |
1838 | skb_dst_drop(skb); | 1837 | |
1839 | reg_dev->stats.rx_bytes += skb->len; | 1838 | skb_tunnel_rx(skb, reg_dev); |
1840 | reg_dev->stats.rx_packets++; | 1839 | |
1841 | nf_reset(skb); | ||
1842 | netif_rx(skb); | 1840 | netif_rx(skb); |
1843 | dev_put(reg_dev); | 1841 | dev_put(reg_dev); |
1844 | 1842 | ||
1845 | return 0; | 1843 | return 0; |
1846 | } | 1844 | } |
1847 | #endif | 1845 | #endif |
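The hunk above is the heart of this commit: the open-coded receive fixup in __pim_rcv() (re-homing the skb on reg_dev, dropping the cached dst, bumping reg_dev's rx_bytes/rx_packets, and calling nf_reset()) collapses into a single skb_tunnel_rx() call. As a rough guide to what the helper factors out, here is a minimal sketch reconstructed from the lines removed in this hunk; the rxhash clearing is an assumption (it is not visible in this hunk), and it is meant for reading the diff, not as a quote of the committed definition in include/net/dst.h:

    static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev)
    {
        /* Account the packet to the tunnel device... */
        dev->stats.rx_packets++;
        dev->stats.rx_bytes += skb->len;
        /* ...re-home it there, and forget state from the outer path. */
        skb->dev = dev;
        skb->rxhash = 0;    /* assumed: drop any stale receive hash */
        skb_dst_drop(skb);
        nf_reset(skb);
    }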
1848 | 1846 | ||
1849 | #ifdef CONFIG_IP_PIMSM_V1 | 1847 | #ifdef CONFIG_IP_PIMSM_V1 |
1850 | /* | 1848 | /* |
1851 | * Handle IGMP messages of PIMv1 | 1849 | * Handle IGMP messages of PIMv1 |
1852 | */ | 1850 | */ |
1853 | 1851 | ||
1854 | int pim_rcv_v1(struct sk_buff * skb) | 1852 | int pim_rcv_v1(struct sk_buff * skb) |
1855 | { | 1853 | { |
1856 | struct igmphdr *pim; | 1854 | struct igmphdr *pim; |
1857 | struct net *net = dev_net(skb->dev); | 1855 | struct net *net = dev_net(skb->dev); |
1858 | struct mr_table *mrt; | 1856 | struct mr_table *mrt; |
1859 | 1857 | ||
1860 | if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) | 1858 | if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) |
1861 | goto drop; | 1859 | goto drop; |
1862 | 1860 | ||
1863 | pim = igmp_hdr(skb); | 1861 | pim = igmp_hdr(skb); |
1864 | 1862 | ||
1865 | if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0) | 1863 | if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0) |
1866 | goto drop; | 1864 | goto drop; |
1867 | 1865 | ||
1868 | if (!mrt->mroute_do_pim || | 1866 | if (!mrt->mroute_do_pim || |
1869 | pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) | 1867 | pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) |
1870 | goto drop; | 1868 | goto drop; |
1871 | 1869 | ||
1872 | if (__pim_rcv(mrt, skb, sizeof(*pim))) { | 1870 | if (__pim_rcv(mrt, skb, sizeof(*pim))) { |
1873 | drop: | 1871 | drop: |
1874 | kfree_skb(skb); | 1872 | kfree_skb(skb); |
1875 | } | 1873 | } |
1876 | return 0; | 1874 | return 0; |
1877 | } | 1875 | } |
1878 | #endif | 1876 | #endif |
1879 | 1877 | ||
1880 | #ifdef CONFIG_IP_PIMSM_V2 | 1878 | #ifdef CONFIG_IP_PIMSM_V2 |
1881 | static int pim_rcv(struct sk_buff * skb) | 1879 | static int pim_rcv(struct sk_buff * skb) |
1882 | { | 1880 | { |
1883 | struct pimreghdr *pim; | 1881 | struct pimreghdr *pim; |
1884 | struct net *net = dev_net(skb->dev); | 1882 | struct net *net = dev_net(skb->dev); |
1885 | struct mr_table *mrt; | 1883 | struct mr_table *mrt; |
1886 | 1884 | ||
1887 | if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) | 1885 | if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) |
1888 | goto drop; | 1886 | goto drop; |
1889 | 1887 | ||
1890 | pim = (struct pimreghdr *)skb_transport_header(skb); | 1888 | pim = (struct pimreghdr *)skb_transport_header(skb); |
1891 | if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) || | 1889 | if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) || |
1892 | (pim->flags&PIM_NULL_REGISTER) || | 1890 | (pim->flags&PIM_NULL_REGISTER) || |
1893 | (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 && | 1891 | (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 && |
1894 | csum_fold(skb_checksum(skb, 0, skb->len, 0)))) | 1892 | csum_fold(skb_checksum(skb, 0, skb->len, 0)))) |
1895 | goto drop; | 1893 | goto drop; |
1896 | 1894 | ||
1897 | if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0) | 1895 | if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0) |
1898 | goto drop; | 1896 | goto drop; |
1899 | 1897 | ||
1900 | if (__pim_rcv(mrt, skb, sizeof(*pim))) { | 1898 | if (__pim_rcv(mrt, skb, sizeof(*pim))) { |
1901 | drop: | 1899 | drop: |
1902 | kfree_skb(skb); | 1900 | kfree_skb(skb); |
1903 | } | 1901 | } |
1904 | return 0; | 1902 | return 0; |
1905 | } | 1903 | } |
1906 | #endif | 1904 | #endif |
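pim_rcv() accepts only well-formed PIMv2 Register messages. The type octet packs the protocol version in its high nibble and the message type in its low nibble, so the comparison against (PIM_VERSION<<4)|(PIM_REGISTER) matches exactly one value. The checksum test is deliberately forgiving: the packet passes if the checksum verifies over either the PIM header alone or the whole packet, because some senders checksum only the header of a Register while others cover the encapsulated data as well. A tiny self-contained illustration of the nibble packing (the constant values 2 and 1 are assumed from PIMv2 conventions; they are not defined in this file):

    #include <stdio.h>
    #include <stdint.h>

    #define PIM_VERSION  2    /* assumed value */
    #define PIM_REGISTER 1    /* assumed value */

    int main(void)
    {
        uint8_t type = (PIM_VERSION << 4) | PIM_REGISTER;    /* 0x21 */

        printf("version=%u type=%u raw=0x%02x\n",
               type >> 4, type & 0xf, type);
        return 0;
    }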
1907 | 1905 | ||
1908 | static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, | 1906 | static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, |
1909 | struct mfc_cache *c, struct rtmsg *rtm) | 1907 | struct mfc_cache *c, struct rtmsg *rtm) |
1910 | { | 1908 | { |
1911 | int ct; | 1909 | int ct; |
1912 | struct rtnexthop *nhp; | 1910 | struct rtnexthop *nhp; |
1913 | u8 *b = skb_tail_pointer(skb); | 1911 | u8 *b = skb_tail_pointer(skb); |
1914 | struct rtattr *mp_head; | 1912 | struct rtattr *mp_head; |
1915 | 1913 | ||
1916 | /* If cache is unresolved, don't try to parse IIF and OIF */ | 1914 | /* If cache is unresolved, don't try to parse IIF and OIF */ |
1917 | if (c->mfc_parent > MAXVIFS) | 1915 | if (c->mfc_parent > MAXVIFS) |
1918 | return -ENOENT; | 1916 | return -ENOENT; |
1919 | 1917 | ||
1920 | if (VIF_EXISTS(mrt, c->mfc_parent)) | 1918 | if (VIF_EXISTS(mrt, c->mfc_parent)) |
1921 | RTA_PUT(skb, RTA_IIF, 4, &mrt->vif_table[c->mfc_parent].dev->ifindex); | 1919 | RTA_PUT(skb, RTA_IIF, 4, &mrt->vif_table[c->mfc_parent].dev->ifindex); |
1922 | 1920 | ||
1923 | mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0)); | 1921 | mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0)); |
1924 | 1922 | ||
1925 | for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { | 1923 | for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { |
1926 | if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) { | 1924 | if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) { |
1927 | if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4)) | 1925 | if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4)) |
1928 | goto rtattr_failure; | 1926 | goto rtattr_failure; |
1929 | nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); | 1927 | nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); |
1930 | nhp->rtnh_flags = 0; | 1928 | nhp->rtnh_flags = 0; |
1931 | nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; | 1929 | nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; |
1932 | nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex; | 1930 | nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex; |
1933 | nhp->rtnh_len = sizeof(*nhp); | 1931 | nhp->rtnh_len = sizeof(*nhp); |
1934 | } | 1932 | } |
1935 | } | 1933 | } |
1936 | mp_head->rta_type = RTA_MULTIPATH; | 1934 | mp_head->rta_type = RTA_MULTIPATH; |
1937 | mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head; | 1935 | mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head; |
1938 | rtm->rtm_type = RTN_MULTICAST; | 1936 | rtm->rtm_type = RTN_MULTICAST; |
1939 | return 1; | 1937 | return 1; |
1940 | 1938 | ||
1941 | rtattr_failure: | 1939 | rtattr_failure: |
1942 | nlmsg_trim(skb, b); | 1940 | nlmsg_trim(skb, b); |
1943 | return -EMSGSIZE; | 1941 | return -EMSGSIZE; |
1944 | } | 1942 | } |
1945 | 1943 | ||
1946 | int ipmr_get_route(struct net *net, | 1944 | int ipmr_get_route(struct net *net, |
1947 | struct sk_buff *skb, struct rtmsg *rtm, int nowait) | 1945 | struct sk_buff *skb, struct rtmsg *rtm, int nowait) |
1948 | { | 1946 | { |
1949 | int err; | 1947 | int err; |
1950 | struct mr_table *mrt; | 1948 | struct mr_table *mrt; |
1951 | struct mfc_cache *cache; | 1949 | struct mfc_cache *cache; |
1952 | struct rtable *rt = skb_rtable(skb); | 1950 | struct rtable *rt = skb_rtable(skb); |
1953 | 1951 | ||
1954 | mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); | 1952 | mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); |
1955 | if (mrt == NULL) | 1953 | if (mrt == NULL) |
1956 | return -ENOENT; | 1954 | return -ENOENT; |
1957 | 1955 | ||
1958 | read_lock(&mrt_lock); | 1956 | read_lock(&mrt_lock); |
1959 | cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst); | 1957 | cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst); |
1960 | 1958 | ||
1961 | if (cache == NULL) { | 1959 | if (cache == NULL) { |
1962 | struct sk_buff *skb2; | 1960 | struct sk_buff *skb2; |
1963 | struct iphdr *iph; | 1961 | struct iphdr *iph; |
1964 | struct net_device *dev; | 1962 | struct net_device *dev; |
1965 | int vif; | 1963 | int vif; |
1966 | 1964 | ||
1967 | if (nowait) { | 1965 | if (nowait) { |
1968 | read_unlock(&mrt_lock); | 1966 | read_unlock(&mrt_lock); |
1969 | return -EAGAIN; | 1967 | return -EAGAIN; |
1970 | } | 1968 | } |
1971 | 1969 | ||
1972 | dev = skb->dev; | 1970 | dev = skb->dev; |
1973 | if (dev == NULL || (vif = ipmr_find_vif(mrt, dev)) < 0) { | 1971 | if (dev == NULL || (vif = ipmr_find_vif(mrt, dev)) < 0) { |
1974 | read_unlock(&mrt_lock); | 1972 | read_unlock(&mrt_lock); |
1975 | return -ENODEV; | 1973 | return -ENODEV; |
1976 | } | 1974 | } |
1977 | skb2 = skb_clone(skb, GFP_ATOMIC); | 1975 | skb2 = skb_clone(skb, GFP_ATOMIC); |
1978 | if (!skb2) { | 1976 | if (!skb2) { |
1979 | read_unlock(&mrt_lock); | 1977 | read_unlock(&mrt_lock); |
1980 | return -ENOMEM; | 1978 | return -ENOMEM; |
1981 | } | 1979 | } |
1982 | 1980 | ||
1983 | skb_push(skb2, sizeof(struct iphdr)); | 1981 | skb_push(skb2, sizeof(struct iphdr)); |
1984 | skb_reset_network_header(skb2); | 1982 | skb_reset_network_header(skb2); |
1985 | iph = ip_hdr(skb2); | 1983 | iph = ip_hdr(skb2); |
1986 | iph->ihl = sizeof(struct iphdr) >> 2; | 1984 | iph->ihl = sizeof(struct iphdr) >> 2; |
1987 | iph->saddr = rt->rt_src; | 1985 | iph->saddr = rt->rt_src; |
1988 | iph->daddr = rt->rt_dst; | 1986 | iph->daddr = rt->rt_dst; |
1989 | iph->version = 0; | 1987 | iph->version = 0; |
1990 | err = ipmr_cache_unresolved(mrt, vif, skb2); | 1988 | err = ipmr_cache_unresolved(mrt, vif, skb2); |
1991 | read_unlock(&mrt_lock); | 1989 | read_unlock(&mrt_lock); |
1992 | return err; | 1990 | return err; |
1993 | } | 1991 | } |
1994 | 1992 | ||
1995 | if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY)) | 1993 | if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY)) |
1996 | cache->mfc_flags |= MFC_NOTIFY; | 1994 | cache->mfc_flags |= MFC_NOTIFY; |
1997 | err = __ipmr_fill_mroute(mrt, skb, cache, rtm); | 1995 | err = __ipmr_fill_mroute(mrt, skb, cache, rtm); |
1998 | read_unlock(&mrt_lock); | 1996 | read_unlock(&mrt_lock); |
1999 | return err; | 1997 | return err; |
2000 | } | 1998 | } |
2001 | 1999 | ||
2002 | static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, | 2000 | static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, |
2003 | u32 pid, u32 seq, struct mfc_cache *c) | 2001 | u32 pid, u32 seq, struct mfc_cache *c) |
2004 | { | 2002 | { |
2005 | struct nlmsghdr *nlh; | 2003 | struct nlmsghdr *nlh; |
2006 | struct rtmsg *rtm; | 2004 | struct rtmsg *rtm; |
2007 | 2005 | ||
2008 | nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI); | 2006 | nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI); |
2009 | if (nlh == NULL) | 2007 | if (nlh == NULL) |
2010 | return -EMSGSIZE; | 2008 | return -EMSGSIZE; |
2011 | 2009 | ||
2012 | rtm = nlmsg_data(nlh); | 2010 | rtm = nlmsg_data(nlh); |
2013 | rtm->rtm_family = RTNL_FAMILY_IPMR; | 2011 | rtm->rtm_family = RTNL_FAMILY_IPMR; |
2014 | rtm->rtm_dst_len = 32; | 2012 | rtm->rtm_dst_len = 32; |
2015 | rtm->rtm_src_len = 32; | 2013 | rtm->rtm_src_len = 32; |
2016 | rtm->rtm_tos = 0; | 2014 | rtm->rtm_tos = 0; |
2017 | rtm->rtm_table = mrt->id; | 2015 | rtm->rtm_table = mrt->id; |
2018 | NLA_PUT_U32(skb, RTA_TABLE, mrt->id); | 2016 | NLA_PUT_U32(skb, RTA_TABLE, mrt->id); |
2019 | rtm->rtm_type = RTN_MULTICAST; | 2017 | rtm->rtm_type = RTN_MULTICAST; |
2020 | rtm->rtm_scope = RT_SCOPE_UNIVERSE; | 2018 | rtm->rtm_scope = RT_SCOPE_UNIVERSE; |
2021 | rtm->rtm_protocol = RTPROT_UNSPEC; | 2019 | rtm->rtm_protocol = RTPROT_UNSPEC; |
2022 | rtm->rtm_flags = 0; | 2020 | rtm->rtm_flags = 0; |
2023 | 2021 | ||
2024 | NLA_PUT_BE32(skb, RTA_SRC, c->mfc_origin); | 2022 | NLA_PUT_BE32(skb, RTA_SRC, c->mfc_origin); |
2025 | NLA_PUT_BE32(skb, RTA_DST, c->mfc_mcastgrp); | 2023 | NLA_PUT_BE32(skb, RTA_DST, c->mfc_mcastgrp); |
2026 | 2024 | ||
2027 | if (__ipmr_fill_mroute(mrt, skb, c, rtm) < 0) | 2025 | if (__ipmr_fill_mroute(mrt, skb, c, rtm) < 0) |
2028 | goto nla_put_failure; | 2026 | goto nla_put_failure; |
2029 | 2027 | ||
2030 | return nlmsg_end(skb, nlh); | 2028 | return nlmsg_end(skb, nlh); |
2031 | 2029 | ||
2032 | nla_put_failure: | 2030 | nla_put_failure: |
2033 | nlmsg_cancel(skb, nlh); | 2031 | nlmsg_cancel(skb, nlh); |
2034 | return -EMSGSIZE; | 2032 | return -EMSGSIZE; |
2035 | } | 2033 | } |
2036 | 2034 | ||
2037 | static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) | 2035 | static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) |
2038 | { | 2036 | { |
2039 | struct net *net = sock_net(skb->sk); | 2037 | struct net *net = sock_net(skb->sk); |
2040 | struct mr_table *mrt; | 2038 | struct mr_table *mrt; |
2041 | struct mfc_cache *mfc; | 2039 | struct mfc_cache *mfc; |
2042 | unsigned int t = 0, s_t; | 2040 | unsigned int t = 0, s_t; |
2043 | unsigned int h = 0, s_h; | 2041 | unsigned int h = 0, s_h; |
2044 | unsigned int e = 0, s_e; | 2042 | unsigned int e = 0, s_e; |
2045 | 2043 | ||
2046 | s_t = cb->args[0]; | 2044 | s_t = cb->args[0]; |
2047 | s_h = cb->args[1]; | 2045 | s_h = cb->args[1]; |
2048 | s_e = cb->args[2]; | 2046 | s_e = cb->args[2]; |
2049 | 2047 | ||
2050 | read_lock(&mrt_lock); | 2048 | read_lock(&mrt_lock); |
2051 | ipmr_for_each_table(mrt, net) { | 2049 | ipmr_for_each_table(mrt, net) { |
2052 | if (t < s_t) | 2050 | if (t < s_t) |
2053 | goto next_table; | 2051 | goto next_table; |
2054 | if (t > s_t) | 2052 | if (t > s_t) |
2055 | s_h = 0; | 2053 | s_h = 0; |
2056 | for (h = s_h; h < MFC_LINES; h++) { | 2054 | for (h = s_h; h < MFC_LINES; h++) { |
2057 | list_for_each_entry(mfc, &mrt->mfc_cache_array[h], list) { | 2055 | list_for_each_entry(mfc, &mrt->mfc_cache_array[h], list) { |
2058 | if (e < s_e) | 2056 | if (e < s_e) |
2059 | goto next_entry; | 2057 | goto next_entry; |
2060 | if (ipmr_fill_mroute(mrt, skb, | 2058 | if (ipmr_fill_mroute(mrt, skb, |
2061 | NETLINK_CB(cb->skb).pid, | 2059 | NETLINK_CB(cb->skb).pid, |
2062 | cb->nlh->nlmsg_seq, | 2060 | cb->nlh->nlmsg_seq, |
2063 | mfc) < 0) | 2061 | mfc) < 0) |
2064 | goto done; | 2062 | goto done; |
2065 | next_entry: | 2063 | next_entry: |
2066 | e++; | 2064 | e++; |
2067 | } | 2065 | } |
2068 | e = s_e = 0; | 2066 | e = s_e = 0; |
2069 | } | 2067 | } |
2070 | s_h = 0; | 2068 | s_h = 0; |
2071 | next_table: | 2069 | next_table: |
2072 | t++; | 2070 | t++; |
2073 | } | 2071 | } |
2074 | done: | 2072 | done: |
2075 | read_unlock(&mrt_lock); | 2073 | read_unlock(&mrt_lock); |
2076 | 2074 | ||
2077 | cb->args[2] = e; | 2075 | cb->args[2] = e; |
2078 | cb->args[1] = h; | 2076 | cb->args[1] = h; |
2079 | cb->args[0] = t; | 2077 | cb->args[0] = t; |
2080 | 2078 | ||
2081 | return skb->len; | 2079 | return skb->len; |
2082 | } | 2080 | } |
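ipmr_rtm_dumproute() is a textbook resumable netlink dump: it walks (table, hash line, cache entry) and parks its position in cb->args[0..2] whenever the destination skb fills up, so the next read resumes exactly where the previous one stopped. The same cursor shape, boiled down to self-contained C (the names and the printf payload are invented for illustration):

    #include <stdio.h>

    /* Mirrors the cb->args[0..2] cursor of ipmr_rtm_dumproute(). */
    struct cursor { unsigned t, h, e; };

    /* Emit up to 'budget' entries, resuming from *c. Returns 1 when
     * interrupted (cursor left at the next entry), 0 when done. */
    static int dump_some(struct cursor *c, unsigned nt, unsigned nh,
                         unsigned ne, unsigned budget)
    {
        unsigned emitted = 0;

        for (; c->t < nt; c->t++, c->h = 0)
            for (; c->h < nh; c->h++, c->e = 0)
                for (; c->e < ne; c->e++) {
                    if (emitted == budget)
                        return 1;
                    printf("entry %u/%u/%u\n", c->t, c->h, c->e);
                    emitted++;
                }
        return 0;
    }

    int main(void)
    {
        struct cursor c = { 0, 0, 0 };

        while (dump_some(&c, 2, 2, 3, 5))
            ;    /* each pass models one dump callback invocation */
        return 0;
    }

Note how an inner index resets only when the loop enclosing it advances; that is exactly why the function above zeroes s_h when moving past the requested table and zeroes e and s_e after finishing each hash line.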
2083 | 2081 | ||
2084 | #ifdef CONFIG_PROC_FS | 2082 | #ifdef CONFIG_PROC_FS |
2085 | /* | 2083 | /* |
2086 | * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif | 2084 | * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif |
2087 | */ | 2085 | */ |
2088 | struct ipmr_vif_iter { | 2086 | struct ipmr_vif_iter { |
2089 | struct seq_net_private p; | 2087 | struct seq_net_private p; |
2090 | struct mr_table *mrt; | 2088 | struct mr_table *mrt; |
2091 | int ct; | 2089 | int ct; |
2092 | }; | 2090 | }; |
2093 | 2091 | ||
2094 | static struct vif_device *ipmr_vif_seq_idx(struct net *net, | 2092 | static struct vif_device *ipmr_vif_seq_idx(struct net *net, |
2095 | struct ipmr_vif_iter *iter, | 2093 | struct ipmr_vif_iter *iter, |
2096 | loff_t pos) | 2094 | loff_t pos) |
2097 | { | 2095 | { |
2098 | struct mr_table *mrt = iter->mrt; | 2096 | struct mr_table *mrt = iter->mrt; |
2099 | 2097 | ||
2100 | for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) { | 2098 | for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) { |
2101 | if (!VIF_EXISTS(mrt, iter->ct)) | 2099 | if (!VIF_EXISTS(mrt, iter->ct)) |
2102 | continue; | 2100 | continue; |
2103 | if (pos-- == 0) | 2101 | if (pos-- == 0) |
2104 | return &mrt->vif_table[iter->ct]; | 2102 | return &mrt->vif_table[iter->ct]; |
2105 | } | 2103 | } |
2106 | return NULL; | 2104 | return NULL; |
2107 | } | 2105 | } |
2108 | 2106 | ||
2109 | static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos) | 2107 | static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos) |
2110 | __acquires(mrt_lock) | 2108 | __acquires(mrt_lock) |
2111 | { | 2109 | { |
2112 | struct ipmr_vif_iter *iter = seq->private; | 2110 | struct ipmr_vif_iter *iter = seq->private; |
2113 | struct net *net = seq_file_net(seq); | 2111 | struct net *net = seq_file_net(seq); |
2114 | struct mr_table *mrt; | 2112 | struct mr_table *mrt; |
2115 | 2113 | ||
2116 | mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); | 2114 | mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); |
2117 | if (mrt == NULL) | 2115 | if (mrt == NULL) |
2118 | return ERR_PTR(-ENOENT); | 2116 | return ERR_PTR(-ENOENT); |
2119 | 2117 | ||
2120 | iter->mrt = mrt; | 2118 | iter->mrt = mrt; |
2121 | 2119 | ||
2122 | read_lock(&mrt_lock); | 2120 | read_lock(&mrt_lock); |
2123 | return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1) | 2121 | return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1) |
2124 | : SEQ_START_TOKEN; | 2122 | : SEQ_START_TOKEN; |
2125 | } | 2123 | } |
2126 | 2124 | ||
2127 | static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos) | 2125 | static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos) |
2128 | { | 2126 | { |
2129 | struct ipmr_vif_iter *iter = seq->private; | 2127 | struct ipmr_vif_iter *iter = seq->private; |
2130 | struct net *net = seq_file_net(seq); | 2128 | struct net *net = seq_file_net(seq); |
2131 | struct mr_table *mrt = iter->mrt; | 2129 | struct mr_table *mrt = iter->mrt; |
2132 | 2130 | ||
2133 | ++*pos; | 2131 | ++*pos; |
2134 | if (v == SEQ_START_TOKEN) | 2132 | if (v == SEQ_START_TOKEN) |
2135 | return ipmr_vif_seq_idx(net, iter, 0); | 2133 | return ipmr_vif_seq_idx(net, iter, 0); |
2136 | 2134 | ||
2137 | while (++iter->ct < mrt->maxvif) { | 2135 | while (++iter->ct < mrt->maxvif) { |
2138 | if (!VIF_EXISTS(mrt, iter->ct)) | 2136 | if (!VIF_EXISTS(mrt, iter->ct)) |
2139 | continue; | 2137 | continue; |
2140 | return &mrt->vif_table[iter->ct]; | 2138 | return &mrt->vif_table[iter->ct]; |
2141 | } | 2139 | } |
2142 | return NULL; | 2140 | return NULL; |
2143 | } | 2141 | } |
2144 | 2142 | ||
2145 | static void ipmr_vif_seq_stop(struct seq_file *seq, void *v) | 2143 | static void ipmr_vif_seq_stop(struct seq_file *seq, void *v) |
2146 | __releases(mrt_lock) | 2144 | __releases(mrt_lock) |
2147 | { | 2145 | { |
2148 | read_unlock(&mrt_lock); | 2146 | read_unlock(&mrt_lock); |
2149 | } | 2147 | } |
2150 | 2148 | ||
2151 | static int ipmr_vif_seq_show(struct seq_file *seq, void *v) | 2149 | static int ipmr_vif_seq_show(struct seq_file *seq, void *v) |
2152 | { | 2150 | { |
2153 | struct ipmr_vif_iter *iter = seq->private; | 2151 | struct ipmr_vif_iter *iter = seq->private; |
2154 | struct mr_table *mrt = iter->mrt; | 2152 | struct mr_table *mrt = iter->mrt; |
2155 | 2153 | ||
2156 | if (v == SEQ_START_TOKEN) { | 2154 | if (v == SEQ_START_TOKEN) { |
2157 | seq_puts(seq, | 2155 | seq_puts(seq, |
2158 | "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n"); | 2156 | "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n"); |
2159 | } else { | 2157 | } else { |
2160 | const struct vif_device *vif = v; | 2158 | const struct vif_device *vif = v; |
2161 | const char *name = vif->dev ? vif->dev->name : "none"; | 2159 | const char *name = vif->dev ? vif->dev->name : "none"; |
2162 | 2160 | ||
2163 | seq_printf(seq, | 2161 | seq_printf(seq, |
2164 | "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n", | 2162 | "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n", |
2165 | vif - mrt->vif_table, | 2163 | vif - mrt->vif_table, |
2166 | name, vif->bytes_in, vif->pkt_in, | 2164 | name, vif->bytes_in, vif->pkt_in, |
2167 | vif->bytes_out, vif->pkt_out, | 2165 | vif->bytes_out, vif->pkt_out, |
2168 | vif->flags, vif->local, vif->remote); | 2166 | vif->flags, vif->local, vif->remote); |
2169 | } | 2167 | } |
2170 | return 0; | 2168 | return 0; |
2171 | } | 2169 | } |
2172 | 2170 | ||
2173 | static const struct seq_operations ipmr_vif_seq_ops = { | 2171 | static const struct seq_operations ipmr_vif_seq_ops = { |
2174 | .start = ipmr_vif_seq_start, | 2172 | .start = ipmr_vif_seq_start, |
2175 | .next = ipmr_vif_seq_next, | 2173 | .next = ipmr_vif_seq_next, |
2176 | .stop = ipmr_vif_seq_stop, | 2174 | .stop = ipmr_vif_seq_stop, |
2177 | .show = ipmr_vif_seq_show, | 2175 | .show = ipmr_vif_seq_show, |
2178 | }; | 2176 | }; |
2179 | 2177 | ||
2180 | static int ipmr_vif_open(struct inode *inode, struct file *file) | 2178 | static int ipmr_vif_open(struct inode *inode, struct file *file) |
2181 | { | 2179 | { |
2182 | return seq_open_net(inode, file, &ipmr_vif_seq_ops, | 2180 | return seq_open_net(inode, file, &ipmr_vif_seq_ops, |
2183 | sizeof(struct ipmr_vif_iter)); | 2181 | sizeof(struct ipmr_vif_iter)); |
2184 | } | 2182 | } |
2185 | 2183 | ||
2186 | static const struct file_operations ipmr_vif_fops = { | 2184 | static const struct file_operations ipmr_vif_fops = { |
2187 | .owner = THIS_MODULE, | 2185 | .owner = THIS_MODULE, |
2188 | .open = ipmr_vif_open, | 2186 | .open = ipmr_vif_open, |
2189 | .read = seq_read, | 2187 | .read = seq_read, |
2190 | .llseek = seq_lseek, | 2188 | .llseek = seq_lseek, |
2191 | .release = seq_release_net, | 2189 | .release = seq_release_net, |
2192 | }; | 2190 | }; |
2193 | 2191 | ||
2194 | struct ipmr_mfc_iter { | 2192 | struct ipmr_mfc_iter { |
2195 | struct seq_net_private p; | 2193 | struct seq_net_private p; |
2196 | struct mr_table *mrt; | 2194 | struct mr_table *mrt; |
2197 | struct list_head *cache; | 2195 | struct list_head *cache; |
2198 | int ct; | 2196 | int ct; |
2199 | }; | 2197 | }; |
2200 | 2198 | ||
2201 | 2199 | ||
2202 | static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net, | 2200 | static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net, |
2203 | struct ipmr_mfc_iter *it, loff_t pos) | 2201 | struct ipmr_mfc_iter *it, loff_t pos) |
2204 | { | 2202 | { |
2205 | struct mr_table *mrt = it->mrt; | 2203 | struct mr_table *mrt = it->mrt; |
2206 | struct mfc_cache *mfc; | 2204 | struct mfc_cache *mfc; |
2207 | 2205 | ||
2208 | read_lock(&mrt_lock); | 2206 | read_lock(&mrt_lock); |
2209 | for (it->ct = 0; it->ct < MFC_LINES; it->ct++) { | 2207 | for (it->ct = 0; it->ct < MFC_LINES; it->ct++) { |
2210 | it->cache = &mrt->mfc_cache_array[it->ct]; | 2208 | it->cache = &mrt->mfc_cache_array[it->ct]; |
2211 | list_for_each_entry(mfc, it->cache, list) | 2209 | list_for_each_entry(mfc, it->cache, list) |
2212 | if (pos-- == 0) | 2210 | if (pos-- == 0) |
2213 | return mfc; | 2211 | return mfc; |
2214 | } | 2212 | } |
2215 | read_unlock(&mrt_lock); | 2213 | read_unlock(&mrt_lock); |
2216 | 2214 | ||
2217 | spin_lock_bh(&mfc_unres_lock); | 2215 | spin_lock_bh(&mfc_unres_lock); |
2218 | it->cache = &mrt->mfc_unres_queue; | 2216 | it->cache = &mrt->mfc_unres_queue; |
2219 | list_for_each_entry(mfc, it->cache, list) | 2217 | list_for_each_entry(mfc, it->cache, list) |
2220 | if (pos-- == 0) | 2218 | if (pos-- == 0) |
2221 | return mfc; | 2219 | return mfc; |
2222 | spin_unlock_bh(&mfc_unres_lock); | 2220 | spin_unlock_bh(&mfc_unres_lock); |
2223 | 2221 | ||
2224 | it->cache = NULL; | 2222 | it->cache = NULL; |
2225 | return NULL; | 2223 | return NULL; |
2226 | } | 2224 | } |
2227 | 2225 | ||
2228 | 2226 | ||
2229 | static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) | 2227 | static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) |
2230 | { | 2228 | { |
2231 | struct ipmr_mfc_iter *it = seq->private; | 2229 | struct ipmr_mfc_iter *it = seq->private; |
2232 | struct net *net = seq_file_net(seq); | 2230 | struct net *net = seq_file_net(seq); |
2233 | struct mr_table *mrt; | 2231 | struct mr_table *mrt; |
2234 | 2232 | ||
2235 | mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); | 2233 | mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); |
2236 | if (mrt == NULL) | 2234 | if (mrt == NULL) |
2237 | return ERR_PTR(-ENOENT); | 2235 | return ERR_PTR(-ENOENT); |
2238 | 2236 | ||
2239 | it->mrt = mrt; | 2237 | it->mrt = mrt; |
2240 | it->cache = NULL; | 2238 | it->cache = NULL; |
2241 | it->ct = 0; | 2239 | it->ct = 0; |
2242 | return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1) | 2240 | return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1) |
2243 | : SEQ_START_TOKEN; | 2241 | : SEQ_START_TOKEN; |
2244 | } | 2242 | } |
2245 | 2243 | ||
2246 | static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) | 2244 | static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) |
2247 | { | 2245 | { |
2248 | struct mfc_cache *mfc = v; | 2246 | struct mfc_cache *mfc = v; |
2249 | struct ipmr_mfc_iter *it = seq->private; | 2247 | struct ipmr_mfc_iter *it = seq->private; |
2250 | struct net *net = seq_file_net(seq); | 2248 | struct net *net = seq_file_net(seq); |
2251 | struct mr_table *mrt = it->mrt; | 2249 | struct mr_table *mrt = it->mrt; |
2252 | 2250 | ||
2253 | ++*pos; | 2251 | ++*pos; |
2254 | 2252 | ||
2255 | if (v == SEQ_START_TOKEN) | 2253 | if (v == SEQ_START_TOKEN) |
2256 | return ipmr_mfc_seq_idx(net, seq->private, 0); | 2254 | return ipmr_mfc_seq_idx(net, seq->private, 0); |
2257 | 2255 | ||
2258 | if (mfc->list.next != it->cache) | 2256 | if (mfc->list.next != it->cache) |
2259 | return list_entry(mfc->list.next, struct mfc_cache, list); | 2257 | return list_entry(mfc->list.next, struct mfc_cache, list); |
2260 | 2258 | ||
2261 | if (it->cache == &mrt->mfc_unres_queue) | 2259 | if (it->cache == &mrt->mfc_unres_queue) |
2262 | goto end_of_list; | 2260 | goto end_of_list; |
2263 | 2261 | ||
2264 | BUG_ON(it->cache != &mrt->mfc_cache_array[it->ct]); | 2262 | BUG_ON(it->cache != &mrt->mfc_cache_array[it->ct]); |
2265 | 2263 | ||
2266 | while (++it->ct < MFC_LINES) { | 2264 | while (++it->ct < MFC_LINES) { |
2267 | it->cache = &mrt->mfc_cache_array[it->ct]; | 2265 | it->cache = &mrt->mfc_cache_array[it->ct]; |
2268 | if (list_empty(it->cache)) | 2266 | if (list_empty(it->cache)) |
2269 | continue; | 2267 | continue; |
2270 | return list_first_entry(it->cache, struct mfc_cache, list); | 2268 | return list_first_entry(it->cache, struct mfc_cache, list); |
2271 | } | 2269 | } |
2272 | 2270 | ||
2273 | /* exhausted cache_array, show unresolved */ | 2271 | /* exhausted cache_array, show unresolved */ |
2274 | read_unlock(&mrt_lock); | 2272 | read_unlock(&mrt_lock); |
2275 | it->cache = &mrt->mfc_unres_queue; | 2273 | it->cache = &mrt->mfc_unres_queue; |
2276 | it->ct = 0; | 2274 | it->ct = 0; |
2277 | 2275 | ||
2278 | spin_lock_bh(&mfc_unres_lock); | 2276 | spin_lock_bh(&mfc_unres_lock); |
2279 | if (!list_empty(it->cache)) | 2277 | if (!list_empty(it->cache)) |
2280 | return list_first_entry(it->cache, struct mfc_cache, list); | 2278 | return list_first_entry(it->cache, struct mfc_cache, list); |
2281 | 2279 | ||
2282 | end_of_list: | 2280 | end_of_list: |
2283 | spin_unlock_bh(&mfc_unres_lock); | 2281 | spin_unlock_bh(&mfc_unres_lock); |
2284 | it->cache = NULL; | 2282 | it->cache = NULL; |
2285 | 2283 | ||
2286 | return NULL; | 2284 | return NULL; |
2287 | } | 2285 | } |
2288 | 2286 | ||
2289 | static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v) | 2287 | static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v) |
2290 | { | 2288 | { |
2291 | struct ipmr_mfc_iter *it = seq->private; | 2289 | struct ipmr_mfc_iter *it = seq->private; |
2292 | struct mr_table *mrt = it->mrt; | 2290 | struct mr_table *mrt = it->mrt; |
2293 | 2291 | ||
2294 | if (it->cache == &mrt->mfc_unres_queue) | 2292 | if (it->cache == &mrt->mfc_unres_queue) |
2295 | spin_unlock_bh(&mfc_unres_lock); | 2293 | spin_unlock_bh(&mfc_unres_lock); |
2296 | else if (it->cache == &mrt->mfc_cache_array[it->ct]) | 2294 | else if (it->cache == &mrt->mfc_cache_array[it->ct]) |
2297 | read_unlock(&mrt_lock); | 2295 | read_unlock(&mrt_lock); |
2298 | } | 2296 | } |
2299 | 2297 | ||
2300 | static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) | 2298 | static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) |
2301 | { | 2299 | { |
2302 | int n; | 2300 | int n; |
2303 | 2301 | ||
2304 | if (v == SEQ_START_TOKEN) { | 2302 | if (v == SEQ_START_TOKEN) { |
2305 | seq_puts(seq, | 2303 | seq_puts(seq, |
2306 | "Group Origin Iif Pkts Bytes Wrong Oifs\n"); | 2304 | "Group Origin Iif Pkts Bytes Wrong Oifs\n"); |
2307 | } else { | 2305 | } else { |
2308 | const struct mfc_cache *mfc = v; | 2306 | const struct mfc_cache *mfc = v; |
2309 | const struct ipmr_mfc_iter *it = seq->private; | 2307 | const struct ipmr_mfc_iter *it = seq->private; |
2310 | const struct mr_table *mrt = it->mrt; | 2308 | const struct mr_table *mrt = it->mrt; |
2311 | 2309 | ||
2312 | seq_printf(seq, "%08X %08X %-3hd", | 2310 | seq_printf(seq, "%08X %08X %-3hd", |
2313 | (__force u32) mfc->mfc_mcastgrp, | 2311 | (__force u32) mfc->mfc_mcastgrp, |
2314 | (__force u32) mfc->mfc_origin, | 2312 | (__force u32) mfc->mfc_origin, |
2315 | mfc->mfc_parent); | 2313 | mfc->mfc_parent); |
2316 | 2314 | ||
2317 | if (it->cache != &mrt->mfc_unres_queue) { | 2315 | if (it->cache != &mrt->mfc_unres_queue) { |
2318 | seq_printf(seq, " %8lu %8lu %8lu", | 2316 | seq_printf(seq, " %8lu %8lu %8lu", |
2319 | mfc->mfc_un.res.pkt, | 2317 | mfc->mfc_un.res.pkt, |
2320 | mfc->mfc_un.res.bytes, | 2318 | mfc->mfc_un.res.bytes, |
2321 | mfc->mfc_un.res.wrong_if); | 2319 | mfc->mfc_un.res.wrong_if); |
2322 | for (n = mfc->mfc_un.res.minvif; | 2320 | for (n = mfc->mfc_un.res.minvif; |
2323 | n < mfc->mfc_un.res.maxvif; n++ ) { | 2321 | n < mfc->mfc_un.res.maxvif; n++ ) { |
2324 | if (VIF_EXISTS(mrt, n) && | 2322 | if (VIF_EXISTS(mrt, n) && |
2325 | mfc->mfc_un.res.ttls[n] < 255) | 2323 | mfc->mfc_un.res.ttls[n] < 255) |
2326 | seq_printf(seq, | 2324 | seq_printf(seq, |
2327 | " %2d:%-3d", | 2325 | " %2d:%-3d", |
2328 | n, mfc->mfc_un.res.ttls[n]); | 2326 | n, mfc->mfc_un.res.ttls[n]); |
2329 | } | 2327 | } |
2330 | } else { | 2328 | } else { |
2331 | /* unresolved mfc_caches don't contain | 2329 | /* unresolved mfc_caches don't contain |
2332 | * pkt, bytes and wrong_if values | 2330 | * pkt, bytes and wrong_if values |
2333 | */ | 2331 | */ |
2334 | seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul); | 2332 | seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul); |
2335 | } | 2333 | } |
2336 | seq_putc(seq, '\n'); | 2334 | seq_putc(seq, '\n'); |
2337 | } | 2335 | } |
2338 | return 0; | 2336 | return 0; |
2339 | } | 2337 | } |
2340 | 2338 | ||
2341 | static const struct seq_operations ipmr_mfc_seq_ops = { | 2339 | static const struct seq_operations ipmr_mfc_seq_ops = { |
2342 | .start = ipmr_mfc_seq_start, | 2340 | .start = ipmr_mfc_seq_start, |
2343 | .next = ipmr_mfc_seq_next, | 2341 | .next = ipmr_mfc_seq_next, |
2344 | .stop = ipmr_mfc_seq_stop, | 2342 | .stop = ipmr_mfc_seq_stop, |
2345 | .show = ipmr_mfc_seq_show, | 2343 | .show = ipmr_mfc_seq_show, |
2346 | }; | 2344 | }; |
2347 | 2345 | ||
2348 | static int ipmr_mfc_open(struct inode *inode, struct file *file) | 2346 | static int ipmr_mfc_open(struct inode *inode, struct file *file) |
2349 | { | 2347 | { |
2350 | return seq_open_net(inode, file, &ipmr_mfc_seq_ops, | 2348 | return seq_open_net(inode, file, &ipmr_mfc_seq_ops, |
2351 | sizeof(struct ipmr_mfc_iter)); | 2349 | sizeof(struct ipmr_mfc_iter)); |
2352 | } | 2350 | } |
2353 | 2351 | ||
2354 | static const struct file_operations ipmr_mfc_fops = { | 2352 | static const struct file_operations ipmr_mfc_fops = { |
2355 | .owner = THIS_MODULE, | 2353 | .owner = THIS_MODULE, |
2356 | .open = ipmr_mfc_open, | 2354 | .open = ipmr_mfc_open, |
2357 | .read = seq_read, | 2355 | .read = seq_read, |
2358 | .llseek = seq_lseek, | 2356 | .llseek = seq_lseek, |
2359 | .release = seq_release_net, | 2357 | .release = seq_release_net, |
2360 | }; | 2358 | }; |
2361 | #endif | 2359 | #endif |
2362 | 2360 | ||
2363 | #ifdef CONFIG_IP_PIMSM_V2 | 2361 | #ifdef CONFIG_IP_PIMSM_V2 |
2364 | static const struct net_protocol pim_protocol = { | 2362 | static const struct net_protocol pim_protocol = { |
2365 | .handler = pim_rcv, | 2363 | .handler = pim_rcv, |
2366 | .netns_ok = 1, | 2364 | .netns_ok = 1, |
2367 | }; | 2365 | }; |
2368 | #endif | 2366 | #endif |
2369 | 2367 | ||
2370 | 2368 | ||
2371 | /* | 2369 | /* |
2372 | * Setup for IP multicast routing | 2370 | * Setup for IP multicast routing |
2373 | */ | 2371 | */ |
2374 | static int __net_init ipmr_net_init(struct net *net) | 2372 | static int __net_init ipmr_net_init(struct net *net) |
2375 | { | 2373 | { |
2376 | int err; | 2374 | int err; |
2377 | 2375 | ||
2378 | err = ipmr_rules_init(net); | 2376 | err = ipmr_rules_init(net); |
2379 | if (err < 0) | 2377 | if (err < 0) |
2380 | goto fail; | 2378 | goto fail; |
2381 | 2379 | ||
2382 | #ifdef CONFIG_PROC_FS | 2380 | #ifdef CONFIG_PROC_FS |
2383 | err = -ENOMEM; | 2381 | err = -ENOMEM; |
2384 | if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops)) | 2382 | if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops)) |
2385 | goto proc_vif_fail; | 2383 | goto proc_vif_fail; |
2386 | if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops)) | 2384 | if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops)) |
2387 | goto proc_cache_fail; | 2385 | goto proc_cache_fail; |
2388 | #endif | 2386 | #endif |
2389 | return 0; | 2387 | return 0; |
2390 | 2388 | ||
2391 | #ifdef CONFIG_PROC_FS | 2389 | #ifdef CONFIG_PROC_FS |
2392 | proc_cache_fail: | 2390 | proc_cache_fail: |
2393 | proc_net_remove(net, "ip_mr_vif"); | 2391 | proc_net_remove(net, "ip_mr_vif"); |
2394 | proc_vif_fail: | 2392 | proc_vif_fail: |
2395 | ipmr_rules_exit(net); | 2393 | ipmr_rules_exit(net); |
2396 | #endif | 2394 | #endif |
2397 | fail: | 2395 | fail: |
2398 | return err; | 2396 | return err; |
2399 | } | 2397 | } |
2400 | 2398 | ||
2401 | static void __net_exit ipmr_net_exit(struct net *net) | 2399 | static void __net_exit ipmr_net_exit(struct net *net) |
2402 | { | 2400 | { |
2403 | #ifdef CONFIG_PROC_FS | 2401 | #ifdef CONFIG_PROC_FS |
2404 | proc_net_remove(net, "ip_mr_cache"); | 2402 | proc_net_remove(net, "ip_mr_cache"); |
2405 | proc_net_remove(net, "ip_mr_vif"); | 2403 | proc_net_remove(net, "ip_mr_vif"); |
2406 | #endif | 2404 | #endif |
2407 | ipmr_rules_exit(net); | 2405 | ipmr_rules_exit(net); |
2408 | } | 2406 | } |
2409 | 2407 | ||
2410 | static struct pernet_operations ipmr_net_ops = { | 2408 | static struct pernet_operations ipmr_net_ops = { |
2411 | .init = ipmr_net_init, | 2409 | .init = ipmr_net_init, |
2412 | .exit = ipmr_net_exit, | 2410 | .exit = ipmr_net_exit, |
2413 | }; | 2411 | }; |
2414 | 2412 | ||
2415 | int __init ip_mr_init(void) | 2413 | int __init ip_mr_init(void) |
2416 | { | 2414 | { |
2417 | int err; | 2415 | int err; |
2418 | 2416 | ||
2419 | mrt_cachep = kmem_cache_create("ip_mrt_cache", | 2417 | mrt_cachep = kmem_cache_create("ip_mrt_cache", |
2420 | sizeof(struct mfc_cache), | 2418 | sizeof(struct mfc_cache), |
2421 | 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, | 2419 | 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, |
2422 | NULL); | 2420 | NULL); |
2423 | if (!mrt_cachep) | 2421 | if (!mrt_cachep) |
2424 | return -ENOMEM; | 2422 | return -ENOMEM; |
2425 | 2423 | ||
2426 | err = register_pernet_subsys(&ipmr_net_ops); | 2424 | err = register_pernet_subsys(&ipmr_net_ops); |
2427 | if (err) | 2425 | if (err) |
2428 | goto reg_pernet_fail; | 2426 | goto reg_pernet_fail; |
2429 | 2427 | ||
2430 | err = register_netdevice_notifier(&ip_mr_notifier); | 2428 | err = register_netdevice_notifier(&ip_mr_notifier); |
2431 | if (err) | 2429 | if (err) |
2432 | goto reg_notif_fail; | 2430 | goto reg_notif_fail; |
2433 | #ifdef CONFIG_IP_PIMSM_V2 | 2431 | #ifdef CONFIG_IP_PIMSM_V2 |
2434 | if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) { | 2432 | if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) { |
2435 | printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n"); | 2433 | printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n"); |
2436 | err = -EAGAIN; | 2434 | err = -EAGAIN; |
2437 | goto add_proto_fail; | 2435 | goto add_proto_fail; |
2438 | } | 2436 | } |
2439 | #endif | 2437 | #endif |
2440 | rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, NULL, ipmr_rtm_dumproute); | 2438 | rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, NULL, ipmr_rtm_dumproute); |
2441 | return 0; | 2439 | return 0; |
2442 | 2440 | ||
2443 | #ifdef CONFIG_IP_PIMSM_V2 | 2441 | #ifdef CONFIG_IP_PIMSM_V2 |
2444 | add_proto_fail: | 2442 | add_proto_fail: |
2445 | unregister_netdevice_notifier(&ip_mr_notifier); | 2443 | unregister_netdevice_notifier(&ip_mr_notifier); |
2446 | #endif | 2444 | #endif |
2447 | reg_notif_fail: | 2445 | reg_notif_fail: |
2448 | unregister_pernet_subsys(&ipmr_net_ops); | 2446 | unregister_pernet_subsys(&ipmr_net_ops); |
2449 | reg_pernet_fail: | 2447 | reg_pernet_fail: |
2450 | kmem_cache_destroy(mrt_cachep); | 2448 | kmem_cache_destroy(mrt_cachep); |
2451 | return err; | 2449 | return err; |
2452 | } | 2450 | } |
2453 | 2451 |
net/ipv6/ip6_tunnel.c
1 | /* | 1 | /* |
2 | * IPv6 tunneling device | 2 | * IPv6 tunneling device |
3 | * Linux INET6 implementation | 3 | * Linux INET6 implementation |
4 | * | 4 | * |
5 | * Authors: | 5 | * Authors: |
6 | * Ville Nuorvala <vnuorval@tcs.hut.fi> | 6 | * Ville Nuorvala <vnuorval@tcs.hut.fi> |
7 | * Yasuyuki Kozakai <kozakai@linux-ipv6.org> | 7 | * Yasuyuki Kozakai <kozakai@linux-ipv6.org> |
8 | * | 8 | * |
9 | * Based on: | 9 | * Based on: |
10 | * linux/net/ipv6/sit.c and linux/net/ipv4/ipip.c | 10 | * linux/net/ipv6/sit.c and linux/net/ipv4/ipip.c |
11 | * | 11 | * |
12 | * RFC 2473 | 12 | * RFC 2473 |
13 | * | 13 | * |
14 | * This program is free software; you can redistribute it and/or | 14 | * This program is free software; you can redistribute it and/or |
15 | * modify it under the terms of the GNU General Public License | 15 | * modify it under the terms of the GNU General Public License |
16 | * as published by the Free Software Foundation; either version | 16 | * as published by the Free Software Foundation; either version |
17 | * 2 of the License, or (at your option) any later version. | 17 | * 2 of the License, or (at your option) any later version. |
18 | * | 18 | * |
19 | */ | 19 | */ |
20 | 20 | ||
21 | #include <linux/module.h> | 21 | #include <linux/module.h> |
22 | #include <linux/capability.h> | 22 | #include <linux/capability.h> |
23 | #include <linux/errno.h> | 23 | #include <linux/errno.h> |
24 | #include <linux/types.h> | 24 | #include <linux/types.h> |
25 | #include <linux/sockios.h> | 25 | #include <linux/sockios.h> |
26 | #include <linux/icmp.h> | 26 | #include <linux/icmp.h> |
27 | #include <linux/if.h> | 27 | #include <linux/if.h> |
28 | #include <linux/in.h> | 28 | #include <linux/in.h> |
29 | #include <linux/ip.h> | 29 | #include <linux/ip.h> |
30 | #include <linux/if_tunnel.h> | 30 | #include <linux/if_tunnel.h> |
31 | #include <linux/net.h> | 31 | #include <linux/net.h> |
32 | #include <linux/in6.h> | 32 | #include <linux/in6.h> |
33 | #include <linux/netdevice.h> | 33 | #include <linux/netdevice.h> |
34 | #include <linux/if_arp.h> | 34 | #include <linux/if_arp.h> |
35 | #include <linux/icmpv6.h> | 35 | #include <linux/icmpv6.h> |
36 | #include <linux/init.h> | 36 | #include <linux/init.h> |
37 | #include <linux/route.h> | 37 | #include <linux/route.h> |
38 | #include <linux/rtnetlink.h> | 38 | #include <linux/rtnetlink.h> |
39 | #include <linux/netfilter_ipv6.h> | 39 | #include <linux/netfilter_ipv6.h> |
40 | #include <linux/slab.h> | 40 | #include <linux/slab.h> |
41 | 41 | ||
42 | #include <asm/uaccess.h> | 42 | #include <asm/uaccess.h> |
43 | #include <asm/atomic.h> | 43 | #include <asm/atomic.h> |
44 | 44 | ||
45 | #include <net/icmp.h> | 45 | #include <net/icmp.h> |
46 | #include <net/ip.h> | 46 | #include <net/ip.h> |
47 | #include <net/ipv6.h> | 47 | #include <net/ipv6.h> |
48 | #include <net/ip6_route.h> | 48 | #include <net/ip6_route.h> |
49 | #include <net/addrconf.h> | 49 | #include <net/addrconf.h> |
50 | #include <net/ip6_tunnel.h> | 50 | #include <net/ip6_tunnel.h> |
51 | #include <net/xfrm.h> | 51 | #include <net/xfrm.h> |
52 | #include <net/dsfield.h> | 52 | #include <net/dsfield.h> |
53 | #include <net/inet_ecn.h> | 53 | #include <net/inet_ecn.h> |
54 | #include <net/net_namespace.h> | 54 | #include <net/net_namespace.h> |
55 | #include <net/netns/generic.h> | 55 | #include <net/netns/generic.h> |
56 | 56 | ||
57 | MODULE_AUTHOR("Ville Nuorvala"); | 57 | MODULE_AUTHOR("Ville Nuorvala"); |
58 | MODULE_DESCRIPTION("IPv6 tunneling device"); | 58 | MODULE_DESCRIPTION("IPv6 tunneling device"); |
59 | MODULE_LICENSE("GPL"); | 59 | MODULE_LICENSE("GPL"); |
60 | 60 | ||
61 | #define IPV6_TLV_TEL_DST_SIZE 8 | 61 | #define IPV6_TLV_TEL_DST_SIZE 8 |
62 | 62 | ||
63 | #ifdef IP6_TNL_DEBUG | 63 | #ifdef IP6_TNL_DEBUG |
64 | #define IP6_TNL_TRACE(x...) printk(KERN_DEBUG "%s:" x "\n", __func__) | 64 | #define IP6_TNL_TRACE(x...) printk(KERN_DEBUG "%s:" x "\n", __func__) |
65 | #else | 65 | #else |
66 | #define IP6_TNL_TRACE(x...) do {;} while(0) | 66 | #define IP6_TNL_TRACE(x...) do {;} while(0) |
67 | #endif | 67 | #endif |
68 | 68 | ||
69 | #define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK) | 69 | #define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK) |
70 | #define IPV6_TCLASS_SHIFT 20 | 70 | #define IPV6_TCLASS_SHIFT 20 |
71 | 71 | ||
72 | #define HASH_SIZE 32 | 72 | #define HASH_SIZE 32 |
73 | 73 | ||
74 | #define HASH(addr) ((__force u32)((addr)->s6_addr32[0] ^ (addr)->s6_addr32[1] ^ \ | 74 | #define HASH(addr) ((__force u32)((addr)->s6_addr32[0] ^ (addr)->s6_addr32[1] ^ \ |
75 | (addr)->s6_addr32[2] ^ (addr)->s6_addr32[3]) & \ | 75 | (addr)->s6_addr32[2] ^ (addr)->s6_addr32[3]) & \ |
76 | (HASH_SIZE - 1)) | 76 | (HASH_SIZE - 1)) |
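HASH() folds all 128 bits of an IPv6 address into a 5-bit bucket index by XOR-ing the address's four 32-bit words and masking with HASH_SIZE - 1; ip6_tnl_lookup() below then XORs the two per-endpoint hashes to pick a chain. A userspace restatement (illustrative only; struct in6_words stands in for the s6_addr32 view of in6_addr):

    #include <stdint.h>
    #include <stdio.h>

    #define HASH_SIZE 32

    struct in6_words { uint32_t w[4]; };    /* stand-in for in6_addr */

    static uint32_t tnl_hash(const struct in6_words *a)
    {
        /* XOR the four 32-bit words, keep the low five bits. */
        return (a->w[0] ^ a->w[1] ^ a->w[2] ^ a->w[3]) & (HASH_SIZE - 1);
    }

    int main(void)
    {
        struct in6_words remote = { { 0x20010db8, 0, 0, 1 } };
        struct in6_words local  = { { 0x20010db8, 0, 0, 2 } };

        printf("bucket %u\n", tnl_hash(&remote) ^ tnl_hash(&local));
        return 0;
    }

The combined value is the index ip6_tnl_bucket() stores under prio 1; a tunnel with both endpoints wildcarded lands in the single prio 0 slot instead.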
77 | 77 | ||
78 | static void ip6_tnl_dev_init(struct net_device *dev); | 78 | static void ip6_tnl_dev_init(struct net_device *dev); |
79 | static void ip6_tnl_dev_setup(struct net_device *dev); | 79 | static void ip6_tnl_dev_setup(struct net_device *dev); |
80 | 80 | ||
81 | static int ip6_tnl_net_id __read_mostly; | 81 | static int ip6_tnl_net_id __read_mostly; |
82 | struct ip6_tnl_net { | 82 | struct ip6_tnl_net { |
83 | /* the IPv6 tunnel fallback device */ | 83 | /* the IPv6 tunnel fallback device */ |
84 | struct net_device *fb_tnl_dev; | 84 | struct net_device *fb_tnl_dev; |
85 | /* lists for storing tunnels in use */ | 85 | /* lists for storing tunnels in use */ |
86 | struct ip6_tnl *tnls_r_l[HASH_SIZE]; | 86 | struct ip6_tnl *tnls_r_l[HASH_SIZE]; |
87 | struct ip6_tnl *tnls_wc[1]; | 87 | struct ip6_tnl *tnls_wc[1]; |
88 | struct ip6_tnl **tnls[2]; | 88 | struct ip6_tnl **tnls[2]; |
89 | }; | 89 | }; |
90 | 90 | ||
91 | /* | 91 | /* |
92 | * Locking: hash tables are protected by RCU and a spinlock | 92 | * Locking: hash tables are protected by RCU and a spinlock |
93 | */ | 93 | */ |
94 | static DEFINE_SPINLOCK(ip6_tnl_lock); | 94 | static DEFINE_SPINLOCK(ip6_tnl_lock); |
95 | 95 | ||
96 | static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t) | 96 | static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t) |
97 | { | 97 | { |
98 | struct dst_entry *dst = t->dst_cache; | 98 | struct dst_entry *dst = t->dst_cache; |
99 | 99 | ||
100 | if (dst && dst->obsolete && | 100 | if (dst && dst->obsolete && |
101 | dst->ops->check(dst, t->dst_cookie) == NULL) { | 101 | dst->ops->check(dst, t->dst_cookie) == NULL) { |
102 | t->dst_cache = NULL; | 102 | t->dst_cache = NULL; |
103 | dst_release(dst); | 103 | dst_release(dst); |
104 | return NULL; | 104 | return NULL; |
105 | } | 105 | } |
106 | 106 | ||
107 | return dst; | 107 | return dst; |
108 | } | 108 | } |
109 | 109 | ||
110 | static inline void ip6_tnl_dst_reset(struct ip6_tnl *t) | 110 | static inline void ip6_tnl_dst_reset(struct ip6_tnl *t) |
111 | { | 111 | { |
112 | dst_release(t->dst_cache); | 112 | dst_release(t->dst_cache); |
113 | t->dst_cache = NULL; | 113 | t->dst_cache = NULL; |
114 | } | 114 | } |
115 | 115 | ||
116 | static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst) | 116 | static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst) |
117 | { | 117 | { |
118 | struct rt6_info *rt = (struct rt6_info *) dst; | 118 | struct rt6_info *rt = (struct rt6_info *) dst; |
119 | t->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; | 119 | t->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; |
120 | dst_release(t->dst_cache); | 120 | dst_release(t->dst_cache); |
121 | t->dst_cache = dst; | 121 | t->dst_cache = dst; |
122 | } | 122 | } |
123 | 123 | ||
124 | /** | 124 | /** |
125 | * ip6_tnl_lookup - fetch tunnel matching the end-point addresses | 125 | * ip6_tnl_lookup - fetch tunnel matching the end-point addresses |
126 | * @remote: the address of the tunnel exit-point | 126 | * @remote: the address of the tunnel exit-point |
127 | * @local: the address of the tunnel entry-point | 127 | * @local: the address of the tunnel entry-point |
128 | * | 128 | * |
129 | * Return: | 129 | * Return: |
130 | * tunnel matching given end-points if found, | 130 | * tunnel matching given end-points if found, |
131 | * else fallback tunnel if its device is up, | 131 | * else fallback tunnel if its device is up, |
132 | * else %NULL | 132 | * else %NULL |
133 | **/ | 133 | **/ |
134 | 134 | ||
135 | #define for_each_ip6_tunnel_rcu(start) \ | 135 | #define for_each_ip6_tunnel_rcu(start) \ |
136 | for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) | 136 | for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) |
137 | 137 | ||
138 | static struct ip6_tnl * | 138 | static struct ip6_tnl * |
139 | ip6_tnl_lookup(struct net *net, struct in6_addr *remote, struct in6_addr *local) | 139 | ip6_tnl_lookup(struct net *net, struct in6_addr *remote, struct in6_addr *local) |
140 | { | 140 | { |
141 | unsigned h0 = HASH(remote); | 141 | unsigned h0 = HASH(remote); |
142 | unsigned h1 = HASH(local); | 142 | unsigned h1 = HASH(local); |
143 | struct ip6_tnl *t; | 143 | struct ip6_tnl *t; |
144 | struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); | 144 | struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); |
145 | 145 | ||
146 | for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[h0 ^ h1]) { | 146 | for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[h0 ^ h1]) { |
147 | if (ipv6_addr_equal(local, &t->parms.laddr) && | 147 | if (ipv6_addr_equal(local, &t->parms.laddr) && |
148 | ipv6_addr_equal(remote, &t->parms.raddr) && | 148 | ipv6_addr_equal(remote, &t->parms.raddr) && |
149 | (t->dev->flags & IFF_UP)) | 149 | (t->dev->flags & IFF_UP)) |
150 | return t; | 150 | return t; |
151 | } | 151 | } |
152 | t = rcu_dereference(ip6n->tnls_wc[0]); | 152 | t = rcu_dereference(ip6n->tnls_wc[0]); |
153 | if (t && (t->dev->flags & IFF_UP)) | 153 | if (t && (t->dev->flags & IFF_UP)) |
154 | return t; | 154 | return t; |
155 | 155 | ||
156 | return NULL; | 156 | return NULL; |
157 | } | 157 | } |
158 | 158 | ||
159 | /** | 159 | /** |
160 | * ip6_tnl_bucket - get head of list matching given tunnel parameters | 160 | * ip6_tnl_bucket - get head of list matching given tunnel parameters |
161 | * @p: parameters containing tunnel end-points | 161 | * @p: parameters containing tunnel end-points |
162 | * | 162 | * |
163 | * Description: | 163 | * Description: |
164 | * ip6_tnl_bucket() returns the head of the list matching the | 164 | * ip6_tnl_bucket() returns the head of the list matching the |
165 | * &struct in6_addr entries laddr and raddr in @p. | 165 | * &struct in6_addr entries laddr and raddr in @p. |
166 | * | 166 | * |
167 | * Return: head of IPv6 tunnel list | 167 | * Return: head of IPv6 tunnel list |
168 | **/ | 168 | **/ |
169 | 169 | ||
170 | static struct ip6_tnl ** | 170 | static struct ip6_tnl ** |
171 | ip6_tnl_bucket(struct ip6_tnl_net *ip6n, struct ip6_tnl_parm *p) | 171 | ip6_tnl_bucket(struct ip6_tnl_net *ip6n, struct ip6_tnl_parm *p) |
172 | { | 172 | { |
173 | struct in6_addr *remote = &p->raddr; | 173 | struct in6_addr *remote = &p->raddr; |
174 | struct in6_addr *local = &p->laddr; | 174 | struct in6_addr *local = &p->laddr; |
175 | unsigned h = 0; | 175 | unsigned h = 0; |
176 | int prio = 0; | 176 | int prio = 0; |
177 | 177 | ||
178 | if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) { | 178 | if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) { |
179 | prio = 1; | 179 | prio = 1; |
180 | h = HASH(remote) ^ HASH(local); | 180 | h = HASH(remote) ^ HASH(local); |
181 | } | 181 | } |
182 | return &ip6n->tnls[prio][h]; | 182 | return &ip6n->tnls[prio][h]; |
183 | } | 183 | } |
184 | 184 | ||
185 | /** | 185 | /** |
186 | * ip6_tnl_link - add tunnel to hash table | 186 | * ip6_tnl_link - add tunnel to hash table |
187 | * @t: tunnel to be added | 187 | * @t: tunnel to be added |
188 | **/ | 188 | **/ |
189 | 189 | ||
190 | static void | 190 | static void |
191 | ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t) | 191 | ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t) |
192 | { | 192 | { |
193 | struct ip6_tnl **tp = ip6_tnl_bucket(ip6n, &t->parms); | 193 | struct ip6_tnl **tp = ip6_tnl_bucket(ip6n, &t->parms); |
194 | 194 | ||
195 | spin_lock_bh(&ip6_tnl_lock); | 195 | spin_lock_bh(&ip6_tnl_lock); |
196 | t->next = *tp; | 196 | t->next = *tp; |
197 | rcu_assign_pointer(*tp, t); | 197 | rcu_assign_pointer(*tp, t); |
198 | spin_unlock_bh(&ip6_tnl_lock); | 198 | spin_unlock_bh(&ip6_tnl_lock); |
199 | } | 199 | } |
200 | 200 | ||
201 | /** | 201 | /** |
202 | * ip6_tnl_unlink - remove tunnel from hash table | 202 | * ip6_tnl_unlink - remove tunnel from hash table |
203 | * @t: tunnel to be removed | 203 | * @t: tunnel to be removed |
204 | **/ | 204 | **/ |
205 | 205 | ||
206 | static void | 206 | static void |
207 | ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t) | 207 | ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t) |
208 | { | 208 | { |
209 | struct ip6_tnl **tp; | 209 | struct ip6_tnl **tp; |
210 | 210 | ||
211 | for (tp = ip6_tnl_bucket(ip6n, &t->parms); *tp; tp = &(*tp)->next) { | 211 | for (tp = ip6_tnl_bucket(ip6n, &t->parms); *tp; tp = &(*tp)->next) { |
212 | if (t == *tp) { | 212 | if (t == *tp) { |
213 | spin_lock_bh(&ip6_tnl_lock); | 213 | spin_lock_bh(&ip6_tnl_lock); |
214 | *tp = t->next; | 214 | *tp = t->next; |
215 | spin_unlock_bh(&ip6_tnl_lock); | 215 | spin_unlock_bh(&ip6_tnl_lock); |
216 | break; | 216 | break; |
217 | } | 217 | } |
218 | } | 218 | } |
219 | } | 219 | } |
220 | 220 | ||
221 | /** | 221 | /** |
222 | * ip6_tnl_create() - create a new tunnel | 222 | * ip6_tnl_create() - create a new tunnel |
223 | * @p: tunnel parameters | 223 | * @p: tunnel parameters |
225 | * | 225 | * |
226 | * Description: | 226 | * Description: |
227 | * Create tunnel matching given parameters. | 227 | * Create tunnel matching given parameters. |
228 | * | 228 | * |
229 | * Return: | 229 | * Return: |
230 | * created tunnel or NULL | 230 | * created tunnel or NULL |
231 | **/ | 231 | **/ |
232 | 232 | ||
233 | static struct ip6_tnl *ip6_tnl_create(struct net *net, struct ip6_tnl_parm *p) | 233 | static struct ip6_tnl *ip6_tnl_create(struct net *net, struct ip6_tnl_parm *p) |
234 | { | 234 | { |
235 | struct net_device *dev; | 235 | struct net_device *dev; |
236 | struct ip6_tnl *t; | 236 | struct ip6_tnl *t; |
237 | char name[IFNAMSIZ]; | 237 | char name[IFNAMSIZ]; |
238 | int err; | 238 | int err; |
239 | struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); | 239 | struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); |
240 | 240 | ||
241 | if (p->name[0]) | 241 | if (p->name[0]) |
242 | strlcpy(name, p->name, IFNAMSIZ); | 242 | strlcpy(name, p->name, IFNAMSIZ); |
243 | else | 243 | else |
244 | sprintf(name, "ip6tnl%%d"); | 244 | sprintf(name, "ip6tnl%%d"); |
245 | 245 | ||
246 | dev = alloc_netdev(sizeof (*t), name, ip6_tnl_dev_setup); | 246 | dev = alloc_netdev(sizeof (*t), name, ip6_tnl_dev_setup); |
247 | if (dev == NULL) | 247 | if (dev == NULL) |
248 | goto failed; | 248 | goto failed; |
249 | 249 | ||
250 | dev_net_set(dev, net); | 250 | dev_net_set(dev, net); |
251 | 251 | ||
252 | if (strchr(name, '%')) { | 252 | if (strchr(name, '%')) { |
253 | if (dev_alloc_name(dev, name) < 0) | 253 | if (dev_alloc_name(dev, name) < 0) |
254 | goto failed_free; | 254 | goto failed_free; |
255 | } | 255 | } |
256 | 256 | ||
257 | t = netdev_priv(dev); | 257 | t = netdev_priv(dev); |
258 | t->parms = *p; | 258 | t->parms = *p; |
259 | ip6_tnl_dev_init(dev); | 259 | ip6_tnl_dev_init(dev); |
260 | 260 | ||
261 | if ((err = register_netdevice(dev)) < 0) | 261 | if ((err = register_netdevice(dev)) < 0) |
262 | goto failed_free; | 262 | goto failed_free; |
263 | 263 | ||
264 | dev_hold(dev); | 264 | dev_hold(dev); |
265 | ip6_tnl_link(ip6n, t); | 265 | ip6_tnl_link(ip6n, t); |
266 | return t; | 266 | return t; |
267 | 267 | ||
268 | failed_free: | 268 | failed_free: |
269 | free_netdev(dev); | 269 | free_netdev(dev); |
270 | failed: | 270 | failed: |
271 | return NULL; | 271 | return NULL; |
272 | } | 272 | } |
273 | 273 | ||
274 | /** | 274 | /** |
275 | * ip6_tnl_locate - find or create tunnel matching given parameters | 275 | * ip6_tnl_locate - find or create tunnel matching given parameters |
276 | * @p: tunnel parameters | 276 | * @p: tunnel parameters |
277 | * @create: != 0 if allowed to create new tunnel if no match found | 277 | * @create: != 0 if allowed to create new tunnel if no match found |
278 | * | 278 | * |
279 | * Description: | 279 | * Description: |
280 | * ip6_tnl_locate() first tries to locate an existing tunnel | 280 | * ip6_tnl_locate() first tries to locate an existing tunnel |
281 | * based on @p. If this is unsuccessful, but @create is set, a new | 281 | * based on @p. If this is unsuccessful, but @create is set, a new |
282 | * tunnel device is created and registered for use. | 282 | * tunnel device is created and registered for use. |
283 | * | 283 | * |
284 | * Return: | 284 | * Return: |
285 | * matching tunnel or NULL | 285 | * matching tunnel or NULL |
286 | **/ | 286 | **/ |
287 | 287 | ||
288 | static struct ip6_tnl *ip6_tnl_locate(struct net *net, | 288 | static struct ip6_tnl *ip6_tnl_locate(struct net *net, |
289 | struct ip6_tnl_parm *p, int create) | 289 | struct ip6_tnl_parm *p, int create) |
290 | { | 290 | { |
291 | struct in6_addr *remote = &p->raddr; | 291 | struct in6_addr *remote = &p->raddr; |
292 | struct in6_addr *local = &p->laddr; | 292 | struct in6_addr *local = &p->laddr; |
293 | struct ip6_tnl *t; | 293 | struct ip6_tnl *t; |
294 | struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); | 294 | struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); |
295 | 295 | ||
296 | for (t = *ip6_tnl_bucket(ip6n, p); t; t = t->next) { | 296 | for (t = *ip6_tnl_bucket(ip6n, p); t; t = t->next) { |
297 | if (ipv6_addr_equal(local, &t->parms.laddr) && | 297 | if (ipv6_addr_equal(local, &t->parms.laddr) && |
298 | ipv6_addr_equal(remote, &t->parms.raddr)) | 298 | ipv6_addr_equal(remote, &t->parms.raddr)) |
299 | return t; | 299 | return t; |
300 | } | 300 | } |
301 | if (!create) | 301 | if (!create) |
302 | return NULL; | 302 | return NULL; |
303 | return ip6_tnl_create(net, p); | 303 | return ip6_tnl_create(net, p); |
304 | } | 304 | } |
305 | 305 | ||
306 | /** | 306 | /** |
307 | * ip6_tnl_dev_uninit - tunnel device uninitializer | 307 | * ip6_tnl_dev_uninit - tunnel device uninitializer |
308 | * @dev: the device to be destroyed | 308 | * @dev: the device to be destroyed |
309 | * | 309 | * |
310 | * Description: | 310 | * Description: |
311 | * ip6_tnl_dev_uninit() removes tunnel from its list | 311 | * ip6_tnl_dev_uninit() removes tunnel from its list |
312 | **/ | 312 | **/ |
313 | 313 | ||
314 | static void | 314 | static void |
315 | ip6_tnl_dev_uninit(struct net_device *dev) | 315 | ip6_tnl_dev_uninit(struct net_device *dev) |
316 | { | 316 | { |
317 | struct ip6_tnl *t = netdev_priv(dev); | 317 | struct ip6_tnl *t = netdev_priv(dev); |
318 | struct net *net = dev_net(dev); | 318 | struct net *net = dev_net(dev); |
319 | struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); | 319 | struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); |
320 | 320 | ||
321 | if (dev == ip6n->fb_tnl_dev) { | 321 | if (dev == ip6n->fb_tnl_dev) { |
322 | spin_lock_bh(&ip6_tnl_lock); | 322 | spin_lock_bh(&ip6_tnl_lock); |
323 | ip6n->tnls_wc[0] = NULL; | 323 | ip6n->tnls_wc[0] = NULL; |
324 | spin_unlock_bh(&ip6_tnl_lock); | 324 | spin_unlock_bh(&ip6_tnl_lock); |
325 | } else { | 325 | } else { |
326 | ip6_tnl_unlink(ip6n, t); | 326 | ip6_tnl_unlink(ip6n, t); |
327 | } | 327 | } |
328 | ip6_tnl_dst_reset(t); | 328 | ip6_tnl_dst_reset(t); |
329 | dev_put(dev); | 329 | dev_put(dev); |
330 | } | 330 | } |
331 | 331 | ||
332 | /** | 332 | /** |
333 | * parse_tlv_tnl_enc_lim - handle encapsulation limit option | 333 | * parse_tlv_tnl_enc_lim - handle encapsulation limit option |
334 | * @skb: received socket buffer | 334 | * @skb: received socket buffer |
335 | * | 335 | * |
336 | * Return: | 336 | * Return: |
337 | * 0 if none was found, | 337 | * 0 if none was found, |
338 | * else index to encapsulation limit | 338 | * else index to encapsulation limit |
339 | **/ | 339 | **/ |
340 | 340 | ||
341 | static __u16 | 341 | static __u16 |
342 | parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 *raw) | 342 | parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 *raw) |
343 | { | 343 | { |
344 | struct ipv6hdr *ipv6h = (struct ipv6hdr *) raw; | 344 | struct ipv6hdr *ipv6h = (struct ipv6hdr *) raw; |
345 | __u8 nexthdr = ipv6h->nexthdr; | 345 | __u8 nexthdr = ipv6h->nexthdr; |
346 | __u16 off = sizeof (*ipv6h); | 346 | __u16 off = sizeof (*ipv6h); |
347 | 347 | ||
348 | while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) { | 348 | while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) { |
349 | __u16 optlen = 0; | 349 | __u16 optlen = 0; |
350 | struct ipv6_opt_hdr *hdr; | 350 | struct ipv6_opt_hdr *hdr; |
351 | if (raw + off + sizeof (*hdr) > skb->data && | 351 | if (raw + off + sizeof (*hdr) > skb->data && |
352 | !pskb_may_pull(skb, raw - skb->data + off + sizeof (*hdr))) | 352 | !pskb_may_pull(skb, raw - skb->data + off + sizeof (*hdr))) |
353 | break; | 353 | break; |
354 | 354 | ||
355 | hdr = (struct ipv6_opt_hdr *) (raw + off); | 355 | hdr = (struct ipv6_opt_hdr *) (raw + off); |
356 | if (nexthdr == NEXTHDR_FRAGMENT) { | 356 | if (nexthdr == NEXTHDR_FRAGMENT) { |
357 | struct frag_hdr *frag_hdr = (struct frag_hdr *) hdr; | 357 | struct frag_hdr *frag_hdr = (struct frag_hdr *) hdr; |
358 | if (frag_hdr->frag_off) | 358 | if (frag_hdr->frag_off) |
359 | break; | 359 | break; |
360 | optlen = 8; | 360 | optlen = 8; |
361 | } else if (nexthdr == NEXTHDR_AUTH) { | 361 | } else if (nexthdr == NEXTHDR_AUTH) { |
362 | optlen = (hdr->hdrlen + 2) << 2; | 362 | optlen = (hdr->hdrlen + 2) << 2; |
363 | } else { | 363 | } else { |
364 | optlen = ipv6_optlen(hdr); | 364 | optlen = ipv6_optlen(hdr); |
365 | } | 365 | } |
366 | if (nexthdr == NEXTHDR_DEST) { | 366 | if (nexthdr == NEXTHDR_DEST) { |
367 | __u16 i = off + 2; | 367 | __u16 i = off + 2; |
368 | while (1) { | 368 | while (1) { |
369 | struct ipv6_tlv_tnl_enc_lim *tel; | 369 | struct ipv6_tlv_tnl_enc_lim *tel; |
370 | 370 | ||
371 | /* No more room for encapsulation limit */ | 371 | /* No more room for encapsulation limit */ |
372 | if (i + sizeof (*tel) > off + optlen) | 372 | if (i + sizeof (*tel) > off + optlen) |
373 | break; | 373 | break; |
374 | 374 | ||
375 | tel = (struct ipv6_tlv_tnl_enc_lim *) &raw[i]; | 375 | tel = (struct ipv6_tlv_tnl_enc_lim *) &raw[i]; |
376 | /* return index of option if found and valid */ | 376 | /* return index of option if found and valid */ |
377 | if (tel->type == IPV6_TLV_TNL_ENCAP_LIMIT && | 377 | if (tel->type == IPV6_TLV_TNL_ENCAP_LIMIT && |
378 | tel->length == 1) | 378 | tel->length == 1) |
379 | return i; | 379 | return i; |
380 | /* else jump to next option */ | 380 | /* else jump to next option */ |
381 | if (tel->type) | 381 | if (tel->type) |
382 | i += tel->length + 2; | 382 | i += tel->length + 2; |
383 | else | 383 | else |
384 | i++; | 384 | i++; |
385 | } | 385 | } |
386 | } | 386 | } |
387 | nexthdr = hdr->nexthdr; | 387 | nexthdr = hdr->nexthdr; |
388 | off += optlen; | 388 | off += optlen; |
389 | } | 389 | } |
390 | return 0; | 390 | return 0; |
391 | } | 391 | } |
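The option this parser walks the extension-header chain looking for is the Tunnel Encapsulation Limit TLV of RFC 2473, carried in a destination options header. As a sketch, its on-the-wire layout mirrors the definition in include/net/ip6_tunnel.h:

	struct ipv6_tlv_tnl_enc_lim {
		__u8 type;		/* IPV6_TLV_TNL_ENCAP_LIMIT (4) */
		__u8 length;		/* always 1 */
		__u8 encap_limit;	/* remaining encapsulation budget */
	} __attribute__ ((packed));

parse_tlv_tnl_enc_lim() returns the byte offset of this TLV inside the packet, which is why 0 can double as "not found": offset 0 would fall inside the fixed IPv6 header.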
392 | 392 | ||
393 | /** | 393 | /** |
394 | * ip6_tnl_err - tunnel error handler | 394 | * ip6_tnl_err - tunnel error handler |
395 | * | 395 | * |
396 | * Description: | 396 | * Description: |
397 | * ip6_tnl_err() handles errors in the tunnel according | 397 | * ip6_tnl_err() handles errors in the tunnel according |
398 | * to the specifications in RFC 2473. | 398 | * to the specifications in RFC 2473. |
399 | **/ | 399 | **/ |
400 | 400 | ||
401 | static int | 401 | static int |
402 | ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, | 402 | ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, |
403 | u8 *type, u8 *code, int *msg, __u32 *info, int offset) | 403 | u8 *type, u8 *code, int *msg, __u32 *info, int offset) |
404 | { | 404 | { |
405 | struct ipv6hdr *ipv6h = (struct ipv6hdr *) skb->data; | 405 | struct ipv6hdr *ipv6h = (struct ipv6hdr *) skb->data; |
406 | struct ip6_tnl *t; | 406 | struct ip6_tnl *t; |
407 | int rel_msg = 0; | 407 | int rel_msg = 0; |
408 | u8 rel_type = ICMPV6_DEST_UNREACH; | 408 | u8 rel_type = ICMPV6_DEST_UNREACH; |
409 | u8 rel_code = ICMPV6_ADDR_UNREACH; | 409 | u8 rel_code = ICMPV6_ADDR_UNREACH; |
410 | __u32 rel_info = 0; | 410 | __u32 rel_info = 0; |
411 | __u16 len; | 411 | __u16 len; |
412 | int err = -ENOENT; | 412 | int err = -ENOENT; |
413 | 413 | ||
414 | /* If the packet doesn't contain the original IPv6 header we are | 414 | /* If the packet doesn't contain the original IPv6 header we are |
415 | in trouble since we might need the source address for further | 415 | in trouble since we might need the source address for further |
416 | processing of the error. */ | 416 | processing of the error. */ |
417 | 417 | ||
418 | rcu_read_lock(); | 418 | rcu_read_lock(); |
419 | if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->daddr, | 419 | if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->daddr, |
420 | &ipv6h->saddr)) == NULL) | 420 | &ipv6h->saddr)) == NULL) |
421 | goto out; | 421 | goto out; |
422 | 422 | ||
423 | if (t->parms.proto != ipproto && t->parms.proto != 0) | 423 | if (t->parms.proto != ipproto && t->parms.proto != 0) |
424 | goto out; | 424 | goto out; |
425 | 425 | ||
426 | err = 0; | 426 | err = 0; |
427 | 427 | ||
428 | switch (*type) { | 428 | switch (*type) { |
429 | __u32 teli; | 429 | __u32 teli; |
430 | struct ipv6_tlv_tnl_enc_lim *tel; | 430 | struct ipv6_tlv_tnl_enc_lim *tel; |
431 | __u32 mtu; | 431 | __u32 mtu; |
432 | case ICMPV6_DEST_UNREACH: | 432 | case ICMPV6_DEST_UNREACH: |
433 | if (net_ratelimit()) | 433 | if (net_ratelimit()) |
434 | printk(KERN_WARNING | 434 | printk(KERN_WARNING |
435 | "%s: Path to destination invalid " | 435 | "%s: Path to destination invalid " |
436 | "or inactive!\n", t->parms.name); | 436 | "or inactive!\n", t->parms.name); |
437 | rel_msg = 1; | 437 | rel_msg = 1; |
438 | break; | 438 | break; |
439 | case ICMPV6_TIME_EXCEED: | 439 | case ICMPV6_TIME_EXCEED: |
440 | if ((*code) == ICMPV6_EXC_HOPLIMIT) { | 440 | if ((*code) == ICMPV6_EXC_HOPLIMIT) { |
441 | if (net_ratelimit()) | 441 | if (net_ratelimit()) |
442 | printk(KERN_WARNING | 442 | printk(KERN_WARNING |
443 | "%s: Too small hop limit or " | 443 | "%s: Too small hop limit or " |
444 | "routing loop in tunnel!\n", | 444 | "routing loop in tunnel!\n", |
445 | t->parms.name); | 445 | t->parms.name); |
446 | rel_msg = 1; | 446 | rel_msg = 1; |
447 | } | 447 | } |
448 | break; | 448 | break; |
449 | case ICMPV6_PARAMPROB: | 449 | case ICMPV6_PARAMPROB: |
450 | teli = 0; | 450 | teli = 0; |
451 | if ((*code) == ICMPV6_HDR_FIELD) | 451 | if ((*code) == ICMPV6_HDR_FIELD) |
452 | teli = parse_tlv_tnl_enc_lim(skb, skb->data); | 452 | teli = parse_tlv_tnl_enc_lim(skb, skb->data); |
453 | 453 | ||
454 | if (teli && teli == *info - 2) { | 454 | if (teli && teli == *info - 2) { |
455 | tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli]; | 455 | tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli]; |
456 | if (tel->encap_limit == 0) { | 456 | if (tel->encap_limit == 0) { |
457 | if (net_ratelimit()) | 457 | if (net_ratelimit()) |
458 | printk(KERN_WARNING | 458 | printk(KERN_WARNING |
459 | "%s: Too small encapsulation " | 459 | "%s: Too small encapsulation " |
460 | "limit or routing loop in " | 460 | "limit or routing loop in " |
461 | "tunnel!\n", t->parms.name); | 461 | "tunnel!\n", t->parms.name); |
462 | rel_msg = 1; | 462 | rel_msg = 1; |
463 | } | 463 | } |
464 | } else if (net_ratelimit()) { | 464 | } else if (net_ratelimit()) { |
465 | printk(KERN_WARNING | 465 | printk(KERN_WARNING |
466 | "%s: Recipient unable to parse tunneled " | 466 | "%s: Recipient unable to parse tunneled " |
467 | "packet!\n ", t->parms.name); | 467 | "packet!\n ", t->parms.name); |
468 | } | 468 | } |
469 | break; | 469 | break; |
470 | case ICMPV6_PKT_TOOBIG: | 470 | case ICMPV6_PKT_TOOBIG: |
471 | mtu = *info - offset; | 471 | mtu = *info - offset; |
472 | if (mtu < IPV6_MIN_MTU) | 472 | if (mtu < IPV6_MIN_MTU) |
473 | mtu = IPV6_MIN_MTU; | 473 | mtu = IPV6_MIN_MTU; |
474 | t->dev->mtu = mtu; | 474 | t->dev->mtu = mtu; |
475 | 475 | ||
476 | if ((len = sizeof (*ipv6h) + ntohs(ipv6h->payload_len)) > mtu) { | 476 | if ((len = sizeof (*ipv6h) + ntohs(ipv6h->payload_len)) > mtu) { |
477 | rel_type = ICMPV6_PKT_TOOBIG; | 477 | rel_type = ICMPV6_PKT_TOOBIG; |
478 | rel_code = 0; | 478 | rel_code = 0; |
479 | rel_info = mtu; | 479 | rel_info = mtu; |
480 | rel_msg = 1; | 480 | rel_msg = 1; |
481 | } | 481 | } |
482 | break; | 482 | break; |
483 | } | 483 | } |
484 | 484 | ||
485 | *type = rel_type; | 485 | *type = rel_type; |
486 | *code = rel_code; | 486 | *code = rel_code; |
487 | *info = rel_info; | 487 | *info = rel_info; |
488 | *msg = rel_msg; | 488 | *msg = rel_msg; |
489 | 489 | ||
490 | out: | 490 | out: |
491 | rcu_read_unlock(); | 491 | rcu_read_unlock(); |
492 | return err; | 492 | return err; |
493 | } | 493 | } |
494 | 494 | ||
495 | static int | 495 | static int |
496 | ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, | 496 | ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, |
497 | u8 type, u8 code, int offset, __be32 info) | 497 | u8 type, u8 code, int offset, __be32 info) |
498 | { | 498 | { |
499 | int rel_msg = 0; | 499 | int rel_msg = 0; |
500 | u8 rel_type = type; | 500 | u8 rel_type = type; |
501 | u8 rel_code = code; | 501 | u8 rel_code = code; |
502 | __u32 rel_info = ntohl(info); | 502 | __u32 rel_info = ntohl(info); |
503 | int err; | 503 | int err; |
504 | struct sk_buff *skb2; | 504 | struct sk_buff *skb2; |
505 | struct iphdr *eiph; | 505 | struct iphdr *eiph; |
506 | struct flowi fl; | 506 | struct flowi fl; |
507 | struct rtable *rt; | 507 | struct rtable *rt; |
508 | 508 | ||
509 | err = ip6_tnl_err(skb, IPPROTO_IPIP, opt, &rel_type, &rel_code, | 509 | err = ip6_tnl_err(skb, IPPROTO_IPIP, opt, &rel_type, &rel_code, |
510 | &rel_msg, &rel_info, offset); | 510 | &rel_msg, &rel_info, offset); |
511 | if (err < 0) | 511 | if (err < 0) |
512 | return err; | 512 | return err; |
513 | 513 | ||
514 | if (rel_msg == 0) | 514 | if (rel_msg == 0) |
515 | return 0; | 515 | return 0; |
516 | 516 | ||
517 | switch (rel_type) { | 517 | switch (rel_type) { |
518 | case ICMPV6_DEST_UNREACH: | 518 | case ICMPV6_DEST_UNREACH: |
519 | if (rel_code != ICMPV6_ADDR_UNREACH) | 519 | if (rel_code != ICMPV6_ADDR_UNREACH) |
520 | return 0; | 520 | return 0; |
521 | rel_type = ICMP_DEST_UNREACH; | 521 | rel_type = ICMP_DEST_UNREACH; |
522 | rel_code = ICMP_HOST_UNREACH; | 522 | rel_code = ICMP_HOST_UNREACH; |
523 | break; | 523 | break; |
524 | case ICMPV6_PKT_TOOBIG: | 524 | case ICMPV6_PKT_TOOBIG: |
525 | if (rel_code != 0) | 525 | if (rel_code != 0) |
526 | return 0; | 526 | return 0; |
527 | rel_type = ICMP_DEST_UNREACH; | 527 | rel_type = ICMP_DEST_UNREACH; |
528 | rel_code = ICMP_FRAG_NEEDED; | 528 | rel_code = ICMP_FRAG_NEEDED; |
529 | break; | 529 | break; |
530 | default: | 530 | default: |
531 | return 0; | 531 | return 0; |
532 | } | 532 | } |
533 | 533 | ||
534 | if (!pskb_may_pull(skb, offset + sizeof(struct iphdr))) | 534 | if (!pskb_may_pull(skb, offset + sizeof(struct iphdr))) |
535 | return 0; | 535 | return 0; |
536 | 536 | ||
537 | skb2 = skb_clone(skb, GFP_ATOMIC); | 537 | skb2 = skb_clone(skb, GFP_ATOMIC); |
538 | if (!skb2) | 538 | if (!skb2) |
539 | return 0; | 539 | return 0; |
540 | 540 | ||
541 | skb_dst_drop(skb2); | 541 | skb_dst_drop(skb2); |
542 | 542 | ||
543 | skb_pull(skb2, offset); | 543 | skb_pull(skb2, offset); |
544 | skb_reset_network_header(skb2); | 544 | skb_reset_network_header(skb2); |
545 | eiph = ip_hdr(skb2); | 545 | eiph = ip_hdr(skb2); |
546 | 546 | ||
547 | /* Try to guess incoming interface */ | 547 | /* Try to guess incoming interface */ |
548 | memset(&fl, 0, sizeof(fl)); | 548 | memset(&fl, 0, sizeof(fl)); |
549 | fl.fl4_dst = eiph->saddr; | 549 | fl.fl4_dst = eiph->saddr; |
550 | fl.fl4_tos = RT_TOS(eiph->tos); | 550 | fl.fl4_tos = RT_TOS(eiph->tos); |
551 | fl.proto = IPPROTO_IPIP; | 551 | fl.proto = IPPROTO_IPIP; |
552 | if (ip_route_output_key(dev_net(skb->dev), &rt, &fl)) | 552 | if (ip_route_output_key(dev_net(skb->dev), &rt, &fl)) |
553 | goto out; | 553 | goto out; |
554 | 554 | ||
555 | skb2->dev = rt->u.dst.dev; | 555 | skb2->dev = rt->u.dst.dev; |
556 | 556 | ||
557 | /* route "incoming" packet */ | 557 | /* route "incoming" packet */ |
558 | if (rt->rt_flags & RTCF_LOCAL) { | 558 | if (rt->rt_flags & RTCF_LOCAL) { |
559 | ip_rt_put(rt); | 559 | ip_rt_put(rt); |
560 | rt = NULL; | 560 | rt = NULL; |
561 | fl.fl4_dst = eiph->daddr; | 561 | fl.fl4_dst = eiph->daddr; |
562 | fl.fl4_src = eiph->saddr; | 562 | fl.fl4_src = eiph->saddr; |
563 | fl.fl4_tos = eiph->tos; | 563 | fl.fl4_tos = eiph->tos; |
564 | if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) || | 564 | if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) || |
565 | rt->u.dst.dev->type != ARPHRD_TUNNEL) { | 565 | rt->u.dst.dev->type != ARPHRD_TUNNEL) { |
566 | ip_rt_put(rt); | 566 | ip_rt_put(rt); |
567 | goto out; | 567 | goto out; |
568 | } | 568 | } |
569 | skb_dst_set(skb2, (struct dst_entry *)rt); | 569 | skb_dst_set(skb2, (struct dst_entry *)rt); |
570 | } else { | 570 | } else { |
571 | ip_rt_put(rt); | 571 | ip_rt_put(rt); |
572 | if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, | 572 | if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, |
573 | skb2->dev) || | 573 | skb2->dev) || |
574 | skb_dst(skb2)->dev->type != ARPHRD_TUNNEL) | 574 | skb_dst(skb2)->dev->type != ARPHRD_TUNNEL) |
575 | goto out; | 575 | goto out; |
576 | } | 576 | } |
577 | 577 | ||
578 | /* change mtu on this route */ | 578 | /* change mtu on this route */ |
579 | if (rel_type == ICMP_DEST_UNREACH && rel_code == ICMP_FRAG_NEEDED) { | 579 | if (rel_type == ICMP_DEST_UNREACH && rel_code == ICMP_FRAG_NEEDED) { |
580 | if (rel_info > dst_mtu(skb_dst(skb2))) | 580 | if (rel_info > dst_mtu(skb_dst(skb2))) |
581 | goto out; | 581 | goto out; |
582 | 582 | ||
583 | skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), rel_info); | 583 | skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), rel_info); |
584 | } | 584 | } |
585 | 585 | ||
586 | icmp_send(skb2, rel_type, rel_code, htonl(rel_info)); | 586 | icmp_send(skb2, rel_type, rel_code, htonl(rel_info)); |
587 | 587 | ||
588 | out: | 588 | out: |
589 | kfree_skb(skb2); | 589 | kfree_skb(skb2); |
590 | return 0; | 590 | return 0; |
591 | } | 591 | } |
592 | 592 | ||
593 | static int | 593 | static int |
594 | ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, | 594 | ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, |
595 | u8 type, u8 code, int offset, __be32 info) | 595 | u8 type, u8 code, int offset, __be32 info) |
596 | { | 596 | { |
597 | int rel_msg = 0; | 597 | int rel_msg = 0; |
598 | u8 rel_type = type; | 598 | u8 rel_type = type; |
599 | u8 rel_code = code; | 599 | u8 rel_code = code; |
600 | __u32 rel_info = ntohl(info); | 600 | __u32 rel_info = ntohl(info); |
601 | int err; | 601 | int err; |
602 | 602 | ||
603 | err = ip6_tnl_err(skb, IPPROTO_IPV6, opt, &rel_type, &rel_code, | 603 | err = ip6_tnl_err(skb, IPPROTO_IPV6, opt, &rel_type, &rel_code, |
604 | &rel_msg, &rel_info, offset); | 604 | &rel_msg, &rel_info, offset); |
605 | if (err < 0) | 605 | if (err < 0) |
606 | return err; | 606 | return err; |
607 | 607 | ||
608 | if (rel_msg && pskb_may_pull(skb, offset + sizeof(struct ipv6hdr))) { | 608 | if (rel_msg && pskb_may_pull(skb, offset + sizeof(struct ipv6hdr))) { |
609 | struct rt6_info *rt; | 609 | struct rt6_info *rt; |
610 | struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); | 610 | struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); |
611 | 611 | ||
612 | if (!skb2) | 612 | if (!skb2) |
613 | return 0; | 613 | return 0; |
614 | 614 | ||
615 | skb_dst_drop(skb2); | 615 | skb_dst_drop(skb2); |
616 | skb_pull(skb2, offset); | 616 | skb_pull(skb2, offset); |
617 | skb_reset_network_header(skb2); | 617 | skb_reset_network_header(skb2); |
618 | 618 | ||
619 | /* Try to guess incoming interface */ | 619 | /* Try to guess incoming interface */ |
620 | rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, | 620 | rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, |
621 | NULL, 0, 0); | 621 | NULL, 0, 0); |
622 | 622 | ||
623 | if (rt && rt->rt6i_dev) | 623 | if (rt && rt->rt6i_dev) |
624 | skb2->dev = rt->rt6i_dev; | 624 | skb2->dev = rt->rt6i_dev; |
625 | 625 | ||
626 | icmpv6_send(skb2, rel_type, rel_code, rel_info); | 626 | icmpv6_send(skb2, rel_type, rel_code, rel_info); |
627 | 627 | ||
628 | if (rt) | 628 | if (rt) |
629 | dst_release(&rt->u.dst); | 629 | dst_release(&rt->u.dst); |
630 | 630 | ||
631 | kfree_skb(skb2); | 631 | kfree_skb(skb2); |
632 | } | 632 | } |
633 | 633 | ||
634 | return 0; | 634 | return 0; |
635 | } | 635 | } |
636 | 636 | ||
637 | static void ip4ip6_dscp_ecn_decapsulate(struct ip6_tnl *t, | 637 | static void ip4ip6_dscp_ecn_decapsulate(struct ip6_tnl *t, |
638 | struct ipv6hdr *ipv6h, | 638 | struct ipv6hdr *ipv6h, |
639 | struct sk_buff *skb) | 639 | struct sk_buff *skb) |
640 | { | 640 | { |
641 | __u8 dsfield = ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK; | 641 | __u8 dsfield = ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK; |
642 | 642 | ||
643 | if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY) | 643 | if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY) |
644 | ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, dsfield); | 644 | ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, dsfield); |
645 | 645 | ||
646 | if (INET_ECN_is_ce(dsfield)) | 646 | if (INET_ECN_is_ce(dsfield)) |
647 | IP_ECN_set_ce(ip_hdr(skb)); | 647 | IP_ECN_set_ce(ip_hdr(skb)); |
648 | } | 648 | } |
649 | 649 | ||
650 | static void ip6ip6_dscp_ecn_decapsulate(struct ip6_tnl *t, | 650 | static void ip6ip6_dscp_ecn_decapsulate(struct ip6_tnl *t, |
651 | struct ipv6hdr *ipv6h, | 651 | struct ipv6hdr *ipv6h, |
652 | struct sk_buff *skb) | 652 | struct sk_buff *skb) |
653 | { | 653 | { |
654 | if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY) | 654 | if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY) |
655 | ipv6_copy_dscp(ipv6_get_dsfield(ipv6h), ipv6_hdr(skb)); | 655 | ipv6_copy_dscp(ipv6_get_dsfield(ipv6h), ipv6_hdr(skb)); |
656 | 656 | ||
657 | if (INET_ECN_is_ce(ipv6_get_dsfield(ipv6h))) | 657 | if (INET_ECN_is_ce(ipv6_get_dsfield(ipv6h))) |
658 | IP6_ECN_set_ce(ipv6_hdr(skb)); | 658 | IP6_ECN_set_ce(ipv6_hdr(skb)); |
659 | } | 659 | } |
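Both decapsulate helpers work on the 8-bit traffic class, whose upper six bits are the DSCP and lower two bits the ECN field. A comment-only sketch of the split they rely on:

	/*
	 *   7 6 5 4 3 2 1 0
	 *  +-----------+---+
	 *  |   DSCP    |ECN|
	 *  +-----------+---+
	 *
	 * INET_ECN_MASK is 3, so "dsfield & ~INET_ECN_MASK" keeps only
	 * the DSCP bits, while INET_ECN_is_ce() tests the outer header
	 * for the Congestion Experienced code point (binary 11), which
	 * IP_ECN_set_ce()/IP6_ECN_set_ce() then propagate to the inner
	 * header.
	 */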
660 | 660 | ||
661 | /* called with rcu_read_lock() */ | 661 | /* called with rcu_read_lock() */ |
662 | static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t) | 662 | static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t) |
663 | { | 663 | { |
664 | struct ip6_tnl_parm *p = &t->parms; | 664 | struct ip6_tnl_parm *p = &t->parms; |
665 | int ret = 0; | 665 | int ret = 0; |
666 | struct net *net = dev_net(t->dev); | 666 | struct net *net = dev_net(t->dev); |
667 | 667 | ||
668 | if (p->flags & IP6_TNL_F_CAP_RCV) { | 668 | if (p->flags & IP6_TNL_F_CAP_RCV) { |
669 | struct net_device *ldev = NULL; | 669 | struct net_device *ldev = NULL; |
670 | 670 | ||
671 | if (p->link) | 671 | if (p->link) |
672 | ldev = dev_get_by_index_rcu(net, p->link); | 672 | ldev = dev_get_by_index_rcu(net, p->link); |
673 | 673 | ||
674 | if ((ipv6_addr_is_multicast(&p->laddr) || | 674 | if ((ipv6_addr_is_multicast(&p->laddr) || |
675 | likely(ipv6_chk_addr(net, &p->laddr, ldev, 0))) && | 675 | likely(ipv6_chk_addr(net, &p->laddr, ldev, 0))) && |
676 | likely(!ipv6_chk_addr(net, &p->raddr, NULL, 0))) | 676 | likely(!ipv6_chk_addr(net, &p->raddr, NULL, 0))) |
677 | ret = 1; | 677 | ret = 1; |
678 | 678 | ||
679 | } | 679 | } |
680 | return ret; | 680 | return ret; |
681 | } | 681 | } |
682 | 682 | ||
683 | /** | 683 | /** |
684 | * ip6_tnl_rcv - decapsulate IPv6 packet and retransmit it locally | 684 | * ip6_tnl_rcv - decapsulate IPv6 packet and retransmit it locally |
685 | * @skb: received socket buffer | 685 | * @skb: received socket buffer |
686 | * @protocol: ethernet protocol ID | 686 | * @protocol: ethernet protocol ID |
687 | * @dscp_ecn_decapsulate: the function to decapsulate DSCP code and ECN | 687 | * @dscp_ecn_decapsulate: the function to decapsulate DSCP code and ECN |
688 | * | 688 | * |
689 | * Return: 0 if the packet was consumed, 1 if no matching tunnel was found | 689 | * Return: 0 if the packet was consumed, 1 if no matching tunnel was found |
690 | **/ | 690 | **/ |
691 | 691 | ||
692 | static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol, | 692 | static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol, |
693 | __u8 ipproto, | 693 | __u8 ipproto, |
694 | void (*dscp_ecn_decapsulate)(struct ip6_tnl *t, | 694 | void (*dscp_ecn_decapsulate)(struct ip6_tnl *t, |
695 | struct ipv6hdr *ipv6h, | 695 | struct ipv6hdr *ipv6h, |
696 | struct sk_buff *skb)) | 696 | struct sk_buff *skb)) |
697 | { | 697 | { |
698 | struct ip6_tnl *t; | 698 | struct ip6_tnl *t; |
699 | struct ipv6hdr *ipv6h = ipv6_hdr(skb); | 699 | struct ipv6hdr *ipv6h = ipv6_hdr(skb); |
700 | 700 | ||
701 | rcu_read_lock(); | 701 | rcu_read_lock(); |
702 | 702 | ||
703 | if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, | 703 | if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, |
704 | &ipv6h->daddr)) != NULL) { | 704 | &ipv6h->daddr)) != NULL) { |
705 | if (t->parms.proto != ipproto && t->parms.proto != 0) { | 705 | if (t->parms.proto != ipproto && t->parms.proto != 0) { |
706 | rcu_read_unlock(); | 706 | rcu_read_unlock(); |
707 | goto discard; | 707 | goto discard; |
708 | } | 708 | } |
709 | 709 | ||
710 | if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { | 710 | if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { |
711 | rcu_read_unlock(); | 711 | rcu_read_unlock(); |
712 | goto discard; | 712 | goto discard; |
713 | } | 713 | } |
714 | 714 | ||
715 | if (!ip6_tnl_rcv_ctl(t)) { | 715 | if (!ip6_tnl_rcv_ctl(t)) { |
716 | t->dev->stats.rx_dropped++; | 716 | t->dev->stats.rx_dropped++; |
717 | rcu_read_unlock(); | 717 | rcu_read_unlock(); |
718 | goto discard; | 718 | goto discard; |
719 | } | 719 | } |
720 | secpath_reset(skb); | 720 | secpath_reset(skb); |
721 | skb->mac_header = skb->network_header; | 721 | skb->mac_header = skb->network_header; |
722 | skb_reset_network_header(skb); | 722 | skb_reset_network_header(skb); |
723 | skb->protocol = htons(protocol); | 723 | skb->protocol = htons(protocol); |
724 | skb->pkt_type = PACKET_HOST; | 724 | skb->pkt_type = PACKET_HOST; |
725 | memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); | 725 | memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); |
726 | skb->dev = t->dev; | ||
727 | skb_dst_drop(skb); | ||
728 | nf_reset(skb); | ||
729 | 726 | ||
730 | dscp_ecn_decapsulate(t, ipv6h, skb); | 727 | skb_tunnel_rx(skb, t->dev); |
731 | 728 | ||
732 | t->dev->stats.rx_packets++; | 729 | dscp_ecn_decapsulate(t, ipv6h, skb); |
733 | t->dev->stats.rx_bytes += skb->len; | ||
734 | netif_rx(skb); | 730 | netif_rx(skb); |
735 | rcu_read_unlock(); | 731 | rcu_read_unlock(); |
736 | return 0; | 732 | return 0; |
737 | } | 733 | } |
738 | rcu_read_unlock(); | 734 | rcu_read_unlock(); |
739 | return 1; | 735 | return 1; |
740 | 736 | ||
741 | discard: | 737 | discard: |
742 | kfree_skb(skb); | 738 | kfree_skb(skb); |
743 | return 0; | 739 | return 0; |
744 | } | 740 | } |
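The hunk above is where the new helper lands in ip6_tunnel.c: the open-coded dev assignment, dst drop, netfilter reset and RX accounting on the removed side collapse into one skb_tunnel_rx() call. For orientation, the helper added to include/net/dst.h by this commit boils down to the following sketch:

	static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev)
	{
		/* TODO : stats should be SMP safe */
		dev->stats.rx_packets++;
		dev->stats.rx_bytes += skb->len;
		skb->rxhash = 0;	/* force rxhash recomputation after decap */
		skb_dst_drop(skb);
		nf_reset(skb);
		skb->dev = dev;
	}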
745 | 741 | ||
746 | static int ip4ip6_rcv(struct sk_buff *skb) | 742 | static int ip4ip6_rcv(struct sk_buff *skb) |
747 | { | 743 | { |
748 | return ip6_tnl_rcv(skb, ETH_P_IP, IPPROTO_IPIP, | 744 | return ip6_tnl_rcv(skb, ETH_P_IP, IPPROTO_IPIP, |
749 | ip4ip6_dscp_ecn_decapsulate); | 745 | ip4ip6_dscp_ecn_decapsulate); |
750 | } | 746 | } |
751 | 747 | ||
752 | static int ip6ip6_rcv(struct sk_buff *skb) | 748 | static int ip6ip6_rcv(struct sk_buff *skb) |
753 | { | 749 | { |
754 | return ip6_tnl_rcv(skb, ETH_P_IPV6, IPPROTO_IPV6, | 750 | return ip6_tnl_rcv(skb, ETH_P_IPV6, IPPROTO_IPV6, |
755 | ip6ip6_dscp_ecn_decapsulate); | 751 | ip6ip6_dscp_ecn_decapsulate); |
756 | } | 752 | } |
757 | 753 | ||
758 | struct ipv6_tel_txoption { | 754 | struct ipv6_tel_txoption { |
759 | struct ipv6_txoptions ops; | 755 | struct ipv6_txoptions ops; |
760 | __u8 dst_opt[8]; | 756 | __u8 dst_opt[8]; |
761 | }; | 757 | }; |
762 | 758 | ||
763 | static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit) | 759 | static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit) |
764 | { | 760 | { |
765 | memset(opt, 0, sizeof(struct ipv6_tel_txoption)); | 761 | memset(opt, 0, sizeof(struct ipv6_tel_txoption)); |
766 | 762 | ||
767 | opt->dst_opt[2] = IPV6_TLV_TNL_ENCAP_LIMIT; | 763 | opt->dst_opt[2] = IPV6_TLV_TNL_ENCAP_LIMIT; |
768 | opt->dst_opt[3] = 1; | 764 | opt->dst_opt[3] = 1; |
769 | opt->dst_opt[4] = encap_limit; | 765 | opt->dst_opt[4] = encap_limit; |
770 | opt->dst_opt[5] = IPV6_TLV_PADN; | 766 | opt->dst_opt[5] = IPV6_TLV_PADN; |
771 | opt->dst_opt[6] = 1; | 767 | opt->dst_opt[6] = 1; |
772 | 768 | ||
773 | opt->ops.dst0opt = (struct ipv6_opt_hdr *) opt->dst_opt; | 769 | opt->ops.dst0opt = (struct ipv6_opt_hdr *) opt->dst_opt; |
774 | opt->ops.opt_nflen = 8; | 770 | opt->ops.opt_nflen = 8; |
775 | } | 771 | } |
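init_tel_txopt() hand-assembles, on the transmit side, the same TLV that parse_tlv_tnl_enc_lim() looks for on receive. A sketch of what each of the eight bytes ends up holding (the memset zeroes everything not set explicitly):

	/*
	 * dst_opt[0]  next header    (filled in when the header is pushed)
	 * dst_opt[1]  hdr ext len    0 -> (0 + 1) * 8 = 8 bytes total
	 * dst_opt[2]  option type    IPV6_TLV_TNL_ENCAP_LIMIT (4)
	 * dst_opt[3]  option length  1
	 * dst_opt[4]  encap_limit    remaining encapsulation budget
	 * dst_opt[5]  option type    IPV6_TLV_PADN (1)
	 * dst_opt[6]  option length  1
	 * dst_opt[7]  padding        0
	 */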
776 | 772 | ||
777 | /** | 773 | /** |
778 | * ip6_tnl_addr_conflict - compare packet addresses to tunnel's own | 774 | * ip6_tnl_addr_conflict - compare packet addresses to tunnel's own |
779 | * @t: the outgoing tunnel device | 775 | * @t: the outgoing tunnel device |
780 | * @hdr: IPv6 header from the incoming packet | 776 | * @hdr: IPv6 header from the incoming packet |
781 | * | 777 | * |
782 | * Description: | 778 | * Description: |
783 | * Avoid trivial tunneling loop by checking that tunnel exit-point | 779 | * Avoid trivial tunneling loop by checking that tunnel exit-point |
784 | * doesn't match source of incoming packet. | 780 | * doesn't match source of incoming packet. |
785 | * | 781 | * |
786 | * Return: | 782 | * Return: |
787 | * 1 if conflict, | 783 | * 1 if conflict, |
788 | * 0 else | 784 | * 0 else |
789 | **/ | 785 | **/ |
790 | 786 | ||
791 | static inline int | 787 | static inline int |
792 | ip6_tnl_addr_conflict(struct ip6_tnl *t, struct ipv6hdr *hdr) | 788 | ip6_tnl_addr_conflict(struct ip6_tnl *t, struct ipv6hdr *hdr) |
793 | { | 789 | { |
794 | return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr); | 790 | return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr); |
795 | } | 791 | } |
796 | 792 | ||
797 | static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t) | 793 | static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t) |
798 | { | 794 | { |
799 | struct ip6_tnl_parm *p = &t->parms; | 795 | struct ip6_tnl_parm *p = &t->parms; |
800 | int ret = 0; | 796 | int ret = 0; |
801 | struct net *net = dev_net(t->dev); | 797 | struct net *net = dev_net(t->dev); |
802 | 798 | ||
803 | if (p->flags & IP6_TNL_F_CAP_XMIT) { | 799 | if (p->flags & IP6_TNL_F_CAP_XMIT) { |
804 | struct net_device *ldev = NULL; | 800 | struct net_device *ldev = NULL; |
805 | 801 | ||
806 | rcu_read_lock(); | 802 | rcu_read_lock(); |
807 | if (p->link) | 803 | if (p->link) |
808 | ldev = dev_get_by_index_rcu(net, p->link); | 804 | ldev = dev_get_by_index_rcu(net, p->link); |
809 | 805 | ||
810 | if (unlikely(!ipv6_chk_addr(net, &p->laddr, ldev, 0))) | 806 | if (unlikely(!ipv6_chk_addr(net, &p->laddr, ldev, 0))) |
811 | printk(KERN_WARNING | 807 | printk(KERN_WARNING |
812 | "%s xmit: Local address not yet configured!\n", | 808 | "%s xmit: Local address not yet configured!\n", |
813 | p->name); | 809 | p->name); |
814 | else if (!ipv6_addr_is_multicast(&p->raddr) && | 810 | else if (!ipv6_addr_is_multicast(&p->raddr) && |
815 | unlikely(ipv6_chk_addr(net, &p->raddr, NULL, 0))) | 811 | unlikely(ipv6_chk_addr(net, &p->raddr, NULL, 0))) |
816 | printk(KERN_WARNING | 812 | printk(KERN_WARNING |
817 | "%s xmit: Routing loop! " | 813 | "%s xmit: Routing loop! " |
818 | "Remote address found on this node!\n", | 814 | "Remote address found on this node!\n", |
819 | p->name); | 815 | p->name); |
820 | else | 816 | else |
821 | ret = 1; | 817 | ret = 1; |
822 | rcu_read_unlock(); | 818 | rcu_read_unlock(); |
823 | } | 819 | } |
824 | return ret; | 820 | return ret; |
825 | } | 821 | } |
826 | /** | 822 | /** |
827 | * ip6_tnl_xmit2 - encapsulate packet and send | 823 | * ip6_tnl_xmit2 - encapsulate packet and send |
828 | * @skb: the outgoing socket buffer | 824 | * @skb: the outgoing socket buffer |
829 | * @dev: the outgoing tunnel device | 825 | * @dev: the outgoing tunnel device |
830 | * @dsfield: dscp code for outer header | 826 | * @dsfield: dscp code for outer header |
831 | * @fl: flow of tunneled packet | 827 | * @fl: flow of tunneled packet |
832 | * @encap_limit: encapsulation limit | 828 | * @encap_limit: encapsulation limit |
833 | * @pmtu: Path MTU is stored if packet is too big | 829 | * @pmtu: Path MTU is stored if packet is too big |
834 | * | 830 | * |
835 | * Description: | 831 | * Description: |
836 | * Build new header and do some sanity checks on the packet before sending | 832 | * Build new header and do some sanity checks on the packet before sending |
837 | * it. | 833 | * it. |
838 | * | 834 | * |
839 | * Return: | 835 | * Return: |
840 | * 0 on success | 836 | * 0 on success |
841 | * -1 on failure | 837 | * -1 on failure |
842 | * %-EMSGSIZE if the message is too big; the path MTU is stored in *pmtu. | 838 | * %-EMSGSIZE if the message is too big; the path MTU is stored in *pmtu. |
843 | **/ | 839 | **/ |
844 | 840 | ||
845 | static int ip6_tnl_xmit2(struct sk_buff *skb, | 841 | static int ip6_tnl_xmit2(struct sk_buff *skb, |
846 | struct net_device *dev, | 842 | struct net_device *dev, |
847 | __u8 dsfield, | 843 | __u8 dsfield, |
848 | struct flowi *fl, | 844 | struct flowi *fl, |
849 | int encap_limit, | 845 | int encap_limit, |
850 | __u32 *pmtu) | 846 | __u32 *pmtu) |
851 | { | 847 | { |
852 | struct net *net = dev_net(dev); | 848 | struct net *net = dev_net(dev); |
853 | struct ip6_tnl *t = netdev_priv(dev); | 849 | struct ip6_tnl *t = netdev_priv(dev); |
854 | struct net_device_stats *stats = &t->dev->stats; | 850 | struct net_device_stats *stats = &t->dev->stats; |
855 | struct ipv6hdr *ipv6h = ipv6_hdr(skb); | 851 | struct ipv6hdr *ipv6h = ipv6_hdr(skb); |
856 | struct ipv6_tel_txoption opt; | 852 | struct ipv6_tel_txoption opt; |
857 | struct dst_entry *dst; | 853 | struct dst_entry *dst; |
858 | struct net_device *tdev; | 854 | struct net_device *tdev; |
859 | int mtu; | 855 | int mtu; |
860 | unsigned int max_headroom = sizeof(struct ipv6hdr); | 856 | unsigned int max_headroom = sizeof(struct ipv6hdr); |
861 | u8 proto; | 857 | u8 proto; |
862 | int err = -1; | 858 | int err = -1; |
863 | int pkt_len; | 859 | int pkt_len; |
864 | 860 | ||
865 | if ((dst = ip6_tnl_dst_check(t)) != NULL) | 861 | if ((dst = ip6_tnl_dst_check(t)) != NULL) |
866 | dst_hold(dst); | 862 | dst_hold(dst); |
867 | else { | 863 | else { |
868 | dst = ip6_route_output(net, NULL, fl); | 864 | dst = ip6_route_output(net, NULL, fl); |
869 | 865 | ||
870 | if (dst->error || xfrm_lookup(net, &dst, fl, NULL, 0) < 0) | 866 | if (dst->error || xfrm_lookup(net, &dst, fl, NULL, 0) < 0) |
871 | goto tx_err_link_failure; | 867 | goto tx_err_link_failure; |
872 | } | 868 | } |
873 | 869 | ||
874 | tdev = dst->dev; | 870 | tdev = dst->dev; |
875 | 871 | ||
876 | if (tdev == dev) { | 872 | if (tdev == dev) { |
877 | stats->collisions++; | 873 | stats->collisions++; |
878 | if (net_ratelimit()) | 874 | if (net_ratelimit()) |
879 | printk(KERN_WARNING | 875 | printk(KERN_WARNING |
880 | "%s: Local routing loop detected!\n", | 876 | "%s: Local routing loop detected!\n", |
881 | t->parms.name); | 877 | t->parms.name); |
882 | goto tx_err_dst_release; | 878 | goto tx_err_dst_release; |
883 | } | 879 | } |
884 | mtu = dst_mtu(dst) - sizeof (*ipv6h); | 880 | mtu = dst_mtu(dst) - sizeof (*ipv6h); |
885 | if (encap_limit >= 0) { | 881 | if (encap_limit >= 0) { |
886 | max_headroom += 8; | 882 | max_headroom += 8; |
887 | mtu -= 8; | 883 | mtu -= 8; |
888 | } | 884 | } |
889 | if (mtu < IPV6_MIN_MTU) | 885 | if (mtu < IPV6_MIN_MTU) |
890 | mtu = IPV6_MIN_MTU; | 886 | mtu = IPV6_MIN_MTU; |
891 | if (skb_dst(skb)) | 887 | if (skb_dst(skb)) |
892 | skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); | 888 | skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); |
893 | if (skb->len > mtu) { | 889 | if (skb->len > mtu) { |
894 | *pmtu = mtu; | 890 | *pmtu = mtu; |
895 | err = -EMSGSIZE; | 891 | err = -EMSGSIZE; |
896 | goto tx_err_dst_release; | 892 | goto tx_err_dst_release; |
897 | } | 893 | } |
898 | 894 | ||
899 | /* | 895 | /* |
900 | * Okay, now see if we can stuff it in the buffer as-is. | 896 | * Okay, now see if we can stuff it in the buffer as-is. |
901 | */ | 897 | */ |
902 | max_headroom += LL_RESERVED_SPACE(tdev); | 898 | max_headroom += LL_RESERVED_SPACE(tdev); |
903 | 899 | ||
904 | if (skb_headroom(skb) < max_headroom || skb_shared(skb) || | 900 | if (skb_headroom(skb) < max_headroom || skb_shared(skb) || |
905 | (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { | 901 | (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { |
906 | struct sk_buff *new_skb; | 902 | struct sk_buff *new_skb; |
907 | 903 | ||
908 | if (!(new_skb = skb_realloc_headroom(skb, max_headroom))) | 904 | if (!(new_skb = skb_realloc_headroom(skb, max_headroom))) |
909 | goto tx_err_dst_release; | 905 | goto tx_err_dst_release; |
910 | 906 | ||
911 | if (skb->sk) | 907 | if (skb->sk) |
912 | skb_set_owner_w(new_skb, skb->sk); | 908 | skb_set_owner_w(new_skb, skb->sk); |
913 | kfree_skb(skb); | 909 | kfree_skb(skb); |
914 | skb = new_skb; | 910 | skb = new_skb; |
915 | } | 911 | } |
916 | skb_dst_drop(skb); | 912 | skb_dst_drop(skb); |
917 | skb_dst_set(skb, dst_clone(dst)); | 913 | skb_dst_set(skb, dst_clone(dst)); |
918 | 914 | ||
919 | skb->transport_header = skb->network_header; | 915 | skb->transport_header = skb->network_header; |
920 | 916 | ||
921 | proto = fl->proto; | 917 | proto = fl->proto; |
922 | if (encap_limit >= 0) { | 918 | if (encap_limit >= 0) { |
923 | init_tel_txopt(&opt, encap_limit); | 919 | init_tel_txopt(&opt, encap_limit); |
924 | ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL); | 920 | ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL); |
925 | } | 921 | } |
926 | skb_push(skb, sizeof(struct ipv6hdr)); | 922 | skb_push(skb, sizeof(struct ipv6hdr)); |
927 | skb_reset_network_header(skb); | 923 | skb_reset_network_header(skb); |
928 | ipv6h = ipv6_hdr(skb); | 924 | ipv6h = ipv6_hdr(skb); |
929 | *(__be32*)ipv6h = fl->fl6_flowlabel | htonl(0x60000000); | 925 | *(__be32*)ipv6h = fl->fl6_flowlabel | htonl(0x60000000); |
930 | dsfield = INET_ECN_encapsulate(0, dsfield); | 926 | dsfield = INET_ECN_encapsulate(0, dsfield); |
931 | ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield); | 927 | ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield); |
932 | ipv6h->hop_limit = t->parms.hop_limit; | 928 | ipv6h->hop_limit = t->parms.hop_limit; |
933 | ipv6h->nexthdr = proto; | 929 | ipv6h->nexthdr = proto; |
934 | ipv6_addr_copy(&ipv6h->saddr, &fl->fl6_src); | 930 | ipv6_addr_copy(&ipv6h->saddr, &fl->fl6_src); |
935 | ipv6_addr_copy(&ipv6h->daddr, &fl->fl6_dst); | 931 | ipv6_addr_copy(&ipv6h->daddr, &fl->fl6_dst); |
936 | nf_reset(skb); | 932 | nf_reset(skb); |
937 | pkt_len = skb->len; | 933 | pkt_len = skb->len; |
938 | err = ip6_local_out(skb); | 934 | err = ip6_local_out(skb); |
939 | 935 | ||
940 | if (net_xmit_eval(err) == 0) { | 936 | if (net_xmit_eval(err) == 0) { |
941 | stats->tx_bytes += pkt_len; | 937 | stats->tx_bytes += pkt_len; |
942 | stats->tx_packets++; | 938 | stats->tx_packets++; |
943 | } else { | 939 | } else { |
944 | stats->tx_errors++; | 940 | stats->tx_errors++; |
945 | stats->tx_aborted_errors++; | 941 | stats->tx_aborted_errors++; |
946 | } | 942 | } |
947 | ip6_tnl_dst_store(t, dst); | 943 | ip6_tnl_dst_store(t, dst); |
948 | return 0; | 944 | return 0; |
949 | tx_err_link_failure: | 945 | tx_err_link_failure: |
950 | stats->tx_carrier_errors++; | 946 | stats->tx_carrier_errors++; |
951 | dst_link_failure(skb); | 947 | dst_link_failure(skb); |
952 | tx_err_dst_release: | 948 | tx_err_dst_release: |
953 | dst_release(dst); | 949 | dst_release(dst); |
954 | return err; | 950 | return err; |
955 | } | 951 | } |
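The MTU bookkeeping near the top of ip6_tnl_xmit2() is easiest to follow with concrete numbers. Assuming a hypothetical 1500-byte MTU on the underlying route:

	mtu = dst_mtu(dst) - sizeof(struct ipv6hdr);	/* 1500 - 40 = 1460 */
	if (encap_limit >= 0)
		mtu -= 8;				/* 1460 - 8  = 1452 */
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;			/* never below 1280 */

Packets longer than the result take the -EMSGSIZE path, which the callers translate into ICMP_FRAG_NEEDED (ip4ip6) or ICMPV6_PKT_TOOBIG (ip6ip6) back toward the sender.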
956 | 952 | ||
957 | static inline int | 953 | static inline int |
958 | ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) | 954 | ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) |
959 | { | 955 | { |
960 | struct ip6_tnl *t = netdev_priv(dev); | 956 | struct ip6_tnl *t = netdev_priv(dev); |
961 | struct iphdr *iph = ip_hdr(skb); | 957 | struct iphdr *iph = ip_hdr(skb); |
962 | int encap_limit = -1; | 958 | int encap_limit = -1; |
963 | struct flowi fl; | 959 | struct flowi fl; |
964 | __u8 dsfield; | 960 | __u8 dsfield; |
965 | __u32 mtu; | 961 | __u32 mtu; |
966 | int err; | 962 | int err; |
967 | 963 | ||
968 | if ((t->parms.proto != IPPROTO_IPIP && t->parms.proto != 0) || | 964 | if ((t->parms.proto != IPPROTO_IPIP && t->parms.proto != 0) || |
969 | !ip6_tnl_xmit_ctl(t)) | 965 | !ip6_tnl_xmit_ctl(t)) |
970 | return -1; | 966 | return -1; |
971 | 967 | ||
972 | if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) | 968 | if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) |
973 | encap_limit = t->parms.encap_limit; | 969 | encap_limit = t->parms.encap_limit; |
974 | 970 | ||
975 | memcpy(&fl, &t->fl, sizeof (fl)); | 971 | memcpy(&fl, &t->fl, sizeof (fl)); |
976 | fl.proto = IPPROTO_IPIP; | 972 | fl.proto = IPPROTO_IPIP; |
977 | 973 | ||
978 | dsfield = ipv4_get_dsfield(iph); | 974 | dsfield = ipv4_get_dsfield(iph); |
979 | 975 | ||
980 | if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)) | 976 | if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)) |
981 | fl.fl6_flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT) | 977 | fl.fl6_flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT) |
982 | & IPV6_TCLASS_MASK; | 978 | & IPV6_TCLASS_MASK; |
983 | 979 | ||
984 | err = ip6_tnl_xmit2(skb, dev, dsfield, &fl, encap_limit, &mtu); | 980 | err = ip6_tnl_xmit2(skb, dev, dsfield, &fl, encap_limit, &mtu); |
985 | if (err != 0) { | 981 | if (err != 0) { |
986 | /* XXX: send ICMP error even if DF is not set. */ | 982 | /* XXX: send ICMP error even if DF is not set. */ |
987 | if (err == -EMSGSIZE) | 983 | if (err == -EMSGSIZE) |
988 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, | 984 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, |
989 | htonl(mtu)); | 985 | htonl(mtu)); |
990 | return -1; | 986 | return -1; |
991 | } | 987 | } |
992 | 988 | ||
993 | return 0; | 989 | return 0; |
994 | } | 990 | } |
995 | 991 | ||
996 | static inline int | 992 | static inline int |
997 | ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) | 993 | ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) |
998 | { | 994 | { |
999 | struct ip6_tnl *t = netdev_priv(dev); | 995 | struct ip6_tnl *t = netdev_priv(dev); |
1000 | struct ipv6hdr *ipv6h = ipv6_hdr(skb); | 996 | struct ipv6hdr *ipv6h = ipv6_hdr(skb); |
1001 | int encap_limit = -1; | 997 | int encap_limit = -1; |
1002 | __u16 offset; | 998 | __u16 offset; |
1003 | struct flowi fl; | 999 | struct flowi fl; |
1004 | __u8 dsfield; | 1000 | __u8 dsfield; |
1005 | __u32 mtu; | 1001 | __u32 mtu; |
1006 | int err; | 1002 | int err; |
1007 | 1003 | ||
1008 | if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) || | 1004 | if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) || |
1009 | !ip6_tnl_xmit_ctl(t) || ip6_tnl_addr_conflict(t, ipv6h)) | 1005 | !ip6_tnl_xmit_ctl(t) || ip6_tnl_addr_conflict(t, ipv6h)) |
1010 | return -1; | 1006 | return -1; |
1011 | 1007 | ||
1012 | offset = parse_tlv_tnl_enc_lim(skb, skb_network_header(skb)); | 1008 | offset = parse_tlv_tnl_enc_lim(skb, skb_network_header(skb)); |
1013 | if (offset > 0) { | 1009 | if (offset > 0) { |
1014 | struct ipv6_tlv_tnl_enc_lim *tel; | 1010 | struct ipv6_tlv_tnl_enc_lim *tel; |
1015 | tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset]; | 1011 | tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset]; |
1016 | if (tel->encap_limit == 0) { | 1012 | if (tel->encap_limit == 0) { |
1017 | icmpv6_send(skb, ICMPV6_PARAMPROB, | 1013 | icmpv6_send(skb, ICMPV6_PARAMPROB, |
1018 | ICMPV6_HDR_FIELD, offset + 2); | 1014 | ICMPV6_HDR_FIELD, offset + 2); |
1019 | return -1; | 1015 | return -1; |
1020 | } | 1016 | } |
1021 | encap_limit = tel->encap_limit - 1; | 1017 | encap_limit = tel->encap_limit - 1; |
1022 | } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) | 1018 | } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) |
1023 | encap_limit = t->parms.encap_limit; | 1019 | encap_limit = t->parms.encap_limit; |
1024 | 1020 | ||
1025 | memcpy(&fl, &t->fl, sizeof (fl)); | 1021 | memcpy(&fl, &t->fl, sizeof (fl)); |
1026 | fl.proto = IPPROTO_IPV6; | 1022 | fl.proto = IPPROTO_IPV6; |
1027 | 1023 | ||
1028 | dsfield = ipv6_get_dsfield(ipv6h); | 1024 | dsfield = ipv6_get_dsfield(ipv6h); |
1029 | if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)) | 1025 | if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)) |
1030 | fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK); | 1026 | fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK); |
1031 | if ((t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)) | 1027 | if ((t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)) |
1032 | fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK); | 1028 | fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK); |
1033 | 1029 | ||
1034 | err = ip6_tnl_xmit2(skb, dev, dsfield, &fl, encap_limit, &mtu); | 1030 | err = ip6_tnl_xmit2(skb, dev, dsfield, &fl, encap_limit, &mtu); |
1035 | if (err != 0) { | 1031 | if (err != 0) { |
1036 | if (err == -EMSGSIZE) | 1032 | if (err == -EMSGSIZE) |
1037 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); | 1033 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); |
1038 | return -1; | 1034 | return -1; |
1039 | } | 1035 | } |
1040 | 1036 | ||
1041 | return 0; | 1037 | return 0; |
1042 | } | 1038 | } |
1043 | 1039 | ||
1044 | static netdev_tx_t | 1040 | static netdev_tx_t |
1045 | ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) | 1041 | ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) |
1046 | { | 1042 | { |
1047 | struct ip6_tnl *t = netdev_priv(dev); | 1043 | struct ip6_tnl *t = netdev_priv(dev); |
1048 | struct net_device_stats *stats = &t->dev->stats; | 1044 | struct net_device_stats *stats = &t->dev->stats; |
1049 | int ret; | 1045 | int ret; |
1050 | 1046 | ||
1051 | switch (skb->protocol) { | 1047 | switch (skb->protocol) { |
1052 | case htons(ETH_P_IP): | 1048 | case htons(ETH_P_IP): |
1053 | ret = ip4ip6_tnl_xmit(skb, dev); | 1049 | ret = ip4ip6_tnl_xmit(skb, dev); |
1054 | break; | 1050 | break; |
1055 | case htons(ETH_P_IPV6): | 1051 | case htons(ETH_P_IPV6): |
1056 | ret = ip6ip6_tnl_xmit(skb, dev); | 1052 | ret = ip6ip6_tnl_xmit(skb, dev); |
1057 | break; | 1053 | break; |
1058 | default: | 1054 | default: |
1059 | goto tx_err; | 1055 | goto tx_err; |
1060 | } | 1056 | } |
1061 | 1057 | ||
1062 | if (ret < 0) | 1058 | if (ret < 0) |
1063 | goto tx_err; | 1059 | goto tx_err; |
1064 | 1060 | ||
1065 | return NETDEV_TX_OK; | 1061 | return NETDEV_TX_OK; |
1066 | 1062 | ||
1067 | tx_err: | 1063 | tx_err: |
1068 | stats->tx_errors++; | 1064 | stats->tx_errors++; |
1069 | stats->tx_dropped++; | 1065 | stats->tx_dropped++; |
1070 | kfree_skb(skb); | 1066 | kfree_skb(skb); |
1071 | return NETDEV_TX_OK; | 1067 | return NETDEV_TX_OK; |
1072 | } | 1068 | } |
1073 | 1069 | ||
1074 | static void ip6_tnl_set_cap(struct ip6_tnl *t) | 1070 | static void ip6_tnl_set_cap(struct ip6_tnl *t) |
1075 | { | 1071 | { |
1076 | struct ip6_tnl_parm *p = &t->parms; | 1072 | struct ip6_tnl_parm *p = &t->parms; |
1077 | int ltype = ipv6_addr_type(&p->laddr); | 1073 | int ltype = ipv6_addr_type(&p->laddr); |
1078 | int rtype = ipv6_addr_type(&p->raddr); | 1074 | int rtype = ipv6_addr_type(&p->raddr); |
1079 | 1075 | ||
1080 | p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV); | 1076 | p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV); |
1081 | 1077 | ||
1082 | if (ltype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) && | 1078 | if (ltype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) && |
1083 | rtype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) && | 1079 | rtype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) && |
1084 | !((ltype|rtype) & IPV6_ADDR_LOOPBACK) && | 1080 | !((ltype|rtype) & IPV6_ADDR_LOOPBACK) && |
1085 | (!((ltype|rtype) & IPV6_ADDR_LINKLOCAL) || p->link)) { | 1081 | (!((ltype|rtype) & IPV6_ADDR_LINKLOCAL) || p->link)) { |
1086 | if (ltype&IPV6_ADDR_UNICAST) | 1082 | if (ltype&IPV6_ADDR_UNICAST) |
1087 | p->flags |= IP6_TNL_F_CAP_XMIT; | 1083 | p->flags |= IP6_TNL_F_CAP_XMIT; |
1088 | if (rtype&IPV6_ADDR_UNICAST) | 1084 | if (rtype&IPV6_ADDR_UNICAST) |
1089 | p->flags |= IP6_TNL_F_CAP_RCV; | 1085 | p->flags |= IP6_TNL_F_CAP_RCV; |
1090 | } | 1086 | } |
1091 | } | 1087 | } |
1092 | 1088 | ||
1093 | static void ip6_tnl_link_config(struct ip6_tnl *t) | 1089 | static void ip6_tnl_link_config(struct ip6_tnl *t) |
1094 | { | 1090 | { |
1095 | struct net_device *dev = t->dev; | 1091 | struct net_device *dev = t->dev; |
1096 | struct ip6_tnl_parm *p = &t->parms; | 1092 | struct ip6_tnl_parm *p = &t->parms; |
1097 | struct flowi *fl = &t->fl; | 1093 | struct flowi *fl = &t->fl; |
1098 | 1094 | ||
1099 | memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); | 1095 | memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); |
1100 | memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr)); | 1096 | memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr)); |
1101 | 1097 | ||
1102 | /* Set up flowi template */ | 1098 | /* Set up flowi template */ |
1103 | ipv6_addr_copy(&fl->fl6_src, &p->laddr); | 1099 | ipv6_addr_copy(&fl->fl6_src, &p->laddr); |
1104 | ipv6_addr_copy(&fl->fl6_dst, &p->raddr); | 1100 | ipv6_addr_copy(&fl->fl6_dst, &p->raddr); |
1105 | fl->oif = p->link; | 1101 | fl->oif = p->link; |
1106 | fl->fl6_flowlabel = 0; | 1102 | fl->fl6_flowlabel = 0; |
1107 | 1103 | ||
1108 | if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS)) | 1104 | if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS)) |
1109 | fl->fl6_flowlabel |= IPV6_TCLASS_MASK & p->flowinfo; | 1105 | fl->fl6_flowlabel |= IPV6_TCLASS_MASK & p->flowinfo; |
1110 | if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL)) | 1106 | if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL)) |
1111 | fl->fl6_flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo; | 1107 | fl->fl6_flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo; |
1112 | 1108 | ||
1113 | ip6_tnl_set_cap(t); | 1109 | ip6_tnl_set_cap(t); |
1114 | 1110 | ||
1115 | if (p->flags&IP6_TNL_F_CAP_XMIT && p->flags&IP6_TNL_F_CAP_RCV) | 1111 | if (p->flags&IP6_TNL_F_CAP_XMIT && p->flags&IP6_TNL_F_CAP_RCV) |
1116 | dev->flags |= IFF_POINTOPOINT; | 1112 | dev->flags |= IFF_POINTOPOINT; |
1117 | else | 1113 | else |
1118 | dev->flags &= ~IFF_POINTOPOINT; | 1114 | dev->flags &= ~IFF_POINTOPOINT; |
1119 | 1115 | ||
1120 | dev->iflink = p->link; | 1116 | dev->iflink = p->link; |
1121 | 1117 | ||
1122 | if (p->flags & IP6_TNL_F_CAP_XMIT) { | 1118 | if (p->flags & IP6_TNL_F_CAP_XMIT) { |
1123 | int strict = (ipv6_addr_type(&p->raddr) & | 1119 | int strict = (ipv6_addr_type(&p->raddr) & |
1124 | (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL)); | 1120 | (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL)); |
1125 | 1121 | ||
1126 | struct rt6_info *rt = rt6_lookup(dev_net(dev), | 1122 | struct rt6_info *rt = rt6_lookup(dev_net(dev), |
1127 | &p->raddr, &p->laddr, | 1123 | &p->raddr, &p->laddr, |
1128 | p->link, strict); | 1124 | p->link, strict); |
1129 | 1125 | ||
1130 | if (rt == NULL) | 1126 | if (rt == NULL) |
1131 | return; | 1127 | return; |
1132 | 1128 | ||
1133 | if (rt->rt6i_dev) { | 1129 | if (rt->rt6i_dev) { |
1134 | dev->hard_header_len = rt->rt6i_dev->hard_header_len + | 1130 | dev->hard_header_len = rt->rt6i_dev->hard_header_len + |
1135 | sizeof (struct ipv6hdr); | 1131 | sizeof (struct ipv6hdr); |
1136 | 1132 | ||
1137 | dev->mtu = rt->rt6i_dev->mtu - sizeof (struct ipv6hdr); | 1133 | dev->mtu = rt->rt6i_dev->mtu - sizeof (struct ipv6hdr); |
1138 | 1134 | ||
1139 | if (dev->mtu < IPV6_MIN_MTU) | 1135 | if (dev->mtu < IPV6_MIN_MTU) |
1140 | dev->mtu = IPV6_MIN_MTU; | 1136 | dev->mtu = IPV6_MIN_MTU; |
1141 | } | 1137 | } |
1142 | dst_release(&rt->u.dst); | 1138 | dst_release(&rt->u.dst); |
1143 | } | 1139 | } |
1144 | } | 1140 | } |
1145 | 1141 | ||
1146 | /** | 1142 | /** |
1147 | * ip6_tnl_change - update the tunnel parameters | 1143 | * ip6_tnl_change - update the tunnel parameters |
1148 | * @t: tunnel to be changed | 1144 | * @t: tunnel to be changed |
1149 | * @p: tunnel configuration parameters | 1145 | * @p: tunnel configuration parameters |
1150 | * | 1146 | * |
1151 | * Description: | 1147 | * Description: |
1152 | * ip6_tnl_change() updates the tunnel parameters | 1148 | * ip6_tnl_change() updates the tunnel parameters |
1153 | **/ | 1149 | **/ |
1154 | 1150 | ||
1155 | static int | 1151 | static int |
1156 | ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p) | 1152 | ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p) |
1157 | { | 1153 | { |
1158 | ipv6_addr_copy(&t->parms.laddr, &p->laddr); | 1154 | ipv6_addr_copy(&t->parms.laddr, &p->laddr); |
1159 | ipv6_addr_copy(&t->parms.raddr, &p->raddr); | 1155 | ipv6_addr_copy(&t->parms.raddr, &p->raddr); |
1160 | t->parms.flags = p->flags; | 1156 | t->parms.flags = p->flags; |
1161 | t->parms.hop_limit = p->hop_limit; | 1157 | t->parms.hop_limit = p->hop_limit; |
1162 | t->parms.encap_limit = p->encap_limit; | 1158 | t->parms.encap_limit = p->encap_limit; |
1163 | t->parms.flowinfo = p->flowinfo; | 1159 | t->parms.flowinfo = p->flowinfo; |
1164 | t->parms.link = p->link; | 1160 | t->parms.link = p->link; |
1165 | t->parms.proto = p->proto; | 1161 | t->parms.proto = p->proto; |
1166 | ip6_tnl_dst_reset(t); | 1162 | ip6_tnl_dst_reset(t); |
1167 | ip6_tnl_link_config(t); | 1163 | ip6_tnl_link_config(t); |
1168 | return 0; | 1164 | return 0; |
1169 | } | 1165 | } |
1170 | 1166 | ||
1171 | /** | 1167 | /** |
1172 | * ip6_tnl_ioctl - configure ipv6 tunnels from userspace | 1168 | * ip6_tnl_ioctl - configure ipv6 tunnels from userspace |
1173 | * @dev: virtual device associated with tunnel | 1169 | * @dev: virtual device associated with tunnel |
1174 | * @ifr: parameters passed from userspace | 1170 | * @ifr: parameters passed from userspace |
1175 | * @cmd: command to be performed | 1171 | * @cmd: command to be performed |
1176 | * | 1172 | * |
1177 | * Description: | 1173 | * Description: |
1178 | * ip6_tnl_ioctl() is used for managing IPv6 tunnels | 1174 | * ip6_tnl_ioctl() is used for managing IPv6 tunnels |
1179 | * from userspace. | 1175 | * from userspace. |
1180 | * | 1176 | * |
1181 | * The possible commands are the following: | 1177 | * The possible commands are the following: |
1182 | * %SIOCGETTUNNEL: get tunnel parameters for device | 1178 | * %SIOCGETTUNNEL: get tunnel parameters for device |
1183 | * %SIOCADDTUNNEL: add tunnel matching given tunnel parameters | 1179 | * %SIOCADDTUNNEL: add tunnel matching given tunnel parameters |
1184 | * %SIOCCHGTUNNEL: change tunnel parameters to those given | 1180 | * %SIOCCHGTUNNEL: change tunnel parameters to those given |
1185 | * %SIOCDELTUNNEL: delete tunnel | 1181 | * %SIOCDELTUNNEL: delete tunnel |
1186 | * | 1182 | * |
1187 | * The fallback device "ip6tnl0", created during module | 1183 | * The fallback device "ip6tnl0", created during module |
1188 | * initialization, can be used for creating other tunnel devices. | 1184 | * initialization, can be used for creating other tunnel devices. |
1189 | * | 1185 | * |
1190 | * Return: | 1186 | * Return: |
1191 | * 0 on success, | 1187 | * 0 on success, |
1192 | * %-EFAULT if unable to copy data to or from userspace, | 1188 | * %-EFAULT if unable to copy data to or from userspace, |
1193 | * %-EPERM if the current process lacks %CAP_NET_ADMIN, | 1189 | * %-EPERM if the current process lacks %CAP_NET_ADMIN, |
1194 | * %-EINVAL if passed tunnel parameters are invalid, | 1190 | * %-EINVAL if passed tunnel parameters are invalid, |
1195 | * %-EEXIST if changing a tunnel's parameters would cause a conflict | 1191 | * %-EEXIST if changing a tunnel's parameters would cause a conflict |
1196 | * %-ENODEV if attempting to change or delete a nonexistent device | 1192 | * %-ENODEV if attempting to change or delete a nonexistent device |
1197 | **/ | 1193 | **/ |
1198 | 1194 | ||
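As a usage illustration only (not part of this diff), a userspace caller reaches this handler roughly as follows; the sketch assumes the ip6_tnl_parm layout exported through linux/ip6_tunnel.h:

	#include <string.h>
	#include <sys/ioctl.h>
	#include <net/if.h>
	#include <linux/if_tunnel.h>	/* SIOCGETTUNNEL */
	#include <linux/ip6_tunnel.h>	/* struct ip6_tnl_parm */

	int get_tunnel_parms(int fd)	/* fd: any AF_INET6 socket */
	{
		struct ip6_tnl_parm p;
		struct ifreq ifr;

		memset(&p, 0, sizeof(p));
		memset(&ifr, 0, sizeof(ifr));
		strncpy(ifr.ifr_name, "ip6tnl0", IFNAMSIZ - 1);	/* fallback device */
		ifr.ifr_data = (void *)&p;	/* parameters copied in and out */

		return ioctl(fd, SIOCGETTUNNEL, &ifr);	/* 0 on success */
	}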
1199 | static int | 1195 | static int |
1200 | ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) | 1196 | ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) |
1201 | { | 1197 | { |
1202 | int err = 0; | 1198 | int err = 0; |
1203 | struct ip6_tnl_parm p; | 1199 | struct ip6_tnl_parm p; |
1204 | struct ip6_tnl *t = NULL; | 1200 | struct ip6_tnl *t = NULL; |
1205 | struct net *net = dev_net(dev); | 1201 | struct net *net = dev_net(dev); |
1206 | struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); | 1202 | struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); |
1207 | 1203 | ||
1208 | switch (cmd) { | 1204 | switch (cmd) { |
1209 | case SIOCGETTUNNEL: | 1205 | case SIOCGETTUNNEL: |
1210 | if (dev == ip6n->fb_tnl_dev) { | 1206 | if (dev == ip6n->fb_tnl_dev) { |
1211 | if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) { | 1207 | if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) { |
1212 | err = -EFAULT; | 1208 | err = -EFAULT; |
1213 | break; | 1209 | break; |
1214 | } | 1210 | } |
1215 | t = ip6_tnl_locate(net, &p, 0); | 1211 | t = ip6_tnl_locate(net, &p, 0); |
1216 | } | 1212 | } |
1217 | if (t == NULL) | 1213 | if (t == NULL) |
1218 | t = netdev_priv(dev); | 1214 | t = netdev_priv(dev); |
1219 | memcpy(&p, &t->parms, sizeof (p)); | 1215 | memcpy(&p, &t->parms, sizeof (p)); |
1220 | if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof (p))) { | 1216 | if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof (p))) { |
1221 | err = -EFAULT; | 1217 | err = -EFAULT; |
1222 | } | 1218 | } |
1223 | break; | 1219 | break; |
1224 | case SIOCADDTUNNEL: | 1220 | case SIOCADDTUNNEL: |
1225 | case SIOCCHGTUNNEL: | 1221 | case SIOCCHGTUNNEL: |
1226 | err = -EPERM; | 1222 | err = -EPERM; |
1227 | if (!capable(CAP_NET_ADMIN)) | 1223 | if (!capable(CAP_NET_ADMIN)) |
1228 | break; | 1224 | break; |
1229 | err = -EFAULT; | 1225 | err = -EFAULT; |
1230 | if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) | 1226 | if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) |
1231 | break; | 1227 | break; |
1232 | err = -EINVAL; | 1228 | err = -EINVAL; |
1233 | if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP && | 1229 | if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP && |
1234 | p.proto != 0) | 1230 | p.proto != 0) |
1235 | break; | 1231 | break; |
1236 | t = ip6_tnl_locate(net, &p, cmd == SIOCADDTUNNEL); | 1232 | t = ip6_tnl_locate(net, &p, cmd == SIOCADDTUNNEL); |
1237 | if (dev != ip6n->fb_tnl_dev && cmd == SIOCCHGTUNNEL) { | 1233 | if (dev != ip6n->fb_tnl_dev && cmd == SIOCCHGTUNNEL) { |
1238 | if (t != NULL) { | 1234 | if (t != NULL) { |
1239 | if (t->dev != dev) { | 1235 | if (t->dev != dev) { |
1240 | err = -EEXIST; | 1236 | err = -EEXIST; |
1241 | break; | 1237 | break; |
1242 | } | 1238 | } |
1243 | } else | 1239 | } else |
1244 | t = netdev_priv(dev); | 1240 | t = netdev_priv(dev); |
1245 | 1241 | ||
1246 | ip6_tnl_unlink(ip6n, t); | 1242 | ip6_tnl_unlink(ip6n, t); |
1247 | err = ip6_tnl_change(t, &p); | 1243 | err = ip6_tnl_change(t, &p); |
1248 | ip6_tnl_link(ip6n, t); | 1244 | ip6_tnl_link(ip6n, t); |
1249 | netdev_state_change(dev); | 1245 | netdev_state_change(dev); |
1250 | } | 1246 | } |
1251 | if (t) { | 1247 | if (t) { |
1252 | err = 0; | 1248 | err = 0; |
1253 | if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof (p))) | 1249 | if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof (p))) |
1254 | err = -EFAULT; | 1250 | err = -EFAULT; |
1255 | 1251 | ||
1256 | } else | 1252 | } else |
1257 | err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); | 1253 | err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); |
1258 | break; | 1254 | break; |
1259 | case SIOCDELTUNNEL: | 1255 | case SIOCDELTUNNEL: |
1260 | err = -EPERM; | 1256 | err = -EPERM; |
1261 | if (!capable(CAP_NET_ADMIN)) | 1257 | if (!capable(CAP_NET_ADMIN)) |
1262 | break; | 1258 | break; |
1263 | 1259 | ||
1264 | if (dev == ip6n->fb_tnl_dev) { | 1260 | if (dev == ip6n->fb_tnl_dev) { |
1265 | err = -EFAULT; | 1261 | err = -EFAULT; |
1266 | if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) | 1262 | if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) |
1267 | break; | 1263 | break; |
1268 | err = -ENOENT; | 1264 | err = -ENOENT; |
1269 | if ((t = ip6_tnl_locate(net, &p, 0)) == NULL) | 1265 | if ((t = ip6_tnl_locate(net, &p, 0)) == NULL) |
1270 | break; | 1266 | break; |
1271 | err = -EPERM; | 1267 | err = -EPERM; |
1272 | if (t->dev == ip6n->fb_tnl_dev) | 1268 | if (t->dev == ip6n->fb_tnl_dev) |
1273 | break; | 1269 | break; |
1274 | dev = t->dev; | 1270 | dev = t->dev; |
1275 | } | 1271 | } |
1276 | err = 0; | 1272 | err = 0; |
1277 | unregister_netdevice(dev); | 1273 | unregister_netdevice(dev); |
1278 | break; | 1274 | break; |
1279 | default: | 1275 | default: |
1280 | err = -EINVAL; | 1276 | err = -EINVAL; |
1281 | } | 1277 | } |
1282 | return err; | 1278 | return err; |
1283 | } | 1279 | } |
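
For context, ip6_tnl_ioctl() above is reached by passing a struct ip6_tnl_parm through ifr_ifru.ifru_data. A minimal userspace sketch of a %SIOCGETTUNNEL query against the fallback device follows; the AF_INET6 datagram socket and the abbreviated error handling are illustrative, and exact uapi header spellings can vary slightly across kernel/libc versions.

/* Hypothetical userspace probe: read back "ip6tnl0" parameters. */
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <linux/in6.h>          /* struct in6_addr, used by ip6_tnl_parm */
#include <linux/if.h>           /* struct ifreq, IFNAMSIZ */
#include <linux/if_tunnel.h>    /* SIOCGETTUNNEL */
#include <linux/ip6_tunnel.h>   /* struct ip6_tnl_parm */

int main(void)
{
	struct ip6_tnl_parm p;
	struct ifreq ifr;
	int fd = socket(AF_INET6, SOCK_DGRAM, 0);

	memset(&p, 0, sizeof(p));
	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "ip6tnl0", IFNAMSIZ - 1);
	ifr.ifr_ifru.ifru_data = (void *)&p;	/* kernel copies parms via this pointer */

	if (ioctl(fd, SIOCGETTUNNEL, &ifr) < 0)
		perror("SIOCGETTUNNEL");
	else
		printf("name=%s proto=%u link=%d\n", p.name, p.proto, p.link);
	return 0;
}

Note that the %SIOCGETTUNNEL branch needs no capability, matching the handler above: only the add, change, and delete commands check %CAP_NET_ADMIN.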
1284 | 1280 | ||
1285 | /** | 1281 | /** |
1286 | * ip6_tnl_change_mtu - change mtu manually for tunnel device | 1282 | * ip6_tnl_change_mtu - change mtu manually for tunnel device |
1287 | * @dev: virtual device associated with tunnel | 1283 | * @dev: virtual device associated with tunnel |
1288 | * @new_mtu: the new mtu | 1284 | * @new_mtu: the new mtu |
1289 | * | 1285 | * |
1290 | * Return: | 1286 | * Return: |
1291 | * 0 on success, | 1287 | * 0 on success, |
1292 | * %-EINVAL if mtu too small | 1288 | * %-EINVAL if mtu too small |
1293 | **/ | 1289 | **/ |
1294 | 1290 | ||
1295 | static int | 1291 | static int |
1296 | ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) | 1292 | ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) |
1297 | { | 1293 | { |
1298 | if (new_mtu < IPV6_MIN_MTU) { | 1294 | if (new_mtu < IPV6_MIN_MTU) { |
1299 | return -EINVAL; | 1295 | return -EINVAL; |
1300 | } | 1296 | } |
1301 | dev->mtu = new_mtu; | 1297 | dev->mtu = new_mtu; |
1302 | return 0; | 1298 | return 0; |
1303 | } | 1299 | } |
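
Since dev_set_mtu() funnels %SIOCSIFMTU through .ndo_change_mtu, any value below IPV6_MIN_MTU (1280, the IPv6 minimum link MTU) bounces with %-EINVAL. A quick probe of that floor, reusing the headers from the %SIOCGETTUNNEL sketch above (illustrative only; expect %-EPERM instead when run without %CAP_NET_ADMIN):

	struct ifreq ifr;
	int fd = socket(AF_INET6, SOCK_DGRAM, 0);

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "ip6tnl0", IFNAMSIZ - 1);
	ifr.ifr_mtu = 1279;			/* one byte below IPV6_MIN_MTU */
	if (ioctl(fd, SIOCSIFMTU, &ifr) < 0)
		perror("SIOCSIFMTU");		/* ip6_tnl_change_mtu() returns -EINVAL */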
1304 | 1300 | ||
1305 | 1301 | ||
1306 | static const struct net_device_ops ip6_tnl_netdev_ops = { | 1302 | static const struct net_device_ops ip6_tnl_netdev_ops = { |
1307 | .ndo_uninit = ip6_tnl_dev_uninit, | 1303 | .ndo_uninit = ip6_tnl_dev_uninit, |
1308 | .ndo_start_xmit = ip6_tnl_xmit, | 1304 | .ndo_start_xmit = ip6_tnl_xmit, |
1309 | .ndo_do_ioctl = ip6_tnl_ioctl, | 1305 | .ndo_do_ioctl = ip6_tnl_ioctl, |
1310 | .ndo_change_mtu = ip6_tnl_change_mtu, | 1306 | .ndo_change_mtu = ip6_tnl_change_mtu, |
1311 | }; | 1307 | }; |
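
These tunnel commands live in the SIOCDEVPRIVATE ioctl range, so they reach .ndo_do_ioctl above through the core device-ioctl path rather than through any protocol handler. Roughly, condensed from the default case of dev_ifsioc() in net/core/dev.c of this era (a sketch of the shape, not a verbatim quote):

	/* after the generic SIOCxIFxxx commands have been handled */
	if (cmd >= SIOCDEVPRIVATE && cmd <= SIOCDEVPRIVATE + 15) {
		const struct net_device_ops *ops = dev->netdev_ops;

		err = -EOPNOTSUPP;
		if (ops->ndo_do_ioctl && netif_device_present(dev))
			err = ops->ndo_do_ioctl(dev, ifr, cmd);
	}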
1312 | 1308 | ||
1313 | /** | 1309 | /** |
1314 | * ip6_tnl_dev_setup - setup virtual tunnel device | 1310 | * ip6_tnl_dev_setup - setup virtual tunnel device |
1315 | * @dev: virtual device associated with tunnel | 1311 | * @dev: virtual device associated with tunnel |
1316 | * | 1312 | * |
1317 | * Description: | 1313 | * Description: |
1318 | * Initialize function pointers and device parameters | 1314 | * Initialize function pointers and device parameters |
1319 | **/ | 1315 | **/ |
1320 | 1316 | ||
1321 | static void ip6_tnl_dev_setup(struct net_device *dev) | 1317 | static void ip6_tnl_dev_setup(struct net_device *dev) |
1322 | { | 1318 | { |
1323 | dev->netdev_ops = &ip6_tnl_netdev_ops; | 1319 | dev->netdev_ops = &ip6_tnl_netdev_ops; |
1324 | dev->destructor = free_netdev; | 1320 | dev->destructor = free_netdev; |
1325 | 1321 | ||
1326 | dev->type = ARPHRD_TUNNEL6; | 1322 | dev->type = ARPHRD_TUNNEL6; |
1327 | dev->hard_header_len = LL_MAX_HEADER + sizeof (struct ipv6hdr); | 1323 | dev->hard_header_len = LL_MAX_HEADER + sizeof (struct ipv6hdr); |
1328 | dev->mtu = ETH_DATA_LEN - sizeof (struct ipv6hdr); | 1324 | dev->mtu = ETH_DATA_LEN - sizeof (struct ipv6hdr); |
1329 | dev->flags |= IFF_NOARP; | 1325 | dev->flags |= IFF_NOARP; |
1330 | dev->addr_len = sizeof(struct in6_addr); | 1326 | dev->addr_len = sizeof(struct in6_addr); |
1331 | dev->features |= NETIF_F_NETNS_LOCAL; | 1327 | dev->features |= NETIF_F_NETNS_LOCAL; |
1332 | } | 1328 | } |
1333 | 1329 | ||
1334 | 1330 | ||
1335 | /** | 1331 | /** |
1336 | * ip6_tnl_dev_init_gen - general initializer for all tunnel devices | 1332 | * ip6_tnl_dev_init_gen - general initializer for all tunnel devices |
1337 | * @dev: virtual device associated with tunnel | 1333 | * @dev: virtual device associated with tunnel |
1338 | **/ | 1334 | **/ |
1339 | 1335 | ||
1340 | static inline void | 1336 | static inline void |
1341 | ip6_tnl_dev_init_gen(struct net_device *dev) | 1337 | ip6_tnl_dev_init_gen(struct net_device *dev) |
1342 | { | 1338 | { |
1343 | struct ip6_tnl *t = netdev_priv(dev); | 1339 | struct ip6_tnl *t = netdev_priv(dev); |
1344 | t->dev = dev; | 1340 | t->dev = dev; |
1345 | strcpy(t->parms.name, dev->name); | 1341 | strcpy(t->parms.name, dev->name); |
1346 | } | 1342 | } |
1347 | 1343 | ||
1348 | /** | 1344 | /** |
1349 | * ip6_tnl_dev_init - initializer for all non-fallback tunnel devices | 1345 | * ip6_tnl_dev_init - initializer for all non-fallback tunnel devices |
1350 | * @dev: virtual device associated with tunnel | 1346 | * @dev: virtual device associated with tunnel |
1351 | **/ | 1347 | **/ |
1352 | 1348 | ||
1353 | static void ip6_tnl_dev_init(struct net_device *dev) | 1349 | static void ip6_tnl_dev_init(struct net_device *dev) |
1354 | { | 1350 | { |
1355 | struct ip6_tnl *t = netdev_priv(dev); | 1351 | struct ip6_tnl *t = netdev_priv(dev); |
1356 | ip6_tnl_dev_init_gen(dev); | 1352 | ip6_tnl_dev_init_gen(dev); |
1357 | ip6_tnl_link_config(t); | 1353 | ip6_tnl_link_config(t); |
1358 | } | 1354 | } |
1359 | 1355 | ||
1360 | /** | 1356 | /** |
1361 | * ip6_fb_tnl_dev_init - initializer for fallback tunnel device | 1357 | * ip6_fb_tnl_dev_init - initializer for fallback tunnel device |
1362 | * @dev: fallback device | 1358 | * @dev: fallback device |
1363 | * | 1359 | * |
1364 | * Return: none | 1360 | * Return: none |
1365 | **/ | 1361 | **/ |
1366 | 1362 | ||
1367 | static void __net_init ip6_fb_tnl_dev_init(struct net_device *dev) | 1363 | static void __net_init ip6_fb_tnl_dev_init(struct net_device *dev) |
1368 | { | 1364 | { |
1369 | struct ip6_tnl *t = netdev_priv(dev); | 1365 | struct ip6_tnl *t = netdev_priv(dev); |
1370 | struct net *net = dev_net(dev); | 1366 | struct net *net = dev_net(dev); |
1371 | struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); | 1367 | struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); |
1372 | 1368 | ||
1373 | ip6_tnl_dev_init_gen(dev); | 1369 | ip6_tnl_dev_init_gen(dev); |
1374 | t->parms.proto = IPPROTO_IPV6; | 1370 | t->parms.proto = IPPROTO_IPV6; |
1375 | dev_hold(dev); | 1371 | dev_hold(dev); |
1376 | ip6n->tnls_wc[0] = t; | 1372 | ip6n->tnls_wc[0] = t; |
1377 | } | 1373 | } |
1378 | 1374 | ||
1379 | static struct xfrm6_tunnel ip4ip6_handler = { | 1375 | static struct xfrm6_tunnel ip4ip6_handler = { |
1380 | .handler = ip4ip6_rcv, | 1376 | .handler = ip4ip6_rcv, |
1381 | .err_handler = ip4ip6_err, | 1377 | .err_handler = ip4ip6_err, |
1382 | .priority = 1, | 1378 | .priority = 1, |
1383 | }; | 1379 | }; |
1384 | 1380 | ||
1385 | static struct xfrm6_tunnel ip6ip6_handler = { | 1381 | static struct xfrm6_tunnel ip6ip6_handler = { |
1386 | .handler = ip6ip6_rcv, | 1382 | .handler = ip6ip6_rcv, |
1387 | .err_handler = ip6ip6_err, | 1383 | .err_handler = ip6ip6_err, |
1388 | .priority = 1, | 1384 | .priority = 1, |
1389 | }; | 1385 | }; |
1390 | 1386 | ||
1391 | static void __net_exit ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n) | 1387 | static void __net_exit ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n) |
1392 | { | 1388 | { |
1393 | int h; | 1389 | int h; |
1394 | struct ip6_tnl *t; | 1390 | struct ip6_tnl *t; |
1395 | LIST_HEAD(list); | 1391 | LIST_HEAD(list); |
1396 | 1392 | ||
1397 | for (h = 0; h < HASH_SIZE; h++) { | 1393 | for (h = 0; h < HASH_SIZE; h++) { |
1398 | t = ip6n->tnls_r_l[h]; | 1394 | t = ip6n->tnls_r_l[h]; |
1399 | while (t != NULL) { | 1395 | while (t != NULL) { |
1400 | unregister_netdevice_queue(t->dev, &list); | 1396 | unregister_netdevice_queue(t->dev, &list); |
1401 | t = t->next; | 1397 | t = t->next; |
1402 | } | 1398 | } |
1403 | } | 1399 | } |
1404 | 1400 | ||
1405 | t = ip6n->tnls_wc[0]; | 1401 | t = ip6n->tnls_wc[0]; |
1406 | unregister_netdevice_queue(t->dev, &list); | 1402 | unregister_netdevice_queue(t->dev, &list); |
1407 | unregister_netdevice_many(&list); | 1403 | unregister_netdevice_many(&list); |
1408 | } | 1404 | } |
1409 | 1405 | ||
1410 | static int __net_init ip6_tnl_init_net(struct net *net) | 1406 | static int __net_init ip6_tnl_init_net(struct net *net) |
1411 | { | 1407 | { |
1412 | struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); | 1408 | struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); |
1413 | int err; | 1409 | int err; |
1414 | 1410 | ||
1415 | ip6n->tnls[0] = ip6n->tnls_wc; | 1411 | ip6n->tnls[0] = ip6n->tnls_wc; |
1416 | ip6n->tnls[1] = ip6n->tnls_r_l; | 1412 | ip6n->tnls[1] = ip6n->tnls_r_l; |
1417 | 1413 | ||
1418 | err = -ENOMEM; | 1414 | err = -ENOMEM; |
1419 | ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0", | 1415 | ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0", |
1420 | ip6_tnl_dev_setup); | 1416 | ip6_tnl_dev_setup); |
1421 | 1417 | ||
1422 | if (!ip6n->fb_tnl_dev) | 1418 | if (!ip6n->fb_tnl_dev) |
1423 | goto err_alloc_dev; | 1419 | goto err_alloc_dev; |
1424 | dev_net_set(ip6n->fb_tnl_dev, net); | 1420 | dev_net_set(ip6n->fb_tnl_dev, net); |
1425 | 1421 | ||
1426 | ip6_fb_tnl_dev_init(ip6n->fb_tnl_dev); | 1422 | ip6_fb_tnl_dev_init(ip6n->fb_tnl_dev); |
1427 | 1423 | ||
1428 | err = register_netdev(ip6n->fb_tnl_dev); | 1424 | err = register_netdev(ip6n->fb_tnl_dev); |
1429 | if (err < 0) | 1425 | if (err < 0) |
1430 | goto err_register; | 1426 | goto err_register; |
1431 | return 0; | 1427 | return 0; |
1432 | 1428 | ||
1433 | err_register: | 1429 | err_register: |
1434 | free_netdev(ip6n->fb_tnl_dev); | 1430 | free_netdev(ip6n->fb_tnl_dev); |
1435 | err_alloc_dev: | 1431 | err_alloc_dev: |
1436 | return err; | 1432 | return err; |
1437 | } | 1433 | } |
1438 | 1434 | ||
1439 | static void __net_exit ip6_tnl_exit_net(struct net *net) | 1435 | static void __net_exit ip6_tnl_exit_net(struct net *net) |
1440 | { | 1436 | { |
1441 | struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); | 1437 | struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); |
1442 | 1438 | ||
1443 | rtnl_lock(); | 1439 | rtnl_lock(); |
1444 | ip6_tnl_destroy_tunnels(ip6n); | 1440 | ip6_tnl_destroy_tunnels(ip6n); |
1445 | rtnl_unlock(); | 1441 | rtnl_unlock(); |
1446 | } | 1442 | } |
1447 | 1443 | ||
1448 | static struct pernet_operations ip6_tnl_net_ops = { | 1444 | static struct pernet_operations ip6_tnl_net_ops = { |
1449 | .init = ip6_tnl_init_net, | 1445 | .init = ip6_tnl_init_net, |
1450 | .exit = ip6_tnl_exit_net, | 1446 | .exit = ip6_tnl_exit_net, |
1451 | .id = &ip6_tnl_net_id, | 1447 | .id = &ip6_tnl_net_id, |
1452 | .size = sizeof(struct ip6_tnl_net), | 1448 | .size = sizeof(struct ip6_tnl_net), |
1453 | }; | 1449 | }; |
1454 | 1450 | ||
1455 | /** | 1451 | /** |
1456 | * ip6_tunnel_init - register protocol and reserve needed resources | 1452 | * ip6_tunnel_init - register protocol and reserve needed resources |
1457 | * | 1453 | * |
1458 | * Return: 0 on success | 1454 | * Return: 0 on success |
1459 | **/ | 1455 | **/ |
1460 | 1456 | ||
1461 | static int __init ip6_tunnel_init(void) | 1457 | static int __init ip6_tunnel_init(void) |
1462 | { | 1458 | { |
1463 | int err; | 1459 | int err; |
1464 | 1460 | ||
1465 | err = register_pernet_device(&ip6_tnl_net_ops); | 1461 | err = register_pernet_device(&ip6_tnl_net_ops); |
1466 | if (err < 0) | 1462 | if (err < 0) |
1467 | goto out_pernet; | 1463 | goto out_pernet; |
1468 | 1464 | ||
1469 | err = xfrm6_tunnel_register(&ip4ip6_handler, AF_INET); | 1465 | err = xfrm6_tunnel_register(&ip4ip6_handler, AF_INET); |
1470 | if (err < 0) { | 1466 | if (err < 0) { |
1471 | printk(KERN_ERR "ip6_tunnel init: can't register ip4ip6\n"); | 1467 | printk(KERN_ERR "ip6_tunnel init: can't register ip4ip6\n"); |
1472 | goto out_ip4ip6; | 1468 | goto out_ip4ip6; |
1473 | } | 1469 | } |
1474 | 1470 | ||
1475 | err = xfrm6_tunnel_register(&ip6ip6_handler, AF_INET6); | 1471 | err = xfrm6_tunnel_register(&ip6ip6_handler, AF_INET6); |
1476 | if (err < 0) { | 1472 | if (err < 0) { |
1477 | printk(KERN_ERR "ip6_tunnel init: can't register ip6ip6\n"); | 1473 | printk(KERN_ERR "ip6_tunnel init: can't register ip6ip6\n"); |
1478 | goto out_ip6ip6; | 1474 | goto out_ip6ip6; |
1479 | } | 1475 | } |
1480 | 1476 | ||
1481 | return 0; | 1477 | return 0; |
1482 | 1478 | ||
1483 | out_ip6ip6: | 1479 | out_ip6ip6: |
1484 | xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET); | 1480 | xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET); |
1485 | out_ip4ip6: | 1481 | out_ip4ip6: |
1486 | unregister_pernet_device(&ip6_tnl_net_ops); | 1482 | unregister_pernet_device(&ip6_tnl_net_ops); |
1487 | out_pernet: | 1483 | out_pernet: |
1488 | return err; | 1484 | return err; |
1489 | } | 1485 | } |
1490 | 1486 | ||
1491 | /** | 1487 | /** |
1492 | * ip6_tunnel_cleanup - free resources and unregister protocol | 1488 | * ip6_tunnel_cleanup - free resources and unregister protocol |
1493 | **/ | 1489 | **/ |
1494 | 1490 | ||
1495 | static void __exit ip6_tunnel_cleanup(void) | 1491 | static void __exit ip6_tunnel_cleanup(void) |
1496 | { | 1492 | { |
1497 | if (xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET)) | 1493 | if (xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET)) |
1498 | printk(KERN_INFO "ip6_tunnel close: can't deregister ip4ip6\n"); | 1494 | printk(KERN_INFO "ip6_tunnel close: can't deregister ip4ip6\n"); |
1499 | 1495 | ||
1500 | if (xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6)) | 1496 | if (xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6)) |
1501 | printk(KERN_INFO "ip6_tunnel close: can't deregister ip6ip6\n"); | 1497 | printk(KERN_INFO "ip6_tunnel close: can't deregister ip6ip6\n"); |
1502 | 1498 | ||
1503 | unregister_pernet_device(&ip6_tnl_net_ops); | 1499 | unregister_pernet_device(&ip6_tnl_net_ops); |
1504 | } | 1500 | } |
1505 | 1501 | ||
1506 | module_init(ip6_tunnel_init); | 1502 | module_init(ip6_tunnel_init); |
1507 | module_exit(ip6_tunnel_cleanup); | 1503 | module_exit(ip6_tunnel_cleanup); |
1508 | 1504 |
net/ipv6/ip6mr.c
1 | /* | 1 | /* |
2 | * Linux IPv6 multicast routing support for BSD pim6sd | 2 | * Linux IPv6 multicast routing support for BSD pim6sd |
3 | * Based on net/ipv4/ipmr.c. | 3 | * Based on net/ipv4/ipmr.c. |
4 | * | 4 | * |
5 | * (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr> | 5 | * (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr> |
6 | * LSIIT Laboratory, Strasbourg, France | 6 | * LSIIT Laboratory, Strasbourg, France |
7 | * (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com> | 7 | * (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com> |
8 | * 6WIND, Paris, France | 8 | * 6WIND, Paris, France |
9 | * Copyright (C)2007,2008 USAGI/WIDE Project | 9 | * Copyright (C)2007,2008 USAGI/WIDE Project |
10 | * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org> | 10 | * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org> |
11 | * | 11 | * |
12 | * This program is free software; you can redistribute it and/or | 12 | * This program is free software; you can redistribute it and/or |
13 | * modify it under the terms of the GNU General Public License | 13 | * modify it under the terms of the GNU General Public License |
14 | * as published by the Free Software Foundation; either version | 14 | * as published by the Free Software Foundation; either version |
15 | * 2 of the License, or (at your option) any later version. | 15 | * 2 of the License, or (at your option) any later version. |
16 | * | 16 | * |
17 | */ | 17 | */ |
18 | 18 | ||
19 | #include <asm/system.h> | 19 | #include <asm/system.h> |
20 | #include <asm/uaccess.h> | 20 | #include <asm/uaccess.h> |
21 | #include <linux/types.h> | 21 | #include <linux/types.h> |
22 | #include <linux/sched.h> | 22 | #include <linux/sched.h> |
23 | #include <linux/errno.h> | 23 | #include <linux/errno.h> |
24 | #include <linux/timer.h> | 24 | #include <linux/timer.h> |
25 | #include <linux/mm.h> | 25 | #include <linux/mm.h> |
26 | #include <linux/kernel.h> | 26 | #include <linux/kernel.h> |
27 | #include <linux/fcntl.h> | 27 | #include <linux/fcntl.h> |
28 | #include <linux/stat.h> | 28 | #include <linux/stat.h> |
29 | #include <linux/socket.h> | 29 | #include <linux/socket.h> |
30 | #include <linux/inet.h> | 30 | #include <linux/inet.h> |
31 | #include <linux/netdevice.h> | 31 | #include <linux/netdevice.h> |
32 | #include <linux/inetdevice.h> | 32 | #include <linux/inetdevice.h> |
33 | #include <linux/proc_fs.h> | 33 | #include <linux/proc_fs.h> |
34 | #include <linux/seq_file.h> | 34 | #include <linux/seq_file.h> |
35 | #include <linux/init.h> | 35 | #include <linux/init.h> |
36 | #include <linux/slab.h> | 36 | #include <linux/slab.h> |
37 | #include <net/protocol.h> | 37 | #include <net/protocol.h> |
38 | #include <linux/skbuff.h> | 38 | #include <linux/skbuff.h> |
39 | #include <net/sock.h> | 39 | #include <net/sock.h> |
40 | #include <net/raw.h> | 40 | #include <net/raw.h> |
41 | #include <linux/notifier.h> | 41 | #include <linux/notifier.h> |
42 | #include <linux/if_arp.h> | 42 | #include <linux/if_arp.h> |
43 | #include <net/checksum.h> | 43 | #include <net/checksum.h> |
44 | #include <net/netlink.h> | 44 | #include <net/netlink.h> |
45 | #include <net/fib_rules.h> | 45 | #include <net/fib_rules.h> |
46 | 46 | ||
47 | #include <net/ipv6.h> | 47 | #include <net/ipv6.h> |
48 | #include <net/ip6_route.h> | 48 | #include <net/ip6_route.h> |
49 | #include <linux/mroute6.h> | 49 | #include <linux/mroute6.h> |
50 | #include <linux/pim.h> | 50 | #include <linux/pim.h> |
51 | #include <net/addrconf.h> | 51 | #include <net/addrconf.h> |
52 | #include <linux/netfilter_ipv6.h> | 52 | #include <linux/netfilter_ipv6.h> |
53 | #include <net/ip6_checksum.h> | 53 | #include <net/ip6_checksum.h> |
54 | 54 | ||
55 | struct mr6_table { | 55 | struct mr6_table { |
56 | struct list_head list; | 56 | struct list_head list; |
57 | #ifdef CONFIG_NET_NS | 57 | #ifdef CONFIG_NET_NS |
58 | struct net *net; | 58 | struct net *net; |
59 | #endif | 59 | #endif |
60 | u32 id; | 60 | u32 id; |
61 | struct sock *mroute6_sk; | 61 | struct sock *mroute6_sk; |
62 | struct timer_list ipmr_expire_timer; | 62 | struct timer_list ipmr_expire_timer; |
63 | struct list_head mfc6_unres_queue; | 63 | struct list_head mfc6_unres_queue; |
64 | struct list_head mfc6_cache_array[MFC6_LINES]; | 64 | struct list_head mfc6_cache_array[MFC6_LINES]; |
65 | struct mif_device vif6_table[MAXMIFS]; | 65 | struct mif_device vif6_table[MAXMIFS]; |
66 | int maxvif; | 66 | int maxvif; |
67 | atomic_t cache_resolve_queue_len; | 67 | atomic_t cache_resolve_queue_len; |
68 | int mroute_do_assert; | 68 | int mroute_do_assert; |
69 | int mroute_do_pim; | 69 | int mroute_do_pim; |
70 | #ifdef CONFIG_IPV6_PIMSM_V2 | 70 | #ifdef CONFIG_IPV6_PIMSM_V2 |
71 | int mroute_reg_vif_num; | 71 | int mroute_reg_vif_num; |
72 | #endif | 72 | #endif |
73 | }; | 73 | }; |
74 | 74 | ||
75 | struct ip6mr_rule { | 75 | struct ip6mr_rule { |
76 | struct fib_rule common; | 76 | struct fib_rule common; |
77 | }; | 77 | }; |
78 | 78 | ||
79 | struct ip6mr_result { | 79 | struct ip6mr_result { |
80 | struct mr6_table *mrt; | 80 | struct mr6_table *mrt; |
81 | }; | 81 | }; |
82 | 82 | ||
83 | /* Big lock, protecting vif table, mrt cache and mroute socket state. | 83 | /* Big lock, protecting vif table, mrt cache and mroute socket state. |
84 | Note that the changes are serialized via rtnl_lock. | 84 | Note that the changes are serialized via rtnl_lock. |
85 | */ | 85 | */ |
86 | 86 | ||
87 | static DEFINE_RWLOCK(mrt_lock); | 87 | static DEFINE_RWLOCK(mrt_lock); |
88 | 88 | ||
89 | /* | 89 | /* |
90 | * Multicast router control variables | 90 | * Multicast router control variables |
91 | */ | 91 | */ |
92 | 92 | ||
93 | #define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL) | 93 | #define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL) |
94 | 94 | ||
95 | /* Special spinlock for queue of unresolved entries */ | 95 | /* Special spinlock for queue of unresolved entries */ |
96 | static DEFINE_SPINLOCK(mfc_unres_lock); | 96 | static DEFINE_SPINLOCK(mfc_unres_lock); |
97 | 97 | ||
98 | /* We return to Alan's original scheme. The hash table of resolved | 98 | /* We return to Alan's original scheme. The hash table of resolved |
99 | entries is changed only in process context and protected | 99 | entries is changed only in process context and protected |
100 | with the weak lock mrt_lock. The queue of unresolved entries is | 100 | with the weak lock mrt_lock. The queue of unresolved entries is |
101 | protected with the strong spinlock mfc_unres_lock. | 101 | protected with the strong spinlock mfc_unres_lock. |
102 | 102 | ||
103 | The data path is thus entirely free of exclusive locks. | 103 | The data path is thus entirely free of exclusive locks. |
104 | */ | 104 | */ |
105 | 105 | ||
106 | static struct kmem_cache *mrt_cachep __read_mostly; | 106 | static struct kmem_cache *mrt_cachep __read_mostly; |
107 | 107 | ||
108 | static struct mr6_table *ip6mr_new_table(struct net *net, u32 id); | 108 | static struct mr6_table *ip6mr_new_table(struct net *net, u32 id); |
109 | static void ip6mr_free_table(struct mr6_table *mrt); | 109 | static void ip6mr_free_table(struct mr6_table *mrt); |
110 | 110 | ||
111 | static int ip6_mr_forward(struct net *net, struct mr6_table *mrt, | 111 | static int ip6_mr_forward(struct net *net, struct mr6_table *mrt, |
112 | struct sk_buff *skb, struct mfc6_cache *cache); | 112 | struct sk_buff *skb, struct mfc6_cache *cache); |
113 | static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt, | 113 | static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt, |
114 | mifi_t mifi, int assert); | 114 | mifi_t mifi, int assert); |
115 | static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, | 115 | static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, |
116 | struct mfc6_cache *c, struct rtmsg *rtm); | 116 | struct mfc6_cache *c, struct rtmsg *rtm); |
117 | static int ip6mr_rtm_dumproute(struct sk_buff *skb, | 117 | static int ip6mr_rtm_dumproute(struct sk_buff *skb, |
118 | struct netlink_callback *cb); | 118 | struct netlink_callback *cb); |
119 | static void mroute_clean_tables(struct mr6_table *mrt); | 119 | static void mroute_clean_tables(struct mr6_table *mrt); |
120 | static void ipmr_expire_process(unsigned long arg); | 120 | static void ipmr_expire_process(unsigned long arg); |
121 | 121 | ||
122 | #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES | 122 | #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES |
123 | #define ip6mr_for_each_table(mrt, met) \ | 123 | #define ip6mr_for_each_table(mrt, met) \ |
124 | list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list) | 124 | list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list) |
125 | 125 | ||
126 | static struct mr6_table *ip6mr_get_table(struct net *net, u32 id) | 126 | static struct mr6_table *ip6mr_get_table(struct net *net, u32 id) |
127 | { | 127 | { |
128 | struct mr6_table *mrt; | 128 | struct mr6_table *mrt; |
129 | 129 | ||
130 | ip6mr_for_each_table(mrt, net) { | 130 | ip6mr_for_each_table(mrt, net) { |
131 | if (mrt->id == id) | 131 | if (mrt->id == id) |
132 | return mrt; | 132 | return mrt; |
133 | } | 133 | } |
134 | return NULL; | 134 | return NULL; |
135 | } | 135 | } |
136 | 136 | ||
137 | static int ip6mr_fib_lookup(struct net *net, struct flowi *flp, | 137 | static int ip6mr_fib_lookup(struct net *net, struct flowi *flp, |
138 | struct mr6_table **mrt) | 138 | struct mr6_table **mrt) |
139 | { | 139 | { |
140 | struct ip6mr_result res; | 140 | struct ip6mr_result res; |
141 | struct fib_lookup_arg arg = { .result = &res, }; | 141 | struct fib_lookup_arg arg = { .result = &res, }; |
142 | int err; | 142 | int err; |
143 | 143 | ||
144 | err = fib_rules_lookup(net->ipv6.mr6_rules_ops, flp, 0, &arg); | 144 | err = fib_rules_lookup(net->ipv6.mr6_rules_ops, flp, 0, &arg); |
145 | if (err < 0) | 145 | if (err < 0) |
146 | return err; | 146 | return err; |
147 | *mrt = res.mrt; | 147 | *mrt = res.mrt; |
148 | return 0; | 148 | return 0; |
149 | } | 149 | } |
150 | 150 | ||
151 | static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp, | 151 | static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp, |
152 | int flags, struct fib_lookup_arg *arg) | 152 | int flags, struct fib_lookup_arg *arg) |
153 | { | 153 | { |
154 | struct ip6mr_result *res = arg->result; | 154 | struct ip6mr_result *res = arg->result; |
155 | struct mr6_table *mrt; | 155 | struct mr6_table *mrt; |
156 | 156 | ||
157 | switch (rule->action) { | 157 | switch (rule->action) { |
158 | case FR_ACT_TO_TBL: | 158 | case FR_ACT_TO_TBL: |
159 | break; | 159 | break; |
160 | case FR_ACT_UNREACHABLE: | 160 | case FR_ACT_UNREACHABLE: |
161 | return -ENETUNREACH; | 161 | return -ENETUNREACH; |
162 | case FR_ACT_PROHIBIT: | 162 | case FR_ACT_PROHIBIT: |
163 | return -EACCES; | 163 | return -EACCES; |
164 | case FR_ACT_BLACKHOLE: | 164 | case FR_ACT_BLACKHOLE: |
165 | default: | 165 | default: |
166 | return -EINVAL; | 166 | return -EINVAL; |
167 | } | 167 | } |
168 | 168 | ||
169 | mrt = ip6mr_get_table(rule->fr_net, rule->table); | 169 | mrt = ip6mr_get_table(rule->fr_net, rule->table); |
170 | if (mrt == NULL) | 170 | if (mrt == NULL) |
171 | return -EAGAIN; | 171 | return -EAGAIN; |
172 | res->mrt = mrt; | 172 | res->mrt = mrt; |
173 | return 0; | 173 | return 0; |
174 | } | 174 | } |
175 | 175 | ||
176 | static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags) | 176 | static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags) |
177 | { | 177 | { |
178 | return 1; | 178 | return 1; |
179 | } | 179 | } |
180 | 180 | ||
181 | static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = { | 181 | static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = { |
182 | FRA_GENERIC_POLICY, | 182 | FRA_GENERIC_POLICY, |
183 | }; | 183 | }; |
184 | 184 | ||
185 | static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb, | 185 | static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb, |
186 | struct fib_rule_hdr *frh, struct nlattr **tb) | 186 | struct fib_rule_hdr *frh, struct nlattr **tb) |
187 | { | 187 | { |
188 | return 0; | 188 | return 0; |
189 | } | 189 | } |
190 | 190 | ||
191 | static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, | 191 | static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, |
192 | struct nlattr **tb) | 192 | struct nlattr **tb) |
193 | { | 193 | { |
194 | return 1; | 194 | return 1; |
195 | } | 195 | } |
196 | 196 | ||
197 | static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb, | 197 | static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb, |
198 | struct fib_rule_hdr *frh) | 198 | struct fib_rule_hdr *frh) |
199 | { | 199 | { |
200 | frh->dst_len = 0; | 200 | frh->dst_len = 0; |
201 | frh->src_len = 0; | 201 | frh->src_len = 0; |
202 | frh->tos = 0; | 202 | frh->tos = 0; |
203 | return 0; | 203 | return 0; |
204 | } | 204 | } |
205 | 205 | ||
206 | static const struct fib_rules_ops __net_initdata ip6mr_rules_ops_template = { | 206 | static const struct fib_rules_ops __net_initdata ip6mr_rules_ops_template = { |
207 | .family = RTNL_FAMILY_IP6MR, | 207 | .family = RTNL_FAMILY_IP6MR, |
208 | .rule_size = sizeof(struct ip6mr_rule), | 208 | .rule_size = sizeof(struct ip6mr_rule), |
209 | .addr_size = sizeof(struct in6_addr), | 209 | .addr_size = sizeof(struct in6_addr), |
210 | .action = ip6mr_rule_action, | 210 | .action = ip6mr_rule_action, |
211 | .match = ip6mr_rule_match, | 211 | .match = ip6mr_rule_match, |
212 | .configure = ip6mr_rule_configure, | 212 | .configure = ip6mr_rule_configure, |
213 | .compare = ip6mr_rule_compare, | 213 | .compare = ip6mr_rule_compare, |
214 | .default_pref = fib_default_rule_pref, | 214 | .default_pref = fib_default_rule_pref, |
215 | .fill = ip6mr_rule_fill, | 215 | .fill = ip6mr_rule_fill, |
216 | .nlgroup = RTNLGRP_IPV6_RULE, | 216 | .nlgroup = RTNLGRP_IPV6_RULE, |
217 | .policy = ip6mr_rule_policy, | 217 | .policy = ip6mr_rule_policy, |
218 | .owner = THIS_MODULE, | 218 | .owner = THIS_MODULE, |
219 | }; | 219 | }; |
220 | 220 | ||
221 | static int __net_init ip6mr_rules_init(struct net *net) | 221 | static int __net_init ip6mr_rules_init(struct net *net) |
222 | { | 222 | { |
223 | struct fib_rules_ops *ops; | 223 | struct fib_rules_ops *ops; |
224 | struct mr6_table *mrt; | 224 | struct mr6_table *mrt; |
225 | int err; | 225 | int err; |
226 | 226 | ||
227 | ops = fib_rules_register(&ip6mr_rules_ops_template, net); | 227 | ops = fib_rules_register(&ip6mr_rules_ops_template, net); |
228 | if (IS_ERR(ops)) | 228 | if (IS_ERR(ops)) |
229 | return PTR_ERR(ops); | 229 | return PTR_ERR(ops); |
230 | 230 | ||
231 | INIT_LIST_HEAD(&net->ipv6.mr6_tables); | 231 | INIT_LIST_HEAD(&net->ipv6.mr6_tables); |
232 | 232 | ||
233 | mrt = ip6mr_new_table(net, RT6_TABLE_DFLT); | 233 | mrt = ip6mr_new_table(net, RT6_TABLE_DFLT); |
234 | if (mrt == NULL) { | 234 | if (mrt == NULL) { |
235 | err = -ENOMEM; | 235 | err = -ENOMEM; |
236 | goto err1; | 236 | goto err1; |
237 | } | 237 | } |
238 | 238 | ||
239 | err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0); | 239 | err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0); |
240 | if (err < 0) | 240 | if (err < 0) |
241 | goto err2; | 241 | goto err2; |
242 | 242 | ||
243 | net->ipv6.mr6_rules_ops = ops; | 243 | net->ipv6.mr6_rules_ops = ops; |
244 | return 0; | 244 | return 0; |
245 | 245 | ||
246 | err2: | 246 | err2: |
247 | kfree(mrt); | 247 | kfree(mrt); |
248 | err1: | 248 | err1: |
249 | fib_rules_unregister(ops); | 249 | fib_rules_unregister(ops); |
250 | return err; | 250 | return err; |
251 | } | 251 | } |
252 | 252 | ||
253 | static void __net_exit ip6mr_rules_exit(struct net *net) | 253 | static void __net_exit ip6mr_rules_exit(struct net *net) |
254 | { | 254 | { |
255 | struct mr6_table *mrt, *next; | 255 | struct mr6_table *mrt, *next; |
256 | 256 | ||
257 | list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) | 257 | list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) |
258 | ip6mr_free_table(mrt); | 258 | ip6mr_free_table(mrt); |
259 | fib_rules_unregister(net->ipv6.mr6_rules_ops); | 259 | fib_rules_unregister(net->ipv6.mr6_rules_ops); |
260 | } | 260 | } |
261 | #else | 261 | #else |
262 | #define ip6mr_for_each_table(mrt, net) \ | 262 | #define ip6mr_for_each_table(mrt, net) \ |
263 | for (mrt = net->ipv6.mrt6; mrt; mrt = NULL) | 263 | for (mrt = net->ipv6.mrt6; mrt; mrt = NULL) |
264 | 264 | ||
265 | static struct mr6_table *ip6mr_get_table(struct net *net, u32 id) | 265 | static struct mr6_table *ip6mr_get_table(struct net *net, u32 id) |
266 | { | 266 | { |
267 | return net->ipv6.mrt6; | 267 | return net->ipv6.mrt6; |
268 | } | 268 | } |
269 | 269 | ||
270 | static int ip6mr_fib_lookup(struct net *net, struct flowi *flp, | 270 | static int ip6mr_fib_lookup(struct net *net, struct flowi *flp, |
271 | struct mr6_table **mrt) | 271 | struct mr6_table **mrt) |
272 | { | 272 | { |
273 | *mrt = net->ipv6.mrt6; | 273 | *mrt = net->ipv6.mrt6; |
274 | return 0; | 274 | return 0; |
275 | } | 275 | } |
276 | 276 | ||
277 | static int __net_init ip6mr_rules_init(struct net *net) | 277 | static int __net_init ip6mr_rules_init(struct net *net) |
278 | { | 278 | { |
279 | net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT); | 279 | net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT); |
280 | return net->ipv6.mrt6 ? 0 : -ENOMEM; | 280 | return net->ipv6.mrt6 ? 0 : -ENOMEM; |
281 | } | 281 | } |
282 | 282 | ||
283 | static void __net_exit ip6mr_rules_exit(struct net *net) | 283 | static void __net_exit ip6mr_rules_exit(struct net *net) |
284 | { | 284 | { |
285 | ip6mr_free_table(net->ipv6.mrt6); | 285 | ip6mr_free_table(net->ipv6.mrt6); |
286 | } | 286 | } |
287 | #endif | 287 | #endif |
288 | 288 | ||
289 | static struct mr6_table *ip6mr_new_table(struct net *net, u32 id) | 289 | static struct mr6_table *ip6mr_new_table(struct net *net, u32 id) |
290 | { | 290 | { |
291 | struct mr6_table *mrt; | 291 | struct mr6_table *mrt; |
292 | unsigned int i; | 292 | unsigned int i; |
293 | 293 | ||
294 | mrt = ip6mr_get_table(net, id); | 294 | mrt = ip6mr_get_table(net, id); |
295 | if (mrt != NULL) | 295 | if (mrt != NULL) |
296 | return mrt; | 296 | return mrt; |
297 | 297 | ||
298 | mrt = kzalloc(sizeof(*mrt), GFP_KERNEL); | 298 | mrt = kzalloc(sizeof(*mrt), GFP_KERNEL); |
299 | if (mrt == NULL) | 299 | if (mrt == NULL) |
300 | return NULL; | 300 | return NULL; |
301 | mrt->id = id; | 301 | mrt->id = id; |
302 | write_pnet(&mrt->net, net); | 302 | write_pnet(&mrt->net, net); |
303 | 303 | ||
304 | /* Forwarding cache */ | 304 | /* Forwarding cache */ |
305 | for (i = 0; i < MFC6_LINES; i++) | 305 | for (i = 0; i < MFC6_LINES; i++) |
306 | INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]); | 306 | INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]); |
307 | 307 | ||
308 | INIT_LIST_HEAD(&mrt->mfc6_unres_queue); | 308 | INIT_LIST_HEAD(&mrt->mfc6_unres_queue); |
309 | 309 | ||
310 | setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process, | 310 | setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process, |
311 | (unsigned long)mrt); | 311 | (unsigned long)mrt); |
312 | 312 | ||
313 | #ifdef CONFIG_IPV6_PIMSM_V2 | 313 | #ifdef CONFIG_IPV6_PIMSM_V2 |
314 | mrt->mroute_reg_vif_num = -1; | 314 | mrt->mroute_reg_vif_num = -1; |
315 | #endif | 315 | #endif |
316 | #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES | 316 | #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES |
317 | list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables); | 317 | list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables); |
318 | #endif | 318 | #endif |
319 | return mrt; | 319 | return mrt; |
320 | } | 320 | } |
321 | 321 | ||
322 | static void ip6mr_free_table(struct mr6_table *mrt) | 322 | static void ip6mr_free_table(struct mr6_table *mrt) |
323 | { | 323 | { |
324 | del_timer(&mrt->ipmr_expire_timer); | 324 | del_timer(&mrt->ipmr_expire_timer); |
325 | mroute_clean_tables(mrt); | 325 | mroute_clean_tables(mrt); |
326 | kfree(mrt); | 326 | kfree(mrt); |
327 | } | 327 | } |
328 | 328 | ||
329 | #ifdef CONFIG_PROC_FS | 329 | #ifdef CONFIG_PROC_FS |
330 | 330 | ||
331 | struct ipmr_mfc_iter { | 331 | struct ipmr_mfc_iter { |
332 | struct seq_net_private p; | 332 | struct seq_net_private p; |
333 | struct mr6_table *mrt; | 333 | struct mr6_table *mrt; |
334 | struct list_head *cache; | 334 | struct list_head *cache; |
335 | int ct; | 335 | int ct; |
336 | }; | 336 | }; |
337 | 337 | ||
338 | 338 | ||
339 | static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net, | 339 | static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net, |
340 | struct ipmr_mfc_iter *it, loff_t pos) | 340 | struct ipmr_mfc_iter *it, loff_t pos) |
341 | { | 341 | { |
342 | struct mr6_table *mrt = it->mrt; | 342 | struct mr6_table *mrt = it->mrt; |
343 | struct mfc6_cache *mfc; | 343 | struct mfc6_cache *mfc; |
344 | 344 | ||
345 | read_lock(&mrt_lock); | 345 | read_lock(&mrt_lock); |
346 | for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) { | 346 | for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) { |
347 | it->cache = &mrt->mfc6_cache_array[it->ct]; | 347 | it->cache = &mrt->mfc6_cache_array[it->ct]; |
348 | list_for_each_entry(mfc, it->cache, list) | 348 | list_for_each_entry(mfc, it->cache, list) |
349 | if (pos-- == 0) | 349 | if (pos-- == 0) |
350 | return mfc; | 350 | return mfc; |
351 | } | 351 | } |
352 | read_unlock(&mrt_lock); | 352 | read_unlock(&mrt_lock); |
353 | 353 | ||
354 | spin_lock_bh(&mfc_unres_lock); | 354 | spin_lock_bh(&mfc_unres_lock); |
355 | it->cache = &mrt->mfc6_unres_queue; | 355 | it->cache = &mrt->mfc6_unres_queue; |
356 | list_for_each_entry(mfc, it->cache, list) | 356 | list_for_each_entry(mfc, it->cache, list) |
357 | if (pos-- == 0) | 357 | if (pos-- == 0) |
358 | return mfc; | 358 | return mfc; |
359 | spin_unlock_bh(&mfc_unres_lock); | 359 | spin_unlock_bh(&mfc_unres_lock); |
360 | 360 | ||
361 | it->cache = NULL; | 361 | it->cache = NULL; |
362 | return NULL; | 362 | return NULL; |
363 | } | 363 | } |
364 | 364 | ||
365 | /* | 365 | /* |
366 | * The /proc interfaces to multicast routing: /proc/ip6_mr_cache, /proc/ip6_mr_vif | 366 | * The /proc interfaces to multicast routing: /proc/ip6_mr_cache, /proc/ip6_mr_vif |
367 | */ | 367 | */ |
368 | 368 | ||
369 | struct ipmr_vif_iter { | 369 | struct ipmr_vif_iter { |
370 | struct seq_net_private p; | 370 | struct seq_net_private p; |
371 | struct mr6_table *mrt; | 371 | struct mr6_table *mrt; |
372 | int ct; | 372 | int ct; |
373 | }; | 373 | }; |
374 | 374 | ||
375 | static struct mif_device *ip6mr_vif_seq_idx(struct net *net, | 375 | static struct mif_device *ip6mr_vif_seq_idx(struct net *net, |
376 | struct ipmr_vif_iter *iter, | 376 | struct ipmr_vif_iter *iter, |
377 | loff_t pos) | 377 | loff_t pos) |
378 | { | 378 | { |
379 | struct mr6_table *mrt = iter->mrt; | 379 | struct mr6_table *mrt = iter->mrt; |
380 | 380 | ||
381 | for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) { | 381 | for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) { |
382 | if (!MIF_EXISTS(mrt, iter->ct)) | 382 | if (!MIF_EXISTS(mrt, iter->ct)) |
383 | continue; | 383 | continue; |
384 | if (pos-- == 0) | 384 | if (pos-- == 0) |
385 | return &mrt->vif6_table[iter->ct]; | 385 | return &mrt->vif6_table[iter->ct]; |
386 | } | 386 | } |
387 | return NULL; | 387 | return NULL; |
388 | } | 388 | } |
389 | 389 | ||
390 | static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos) | 390 | static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos) |
391 | __acquires(mrt_lock) | 391 | __acquires(mrt_lock) |
392 | { | 392 | { |
393 | struct ipmr_vif_iter *iter = seq->private; | 393 | struct ipmr_vif_iter *iter = seq->private; |
394 | struct net *net = seq_file_net(seq); | 394 | struct net *net = seq_file_net(seq); |
395 | struct mr6_table *mrt; | 395 | struct mr6_table *mrt; |
396 | 396 | ||
397 | mrt = ip6mr_get_table(net, RT6_TABLE_DFLT); | 397 | mrt = ip6mr_get_table(net, RT6_TABLE_DFLT); |
398 | if (mrt == NULL) | 398 | if (mrt == NULL) |
399 | return ERR_PTR(-ENOENT); | 399 | return ERR_PTR(-ENOENT); |
400 | 400 | ||
401 | iter->mrt = mrt; | 401 | iter->mrt = mrt; |
402 | 402 | ||
403 | read_lock(&mrt_lock); | 403 | read_lock(&mrt_lock); |
404 | return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1) | 404 | return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1) |
405 | : SEQ_START_TOKEN; | 405 | : SEQ_START_TOKEN; |
406 | } | 406 | } |
407 | 407 | ||
408 | static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos) | 408 | static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos) |
409 | { | 409 | { |
410 | struct ipmr_vif_iter *iter = seq->private; | 410 | struct ipmr_vif_iter *iter = seq->private; |
411 | struct net *net = seq_file_net(seq); | 411 | struct net *net = seq_file_net(seq); |
412 | struct mr6_table *mrt = iter->mrt; | 412 | struct mr6_table *mrt = iter->mrt; |
413 | 413 | ||
414 | ++*pos; | 414 | ++*pos; |
415 | if (v == SEQ_START_TOKEN) | 415 | if (v == SEQ_START_TOKEN) |
416 | return ip6mr_vif_seq_idx(net, iter, 0); | 416 | return ip6mr_vif_seq_idx(net, iter, 0); |
417 | 417 | ||
418 | while (++iter->ct < mrt->maxvif) { | 418 | while (++iter->ct < mrt->maxvif) { |
419 | if (!MIF_EXISTS(mrt, iter->ct)) | 419 | if (!MIF_EXISTS(mrt, iter->ct)) |
420 | continue; | 420 | continue; |
421 | return &mrt->vif6_table[iter->ct]; | 421 | return &mrt->vif6_table[iter->ct]; |
422 | } | 422 | } |
423 | return NULL; | 423 | return NULL; |
424 | } | 424 | } |
425 | 425 | ||
426 | static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v) | 426 | static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v) |
427 | __releases(mrt_lock) | 427 | __releases(mrt_lock) |
428 | { | 428 | { |
429 | read_unlock(&mrt_lock); | 429 | read_unlock(&mrt_lock); |
430 | } | 430 | } |
431 | 431 | ||
432 | static int ip6mr_vif_seq_show(struct seq_file *seq, void *v) | 432 | static int ip6mr_vif_seq_show(struct seq_file *seq, void *v) |
433 | { | 433 | { |
434 | struct ipmr_vif_iter *iter = seq->private; | 434 | struct ipmr_vif_iter *iter = seq->private; |
435 | struct mr6_table *mrt = iter->mrt; | 435 | struct mr6_table *mrt = iter->mrt; |
436 | 436 | ||
437 | if (v == SEQ_START_TOKEN) { | 437 | if (v == SEQ_START_TOKEN) { |
438 | seq_puts(seq, | 438 | seq_puts(seq, |
439 | "Interface BytesIn PktsIn BytesOut PktsOut Flags\n"); | 439 | "Interface BytesIn PktsIn BytesOut PktsOut Flags\n"); |
440 | } else { | 440 | } else { |
441 | const struct mif_device *vif = v; | 441 | const struct mif_device *vif = v; |
442 | const char *name = vif->dev ? vif->dev->name : "none"; | 442 | const char *name = vif->dev ? vif->dev->name : "none"; |
443 | 443 | ||
444 | seq_printf(seq, | 444 | seq_printf(seq, |
445 | "%2td %-10s %8ld %7ld %8ld %7ld %05X\n", | 445 | "%2td %-10s %8ld %7ld %8ld %7ld %05X\n", |
446 | vif - mrt->vif6_table, | 446 | vif - mrt->vif6_table, |
447 | name, vif->bytes_in, vif->pkt_in, | 447 | name, vif->bytes_in, vif->pkt_in, |
448 | vif->bytes_out, vif->pkt_out, | 448 | vif->bytes_out, vif->pkt_out, |
449 | vif->flags); | 449 | vif->flags); |
450 | } | 450 | } |
451 | return 0; | 451 | return 0; |
452 | } | 452 | } |
453 | 453 | ||
454 | static const struct seq_operations ip6mr_vif_seq_ops = { | 454 | static const struct seq_operations ip6mr_vif_seq_ops = { |
455 | .start = ip6mr_vif_seq_start, | 455 | .start = ip6mr_vif_seq_start, |
456 | .next = ip6mr_vif_seq_next, | 456 | .next = ip6mr_vif_seq_next, |
457 | .stop = ip6mr_vif_seq_stop, | 457 | .stop = ip6mr_vif_seq_stop, |
458 | .show = ip6mr_vif_seq_show, | 458 | .show = ip6mr_vif_seq_show, |
459 | }; | 459 | }; |
460 | 460 | ||
461 | static int ip6mr_vif_open(struct inode *inode, struct file *file) | 461 | static int ip6mr_vif_open(struct inode *inode, struct file *file) |
462 | { | 462 | { |
463 | return seq_open_net(inode, file, &ip6mr_vif_seq_ops, | 463 | return seq_open_net(inode, file, &ip6mr_vif_seq_ops, |
464 | sizeof(struct ipmr_vif_iter)); | 464 | sizeof(struct ipmr_vif_iter)); |
465 | } | 465 | } |
466 | 466 | ||
467 | static const struct file_operations ip6mr_vif_fops = { | 467 | static const struct file_operations ip6mr_vif_fops = { |
468 | .owner = THIS_MODULE, | 468 | .owner = THIS_MODULE, |
469 | .open = ip6mr_vif_open, | 469 | .open = ip6mr_vif_open, |
470 | .read = seq_read, | 470 | .read = seq_read, |
471 | .llseek = seq_lseek, | 471 | .llseek = seq_lseek, |
472 | .release = seq_release_net, | 472 | .release = seq_release_net, |
473 | }; | 473 | }; |
474 | 474 | ||
475 | static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) | 475 | static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) |
476 | { | 476 | { |
477 | struct ipmr_mfc_iter *it = seq->private; | 477 | struct ipmr_mfc_iter *it = seq->private; |
478 | struct net *net = seq_file_net(seq); | 478 | struct net *net = seq_file_net(seq); |
479 | struct mr6_table *mrt; | 479 | struct mr6_table *mrt; |
480 | 480 | ||
481 | mrt = ip6mr_get_table(net, RT6_TABLE_DFLT); | 481 | mrt = ip6mr_get_table(net, RT6_TABLE_DFLT); |
482 | if (mrt == NULL) | 482 | if (mrt == NULL) |
483 | return ERR_PTR(-ENOENT); | 483 | return ERR_PTR(-ENOENT); |
484 | 484 | ||
485 | it->mrt = mrt; | 485 | it->mrt = mrt; |
486 | return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1) | 486 | return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1) |
487 | : SEQ_START_TOKEN; | 487 | : SEQ_START_TOKEN; |
488 | } | 488 | } |
489 | 489 | ||
490 | static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) | 490 | static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) |
491 | { | 491 | { |
492 | struct mfc6_cache *mfc = v; | 492 | struct mfc6_cache *mfc = v; |
493 | struct ipmr_mfc_iter *it = seq->private; | 493 | struct ipmr_mfc_iter *it = seq->private; |
494 | struct net *net = seq_file_net(seq); | 494 | struct net *net = seq_file_net(seq); |
495 | struct mr6_table *mrt = it->mrt; | 495 | struct mr6_table *mrt = it->mrt; |
496 | 496 | ||
497 | ++*pos; | 497 | ++*pos; |
498 | 498 | ||
499 | if (v == SEQ_START_TOKEN) | 499 | if (v == SEQ_START_TOKEN) |
500 | return ipmr_mfc_seq_idx(net, seq->private, 0); | 500 | return ipmr_mfc_seq_idx(net, seq->private, 0); |
501 | 501 | ||
502 | if (mfc->list.next != it->cache) | 502 | if (mfc->list.next != it->cache) |
503 | return list_entry(mfc->list.next, struct mfc6_cache, list); | 503 | return list_entry(mfc->list.next, struct mfc6_cache, list); |
504 | 504 | ||
505 | if (it->cache == &mrt->mfc6_unres_queue) | 505 | if (it->cache == &mrt->mfc6_unres_queue) |
506 | goto end_of_list; | 506 | goto end_of_list; |
507 | 507 | ||
508 | BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]); | 508 | BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]); |
509 | 509 | ||
510 | while (++it->ct < MFC6_LINES) { | 510 | while (++it->ct < MFC6_LINES) { |
511 | it->cache = &mrt->mfc6_cache_array[it->ct]; | 511 | it->cache = &mrt->mfc6_cache_array[it->ct]; |
512 | if (list_empty(it->cache)) | 512 | if (list_empty(it->cache)) |
513 | continue; | 513 | continue; |
514 | return list_first_entry(it->cache, struct mfc6_cache, list); | 514 | return list_first_entry(it->cache, struct mfc6_cache, list); |
515 | } | 515 | } |
516 | 516 | ||
517 | /* exhausted cache_array, show unresolved */ | 517 | /* exhausted cache_array, show unresolved */ |
518 | read_unlock(&mrt_lock); | 518 | read_unlock(&mrt_lock); |
519 | it->cache = &mrt->mfc6_unres_queue; | 519 | it->cache = &mrt->mfc6_unres_queue; |
520 | it->ct = 0; | 520 | it->ct = 0; |
521 | 521 | ||
522 | spin_lock_bh(&mfc_unres_lock); | 522 | spin_lock_bh(&mfc_unres_lock); |
523 | if (!list_empty(it->cache)) | 523 | if (!list_empty(it->cache)) |
524 | return list_first_entry(it->cache, struct mfc6_cache, list); | 524 | return list_first_entry(it->cache, struct mfc6_cache, list); |
525 | 525 | ||
526 | end_of_list: | 526 | end_of_list: |
527 | spin_unlock_bh(&mfc_unres_lock); | 527 | spin_unlock_bh(&mfc_unres_lock); |
528 | it->cache = NULL; | 528 | it->cache = NULL; |
529 | 529 | ||
530 | return NULL; | 530 | return NULL; |
531 | } | 531 | } |
532 | 532 | ||
533 | static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v) | 533 | static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v) |
534 | { | 534 | { |
535 | struct ipmr_mfc_iter *it = seq->private; | 535 | struct ipmr_mfc_iter *it = seq->private; |
536 | struct mr6_table *mrt = it->mrt; | 536 | struct mr6_table *mrt = it->mrt; |
537 | 537 | ||
538 | if (it->cache == &mrt->mfc6_unres_queue) | 538 | if (it->cache == &mrt->mfc6_unres_queue) |
539 | spin_unlock_bh(&mfc_unres_lock); | 539 | spin_unlock_bh(&mfc_unres_lock); |
540 | else if (it->cache == mrt->mfc6_cache_array) | 540 | else if (it->cache == mrt->mfc6_cache_array) |
541 | read_unlock(&mrt_lock); | 541 | read_unlock(&mrt_lock); |
542 | } | 542 | } |
543 | 543 | ||
544 | static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) | 544 | static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) |
545 | { | 545 | { |
546 | int n; | 546 | int n; |
547 | 547 | ||
548 | if (v == SEQ_START_TOKEN) { | 548 | if (v == SEQ_START_TOKEN) { |
549 | seq_puts(seq, | 549 | seq_puts(seq, |
550 | "Group " | 550 | "Group " |
551 | "Origin " | 551 | "Origin " |
552 | "Iif Pkts Bytes Wrong Oifs\n"); | 552 | "Iif Pkts Bytes Wrong Oifs\n"); |
553 | } else { | 553 | } else { |
554 | const struct mfc6_cache *mfc = v; | 554 | const struct mfc6_cache *mfc = v; |
555 | const struct ipmr_mfc_iter *it = seq->private; | 555 | const struct ipmr_mfc_iter *it = seq->private; |
556 | struct mr6_table *mrt = it->mrt; | 556 | struct mr6_table *mrt = it->mrt; |
557 | 557 | ||
558 | seq_printf(seq, "%pI6 %pI6 %-3hd", | 558 | seq_printf(seq, "%pI6 %pI6 %-3hd", |
559 | &mfc->mf6c_mcastgrp, &mfc->mf6c_origin, | 559 | &mfc->mf6c_mcastgrp, &mfc->mf6c_origin, |
560 | mfc->mf6c_parent); | 560 | mfc->mf6c_parent); |
561 | 561 | ||
562 | if (it->cache != &mrt->mfc6_unres_queue) { | 562 | if (it->cache != &mrt->mfc6_unres_queue) { |
563 | seq_printf(seq, " %8lu %8lu %8lu", | 563 | seq_printf(seq, " %8lu %8lu %8lu", |
564 | mfc->mfc_un.res.pkt, | 564 | mfc->mfc_un.res.pkt, |
565 | mfc->mfc_un.res.bytes, | 565 | mfc->mfc_un.res.bytes, |
566 | mfc->mfc_un.res.wrong_if); | 566 | mfc->mfc_un.res.wrong_if); |
567 | for (n = mfc->mfc_un.res.minvif; | 567 | for (n = mfc->mfc_un.res.minvif; |
568 | n < mfc->mfc_un.res.maxvif; n++) { | 568 | n < mfc->mfc_un.res.maxvif; n++) { |
569 | if (MIF_EXISTS(mrt, n) && | 569 | if (MIF_EXISTS(mrt, n) && |
570 | mfc->mfc_un.res.ttls[n] < 255) | 570 | mfc->mfc_un.res.ttls[n] < 255) |
571 | seq_printf(seq, | 571 | seq_printf(seq, |
572 | " %2d:%-3d", | 572 | " %2d:%-3d", |
573 | n, mfc->mfc_un.res.ttls[n]); | 573 | n, mfc->mfc_un.res.ttls[n]); |
574 | } | 574 | } |
575 | } else { | 575 | } else { |
576 | /* unresolved mfc_caches don't contain | 576 | /* unresolved mfc_caches don't contain |
577 | * pkt, bytes and wrong_if values | 577 | * pkt, bytes and wrong_if values |
578 | */ | 578 | */ |
579 | seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul); | 579 | seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul); |
580 | } | 580 | } |
581 | seq_putc(seq, '\n'); | 581 | seq_putc(seq, '\n'); |
582 | } | 582 | } |
583 | return 0; | 583 | return 0; |
584 | } | 584 | } |
585 | 585 | ||
586 | static const struct seq_operations ipmr_mfc_seq_ops = { | 586 | static const struct seq_operations ipmr_mfc_seq_ops = { |
587 | .start = ipmr_mfc_seq_start, | 587 | .start = ipmr_mfc_seq_start, |
588 | .next = ipmr_mfc_seq_next, | 588 | .next = ipmr_mfc_seq_next, |
589 | .stop = ipmr_mfc_seq_stop, | 589 | .stop = ipmr_mfc_seq_stop, |
590 | .show = ipmr_mfc_seq_show, | 590 | .show = ipmr_mfc_seq_show, |
591 | }; | 591 | }; |
592 | 592 | ||
593 | static int ipmr_mfc_open(struct inode *inode, struct file *file) | 593 | static int ipmr_mfc_open(struct inode *inode, struct file *file) |
594 | { | 594 | { |
595 | return seq_open_net(inode, file, &ipmr_mfc_seq_ops, | 595 | return seq_open_net(inode, file, &ipmr_mfc_seq_ops, |
596 | sizeof(struct ipmr_mfc_iter)); | 596 | sizeof(struct ipmr_mfc_iter)); |
597 | } | 597 | } |
598 | 598 | ||
599 | static const struct file_operations ip6mr_mfc_fops = { | 599 | static const struct file_operations ip6mr_mfc_fops = { |
600 | .owner = THIS_MODULE, | 600 | .owner = THIS_MODULE, |
601 | .open = ipmr_mfc_open, | 601 | .open = ipmr_mfc_open, |
602 | .read = seq_read, | 602 | .read = seq_read, |
603 | .llseek = seq_lseek, | 603 | .llseek = seq_lseek, |
604 | .release = seq_release_net, | 604 | .release = seq_release_net, |
605 | }; | 605 | }; |
606 | #endif | 606 | #endif |
607 | 607 | ||
608 | #ifdef CONFIG_IPV6_PIMSM_V2 | 608 | #ifdef CONFIG_IPV6_PIMSM_V2 |
609 | 609 | ||
610 | static int pim6_rcv(struct sk_buff *skb) | 610 | static int pim6_rcv(struct sk_buff *skb) |
611 | { | 611 | { |
612 | struct pimreghdr *pim; | 612 | struct pimreghdr *pim; |
613 | struct ipv6hdr *encap; | 613 | struct ipv6hdr *encap; |
614 | struct net_device *reg_dev = NULL; | 614 | struct net_device *reg_dev = NULL; |
615 | struct net *net = dev_net(skb->dev); | 615 | struct net *net = dev_net(skb->dev); |
616 | struct mr6_table *mrt; | 616 | struct mr6_table *mrt; |
617 | struct flowi fl = { | 617 | struct flowi fl = { |
618 | .iif = skb->dev->ifindex, | 618 | .iif = skb->dev->ifindex, |
619 | .mark = skb->mark, | 619 | .mark = skb->mark, |
620 | }; | 620 | }; |
621 | int reg_vif_num; | 621 | int reg_vif_num; |
622 | 622 | ||
623 | if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap))) | 623 | if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap))) |
624 | goto drop; | 624 | goto drop; |
625 | 625 | ||
626 | pim = (struct pimreghdr *)skb_transport_header(skb); | 626 | pim = (struct pimreghdr *)skb_transport_header(skb); |
627 | if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) || | 627 | if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) || |
628 | (pim->flags & PIM_NULL_REGISTER) || | 628 | (pim->flags & PIM_NULL_REGISTER) || |
629 | (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, | 629 | (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, |
630 | sizeof(*pim), IPPROTO_PIM, | 630 | sizeof(*pim), IPPROTO_PIM, |
631 | csum_partial((void *)pim, sizeof(*pim), 0)) && | 631 | csum_partial((void *)pim, sizeof(*pim), 0)) && |
632 | csum_fold(skb_checksum(skb, 0, skb->len, 0)))) | 632 | csum_fold(skb_checksum(skb, 0, skb->len, 0)))) |
633 | goto drop; | 633 | goto drop; |
634 | 634 | ||
635 | /* check that the inner packet is destined to a multicast group */ | 635 | /* check that the inner packet is destined to a multicast group */ |
636 | encap = (struct ipv6hdr *)(skb_transport_header(skb) + | 636 | encap = (struct ipv6hdr *)(skb_transport_header(skb) + |
637 | sizeof(*pim)); | 637 | sizeof(*pim)); |
638 | 638 | ||
639 | if (!ipv6_addr_is_multicast(&encap->daddr) || | 639 | if (!ipv6_addr_is_multicast(&encap->daddr) || |
640 | encap->payload_len == 0 || | 640 | encap->payload_len == 0 || |
641 | ntohs(encap->payload_len) + sizeof(*pim) > skb->len) | 641 | ntohs(encap->payload_len) + sizeof(*pim) > skb->len) |
642 | goto drop; | 642 | goto drop; |
643 | 643 | ||
644 | if (ip6mr_fib_lookup(net, &fl, &mrt) < 0) | 644 | if (ip6mr_fib_lookup(net, &fl, &mrt) < 0) |
645 | goto drop; | 645 | goto drop; |
646 | reg_vif_num = mrt->mroute_reg_vif_num; | 646 | reg_vif_num = mrt->mroute_reg_vif_num; |
647 | 647 | ||
648 | read_lock(&mrt_lock); | 648 | read_lock(&mrt_lock); |
649 | if (reg_vif_num >= 0) | 649 | if (reg_vif_num >= 0) |
650 | reg_dev = mrt->vif6_table[reg_vif_num].dev; | 650 | reg_dev = mrt->vif6_table[reg_vif_num].dev; |
651 | if (reg_dev) | 651 | if (reg_dev) |
652 | dev_hold(reg_dev); | 652 | dev_hold(reg_dev); |
653 | read_unlock(&mrt_lock); | 653 | read_unlock(&mrt_lock); |
654 | 654 | ||
655 | if (reg_dev == NULL) | 655 | if (reg_dev == NULL) |
656 | goto drop; | 656 | goto drop; |
657 | 657 | ||
658 | skb->mac_header = skb->network_header; | 658 | skb->mac_header = skb->network_header; |
659 | skb_pull(skb, (u8 *)encap - skb->data); | 659 | skb_pull(skb, (u8 *)encap - skb->data); |
660 | skb_reset_network_header(skb); | 660 | skb_reset_network_header(skb); |
661 | skb->dev = reg_dev; | ||
662 | skb->protocol = htons(ETH_P_IPV6); | 661 | skb->protocol = htons(ETH_P_IPV6); |
663 | skb->ip_summed = 0; | 662 | skb->ip_summed = 0; |
664 | skb->pkt_type = PACKET_HOST; | 663 | skb->pkt_type = PACKET_HOST; |
665 | skb_dst_drop(skb); | 664 | |
666 | reg_dev->stats.rx_bytes += skb->len; | 665 | skb_tunnel_rx(skb, reg_dev); |
667 | reg_dev->stats.rx_packets++; | 666 | |
668 | nf_reset(skb); | ||
669 | netif_rx(skb); | 667 | netif_rx(skb); |
670 | dev_put(reg_dev); | 668 | dev_put(reg_dev); |
671 | return 0; | 669 | return 0; |
672 | drop: | 670 | drop: |
673 | kfree_skb(skb); | 671 | kfree_skb(skb); |
674 | return 0; | 672 | return 0; |
675 | } | 673 | } |
676 | 674 | ||
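The hunk just above is the core of this change for ip6mr: the open-coded skb->dev assignment, skb_dst_drop(), per-device rx accounting and nf_reset() collapse into one skb_tunnel_rx() call. A minimal sketch of the helper, assuming the inline added to include/net/dst.h matches the call sites it replaces here (the comments are mine, not from the source):

static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev)
{
	/* account the decapsulated packet against the tunnel's virtual device */
	dev->stats.rx_packets++;
	dev->stats.rx_bytes += skb->len;
	skb->rxhash = 0;	/* outer-header hash is stale; force recompute for steering */
	skb_dst_drop(skb);	/* the outer route must not leak to the inner packet */
	nf_reset(skb);
	skb->dev = dev;
}

Every tunnel receive path converted by this commit then shrinks to this single call, so the rxhash fix lives in one place.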
677 | static const struct inet6_protocol pim6_protocol = { | 675 | static const struct inet6_protocol pim6_protocol = { |
678 | .handler = pim6_rcv, | 676 | .handler = pim6_rcv, |
679 | }; | 677 | }; |
680 | 678 | ||
681 | /* Service routines creating virtual interfaces: PIMREG */ | 679 | /* Service routines creating virtual interfaces: PIMREG */ |
682 | 680 | ||
683 | static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, | 681 | static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, |
684 | struct net_device *dev) | 682 | struct net_device *dev) |
685 | { | 683 | { |
686 | struct net *net = dev_net(dev); | 684 | struct net *net = dev_net(dev); |
687 | struct mr6_table *mrt; | 685 | struct mr6_table *mrt; |
688 | struct flowi fl = { | 686 | struct flowi fl = { |
689 | .oif = dev->ifindex, | 687 | .oif = dev->ifindex, |
690 | .iif = skb->skb_iif, | 688 | .iif = skb->skb_iif, |
691 | .mark = skb->mark, | 689 | .mark = skb->mark, |
692 | }; | 690 | }; |
693 | int err; | 691 | int err; |
694 | 692 | ||
695 | err = ip6mr_fib_lookup(net, &fl, &mrt); | 693 | err = ip6mr_fib_lookup(net, &fl, &mrt); |
696 | if (err < 0) | 694 | if (err < 0) |
697 | return err; | 695 | return err; |
698 | 696 | ||
699 | read_lock(&mrt_lock); | 697 | read_lock(&mrt_lock); |
700 | dev->stats.tx_bytes += skb->len; | 698 | dev->stats.tx_bytes += skb->len; |
701 | dev->stats.tx_packets++; | 699 | dev->stats.tx_packets++; |
702 | ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT); | 700 | ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT); |
703 | read_unlock(&mrt_lock); | 701 | read_unlock(&mrt_lock); |
704 | kfree_skb(skb); | 702 | kfree_skb(skb); |
705 | return NETDEV_TX_OK; | 703 | return NETDEV_TX_OK; |
706 | } | 704 | } |
707 | 705 | ||
708 | static const struct net_device_ops reg_vif_netdev_ops = { | 706 | static const struct net_device_ops reg_vif_netdev_ops = { |
709 | .ndo_start_xmit = reg_vif_xmit, | 707 | .ndo_start_xmit = reg_vif_xmit, |
710 | }; | 708 | }; |
711 | 709 | ||
712 | static void reg_vif_setup(struct net_device *dev) | 710 | static void reg_vif_setup(struct net_device *dev) |
713 | { | 711 | { |
714 | dev->type = ARPHRD_PIMREG; | 712 | dev->type = ARPHRD_PIMREG; |
715 | dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8; | 713 | dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8; |
716 | dev->flags = IFF_NOARP; | 714 | dev->flags = IFF_NOARP; |
717 | dev->netdev_ops = &reg_vif_netdev_ops; | 715 | dev->netdev_ops = &reg_vif_netdev_ops; |
718 | dev->destructor = free_netdev; | 716 | dev->destructor = free_netdev; |
719 | dev->features |= NETIF_F_NETNS_LOCAL; | 717 | dev->features |= NETIF_F_NETNS_LOCAL; |
720 | } | 718 | } |
721 | 719 | ||
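The 8 in reg_vif_setup()'s MTU computation is the PIM register header prepended to packets looped through this device. A sketch of that header, matching the fields pim6_rcv() validates above (the layout follows linux/pim.h; treat the exact types as an assumption):

struct pimreghdr {
	__u8	type;		/* (PIM_VERSION << 4) | PIM_REGISTER */
	__u8	reserved;
	__be16	csum;		/* checked via csum_ipv6_magic() in pim6_rcv() */
	__be32	flags;		/* carries the PIM_NULL_REGISTER bit */
};

sizeof(struct pimreghdr) == 8, which is exactly what the MTU line subtracts on top of the IPv6 header.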
722 | static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt) | 720 | static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt) |
723 | { | 721 | { |
724 | struct net_device *dev; | 722 | struct net_device *dev; |
725 | char name[IFNAMSIZ]; | 723 | char name[IFNAMSIZ]; |
726 | 724 | ||
727 | if (mrt->id == RT6_TABLE_DFLT) | 725 | if (mrt->id == RT6_TABLE_DFLT) |
728 | sprintf(name, "pim6reg"); | 726 | sprintf(name, "pim6reg"); |
729 | else | 727 | else |
730 | sprintf(name, "pim6reg%u", mrt->id); | 728 | sprintf(name, "pim6reg%u", mrt->id); |
731 | 729 | ||
732 | dev = alloc_netdev(0, name, reg_vif_setup); | 730 | dev = alloc_netdev(0, name, reg_vif_setup); |
733 | if (dev == NULL) | 731 | if (dev == NULL) |
734 | return NULL; | 732 | return NULL; |
735 | 733 | ||
736 | dev_net_set(dev, net); | 734 | dev_net_set(dev, net); |
737 | 735 | ||
738 | if (register_netdevice(dev)) { | 736 | if (register_netdevice(dev)) { |
739 | free_netdev(dev); | 737 | free_netdev(dev); |
740 | return NULL; | 738 | return NULL; |
741 | } | 739 | } |
742 | dev->iflink = 0; | 740 | dev->iflink = 0; |
743 | 741 | ||
744 | if (dev_open(dev)) | 742 | if (dev_open(dev)) |
745 | goto failure; | 743 | goto failure; |
746 | 744 | ||
747 | dev_hold(dev); | 745 | dev_hold(dev); |
748 | return dev; | 746 | return dev; |
749 | 747 | ||
750 | failure: | 748 | failure: |
751 | /* allow the register to be completed before unregistering. */ | 749 | /* allow the register to be completed before unregistering. */ |
752 | rtnl_unlock(); | 750 | rtnl_unlock(); |
753 | rtnl_lock(); | 751 | rtnl_lock(); |
754 | 752 | ||
755 | unregister_netdevice(dev); | 753 | unregister_netdevice(dev); |
756 | return NULL; | 754 | return NULL; |
757 | } | 755 | } |
758 | #endif | 756 | #endif |
759 | 757 | ||
760 | /* | 758 | /* |
761 | * Delete a VIF entry | 759 | * Delete a VIF entry |
762 | */ | 760 | */ |
763 | 761 | ||
764 | static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head) | 762 | static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head) |
765 | { | 763 | { |
766 | struct mif_device *v; | 764 | struct mif_device *v; |
767 | struct net_device *dev; | 765 | struct net_device *dev; |
768 | struct inet6_dev *in6_dev; | 766 | struct inet6_dev *in6_dev; |
769 | 767 | ||
770 | if (vifi < 0 || vifi >= mrt->maxvif) | 768 | if (vifi < 0 || vifi >= mrt->maxvif) |
771 | return -EADDRNOTAVAIL; | 769 | return -EADDRNOTAVAIL; |
772 | 770 | ||
773 | v = &mrt->vif6_table[vifi]; | 771 | v = &mrt->vif6_table[vifi]; |
774 | 772 | ||
775 | write_lock_bh(&mrt_lock); | 773 | write_lock_bh(&mrt_lock); |
776 | dev = v->dev; | 774 | dev = v->dev; |
777 | v->dev = NULL; | 775 | v->dev = NULL; |
778 | 776 | ||
779 | if (!dev) { | 777 | if (!dev) { |
780 | write_unlock_bh(&mrt_lock); | 778 | write_unlock_bh(&mrt_lock); |
781 | return -EADDRNOTAVAIL; | 779 | return -EADDRNOTAVAIL; |
782 | } | 780 | } |
783 | 781 | ||
784 | #ifdef CONFIG_IPV6_PIMSM_V2 | 782 | #ifdef CONFIG_IPV6_PIMSM_V2 |
785 | if (vifi == mrt->mroute_reg_vif_num) | 783 | if (vifi == mrt->mroute_reg_vif_num) |
786 | mrt->mroute_reg_vif_num = -1; | 784 | mrt->mroute_reg_vif_num = -1; |
787 | #endif | 785 | #endif |
788 | 786 | ||
789 | if (vifi + 1 == mrt->maxvif) { | 787 | if (vifi + 1 == mrt->maxvif) { |
790 | int tmp; | 788 | int tmp; |
791 | for (tmp = vifi - 1; tmp >= 0; tmp--) { | 789 | for (tmp = vifi - 1; tmp >= 0; tmp--) { |
792 | if (MIF_EXISTS(mrt, tmp)) | 790 | if (MIF_EXISTS(mrt, tmp)) |
793 | break; | 791 | break; |
794 | } | 792 | } |
795 | mrt->maxvif = tmp + 1; | 793 | mrt->maxvif = tmp + 1; |
796 | } | 794 | } |
797 | 795 | ||
798 | write_unlock_bh(&mrt_lock); | 796 | write_unlock_bh(&mrt_lock); |
799 | 797 | ||
800 | dev_set_allmulti(dev, -1); | 798 | dev_set_allmulti(dev, -1); |
801 | 799 | ||
802 | in6_dev = __in6_dev_get(dev); | 800 | in6_dev = __in6_dev_get(dev); |
803 | if (in6_dev) | 801 | if (in6_dev) |
804 | in6_dev->cnf.mc_forwarding--; | 802 | in6_dev->cnf.mc_forwarding--; |
805 | 803 | ||
806 | if (v->flags & MIFF_REGISTER) | 804 | if (v->flags & MIFF_REGISTER) |
807 | unregister_netdevice_queue(dev, head); | 805 | unregister_netdevice_queue(dev, head); |
808 | 806 | ||
809 | dev_put(dev); | 807 | dev_put(dev); |
810 | return 0; | 808 | return 0; |
811 | } | 809 | } |
812 | 810 | ||
813 | static inline void ip6mr_cache_free(struct mfc6_cache *c) | 811 | static inline void ip6mr_cache_free(struct mfc6_cache *c) |
814 | { | 812 | { |
815 | kmem_cache_free(mrt_cachep, c); | 813 | kmem_cache_free(mrt_cachep, c); |
816 | } | 814 | } |
817 | 815 | ||
818 | /* Destroy an unresolved cache entry, killing queued skbs | 816 | /* Destroy an unresolved cache entry, killing queued skbs |
819 | and reporting error to netlink readers. | 817 | and reporting error to netlink readers. |
820 | */ | 818 | */ |
821 | 819 | ||
822 | static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c) | 820 | static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c) |
823 | { | 821 | { |
824 | struct net *net = read_pnet(&mrt->net); | 822 | struct net *net = read_pnet(&mrt->net); |
825 | struct sk_buff *skb; | 823 | struct sk_buff *skb; |
826 | 824 | ||
827 | atomic_dec(&mrt->cache_resolve_queue_len); | 825 | atomic_dec(&mrt->cache_resolve_queue_len); |
828 | 826 | ||
829 | while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) { | 827 | while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) { |
830 | if (ipv6_hdr(skb)->version == 0) { | 828 | if (ipv6_hdr(skb)->version == 0) { |
831 | struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr)); | 829 | struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr)); |
832 | nlh->nlmsg_type = NLMSG_ERROR; | 830 | nlh->nlmsg_type = NLMSG_ERROR; |
833 | nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); | 831 | nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); |
834 | skb_trim(skb, nlh->nlmsg_len); | 832 | skb_trim(skb, nlh->nlmsg_len); |
835 | ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT; | 833 | ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT; |
836 | rtnl_unicast(skb, net, NETLINK_CB(skb).pid); | 834 | rtnl_unicast(skb, net, NETLINK_CB(skb).pid); |
837 | } else | 835 | } else |
838 | kfree_skb(skb); | 836 | kfree_skb(skb); |
839 | } | 837 | } |
840 | 838 | ||
841 | ip6mr_cache_free(c); | 839 | ip6mr_cache_free(c); |
842 | } | 840 | } |
843 | 841 | ||
844 | 842 | ||
845 | /* Timer process for all the unresolved queue. */ | 843 | /* Timer process for all the unresolved queue. */ |
846 | 844 | ||
847 | static void ipmr_do_expire_process(struct mr6_table *mrt) | 845 | static void ipmr_do_expire_process(struct mr6_table *mrt) |
848 | { | 846 | { |
849 | unsigned long now = jiffies; | 847 | unsigned long now = jiffies; |
850 | unsigned long expires = 10 * HZ; | 848 | unsigned long expires = 10 * HZ; |
851 | struct mfc6_cache *c, *next; | 849 | struct mfc6_cache *c, *next; |
852 | 850 | ||
853 | list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) { | 851 | list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) { |
854 | if (time_after(c->mfc_un.unres.expires, now)) { | 852 | if (time_after(c->mfc_un.unres.expires, now)) { |
855 | /* not yet... */ | 853 | /* not yet... */ |
856 | unsigned long interval = c->mfc_un.unres.expires - now; | 854 | unsigned long interval = c->mfc_un.unres.expires - now; |
857 | if (interval < expires) | 855 | if (interval < expires) |
858 | expires = interval; | 856 | expires = interval; |
859 | continue; | 857 | continue; |
860 | } | 858 | } |
861 | 859 | ||
862 | list_del(&c->list); | 860 | list_del(&c->list); |
863 | ip6mr_destroy_unres(mrt, c); | 861 | ip6mr_destroy_unres(mrt, c); |
864 | } | 862 | } |
865 | 863 | ||
866 | if (!list_empty(&mrt->mfc6_unres_queue)) | 864 | if (!list_empty(&mrt->mfc6_unres_queue)) |
867 | mod_timer(&mrt->ipmr_expire_timer, jiffies + expires); | 865 | mod_timer(&mrt->ipmr_expire_timer, jiffies + expires); |
868 | } | 866 | } |
869 | 867 | ||
870 | static void ipmr_expire_process(unsigned long arg) | 868 | static void ipmr_expire_process(unsigned long arg) |
871 | { | 869 | { |
872 | struct mr6_table *mrt = (struct mr6_table *)arg; | 870 | struct mr6_table *mrt = (struct mr6_table *)arg; |
873 | 871 | ||
874 | if (!spin_trylock(&mfc_unres_lock)) { | 872 | if (!spin_trylock(&mfc_unres_lock)) { |
875 | mod_timer(&mrt->ipmr_expire_timer, jiffies + 1); | 873 | mod_timer(&mrt->ipmr_expire_timer, jiffies + 1); |
876 | return; | 874 | return; |
877 | } | 875 | } |
878 | 876 | ||
879 | if (!list_empty(&mrt->mfc6_unres_queue)) | 877 | if (!list_empty(&mrt->mfc6_unres_queue)) |
880 | ipmr_do_expire_process(mrt); | 878 | ipmr_do_expire_process(mrt); |
881 | 879 | ||
882 | spin_unlock(&mfc_unres_lock); | 880 | spin_unlock(&mfc_unres_lock); |
883 | } | 881 | } |
884 | 882 | ||
885 | /* Fill oifs list. It is called under write locked mrt_lock. */ | 883 | /* Fill oifs list. It is called under write locked mrt_lock. */ |
886 | 884 | ||
887 | static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache, | 885 | static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache, |
888 | unsigned char *ttls) | 886 | unsigned char *ttls) |
889 | { | 887 | { |
890 | int vifi; | 888 | int vifi; |
891 | 889 | ||
892 | cache->mfc_un.res.minvif = MAXMIFS; | 890 | cache->mfc_un.res.minvif = MAXMIFS; |
893 | cache->mfc_un.res.maxvif = 0; | 891 | cache->mfc_un.res.maxvif = 0; |
894 | memset(cache->mfc_un.res.ttls, 255, MAXMIFS); | 892 | memset(cache->mfc_un.res.ttls, 255, MAXMIFS); |
895 | 893 | ||
896 | for (vifi = 0; vifi < mrt->maxvif; vifi++) { | 894 | for (vifi = 0; vifi < mrt->maxvif; vifi++) { |
897 | if (MIF_EXISTS(mrt, vifi) && | 895 | if (MIF_EXISTS(mrt, vifi) && |
898 | ttls[vifi] && ttls[vifi] < 255) { | 896 | ttls[vifi] && ttls[vifi] < 255) { |
899 | cache->mfc_un.res.ttls[vifi] = ttls[vifi]; | 897 | cache->mfc_un.res.ttls[vifi] = ttls[vifi]; |
900 | if (cache->mfc_un.res.minvif > vifi) | 898 | if (cache->mfc_un.res.minvif > vifi) |
901 | cache->mfc_un.res.minvif = vifi; | 899 | cache->mfc_un.res.minvif = vifi; |
902 | if (cache->mfc_un.res.maxvif <= vifi) | 900 | if (cache->mfc_un.res.maxvif <= vifi) |
903 | cache->mfc_un.res.maxvif = vifi + 1; | 901 | cache->mfc_un.res.maxvif = vifi + 1; |
904 | } | 902 | } |
905 | } | 903 | } |
906 | } | 904 | } |
907 | 905 | ||
908 | static int mif6_add(struct net *net, struct mr6_table *mrt, | 906 | static int mif6_add(struct net *net, struct mr6_table *mrt, |
909 | struct mif6ctl *vifc, int mrtsock) | 907 | struct mif6ctl *vifc, int mrtsock) |
910 | { | 908 | { |
911 | int vifi = vifc->mif6c_mifi; | 909 | int vifi = vifc->mif6c_mifi; |
912 | struct mif_device *v = &mrt->vif6_table[vifi]; | 910 | struct mif_device *v = &mrt->vif6_table[vifi]; |
913 | struct net_device *dev; | 911 | struct net_device *dev; |
914 | struct inet6_dev *in6_dev; | 912 | struct inet6_dev *in6_dev; |
915 | int err; | 913 | int err; |
916 | 914 | ||
917 | /* Is vif busy ? */ | 915 | /* Is vif busy ? */ |
918 | if (MIF_EXISTS(mrt, vifi)) | 916 | if (MIF_EXISTS(mrt, vifi)) |
919 | return -EADDRINUSE; | 917 | return -EADDRINUSE; |
920 | 918 | ||
921 | switch (vifc->mif6c_flags) { | 919 | switch (vifc->mif6c_flags) { |
922 | #ifdef CONFIG_IPV6_PIMSM_V2 | 920 | #ifdef CONFIG_IPV6_PIMSM_V2 |
923 | case MIFF_REGISTER: | 921 | case MIFF_REGISTER: |
924 | /* | 922 | /* |
925 | * Special Purpose VIF in PIM | 923 | * Special Purpose VIF in PIM |
926 | * All the packets will be sent to the daemon | 924 | * All the packets will be sent to the daemon |
927 | */ | 925 | */ |
928 | if (mrt->mroute_reg_vif_num >= 0) | 926 | if (mrt->mroute_reg_vif_num >= 0) |
929 | return -EADDRINUSE; | 927 | return -EADDRINUSE; |
930 | dev = ip6mr_reg_vif(net, mrt); | 928 | dev = ip6mr_reg_vif(net, mrt); |
931 | if (!dev) | 929 | if (!dev) |
932 | return -ENOBUFS; | 930 | return -ENOBUFS; |
933 | err = dev_set_allmulti(dev, 1); | 931 | err = dev_set_allmulti(dev, 1); |
934 | if (err) { | 932 | if (err) { |
935 | unregister_netdevice(dev); | 933 | unregister_netdevice(dev); |
936 | dev_put(dev); | 934 | dev_put(dev); |
937 | return err; | 935 | return err; |
938 | } | 936 | } |
939 | break; | 937 | break; |
940 | #endif | 938 | #endif |
941 | case 0: | 939 | case 0: |
942 | dev = dev_get_by_index(net, vifc->mif6c_pifi); | 940 | dev = dev_get_by_index(net, vifc->mif6c_pifi); |
943 | if (!dev) | 941 | if (!dev) |
944 | return -EADDRNOTAVAIL; | 942 | return -EADDRNOTAVAIL; |
945 | err = dev_set_allmulti(dev, 1); | 943 | err = dev_set_allmulti(dev, 1); |
946 | if (err) { | 944 | if (err) { |
947 | dev_put(dev); | 945 | dev_put(dev); |
948 | return err; | 946 | return err; |
949 | } | 947 | } |
950 | break; | 948 | break; |
951 | default: | 949 | default: |
952 | return -EINVAL; | 950 | return -EINVAL; |
953 | } | 951 | } |
954 | 952 | ||
955 | in6_dev = __in6_dev_get(dev); | 953 | in6_dev = __in6_dev_get(dev); |
956 | if (in6_dev) | 954 | if (in6_dev) |
957 | in6_dev->cnf.mc_forwarding++; | 955 | in6_dev->cnf.mc_forwarding++; |
958 | 956 | ||
959 | /* | 957 | /* |
960 | * Fill in the VIF structures | 958 | * Fill in the VIF structures |
961 | */ | 959 | */ |
962 | v->rate_limit = vifc->vifc_rate_limit; | 960 | v->rate_limit = vifc->vifc_rate_limit; |
963 | v->flags = vifc->mif6c_flags; | 961 | v->flags = vifc->mif6c_flags; |
964 | if (!mrtsock) | 962 | if (!mrtsock) |
965 | v->flags |= VIFF_STATIC; | 963 | v->flags |= VIFF_STATIC; |
966 | v->threshold = vifc->vifc_threshold; | 964 | v->threshold = vifc->vifc_threshold; |
967 | v->bytes_in = 0; | 965 | v->bytes_in = 0; |
968 | v->bytes_out = 0; | 966 | v->bytes_out = 0; |
969 | v->pkt_in = 0; | 967 | v->pkt_in = 0; |
970 | v->pkt_out = 0; | 968 | v->pkt_out = 0; |
971 | v->link = dev->ifindex; | 969 | v->link = dev->ifindex; |
972 | if (v->flags & MIFF_REGISTER) | 970 | if (v->flags & MIFF_REGISTER) |
973 | v->link = dev->iflink; | 971 | v->link = dev->iflink; |
974 | 972 | ||
975 | /* And finish update writing critical data */ | 973 | /* And finish update writing critical data */ |
976 | write_lock_bh(&mrt_lock); | 974 | write_lock_bh(&mrt_lock); |
977 | v->dev = dev; | 975 | v->dev = dev; |
978 | #ifdef CONFIG_IPV6_PIMSM_V2 | 976 | #ifdef CONFIG_IPV6_PIMSM_V2 |
979 | if (v->flags & MIFF_REGISTER) | 977 | if (v->flags & MIFF_REGISTER) |
980 | mrt->mroute_reg_vif_num = vifi; | 978 | mrt->mroute_reg_vif_num = vifi; |
981 | #endif | 979 | #endif |
982 | if (vifi + 1 > mrt->maxvif) | 980 | if (vifi + 1 > mrt->maxvif) |
983 | mrt->maxvif = vifi + 1; | 981 | mrt->maxvif = vifi + 1; |
984 | write_unlock_bh(&mrt_lock); | 982 | write_unlock_bh(&mrt_lock); |
985 | return 0; | 983 | return 0; |
986 | } | 984 | } |
987 | 985 | ||
988 | static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt, | 986 | static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt, |
989 | struct in6_addr *origin, | 987 | struct in6_addr *origin, |
990 | struct in6_addr *mcastgrp) | 988 | struct in6_addr *mcastgrp) |
991 | { | 989 | { |
992 | int line = MFC6_HASH(mcastgrp, origin); | 990 | int line = MFC6_HASH(mcastgrp, origin); |
993 | struct mfc6_cache *c; | 991 | struct mfc6_cache *c; |
994 | 992 | ||
995 | list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) { | 993 | list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) { |
996 | if (ipv6_addr_equal(&c->mf6c_origin, origin) && | 994 | if (ipv6_addr_equal(&c->mf6c_origin, origin) && |
997 | ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) | 995 | ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) |
998 | return c; | 996 | return c; |
999 | } | 997 | } |
1000 | return NULL; | 998 | return NULL; |
1001 | } | 999 | } |
1002 | 1000 | ||
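ip6mr_cache_find() walks one hash chain of mfc6_cache_array, keyed on the (origin, group) pair. MFC6_HASH() is defined outside this hunk; a plausible sketch, assuming a simple xor of the leading address words folded into the bucket count (both the mixing and the MFC6_LINES value shown are assumptions, not taken from this diff):

#define MFC6_LINES	64	/* assumed number of hash buckets */
#define MFC6_HASH(a, g)	(((__force u32)(a)->s6_addr32[0] ^ \
			  (__force u32)(g)->s6_addr32[0]) % MFC6_LINES)

Any function of the two addresses would do, as long as ip6mr_cache_find() and ip6mr_mfc_delete()/ip6mr_mfc_add() below compute the same line for the same pair.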
1003 | /* | 1001 | /* |
1004 | * Allocate a multicast cache entry | 1002 | * Allocate a multicast cache entry |
1005 | */ | 1003 | */ |
1006 | static struct mfc6_cache *ip6mr_cache_alloc(void) | 1004 | static struct mfc6_cache *ip6mr_cache_alloc(void) |
1007 | { | 1005 | { |
1008 | struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); | 1006 | struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); |
1009 | if (c == NULL) | 1007 | if (c == NULL) |
1010 | return NULL; | 1008 | return NULL; |
1011 | c->mfc_un.res.minvif = MAXMIFS; | 1009 | c->mfc_un.res.minvif = MAXMIFS; |
1012 | return c; | 1010 | return c; |
1013 | } | 1011 | } |
1014 | 1012 | ||
1015 | static struct mfc6_cache *ip6mr_cache_alloc_unres(void) | 1013 | static struct mfc6_cache *ip6mr_cache_alloc_unres(void) |
1016 | { | 1014 | { |
1017 | struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); | 1015 | struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); |
1018 | if (c == NULL) | 1016 | if (c == NULL) |
1019 | return NULL; | 1017 | return NULL; |
1020 | skb_queue_head_init(&c->mfc_un.unres.unresolved); | 1018 | skb_queue_head_init(&c->mfc_un.unres.unresolved); |
1021 | c->mfc_un.unres.expires = jiffies + 10 * HZ; | 1019 | c->mfc_un.unres.expires = jiffies + 10 * HZ; |
1022 | return c; | 1020 | return c; |
1023 | } | 1021 | } |
1024 | 1022 | ||
1025 | /* | 1023 | /* |
1026 | * A cache entry has gone into a resolved state from queued | 1024 | * A cache entry has gone into a resolved state from queued |
1027 | */ | 1025 | */ |
1028 | 1026 | ||
1029 | static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt, | 1027 | static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt, |
1030 | struct mfc6_cache *uc, struct mfc6_cache *c) | 1028 | struct mfc6_cache *uc, struct mfc6_cache *c) |
1031 | { | 1029 | { |
1032 | struct sk_buff *skb; | 1030 | struct sk_buff *skb; |
1033 | 1031 | ||
1034 | /* | 1032 | /* |
1035 | * Play the pending entries through our router | 1033 | * Play the pending entries through our router |
1036 | */ | 1034 | */ |
1037 | 1035 | ||
1038 | while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) { | 1036 | while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) { |
1039 | if (ipv6_hdr(skb)->version == 0) { | 1037 | if (ipv6_hdr(skb)->version == 0) { |
1040 | int err; | 1038 | int err; |
1041 | struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr)); | 1039 | struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr)); |
1042 | 1040 | ||
1043 | if (__ip6mr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) { | 1041 | if (__ip6mr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) { |
1044 | nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh; | 1042 | nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh; |
1045 | } else { | 1043 | } else { |
1046 | nlh->nlmsg_type = NLMSG_ERROR; | 1044 | nlh->nlmsg_type = NLMSG_ERROR; |
1047 | nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); | 1045 | nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); |
1048 | skb_trim(skb, nlh->nlmsg_len); | 1046 | skb_trim(skb, nlh->nlmsg_len); |
1049 | ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE; | 1047 | ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE; |
1050 | } | 1048 | } |
1051 | err = rtnl_unicast(skb, net, NETLINK_CB(skb).pid); | 1049 | err = rtnl_unicast(skb, net, NETLINK_CB(skb).pid); |
1052 | } else | 1050 | } else |
1053 | ip6_mr_forward(net, mrt, skb, c); | 1051 | ip6_mr_forward(net, mrt, skb, c); |
1054 | } | 1052 | } |
1055 | } | 1053 | } |
1056 | 1054 | ||
1057 | /* | 1055 | /* |
1058 | * Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd | 1056 | * Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd |
1059 | * expects the following bizarre scheme. | 1057 | * expects the following bizarre scheme. |
1060 | * | 1058 | * |
1061 | * Called under mrt_lock. | 1059 | * Called under mrt_lock. |
1062 | */ | 1060 | */ |
1063 | 1061 | ||
1064 | static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt, | 1062 | static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt, |
1065 | mifi_t mifi, int assert) | 1063 | mifi_t mifi, int assert) |
1066 | { | 1064 | { |
1067 | struct sk_buff *skb; | 1065 | struct sk_buff *skb; |
1068 | struct mrt6msg *msg; | 1066 | struct mrt6msg *msg; |
1069 | int ret; | 1067 | int ret; |
1070 | 1068 | ||
1071 | #ifdef CONFIG_IPV6_PIMSM_V2 | 1069 | #ifdef CONFIG_IPV6_PIMSM_V2 |
1072 | if (assert == MRT6MSG_WHOLEPKT) | 1070 | if (assert == MRT6MSG_WHOLEPKT) |
1073 | skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt) | 1071 | skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt) |
1074 | +sizeof(*msg)); | 1072 | +sizeof(*msg)); |
1075 | else | 1073 | else |
1076 | #endif | 1074 | #endif |
1077 | skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC); | 1075 | skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC); |
1078 | 1076 | ||
1079 | if (!skb) | 1077 | if (!skb) |
1080 | return -ENOBUFS; | 1078 | return -ENOBUFS; |
1081 | 1079 | ||
1082 | /* I suppose that internal messages | 1080 | /* I suppose that internal messages |
1083 | * do not require checksums */ | 1081 | * do not require checksums */ |
1084 | 1082 | ||
1085 | skb->ip_summed = CHECKSUM_UNNECESSARY; | 1083 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
1086 | 1084 | ||
1087 | #ifdef CONFIG_IPV6_PIMSM_V2 | 1085 | #ifdef CONFIG_IPV6_PIMSM_V2 |
1088 | if (assert == MRT6MSG_WHOLEPKT) { | 1086 | if (assert == MRT6MSG_WHOLEPKT) { |
1089 | /* Ugly, but we have no choice with this interface. | 1087 | /* Ugly, but we have no choice with this interface. |
1090 | Duplicate old header, fix length etc. | 1088 | Duplicate old header, fix length etc. |
1091 | And all this only to mangle msg->im6_msgtype and | 1089 | And all this only to mangle msg->im6_msgtype and |
1092 | to set msg->im6_mbz to "mbz" :-) | 1090 | to set msg->im6_mbz to "mbz" :-) |
1093 | */ | 1091 | */ |
1094 | skb_push(skb, -skb_network_offset(pkt)); | 1092 | skb_push(skb, -skb_network_offset(pkt)); |
1095 | 1093 | ||
1096 | skb_push(skb, sizeof(*msg)); | 1094 | skb_push(skb, sizeof(*msg)); |
1097 | skb_reset_transport_header(skb); | 1095 | skb_reset_transport_header(skb); |
1098 | msg = (struct mrt6msg *)skb_transport_header(skb); | 1096 | msg = (struct mrt6msg *)skb_transport_header(skb); |
1099 | msg->im6_mbz = 0; | 1097 | msg->im6_mbz = 0; |
1100 | msg->im6_msgtype = MRT6MSG_WHOLEPKT; | 1098 | msg->im6_msgtype = MRT6MSG_WHOLEPKT; |
1101 | msg->im6_mif = mrt->mroute_reg_vif_num; | 1099 | msg->im6_mif = mrt->mroute_reg_vif_num; |
1102 | msg->im6_pad = 0; | 1100 | msg->im6_pad = 0; |
1103 | ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr); | 1101 | ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr); |
1104 | ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr); | 1102 | ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr); |
1105 | 1103 | ||
1106 | skb->ip_summed = CHECKSUM_UNNECESSARY; | 1104 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
1107 | } else | 1105 | } else |
1108 | #endif | 1106 | #endif |
1109 | { | 1107 | { |
1110 | /* | 1108 | /* |
1111 | * Copy the IP header | 1109 | * Copy the IP header |
1112 | */ | 1110 | */ |
1113 | 1111 | ||
1114 | skb_put(skb, sizeof(struct ipv6hdr)); | 1112 | skb_put(skb, sizeof(struct ipv6hdr)); |
1115 | skb_reset_network_header(skb); | 1113 | skb_reset_network_header(skb); |
1116 | skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr)); | 1114 | skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr)); |
1117 | 1115 | ||
1118 | /* | 1116 | /* |
1119 | * Add our header | 1117 | * Add our header |
1120 | */ | 1118 | */ |
1121 | skb_put(skb, sizeof(*msg)); | 1119 | skb_put(skb, sizeof(*msg)); |
1122 | skb_reset_transport_header(skb); | 1120 | skb_reset_transport_header(skb); |
1123 | msg = (struct mrt6msg *)skb_transport_header(skb); | 1121 | msg = (struct mrt6msg *)skb_transport_header(skb); |
1124 | 1122 | ||
1125 | msg->im6_mbz = 0; | 1123 | msg->im6_mbz = 0; |
1126 | msg->im6_msgtype = assert; | 1124 | msg->im6_msgtype = assert; |
1127 | msg->im6_mif = mifi; | 1125 | msg->im6_mif = mifi; |
1128 | msg->im6_pad = 0; | 1126 | msg->im6_pad = 0; |
1129 | ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr); | 1127 | ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr); |
1130 | ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr); | 1128 | ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr); |
1131 | 1129 | ||
1132 | skb_dst_set(skb, dst_clone(skb_dst(pkt))); | 1130 | skb_dst_set(skb, dst_clone(skb_dst(pkt))); |
1133 | skb->ip_summed = CHECKSUM_UNNECESSARY; | 1131 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
1134 | } | 1132 | } |
1135 | 1133 | ||
1136 | if (mrt->mroute6_sk == NULL) { | 1134 | if (mrt->mroute6_sk == NULL) { |
1137 | kfree_skb(skb); | 1135 | kfree_skb(skb); |
1138 | return -EINVAL; | 1136 | return -EINVAL; |
1139 | } | 1137 | } |
1140 | 1138 | ||
1141 | /* | 1139 | /* |
1142 | * Deliver to user space multicast routing algorithms | 1140 | * Deliver to user space multicast routing algorithms |
1143 | */ | 1141 | */ |
1144 | ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb); | 1142 | ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb); |
1145 | if (ret < 0) { | 1143 | if (ret < 0) { |
1146 | if (net_ratelimit()) | 1144 | if (net_ratelimit()) |
1147 | printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n"); | 1145 | printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n"); |
1148 | kfree_skb(skb); | 1146 | kfree_skb(skb); |
1149 | } | 1147 | } |
1150 | 1148 | ||
1151 | return ret; | 1149 | return ret; |
1152 | } | 1150 | } |
1153 | 1151 | ||
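Both branches of ip6mr_cache_report() hand pim6sd a struct mrt6msg sitting where the transport header would normally be. The fields written above imply a layout along these lines (the field widths are assumptions; the authoritative definition lives in linux/mroute6.h):

struct mrt6msg {
	__u8		im6_mbz;	/* "must be zero" */
	__u8		im6_msgtype;	/* MRT6MSG_NOCACHE, _WRONGMIF or _WHOLEPKT */
	__u16		im6_mif;	/* incoming MIF, or the register VIF */
	__u32		im6_pad;
	struct in6_addr	im6_src;
	struct in6_addr	im6_dst;
};

The daemon reads these messages straight off its raw ICMPv6 socket, which is why a NULL mroute6_sk forces the report to be dropped with -EINVAL.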
1154 | /* | 1152 | /* |
1155 | * Queue a packet for resolution. It gets locked cache entry! | 1153 | * Queue a packet for resolution. It gets locked cache entry! |
1156 | */ | 1154 | */ |
1157 | 1155 | ||
1158 | static int | 1156 | static int |
1159 | ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb) | 1157 | ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb) |
1160 | { | 1158 | { |
1161 | bool found = false; | 1159 | bool found = false; |
1162 | int err; | 1160 | int err; |
1163 | struct mfc6_cache *c; | 1161 | struct mfc6_cache *c; |
1164 | 1162 | ||
1165 | spin_lock_bh(&mfc_unres_lock); | 1163 | spin_lock_bh(&mfc_unres_lock); |
1166 | list_for_each_entry(c, &mrt->mfc6_unres_queue, list) { | 1164 | list_for_each_entry(c, &mrt->mfc6_unres_queue, list) { |
1167 | if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) && | 1165 | if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) && |
1168 | ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) { | 1166 | ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) { |
1169 | found = true; | 1167 | found = true; |
1170 | break; | 1168 | break; |
1171 | } | 1169 | } |
1172 | } | 1170 | } |
1173 | 1171 | ||
1174 | if (!found) { | 1172 | if (!found) { |
1175 | /* | 1173 | /* |
1176 | * Create a new entry if allowable | 1174 | * Create a new entry if allowable |
1177 | */ | 1175 | */ |
1178 | 1176 | ||
1179 | if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 || | 1177 | if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 || |
1180 | (c = ip6mr_cache_alloc_unres()) == NULL) { | 1178 | (c = ip6mr_cache_alloc_unres()) == NULL) { |
1181 | spin_unlock_bh(&mfc_unres_lock); | 1179 | spin_unlock_bh(&mfc_unres_lock); |
1182 | 1180 | ||
1183 | kfree_skb(skb); | 1181 | kfree_skb(skb); |
1184 | return -ENOBUFS; | 1182 | return -ENOBUFS; |
1185 | } | 1183 | } |
1186 | 1184 | ||
1187 | /* | 1185 | /* |
1188 | * Fill in the new cache entry | 1186 | * Fill in the new cache entry |
1189 | */ | 1187 | */ |
1190 | c->mf6c_parent = -1; | 1188 | c->mf6c_parent = -1; |
1191 | c->mf6c_origin = ipv6_hdr(skb)->saddr; | 1189 | c->mf6c_origin = ipv6_hdr(skb)->saddr; |
1192 | c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr; | 1190 | c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr; |
1193 | 1191 | ||
1194 | /* | 1192 | /* |
1195 | * Reflect first query at pim6sd | 1193 | * Reflect first query at pim6sd |
1196 | */ | 1194 | */ |
1197 | err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE); | 1195 | err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE); |
1198 | if (err < 0) { | 1196 | if (err < 0) { |
1199 | /* If the report failed throw the cache entry | 1197 | /* If the report failed throw the cache entry |
1200 | out - Brad Parker | 1198 | out - Brad Parker |
1201 | */ | 1199 | */ |
1202 | spin_unlock_bh(&mfc_unres_lock); | 1200 | spin_unlock_bh(&mfc_unres_lock); |
1203 | 1201 | ||
1204 | ip6mr_cache_free(c); | 1202 | ip6mr_cache_free(c); |
1205 | kfree_skb(skb); | 1203 | kfree_skb(skb); |
1206 | return err; | 1204 | return err; |
1207 | } | 1205 | } |
1208 | 1206 | ||
1209 | atomic_inc(&mrt->cache_resolve_queue_len); | 1207 | atomic_inc(&mrt->cache_resolve_queue_len); |
1210 | list_add(&c->list, &mrt->mfc6_unres_queue); | 1208 | list_add(&c->list, &mrt->mfc6_unres_queue); |
1211 | 1209 | ||
1212 | ipmr_do_expire_process(mrt); | 1210 | ipmr_do_expire_process(mrt); |
1213 | } | 1211 | } |
1214 | 1212 | ||
1215 | /* | 1213 | /* |
1216 | * See if we can append the packet | 1214 | * See if we can append the packet |
1217 | */ | 1215 | */ |
1218 | if (c->mfc_un.unres.unresolved.qlen > 3) { | 1216 | if (c->mfc_un.unres.unresolved.qlen > 3) { |
1219 | kfree_skb(skb); | 1217 | kfree_skb(skb); |
1220 | err = -ENOBUFS; | 1218 | err = -ENOBUFS; |
1221 | } else { | 1219 | } else { |
1222 | skb_queue_tail(&c->mfc_un.unres.unresolved, skb); | 1220 | skb_queue_tail(&c->mfc_un.unres.unresolved, skb); |
1223 | err = 0; | 1221 | err = 0; |
1224 | } | 1222 | } |
1225 | 1223 | ||
1226 | spin_unlock_bh(&mfc_unres_lock); | 1224 | spin_unlock_bh(&mfc_unres_lock); |
1227 | return err; | 1225 | return err; |
1228 | } | 1226 | } |
1229 | 1227 | ||
1230 | /* | 1228 | /* |
1231 | * MFC6 cache manipulation by user space | 1229 | * MFC6 cache manipulation by user space |
1232 | */ | 1230 | */ |
1233 | 1231 | ||
1234 | static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc) | 1232 | static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc) |
1235 | { | 1233 | { |
1236 | int line; | 1234 | int line; |
1237 | struct mfc6_cache *c, *next; | 1235 | struct mfc6_cache *c, *next; |
1238 | 1236 | ||
1239 | line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr); | 1237 | line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr); |
1240 | 1238 | ||
1241 | list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) { | 1239 | list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) { |
1242 | if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) && | 1240 | if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) && |
1243 | ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) { | 1241 | ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) { |
1244 | write_lock_bh(&mrt_lock); | 1242 | write_lock_bh(&mrt_lock); |
1245 | list_del(&c->list); | 1243 | list_del(&c->list); |
1246 | write_unlock_bh(&mrt_lock); | 1244 | write_unlock_bh(&mrt_lock); |
1247 | 1245 | ||
1248 | ip6mr_cache_free(c); | 1246 | ip6mr_cache_free(c); |
1249 | return 0; | 1247 | return 0; |
1250 | } | 1248 | } |
1251 | } | 1249 | } |
1252 | return -ENOENT; | 1250 | return -ENOENT; |
1253 | } | 1251 | } |
1254 | 1252 | ||
1255 | static int ip6mr_device_event(struct notifier_block *this, | 1253 | static int ip6mr_device_event(struct notifier_block *this, |
1256 | unsigned long event, void *ptr) | 1254 | unsigned long event, void *ptr) |
1257 | { | 1255 | { |
1258 | struct net_device *dev = ptr; | 1256 | struct net_device *dev = ptr; |
1259 | struct net *net = dev_net(dev); | 1257 | struct net *net = dev_net(dev); |
1260 | struct mr6_table *mrt; | 1258 | struct mr6_table *mrt; |
1261 | struct mif_device *v; | 1259 | struct mif_device *v; |
1262 | int ct; | 1260 | int ct; |
1263 | LIST_HEAD(list); | 1261 | LIST_HEAD(list); |
1264 | 1262 | ||
1265 | if (event != NETDEV_UNREGISTER) | 1263 | if (event != NETDEV_UNREGISTER) |
1266 | return NOTIFY_DONE; | 1264 | return NOTIFY_DONE; |
1267 | 1265 | ||
1268 | ip6mr_for_each_table(mrt, net) { | 1266 | ip6mr_for_each_table(mrt, net) { |
1269 | v = &mrt->vif6_table[0]; | 1267 | v = &mrt->vif6_table[0]; |
1270 | for (ct = 0; ct < mrt->maxvif; ct++, v++) { | 1268 | for (ct = 0; ct < mrt->maxvif; ct++, v++) { |
1271 | if (v->dev == dev) | 1269 | if (v->dev == dev) |
1272 | mif6_delete(mrt, ct, &list); | 1270 | mif6_delete(mrt, ct, &list); |
1273 | } | 1271 | } |
1274 | } | 1272 | } |
1275 | unregister_netdevice_many(&list); | 1273 | unregister_netdevice_many(&list); |
1276 | 1274 | ||
1277 | return NOTIFY_DONE; | 1275 | return NOTIFY_DONE; |
1278 | } | 1276 | } |
1279 | 1277 | ||
1280 | static struct notifier_block ip6_mr_notifier = { | 1278 | static struct notifier_block ip6_mr_notifier = { |
1281 | .notifier_call = ip6mr_device_event | 1279 | .notifier_call = ip6mr_device_event |
1282 | }; | 1280 | }; |
1283 | 1281 | ||
1284 | /* | 1282 | /* |
1285 | * Setup for IP multicast routing | 1283 | * Setup for IP multicast routing |
1286 | */ | 1284 | */ |
1287 | 1285 | ||
1288 | static int __net_init ip6mr_net_init(struct net *net) | 1286 | static int __net_init ip6mr_net_init(struct net *net) |
1289 | { | 1287 | { |
1290 | int err; | 1288 | int err; |
1291 | 1289 | ||
1292 | err = ip6mr_rules_init(net); | 1290 | err = ip6mr_rules_init(net); |
1293 | if (err < 0) | 1291 | if (err < 0) |
1294 | goto fail; | 1292 | goto fail; |
1295 | 1293 | ||
1296 | #ifdef CONFIG_PROC_FS | 1294 | #ifdef CONFIG_PROC_FS |
1297 | err = -ENOMEM; | 1295 | err = -ENOMEM; |
1298 | if (!proc_net_fops_create(net, "ip6_mr_vif", 0, &ip6mr_vif_fops)) | 1296 | if (!proc_net_fops_create(net, "ip6_mr_vif", 0, &ip6mr_vif_fops)) |
1299 | goto proc_vif_fail; | 1297 | goto proc_vif_fail; |
1300 | if (!proc_net_fops_create(net, "ip6_mr_cache", 0, &ip6mr_mfc_fops)) | 1298 | if (!proc_net_fops_create(net, "ip6_mr_cache", 0, &ip6mr_mfc_fops)) |
1301 | goto proc_cache_fail; | 1299 | goto proc_cache_fail; |
1302 | #endif | 1300 | #endif |
1303 | 1301 | ||
1304 | return 0; | 1302 | return 0; |
1305 | 1303 | ||
1306 | #ifdef CONFIG_PROC_FS | 1304 | #ifdef CONFIG_PROC_FS |
1307 | proc_cache_fail: | 1305 | proc_cache_fail: |
1308 | proc_net_remove(net, "ip6_mr_vif"); | 1306 | proc_net_remove(net, "ip6_mr_vif"); |
1309 | proc_vif_fail: | 1307 | proc_vif_fail: |
1310 | ip6mr_rules_exit(net); | 1308 | ip6mr_rules_exit(net); |
1311 | #endif | 1309 | #endif |
1312 | fail: | 1310 | fail: |
1313 | return err; | 1311 | return err; |
1314 | } | 1312 | } |
1315 | 1313 | ||
1316 | static void __net_exit ip6mr_net_exit(struct net *net) | 1314 | static void __net_exit ip6mr_net_exit(struct net *net) |
1317 | { | 1315 | { |
1318 | #ifdef CONFIG_PROC_FS | 1316 | #ifdef CONFIG_PROC_FS |
1319 | proc_net_remove(net, "ip6_mr_cache"); | 1317 | proc_net_remove(net, "ip6_mr_cache"); |
1320 | proc_net_remove(net, "ip6_mr_vif"); | 1318 | proc_net_remove(net, "ip6_mr_vif"); |
1321 | #endif | 1319 | #endif |
1322 | ip6mr_rules_exit(net); | 1320 | ip6mr_rules_exit(net); |
1323 | } | 1321 | } |
1324 | 1322 | ||
1325 | static struct pernet_operations ip6mr_net_ops = { | 1323 | static struct pernet_operations ip6mr_net_ops = { |
1326 | .init = ip6mr_net_init, | 1324 | .init = ip6mr_net_init, |
1327 | .exit = ip6mr_net_exit, | 1325 | .exit = ip6mr_net_exit, |
1328 | }; | 1326 | }; |
1329 | 1327 | ||
1330 | int __init ip6_mr_init(void) | 1328 | int __init ip6_mr_init(void) |
1331 | { | 1329 | { |
1332 | int err; | 1330 | int err; |
1333 | 1331 | ||
1334 | mrt_cachep = kmem_cache_create("ip6_mrt_cache", | 1332 | mrt_cachep = kmem_cache_create("ip6_mrt_cache", |
1335 | sizeof(struct mfc6_cache), | 1333 | sizeof(struct mfc6_cache), |
1336 | 0, SLAB_HWCACHE_ALIGN, | 1334 | 0, SLAB_HWCACHE_ALIGN, |
1337 | NULL); | 1335 | NULL); |
1338 | if (!mrt_cachep) | 1336 | if (!mrt_cachep) |
1339 | return -ENOMEM; | 1337 | return -ENOMEM; |
1340 | 1338 | ||
1341 | err = register_pernet_subsys(&ip6mr_net_ops); | 1339 | err = register_pernet_subsys(&ip6mr_net_ops); |
1342 | if (err) | 1340 | if (err) |
1343 | goto reg_pernet_fail; | 1341 | goto reg_pernet_fail; |
1344 | 1342 | ||
1345 | err = register_netdevice_notifier(&ip6_mr_notifier); | 1343 | err = register_netdevice_notifier(&ip6_mr_notifier); |
1346 | if (err) | 1344 | if (err) |
1347 | goto reg_notif_fail; | 1345 | goto reg_notif_fail; |
1348 | #ifdef CONFIG_IPV6_PIMSM_V2 | 1346 | #ifdef CONFIG_IPV6_PIMSM_V2 |
1349 | if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) { | 1347 | if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) { |
1350 | printk(KERN_ERR "ip6_mr_init: can't add PIM protocol\n"); | 1348 | printk(KERN_ERR "ip6_mr_init: can't add PIM protocol\n"); |
1351 | err = -EAGAIN; | 1349 | err = -EAGAIN; |
1352 | goto add_proto_fail; | 1350 | goto add_proto_fail; |
1353 | } | 1351 | } |
1354 | #endif | 1352 | #endif |
1355 | rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL, ip6mr_rtm_dumproute); | 1353 | rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL, ip6mr_rtm_dumproute); |
1356 | return 0; | 1354 | return 0; |
1357 | #ifdef CONFIG_IPV6_PIMSM_V2 | 1355 | #ifdef CONFIG_IPV6_PIMSM_V2 |
1358 | add_proto_fail: | 1356 | add_proto_fail: |
1359 | unregister_netdevice_notifier(&ip6_mr_notifier); | 1357 | unregister_netdevice_notifier(&ip6_mr_notifier); |
1360 | #endif | 1358 | #endif |
1361 | reg_notif_fail: | 1359 | reg_notif_fail: |
1362 | unregister_pernet_subsys(&ip6mr_net_ops); | 1360 | unregister_pernet_subsys(&ip6mr_net_ops); |
1363 | reg_pernet_fail: | 1361 | reg_pernet_fail: |
1364 | kmem_cache_destroy(mrt_cachep); | 1362 | kmem_cache_destroy(mrt_cachep); |
1365 | return err; | 1363 | return err; |
1366 | } | 1364 | } |
1367 | 1365 | ||
1368 | void ip6_mr_cleanup(void) | 1366 | void ip6_mr_cleanup(void) |
1369 | { | 1367 | { |
1370 | unregister_netdevice_notifier(&ip6_mr_notifier); | 1368 | unregister_netdevice_notifier(&ip6_mr_notifier); |
1371 | unregister_pernet_subsys(&ip6mr_net_ops); | 1369 | unregister_pernet_subsys(&ip6mr_net_ops); |
1372 | kmem_cache_destroy(mrt_cachep); | 1370 | kmem_cache_destroy(mrt_cachep); |
1373 | } | 1371 | } |
1374 | 1372 | ||
1375 | static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt, | 1373 | static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt, |
1376 | struct mf6cctl *mfc, int mrtsock) | 1374 | struct mf6cctl *mfc, int mrtsock) |
1377 | { | 1375 | { |
1378 | bool found = false; | 1376 | bool found = false; |
1379 | int line; | 1377 | int line; |
1380 | struct mfc6_cache *uc, *c; | 1378 | struct mfc6_cache *uc, *c; |
1381 | unsigned char ttls[MAXMIFS]; | 1379 | unsigned char ttls[MAXMIFS]; |
1382 | int i; | 1380 | int i; |
1383 | 1381 | ||
1384 | if (mfc->mf6cc_parent >= MAXMIFS) | 1382 | if (mfc->mf6cc_parent >= MAXMIFS) |
1385 | return -ENFILE; | 1383 | return -ENFILE; |
1386 | 1384 | ||
1387 | memset(ttls, 255, MAXMIFS); | 1385 | memset(ttls, 255, MAXMIFS); |
1388 | for (i = 0; i < MAXMIFS; i++) { | 1386 | for (i = 0; i < MAXMIFS; i++) { |
1389 | if (IF_ISSET(i, &mfc->mf6cc_ifset)) | 1387 | if (IF_ISSET(i, &mfc->mf6cc_ifset)) |
1390 | ttls[i] = 1; | 1388 | ttls[i] = 1; |
1391 | 1389 | ||
1392 | } | 1390 | } |
1393 | 1391 | ||
1394 | line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr); | 1392 | line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr); |
1395 | 1393 | ||
1396 | list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) { | 1394 | list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) { |
1397 | if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) && | 1395 | if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) && |
1398 | ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) { | 1396 | ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) { |
1399 | found = true; | 1397 | found = true; |
1400 | break; | 1398 | break; |
1401 | } | 1399 | } |
1402 | } | 1400 | } |
1403 | 1401 | ||
1404 | if (found) { | 1402 | if (found) { |
1405 | write_lock_bh(&mrt_lock); | 1403 | write_lock_bh(&mrt_lock); |
1406 | c->mf6c_parent = mfc->mf6cc_parent; | 1404 | c->mf6c_parent = mfc->mf6cc_parent; |
1407 | ip6mr_update_thresholds(mrt, c, ttls); | 1405 | ip6mr_update_thresholds(mrt, c, ttls); |
1408 | if (!mrtsock) | 1406 | if (!mrtsock) |
1409 | c->mfc_flags |= MFC_STATIC; | 1407 | c->mfc_flags |= MFC_STATIC; |
1410 | write_unlock_bh(&mrt_lock); | 1408 | write_unlock_bh(&mrt_lock); |
1411 | return 0; | 1409 | return 0; |
1412 | } | 1410 | } |
1413 | 1411 | ||
1414 | if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr)) | 1412 | if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr)) |
1415 | return -EINVAL; | 1413 | return -EINVAL; |
1416 | 1414 | ||
1417 | c = ip6mr_cache_alloc(); | 1415 | c = ip6mr_cache_alloc(); |
1418 | if (c == NULL) | 1416 | if (c == NULL) |
1419 | return -ENOMEM; | 1417 | return -ENOMEM; |
1420 | 1418 | ||
1421 | c->mf6c_origin = mfc->mf6cc_origin.sin6_addr; | 1419 | c->mf6c_origin = mfc->mf6cc_origin.sin6_addr; |
1422 | c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr; | 1420 | c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr; |
1423 | c->mf6c_parent = mfc->mf6cc_parent; | 1421 | c->mf6c_parent = mfc->mf6cc_parent; |
1424 | ip6mr_update_thresholds(mrt, c, ttls); | 1422 | ip6mr_update_thresholds(mrt, c, ttls); |
1425 | if (!mrtsock) | 1423 | if (!mrtsock) |
1426 | c->mfc_flags |= MFC_STATIC; | 1424 | c->mfc_flags |= MFC_STATIC; |
1427 | 1425 | ||
1428 | write_lock_bh(&mrt_lock); | 1426 | write_lock_bh(&mrt_lock); |
1429 | list_add(&c->list, &mrt->mfc6_cache_array[line]); | 1427 | list_add(&c->list, &mrt->mfc6_cache_array[line]); |
1430 | write_unlock_bh(&mrt_lock); | 1428 | write_unlock_bh(&mrt_lock); |
1431 | 1429 | ||
1432 | /* | 1430 | /* |
1433 | * Check to see if we resolved a queued list. If so we | 1431 | * Check to see if we resolved a queued list. If so we |
1434 | * need to send on the frames and tidy up. | 1432 | * need to send on the frames and tidy up. |
1435 | */ | 1433 | */ |
1436 | found = false; | 1434 | found = false; |
1437 | spin_lock_bh(&mfc_unres_lock); | 1435 | spin_lock_bh(&mfc_unres_lock); |
1438 | list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) { | 1436 | list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) { |
1439 | if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) && | 1437 | if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) && |
1440 | ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) { | 1438 | ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) { |
1441 | list_del(&uc->list); | 1439 | list_del(&uc->list); |
1442 | atomic_dec(&mrt->cache_resolve_queue_len); | 1440 | atomic_dec(&mrt->cache_resolve_queue_len); |
1443 | found = true; | 1441 | found = true; |
1444 | break; | 1442 | break; |
1445 | } | 1443 | } |
1446 | } | 1444 | } |
1447 | if (list_empty(&mrt->mfc6_unres_queue)) | 1445 | if (list_empty(&mrt->mfc6_unres_queue)) |
1448 | del_timer(&mrt->ipmr_expire_timer); | 1446 | del_timer(&mrt->ipmr_expire_timer); |
1449 | spin_unlock_bh(&mfc_unres_lock); | 1447 | spin_unlock_bh(&mfc_unres_lock); |
1450 | 1448 | ||
1451 | if (found) { | 1449 | if (found) { |
1452 | ip6mr_cache_resolve(net, mrt, uc, c); | 1450 | ip6mr_cache_resolve(net, mrt, uc, c); |
1453 | ip6mr_cache_free(uc); | 1451 | ip6mr_cache_free(uc); |
1454 | } | 1452 | } |
1455 | return 0; | 1453 | return 0; |
1456 | } | 1454 | } |
1457 | 1455 | ||
1458 | /* | 1456 | /* |
1459 | * Close the multicast socket, and clear the vif tables etc | 1457 | * Close the multicast socket, and clear the vif tables etc |
1460 | */ | 1458 | */ |
1461 | 1459 | ||
1462 | static void mroute_clean_tables(struct mr6_table *mrt) | 1460 | static void mroute_clean_tables(struct mr6_table *mrt) |
1463 | { | 1461 | { |
1464 | int i; | 1462 | int i; |
1465 | LIST_HEAD(list); | 1463 | LIST_HEAD(list); |
1466 | struct mfc6_cache *c, *next; | 1464 | struct mfc6_cache *c, *next; |
1467 | 1465 | ||
1468 | /* | 1466 | /* |
1469 | * Shut down all active vif entries | 1467 | * Shut down all active vif entries |
1470 | */ | 1468 | */ |
1471 | for (i = 0; i < mrt->maxvif; i++) { | 1469 | for (i = 0; i < mrt->maxvif; i++) { |
1472 | if (!(mrt->vif6_table[i].flags & VIFF_STATIC)) | 1470 | if (!(mrt->vif6_table[i].flags & VIFF_STATIC)) |
1473 | mif6_delete(mrt, i, &list); | 1471 | mif6_delete(mrt, i, &list); |
1474 | } | 1472 | } |
1475 | unregister_netdevice_many(&list); | 1473 | unregister_netdevice_many(&list); |
1476 | 1474 | ||
1477 | /* | 1475 | /* |
1478 | * Wipe the cache | 1476 | * Wipe the cache |
1479 | */ | 1477 | */ |
1480 | for (i = 0; i < MFC6_LINES; i++) { | 1478 | for (i = 0; i < MFC6_LINES; i++) { |
1481 | list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) { | 1479 | list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) { |
1482 | if (c->mfc_flags & MFC_STATIC) | 1480 | if (c->mfc_flags & MFC_STATIC) |
1483 | continue; | 1481 | continue; |
1484 | write_lock_bh(&mrt_lock); | 1482 | write_lock_bh(&mrt_lock); |
1485 | list_del(&c->list); | 1483 | list_del(&c->list); |
1486 | write_unlock_bh(&mrt_lock); | 1484 | write_unlock_bh(&mrt_lock); |
1487 | 1485 | ||
1488 | ip6mr_cache_free(c); | 1486 | ip6mr_cache_free(c); |
1489 | } | 1487 | } |
1490 | } | 1488 | } |
1491 | 1489 | ||
1492 | if (atomic_read(&mrt->cache_resolve_queue_len) != 0) { | 1490 | if (atomic_read(&mrt->cache_resolve_queue_len) != 0) { |
1493 | spin_lock_bh(&mfc_unres_lock); | 1491 | spin_lock_bh(&mfc_unres_lock); |
1494 | list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) { | 1492 | list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) { |
1495 | list_del(&c->list); | 1493 | list_del(&c->list); |
1496 | ip6mr_destroy_unres(mrt, c); | 1494 | ip6mr_destroy_unres(mrt, c); |
1497 | } | 1495 | } |
1498 | spin_unlock_bh(&mfc_unres_lock); | 1496 | spin_unlock_bh(&mfc_unres_lock); |
1499 | } | 1497 | } |
1500 | } | 1498 | } |
1501 | 1499 | ||
1502 | static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk) | 1500 | static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk) |
1503 | { | 1501 | { |
1504 | int err = 0; | 1502 | int err = 0; |
1505 | struct net *net = sock_net(sk); | 1503 | struct net *net = sock_net(sk); |
1506 | 1504 | ||
1507 | rtnl_lock(); | 1505 | rtnl_lock(); |
1508 | write_lock_bh(&mrt_lock); | 1506 | write_lock_bh(&mrt_lock); |
1509 | if (likely(mrt->mroute6_sk == NULL)) { | 1507 | if (likely(mrt->mroute6_sk == NULL)) { |
1510 | mrt->mroute6_sk = sk; | 1508 | mrt->mroute6_sk = sk; |
1511 | net->ipv6.devconf_all->mc_forwarding++; | 1509 | net->ipv6.devconf_all->mc_forwarding++; |
1512 | } | 1510 | } |
1513 | else | 1511 | else |
1514 | err = -EADDRINUSE; | 1512 | err = -EADDRINUSE; |
1515 | write_unlock_bh(&mrt_lock); | 1513 | write_unlock_bh(&mrt_lock); |
1516 | 1514 | ||
1517 | rtnl_unlock(); | 1515 | rtnl_unlock(); |
1518 | 1516 | ||
1519 | return err; | 1517 | return err; |
1520 | } | 1518 | } |
1521 | 1519 | ||
1522 | int ip6mr_sk_done(struct sock *sk) | 1520 | int ip6mr_sk_done(struct sock *sk) |
1523 | { | 1521 | { |
1524 | int err = -EACCES; | 1522 | int err = -EACCES; |
1525 | struct net *net = sock_net(sk); | 1523 | struct net *net = sock_net(sk); |
1526 | struct mr6_table *mrt; | 1524 | struct mr6_table *mrt; |
1527 | 1525 | ||
1528 | rtnl_lock(); | 1526 | rtnl_lock(); |
1529 | ip6mr_for_each_table(mrt, net) { | 1527 | ip6mr_for_each_table(mrt, net) { |
1530 | if (sk == mrt->mroute6_sk) { | 1528 | if (sk == mrt->mroute6_sk) { |
1531 | write_lock_bh(&mrt_lock); | 1529 | write_lock_bh(&mrt_lock); |
1532 | mrt->mroute6_sk = NULL; | 1530 | mrt->mroute6_sk = NULL; |
1533 | net->ipv6.devconf_all->mc_forwarding--; | 1531 | net->ipv6.devconf_all->mc_forwarding--; |
1534 | write_unlock_bh(&mrt_lock); | 1532 | write_unlock_bh(&mrt_lock); |
1535 | 1533 | ||
1536 | mroute_clean_tables(mrt); | 1534 | mroute_clean_tables(mrt); |
1537 | err = 0; | 1535 | err = 0; |
1538 | break; | 1536 | break; |
1539 | } | 1537 | } |
1540 | } | 1538 | } |
1541 | rtnl_unlock(); | 1539 | rtnl_unlock(); |
1542 | 1540 | ||
1543 | return err; | 1541 | return err; |
1544 | } | 1542 | } |
1545 | 1543 | ||
1546 | struct sock *mroute6_socket(struct net *net, struct sk_buff *skb) | 1544 | struct sock *mroute6_socket(struct net *net, struct sk_buff *skb) |
1547 | { | 1545 | { |
1548 | struct mr6_table *mrt; | 1546 | struct mr6_table *mrt; |
1549 | struct flowi fl = { | 1547 | struct flowi fl = { |
1550 | .iif = skb->skb_iif, | 1548 | .iif = skb->skb_iif, |
1551 | .oif = skb->dev->ifindex, | 1549 | .oif = skb->dev->ifindex, |
1552 | .mark = skb->mark, | 1550 | .mark = skb->mark, |
1553 | }; | 1551 | }; |
1554 | 1552 | ||
1555 | if (ip6mr_fib_lookup(net, &fl, &mrt) < 0) | 1553 | if (ip6mr_fib_lookup(net, &fl, &mrt) < 0) |
1556 | return NULL; | 1554 | return NULL; |
1557 | 1555 | ||
1558 | return mrt->mroute6_sk; | 1556 | return mrt->mroute6_sk; |
1559 | } | 1557 | } |
1560 | 1558 | ||
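The setsockopt() handler below is the whole userspace control surface for this file; a daemon such as pim6sd drives it over the raw ICMPv6 socket it already owns. A hedged sketch of the expected call sequence (error handling omitted, interface name illustrative):

#include <net/if.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <linux/mroute6.h>

int main(void)
{
	int s = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
	int on = 1;
	struct mif6ctl mif = {
		.mif6c_mifi = 0,			/* MIF slot in vif6_table */
		.mif6c_pifi = if_nametoindex("eth0"),	/* physical interface */
	};

	/* MRT6_INIT: this socket becomes mrt->mroute6_sk (only one allowed) */
	setsockopt(s, IPPROTO_IPV6, MRT6_INIT, &on, sizeof(on));
	/* MRT6_ADD_MIF: lands in mif6_add() via the case below */
	setsockopt(s, IPPROTO_IPV6, MRT6_ADD_MIF, &mif, sizeof(mif));
	return 0;
}

MRT6_ADD_MFC and MRT6_DEL_MFC then install or remove the (origin, group) -> oif-list entries that ip6mr_mfc_add() and ip6mr_mfc_delete() above manage.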
1561 | /* | 1559 | /* |
1562 | * Socket options and virtual interface manipulation. The whole | 1560 | * Socket options and virtual interface manipulation. The whole |
1563 | * virtual interface system is a complete heap, but unfortunately | 1561 | * virtual interface system is a complete heap, but unfortunately |
1564 | * that's how BSD mrouted happens to think. Maybe one day with a proper | 1562 | * that's how BSD mrouted happens to think. Maybe one day with a proper |
1565 | * MOSPF/PIM router set up we can clean this up. | 1563 | * MOSPF/PIM router set up we can clean this up. |
1566 | */ | 1564 | */ |
1567 | 1565 | ||
1568 | int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen) | 1566 | int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen) |
1569 | { | 1567 | { |
1570 | int ret; | 1568 | int ret; |
1571 | struct mif6ctl vif; | 1569 | struct mif6ctl vif; |
1572 | struct mf6cctl mfc; | 1570 | struct mf6cctl mfc; |
1573 | mifi_t mifi; | 1571 | mifi_t mifi; |
1574 | struct net *net = sock_net(sk); | 1572 | struct net *net = sock_net(sk); |
1575 | struct mr6_table *mrt; | 1573 | struct mr6_table *mrt; |
1576 | 1574 | ||
1577 | mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); | 1575 | mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); |
1578 | if (mrt == NULL) | 1576 | if (mrt == NULL) |
1579 | return -ENOENT; | 1577 | return -ENOENT; |
1580 | 1578 | ||
1581 | if (optname != MRT6_INIT) { | 1579 | if (optname != MRT6_INIT) { |
1582 | if (sk != mrt->mroute6_sk && !capable(CAP_NET_ADMIN)) | 1580 | if (sk != mrt->mroute6_sk && !capable(CAP_NET_ADMIN)) |
1583 | return -EACCES; | 1581 | return -EACCES; |
1584 | } | 1582 | } |
1585 | 1583 | ||
1586 | switch (optname) { | 1584 | switch (optname) { |
1587 | case MRT6_INIT: | 1585 | case MRT6_INIT: |
1588 | if (sk->sk_type != SOCK_RAW || | 1586 | if (sk->sk_type != SOCK_RAW || |
1589 | inet_sk(sk)->inet_num != IPPROTO_ICMPV6) | 1587 | inet_sk(sk)->inet_num != IPPROTO_ICMPV6) |
1590 | return -EOPNOTSUPP; | 1588 | return -EOPNOTSUPP; |
1591 | if (optlen < sizeof(int)) | 1589 | if (optlen < sizeof(int)) |
1592 | return -EINVAL; | 1590 | return -EINVAL; |
1593 | 1591 | ||
1594 | return ip6mr_sk_init(mrt, sk); | 1592 | return ip6mr_sk_init(mrt, sk); |
1595 | 1593 | ||
1596 | case MRT6_DONE: | 1594 | case MRT6_DONE: |
1597 | return ip6mr_sk_done(sk); | 1595 | return ip6mr_sk_done(sk); |
1598 | 1596 | ||
1599 | case MRT6_ADD_MIF: | 1597 | case MRT6_ADD_MIF: |
1600 | if (optlen < sizeof(vif)) | 1598 | if (optlen < sizeof(vif)) |
1601 | return -EINVAL; | 1599 | return -EINVAL; |
1602 | if (copy_from_user(&vif, optval, sizeof(vif))) | 1600 | if (copy_from_user(&vif, optval, sizeof(vif))) |
1603 | return -EFAULT; | 1601 | return -EFAULT; |
1604 | if (vif.mif6c_mifi >= MAXMIFS) | 1602 | if (vif.mif6c_mifi >= MAXMIFS) |
1605 | return -ENFILE; | 1603 | return -ENFILE; |
1606 | rtnl_lock(); | 1604 | rtnl_lock(); |
1607 | ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk); | 1605 | ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk); |
1608 | rtnl_unlock(); | 1606 | rtnl_unlock(); |
1609 | return ret; | 1607 | return ret; |
1610 | 1608 | ||
1611 | case MRT6_DEL_MIF: | 1609 | case MRT6_DEL_MIF: |
1612 | if (optlen < sizeof(mifi_t)) | 1610 | if (optlen < sizeof(mifi_t)) |
1613 | return -EINVAL; | 1611 | return -EINVAL; |
1614 | if (copy_from_user(&mifi, optval, sizeof(mifi_t))) | 1612 | if (copy_from_user(&mifi, optval, sizeof(mifi_t))) |
1615 | return -EFAULT; | 1613 | return -EFAULT; |
1616 | rtnl_lock(); | 1614 | rtnl_lock(); |
1617 | ret = mif6_delete(mrt, mifi, NULL); | 1615 | ret = mif6_delete(mrt, mifi, NULL); |
1618 | rtnl_unlock(); | 1616 | rtnl_unlock(); |
1619 | return ret; | 1617 | return ret; |
1620 | 1618 | ||
1621 | /* | 1619 | /* |
1622 | * Manipulate the forwarding caches. These live | 1620 | * Manipulate the forwarding caches. These live |
1623 | * in a sort of kernel/user symbiosis. | 1621 | * in a sort of kernel/user symbiosis. |
1624 | */ | 1622 | */ |
1625 | case MRT6_ADD_MFC: | 1623 | case MRT6_ADD_MFC: |
1626 | case MRT6_DEL_MFC: | 1624 | case MRT6_DEL_MFC: |
1627 | if (optlen < sizeof(mfc)) | 1625 | if (optlen < sizeof(mfc)) |
1628 | return -EINVAL; | 1626 | return -EINVAL; |
1629 | if (copy_from_user(&mfc, optval, sizeof(mfc))) | 1627 | if (copy_from_user(&mfc, optval, sizeof(mfc))) |
1630 | return -EFAULT; | 1628 | return -EFAULT; |
1631 | rtnl_lock(); | 1629 | rtnl_lock(); |
1632 | if (optname == MRT6_DEL_MFC) | 1630 | if (optname == MRT6_DEL_MFC) |
1633 | ret = ip6mr_mfc_delete(mrt, &mfc); | 1631 | ret = ip6mr_mfc_delete(mrt, &mfc); |
1634 | else | 1632 | else |
1635 | ret = ip6mr_mfc_add(net, mrt, &mfc, sk == mrt->mroute6_sk); | 1633 | ret = ip6mr_mfc_add(net, mrt, &mfc, sk == mrt->mroute6_sk); |
1636 | rtnl_unlock(); | 1634 | rtnl_unlock(); |
1637 | return ret; | 1635 | return ret; |
1638 | 1636 | ||
1639 | /* | 1637 | /* |
1640 | * Control PIM assert (activating PIM also activates assert) | 1638 | * Control PIM assert (activating PIM also activates assert) |
1641 | */ | 1639 | */ |
1642 | case MRT6_ASSERT: | 1640 | case MRT6_ASSERT: |
1643 | { | 1641 | { |
1644 | int v; | 1642 | int v; |
1645 | if (get_user(v, (int __user *)optval)) | 1643 | if (get_user(v, (int __user *)optval)) |
1646 | return -EFAULT; | 1644 | return -EFAULT; |
1647 | mrt->mroute_do_assert = !!v; | 1645 | mrt->mroute_do_assert = !!v; |
1648 | return 0; | 1646 | return 0; |
1649 | } | 1647 | } |
1650 | 1648 | ||
1651 | #ifdef CONFIG_IPV6_PIMSM_V2 | 1649 | #ifdef CONFIG_IPV6_PIMSM_V2 |
1652 | case MRT6_PIM: | 1650 | case MRT6_PIM: |
1653 | { | 1651 | { |
1654 | int v; | 1652 | int v; |
1655 | if (get_user(v, (int __user *)optval)) | 1653 | if (get_user(v, (int __user *)optval)) |
1656 | return -EFAULT; | 1654 | return -EFAULT; |
1657 | v = !!v; | 1655 | v = !!v; |
1658 | rtnl_lock(); | 1656 | rtnl_lock(); |
1659 | ret = 0; | 1657 | ret = 0; |
1660 | if (v != mrt->mroute_do_pim) { | 1658 | if (v != mrt->mroute_do_pim) { |
1661 | mrt->mroute_do_pim = v; | 1659 | mrt->mroute_do_pim = v; |
1662 | mrt->mroute_do_assert = v; | 1660 | mrt->mroute_do_assert = v; |
1663 | } | 1661 | } |
1664 | rtnl_unlock(); | 1662 | rtnl_unlock(); |
1665 | return ret; | 1663 | return ret; |
1666 | } | 1664 | } |
1667 | 1665 | ||
1668 | #endif | 1666 | #endif |
1669 | #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES | 1667 | #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES |
1670 | case MRT6_TABLE: | 1668 | case MRT6_TABLE: |
1671 | { | 1669 | { |
1672 | u32 v; | 1670 | u32 v; |
1673 | 1671 | ||
1674 | if (optlen != sizeof(u32)) | 1672 | if (optlen != sizeof(u32)) |
1675 | return -EINVAL; | 1673 | return -EINVAL; |
1676 | if (get_user(v, (u32 __user *)optval)) | 1674 | if (get_user(v, (u32 __user *)optval)) |
1677 | return -EFAULT; | 1675 | return -EFAULT; |
1678 | if (sk == mrt->mroute6_sk) | 1676 | if (sk == mrt->mroute6_sk) |
1679 | return -EBUSY; | 1677 | return -EBUSY; |
1680 | 1678 | ||
1681 | rtnl_lock(); | 1679 | rtnl_lock(); |
1682 | ret = 0; | 1680 | ret = 0; |
1683 | if (!ip6mr_new_table(net, v)) | 1681 | if (!ip6mr_new_table(net, v)) |
1684 | ret = -ENOMEM; | 1682 | ret = -ENOMEM; |
1685 | else | 1683 | else |
1686 | raw6_sk(sk)->ip6mr_table = v; | 1684 | raw6_sk(sk)->ip6mr_table = v; |
1686 | rtnl_unlock(); | 1684 | rtnl_unlock(); |
1687 | return ret; | 1685 | return ret; |
1688 | } | 1686 | } |
1689 | #endif | 1687 | #endif |
1690 | /* | 1688 | /* |
1691 | * Spurious command, or MRT6_VERSION which you cannot | 1689 | * Spurious command, or MRT6_VERSION which you cannot |
1692 | * set. | 1690 | * set. |
1693 | */ | 1691 | */ |
1694 | default: | 1692 | default: |
1695 | return -ENOPROTOOPT; | 1693 | return -ENOPROTOOPT; |
1696 | } | 1694 | } |
1697 | } | 1695 | } |
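
The setsockopt path above is the kernel half of the mrouted-style user/kernel API. As a hedged userspace sketch (not part of this commit; the physical interface index is a placeholder), this is roughly how a routing daemon such as pim6sd initializes IPv6 multicast routing and registers a MIF:

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/mroute6.h>

int main(void)
{
	int v = 1;
	struct mif6ctl mc;
	/* MRT6_INIT demands a SOCK_RAW/IPPROTO_ICMPV6 socket (checked above) */
	int fd = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);

	if (fd < 0)
		return 1;
	if (setsockopt(fd, IPPROTO_IPV6, MRT6_INIT, &v, sizeof(v)) < 0)
		perror("MRT6_INIT");		/* needs CAP_NET_ADMIN */

	memset(&mc, 0, sizeof(mc));
	mc.mif6c_mifi = 0;			/* MIF index, must be < MAXMIFS */
	mc.mif6c_pifi = 2;			/* hypothetical physical ifindex */
	if (setsockopt(fd, IPPROTO_IPV6, MRT6_ADD_MIF, &mc, sizeof(mc)) < 0)
		perror("MRT6_ADD_MIF");		/* ends up in mif6_add() */

	close(fd);				/* raw close runs ip6mr_sk_done() */
	return 0;
}
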
1698 | 1696 | ||
1699 | /* | 1697 | /* |
1700 | * Getsockopt support for the multicast routing system. | 1698 | * Getsockopt support for the multicast routing system. |
1701 | */ | 1699 | */ |
1702 | 1700 | ||
1703 | int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, | 1701 | int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, |
1704 | int __user *optlen) | 1702 | int __user *optlen) |
1705 | { | 1703 | { |
1706 | int olr; | 1704 | int olr; |
1707 | int val; | 1705 | int val; |
1708 | struct net *net = sock_net(sk); | 1706 | struct net *net = sock_net(sk); |
1709 | struct mr6_table *mrt; | 1707 | struct mr6_table *mrt; |
1710 | 1708 | ||
1711 | mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); | 1709 | mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); |
1712 | if (mrt == NULL) | 1710 | if (mrt == NULL) |
1713 | return -ENOENT; | 1711 | return -ENOENT; |
1714 | 1712 | ||
1715 | switch (optname) { | 1713 | switch (optname) { |
1716 | case MRT6_VERSION: | 1714 | case MRT6_VERSION: |
1717 | val = 0x0305; | 1715 | val = 0x0305; |
1718 | break; | 1716 | break; |
1719 | #ifdef CONFIG_IPV6_PIMSM_V2 | 1717 | #ifdef CONFIG_IPV6_PIMSM_V2 |
1720 | case MRT6_PIM: | 1718 | case MRT6_PIM: |
1721 | val = mrt->mroute_do_pim; | 1719 | val = mrt->mroute_do_pim; |
1722 | break; | 1720 | break; |
1723 | #endif | 1721 | #endif |
1724 | case MRT6_ASSERT: | 1722 | case MRT6_ASSERT: |
1725 | val = mrt->mroute_do_assert; | 1723 | val = mrt->mroute_do_assert; |
1726 | break; | 1724 | break; |
1727 | default: | 1725 | default: |
1728 | return -ENOPROTOOPT; | 1726 | return -ENOPROTOOPT; |
1729 | } | 1727 | } |
1730 | 1728 | ||
1731 | if (get_user(olr, optlen)) | 1729 | if (get_user(olr, optlen)) |
1732 | return -EFAULT; | 1730 | return -EFAULT; |
1733 | 1731 | ||
1734 | olr = min_t(int, olr, sizeof(int)); | 1732 | olr = min_t(int, olr, sizeof(int)); |
1735 | if (olr < 0) | 1733 | if (olr < 0) |
1736 | return -EINVAL; | 1734 | return -EINVAL; |
1737 | 1735 | ||
1738 | if (put_user(olr, optlen)) | 1736 | if (put_user(olr, optlen)) |
1739 | return -EFAULT; | 1737 | return -EFAULT; |
1740 | if (copy_to_user(optval, &val, olr)) | 1738 | if (copy_to_user(optval, &val, olr)) |
1741 | return -EFAULT; | 1739 | return -EFAULT; |
1742 | return 0; | 1740 | return 0; |
1743 | } | 1741 | } |
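
For the getsockopt side, a short hedged sketch (reusing the headers and the mrouting socket fd from the sketch above): querying the API version that the code above reports as 0x0305.

/* Returns the MRT6 API version (0x0305 above) or -1 on error. */
static int mrt6_version(int fd)
{
	int val = 0;
	socklen_t olen = sizeof(val);

	if (getsockopt(fd, IPPROTO_IPV6, MRT6_VERSION, &val, &olen) < 0)
		return -1;
	/* the kernel clamps the copy to min(olen, sizeof(int)) */
	return val;
}
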
1744 | 1742 | ||
1745 | /* | 1743 | /* |
1746 | * The IPv6 multicast ioctl support routines. | 1744 | * The IPv6 multicast ioctl support routines. |
1747 | */ | 1745 | */ |
1748 | 1746 | ||
1749 | int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg) | 1747 | int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg) |
1750 | { | 1748 | { |
1751 | struct sioc_sg_req6 sr; | 1749 | struct sioc_sg_req6 sr; |
1752 | struct sioc_mif_req6 vr; | 1750 | struct sioc_mif_req6 vr; |
1753 | struct mif_device *vif; | 1751 | struct mif_device *vif; |
1754 | struct mfc6_cache *c; | 1752 | struct mfc6_cache *c; |
1755 | struct net *net = sock_net(sk); | 1753 | struct net *net = sock_net(sk); |
1756 | struct mr6_table *mrt; | 1754 | struct mr6_table *mrt; |
1757 | 1755 | ||
1758 | mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); | 1756 | mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); |
1759 | if (mrt == NULL) | 1757 | if (mrt == NULL) |
1760 | return -ENOENT; | 1758 | return -ENOENT; |
1761 | 1759 | ||
1762 | switch (cmd) { | 1760 | switch (cmd) { |
1763 | case SIOCGETMIFCNT_IN6: | 1761 | case SIOCGETMIFCNT_IN6: |
1764 | if (copy_from_user(&vr, arg, sizeof(vr))) | 1762 | if (copy_from_user(&vr, arg, sizeof(vr))) |
1765 | return -EFAULT; | 1763 | return -EFAULT; |
1766 | if (vr.mifi >= mrt->maxvif) | 1764 | if (vr.mifi >= mrt->maxvif) |
1767 | return -EINVAL; | 1765 | return -EINVAL; |
1768 | read_lock(&mrt_lock); | 1766 | read_lock(&mrt_lock); |
1769 | vif = &mrt->vif6_table[vr.mifi]; | 1767 | vif = &mrt->vif6_table[vr.mifi]; |
1770 | if (MIF_EXISTS(mrt, vr.mifi)) { | 1768 | if (MIF_EXISTS(mrt, vr.mifi)) { |
1771 | vr.icount = vif->pkt_in; | 1769 | vr.icount = vif->pkt_in; |
1772 | vr.ocount = vif->pkt_out; | 1770 | vr.ocount = vif->pkt_out; |
1773 | vr.ibytes = vif->bytes_in; | 1771 | vr.ibytes = vif->bytes_in; |
1774 | vr.obytes = vif->bytes_out; | 1772 | vr.obytes = vif->bytes_out; |
1775 | read_unlock(&mrt_lock); | 1773 | read_unlock(&mrt_lock); |
1776 | 1774 | ||
1777 | if (copy_to_user(arg, &vr, sizeof(vr))) | 1775 | if (copy_to_user(arg, &vr, sizeof(vr))) |
1778 | return -EFAULT; | 1776 | return -EFAULT; |
1779 | return 0; | 1777 | return 0; |
1780 | } | 1778 | } |
1781 | read_unlock(&mrt_lock); | 1779 | read_unlock(&mrt_lock); |
1782 | return -EADDRNOTAVAIL; | 1780 | return -EADDRNOTAVAIL; |
1783 | case SIOCGETSGCNT_IN6: | 1781 | case SIOCGETSGCNT_IN6: |
1784 | if (copy_from_user(&sr, arg, sizeof(sr))) | 1782 | if (copy_from_user(&sr, arg, sizeof(sr))) |
1785 | return -EFAULT; | 1783 | return -EFAULT; |
1786 | 1784 | ||
1787 | read_lock(&mrt_lock); | 1785 | read_lock(&mrt_lock); |
1788 | c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr); | 1786 | c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr); |
1789 | if (c) { | 1787 | if (c) { |
1790 | sr.pktcnt = c->mfc_un.res.pkt; | 1788 | sr.pktcnt = c->mfc_un.res.pkt; |
1791 | sr.bytecnt = c->mfc_un.res.bytes; | 1789 | sr.bytecnt = c->mfc_un.res.bytes; |
1792 | sr.wrong_if = c->mfc_un.res.wrong_if; | 1790 | sr.wrong_if = c->mfc_un.res.wrong_if; |
1793 | read_unlock(&mrt_lock); | 1791 | read_unlock(&mrt_lock); |
1794 | 1792 | ||
1795 | if (copy_to_user(arg, &sr, sizeof(sr))) | 1793 | if (copy_to_user(arg, &sr, sizeof(sr))) |
1796 | return -EFAULT; | 1794 | return -EFAULT; |
1797 | return 0; | 1795 | return 0; |
1798 | } | 1796 | } |
1799 | read_unlock(&mrt_lock); | 1797 | read_unlock(&mrt_lock); |
1800 | return -EADDRNOTAVAIL; | 1798 | return -EADDRNOTAVAIL; |
1801 | default: | 1799 | default: |
1802 | return -ENOIOCTLCMD; | 1800 | return -ENOIOCTLCMD; |
1803 | } | 1801 | } |
1804 | } | 1802 | } |
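
A hedged userspace counterpart for SIOCGETSGCNT_IN6 (the source/group addresses come from the caller; nothing here is mandated by this commit): it fills the same struct sioc_sg_req6 the kernel copies back above.

#include <sys/ioctl.h>	/* plus the includes from the sketches above */

/* Print the per-(S,G) counters kept in mfc_un.res; fd is the mrouting
 * socket. */
static void print_sg_counters(int fd, const struct in6_addr *src,
			      const struct in6_addr *grp)
{
	struct sioc_sg_req6 sr;

	memset(&sr, 0, sizeof(sr));
	sr.src.sin6_family = AF_INET6;
	sr.src.sin6_addr = *src;
	sr.grp.sin6_family = AF_INET6;
	sr.grp.sin6_addr = *grp;

	if (ioctl(fd, SIOCGETSGCNT_IN6, &sr) == 0)
		printf("pkt=%lu bytes=%lu wrong_if=%lu\n",
		       sr.pktcnt, sr.bytecnt, sr.wrong_if);
	/* EADDRNOTAVAIL means no cache entry exists for this (S,G) */
}
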
1805 | 1803 | ||
1806 | 1804 | ||
1807 | static inline int ip6mr_forward2_finish(struct sk_buff *skb) | 1805 | static inline int ip6mr_forward2_finish(struct sk_buff *skb) |
1808 | { | 1806 | { |
1809 | IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)), | 1807 | IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)), |
1810 | IPSTATS_MIB_OUTFORWDATAGRAMS); | 1808 | IPSTATS_MIB_OUTFORWDATAGRAMS); |
1811 | return dst_output(skb); | 1809 | return dst_output(skb); |
1812 | } | 1810 | } |
1813 | 1811 | ||
1814 | /* | 1812 | /* |
1815 | * Processing handlers for ip6mr_forward | 1813 | * Processing handlers for ip6mr_forward |
1816 | */ | 1814 | */ |
1817 | 1815 | ||
1818 | static int ip6mr_forward2(struct net *net, struct mr6_table *mrt, | 1816 | static int ip6mr_forward2(struct net *net, struct mr6_table *mrt, |
1819 | struct sk_buff *skb, struct mfc6_cache *c, int vifi) | 1817 | struct sk_buff *skb, struct mfc6_cache *c, int vifi) |
1820 | { | 1818 | { |
1821 | struct ipv6hdr *ipv6h; | 1819 | struct ipv6hdr *ipv6h; |
1822 | struct mif_device *vif = &mrt->vif6_table[vifi]; | 1820 | struct mif_device *vif = &mrt->vif6_table[vifi]; |
1823 | struct net_device *dev; | 1821 | struct net_device *dev; |
1824 | struct dst_entry *dst; | 1822 | struct dst_entry *dst; |
1825 | struct flowi fl; | 1823 | struct flowi fl; |
1826 | 1824 | ||
1827 | if (vif->dev == NULL) | 1825 | if (vif->dev == NULL) |
1828 | goto out_free; | 1826 | goto out_free; |
1829 | 1827 | ||
1830 | #ifdef CONFIG_IPV6_PIMSM_V2 | 1828 | #ifdef CONFIG_IPV6_PIMSM_V2 |
1831 | if (vif->flags & MIFF_REGISTER) { | 1829 | if (vif->flags & MIFF_REGISTER) { |
1832 | vif->pkt_out++; | 1830 | vif->pkt_out++; |
1833 | vif->bytes_out += skb->len; | 1831 | vif->bytes_out += skb->len; |
1834 | vif->dev->stats.tx_bytes += skb->len; | 1832 | vif->dev->stats.tx_bytes += skb->len; |
1835 | vif->dev->stats.tx_packets++; | 1833 | vif->dev->stats.tx_packets++; |
1836 | ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT); | 1834 | ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT); |
1837 | goto out_free; | 1835 | goto out_free; |
1838 | } | 1836 | } |
1839 | #endif | 1837 | #endif |
1840 | 1838 | ||
1841 | ipv6h = ipv6_hdr(skb); | 1839 | ipv6h = ipv6_hdr(skb); |
1842 | 1840 | ||
1843 | fl = (struct flowi) { | 1841 | fl = (struct flowi) { |
1844 | .oif = vif->link, | 1842 | .oif = vif->link, |
1845 | .nl_u = { .ip6_u = | 1843 | .nl_u = { .ip6_u = |
1846 | { .daddr = ipv6h->daddr, } | 1844 | { .daddr = ipv6h->daddr, } |
1847 | } | 1845 | } |
1848 | }; | 1846 | }; |
1849 | 1847 | ||
1850 | dst = ip6_route_output(net, NULL, &fl); | 1848 | dst = ip6_route_output(net, NULL, &fl); |
1851 | if (!dst) | 1849 | if (!dst) |
1852 | goto out_free; | 1850 | goto out_free; |
1853 | 1851 | ||
1854 | skb_dst_drop(skb); | 1852 | skb_dst_drop(skb); |
1855 | skb_dst_set(skb, dst); | 1853 | skb_dst_set(skb, dst); |
1856 | 1854 | ||
1857 | /* | 1855 | /* |
1858 | * RFC1584 teaches that a DVMRP/PIM router must deliver packets locally | 1856 | * RFC1584 teaches that a DVMRP/PIM router must deliver packets locally |
1859 | * not only before forwarding, but also after forwarding on all output | 1857 | * not only before forwarding, but also after forwarding on all output |
1860 | * interfaces. Clearly, if the mrouter runs a multicast | 1858 | * interfaces. Clearly, if the mrouter runs a multicast |
1861 | * program, that program should receive packets regardless of the | 1859 | * program, that program should receive packets regardless of the |
1862 | * interface it has joined on. | 1860 | * interface it has joined on. |
1863 | * If we did not do this, the program would have to join on all | 1861 | * If we did not do this, the program would have to join on all |
1864 | * interfaces. On the other hand, a multihomed host (or router, but | 1862 | * interfaces. On the other hand, a multihomed host (or router, but |
1865 | * not an mrouter) cannot join on more than one interface - that would | 1863 | * not an mrouter) cannot join on more than one interface - that would |
1866 | * result in receiving duplicate packets. | 1864 | * result in receiving duplicate packets. |
1867 | */ | 1865 | */ |
1868 | dev = vif->dev; | 1866 | dev = vif->dev; |
1869 | skb->dev = dev; | 1867 | skb->dev = dev; |
1870 | vif->pkt_out++; | 1868 | vif->pkt_out++; |
1871 | vif->bytes_out += skb->len; | 1869 | vif->bytes_out += skb->len; |
1872 | 1870 | ||
1873 | /* We are about to write */ | 1871 | /* We are about to write */ |
1874 | /* XXX: extension headers? */ | 1872 | /* XXX: extension headers? */ |
1875 | if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev))) | 1873 | if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev))) |
1876 | goto out_free; | 1874 | goto out_free; |
1877 | 1875 | ||
1878 | ipv6h = ipv6_hdr(skb); | 1876 | ipv6h = ipv6_hdr(skb); |
1879 | ipv6h->hop_limit--; | 1877 | ipv6h->hop_limit--; |
1880 | 1878 | ||
1881 | IP6CB(skb)->flags |= IP6SKB_FORWARDED; | 1879 | IP6CB(skb)->flags |= IP6SKB_FORWARDED; |
1882 | 1880 | ||
1883 | return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dev, | 1881 | return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dev, |
1884 | ip6mr_forward2_finish); | 1882 | ip6mr_forward2_finish); |
1885 | 1883 | ||
1886 | out_free: | 1884 | out_free: |
1887 | kfree_skb(skb); | 1885 | kfree_skb(skb); |
1888 | return 0; | 1886 | return 0; |
1889 | } | 1887 | } |
1890 | 1888 | ||
1891 | static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev) | 1889 | static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev) |
1892 | { | 1890 | { |
1893 | int ct; | 1891 | int ct; |
1894 | 1892 | ||
1895 | for (ct = mrt->maxvif - 1; ct >= 0; ct--) { | 1893 | for (ct = mrt->maxvif - 1; ct >= 0; ct--) { |
1896 | if (mrt->vif6_table[ct].dev == dev) | 1894 | if (mrt->vif6_table[ct].dev == dev) |
1897 | break; | 1895 | break; |
1898 | } | 1896 | } |
1899 | return ct; | 1897 | return ct; |
1900 | } | 1898 | } |
1901 | 1899 | ||
1902 | static int ip6_mr_forward(struct net *net, struct mr6_table *mrt, | 1900 | static int ip6_mr_forward(struct net *net, struct mr6_table *mrt, |
1903 | struct sk_buff *skb, struct mfc6_cache *cache) | 1901 | struct sk_buff *skb, struct mfc6_cache *cache) |
1904 | { | 1902 | { |
1905 | int psend = -1; | 1903 | int psend = -1; |
1906 | int vif, ct; | 1904 | int vif, ct; |
1907 | 1905 | ||
1908 | vif = cache->mf6c_parent; | 1906 | vif = cache->mf6c_parent; |
1909 | cache->mfc_un.res.pkt++; | 1907 | cache->mfc_un.res.pkt++; |
1910 | cache->mfc_un.res.bytes += skb->len; | 1908 | cache->mfc_un.res.bytes += skb->len; |
1911 | 1909 | ||
1912 | /* | 1910 | /* |
1913 | * Wrong interface: drop packet and (maybe) send PIM assert. | 1911 | * Wrong interface: drop packet and (maybe) send PIM assert. |
1914 | */ | 1912 | */ |
1915 | if (mrt->vif6_table[vif].dev != skb->dev) { | 1913 | if (mrt->vif6_table[vif].dev != skb->dev) { |
1916 | int true_vifi; | 1914 | int true_vifi; |
1917 | 1915 | ||
1918 | cache->mfc_un.res.wrong_if++; | 1916 | cache->mfc_un.res.wrong_if++; |
1919 | true_vifi = ip6mr_find_vif(mrt, skb->dev); | 1917 | true_vifi = ip6mr_find_vif(mrt, skb->dev); |
1920 | 1918 | ||
1921 | if (true_vifi >= 0 && mrt->mroute_do_assert && | 1919 | if (true_vifi >= 0 && mrt->mroute_do_assert && |
1922 | /* PIM-SM uses asserts when switching from RPT to SPT, | 1920 | /* PIM-SM uses asserts when switching from RPT to SPT, |
1923 | so we cannot check that a packet arrived on an oif. | 1921 | so we cannot check that a packet arrived on an oif. |
1924 | That is bad, but otherwise we would need to move a pretty | 1922 | That is bad, but otherwise we would need to move a pretty |
1925 | large chunk of pimd into the kernel. Ugh... --ANK | 1923 | large chunk of pimd into the kernel. Ugh... --ANK |
1926 | */ | 1924 | */ |
1927 | (mrt->mroute_do_pim || | 1925 | (mrt->mroute_do_pim || |
1928 | cache->mfc_un.res.ttls[true_vifi] < 255) && | 1926 | cache->mfc_un.res.ttls[true_vifi] < 255) && |
1929 | time_after(jiffies, | 1927 | time_after(jiffies, |
1930 | cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) { | 1928 | cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) { |
1931 | cache->mfc_un.res.last_assert = jiffies; | 1929 | cache->mfc_un.res.last_assert = jiffies; |
1932 | ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF); | 1930 | ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF); |
1933 | } | 1931 | } |
1934 | goto dont_forward; | 1932 | goto dont_forward; |
1935 | } | 1933 | } |
1936 | 1934 | ||
1937 | mrt->vif6_table[vif].pkt_in++; | 1935 | mrt->vif6_table[vif].pkt_in++; |
1938 | mrt->vif6_table[vif].bytes_in += skb->len; | 1936 | mrt->vif6_table[vif].bytes_in += skb->len; |
1939 | 1937 | ||
1940 | /* | 1938 | /* |
1941 | * Forward the frame | 1939 | * Forward the frame |
1942 | */ | 1940 | */ |
1943 | for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) { | 1941 | for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) { |
1944 | if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) { | 1942 | if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) { |
1945 | if (psend != -1) { | 1943 | if (psend != -1) { |
1946 | struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); | 1944 | struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); |
1947 | if (skb2) | 1945 | if (skb2) |
1948 | ip6mr_forward2(net, mrt, skb2, cache, psend); | 1946 | ip6mr_forward2(net, mrt, skb2, cache, psend); |
1949 | } | 1947 | } |
1950 | psend = ct; | 1948 | psend = ct; |
1951 | } | 1949 | } |
1952 | } | 1950 | } |
1953 | if (psend != -1) { | 1951 | if (psend != -1) { |
1954 | ip6mr_forward2(net, mrt, skb, cache, psend); | 1952 | ip6mr_forward2(net, mrt, skb, cache, psend); |
1955 | return 0; | 1953 | return 0; |
1956 | } | 1954 | } |
1957 | 1955 | ||
1958 | dont_forward: | 1956 | dont_forward: |
1959 | kfree_skb(skb); | 1957 | kfree_skb(skb); |
1960 | return 0; | 1958 | return 0; |
1961 | } | 1959 | } |
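
The ttls[] thresholds tested against hop_limit in the loop above are installed via MRT6_ADD_MFC: MIFs set in mf6cc_ifset get threshold 1, all others 255 (never forward). A hedged sketch of the userspace side; addresses and MIF numbers are placeholders:

/* Install an (S,G) forwarding entry: packets arriving on 'parent'
 * are forwarded out 'oif'. */
static int add_mfc(int fd, const struct in6_addr *src,
		   const struct in6_addr *grp, mifi_t parent, mifi_t oif)
{
	struct mf6cctl mfc;

	memset(&mfc, 0, sizeof(mfc));
	mfc.mf6cc_origin.sin6_family = AF_INET6;
	mfc.mf6cc_origin.sin6_addr = *src;
	mfc.mf6cc_mcastgrp.sin6_family = AF_INET6;
	mfc.mf6cc_mcastgrp.sin6_addr = *grp;
	mfc.mf6cc_parent = parent;	/* wrong-iif packets trigger asserts */
	IF_SET(oif, &mfc.mf6cc_ifset);	/* becomes ttls[oif] = 1 */

	return setsockopt(fd, IPPROTO_IPV6, MRT6_ADD_MFC, &mfc, sizeof(mfc));
}
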
1962 | 1960 | ||
1963 | 1961 | ||
1964 | /* | 1962 | /* |
1965 | * Multicast packets for forwarding arrive here | 1963 | * Multicast packets for forwarding arrive here |
1966 | */ | 1964 | */ |
1967 | 1965 | ||
1968 | int ip6_mr_input(struct sk_buff *skb) | 1966 | int ip6_mr_input(struct sk_buff *skb) |
1969 | { | 1967 | { |
1970 | struct mfc6_cache *cache; | 1968 | struct mfc6_cache *cache; |
1971 | struct net *net = dev_net(skb->dev); | 1969 | struct net *net = dev_net(skb->dev); |
1972 | struct mr6_table *mrt; | 1970 | struct mr6_table *mrt; |
1973 | struct flowi fl = { | 1971 | struct flowi fl = { |
1974 | .iif = skb->dev->ifindex, | 1972 | .iif = skb->dev->ifindex, |
1975 | .mark = skb->mark, | 1973 | .mark = skb->mark, |
1976 | }; | 1974 | }; |
1977 | int err; | 1975 | int err; |
1978 | 1976 | ||
1979 | err = ip6mr_fib_lookup(net, &fl, &mrt); | 1977 | err = ip6mr_fib_lookup(net, &fl, &mrt); |
1980 | if (err < 0) | 1978 | if (err < 0) |
1981 | return err; | 1979 | return err; |
1982 | 1980 | ||
1983 | read_lock(&mrt_lock); | 1981 | read_lock(&mrt_lock); |
1984 | cache = ip6mr_cache_find(mrt, | 1982 | cache = ip6mr_cache_find(mrt, |
1985 | &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr); | 1983 | &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr); |
1986 | 1984 | ||
1987 | /* | 1985 | /* |
1988 | * No usable cache entry | 1986 | * No usable cache entry |
1989 | */ | 1987 | */ |
1990 | if (cache == NULL) { | 1988 | if (cache == NULL) { |
1991 | int vif; | 1989 | int vif; |
1992 | 1990 | ||
1993 | vif = ip6mr_find_vif(mrt, skb->dev); | 1991 | vif = ip6mr_find_vif(mrt, skb->dev); |
1994 | if (vif >= 0) { | 1992 | if (vif >= 0) { |
1995 | int err = ip6mr_cache_unresolved(mrt, vif, skb); | 1993 | int err = ip6mr_cache_unresolved(mrt, vif, skb); |
1996 | read_unlock(&mrt_lock); | 1994 | read_unlock(&mrt_lock); |
1997 | 1995 | ||
1998 | return err; | 1996 | return err; |
1999 | } | 1997 | } |
2000 | read_unlock(&mrt_lock); | 1998 | read_unlock(&mrt_lock); |
2001 | kfree_skb(skb); | 1999 | kfree_skb(skb); |
2002 | return -ENODEV; | 2000 | return -ENODEV; |
2003 | } | 2001 | } |
2004 | 2002 | ||
2005 | ip6_mr_forward(net, mrt, skb, cache); | 2003 | ip6_mr_forward(net, mrt, skb, cache); |
2006 | 2004 | ||
2007 | read_unlock(&mrt_lock); | 2005 | read_unlock(&mrt_lock); |
2008 | 2006 | ||
2009 | return 0; | 2007 | return 0; |
2010 | } | 2008 | } |
2011 | 2009 | ||
2012 | 2010 | ||
2013 | static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, | 2011 | static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, |
2014 | struct mfc6_cache *c, struct rtmsg *rtm) | 2012 | struct mfc6_cache *c, struct rtmsg *rtm) |
2015 | { | 2013 | { |
2016 | int ct; | 2014 | int ct; |
2017 | struct rtnexthop *nhp; | 2015 | struct rtnexthop *nhp; |
2018 | u8 *b = skb_tail_pointer(skb); | 2016 | u8 *b = skb_tail_pointer(skb); |
2019 | struct rtattr *mp_head; | 2017 | struct rtattr *mp_head; |
2020 | 2018 | ||
2021 | /* If cache is unresolved, don't try to parse IIF and OIF */ | 2019 | /* If cache is unresolved, don't try to parse IIF and OIF */ |
2022 | if (c->mf6c_parent > MAXMIFS) | 2020 | if (c->mf6c_parent > MAXMIFS) |
2023 | return -ENOENT; | 2021 | return -ENOENT; |
2024 | 2022 | ||
2025 | if (MIF_EXISTS(mrt, c->mf6c_parent)) | 2023 | if (MIF_EXISTS(mrt, c->mf6c_parent)) |
2026 | RTA_PUT(skb, RTA_IIF, 4, &mrt->vif6_table[c->mf6c_parent].dev->ifindex); | 2024 | RTA_PUT(skb, RTA_IIF, 4, &mrt->vif6_table[c->mf6c_parent].dev->ifindex); |
2027 | 2025 | ||
2028 | mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0)); | 2026 | mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0)); |
2029 | 2027 | ||
2030 | for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { | 2028 | for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { |
2031 | if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) { | 2029 | if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) { |
2032 | if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4)) | 2030 | if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4)) |
2033 | goto rtattr_failure; | 2031 | goto rtattr_failure; |
2034 | nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); | 2032 | nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); |
2035 | nhp->rtnh_flags = 0; | 2033 | nhp->rtnh_flags = 0; |
2036 | nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; | 2034 | nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; |
2037 | nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex; | 2035 | nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex; |
2038 | nhp->rtnh_len = sizeof(*nhp); | 2036 | nhp->rtnh_len = sizeof(*nhp); |
2039 | } | 2037 | } |
2040 | } | 2038 | } |
2041 | mp_head->rta_type = RTA_MULTIPATH; | 2039 | mp_head->rta_type = RTA_MULTIPATH; |
2042 | mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head; | 2040 | mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head; |
2043 | rtm->rtm_type = RTN_MULTICAST; | 2041 | rtm->rtm_type = RTN_MULTICAST; |
2044 | return 1; | 2042 | return 1; |
2045 | 2043 | ||
2046 | rtattr_failure: | 2044 | rtattr_failure: |
2047 | nlmsg_trim(skb, b); | 2045 | nlmsg_trim(skb, b); |
2048 | return -EMSGSIZE; | 2046 | return -EMSGSIZE; |
2049 | } | 2047 | } |
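
The output interfaces are encoded as rtnexthop records inside a single RTA_MULTIPATH attribute, with rtnh_hops carrying the TTL threshold. A hedged sketch of how a netlink consumer would walk that attribute (nhp points at the RTA_MULTIPATH payload, len is its payload length):

#include <stdio.h>
#include <linux/rtnetlink.h>

static void walk_oifs(struct rtnexthop *nhp, int len)
{
	while (RTNH_OK(nhp, len)) {
		printf("oif=%d ttl-threshold=%u\n",
		       nhp->rtnh_ifindex, nhp->rtnh_hops);
		len -= RTNH_ALIGN(nhp->rtnh_len);
		nhp = RTNH_NEXT(nhp);
	}
}
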
2050 | 2048 | ||
2051 | int ip6mr_get_route(struct net *net, | 2049 | int ip6mr_get_route(struct net *net, |
2052 | struct sk_buff *skb, struct rtmsg *rtm, int nowait) | 2050 | struct sk_buff *skb, struct rtmsg *rtm, int nowait) |
2053 | { | 2051 | { |
2054 | int err; | 2052 | int err; |
2055 | struct mr6_table *mrt; | 2053 | struct mr6_table *mrt; |
2056 | struct mfc6_cache *cache; | 2054 | struct mfc6_cache *cache; |
2057 | struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); | 2055 | struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); |
2058 | 2056 | ||
2059 | mrt = ip6mr_get_table(net, RT6_TABLE_DFLT); | 2057 | mrt = ip6mr_get_table(net, RT6_TABLE_DFLT); |
2060 | if (mrt == NULL) | 2058 | if (mrt == NULL) |
2061 | return -ENOENT; | 2059 | return -ENOENT; |
2062 | 2060 | ||
2063 | read_lock(&mrt_lock); | 2061 | read_lock(&mrt_lock); |
2064 | cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr); | 2062 | cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr); |
2065 | 2063 | ||
2066 | if (!cache) { | 2064 | if (!cache) { |
2067 | struct sk_buff *skb2; | 2065 | struct sk_buff *skb2; |
2068 | struct ipv6hdr *iph; | 2066 | struct ipv6hdr *iph; |
2069 | struct net_device *dev; | 2067 | struct net_device *dev; |
2070 | int vif; | 2068 | int vif; |
2071 | 2069 | ||
2072 | if (nowait) { | 2070 | if (nowait) { |
2073 | read_unlock(&mrt_lock); | 2071 | read_unlock(&mrt_lock); |
2074 | return -EAGAIN; | 2072 | return -EAGAIN; |
2075 | } | 2073 | } |
2076 | 2074 | ||
2077 | dev = skb->dev; | 2075 | dev = skb->dev; |
2078 | if (dev == NULL || (vif = ip6mr_find_vif(mrt, dev)) < 0) { | 2076 | if (dev == NULL || (vif = ip6mr_find_vif(mrt, dev)) < 0) { |
2079 | read_unlock(&mrt_lock); | 2077 | read_unlock(&mrt_lock); |
2080 | return -ENODEV; | 2078 | return -ENODEV; |
2081 | } | 2079 | } |
2082 | 2080 | ||
2083 | /* really correct? */ | 2081 | /* really correct? */ |
2084 | skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC); | 2082 | skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC); |
2085 | if (!skb2) { | 2083 | if (!skb2) { |
2086 | read_unlock(&mrt_lock); | 2084 | read_unlock(&mrt_lock); |
2087 | return -ENOMEM; | 2085 | return -ENOMEM; |
2088 | } | 2086 | } |
2089 | 2087 | ||
2090 | skb_reset_transport_header(skb2); | 2088 | skb_reset_transport_header(skb2); |
2091 | 2089 | ||
2092 | skb_put(skb2, sizeof(struct ipv6hdr)); | 2090 | skb_put(skb2, sizeof(struct ipv6hdr)); |
2093 | skb_reset_network_header(skb2); | 2091 | skb_reset_network_header(skb2); |
2094 | 2092 | ||
2095 | iph = ipv6_hdr(skb2); | 2093 | iph = ipv6_hdr(skb2); |
2096 | iph->version = 0; | 2094 | iph->version = 0; |
2097 | iph->priority = 0; | 2095 | iph->priority = 0; |
2098 | iph->flow_lbl[0] = 0; | 2096 | iph->flow_lbl[0] = 0; |
2099 | iph->flow_lbl[1] = 0; | 2097 | iph->flow_lbl[1] = 0; |
2100 | iph->flow_lbl[2] = 0; | 2098 | iph->flow_lbl[2] = 0; |
2101 | iph->payload_len = 0; | 2099 | iph->payload_len = 0; |
2102 | iph->nexthdr = IPPROTO_NONE; | 2100 | iph->nexthdr = IPPROTO_NONE; |
2103 | iph->hop_limit = 0; | 2101 | iph->hop_limit = 0; |
2104 | ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr); | 2102 | ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr); |
2105 | ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr); | 2103 | ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr); |
2106 | 2104 | ||
2107 | err = ip6mr_cache_unresolved(mrt, vif, skb2); | 2105 | err = ip6mr_cache_unresolved(mrt, vif, skb2); |
2108 | read_unlock(&mrt_lock); | 2106 | read_unlock(&mrt_lock); |
2109 | 2107 | ||
2110 | return err; | 2108 | return err; |
2111 | } | 2109 | } |
2112 | 2110 | ||
2113 | if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY)) | 2111 | if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY)) |
2114 | cache->mfc_flags |= MFC_NOTIFY; | 2112 | cache->mfc_flags |= MFC_NOTIFY; |
2115 | 2113 | ||
2116 | err = __ip6mr_fill_mroute(mrt, skb, cache, rtm); | 2114 | err = __ip6mr_fill_mroute(mrt, skb, cache, rtm); |
2117 | read_unlock(&mrt_lock); | 2115 | read_unlock(&mrt_lock); |
2118 | return err; | 2116 | return err; |
2119 | } | 2117 | } |
2120 | 2118 | ||
2121 | static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, | 2119 | static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, |
2122 | u32 pid, u32 seq, struct mfc6_cache *c) | 2120 | u32 pid, u32 seq, struct mfc6_cache *c) |
2123 | { | 2121 | { |
2124 | struct nlmsghdr *nlh; | 2122 | struct nlmsghdr *nlh; |
2125 | struct rtmsg *rtm; | 2123 | struct rtmsg *rtm; |
2126 | 2124 | ||
2127 | nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI); | 2125 | nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI); |
2128 | if (nlh == NULL) | 2126 | if (nlh == NULL) |
2129 | return -EMSGSIZE; | 2127 | return -EMSGSIZE; |
2130 | 2128 | ||
2131 | rtm = nlmsg_data(nlh); | 2129 | rtm = nlmsg_data(nlh); |
2132 | rtm->rtm_family = RTNL_FAMILY_IP6MR; | 2130 | rtm->rtm_family = RTNL_FAMILY_IP6MR; |
2133 | rtm->rtm_dst_len = 128; | 2131 | rtm->rtm_dst_len = 128; |
2134 | rtm->rtm_src_len = 128; | 2132 | rtm->rtm_src_len = 128; |
2135 | rtm->rtm_tos = 0; | 2133 | rtm->rtm_tos = 0; |
2136 | rtm->rtm_table = mrt->id; | 2134 | rtm->rtm_table = mrt->id; |
2137 | NLA_PUT_U32(skb, RTA_TABLE, mrt->id); | 2135 | NLA_PUT_U32(skb, RTA_TABLE, mrt->id); |
2138 | rtm->rtm_scope = RT_SCOPE_UNIVERSE; | 2136 | rtm->rtm_scope = RT_SCOPE_UNIVERSE; |
2139 | rtm->rtm_protocol = RTPROT_UNSPEC; | 2137 | rtm->rtm_protocol = RTPROT_UNSPEC; |
2140 | rtm->rtm_flags = 0; | 2138 | rtm->rtm_flags = 0; |
2141 | 2139 | ||
2142 | NLA_PUT(skb, RTA_SRC, 16, &c->mf6c_origin); | 2140 | NLA_PUT(skb, RTA_SRC, 16, &c->mf6c_origin); |
2143 | NLA_PUT(skb, RTA_DST, 16, &c->mf6c_mcastgrp); | 2141 | NLA_PUT(skb, RTA_DST, 16, &c->mf6c_mcastgrp); |
2144 | 2142 | ||
2145 | if (__ip6mr_fill_mroute(mrt, skb, c, rtm) < 0) | 2143 | if (__ip6mr_fill_mroute(mrt, skb, c, rtm) < 0) |
2146 | goto nla_put_failure; | 2144 | goto nla_put_failure; |
2147 | 2145 | ||
2148 | return nlmsg_end(skb, nlh); | 2146 | return nlmsg_end(skb, nlh); |
2149 | 2147 | ||
2150 | nla_put_failure: | 2148 | nla_put_failure: |
2151 | nlmsg_cancel(skb, nlh); | 2149 | nlmsg_cancel(skb, nlh); |
2152 | return -EMSGSIZE; | 2150 | return -EMSGSIZE; |
2153 | } | 2151 | } |
2154 | 2152 | ||
2155 | static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) | 2153 | static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) |
2156 | { | 2154 | { |
2157 | struct net *net = sock_net(skb->sk); | 2155 | struct net *net = sock_net(skb->sk); |
2158 | struct mr6_table *mrt; | 2156 | struct mr6_table *mrt; |
2159 | struct mfc6_cache *mfc; | 2157 | struct mfc6_cache *mfc; |
2160 | unsigned int t = 0, s_t; | 2158 | unsigned int t = 0, s_t; |
2161 | unsigned int h = 0, s_h; | 2159 | unsigned int h = 0, s_h; |
2162 | unsigned int e = 0, s_e; | 2160 | unsigned int e = 0, s_e; |
2163 | 2161 | ||
2164 | s_t = cb->args[0]; | 2162 | s_t = cb->args[0]; |
2165 | s_h = cb->args[1]; | 2163 | s_h = cb->args[1]; |
2166 | s_e = cb->args[2]; | 2164 | s_e = cb->args[2]; |
2167 | 2165 | ||
2168 | read_lock(&mrt_lock); | 2166 | read_lock(&mrt_lock); |
2169 | ip6mr_for_each_table(mrt, net) { | 2167 | ip6mr_for_each_table(mrt, net) { |
2170 | if (t < s_t) | 2168 | if (t < s_t) |
2171 | goto next_table; | 2169 | goto next_table; |
2172 | if (t > s_t) | 2170 | if (t > s_t) |
2173 | s_h = 0; | 2171 | s_h = 0; |
2174 | for (h = s_h; h < MFC6_LINES; h++) { | 2172 | for (h = s_h; h < MFC6_LINES; h++) { |
2175 | list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) { | 2173 | list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) { |
2176 | if (e < s_e) | 2174 | if (e < s_e) |
2177 | goto next_entry; | 2175 | goto next_entry; |
2178 | if (ip6mr_fill_mroute(mrt, skb, | 2176 | if (ip6mr_fill_mroute(mrt, skb, |
2179 | NETLINK_CB(cb->skb).pid, | 2177 | NETLINK_CB(cb->skb).pid, |
2180 | cb->nlh->nlmsg_seq, | 2178 | cb->nlh->nlmsg_seq, |
2181 | mfc) < 0) | 2179 | mfc) < 0) |
2182 | goto done; | 2180 | goto done; |
2183 | next_entry: | 2181 | next_entry: |
2184 | e++; | 2182 | e++; |
2185 | } | 2183 | } |
2186 | e = s_e = 0; | 2184 | e = s_e = 0; |
2187 | } | 2185 | } |
2188 | s_h = 0; | 2186 | s_h = 0; |
2189 | next_table: | 2187 | next_table: |
2190 | t++; | 2188 | t++; |
2191 | } | 2189 | } |
2192 | done: | 2190 | done: |
2193 | read_unlock(&mrt_lock); | 2191 | read_unlock(&mrt_lock); |
2194 | 2192 | ||
2195 | cb->args[2] = e; | 2193 | cb->args[2] = e; |
2196 | cb->args[1] = h; | 2194 | cb->args[1] = h; |
2197 | cb->args[0] = t; | 2195 | cb->args[0] = t; |
2198 | 2196 | ||
2199 | return skb->len; | 2197 | return skb->len; |
2200 | } | 2198 | } |
2201 | 2199 |
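
This dump handler serves an `ip -6 mroute show`-style request. A hedged sketch of the requesting side (assuming the handler is registered for RTNL_FAMILY_IP6MR, which happens elsewhere in this file); replies arrive as the RTM_NEWROUTE messages built by ip6mr_fill_mroute():

#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>

/* Returns the netlink socket so the caller can recv() the dump, -1 on error. */
static int request_mroute_dump(void)
{
	struct {
		struct nlmsghdr nlh;
		struct rtmsg rtm;
	} req;
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

	if (fd < 0)
		return -1;
	memset(&req, 0, sizeof(req));
	req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
	req.nlh.nlmsg_type = RTM_GETROUTE;
	req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
	req.rtm.rtm_family = RTNL_FAMILY_IP6MR;

	if (send(fd, &req, req.nlh.nlmsg_len, 0) < 0) {
		close(fd);
		return -1;
	}
	return fd;
}
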
net/ipv6/sit.c
1 | /* | 1 | /* |
2 | * IPv6 over IPv4 tunnel device - Simple Internet Transition (SIT) | 2 | * IPv6 over IPv4 tunnel device - Simple Internet Transition (SIT) |
3 | * Linux INET6 implementation | 3 | * Linux INET6 implementation |
4 | * | 4 | * |
5 | * Authors: | 5 | * Authors: |
6 | * Pedro Roque <roque@di.fc.ul.pt> | 6 | * Pedro Roque <roque@di.fc.ul.pt> |
7 | * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> | 7 | * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> |
8 | * | 8 | * |
9 | * This program is free software; you can redistribute it and/or | 9 | * This program is free software; you can redistribute it and/or |
10 | * modify it under the terms of the GNU General Public License | 10 | * modify it under the terms of the GNU General Public License |
11 | * as published by the Free Software Foundation; either version | 11 | * as published by the Free Software Foundation; either version |
12 | * 2 of the License, or (at your option) any later version. | 12 | * 2 of the License, or (at your option) any later version. |
13 | * | 13 | * |
14 | * Changes: | 14 | * Changes: |
15 | * Roger Venning <r.venning@telstra.com>: 6to4 support | 15 | * Roger Venning <r.venning@telstra.com>: 6to4 support |
16 | * Nate Thompson <nate@thebog.net>: 6to4 support | 16 | * Nate Thompson <nate@thebog.net>: 6to4 support |
17 | * Fred Templin <fred.l.templin@boeing.com>: isatap support | 17 | * Fred Templin <fred.l.templin@boeing.com>: isatap support |
18 | */ | 18 | */ |
19 | 19 | ||
20 | #include <linux/module.h> | 20 | #include <linux/module.h> |
21 | #include <linux/capability.h> | 21 | #include <linux/capability.h> |
22 | #include <linux/errno.h> | 22 | #include <linux/errno.h> |
23 | #include <linux/types.h> | 23 | #include <linux/types.h> |
24 | #include <linux/socket.h> | 24 | #include <linux/socket.h> |
25 | #include <linux/sockios.h> | 25 | #include <linux/sockios.h> |
26 | #include <linux/net.h> | 26 | #include <linux/net.h> |
27 | #include <linux/in6.h> | 27 | #include <linux/in6.h> |
28 | #include <linux/netdevice.h> | 28 | #include <linux/netdevice.h> |
29 | #include <linux/if_arp.h> | 29 | #include <linux/if_arp.h> |
30 | #include <linux/icmp.h> | 30 | #include <linux/icmp.h> |
31 | #include <linux/slab.h> | 31 | #include <linux/slab.h> |
32 | #include <asm/uaccess.h> | 32 | #include <asm/uaccess.h> |
33 | #include <linux/init.h> | 33 | #include <linux/init.h> |
34 | #include <linux/netfilter_ipv4.h> | 34 | #include <linux/netfilter_ipv4.h> |
35 | #include <linux/if_ether.h> | 35 | #include <linux/if_ether.h> |
36 | 36 | ||
37 | #include <net/sock.h> | 37 | #include <net/sock.h> |
38 | #include <net/snmp.h> | 38 | #include <net/snmp.h> |
39 | 39 | ||
40 | #include <net/ipv6.h> | 40 | #include <net/ipv6.h> |
41 | #include <net/protocol.h> | 41 | #include <net/protocol.h> |
42 | #include <net/transp_v6.h> | 42 | #include <net/transp_v6.h> |
43 | #include <net/ip6_fib.h> | 43 | #include <net/ip6_fib.h> |
44 | #include <net/ip6_route.h> | 44 | #include <net/ip6_route.h> |
45 | #include <net/ndisc.h> | 45 | #include <net/ndisc.h> |
46 | #include <net/addrconf.h> | 46 | #include <net/addrconf.h> |
47 | #include <net/ip.h> | 47 | #include <net/ip.h> |
48 | #include <net/udp.h> | 48 | #include <net/udp.h> |
49 | #include <net/icmp.h> | 49 | #include <net/icmp.h> |
50 | #include <net/ipip.h> | 50 | #include <net/ipip.h> |
51 | #include <net/inet_ecn.h> | 51 | #include <net/inet_ecn.h> |
52 | #include <net/xfrm.h> | 52 | #include <net/xfrm.h> |
53 | #include <net/dsfield.h> | 53 | #include <net/dsfield.h> |
54 | #include <net/net_namespace.h> | 54 | #include <net/net_namespace.h> |
55 | #include <net/netns/generic.h> | 55 | #include <net/netns/generic.h> |
56 | 56 | ||
57 | /* | 57 | /* |
58 | This version of net/ipv6/sit.c is a clone of net/ipv4/ip_gre.c | 58 | This version of net/ipv6/sit.c is a clone of net/ipv4/ip_gre.c |
59 | 59 | ||
60 | For comments look at net/ipv4/ip_gre.c --ANK | 60 | For comments look at net/ipv4/ip_gre.c --ANK |
61 | */ | 61 | */ |
62 | 62 | ||
63 | #define HASH_SIZE 16 | 63 | #define HASH_SIZE 16 |
64 | #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) | 64 | #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) |
65 | 65 | ||
66 | static void ipip6_tunnel_init(struct net_device *dev); | 66 | static void ipip6_tunnel_init(struct net_device *dev); |
67 | static void ipip6_tunnel_setup(struct net_device *dev); | 67 | static void ipip6_tunnel_setup(struct net_device *dev); |
68 | 68 | ||
69 | static int sit_net_id __read_mostly; | 69 | static int sit_net_id __read_mostly; |
70 | struct sit_net { | 70 | struct sit_net { |
71 | struct ip_tunnel *tunnels_r_l[HASH_SIZE]; | 71 | struct ip_tunnel *tunnels_r_l[HASH_SIZE]; |
72 | struct ip_tunnel *tunnels_r[HASH_SIZE]; | 72 | struct ip_tunnel *tunnels_r[HASH_SIZE]; |
73 | struct ip_tunnel *tunnels_l[HASH_SIZE]; | 73 | struct ip_tunnel *tunnels_l[HASH_SIZE]; |
74 | struct ip_tunnel *tunnels_wc[1]; | 74 | struct ip_tunnel *tunnels_wc[1]; |
75 | struct ip_tunnel **tunnels[4]; | 75 | struct ip_tunnel **tunnels[4]; |
76 | 76 | ||
77 | struct net_device *fb_tunnel_dev; | 77 | struct net_device *fb_tunnel_dev; |
78 | }; | 78 | }; |
79 | 79 | ||
80 | /* | 80 | /* |
81 | * Locking: hash tables are protected by RCU and a spinlock | 81 | * Locking: hash tables are protected by RCU and a spinlock |
82 | */ | 82 | */ |
83 | static DEFINE_SPINLOCK(ipip6_lock); | 83 | static DEFINE_SPINLOCK(ipip6_lock); |
84 | 84 | ||
85 | #define for_each_ip_tunnel_rcu(start) \ | 85 | #define for_each_ip_tunnel_rcu(start) \ |
86 | for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) | 86 | for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) |
87 | 87 | ||
88 | /* | 88 | /* |
89 | * Must be invoked with rcu_read_lock | 89 | * Must be invoked with rcu_read_lock |
90 | */ | 90 | */ |
91 | static struct ip_tunnel * ipip6_tunnel_lookup(struct net *net, | 91 | static struct ip_tunnel * ipip6_tunnel_lookup(struct net *net, |
92 | struct net_device *dev, __be32 remote, __be32 local) | 92 | struct net_device *dev, __be32 remote, __be32 local) |
93 | { | 93 | { |
94 | unsigned h0 = HASH(remote); | 94 | unsigned h0 = HASH(remote); |
95 | unsigned h1 = HASH(local); | 95 | unsigned h1 = HASH(local); |
96 | struct ip_tunnel *t; | 96 | struct ip_tunnel *t; |
97 | struct sit_net *sitn = net_generic(net, sit_net_id); | 97 | struct sit_net *sitn = net_generic(net, sit_net_id); |
98 | 98 | ||
99 | for_each_ip_tunnel_rcu(sitn->tunnels_r_l[h0 ^ h1]) { | 99 | for_each_ip_tunnel_rcu(sitn->tunnels_r_l[h0 ^ h1]) { |
100 | if (local == t->parms.iph.saddr && | 100 | if (local == t->parms.iph.saddr && |
101 | remote == t->parms.iph.daddr && | 101 | remote == t->parms.iph.daddr && |
102 | (!dev || !t->parms.link || dev->iflink == t->parms.link) && | 102 | (!dev || !t->parms.link || dev->iflink == t->parms.link) && |
103 | (t->dev->flags & IFF_UP)) | 103 | (t->dev->flags & IFF_UP)) |
104 | return t; | 104 | return t; |
105 | } | 105 | } |
106 | for_each_ip_tunnel_rcu(sitn->tunnels_r[h0]) { | 106 | for_each_ip_tunnel_rcu(sitn->tunnels_r[h0]) { |
107 | if (remote == t->parms.iph.daddr && | 107 | if (remote == t->parms.iph.daddr && |
108 | (!dev || !t->parms.link || dev->iflink == t->parms.link) && | 108 | (!dev || !t->parms.link || dev->iflink == t->parms.link) && |
109 | (t->dev->flags & IFF_UP)) | 109 | (t->dev->flags & IFF_UP)) |
110 | return t; | 110 | return t; |
111 | } | 111 | } |
112 | for_each_ip_tunnel_rcu(sitn->tunnels_l[h1]) { | 112 | for_each_ip_tunnel_rcu(sitn->tunnels_l[h1]) { |
113 | if (local == t->parms.iph.saddr && | 113 | if (local == t->parms.iph.saddr && |
114 | (!dev || !t->parms.link || dev->iflink == t->parms.link) && | 114 | (!dev || !t->parms.link || dev->iflink == t->parms.link) && |
115 | (t->dev->flags & IFF_UP)) | 115 | (t->dev->flags & IFF_UP)) |
116 | return t; | 116 | return t; |
117 | } | 117 | } |
118 | t = rcu_dereference(sitn->tunnels_wc[0]); | 118 | t = rcu_dereference(sitn->tunnels_wc[0]); |
119 | if ((t != NULL) && (t->dev->flags & IFF_UP)) | 119 | if ((t != NULL) && (t->dev->flags & IFF_UP)) |
120 | return t; | 120 | return t; |
121 | return NULL; | 121 | return NULL; |
122 | } | 122 | } |
123 | 123 | ||
124 | static struct ip_tunnel **__ipip6_bucket(struct sit_net *sitn, | 124 | static struct ip_tunnel **__ipip6_bucket(struct sit_net *sitn, |
125 | struct ip_tunnel_parm *parms) | 125 | struct ip_tunnel_parm *parms) |
126 | { | 126 | { |
127 | __be32 remote = parms->iph.daddr; | 127 | __be32 remote = parms->iph.daddr; |
128 | __be32 local = parms->iph.saddr; | 128 | __be32 local = parms->iph.saddr; |
129 | unsigned h = 0; | 129 | unsigned h = 0; |
130 | int prio = 0; | 130 | int prio = 0; |
131 | 131 | ||
132 | if (remote) { | 132 | if (remote) { |
133 | prio |= 2; | 133 | prio |= 2; |
134 | h ^= HASH(remote); | 134 | h ^= HASH(remote); |
135 | } | 135 | } |
136 | if (local) { | 136 | if (local) { |
137 | prio |= 1; | 137 | prio |= 1; |
138 | h ^= HASH(local); | 138 | h ^= HASH(local); |
139 | } | 139 | } |
140 | return &sitn->tunnels[prio][h]; | 140 | return &sitn->tunnels[prio][h]; |
141 | } | 141 | } |
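
The prio value computed here is exactly the index into the tunnels[] array declared in struct sit_net: 0 = wildcard, 1 = local-only, 2 = remote-only, 3 = fully keyed. A hedged, userspace-compilable sketch of the same fold, handy for sanity-checking bucket distribution (the address is from the documentation range):

#include <stdint.h>
#include <stdio.h>

#define HASH(addr) ((((uint32_t)(addr)) ^ (((uint32_t)(addr)) >> 4)) & 0xF)

int main(void)
{
	/* 203.0.113.5 in host byte order just for the demo; the kernel
	 * hashes the big-endian on-wire value */
	uint32_t remote = 0xcb007105, local = 0;
	int prio = (remote ? 2 : 0) | (local ? 1 : 0);
	unsigned h = (remote ? HASH(remote) : 0) ^ (local ? HASH(local) : 0);

	printf("prio=%d (tunnels_r) h=%u\n", prio, h);
	return 0;
}
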
142 | 142 | ||
143 | static inline struct ip_tunnel **ipip6_bucket(struct sit_net *sitn, | 143 | static inline struct ip_tunnel **ipip6_bucket(struct sit_net *sitn, |
144 | struct ip_tunnel *t) | 144 | struct ip_tunnel *t) |
145 | { | 145 | { |
146 | return __ipip6_bucket(sitn, &t->parms); | 146 | return __ipip6_bucket(sitn, &t->parms); |
147 | } | 147 | } |
148 | 148 | ||
149 | static void ipip6_tunnel_unlink(struct sit_net *sitn, struct ip_tunnel *t) | 149 | static void ipip6_tunnel_unlink(struct sit_net *sitn, struct ip_tunnel *t) |
150 | { | 150 | { |
151 | struct ip_tunnel **tp; | 151 | struct ip_tunnel **tp; |
152 | 152 | ||
153 | for (tp = ipip6_bucket(sitn, t); *tp; tp = &(*tp)->next) { | 153 | for (tp = ipip6_bucket(sitn, t); *tp; tp = &(*tp)->next) { |
154 | if (t == *tp) { | 154 | if (t == *tp) { |
155 | spin_lock_bh(&ipip6_lock); | 155 | spin_lock_bh(&ipip6_lock); |
156 | *tp = t->next; | 156 | *tp = t->next; |
157 | spin_unlock_bh(&ipip6_lock); | 157 | spin_unlock_bh(&ipip6_lock); |
158 | break; | 158 | break; |
159 | } | 159 | } |
160 | } | 160 | } |
161 | } | 161 | } |
162 | 162 | ||
163 | static void ipip6_tunnel_link(struct sit_net *sitn, struct ip_tunnel *t) | 163 | static void ipip6_tunnel_link(struct sit_net *sitn, struct ip_tunnel *t) |
164 | { | 164 | { |
165 | struct ip_tunnel **tp = ipip6_bucket(sitn, t); | 165 | struct ip_tunnel **tp = ipip6_bucket(sitn, t); |
166 | 166 | ||
167 | spin_lock_bh(&ipip6_lock); | 167 | spin_lock_bh(&ipip6_lock); |
168 | t->next = *tp; | 168 | t->next = *tp; |
169 | rcu_assign_pointer(*tp, t); | 169 | rcu_assign_pointer(*tp, t); |
170 | spin_unlock_bh(&ipip6_lock); | 170 | spin_unlock_bh(&ipip6_lock); |
171 | } | 171 | } |
172 | 172 | ||
173 | static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn) | 173 | static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn) |
174 | { | 174 | { |
175 | #ifdef CONFIG_IPV6_SIT_6RD | 175 | #ifdef CONFIG_IPV6_SIT_6RD |
176 | struct ip_tunnel *t = netdev_priv(dev); | 176 | struct ip_tunnel *t = netdev_priv(dev); |
177 | 177 | ||
178 | if (t->dev == sitn->fb_tunnel_dev) { | 178 | if (t->dev == sitn->fb_tunnel_dev) { |
179 | ipv6_addr_set(&t->ip6rd.prefix, htonl(0x20020000), 0, 0, 0); | 179 | ipv6_addr_set(&t->ip6rd.prefix, htonl(0x20020000), 0, 0, 0); |
180 | t->ip6rd.relay_prefix = 0; | 180 | t->ip6rd.relay_prefix = 0; |
181 | t->ip6rd.prefixlen = 16; | 181 | t->ip6rd.prefixlen = 16; |
182 | t->ip6rd.relay_prefixlen = 0; | 182 | t->ip6rd.relay_prefixlen = 0; |
183 | } else { | 183 | } else { |
184 | struct ip_tunnel *t0 = netdev_priv(sitn->fb_tunnel_dev); | 184 | struct ip_tunnel *t0 = netdev_priv(sitn->fb_tunnel_dev); |
185 | memcpy(&t->ip6rd, &t0->ip6rd, sizeof(t->ip6rd)); | 185 | memcpy(&t->ip6rd, &t0->ip6rd, sizeof(t->ip6rd)); |
186 | } | 186 | } |
187 | #endif | 187 | #endif |
188 | } | 188 | } |
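
With the defaults installed above (prefix 2002::/16, relay prefix length 0), the 6rd mapping degenerates to classic 6to4: the full IPv4 address is appended to the /16, yielding a /48 delegated prefix. A small hedged sketch of that arithmetic with an example endpoint address:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t v4 = 0xc0000201;	/* 192.0.2.1, example endpoint */

	/* 16 prefix bits + (32 - 0) embedded IPv4 bits = /48 */
	printf("2002:%02x%02x:%02x%02x::/48\n",
	       (v4 >> 24) & 0xff, (v4 >> 16) & 0xff,
	       (v4 >> 8) & 0xff, v4 & 0xff);
	return 0;
}
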
189 | 189 | ||
190 | static struct ip_tunnel * ipip6_tunnel_locate(struct net *net, | 190 | static struct ip_tunnel * ipip6_tunnel_locate(struct net *net, |
191 | struct ip_tunnel_parm *parms, int create) | 191 | struct ip_tunnel_parm *parms, int create) |
192 | { | 192 | { |
193 | __be32 remote = parms->iph.daddr; | 193 | __be32 remote = parms->iph.daddr; |
194 | __be32 local = parms->iph.saddr; | 194 | __be32 local = parms->iph.saddr; |
195 | struct ip_tunnel *t, **tp, *nt; | 195 | struct ip_tunnel *t, **tp, *nt; |
196 | struct net_device *dev; | 196 | struct net_device *dev; |
197 | char name[IFNAMSIZ]; | 197 | char name[IFNAMSIZ]; |
198 | struct sit_net *sitn = net_generic(net, sit_net_id); | 198 | struct sit_net *sitn = net_generic(net, sit_net_id); |
199 | 199 | ||
200 | for (tp = __ipip6_bucket(sitn, parms); (t = *tp) != NULL; tp = &t->next) { | 200 | for (tp = __ipip6_bucket(sitn, parms); (t = *tp) != NULL; tp = &t->next) { |
201 | if (local == t->parms.iph.saddr && | 201 | if (local == t->parms.iph.saddr && |
202 | remote == t->parms.iph.daddr && | 202 | remote == t->parms.iph.daddr && |
203 | parms->link == t->parms.link) { | 203 | parms->link == t->parms.link) { |
204 | if (create) | 204 | if (create) |
205 | return NULL; | 205 | return NULL; |
206 | else | 206 | else |
207 | return t; | 207 | return t; |
208 | } | 208 | } |
209 | } | 209 | } |
210 | if (!create) | 210 | if (!create) |
211 | goto failed; | 211 | goto failed; |
212 | 212 | ||
213 | if (parms->name[0]) | 213 | if (parms->name[0]) |
214 | strlcpy(name, parms->name, IFNAMSIZ); | 214 | strlcpy(name, parms->name, IFNAMSIZ); |
215 | else | 215 | else |
216 | sprintf(name, "sit%%d"); | 216 | sprintf(name, "sit%%d"); |
217 | 217 | ||
218 | dev = alloc_netdev(sizeof(*t), name, ipip6_tunnel_setup); | 218 | dev = alloc_netdev(sizeof(*t), name, ipip6_tunnel_setup); |
219 | if (dev == NULL) | 219 | if (dev == NULL) |
220 | return NULL; | 220 | return NULL; |
221 | 221 | ||
222 | dev_net_set(dev, net); | 222 | dev_net_set(dev, net); |
223 | 223 | ||
224 | if (strchr(name, '%')) { | 224 | if (strchr(name, '%')) { |
225 | if (dev_alloc_name(dev, name) < 0) | 225 | if (dev_alloc_name(dev, name) < 0) |
226 | goto failed_free; | 226 | goto failed_free; |
227 | } | 227 | } |
228 | 228 | ||
229 | nt = netdev_priv(dev); | 229 | nt = netdev_priv(dev); |
230 | 230 | ||
231 | nt->parms = *parms; | 231 | nt->parms = *parms; |
232 | ipip6_tunnel_init(dev); | 232 | ipip6_tunnel_init(dev); |
233 | ipip6_tunnel_clone_6rd(dev, sitn); | 233 | ipip6_tunnel_clone_6rd(dev, sitn); |
234 | 234 | ||
235 | if (parms->i_flags & SIT_ISATAP) | 235 | if (parms->i_flags & SIT_ISATAP) |
236 | dev->priv_flags |= IFF_ISATAP; | 236 | dev->priv_flags |= IFF_ISATAP; |
237 | 237 | ||
238 | if (register_netdevice(dev) < 0) | 238 | if (register_netdevice(dev) < 0) |
239 | goto failed_free; | 239 | goto failed_free; |
240 | 240 | ||
241 | dev_hold(dev); | 241 | dev_hold(dev); |
242 | 242 | ||
243 | ipip6_tunnel_link(sitn, nt); | 243 | ipip6_tunnel_link(sitn, nt); |
244 | return nt; | 244 | return nt; |
245 | 245 | ||
246 | failed_free: | 246 | failed_free: |
247 | free_netdev(dev); | 247 | free_netdev(dev); |
248 | failed: | 248 | failed: |
249 | return NULL; | 249 | return NULL; |
250 | } | 250 | } |
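
ipip6_tunnel_locate() with create != 0 is reached from the SIOCADDTUNNEL ioctl on the fallback device; this is essentially what `ip tunnel add ... mode sit` does. A hedged userspace sketch (not part of this commit); the endpoints are documentation addresses and the call needs CAP_NET_ADMIN:

#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <net/if.h>
#include <linux/ip.h>
#include <linux/if_tunnel.h>

int main(void)
{
	struct ip_tunnel_parm p;
	struct ifreq ifr;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	memset(&p, 0, sizeof(p));
	strcpy(p.name, "sit1");			/* lands in parms->name above */
	p.iph.version = 4;
	p.iph.ihl = 5;
	p.iph.protocol = IPPROTO_IPV6;		/* 6-in-4 */
	p.iph.saddr = inet_addr("192.0.2.1");	/* local endpoint */
	p.iph.daddr = inet_addr("198.51.100.1");	/* remote endpoint */

	memset(&ifr, 0, sizeof(ifr));
	strcpy(ifr.ifr_name, "sit0");		/* the fb_tunnel_dev */
	ifr.ifr_data = (void *)&p;

	if (ioctl(fd, SIOCADDTUNNEL, &ifr) < 0)
		perror("SIOCADDTUNNEL");
	return 0;
}
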
251 | 251 | ||
252 | static DEFINE_SPINLOCK(ipip6_prl_lock); | 252 | static DEFINE_SPINLOCK(ipip6_prl_lock); |
253 | 253 | ||
254 | #define for_each_prl_rcu(start) \ | 254 | #define for_each_prl_rcu(start) \ |
255 | for (prl = rcu_dereference(start); \ | 255 | for (prl = rcu_dereference(start); \ |
256 | prl; \ | 256 | prl; \ |
257 | prl = rcu_dereference(prl->next)) | 257 | prl = rcu_dereference(prl->next)) |
258 | 258 | ||
259 | static struct ip_tunnel_prl_entry * | 259 | static struct ip_tunnel_prl_entry * |
260 | __ipip6_tunnel_locate_prl(struct ip_tunnel *t, __be32 addr) | 260 | __ipip6_tunnel_locate_prl(struct ip_tunnel *t, __be32 addr) |
261 | { | 261 | { |
262 | struct ip_tunnel_prl_entry *prl; | 262 | struct ip_tunnel_prl_entry *prl; |
263 | 263 | ||
264 | for_each_prl_rcu(t->prl) | 264 | for_each_prl_rcu(t->prl) |
265 | if (prl->addr == addr) | 265 | if (prl->addr == addr) |
266 | break; | 266 | break; |
267 | return prl; | 267 | return prl; |
268 | 268 | ||
269 | } | 269 | } |
270 | 270 | ||
271 | static int ipip6_tunnel_get_prl(struct ip_tunnel *t, | 271 | static int ipip6_tunnel_get_prl(struct ip_tunnel *t, |
272 | struct ip_tunnel_prl __user *a) | 272 | struct ip_tunnel_prl __user *a) |
273 | { | 273 | { |
274 | struct ip_tunnel_prl kprl, *kp; | 274 | struct ip_tunnel_prl kprl, *kp; |
275 | struct ip_tunnel_prl_entry *prl; | 275 | struct ip_tunnel_prl_entry *prl; |
276 | unsigned int cmax, c = 0, ca, len; | 276 | unsigned int cmax, c = 0, ca, len; |
277 | int ret = 0; | 277 | int ret = 0; |
278 | 278 | ||
279 | if (copy_from_user(&kprl, a, sizeof(kprl))) | 279 | if (copy_from_user(&kprl, a, sizeof(kprl))) |
280 | return -EFAULT; | 280 | return -EFAULT; |
281 | cmax = kprl.datalen / sizeof(kprl); | 281 | cmax = kprl.datalen / sizeof(kprl); |
282 | if (cmax > 1 && kprl.addr != htonl(INADDR_ANY)) | 282 | if (cmax > 1 && kprl.addr != htonl(INADDR_ANY)) |
283 | cmax = 1; | 283 | cmax = 1; |
284 | 284 | ||
285 | /* For simple GET or for root users, | 285 | /* For simple GET or for root users, |
286 | * we try harder to allocate. | 286 | * we try harder to allocate. |
287 | */ | 287 | */ |
288 | kp = (cmax <= 1 || capable(CAP_NET_ADMIN)) ? | 288 | kp = (cmax <= 1 || capable(CAP_NET_ADMIN)) ? |
289 | kcalloc(cmax, sizeof(*kp), GFP_KERNEL) : | 289 | kcalloc(cmax, sizeof(*kp), GFP_KERNEL) : |
290 | NULL; | 290 | NULL; |
291 | 291 | ||
292 | rcu_read_lock(); | 292 | rcu_read_lock(); |
293 | 293 | ||
294 | ca = t->prl_count < cmax ? t->prl_count : cmax; | 294 | ca = t->prl_count < cmax ? t->prl_count : cmax; |
295 | 295 | ||
296 | if (!kp) { | 296 | if (!kp) { |
297 | /* We don't try hard to allocate much memory for | 297 | /* We don't try hard to allocate much memory for |
298 | * non-root users. | 298 | * non-root users. |
299 | * For root users, retry allocating enough memory for | 299 | * For root users, retry allocating enough memory for |
300 | * the answer. | 300 | * the answer. |
301 | */ | 301 | */ |
302 | kp = kcalloc(ca, sizeof(*kp), GFP_ATOMIC); | 302 | kp = kcalloc(ca, sizeof(*kp), GFP_ATOMIC); |
303 | if (!kp) { | 303 | if (!kp) { |
304 | ret = -ENOMEM; | 304 | ret = -ENOMEM; |
305 | goto out; | 305 | goto out; |
306 | } | 306 | } |
307 | } | 307 | } |
308 | 308 | ||
309 | c = 0; | 309 | c = 0; |
310 | for_each_prl_rcu(t->prl) { | 310 | for_each_prl_rcu(t->prl) { |
311 | if (c >= cmax) | 311 | if (c >= cmax) |
312 | break; | 312 | break; |
313 | if (kprl.addr != htonl(INADDR_ANY) && prl->addr != kprl.addr) | 313 | if (kprl.addr != htonl(INADDR_ANY) && prl->addr != kprl.addr) |
314 | continue; | 314 | continue; |
315 | kp[c].addr = prl->addr; | 315 | kp[c].addr = prl->addr; |
316 | kp[c].flags = prl->flags; | 316 | kp[c].flags = prl->flags; |
317 | c++; | 317 | c++; |
318 | if (kprl.addr != htonl(INADDR_ANY)) | 318 | if (kprl.addr != htonl(INADDR_ANY)) |
319 | break; | 319 | break; |
320 | } | 320 | } |
321 | out: | 321 | out: |
322 | rcu_read_unlock(); | 322 | rcu_read_unlock(); |
323 | 323 | ||
324 | len = sizeof(*kp) * c; | 324 | len = sizeof(*kp) * c; |
325 | ret = 0; | 325 | ret = 0; |
326 | if ((len && copy_to_user(a + 1, kp, len)) || put_user(len, &a->datalen)) | 326 | if ((len && copy_to_user(a + 1, kp, len)) || put_user(len, &a->datalen)) |
327 | ret = -EFAULT; | 327 | ret = -EFAULT; |
328 | 328 | ||
329 | kfree(kp); | 329 | kfree(kp); |
330 | 330 | ||
331 | return ret; | 331 | return ret; |
332 | } | 332 | } |
333 | 333 | ||
334 | static int | 334 | static int |
335 | ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg) | 335 | ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg) |
336 | { | 336 | { |
337 | struct ip_tunnel_prl_entry *p; | 337 | struct ip_tunnel_prl_entry *p; |
338 | int err = 0; | 338 | int err = 0; |
339 | 339 | ||
340 | if (a->addr == htonl(INADDR_ANY)) | 340 | if (a->addr == htonl(INADDR_ANY)) |
341 | return -EINVAL; | 341 | return -EINVAL; |
342 | 342 | ||
343 | spin_lock(&ipip6_prl_lock); | 343 | spin_lock(&ipip6_prl_lock); |
344 | 344 | ||
345 | for (p = t->prl; p; p = p->next) { | 345 | for (p = t->prl; p; p = p->next) { |
346 | if (p->addr == a->addr) { | 346 | if (p->addr == a->addr) { |
347 | if (chg) { | 347 | if (chg) { |
348 | p->flags = a->flags; | 348 | p->flags = a->flags; |
349 | goto out; | 349 | goto out; |
350 | } | 350 | } |
351 | err = -EEXIST; | 351 | err = -EEXIST; |
352 | goto out; | 352 | goto out; |
353 | } | 353 | } |
354 | } | 354 | } |
355 | 355 | ||
356 | if (chg) { | 356 | if (chg) { |
357 | err = -ENXIO; | 357 | err = -ENXIO; |
358 | goto out; | 358 | goto out; |
359 | } | 359 | } |
360 | 360 | ||
361 | p = kzalloc(sizeof(struct ip_tunnel_prl_entry), GFP_KERNEL); | 361 | p = kzalloc(sizeof(struct ip_tunnel_prl_entry), GFP_KERNEL); |
362 | if (!p) { | 362 | if (!p) { |
363 | err = -ENOBUFS; | 363 | err = -ENOBUFS; |
364 | goto out; | 364 | goto out; |
365 | } | 365 | } |
366 | 366 | ||
367 | p->next = t->prl; | 367 | p->next = t->prl; |
368 | p->addr = a->addr; | 368 | p->addr = a->addr; |
369 | p->flags = a->flags; | 369 | p->flags = a->flags; |
370 | t->prl_count++; | 370 | t->prl_count++; |
371 | rcu_assign_pointer(t->prl, p); | 371 | rcu_assign_pointer(t->prl, p); |
372 | out: | 372 | out: |
373 | spin_unlock(&ipip6_prl_lock); | 373 | spin_unlock(&ipip6_prl_lock); |
374 | return err; | 374 | return err; |
375 | } | 375 | } |
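
These PRL helpers back the SIOCADDPRL/SIOCDELPRL/SIOCGETPRL ioctls on an ISATAP tunnel device. A hedged sketch of adding a default potential router (the device name and router address are placeholders); a PRL_DEFAULT entry is what later makes isatap_chksrc(), below, tag packets NDISC_NODETYPE_DEFAULT:

static int add_prl_entry(int fd, const char *tundev, const char *router)
{
	struct ip_tunnel_prl prl;
	struct ifreq ifr;

	memset(&prl, 0, sizeof(prl));
	prl.addr = inet_addr(router);	/* IPv4 address of the router */
	prl.flags = PRL_DEFAULT;

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, tundev, IFNAMSIZ - 1);
	ifr.ifr_data = (void *)&prl;

	return ioctl(fd, SIOCADDPRL, &ifr);	/* needs CAP_NET_ADMIN */
}
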
376 | 376 | ||
377 | static void prl_entry_destroy_rcu(struct rcu_head *head) | 377 | static void prl_entry_destroy_rcu(struct rcu_head *head) |
378 | { | 378 | { |
379 | kfree(container_of(head, struct ip_tunnel_prl_entry, rcu_head)); | 379 | kfree(container_of(head, struct ip_tunnel_prl_entry, rcu_head)); |
380 | } | 380 | } |
381 | 381 | ||
382 | static void prl_list_destroy_rcu(struct rcu_head *head) | 382 | static void prl_list_destroy_rcu(struct rcu_head *head) |
383 | { | 383 | { |
384 | struct ip_tunnel_prl_entry *p, *n; | 384 | struct ip_tunnel_prl_entry *p, *n; |
385 | 385 | ||
386 | p = container_of(head, struct ip_tunnel_prl_entry, rcu_head); | 386 | p = container_of(head, struct ip_tunnel_prl_entry, rcu_head); |
387 | do { | 387 | do { |
388 | n = p->next; | 388 | n = p->next; |
389 | kfree(p); | 389 | kfree(p); |
390 | p = n; | 390 | p = n; |
391 | } while (p); | 391 | } while (p); |
392 | } | 392 | } |
393 | 393 | ||
394 | static int | 394 | static int |
395 | ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a) | 395 | ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a) |
396 | { | 396 | { |
397 | struct ip_tunnel_prl_entry *x, **p; | 397 | struct ip_tunnel_prl_entry *x, **p; |
398 | int err = 0; | 398 | int err = 0; |
399 | 399 | ||
400 | spin_lock(&ipip6_prl_lock); | 400 | spin_lock(&ipip6_prl_lock); |
401 | 401 | ||
402 | if (a && a->addr != htonl(INADDR_ANY)) { | 402 | if (a && a->addr != htonl(INADDR_ANY)) { |
403 | for (p = &t->prl; *p; p = &(*p)->next) { | 403 | for (p = &t->prl; *p; p = &(*p)->next) { |
404 | if ((*p)->addr == a->addr) { | 404 | if ((*p)->addr == a->addr) { |
405 | x = *p; | 405 | x = *p; |
406 | *p = x->next; | 406 | *p = x->next; |
407 | call_rcu(&x->rcu_head, prl_entry_destroy_rcu); | 407 | call_rcu(&x->rcu_head, prl_entry_destroy_rcu); |
408 | t->prl_count--; | 408 | t->prl_count--; |
409 | goto out; | 409 | goto out; |
410 | } | 410 | } |
411 | } | 411 | } |
412 | err = -ENXIO; | 412 | err = -ENXIO; |
413 | } else { | 413 | } else { |
414 | if (t->prl) { | 414 | if (t->prl) { |
415 | t->prl_count = 0; | 415 | t->prl_count = 0; |
416 | x = t->prl; | 416 | x = t->prl; |
417 | call_rcu(&x->rcu_head, prl_list_destroy_rcu); | 417 | call_rcu(&x->rcu_head, prl_list_destroy_rcu); |
418 | t->prl = NULL; | 418 | t->prl = NULL; |
419 | } | 419 | } |
420 | } | 420 | } |
421 | out: | 421 | out: |
422 | spin_unlock(&ipip6_prl_lock); | 422 | spin_unlock(&ipip6_prl_lock); |
423 | return err; | 423 | return err; |
424 | } | 424 | } |
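The PRL update paths above follow the classic RCU publish/retire discipline: writers serialize on ipip6_prl_lock, publish new entries with rcu_assign_pointer(), and retire old ones through call_rcu(), so lookups never need the spinlock. As a hedged sketch (the real __ipip6_tunnel_locate_prl(), called from isatap_chksrc() below, is defined earlier in this file and outside this hunk), the lockless reader side plausibly looks like this:

	/* Sketch only -- assumes the list layout used by the writers above.
	 * Caller must hold rcu_read_lock() across the lookup and any use of
	 * the returned entry; entries are freed only after a grace period
	 * via call_rcu(), so traversal without ipip6_prl_lock is safe.
	 */
	static struct ip_tunnel_prl_entry *
	__ipip6_tunnel_locate_prl(struct ip_tunnel *t, __be32 addr)
	{
		struct ip_tunnel_prl_entry *p;

		for (p = rcu_dereference(t->prl); p; p = rcu_dereference(p->next))
			if (p->addr == addr)
				break;
		return p;
	}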
425 | 425 | ||
426 | static int | 426 | static int |
427 | isatap_chksrc(struct sk_buff *skb, struct iphdr *iph, struct ip_tunnel *t) | 427 | isatap_chksrc(struct sk_buff *skb, struct iphdr *iph, struct ip_tunnel *t) |
428 | { | 428 | { |
429 | struct ip_tunnel_prl_entry *p; | 429 | struct ip_tunnel_prl_entry *p; |
430 | int ok = 1; | 430 | int ok = 1; |
431 | 431 | ||
432 | rcu_read_lock(); | 432 | rcu_read_lock(); |
433 | p = __ipip6_tunnel_locate_prl(t, iph->saddr); | 433 | p = __ipip6_tunnel_locate_prl(t, iph->saddr); |
434 | if (p) { | 434 | if (p) { |
435 | if (p->flags & PRL_DEFAULT) | 435 | if (p->flags & PRL_DEFAULT) |
436 | skb->ndisc_nodetype = NDISC_NODETYPE_DEFAULT; | 436 | skb->ndisc_nodetype = NDISC_NODETYPE_DEFAULT; |
437 | else | 437 | else |
438 | skb->ndisc_nodetype = NDISC_NODETYPE_NODEFAULT; | 438 | skb->ndisc_nodetype = NDISC_NODETYPE_NODEFAULT; |
439 | } else { | 439 | } else { |
440 | struct in6_addr *addr6 = &ipv6_hdr(skb)->saddr; | 440 | struct in6_addr *addr6 = &ipv6_hdr(skb)->saddr; |
441 | if (ipv6_addr_is_isatap(addr6) && | 441 | if (ipv6_addr_is_isatap(addr6) && |
442 | (addr6->s6_addr32[3] == iph->saddr) && | 442 | (addr6->s6_addr32[3] == iph->saddr) && |
443 | ipv6_chk_prefix(addr6, t->dev)) | 443 | ipv6_chk_prefix(addr6, t->dev)) |
444 | skb->ndisc_nodetype = NDISC_NODETYPE_HOST; | 444 | skb->ndisc_nodetype = NDISC_NODETYPE_HOST; |
445 | else | 445 | else |
446 | ok = 0; | 446 | ok = 0; |
447 | } | 447 | } |
448 | rcu_read_unlock(); | 448 | rcu_read_unlock(); |
449 | return ok; | 449 | return ok; |
450 | } | 450 | } |
451 | 451 | ||
452 | static void ipip6_tunnel_uninit(struct net_device *dev) | 452 | static void ipip6_tunnel_uninit(struct net_device *dev) |
453 | { | 453 | { |
454 | struct net *net = dev_net(dev); | 454 | struct net *net = dev_net(dev); |
455 | struct sit_net *sitn = net_generic(net, sit_net_id); | 455 | struct sit_net *sitn = net_generic(net, sit_net_id); |
456 | 456 | ||
457 | if (dev == sitn->fb_tunnel_dev) { | 457 | if (dev == sitn->fb_tunnel_dev) { |
458 | spin_lock_bh(&ipip6_lock); | 458 | spin_lock_bh(&ipip6_lock); |
459 | sitn->tunnels_wc[0] = NULL; | 459 | sitn->tunnels_wc[0] = NULL; |
460 | spin_unlock_bh(&ipip6_lock); | 460 | spin_unlock_bh(&ipip6_lock); |
461 | dev_put(dev); | 461 | dev_put(dev); |
462 | } else { | 462 | } else { |
463 | ipip6_tunnel_unlink(sitn, netdev_priv(dev)); | 463 | ipip6_tunnel_unlink(sitn, netdev_priv(dev)); |
464 | ipip6_tunnel_del_prl(netdev_priv(dev), NULL); | 464 | ipip6_tunnel_del_prl(netdev_priv(dev), NULL); |
465 | dev_put(dev); | 465 | dev_put(dev); |
466 | } | 466 | } |
467 | } | 467 | } |
468 | 468 | ||
469 | 469 | ||
470 | static int ipip6_err(struct sk_buff *skb, u32 info) | 470 | static int ipip6_err(struct sk_buff *skb, u32 info) |
471 | { | 471 | { |
472 | 472 | ||
473 | /* All the routers (except for Linux) return only | 473 | /* All the routers (except for Linux) return only |
474 | 8 bytes of packet payload. This means that precise relaying of | 474 | 8 bytes of packet payload. This means that precise relaying of |
475 | ICMP in the real Internet is absolutely infeasible. | 475 | ICMP in the real Internet is absolutely infeasible. |
476 | */ | 476 | */ |
477 | struct iphdr *iph = (struct iphdr*)skb->data; | 477 | struct iphdr *iph = (struct iphdr*)skb->data; |
478 | const int type = icmp_hdr(skb)->type; | 478 | const int type = icmp_hdr(skb)->type; |
479 | const int code = icmp_hdr(skb)->code; | 479 | const int code = icmp_hdr(skb)->code; |
480 | struct ip_tunnel *t; | 480 | struct ip_tunnel *t; |
481 | int err; | 481 | int err; |
482 | 482 | ||
483 | switch (type) { | 483 | switch (type) { |
484 | default: | 484 | default: |
485 | case ICMP_PARAMETERPROB: | 485 | case ICMP_PARAMETERPROB: |
486 | return 0; | 486 | return 0; |
487 | 487 | ||
488 | case ICMP_DEST_UNREACH: | 488 | case ICMP_DEST_UNREACH: |
489 | switch (code) { | 489 | switch (code) { |
490 | case ICMP_SR_FAILED: | 490 | case ICMP_SR_FAILED: |
491 | case ICMP_PORT_UNREACH: | 491 | case ICMP_PORT_UNREACH: |
492 | /* Impossible event. */ | 492 | /* Impossible event. */ |
493 | return 0; | 493 | return 0; |
494 | case ICMP_FRAG_NEEDED: | 494 | case ICMP_FRAG_NEEDED: |
495 | /* Soft state for pmtu is maintained by IP core. */ | 495 | /* Soft state for pmtu is maintained by IP core. */ |
496 | return 0; | 496 | return 0; |
497 | default: | 497 | default: |
498 | /* All others are translated to HOST_UNREACH. | 498 | /* All others are translated to HOST_UNREACH. |
499 | rfc2003 contains "deep thoughts" about NET_UNREACH; | 499 | rfc2003 contains "deep thoughts" about NET_UNREACH; |
500 | I believe they are just ether pollution. --ANK | 500 | I believe they are just ether pollution. --ANK |
501 | */ | 501 | */ |
502 | break; | 502 | break; |
503 | } | 503 | } |
504 | break; | 504 | break; |
505 | case ICMP_TIME_EXCEEDED: | 505 | case ICMP_TIME_EXCEEDED: |
506 | if (code != ICMP_EXC_TTL) | 506 | if (code != ICMP_EXC_TTL) |
507 | return 0; | 507 | return 0; |
508 | break; | 508 | break; |
509 | } | 509 | } |
510 | 510 | ||
511 | err = -ENOENT; | 511 | err = -ENOENT; |
512 | 512 | ||
513 | rcu_read_lock(); | 513 | rcu_read_lock(); |
514 | t = ipip6_tunnel_lookup(dev_net(skb->dev), | 514 | t = ipip6_tunnel_lookup(dev_net(skb->dev), |
515 | skb->dev, | 515 | skb->dev, |
516 | iph->daddr, | 516 | iph->daddr, |
517 | iph->saddr); | 517 | iph->saddr); |
518 | if (t == NULL || t->parms.iph.daddr == 0) | 518 | if (t == NULL || t->parms.iph.daddr == 0) |
519 | goto out; | 519 | goto out; |
520 | 520 | ||
521 | err = 0; | 521 | err = 0; |
522 | if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) | 522 | if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) |
523 | goto out; | 523 | goto out; |
524 | 524 | ||
525 | if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO)) | 525 | if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO)) |
526 | t->err_count++; | 526 | t->err_count++; |
527 | else | 527 | else |
528 | t->err_count = 1; | 528 | t->err_count = 1; |
529 | t->err_time = jiffies; | 529 | t->err_time = jiffies; |
530 | out: | 530 | out: |
531 | rcu_read_unlock(); | 531 | rcu_read_unlock(); |
532 | return err; | 532 | return err; |
533 | } | 533 | } |
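The err_count/err_time bookkeeping at the end of ipip6_err() is consumed by ipip6_tunnel_xmit() further down in this file: while a recent ICMP error is on record (within IPTUNNEL_ERR_TIMEO), each transmit decrements err_count and calls dst_link_failure(), propagating the tunnel failure back to the IPv6 routes that use it.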
534 | 534 | ||
535 | static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) | 535 | static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) |
536 | { | 536 | { |
537 | if (INET_ECN_is_ce(iph->tos)) | 537 | if (INET_ECN_is_ce(iph->tos)) |
538 | IP6_ECN_set_ce(ipv6_hdr(skb)); | 538 | IP6_ECN_set_ce(ipv6_hdr(skb)); |
539 | } | 539 | } |
540 | 540 | ||
541 | static int ipip6_rcv(struct sk_buff *skb) | 541 | static int ipip6_rcv(struct sk_buff *skb) |
542 | { | 542 | { |
543 | struct iphdr *iph; | 543 | struct iphdr *iph; |
544 | struct ip_tunnel *tunnel; | 544 | struct ip_tunnel *tunnel; |
545 | 545 | ||
546 | if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) | 546 | if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) |
547 | goto out; | 547 | goto out; |
548 | 548 | ||
549 | iph = ip_hdr(skb); | 549 | iph = ip_hdr(skb); |
550 | 550 | ||
551 | rcu_read_lock(); | 551 | rcu_read_lock(); |
552 | tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, | 552 | tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, |
553 | iph->saddr, iph->daddr); | 553 | iph->saddr, iph->daddr); |
554 | if (tunnel != NULL) { | 554 | if (tunnel != NULL) { |
555 | secpath_reset(skb); | 555 | secpath_reset(skb); |
556 | skb->mac_header = skb->network_header; | 556 | skb->mac_header = skb->network_header; |
557 | skb_reset_network_header(skb); | 557 | skb_reset_network_header(skb); |
558 | IPCB(skb)->flags = 0; | 558 | IPCB(skb)->flags = 0; |
559 | skb->protocol = htons(ETH_P_IPV6); | 559 | skb->protocol = htons(ETH_P_IPV6); |
560 | skb->pkt_type = PACKET_HOST; | 560 | skb->pkt_type = PACKET_HOST; |
561 | 561 | ||
562 | if ((tunnel->dev->priv_flags & IFF_ISATAP) && | 562 | if ((tunnel->dev->priv_flags & IFF_ISATAP) && |
563 | !isatap_chksrc(skb, iph, tunnel)) { | 563 | !isatap_chksrc(skb, iph, tunnel)) { |
564 | tunnel->dev->stats.rx_errors++; | 564 | tunnel->dev->stats.rx_errors++; |
565 | rcu_read_unlock(); | 565 | rcu_read_unlock(); |
566 | kfree_skb(skb); | 566 | kfree_skb(skb); |
567 | return 0; | 567 | return 0; |
568 | } | 568 | } |
569 | tunnel->dev->stats.rx_packets++; | 569 | |
570 | tunnel->dev->stats.rx_bytes += skb->len; | 570 | skb_tunnel_rx(skb, tunnel->dev); |
571 | skb->dev = tunnel->dev; | 571 | |
572 | skb_dst_drop(skb); | ||
573 | nf_reset(skb); | ||
574 | ipip6_ecn_decapsulate(iph, skb); | 572 | ipip6_ecn_decapsulate(iph, skb); |
575 | netif_rx(skb); | 573 | netif_rx(skb); |
576 | rcu_read_unlock(); | 574 | rcu_read_unlock(); |
577 | return 0; | 575 | return 0; |
578 | } | 576 | } |
579 | 577 | ||
580 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); | 578 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); |
581 | rcu_read_unlock(); | 579 | rcu_read_unlock(); |
582 | out: | 580 | out: |
583 | kfree_skb(skb); | 581 | kfree_skb(skb); |
584 | return 0; | 582 | return 0; |
585 | } | 583 | } |
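This hunk is where the commit pays off for sit: the five open-coded lines (rx_packets/rx_bytes accounting, the skb->dev reassignment, skb_dst_drop() and nf_reset()) collapse into a single skb_tunnel_rx() call. Judging only from the lines removed here plus the rxhash reset described in the commit message, the helper added to include/net/dst.h plausibly amounts to:

	/* Reconstruction under those assumptions, not a verbatim copy of dst.h */
	static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev)
	{
		skb->dev = dev;
		dev->stats.rx_packets++;
		dev->stats.rx_bytes += skb->len;
		skb->rxhash = 0;	/* outer-flow hash is stale after decap */
		skb_dst_drop(skb);
		nf_reset(skb);
	}

Clearing rxhash is the behavioral change: the hash computed for the outer IPv4 flow is meaningless for the inner IPv6 packet about to re-enter the stack through netif_rx(), so packet steering must recompute it.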
586 | 584 | ||
587 | /* | 585 | /* |
588 | * Returns the embedded IPv4 address if the IPv6 address | 586 | * Returns the embedded IPv4 address if the IPv6 address |
589 | * comes from the 6rd (RFC 5569) or 6to4 (RFC 3056) address space. | 587 | * comes from the 6rd (RFC 5569) or 6to4 (RFC 3056) address space. |
590 | */ | 588 | */ |
591 | static inline | 589 | static inline |
592 | __be32 try_6rd(struct in6_addr *v6dst, struct ip_tunnel *tunnel) | 590 | __be32 try_6rd(struct in6_addr *v6dst, struct ip_tunnel *tunnel) |
593 | { | 591 | { |
594 | __be32 dst = 0; | 592 | __be32 dst = 0; |
595 | 593 | ||
596 | #ifdef CONFIG_IPV6_SIT_6RD | 594 | #ifdef CONFIG_IPV6_SIT_6RD |
597 | if (ipv6_prefix_equal(v6dst, &tunnel->ip6rd.prefix, | 595 | if (ipv6_prefix_equal(v6dst, &tunnel->ip6rd.prefix, |
598 | tunnel->ip6rd.prefixlen)) { | 596 | tunnel->ip6rd.prefixlen)) { |
599 | unsigned pbw0, pbi0; | 597 | unsigned pbw0, pbi0; |
600 | int pbi1; | 598 | int pbi1; |
601 | u32 d; | 599 | u32 d; |
602 | 600 | ||
603 | pbw0 = tunnel->ip6rd.prefixlen >> 5; | 601 | pbw0 = tunnel->ip6rd.prefixlen >> 5; |
604 | pbi0 = tunnel->ip6rd.prefixlen & 0x1f; | 602 | pbi0 = tunnel->ip6rd.prefixlen & 0x1f; |
605 | 603 | ||
606 | d = (ntohl(v6dst->s6_addr32[pbw0]) << pbi0) >> | 604 | d = (ntohl(v6dst->s6_addr32[pbw0]) << pbi0) >> |
607 | tunnel->ip6rd.relay_prefixlen; | 605 | tunnel->ip6rd.relay_prefixlen; |
608 | 606 | ||
609 | pbi1 = pbi0 - tunnel->ip6rd.relay_prefixlen; | 607 | pbi1 = pbi0 - tunnel->ip6rd.relay_prefixlen; |
610 | if (pbi1 > 0) | 608 | if (pbi1 > 0) |
611 | d |= ntohl(v6dst->s6_addr32[pbw0 + 1]) >> | 609 | d |= ntohl(v6dst->s6_addr32[pbw0 + 1]) >> |
612 | (32 - pbi1); | 610 | (32 - pbi1); |
613 | 611 | ||
614 | dst = tunnel->ip6rd.relay_prefix | htonl(d); | 612 | dst = tunnel->ip6rd.relay_prefix | htonl(d); |
615 | } | 613 | } |
616 | #else | 614 | #else |
617 | if (v6dst->s6_addr16[0] == htons(0x2002)) { | 615 | if (v6dst->s6_addr16[0] == htons(0x2002)) { |
618 | /* 6to4 v6 addr has a 16-bit prefix, 32-bit v4addr, 16-bit SLA, ... */ | 616 | /* 6to4 v6 addr has a 16-bit prefix, 32-bit v4addr, 16-bit SLA, ... */ |
619 | memcpy(&dst, &v6dst->s6_addr16[1], 4); | 617 | memcpy(&dst, &v6dst->s6_addr16[1], 4); |
620 | } | 618 | } |
621 | #endif | 619 | #endif |
622 | return dst; | 620 | return dst; |
623 | } | 621 | } |
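The shifting in the CONFIG_IPV6_SIT_6RD branch is easier to check with concrete numbers. With hypothetical parameters (not taken from this diff) of 6rd prefix 2001:db8::/32 and relay prefix 10.0.0.0/8: pbw0 = 32 >> 5 = 1 and pbi0 = 32 & 0x1f = 0, so for destination 2001:db8:4500:2e00::1 the code computes d = (0x45002e00 << 0) >> 8 = 0x0045002e and dst = 0x0a000000 | 0x0045002e = 10.69.0.46. A standalone userspace replica of the same arithmetic:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint32_t v6_word1 = 0x45002e00;	/* bits 32..63 of 2001:db8:4500:2e00::1 */
		unsigned prefixlen = 32, relay_prefixlen = 8;
		uint32_t relay_prefix = 0x0a000000;	/* 10.0.0.0 */

		unsigned pbi0 = prefixlen & 0x1f;	/* bit offset inside word pbw0 */
		uint32_t d = (v6_word1 << pbi0) >> relay_prefixlen;
		uint32_t dst = relay_prefix | d;

		printf("%u.%u.%u.%u\n", dst >> 24, (dst >> 16) & 0xff,
		       (dst >> 8) & 0xff, dst & 0xff);	/* prints 10.69.0.46 */
		return 0;
	}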
624 | 622 | ||
625 | /* | 623 | /* |
626 | * This function assumes it is being called from dev_queue_xmit() | 624 | * This function assumes it is being called from dev_queue_xmit() |
627 | * and that skb is filled properly by that function. | 625 | * and that skb is filled properly by that function. |
628 | */ | 626 | */ |
629 | 627 | ||
630 | static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, | 628 | static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, |
631 | struct net_device *dev) | 629 | struct net_device *dev) |
632 | { | 630 | { |
633 | struct ip_tunnel *tunnel = netdev_priv(dev); | 631 | struct ip_tunnel *tunnel = netdev_priv(dev); |
634 | struct net_device_stats *stats = &dev->stats; | 632 | struct net_device_stats *stats = &dev->stats; |
635 | struct netdev_queue *txq = netdev_get_tx_queue(dev, 0); | 633 | struct netdev_queue *txq = netdev_get_tx_queue(dev, 0); |
636 | struct iphdr *tiph = &tunnel->parms.iph; | 634 | struct iphdr *tiph = &tunnel->parms.iph; |
637 | struct ipv6hdr *iph6 = ipv6_hdr(skb); | 635 | struct ipv6hdr *iph6 = ipv6_hdr(skb); |
638 | u8 tos = tunnel->parms.iph.tos; | 636 | u8 tos = tunnel->parms.iph.tos; |
639 | __be16 df = tiph->frag_off; | 637 | __be16 df = tiph->frag_off; |
640 | struct rtable *rt; /* Route to the other host */ | 638 | struct rtable *rt; /* Route to the other host */ |
641 | struct net_device *tdev; /* Device to other host */ | 639 | struct net_device *tdev; /* Device to other host */ |
642 | struct iphdr *iph; /* Our new IP header */ | 640 | struct iphdr *iph; /* Our new IP header */ |
643 | unsigned int max_headroom; /* The extra header space needed */ | 641 | unsigned int max_headroom; /* The extra header space needed */ |
644 | __be32 dst = tiph->daddr; | 642 | __be32 dst = tiph->daddr; |
645 | int mtu; | 643 | int mtu; |
646 | struct in6_addr *addr6; | 644 | struct in6_addr *addr6; |
647 | int addr_type; | 645 | int addr_type; |
648 | 646 | ||
649 | if (skb->protocol != htons(ETH_P_IPV6)) | 647 | if (skb->protocol != htons(ETH_P_IPV6)) |
650 | goto tx_error; | 648 | goto tx_error; |
651 | 649 | ||
652 | /* ISATAP (RFC4214) - must come before 6to4 */ | 650 | /* ISATAP (RFC4214) - must come before 6to4 */ |
653 | if (dev->priv_flags & IFF_ISATAP) { | 651 | if (dev->priv_flags & IFF_ISATAP) { |
654 | struct neighbour *neigh = NULL; | 652 | struct neighbour *neigh = NULL; |
655 | 653 | ||
656 | if (skb_dst(skb)) | 654 | if (skb_dst(skb)) |
657 | neigh = skb_dst(skb)->neighbour; | 655 | neigh = skb_dst(skb)->neighbour; |
658 | 656 | ||
659 | if (neigh == NULL) { | 657 | if (neigh == NULL) { |
660 | if (net_ratelimit()) | 658 | if (net_ratelimit()) |
661 | printk(KERN_DEBUG "sit: nexthop == NULL\n"); | 659 | printk(KERN_DEBUG "sit: nexthop == NULL\n"); |
662 | goto tx_error; | 660 | goto tx_error; |
663 | } | 661 | } |
664 | 662 | ||
665 | addr6 = (struct in6_addr*)&neigh->primary_key; | 663 | addr6 = (struct in6_addr*)&neigh->primary_key; |
666 | addr_type = ipv6_addr_type(addr6); | 664 | addr_type = ipv6_addr_type(addr6); |
667 | 665 | ||
668 | if ((addr_type & IPV6_ADDR_UNICAST) && | 666 | if ((addr_type & IPV6_ADDR_UNICAST) && |
669 | ipv6_addr_is_isatap(addr6)) | 667 | ipv6_addr_is_isatap(addr6)) |
670 | dst = addr6->s6_addr32[3]; | 668 | dst = addr6->s6_addr32[3]; |
671 | else | 669 | else |
672 | goto tx_error; | 670 | goto tx_error; |
673 | } | 671 | } |
674 | 672 | ||
675 | if (!dst) | 673 | if (!dst) |
676 | dst = try_6rd(&iph6->daddr, tunnel); | 674 | dst = try_6rd(&iph6->daddr, tunnel); |
677 | 675 | ||
678 | if (!dst) { | 676 | if (!dst) { |
679 | struct neighbour *neigh = NULL; | 677 | struct neighbour *neigh = NULL; |
680 | 678 | ||
681 | if (skb_dst(skb)) | 679 | if (skb_dst(skb)) |
682 | neigh = skb_dst(skb)->neighbour; | 680 | neigh = skb_dst(skb)->neighbour; |
683 | 681 | ||
684 | if (neigh == NULL) { | 682 | if (neigh == NULL) { |
685 | if (net_ratelimit()) | 683 | if (net_ratelimit()) |
686 | printk(KERN_DEBUG "sit: nexthop == NULL\n"); | 684 | printk(KERN_DEBUG "sit: nexthop == NULL\n"); |
687 | goto tx_error; | 685 | goto tx_error; |
688 | } | 686 | } |
689 | 687 | ||
690 | addr6 = (struct in6_addr*)&neigh->primary_key; | 688 | addr6 = (struct in6_addr*)&neigh->primary_key; |
691 | addr_type = ipv6_addr_type(addr6); | 689 | addr_type = ipv6_addr_type(addr6); |
692 | 690 | ||
693 | if (addr_type == IPV6_ADDR_ANY) { | 691 | if (addr_type == IPV6_ADDR_ANY) { |
694 | addr6 = &ipv6_hdr(skb)->daddr; | 692 | addr6 = &ipv6_hdr(skb)->daddr; |
695 | addr_type = ipv6_addr_type(addr6); | 693 | addr_type = ipv6_addr_type(addr6); |
696 | } | 694 | } |
697 | 695 | ||
698 | if ((addr_type & IPV6_ADDR_COMPATv4) == 0) | 696 | if ((addr_type & IPV6_ADDR_COMPATv4) == 0) |
699 | goto tx_error_icmp; | 697 | goto tx_error_icmp; |
700 | 698 | ||
701 | dst = addr6->s6_addr32[3]; | 699 | dst = addr6->s6_addr32[3]; |
702 | } | 700 | } |
703 | 701 | ||
704 | { | 702 | { |
705 | struct flowi fl = { .nl_u = { .ip4_u = | 703 | struct flowi fl = { .nl_u = { .ip4_u = |
706 | { .daddr = dst, | 704 | { .daddr = dst, |
707 | .saddr = tiph->saddr, | 705 | .saddr = tiph->saddr, |
708 | .tos = RT_TOS(tos) } }, | 706 | .tos = RT_TOS(tos) } }, |
709 | .oif = tunnel->parms.link, | 707 | .oif = tunnel->parms.link, |
710 | .proto = IPPROTO_IPV6 }; | 708 | .proto = IPPROTO_IPV6 }; |
711 | if (ip_route_output_key(dev_net(dev), &rt, &fl)) { | 709 | if (ip_route_output_key(dev_net(dev), &rt, &fl)) { |
712 | stats->tx_carrier_errors++; | 710 | stats->tx_carrier_errors++; |
713 | goto tx_error_icmp; | 711 | goto tx_error_icmp; |
714 | } | 712 | } |
715 | } | 713 | } |
716 | if (rt->rt_type != RTN_UNICAST) { | 714 | if (rt->rt_type != RTN_UNICAST) { |
717 | ip_rt_put(rt); | 715 | ip_rt_put(rt); |
718 | stats->tx_carrier_errors++; | 716 | stats->tx_carrier_errors++; |
719 | goto tx_error_icmp; | 717 | goto tx_error_icmp; |
720 | } | 718 | } |
721 | tdev = rt->u.dst.dev; | 719 | tdev = rt->u.dst.dev; |
722 | 720 | ||
723 | if (tdev == dev) { | 721 | if (tdev == dev) { |
724 | ip_rt_put(rt); | 722 | ip_rt_put(rt); |
725 | stats->collisions++; | 723 | stats->collisions++; |
726 | goto tx_error; | 724 | goto tx_error; |
727 | } | 725 | } |
728 | 726 | ||
729 | if (df) { | 727 | if (df) { |
730 | mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr); | 728 | mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr); |
731 | 729 | ||
732 | if (mtu < 68) { | 730 | if (mtu < 68) { |
733 | stats->collisions++; | 731 | stats->collisions++; |
734 | ip_rt_put(rt); | 732 | ip_rt_put(rt); |
735 | goto tx_error; | 733 | goto tx_error; |
736 | } | 734 | } |
737 | 735 | ||
738 | if (mtu < IPV6_MIN_MTU) { | 736 | if (mtu < IPV6_MIN_MTU) { |
739 | mtu = IPV6_MIN_MTU; | 737 | mtu = IPV6_MIN_MTU; |
740 | df = 0; | 738 | df = 0; |
741 | } | 739 | } |
742 | 740 | ||
743 | if (tunnel->parms.iph.daddr && skb_dst(skb)) | 741 | if (tunnel->parms.iph.daddr && skb_dst(skb)) |
744 | skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); | 742 | skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); |
745 | 743 | ||
746 | if (skb->len > mtu) { | 744 | if (skb->len > mtu) { |
747 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); | 745 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); |
748 | ip_rt_put(rt); | 746 | ip_rt_put(rt); |
749 | goto tx_error; | 747 | goto tx_error; |
750 | } | 748 | } |
751 | } | 749 | } |
752 | 750 | ||
753 | if (tunnel->err_count > 0) { | 751 | if (tunnel->err_count > 0) { |
754 | if (time_before(jiffies, | 752 | if (time_before(jiffies, |
755 | tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { | 753 | tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { |
756 | tunnel->err_count--; | 754 | tunnel->err_count--; |
757 | dst_link_failure(skb); | 755 | dst_link_failure(skb); |
758 | } else | 756 | } else |
759 | tunnel->err_count = 0; | 757 | tunnel->err_count = 0; |
760 | } | 758 | } |
761 | 759 | ||
762 | /* | 760 | /* |
763 | * Okay, now see if we can stuff it in the buffer as-is. | 761 | * Okay, now see if we can stuff it in the buffer as-is. |
764 | */ | 762 | */ |
765 | max_headroom = LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr); | 763 | max_headroom = LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr); |
766 | 764 | ||
767 | if (skb_headroom(skb) < max_headroom || skb_shared(skb) || | 765 | if (skb_headroom(skb) < max_headroom || skb_shared(skb) || |
768 | (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { | 766 | (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { |
769 | struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); | 767 | struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); |
770 | if (!new_skb) { | 768 | if (!new_skb) { |
771 | ip_rt_put(rt); | 769 | ip_rt_put(rt); |
772 | txq->tx_dropped++; | 770 | txq->tx_dropped++; |
773 | dev_kfree_skb(skb); | 771 | dev_kfree_skb(skb); |
774 | return NETDEV_TX_OK; | 772 | return NETDEV_TX_OK; |
775 | } | 773 | } |
776 | if (skb->sk) | 774 | if (skb->sk) |
777 | skb_set_owner_w(new_skb, skb->sk); | 775 | skb_set_owner_w(new_skb, skb->sk); |
778 | dev_kfree_skb(skb); | 776 | dev_kfree_skb(skb); |
779 | skb = new_skb; | 777 | skb = new_skb; |
780 | iph6 = ipv6_hdr(skb); | 778 | iph6 = ipv6_hdr(skb); |
781 | } | 779 | } |
782 | 780 | ||
783 | skb->transport_header = skb->network_header; | 781 | skb->transport_header = skb->network_header; |
784 | skb_push(skb, sizeof(struct iphdr)); | 782 | skb_push(skb, sizeof(struct iphdr)); |
785 | skb_reset_network_header(skb); | 783 | skb_reset_network_header(skb); |
786 | memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); | 784 | memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); |
787 | IPCB(skb)->flags = 0; | 785 | IPCB(skb)->flags = 0; |
788 | skb_dst_drop(skb); | 786 | skb_dst_drop(skb); |
789 | skb_dst_set(skb, &rt->u.dst); | 787 | skb_dst_set(skb, &rt->u.dst); |
790 | 788 | ||
791 | /* | 789 | /* |
792 | * Push down and install the IPIP header. | 790 | * Push down and install the IPIP header. |
793 | */ | 791 | */ |
794 | 792 | ||
795 | iph = ip_hdr(skb); | 793 | iph = ip_hdr(skb); |
796 | iph->version = 4; | 794 | iph->version = 4; |
797 | iph->ihl = sizeof(struct iphdr)>>2; | 795 | iph->ihl = sizeof(struct iphdr)>>2; |
798 | iph->frag_off = df; | 796 | iph->frag_off = df; |
799 | iph->protocol = IPPROTO_IPV6; | 797 | iph->protocol = IPPROTO_IPV6; |
800 | iph->tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6)); | 798 | iph->tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6)); |
801 | iph->daddr = rt->rt_dst; | 799 | iph->daddr = rt->rt_dst; |
802 | iph->saddr = rt->rt_src; | 800 | iph->saddr = rt->rt_src; |
803 | 801 | ||
804 | if ((iph->ttl = tiph->ttl) == 0) | 802 | if ((iph->ttl = tiph->ttl) == 0) |
805 | iph->ttl = iph6->hop_limit; | 803 | iph->ttl = iph6->hop_limit; |
806 | 804 | ||
807 | nf_reset(skb); | 805 | nf_reset(skb); |
808 | 806 | ||
809 | IPTUNNEL_XMIT(); | 807 | IPTUNNEL_XMIT(); |
810 | return NETDEV_TX_OK; | 808 | return NETDEV_TX_OK; |
811 | 809 | ||
812 | tx_error_icmp: | 810 | tx_error_icmp: |
813 | dst_link_failure(skb); | 811 | dst_link_failure(skb); |
814 | tx_error: | 812 | tx_error: |
815 | stats->tx_errors++; | 813 | stats->tx_errors++; |
816 | dev_kfree_skb(skb); | 814 | dev_kfree_skb(skb); |
817 | return NETDEV_TX_OK; | 815 | return NETDEV_TX_OK; |
818 | } | 816 | } |
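One subtlety in the df handling above is worth a worked example (numbers illustrative, not from this diff). With an underlying IPv4 path MTU of 1500, the tunnel carries 1500 - 20 = 1480 bytes of IPv6, and anything larger triggers ICMPV6_PKT_TOOBIG. If the IPv4 route instead reported 1290, the budget 1290 - 20 = 1270 would fall below IPV6_MIN_MTU (1280); rather than advertise an illegal IPv6 MTU, the code pins mtu at 1280 and clears df, accepting fragmentation of the outer IPv4 packet instead of breaking IPv6 traffic.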
819 | 817 | ||
820 | static void ipip6_tunnel_bind_dev(struct net_device *dev) | 818 | static void ipip6_tunnel_bind_dev(struct net_device *dev) |
821 | { | 819 | { |
822 | struct net_device *tdev = NULL; | 820 | struct net_device *tdev = NULL; |
823 | struct ip_tunnel *tunnel; | 821 | struct ip_tunnel *tunnel; |
824 | struct iphdr *iph; | 822 | struct iphdr *iph; |
825 | 823 | ||
826 | tunnel = netdev_priv(dev); | 824 | tunnel = netdev_priv(dev); |
827 | iph = &tunnel->parms.iph; | 825 | iph = &tunnel->parms.iph; |
828 | 826 | ||
829 | if (iph->daddr) { | 827 | if (iph->daddr) { |
830 | struct flowi fl = { .nl_u = { .ip4_u = | 828 | struct flowi fl = { .nl_u = { .ip4_u = |
831 | { .daddr = iph->daddr, | 829 | { .daddr = iph->daddr, |
832 | .saddr = iph->saddr, | 830 | .saddr = iph->saddr, |
833 | .tos = RT_TOS(iph->tos) } }, | 831 | .tos = RT_TOS(iph->tos) } }, |
834 | .oif = tunnel->parms.link, | 832 | .oif = tunnel->parms.link, |
835 | .proto = IPPROTO_IPV6 }; | 833 | .proto = IPPROTO_IPV6 }; |
836 | struct rtable *rt; | 834 | struct rtable *rt; |
837 | if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { | 835 | if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { |
838 | tdev = rt->u.dst.dev; | 836 | tdev = rt->u.dst.dev; |
839 | ip_rt_put(rt); | 837 | ip_rt_put(rt); |
840 | } | 838 | } |
841 | dev->flags |= IFF_POINTOPOINT; | 839 | dev->flags |= IFF_POINTOPOINT; |
842 | } | 840 | } |
843 | 841 | ||
844 | if (!tdev && tunnel->parms.link) | 842 | if (!tdev && tunnel->parms.link) |
845 | tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link); | 843 | tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link); |
846 | 844 | ||
847 | if (tdev) { | 845 | if (tdev) { |
848 | dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); | 846 | dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); |
849 | dev->mtu = tdev->mtu - sizeof(struct iphdr); | 847 | dev->mtu = tdev->mtu - sizeof(struct iphdr); |
850 | if (dev->mtu < IPV6_MIN_MTU) | 848 | if (dev->mtu < IPV6_MIN_MTU) |
851 | dev->mtu = IPV6_MIN_MTU; | 849 | dev->mtu = IPV6_MIN_MTU; |
852 | } | 850 | } |
853 | dev->iflink = tunnel->parms.link; | 851 | dev->iflink = tunnel->parms.link; |
854 | } | 852 | } |
855 | 853 | ||
856 | static int | 854 | static int |
857 | ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) | 855 | ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) |
858 | { | 856 | { |
859 | int err = 0; | 857 | int err = 0; |
860 | struct ip_tunnel_parm p; | 858 | struct ip_tunnel_parm p; |
861 | struct ip_tunnel_prl prl; | 859 | struct ip_tunnel_prl prl; |
862 | struct ip_tunnel *t; | 860 | struct ip_tunnel *t; |
863 | struct net *net = dev_net(dev); | 861 | struct net *net = dev_net(dev); |
864 | struct sit_net *sitn = net_generic(net, sit_net_id); | 862 | struct sit_net *sitn = net_generic(net, sit_net_id); |
865 | #ifdef CONFIG_IPV6_SIT_6RD | 863 | #ifdef CONFIG_IPV6_SIT_6RD |
866 | struct ip_tunnel_6rd ip6rd; | 864 | struct ip_tunnel_6rd ip6rd; |
867 | #endif | 865 | #endif |
868 | 866 | ||
869 | switch (cmd) { | 867 | switch (cmd) { |
870 | case SIOCGETTUNNEL: | 868 | case SIOCGETTUNNEL: |
871 | #ifdef CONFIG_IPV6_SIT_6RD | 869 | #ifdef CONFIG_IPV6_SIT_6RD |
872 | case SIOCGET6RD: | 870 | case SIOCGET6RD: |
873 | #endif | 871 | #endif |
874 | t = NULL; | 872 | t = NULL; |
875 | if (dev == sitn->fb_tunnel_dev) { | 873 | if (dev == sitn->fb_tunnel_dev) { |
876 | if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { | 874 | if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { |
877 | err = -EFAULT; | 875 | err = -EFAULT; |
878 | break; | 876 | break; |
879 | } | 877 | } |
880 | t = ipip6_tunnel_locate(net, &p, 0); | 878 | t = ipip6_tunnel_locate(net, &p, 0); |
881 | } | 879 | } |
882 | if (t == NULL) | 880 | if (t == NULL) |
883 | t = netdev_priv(dev); | 881 | t = netdev_priv(dev); |
884 | 882 | ||
885 | err = -EFAULT; | 883 | err = -EFAULT; |
886 | if (cmd == SIOCGETTUNNEL) { | 884 | if (cmd == SIOCGETTUNNEL) { |
887 | memcpy(&p, &t->parms, sizeof(p)); | 885 | memcpy(&p, &t->parms, sizeof(p)); |
888 | if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, | 886 | if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, |
889 | sizeof(p))) | 887 | sizeof(p))) |
890 | goto done; | 888 | goto done; |
891 | #ifdef CONFIG_IPV6_SIT_6RD | 889 | #ifdef CONFIG_IPV6_SIT_6RD |
892 | } else { | 890 | } else { |
893 | ipv6_addr_copy(&ip6rd.prefix, &t->ip6rd.prefix); | 891 | ipv6_addr_copy(&ip6rd.prefix, &t->ip6rd.prefix); |
894 | ip6rd.relay_prefix = t->ip6rd.relay_prefix; | 892 | ip6rd.relay_prefix = t->ip6rd.relay_prefix; |
895 | ip6rd.prefixlen = t->ip6rd.prefixlen; | 893 | ip6rd.prefixlen = t->ip6rd.prefixlen; |
896 | ip6rd.relay_prefixlen = t->ip6rd.relay_prefixlen; | 894 | ip6rd.relay_prefixlen = t->ip6rd.relay_prefixlen; |
897 | if (copy_to_user(ifr->ifr_ifru.ifru_data, &ip6rd, | 895 | if (copy_to_user(ifr->ifr_ifru.ifru_data, &ip6rd, |
898 | sizeof(ip6rd))) | 896 | sizeof(ip6rd))) |
899 | goto done; | 897 | goto done; |
900 | #endif | 898 | #endif |
901 | } | 899 | } |
902 | err = 0; | 900 | err = 0; |
903 | break; | 901 | break; |
904 | 902 | ||
905 | case SIOCADDTUNNEL: | 903 | case SIOCADDTUNNEL: |
906 | case SIOCCHGTUNNEL: | 904 | case SIOCCHGTUNNEL: |
907 | err = -EPERM; | 905 | err = -EPERM; |
908 | if (!capable(CAP_NET_ADMIN)) | 906 | if (!capable(CAP_NET_ADMIN)) |
909 | goto done; | 907 | goto done; |
910 | 908 | ||
911 | err = -EFAULT; | 909 | err = -EFAULT; |
912 | if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) | 910 | if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) |
913 | goto done; | 911 | goto done; |
914 | 912 | ||
915 | err = -EINVAL; | 913 | err = -EINVAL; |
916 | if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPV6 || | 914 | if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPV6 || |
917 | p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF))) | 915 | p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF))) |
918 | goto done; | 916 | goto done; |
919 | if (p.iph.ttl) | 917 | if (p.iph.ttl) |
920 | p.iph.frag_off |= htons(IP_DF); | 918 | p.iph.frag_off |= htons(IP_DF); |
921 | 919 | ||
922 | t = ipip6_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL); | 920 | t = ipip6_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL); |
923 | 921 | ||
924 | if (dev != sitn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { | 922 | if (dev != sitn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { |
925 | if (t != NULL) { | 923 | if (t != NULL) { |
926 | if (t->dev != dev) { | 924 | if (t->dev != dev) { |
927 | err = -EEXIST; | 925 | err = -EEXIST; |
928 | break; | 926 | break; |
929 | } | 927 | } |
930 | } else { | 928 | } else { |
931 | if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) || | 929 | if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) || |
932 | (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) { | 930 | (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) { |
933 | err = -EINVAL; | 931 | err = -EINVAL; |
934 | break; | 932 | break; |
935 | } | 933 | } |
936 | t = netdev_priv(dev); | 934 | t = netdev_priv(dev); |
937 | ipip6_tunnel_unlink(sitn, t); | 935 | ipip6_tunnel_unlink(sitn, t); |
938 | t->parms.iph.saddr = p.iph.saddr; | 936 | t->parms.iph.saddr = p.iph.saddr; |
939 | t->parms.iph.daddr = p.iph.daddr; | 937 | t->parms.iph.daddr = p.iph.daddr; |
940 | memcpy(dev->dev_addr, &p.iph.saddr, 4); | 938 | memcpy(dev->dev_addr, &p.iph.saddr, 4); |
941 | memcpy(dev->broadcast, &p.iph.daddr, 4); | 939 | memcpy(dev->broadcast, &p.iph.daddr, 4); |
942 | ipip6_tunnel_link(sitn, t); | 940 | ipip6_tunnel_link(sitn, t); |
943 | netdev_state_change(dev); | 941 | netdev_state_change(dev); |
944 | } | 942 | } |
945 | } | 943 | } |
946 | 944 | ||
947 | if (t) { | 945 | if (t) { |
948 | err = 0; | 946 | err = 0; |
949 | if (cmd == SIOCCHGTUNNEL) { | 947 | if (cmd == SIOCCHGTUNNEL) { |
950 | t->parms.iph.ttl = p.iph.ttl; | 948 | t->parms.iph.ttl = p.iph.ttl; |
951 | t->parms.iph.tos = p.iph.tos; | 949 | t->parms.iph.tos = p.iph.tos; |
952 | if (t->parms.link != p.link) { | 950 | if (t->parms.link != p.link) { |
953 | t->parms.link = p.link; | 951 | t->parms.link = p.link; |
954 | ipip6_tunnel_bind_dev(dev); | 952 | ipip6_tunnel_bind_dev(dev); |
955 | netdev_state_change(dev); | 953 | netdev_state_change(dev); |
956 | } | 954 | } |
957 | } | 955 | } |
958 | if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) | 956 | if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) |
959 | err = -EFAULT; | 957 | err = -EFAULT; |
960 | } else | 958 | } else |
961 | err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); | 959 | err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); |
962 | break; | 960 | break; |
963 | 961 | ||
964 | case SIOCDELTUNNEL: | 962 | case SIOCDELTUNNEL: |
965 | err = -EPERM; | 963 | err = -EPERM; |
966 | if (!capable(CAP_NET_ADMIN)) | 964 | if (!capable(CAP_NET_ADMIN)) |
967 | goto done; | 965 | goto done; |
968 | 966 | ||
969 | if (dev == sitn->fb_tunnel_dev) { | 967 | if (dev == sitn->fb_tunnel_dev) { |
970 | err = -EFAULT; | 968 | err = -EFAULT; |
971 | if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) | 969 | if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) |
972 | goto done; | 970 | goto done; |
973 | err = -ENOENT; | 971 | err = -ENOENT; |
974 | if ((t = ipip6_tunnel_locate(net, &p, 0)) == NULL) | 972 | if ((t = ipip6_tunnel_locate(net, &p, 0)) == NULL) |
975 | goto done; | 973 | goto done; |
976 | err = -EPERM; | 974 | err = -EPERM; |
977 | if (t == netdev_priv(sitn->fb_tunnel_dev)) | 975 | if (t == netdev_priv(sitn->fb_tunnel_dev)) |
978 | goto done; | 976 | goto done; |
979 | dev = t->dev; | 977 | dev = t->dev; |
980 | } | 978 | } |
981 | unregister_netdevice(dev); | 979 | unregister_netdevice(dev); |
982 | err = 0; | 980 | err = 0; |
983 | break; | 981 | break; |
984 | 982 | ||
985 | case SIOCGETPRL: | 983 | case SIOCGETPRL: |
986 | err = -EINVAL; | 984 | err = -EINVAL; |
987 | if (dev == sitn->fb_tunnel_dev) | 985 | if (dev == sitn->fb_tunnel_dev) |
988 | goto done; | 986 | goto done; |
989 | err = -ENOENT; | 987 | err = -ENOENT; |
990 | if (!(t = netdev_priv(dev))) | 988 | if (!(t = netdev_priv(dev))) |
991 | goto done; | 989 | goto done; |
992 | err = ipip6_tunnel_get_prl(t, ifr->ifr_ifru.ifru_data); | 990 | err = ipip6_tunnel_get_prl(t, ifr->ifr_ifru.ifru_data); |
993 | break; | 991 | break; |
994 | 992 | ||
995 | case SIOCADDPRL: | 993 | case SIOCADDPRL: |
996 | case SIOCDELPRL: | 994 | case SIOCDELPRL: |
997 | case SIOCCHGPRL: | 995 | case SIOCCHGPRL: |
998 | err = -EPERM; | 996 | err = -EPERM; |
999 | if (!capable(CAP_NET_ADMIN)) | 997 | if (!capable(CAP_NET_ADMIN)) |
1000 | goto done; | 998 | goto done; |
1001 | err = -EINVAL; | 999 | err = -EINVAL; |
1002 | if (dev == sitn->fb_tunnel_dev) | 1000 | if (dev == sitn->fb_tunnel_dev) |
1003 | goto done; | 1001 | goto done; |
1004 | err = -EFAULT; | 1002 | err = -EFAULT; |
1005 | if (copy_from_user(&prl, ifr->ifr_ifru.ifru_data, sizeof(prl))) | 1003 | if (copy_from_user(&prl, ifr->ifr_ifru.ifru_data, sizeof(prl))) |
1006 | goto done; | 1004 | goto done; |
1007 | err = -ENOENT; | 1005 | err = -ENOENT; |
1008 | if (!(t = netdev_priv(dev))) | 1006 | if (!(t = netdev_priv(dev))) |
1009 | goto done; | 1007 | goto done; |
1010 | 1008 | ||
1011 | switch (cmd) { | 1009 | switch (cmd) { |
1012 | case SIOCDELPRL: | 1010 | case SIOCDELPRL: |
1013 | err = ipip6_tunnel_del_prl(t, &prl); | 1011 | err = ipip6_tunnel_del_prl(t, &prl); |
1014 | break; | 1012 | break; |
1015 | case SIOCADDPRL: | 1013 | case SIOCADDPRL: |
1016 | case SIOCCHGPRL: | 1014 | case SIOCCHGPRL: |
1017 | err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL); | 1015 | err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL); |
1018 | break; | 1016 | break; |
1019 | } | 1017 | } |
1020 | netdev_state_change(dev); | 1018 | netdev_state_change(dev); |
1021 | break; | 1019 | break; |
1022 | 1020 | ||
1023 | #ifdef CONFIG_IPV6_SIT_6RD | 1021 | #ifdef CONFIG_IPV6_SIT_6RD |
1024 | case SIOCADD6RD: | 1022 | case SIOCADD6RD: |
1025 | case SIOCCHG6RD: | 1023 | case SIOCCHG6RD: |
1026 | case SIOCDEL6RD: | 1024 | case SIOCDEL6RD: |
1027 | err = -EPERM; | 1025 | err = -EPERM; |
1028 | if (!capable(CAP_NET_ADMIN)) | 1026 | if (!capable(CAP_NET_ADMIN)) |
1029 | goto done; | 1027 | goto done; |
1030 | 1028 | ||
1031 | err = -EFAULT; | 1029 | err = -EFAULT; |
1032 | if (copy_from_user(&ip6rd, ifr->ifr_ifru.ifru_data, | 1030 | if (copy_from_user(&ip6rd, ifr->ifr_ifru.ifru_data, |
1033 | sizeof(ip6rd))) | 1031 | sizeof(ip6rd))) |
1034 | goto done; | 1032 | goto done; |
1035 | 1033 | ||
1036 | t = netdev_priv(dev); | 1034 | t = netdev_priv(dev); |
1037 | 1035 | ||
1038 | if (cmd != SIOCDEL6RD) { | 1036 | if (cmd != SIOCDEL6RD) { |
1039 | struct in6_addr prefix; | 1037 | struct in6_addr prefix; |
1040 | __be32 relay_prefix; | 1038 | __be32 relay_prefix; |
1041 | 1039 | ||
1042 | err = -EINVAL; | 1040 | err = -EINVAL; |
1043 | if (ip6rd.relay_prefixlen > 32 || | 1041 | if (ip6rd.relay_prefixlen > 32 || |
1044 | ip6rd.prefixlen + (32 - ip6rd.relay_prefixlen) > 64) | 1042 | ip6rd.prefixlen + (32 - ip6rd.relay_prefixlen) > 64) |
1045 | goto done; | 1043 | goto done; |
1046 | 1044 | ||
1047 | ipv6_addr_prefix(&prefix, &ip6rd.prefix, | 1045 | ipv6_addr_prefix(&prefix, &ip6rd.prefix, |
1048 | ip6rd.prefixlen); | 1046 | ip6rd.prefixlen); |
1049 | if (!ipv6_addr_equal(&prefix, &ip6rd.prefix)) | 1047 | if (!ipv6_addr_equal(&prefix, &ip6rd.prefix)) |
1050 | goto done; | 1048 | goto done; |
1051 | if (ip6rd.relay_prefixlen) | 1049 | if (ip6rd.relay_prefixlen) |
1052 | relay_prefix = ip6rd.relay_prefix & | 1050 | relay_prefix = ip6rd.relay_prefix & |
1053 | htonl(0xffffffffUL << | 1051 | htonl(0xffffffffUL << |
1054 | (32 - ip6rd.relay_prefixlen)); | 1052 | (32 - ip6rd.relay_prefixlen)); |
1055 | else | 1053 | else |
1056 | relay_prefix = 0; | 1054 | relay_prefix = 0; |
1057 | if (relay_prefix != ip6rd.relay_prefix) | 1055 | if (relay_prefix != ip6rd.relay_prefix) |
1058 | goto done; | 1056 | goto done; |
1059 | 1057 | ||
1060 | ipv6_addr_copy(&t->ip6rd.prefix, &prefix); | 1058 | ipv6_addr_copy(&t->ip6rd.prefix, &prefix); |
1061 | t->ip6rd.relay_prefix = relay_prefix; | 1059 | t->ip6rd.relay_prefix = relay_prefix; |
1062 | t->ip6rd.prefixlen = ip6rd.prefixlen; | 1060 | t->ip6rd.prefixlen = ip6rd.prefixlen; |
1063 | t->ip6rd.relay_prefixlen = ip6rd.relay_prefixlen; | 1061 | t->ip6rd.relay_prefixlen = ip6rd.relay_prefixlen; |
1064 | } else | 1062 | } else |
1065 | ipip6_tunnel_clone_6rd(dev, sitn); | 1063 | ipip6_tunnel_clone_6rd(dev, sitn); |
1066 | 1064 | ||
1067 | err = 0; | 1065 | err = 0; |
1068 | break; | 1066 | break; |
1069 | #endif | 1067 | #endif |
1070 | 1068 | ||
1071 | default: | 1069 | default: |
1072 | err = -EINVAL; | 1070 | err = -EINVAL; |
1073 | } | 1071 | } |
1074 | 1072 | ||
1075 | done: | 1073 | done: |
1076 | return err; | 1074 | return err; |
1077 | } | 1075 | } |
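The SIOCADD6RD/SIOCCHG6RD validation is compact but easy to misread: each site embeds 32 - relay_prefixlen IPv4 bits directly after the 6rd prefix, so the check prefixlen + (32 - relay_prefixlen) <= 64 keeps prefix plus embedded bits inside the top 64 bits of the IPv6 address. With hypothetical values, a /8 relay prefix embeds 24 bits and therefore caps the 6rd prefix at /40; combining it with a /32 6rd prefix yields 32 + 24 = 56 bits, i.e. a /56 delegated prefix per site.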
1078 | 1076 | ||
1079 | static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu) | 1077 | static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu) |
1080 | { | 1078 | { |
1081 | if (new_mtu < IPV6_MIN_MTU || new_mtu > 0xFFF8 - sizeof(struct iphdr)) | 1079 | if (new_mtu < IPV6_MIN_MTU || new_mtu > 0xFFF8 - sizeof(struct iphdr)) |
1082 | return -EINVAL; | 1080 | return -EINVAL; |
1083 | dev->mtu = new_mtu; | 1081 | dev->mtu = new_mtu; |
1084 | return 0; | 1082 | return 0; |
1085 | } | 1083 | } |
1086 | 1084 | ||
1087 | static const struct net_device_ops ipip6_netdev_ops = { | 1085 | static const struct net_device_ops ipip6_netdev_ops = { |
1088 | .ndo_uninit = ipip6_tunnel_uninit, | 1086 | .ndo_uninit = ipip6_tunnel_uninit, |
1089 | .ndo_start_xmit = ipip6_tunnel_xmit, | 1087 | .ndo_start_xmit = ipip6_tunnel_xmit, |
1090 | .ndo_do_ioctl = ipip6_tunnel_ioctl, | 1088 | .ndo_do_ioctl = ipip6_tunnel_ioctl, |
1091 | .ndo_change_mtu = ipip6_tunnel_change_mtu, | 1089 | .ndo_change_mtu = ipip6_tunnel_change_mtu, |
1092 | }; | 1090 | }; |
1093 | 1091 | ||
1094 | static void ipip6_tunnel_setup(struct net_device *dev) | 1092 | static void ipip6_tunnel_setup(struct net_device *dev) |
1095 | { | 1093 | { |
1096 | dev->netdev_ops = &ipip6_netdev_ops; | 1094 | dev->netdev_ops = &ipip6_netdev_ops; |
1097 | dev->destructor = free_netdev; | 1095 | dev->destructor = free_netdev; |
1098 | 1096 | ||
1099 | dev->type = ARPHRD_SIT; | 1097 | dev->type = ARPHRD_SIT; |
1100 | dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr); | 1098 | dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr); |
1101 | dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr); | 1099 | dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr); |
1102 | dev->flags = IFF_NOARP; | 1100 | dev->flags = IFF_NOARP; |
1103 | dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; | 1101 | dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; |
1104 | dev->iflink = 0; | 1102 | dev->iflink = 0; |
1105 | dev->addr_len = 4; | 1103 | dev->addr_len = 4; |
1106 | dev->features |= NETIF_F_NETNS_LOCAL; | 1104 | dev->features |= NETIF_F_NETNS_LOCAL; |
1107 | } | 1105 | } |
1108 | 1106 | ||
1109 | static void ipip6_tunnel_init(struct net_device *dev) | 1107 | static void ipip6_tunnel_init(struct net_device *dev) |
1110 | { | 1108 | { |
1111 | struct ip_tunnel *tunnel = netdev_priv(dev); | 1109 | struct ip_tunnel *tunnel = netdev_priv(dev); |
1112 | 1110 | ||
1113 | tunnel->dev = dev; | 1111 | tunnel->dev = dev; |
1114 | strcpy(tunnel->parms.name, dev->name); | 1112 | strcpy(tunnel->parms.name, dev->name); |
1115 | 1113 | ||
1116 | memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); | 1114 | memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); |
1117 | memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); | 1115 | memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); |
1118 | 1116 | ||
1119 | ipip6_tunnel_bind_dev(dev); | 1117 | ipip6_tunnel_bind_dev(dev); |
1120 | } | 1118 | } |
1121 | 1119 | ||
1122 | static void __net_init ipip6_fb_tunnel_init(struct net_device *dev) | 1120 | static void __net_init ipip6_fb_tunnel_init(struct net_device *dev) |
1123 | { | 1121 | { |
1124 | struct ip_tunnel *tunnel = netdev_priv(dev); | 1122 | struct ip_tunnel *tunnel = netdev_priv(dev); |
1125 | struct iphdr *iph = &tunnel->parms.iph; | 1123 | struct iphdr *iph = &tunnel->parms.iph; |
1126 | struct net *net = dev_net(dev); | 1124 | struct net *net = dev_net(dev); |
1127 | struct sit_net *sitn = net_generic(net, sit_net_id); | 1125 | struct sit_net *sitn = net_generic(net, sit_net_id); |
1128 | 1126 | ||
1129 | tunnel->dev = dev; | 1127 | tunnel->dev = dev; |
1130 | strcpy(tunnel->parms.name, dev->name); | 1128 | strcpy(tunnel->parms.name, dev->name); |
1131 | 1129 | ||
1132 | iph->version = 4; | 1130 | iph->version = 4; |
1133 | iph->protocol = IPPROTO_IPV6; | 1131 | iph->protocol = IPPROTO_IPV6; |
1134 | iph->ihl = 5; | 1132 | iph->ihl = 5; |
1135 | iph->ttl = 64; | 1133 | iph->ttl = 64; |
1136 | 1134 | ||
1137 | dev_hold(dev); | 1135 | dev_hold(dev); |
1138 | sitn->tunnels_wc[0] = tunnel; | 1136 | sitn->tunnels_wc[0] = tunnel; |
1139 | } | 1137 | } |
1140 | 1138 | ||
1141 | static struct xfrm_tunnel sit_handler = { | 1139 | static struct xfrm_tunnel sit_handler = { |
1142 | .handler = ipip6_rcv, | 1140 | .handler = ipip6_rcv, |
1143 | .err_handler = ipip6_err, | 1141 | .err_handler = ipip6_err, |
1144 | .priority = 1, | 1142 | .priority = 1, |
1145 | }; | 1143 | }; |
1146 | 1144 | ||
1147 | static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_head *head) | 1145 | static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_head *head) |
1148 | { | 1146 | { |
1149 | int prio; | 1147 | int prio; |
1150 | 1148 | ||
1151 | for (prio = 1; prio < 4; prio++) { | 1149 | for (prio = 1; prio < 4; prio++) { |
1152 | int h; | 1150 | int h; |
1153 | for (h = 0; h < HASH_SIZE; h++) { | 1151 | for (h = 0; h < HASH_SIZE; h++) { |
1154 | struct ip_tunnel *t = sitn->tunnels[prio][h]; | 1152 | struct ip_tunnel *t = sitn->tunnels[prio][h]; |
1155 | 1153 | ||
1156 | while (t != NULL) { | 1154 | while (t != NULL) { |
1157 | unregister_netdevice_queue(t->dev, head); | 1155 | unregister_netdevice_queue(t->dev, head); |
1158 | t = t->next; | 1156 | t = t->next; |
1159 | } | 1157 | } |
1160 | } | 1158 | } |
1161 | } | 1159 | } |
1162 | } | 1160 | } |
1163 | 1161 | ||
1164 | static int __net_init sit_init_net(struct net *net) | 1162 | static int __net_init sit_init_net(struct net *net) |
1165 | { | 1163 | { |
1166 | struct sit_net *sitn = net_generic(net, sit_net_id); | 1164 | struct sit_net *sitn = net_generic(net, sit_net_id); |
1167 | int err; | 1165 | int err; |
1168 | 1166 | ||
1169 | sitn->tunnels[0] = sitn->tunnels_wc; | 1167 | sitn->tunnels[0] = sitn->tunnels_wc; |
1170 | sitn->tunnels[1] = sitn->tunnels_l; | 1168 | sitn->tunnels[1] = sitn->tunnels_l; |
1171 | sitn->tunnels[2] = sitn->tunnels_r; | 1169 | sitn->tunnels[2] = sitn->tunnels_r; |
1172 | sitn->tunnels[3] = sitn->tunnels_r_l; | 1170 | sitn->tunnels[3] = sitn->tunnels_r_l; |
1173 | 1171 | ||
1174 | sitn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "sit0", | 1172 | sitn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "sit0", |
1175 | ipip6_tunnel_setup); | 1173 | ipip6_tunnel_setup); |
1176 | if (!sitn->fb_tunnel_dev) { | 1174 | if (!sitn->fb_tunnel_dev) { |
1177 | err = -ENOMEM; | 1175 | err = -ENOMEM; |
1178 | goto err_alloc_dev; | 1176 | goto err_alloc_dev; |
1179 | } | 1177 | } |
1180 | dev_net_set(sitn->fb_tunnel_dev, net); | 1178 | dev_net_set(sitn->fb_tunnel_dev, net); |
1181 | 1179 | ||
1182 | ipip6_fb_tunnel_init(sitn->fb_tunnel_dev); | 1180 | ipip6_fb_tunnel_init(sitn->fb_tunnel_dev); |
1183 | ipip6_tunnel_clone_6rd(sitn->fb_tunnel_dev, sitn); | 1181 | ipip6_tunnel_clone_6rd(sitn->fb_tunnel_dev, sitn); |
1184 | 1182 | ||
1185 | if ((err = register_netdev(sitn->fb_tunnel_dev))) | 1183 | if ((err = register_netdev(sitn->fb_tunnel_dev))) |
1186 | goto err_reg_dev; | 1184 | goto err_reg_dev; |
1187 | 1185 | ||
1188 | return 0; | 1186 | return 0; |
1189 | 1187 | ||
1190 | err_reg_dev: | 1188 | err_reg_dev: |
1191 | dev_put(sitn->fb_tunnel_dev); | 1189 | dev_put(sitn->fb_tunnel_dev); |
1192 | free_netdev(sitn->fb_tunnel_dev); | 1190 | free_netdev(sitn->fb_tunnel_dev); |
1193 | err_alloc_dev: | 1191 | err_alloc_dev: |
1194 | return err; | 1192 | return err; |
1195 | } | 1193 | } |
1196 | 1194 | ||
1197 | static void __net_exit sit_exit_net(struct net *net) | 1195 | static void __net_exit sit_exit_net(struct net *net) |
1198 | { | 1196 | { |
1199 | struct sit_net *sitn = net_generic(net, sit_net_id); | 1197 | struct sit_net *sitn = net_generic(net, sit_net_id); |
1200 | LIST_HEAD(list); | 1198 | LIST_HEAD(list); |
1201 | 1199 | ||
1202 | rtnl_lock(); | 1200 | rtnl_lock(); |
1203 | sit_destroy_tunnels(sitn, &list); | 1201 | sit_destroy_tunnels(sitn, &list); |
1204 | unregister_netdevice_queue(sitn->fb_tunnel_dev, &list); | 1202 | unregister_netdevice_queue(sitn->fb_tunnel_dev, &list); |
1205 | unregister_netdevice_many(&list); | 1203 | unregister_netdevice_many(&list); |
1206 | rtnl_unlock(); | 1204 | rtnl_unlock(); |
1207 | } | 1205 | } |
1208 | 1206 | ||
1209 | static struct pernet_operations sit_net_ops = { | 1207 | static struct pernet_operations sit_net_ops = { |
1210 | .init = sit_init_net, | 1208 | .init = sit_init_net, |
1211 | .exit = sit_exit_net, | 1209 | .exit = sit_exit_net, |
1212 | .id = &sit_net_id, | 1210 | .id = &sit_net_id, |
1213 | .size = sizeof(struct sit_net), | 1211 | .size = sizeof(struct sit_net), |
1214 | }; | 1212 | }; |
1215 | 1213 | ||
1216 | static void __exit sit_cleanup(void) | 1214 | static void __exit sit_cleanup(void) |
1217 | { | 1215 | { |
1218 | xfrm4_tunnel_deregister(&sit_handler, AF_INET6); | 1216 | xfrm4_tunnel_deregister(&sit_handler, AF_INET6); |
1219 | 1217 | ||
1220 | unregister_pernet_device(&sit_net_ops); | 1218 | unregister_pernet_device(&sit_net_ops); |
1221 | rcu_barrier(); /* Wait for completion of call_rcu() callbacks */ | 1219 | rcu_barrier(); /* Wait for completion of call_rcu() callbacks */ |
1222 | } | 1220 | } |
1223 | 1221 | ||
1224 | static int __init sit_init(void) | 1222 | static int __init sit_init(void) |
1225 | { | 1223 | { |
1226 | int err; | 1224 | int err; |
1227 | 1225 | ||
1228 | printk(KERN_INFO "IPv6 over IPv4 tunneling driver\n"); | 1226 | printk(KERN_INFO "IPv6 over IPv4 tunneling driver\n"); |
1229 | 1227 | ||
1230 | err = register_pernet_device(&sit_net_ops); | 1228 | err = register_pernet_device(&sit_net_ops); |
1231 | if (err < 0) | 1229 | if (err < 0) |
1232 | return err; | 1230 | return err; |
1233 | err = xfrm4_tunnel_register(&sit_handler, AF_INET6); | 1231 | err = xfrm4_tunnel_register(&sit_handler, AF_INET6); |
1234 | if (err < 0) { | 1232 | if (err < 0) { |
1235 | unregister_pernet_device(&sit_net_ops); | 1233 | unregister_pernet_device(&sit_net_ops); |
1236 | printk(KERN_INFO "sit init: Can't add protocol\n"); | 1234 | printk(KERN_INFO "sit init: Can't add protocol\n"); |
1237 | } | 1235 | } |
1238 | return err; | 1236 | return err; |
1239 | } | 1237 | } |
1240 | 1238 | ||
1241 | module_init(sit_init); | 1239 | module_init(sit_init); |
1242 | module_exit(sit_cleanup); | 1240 | module_exit(sit_cleanup); |
1243 | MODULE_LICENSE("GPL"); | 1241 | MODULE_LICENSE("GPL"); |
1244 | MODULE_ALIAS("sit0"); | 1242 | MODULE_ALIAS("sit0"); |
1245 | 1243 |