Commit f2c31e32b378a6653f8de606149d963baf11d7d3
net: fix NULL dereferences in check_peer_redir()
Gergely Kalman reported crashes in check_peer_redir(). It appears commit f39925dbde778 (ipv4: Cache learned redirect information in inetpeer.) added a race, leading to a possible NULL ptr dereference. Since we can now change the dst neighbour, we should make sure a reader can safely use a neighbour. Add RCU protection to the dst neighbour, and make sure check_peer_redir() can be called safely by different cpus in parallel. As neighbours are already freed after one RCU grace period, this patch should not add the typical RCU penalty (cache cold effects). Many thanks to Gergely for providing a pretty report pointing to the bug. Reported-by: Gergely Kalman <synapse@hippy.csoma.elte.hu> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Showing 7 changed files with 67 additions and 26 deletions Inline Diff
1 | /* | 1 | /* |
2 | * net/dst.h Protocol independent destination cache definitions. | 2 | * net/dst.h Protocol independent destination cache definitions. |
3 | * | 3 | * |
4 | * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> | 4 | * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> |
5 | * | 5 | * |
6 | */ | 6 | */ |
7 | 7 | ||
8 | #ifndef _NET_DST_H | 8 | #ifndef _NET_DST_H |
9 | #define _NET_DST_H | 9 | #define _NET_DST_H |
10 | 10 | ||
11 | #include <net/dst_ops.h> | 11 | #include <net/dst_ops.h> |
12 | #include <linux/netdevice.h> | 12 | #include <linux/netdevice.h> |
13 | #include <linux/rtnetlink.h> | 13 | #include <linux/rtnetlink.h> |
14 | #include <linux/rcupdate.h> | 14 | #include <linux/rcupdate.h> |
15 | #include <linux/jiffies.h> | 15 | #include <linux/jiffies.h> |
16 | #include <net/neighbour.h> | 16 | #include <net/neighbour.h> |
17 | #include <asm/processor.h> | 17 | #include <asm/processor.h> |
18 | 18 | ||
19 | #define DST_GC_MIN (HZ/10) | 19 | #define DST_GC_MIN (HZ/10) |
20 | #define DST_GC_INC (HZ/2) | 20 | #define DST_GC_INC (HZ/2) |
21 | #define DST_GC_MAX (120*HZ) | 21 | #define DST_GC_MAX (120*HZ) |
22 | 22 | ||
23 | /* Each dst_entry has reference count and sits in some parent list(s). | 23 | /* Each dst_entry has reference count and sits in some parent list(s). |
24 | * When it is removed from parent list, it is "freed" (dst_free). | 24 | * When it is removed from parent list, it is "freed" (dst_free). |
25 | * After this it enters dead state (dst->obsolete > 0) and if its refcnt | 25 | * After this it enters dead state (dst->obsolete > 0) and if its refcnt |
26 | * is zero, it can be destroyed immediately, otherwise it is added | 26 | * is zero, it can be destroyed immediately, otherwise it is added |
27 | * to gc list and garbage collector periodically checks the refcnt. | 27 | * to gc list and garbage collector periodically checks the refcnt. |
28 | */ | 28 | */ |
29 | 29 | ||
30 | struct sk_buff; | 30 | struct sk_buff; |
31 | 31 | ||
32 | struct dst_entry { | 32 | struct dst_entry { |
33 | struct rcu_head rcu_head; | 33 | struct rcu_head rcu_head; |
34 | struct dst_entry *child; | 34 | struct dst_entry *child; |
35 | struct net_device *dev; | 35 | struct net_device *dev; |
36 | struct dst_ops *ops; | 36 | struct dst_ops *ops; |
37 | unsigned long _metrics; | 37 | unsigned long _metrics; |
38 | unsigned long expires; | 38 | unsigned long expires; |
39 | struct dst_entry *path; | 39 | struct dst_entry *path; |
40 | struct neighbour *_neighbour; | 40 | struct neighbour __rcu *_neighbour; |
41 | #ifdef CONFIG_XFRM | 41 | #ifdef CONFIG_XFRM |
42 | struct xfrm_state *xfrm; | 42 | struct xfrm_state *xfrm; |
43 | #else | 43 | #else |
44 | void *__pad1; | 44 | void *__pad1; |
45 | #endif | 45 | #endif |
46 | int (*input)(struct sk_buff*); | 46 | int (*input)(struct sk_buff*); |
47 | int (*output)(struct sk_buff*); | 47 | int (*output)(struct sk_buff*); |
48 | 48 | ||
49 | int flags; | 49 | int flags; |
50 | #define DST_HOST 0x0001 | 50 | #define DST_HOST 0x0001 |
51 | #define DST_NOXFRM 0x0002 | 51 | #define DST_NOXFRM 0x0002 |
52 | #define DST_NOPOLICY 0x0004 | 52 | #define DST_NOPOLICY 0x0004 |
53 | #define DST_NOHASH 0x0008 | 53 | #define DST_NOHASH 0x0008 |
54 | #define DST_NOCACHE 0x0010 | 54 | #define DST_NOCACHE 0x0010 |
55 | #define DST_NOCOUNT 0x0020 | 55 | #define DST_NOCOUNT 0x0020 |
56 | 56 | ||
57 | short error; | 57 | short error; |
58 | short obsolete; | 58 | short obsolete; |
59 | unsigned short header_len; /* more space at head required */ | 59 | unsigned short header_len; /* more space at head required */ |
60 | unsigned short trailer_len; /* space to reserve at tail */ | 60 | unsigned short trailer_len; /* space to reserve at tail */ |
61 | #ifdef CONFIG_IP_ROUTE_CLASSID | 61 | #ifdef CONFIG_IP_ROUTE_CLASSID |
62 | __u32 tclassid; | 62 | __u32 tclassid; |
63 | #else | 63 | #else |
64 | __u32 __pad2; | 64 | __u32 __pad2; |
65 | #endif | 65 | #endif |
66 | 66 | ||
67 | /* | 67 | /* |
68 | * Align __refcnt to a 64 bytes alignment | 68 | * Align __refcnt to a 64 bytes alignment |
69 | * (L1_CACHE_SIZE would be too much) | 69 | * (L1_CACHE_SIZE would be too much) |
70 | */ | 70 | */ |
71 | #ifdef CONFIG_64BIT | 71 | #ifdef CONFIG_64BIT |
72 | long __pad_to_align_refcnt[2]; | 72 | long __pad_to_align_refcnt[2]; |
73 | #endif | 73 | #endif |
74 | /* | 74 | /* |
75 | * __refcnt wants to be on a different cache line from | 75 | * __refcnt wants to be on a different cache line from |
76 | * input/output/ops or performance tanks badly | 76 | * input/output/ops or performance tanks badly |
77 | */ | 77 | */ |
78 | atomic_t __refcnt; /* client references */ | 78 | atomic_t __refcnt; /* client references */ |
79 | int __use; | 79 | int __use; |
80 | unsigned long lastuse; | 80 | unsigned long lastuse; |
81 | union { | 81 | union { |
82 | struct dst_entry *next; | 82 | struct dst_entry *next; |
83 | struct rtable __rcu *rt_next; | 83 | struct rtable __rcu *rt_next; |
84 | struct rt6_info *rt6_next; | 84 | struct rt6_info *rt6_next; |
85 | struct dn_route __rcu *dn_next; | 85 | struct dn_route __rcu *dn_next; |
86 | }; | 86 | }; |
87 | }; | 87 | }; |
88 | 88 | ||
89 | static inline struct neighbour *dst_get_neighbour(struct dst_entry *dst) | 89 | static inline struct neighbour *dst_get_neighbour(struct dst_entry *dst) |
90 | { | 90 | { |
91 | return dst->_neighbour; | 91 | return rcu_dereference(dst->_neighbour); |
92 | } | 92 | } |
93 | 93 | ||
94 | static inline struct neighbour *dst_get_neighbour_raw(struct dst_entry *dst) | ||
95 | { | ||
96 | return rcu_dereference_raw(dst->_neighbour); | ||
97 | } | ||
98 | |||
94 | static inline void dst_set_neighbour(struct dst_entry *dst, struct neighbour *neigh) | 99 | static inline void dst_set_neighbour(struct dst_entry *dst, struct neighbour *neigh) |
95 | { | 100 | { |
96 | dst->_neighbour = neigh; | 101 | rcu_assign_pointer(dst->_neighbour, neigh); |
97 | } | 102 | } |
98 | 103 | ||
99 | extern u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old); | 104 | extern u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old); |
100 | extern const u32 dst_default_metrics[RTAX_MAX]; | 105 | extern const u32 dst_default_metrics[RTAX_MAX]; |
101 | 106 | ||
102 | #define DST_METRICS_READ_ONLY 0x1UL | 107 | #define DST_METRICS_READ_ONLY 0x1UL |
103 | #define __DST_METRICS_PTR(Y) \ | 108 | #define __DST_METRICS_PTR(Y) \ |
104 | ((u32 *)((Y) & ~DST_METRICS_READ_ONLY)) | 109 | ((u32 *)((Y) & ~DST_METRICS_READ_ONLY)) |
105 | #define DST_METRICS_PTR(X) __DST_METRICS_PTR((X)->_metrics) | 110 | #define DST_METRICS_PTR(X) __DST_METRICS_PTR((X)->_metrics) |
106 | 111 | ||
107 | static inline bool dst_metrics_read_only(const struct dst_entry *dst) | 112 | static inline bool dst_metrics_read_only(const struct dst_entry *dst) |
108 | { | 113 | { |
109 | return dst->_metrics & DST_METRICS_READ_ONLY; | 114 | return dst->_metrics & DST_METRICS_READ_ONLY; |
110 | } | 115 | } |
111 | 116 | ||
112 | extern void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old); | 117 | extern void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old); |
113 | 118 | ||
114 | static inline void dst_destroy_metrics_generic(struct dst_entry *dst) | 119 | static inline void dst_destroy_metrics_generic(struct dst_entry *dst) |
115 | { | 120 | { |
116 | unsigned long val = dst->_metrics; | 121 | unsigned long val = dst->_metrics; |
117 | if (!(val & DST_METRICS_READ_ONLY)) | 122 | if (!(val & DST_METRICS_READ_ONLY)) |
118 | __dst_destroy_metrics_generic(dst, val); | 123 | __dst_destroy_metrics_generic(dst, val); |
119 | } | 124 | } |
120 | 125 | ||
121 | static inline u32 *dst_metrics_write_ptr(struct dst_entry *dst) | 126 | static inline u32 *dst_metrics_write_ptr(struct dst_entry *dst) |
122 | { | 127 | { |
123 | unsigned long p = dst->_metrics; | 128 | unsigned long p = dst->_metrics; |
124 | 129 | ||
125 | BUG_ON(!p); | 130 | BUG_ON(!p); |
126 | 131 | ||
127 | if (p & DST_METRICS_READ_ONLY) | 132 | if (p & DST_METRICS_READ_ONLY) |
128 | return dst->ops->cow_metrics(dst, p); | 133 | return dst->ops->cow_metrics(dst, p); |
129 | return __DST_METRICS_PTR(p); | 134 | return __DST_METRICS_PTR(p); |
130 | } | 135 | } |
131 | 136 | ||
132 | /* This may only be invoked before the entry has reached global | 137 | /* This may only be invoked before the entry has reached global |
133 | * visibility. | 138 | * visibility. |
134 | */ | 139 | */ |
135 | static inline void dst_init_metrics(struct dst_entry *dst, | 140 | static inline void dst_init_metrics(struct dst_entry *dst, |
136 | const u32 *src_metrics, | 141 | const u32 *src_metrics, |
137 | bool read_only) | 142 | bool read_only) |
138 | { | 143 | { |
139 | dst->_metrics = ((unsigned long) src_metrics) | | 144 | dst->_metrics = ((unsigned long) src_metrics) | |
140 | (read_only ? DST_METRICS_READ_ONLY : 0); | 145 | (read_only ? DST_METRICS_READ_ONLY : 0); |
141 | } | 146 | } |
142 | 147 | ||
143 | static inline void dst_copy_metrics(struct dst_entry *dest, const struct dst_entry *src) | 148 | static inline void dst_copy_metrics(struct dst_entry *dest, const struct dst_entry *src) |
144 | { | 149 | { |
145 | u32 *dst_metrics = dst_metrics_write_ptr(dest); | 150 | u32 *dst_metrics = dst_metrics_write_ptr(dest); |
146 | 151 | ||
147 | if (dst_metrics) { | 152 | if (dst_metrics) { |
148 | u32 *src_metrics = DST_METRICS_PTR(src); | 153 | u32 *src_metrics = DST_METRICS_PTR(src); |
149 | 154 | ||
150 | memcpy(dst_metrics, src_metrics, RTAX_MAX * sizeof(u32)); | 155 | memcpy(dst_metrics, src_metrics, RTAX_MAX * sizeof(u32)); |
151 | } | 156 | } |
152 | } | 157 | } |
153 | 158 | ||
154 | static inline u32 *dst_metrics_ptr(struct dst_entry *dst) | 159 | static inline u32 *dst_metrics_ptr(struct dst_entry *dst) |
155 | { | 160 | { |
156 | return DST_METRICS_PTR(dst); | 161 | return DST_METRICS_PTR(dst); |
157 | } | 162 | } |
158 | 163 | ||
159 | static inline u32 | 164 | static inline u32 |
160 | dst_metric_raw(const struct dst_entry *dst, const int metric) | 165 | dst_metric_raw(const struct dst_entry *dst, const int metric) |
161 | { | 166 | { |
162 | u32 *p = DST_METRICS_PTR(dst); | 167 | u32 *p = DST_METRICS_PTR(dst); |
163 | 168 | ||
164 | return p[metric-1]; | 169 | return p[metric-1]; |
165 | } | 170 | } |
166 | 171 | ||
167 | static inline u32 | 172 | static inline u32 |
168 | dst_metric(const struct dst_entry *dst, const int metric) | 173 | dst_metric(const struct dst_entry *dst, const int metric) |
169 | { | 174 | { |
170 | WARN_ON_ONCE(metric == RTAX_HOPLIMIT || | 175 | WARN_ON_ONCE(metric == RTAX_HOPLIMIT || |
171 | metric == RTAX_ADVMSS || | 176 | metric == RTAX_ADVMSS || |
172 | metric == RTAX_MTU); | 177 | metric == RTAX_MTU); |
173 | return dst_metric_raw(dst, metric); | 178 | return dst_metric_raw(dst, metric); |
174 | } | 179 | } |
175 | 180 | ||
176 | static inline u32 | 181 | static inline u32 |
177 | dst_metric_advmss(const struct dst_entry *dst) | 182 | dst_metric_advmss(const struct dst_entry *dst) |
178 | { | 183 | { |
179 | u32 advmss = dst_metric_raw(dst, RTAX_ADVMSS); | 184 | u32 advmss = dst_metric_raw(dst, RTAX_ADVMSS); |
180 | 185 | ||
181 | if (!advmss) | 186 | if (!advmss) |
182 | advmss = dst->ops->default_advmss(dst); | 187 | advmss = dst->ops->default_advmss(dst); |
183 | 188 | ||
184 | return advmss; | 189 | return advmss; |
185 | } | 190 | } |
186 | 191 | ||
187 | static inline void dst_metric_set(struct dst_entry *dst, int metric, u32 val) | 192 | static inline void dst_metric_set(struct dst_entry *dst, int metric, u32 val) |
188 | { | 193 | { |
189 | u32 *p = dst_metrics_write_ptr(dst); | 194 | u32 *p = dst_metrics_write_ptr(dst); |
190 | 195 | ||
191 | if (p) | 196 | if (p) |
192 | p[metric-1] = val; | 197 | p[metric-1] = val; |
193 | } | 198 | } |
194 | 199 | ||
195 | static inline u32 | 200 | static inline u32 |
196 | dst_feature(const struct dst_entry *dst, u32 feature) | 201 | dst_feature(const struct dst_entry *dst, u32 feature) |
197 | { | 202 | { |
198 | return dst_metric(dst, RTAX_FEATURES) & feature; | 203 | return dst_metric(dst, RTAX_FEATURES) & feature; |
199 | } | 204 | } |
200 | 205 | ||
201 | static inline u32 dst_mtu(const struct dst_entry *dst) | 206 | static inline u32 dst_mtu(const struct dst_entry *dst) |
202 | { | 207 | { |
203 | u32 mtu = dst_metric_raw(dst, RTAX_MTU); | 208 | u32 mtu = dst_metric_raw(dst, RTAX_MTU); |
204 | 209 | ||
205 | if (!mtu) | 210 | if (!mtu) |
206 | mtu = dst->ops->default_mtu(dst); | 211 | mtu = dst->ops->default_mtu(dst); |
207 | 212 | ||
208 | return mtu; | 213 | return mtu; |
209 | } | 214 | } |
210 | 215 | ||
211 | /* RTT metrics are stored in milliseconds for user ABI, but used as jiffies */ | 216 | /* RTT metrics are stored in milliseconds for user ABI, but used as jiffies */ |
212 | static inline unsigned long dst_metric_rtt(const struct dst_entry *dst, int metric) | 217 | static inline unsigned long dst_metric_rtt(const struct dst_entry *dst, int metric) |
213 | { | 218 | { |
214 | return msecs_to_jiffies(dst_metric(dst, metric)); | 219 | return msecs_to_jiffies(dst_metric(dst, metric)); |
215 | } | 220 | } |
216 | 221 | ||
217 | static inline void set_dst_metric_rtt(struct dst_entry *dst, int metric, | 222 | static inline void set_dst_metric_rtt(struct dst_entry *dst, int metric, |
218 | unsigned long rtt) | 223 | unsigned long rtt) |
219 | { | 224 | { |
220 | dst_metric_set(dst, metric, jiffies_to_msecs(rtt)); | 225 | dst_metric_set(dst, metric, jiffies_to_msecs(rtt)); |
221 | } | 226 | } |
222 | 227 | ||
223 | static inline u32 | 228 | static inline u32 |
224 | dst_allfrag(const struct dst_entry *dst) | 229 | dst_allfrag(const struct dst_entry *dst) |
225 | { | 230 | { |
226 | int ret = dst_feature(dst, RTAX_FEATURE_ALLFRAG); | 231 | int ret = dst_feature(dst, RTAX_FEATURE_ALLFRAG); |
227 | return ret; | 232 | return ret; |
228 | } | 233 | } |
229 | 234 | ||
230 | static inline int | 235 | static inline int |
231 | dst_metric_locked(const struct dst_entry *dst, int metric) | 236 | dst_metric_locked(const struct dst_entry *dst, int metric) |
232 | { | 237 | { |
233 | return dst_metric(dst, RTAX_LOCK) & (1<<metric); | 238 | return dst_metric(dst, RTAX_LOCK) & (1<<metric); |
234 | } | 239 | } |
235 | 240 | ||
236 | static inline void dst_hold(struct dst_entry * dst) | 241 | static inline void dst_hold(struct dst_entry * dst) |
237 | { | 242 | { |
238 | /* | 243 | /* |
239 | * If your kernel compilation stops here, please check | 244 | * If your kernel compilation stops here, please check |
240 | * __pad_to_align_refcnt declaration in struct dst_entry | 245 | * __pad_to_align_refcnt declaration in struct dst_entry |
241 | */ | 246 | */ |
242 | BUILD_BUG_ON(offsetof(struct dst_entry, __refcnt) & 63); | 247 | BUILD_BUG_ON(offsetof(struct dst_entry, __refcnt) & 63); |
243 | atomic_inc(&dst->__refcnt); | 248 | atomic_inc(&dst->__refcnt); |
244 | } | 249 | } |
245 | 250 | ||
246 | static inline void dst_use(struct dst_entry *dst, unsigned long time) | 251 | static inline void dst_use(struct dst_entry *dst, unsigned long time) |
247 | { | 252 | { |
248 | dst_hold(dst); | 253 | dst_hold(dst); |
249 | dst->__use++; | 254 | dst->__use++; |
250 | dst->lastuse = time; | 255 | dst->lastuse = time; |
251 | } | 256 | } |
252 | 257 | ||
253 | static inline void dst_use_noref(struct dst_entry *dst, unsigned long time) | 258 | static inline void dst_use_noref(struct dst_entry *dst, unsigned long time) |
254 | { | 259 | { |
255 | dst->__use++; | 260 | dst->__use++; |
256 | dst->lastuse = time; | 261 | dst->lastuse = time; |
257 | } | 262 | } |
258 | 263 | ||
259 | static inline | 264 | static inline |
260 | struct dst_entry * dst_clone(struct dst_entry * dst) | 265 | struct dst_entry * dst_clone(struct dst_entry * dst) |
261 | { | 266 | { |
262 | if (dst) | 267 | if (dst) |
263 | atomic_inc(&dst->__refcnt); | 268 | atomic_inc(&dst->__refcnt); |
264 | return dst; | 269 | return dst; |
265 | } | 270 | } |
266 | 271 | ||
267 | extern void dst_release(struct dst_entry *dst); | 272 | extern void dst_release(struct dst_entry *dst); |
268 | 273 | ||
269 | static inline void refdst_drop(unsigned long refdst) | 274 | static inline void refdst_drop(unsigned long refdst) |
270 | { | 275 | { |
271 | if (!(refdst & SKB_DST_NOREF)) | 276 | if (!(refdst & SKB_DST_NOREF)) |
272 | dst_release((struct dst_entry *)(refdst & SKB_DST_PTRMASK)); | 277 | dst_release((struct dst_entry *)(refdst & SKB_DST_PTRMASK)); |
273 | } | 278 | } |
274 | 279 | ||
275 | /** | 280 | /** |
276 | * skb_dst_drop - drops skb dst | 281 | * skb_dst_drop - drops skb dst |
277 | * @skb: buffer | 282 | * @skb: buffer |
278 | * | 283 | * |
279 | * Drops dst reference count if a reference was taken. | 284 | * Drops dst reference count if a reference was taken. |
280 | */ | 285 | */ |
281 | static inline void skb_dst_drop(struct sk_buff *skb) | 286 | static inline void skb_dst_drop(struct sk_buff *skb) |
282 | { | 287 | { |
283 | if (skb->_skb_refdst) { | 288 | if (skb->_skb_refdst) { |
284 | refdst_drop(skb->_skb_refdst); | 289 | refdst_drop(skb->_skb_refdst); |
285 | skb->_skb_refdst = 0UL; | 290 | skb->_skb_refdst = 0UL; |
286 | } | 291 | } |
287 | } | 292 | } |
288 | 293 | ||
289 | static inline void skb_dst_copy(struct sk_buff *nskb, const struct sk_buff *oskb) | 294 | static inline void skb_dst_copy(struct sk_buff *nskb, const struct sk_buff *oskb) |
290 | { | 295 | { |
291 | nskb->_skb_refdst = oskb->_skb_refdst; | 296 | nskb->_skb_refdst = oskb->_skb_refdst; |
292 | if (!(nskb->_skb_refdst & SKB_DST_NOREF)) | 297 | if (!(nskb->_skb_refdst & SKB_DST_NOREF)) |
293 | dst_clone(skb_dst(nskb)); | 298 | dst_clone(skb_dst(nskb)); |
294 | } | 299 | } |
295 | 300 | ||
296 | /** | 301 | /** |
297 | * skb_dst_force - makes sure skb dst is refcounted | 302 | * skb_dst_force - makes sure skb dst is refcounted |
298 | * @skb: buffer | 303 | * @skb: buffer |
299 | * | 304 | * |
300 | * If dst is not yet refcounted, let's do it | 305 | * If dst is not yet refcounted, let's do it |
301 | */ | 306 | */ |
302 | static inline void skb_dst_force(struct sk_buff *skb) | 307 | static inline void skb_dst_force(struct sk_buff *skb) |
303 | { | 308 | { |
304 | if (skb_dst_is_noref(skb)) { | 309 | if (skb_dst_is_noref(skb)) { |
305 | WARN_ON(!rcu_read_lock_held()); | 310 | WARN_ON(!rcu_read_lock_held()); |
306 | skb->_skb_refdst &= ~SKB_DST_NOREF; | 311 | skb->_skb_refdst &= ~SKB_DST_NOREF; |
307 | dst_clone(skb_dst(skb)); | 312 | dst_clone(skb_dst(skb)); |
308 | } | 313 | } |
309 | } | 314 | } |
310 | 315 | ||
311 | 316 | ||
312 | /** | 317 | /** |
313 | * __skb_tunnel_rx - prepare skb for rx reinsert | 318 | * __skb_tunnel_rx - prepare skb for rx reinsert |
314 | * @skb: buffer | 319 | * @skb: buffer |
315 | * @dev: tunnel device | 320 | * @dev: tunnel device |
316 | * | 321 | * |
317 | * After decapsulation, packet is going to re-enter (netif_rx()) our stack, | 322 | * After decapsulation, packet is going to re-enter (netif_rx()) our stack, |
318 | * so make some cleanups. (no accounting done) | 323 | * so make some cleanups. (no accounting done) |
319 | */ | 324 | */ |
320 | static inline void __skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev) | 325 | static inline void __skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev) |
321 | { | 326 | { |
322 | skb->dev = dev; | 327 | skb->dev = dev; |
323 | skb->rxhash = 0; | 328 | skb->rxhash = 0; |
324 | skb_set_queue_mapping(skb, 0); | 329 | skb_set_queue_mapping(skb, 0); |
325 | skb_dst_drop(skb); | 330 | skb_dst_drop(skb); |
326 | nf_reset(skb); | 331 | nf_reset(skb); |
327 | } | 332 | } |
328 | 333 | ||
329 | /** | 334 | /** |
330 | * skb_tunnel_rx - prepare skb for rx reinsert | 335 | * skb_tunnel_rx - prepare skb for rx reinsert |
331 | * @skb: buffer | 336 | * @skb: buffer |
332 | * @dev: tunnel device | 337 | * @dev: tunnel device |
333 | * | 338 | * |
334 | * After decapsulation, packet is going to re-enter (netif_rx()) our stack, | 339 | * After decapsulation, packet is going to re-enter (netif_rx()) our stack, |
335 | * so make some cleanups, and perform accounting. | 340 | * so make some cleanups, and perform accounting. |
336 | * Note: this accounting is not SMP safe. | 341 | * Note: this accounting is not SMP safe. |
337 | */ | 342 | */ |
338 | static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev) | 343 | static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev) |
339 | { | 344 | { |
340 | /* TODO : stats should be SMP safe */ | 345 | /* TODO : stats should be SMP safe */ |
341 | dev->stats.rx_packets++; | 346 | dev->stats.rx_packets++; |
342 | dev->stats.rx_bytes += skb->len; | 347 | dev->stats.rx_bytes += skb->len; |
343 | __skb_tunnel_rx(skb, dev); | 348 | __skb_tunnel_rx(skb, dev); |
344 | } | 349 | } |
345 | 350 | ||
346 | /* Children define the path of the packet through the | 351 | /* Children define the path of the packet through the |
347 | * Linux networking. Thus, destinations are stackable. | 352 | * Linux networking. Thus, destinations are stackable. |
348 | */ | 353 | */ |
349 | 354 | ||
350 | static inline struct dst_entry *skb_dst_pop(struct sk_buff *skb) | 355 | static inline struct dst_entry *skb_dst_pop(struct sk_buff *skb) |
351 | { | 356 | { |
352 | struct dst_entry *child = dst_clone(skb_dst(skb)->child); | 357 | struct dst_entry *child = dst_clone(skb_dst(skb)->child); |
353 | 358 | ||
354 | skb_dst_drop(skb); | 359 | skb_dst_drop(skb); |
355 | return child; | 360 | return child; |
356 | } | 361 | } |
357 | 362 | ||
358 | extern int dst_discard(struct sk_buff *skb); | 363 | extern int dst_discard(struct sk_buff *skb); |
359 | extern void *dst_alloc(struct dst_ops * ops, struct net_device *dev, | 364 | extern void *dst_alloc(struct dst_ops * ops, struct net_device *dev, |
360 | int initial_ref, int initial_obsolete, int flags); | 365 | int initial_ref, int initial_obsolete, int flags); |
361 | extern void __dst_free(struct dst_entry * dst); | 366 | extern void __dst_free(struct dst_entry * dst); |
362 | extern struct dst_entry *dst_destroy(struct dst_entry * dst); | 367 | extern struct dst_entry *dst_destroy(struct dst_entry * dst); |
363 | 368 | ||
364 | static inline void dst_free(struct dst_entry * dst) | 369 | static inline void dst_free(struct dst_entry * dst) |
365 | { | 370 | { |
366 | if (dst->obsolete > 1) | 371 | if (dst->obsolete > 1) |
367 | return; | 372 | return; |
368 | if (!atomic_read(&dst->__refcnt)) { | 373 | if (!atomic_read(&dst->__refcnt)) { |
369 | dst = dst_destroy(dst); | 374 | dst = dst_destroy(dst); |
370 | if (!dst) | 375 | if (!dst) |
371 | return; | 376 | return; |
372 | } | 377 | } |
373 | __dst_free(dst); | 378 | __dst_free(dst); |
374 | } | 379 | } |
375 | 380 | ||
376 | static inline void dst_rcu_free(struct rcu_head *head) | 381 | static inline void dst_rcu_free(struct rcu_head *head) |
377 | { | 382 | { |
378 | struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head); | 383 | struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head); |
379 | dst_free(dst); | 384 | dst_free(dst); |
380 | } | 385 | } |
381 | 386 | ||
382 | static inline void dst_confirm(struct dst_entry *dst) | 387 | static inline void dst_confirm(struct dst_entry *dst) |
383 | { | 388 | { |
384 | if (dst) { | 389 | if (dst) { |
385 | struct neighbour *n = dst_get_neighbour(dst); | 390 | struct neighbour *n; |
391 | |||
392 | rcu_read_lock(); | ||
393 | n = dst_get_neighbour(dst); | ||
386 | neigh_confirm(n); | 394 | neigh_confirm(n); |
395 | rcu_read_unlock(); | ||
387 | } | 396 | } |
388 | } | 397 | } |
389 | 398 | ||
390 | static inline struct neighbour *dst_neigh_lookup(const struct dst_entry *dst, const void *daddr) | 399 | static inline struct neighbour *dst_neigh_lookup(const struct dst_entry *dst, const void *daddr) |
391 | { | 400 | { |
392 | return dst->ops->neigh_lookup(dst, daddr); | 401 | return dst->ops->neigh_lookup(dst, daddr); |
393 | } | 402 | } |
394 | 403 | ||
395 | static inline void dst_link_failure(struct sk_buff *skb) | 404 | static inline void dst_link_failure(struct sk_buff *skb) |
396 | { | 405 | { |
397 | struct dst_entry *dst = skb_dst(skb); | 406 | struct dst_entry *dst = skb_dst(skb); |
398 | if (dst && dst->ops && dst->ops->link_failure) | 407 | if (dst && dst->ops && dst->ops->link_failure) |
399 | dst->ops->link_failure(skb); | 408 | dst->ops->link_failure(skb); |
400 | } | 409 | } |
401 | 410 | ||
402 | static inline void dst_set_expires(struct dst_entry *dst, int timeout) | 411 | static inline void dst_set_expires(struct dst_entry *dst, int timeout) |
403 | { | 412 | { |
404 | unsigned long expires = jiffies + timeout; | 413 | unsigned long expires = jiffies + timeout; |
405 | 414 | ||
406 | if (expires == 0) | 415 | if (expires == 0) |
407 | expires = 1; | 416 | expires = 1; |
408 | 417 | ||
409 | if (dst->expires == 0 || time_before(expires, dst->expires)) | 418 | if (dst->expires == 0 || time_before(expires, dst->expires)) |
410 | dst->expires = expires; | 419 | dst->expires = expires; |
411 | } | 420 | } |
412 | 421 | ||
413 | /* Output packet to network from transport. */ | 422 | /* Output packet to network from transport. */ |
414 | static inline int dst_output(struct sk_buff *skb) | 423 | static inline int dst_output(struct sk_buff *skb) |
415 | { | 424 | { |
416 | return skb_dst(skb)->output(skb); | 425 | return skb_dst(skb)->output(skb); |
417 | } | 426 | } |
418 | 427 | ||
419 | /* Input packet from network to transport. */ | 428 | /* Input packet from network to transport. */ |
420 | static inline int dst_input(struct sk_buff *skb) | 429 | static inline int dst_input(struct sk_buff *skb) |
421 | { | 430 | { |
422 | return skb_dst(skb)->input(skb); | 431 | return skb_dst(skb)->input(skb); |
423 | } | 432 | } |
424 | 433 | ||
425 | static inline struct dst_entry *dst_check(struct dst_entry *dst, u32 cookie) | 434 | static inline struct dst_entry *dst_check(struct dst_entry *dst, u32 cookie) |
426 | { | 435 | { |
427 | if (dst->obsolete) | 436 | if (dst->obsolete) |
428 | dst = dst->ops->check(dst, cookie); | 437 | dst = dst->ops->check(dst, cookie); |
429 | return dst; | 438 | return dst; |
430 | } | 439 | } |
431 | 440 | ||
432 | extern void dst_init(void); | 441 | extern void dst_init(void); |
433 | 442 | ||
434 | /* Flags for xfrm_lookup flags argument. */ | 443 | /* Flags for xfrm_lookup flags argument. */ |
435 | enum { | 444 | enum { |
436 | XFRM_LOOKUP_ICMP = 1 << 0, | 445 | XFRM_LOOKUP_ICMP = 1 << 0, |
437 | }; | 446 | }; |
438 | 447 | ||
439 | struct flowi; | 448 | struct flowi; |
440 | #ifndef CONFIG_XFRM | 449 | #ifndef CONFIG_XFRM |
441 | static inline struct dst_entry *xfrm_lookup(struct net *net, | 450 | static inline struct dst_entry *xfrm_lookup(struct net *net, |
442 | struct dst_entry *dst_orig, | 451 | struct dst_entry *dst_orig, |
443 | const struct flowi *fl, struct sock *sk, | 452 | const struct flowi *fl, struct sock *sk, |
444 | int flags) | 453 | int flags) |
445 | { | 454 | { |
446 | return dst_orig; | 455 | return dst_orig; |
447 | } | 456 | } |
448 | #else | 457 | #else |
449 | extern struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, | 458 | extern struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, |
450 | const struct flowi *fl, struct sock *sk, | 459 | const struct flowi *fl, struct sock *sk, |
451 | int flags); | 460 | int flags); |
452 | #endif | 461 | #endif |
453 | 462 | ||
454 | #endif /* _NET_DST_H */ | 463 | #endif /* _NET_DST_H */ |
455 | 464 |
1 | /* | 1 | /* |
2 | * INET An implementation of the TCP/IP protocol suite for the LINUX | 2 | * INET An implementation of the TCP/IP protocol suite for the LINUX |
3 | * operating system. INET is implemented using the BSD Socket | 3 | * operating system. INET is implemented using the BSD Socket |
4 | * interface as the means of communication with the user level. | 4 | * interface as the means of communication with the user level. |
5 | * | 5 | * |
6 | * The Internet Protocol (IP) output module. | 6 | * The Internet Protocol (IP) output module. |
7 | * | 7 | * |
8 | * Authors: Ross Biro | 8 | * Authors: Ross Biro |
9 | * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> | 9 | * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> |
10 | * Donald Becker, <becker@super.org> | 10 | * Donald Becker, <becker@super.org> |
11 | * Alan Cox, <Alan.Cox@linux.org> | 11 | * Alan Cox, <Alan.Cox@linux.org> |
12 | * Richard Underwood | 12 | * Richard Underwood |
13 | * Stefan Becker, <stefanb@yello.ping.de> | 13 | * Stefan Becker, <stefanb@yello.ping.de> |
14 | * Jorge Cwik, <jorge@laser.satlink.net> | 14 | * Jorge Cwik, <jorge@laser.satlink.net> |
15 | * Arnt Gulbrandsen, <agulbra@nvg.unit.no> | 15 | * Arnt Gulbrandsen, <agulbra@nvg.unit.no> |
16 | * Hirokazu Takahashi, <taka@valinux.co.jp> | 16 | * Hirokazu Takahashi, <taka@valinux.co.jp> |
17 | * | 17 | * |
18 | * See ip_input.c for original log | 18 | * See ip_input.c for original log |
19 | * | 19 | * |
20 | * Fixes: | 20 | * Fixes: |
21 | * Alan Cox : Missing nonblock feature in ip_build_xmit. | 21 | * Alan Cox : Missing nonblock feature in ip_build_xmit. |
22 | * Mike Kilburn : htons() missing in ip_build_xmit. | 22 | * Mike Kilburn : htons() missing in ip_build_xmit. |
23 | * Bradford Johnson: Fix faulty handling of some frames when | 23 | * Bradford Johnson: Fix faulty handling of some frames when |
24 | * no route is found. | 24 | * no route is found. |
25 | * Alexander Demenshin: Missing sk/skb free in ip_queue_xmit | 25 | * Alexander Demenshin: Missing sk/skb free in ip_queue_xmit |
26 | * (in case if packet not accepted by | 26 | * (in case if packet not accepted by |
27 | * output firewall rules) | 27 | * output firewall rules) |
28 | * Mike McLagan : Routing by source | 28 | * Mike McLagan : Routing by source |
29 | * Alexey Kuznetsov: use new route cache | 29 | * Alexey Kuznetsov: use new route cache |
30 | * Andi Kleen: Fix broken PMTU recovery and remove | 30 | * Andi Kleen: Fix broken PMTU recovery and remove |
31 | * some redundant tests. | 31 | * some redundant tests. |
32 | * Vitaly E. Lavrov : Transparent proxy revived after year coma. | 32 | * Vitaly E. Lavrov : Transparent proxy revived after year coma. |
33 | * Andi Kleen : Replace ip_reply with ip_send_reply. | 33 | * Andi Kleen : Replace ip_reply with ip_send_reply. |
34 | * Andi Kleen : Split fast and slow ip_build_xmit path | 34 | * Andi Kleen : Split fast and slow ip_build_xmit path |
35 | * for decreased register pressure on x86 | 35 | * for decreased register pressure on x86 |
36 | * and more readibility. | 36 | * and more readibility. |
37 | * Marc Boucher : When call_out_firewall returns FW_QUEUE, | 37 | * Marc Boucher : When call_out_firewall returns FW_QUEUE, |
38 | * silently drop skb instead of failing with -EPERM. | 38 | * silently drop skb instead of failing with -EPERM. |
39 | * Detlev Wengorz : Copy protocol for fragments. | 39 | * Detlev Wengorz : Copy protocol for fragments. |
40 | * Hirokazu Takahashi: HW checksumming for outgoing UDP | 40 | * Hirokazu Takahashi: HW checksumming for outgoing UDP |
41 | * datagrams. | 41 | * datagrams. |
42 | * Hirokazu Takahashi: sendfile() on UDP works now. | 42 | * Hirokazu Takahashi: sendfile() on UDP works now. |
43 | */ | 43 | */ |
44 | 44 | ||
45 | #include <asm/uaccess.h> | 45 | #include <asm/uaccess.h> |
46 | #include <asm/system.h> | 46 | #include <asm/system.h> |
47 | #include <linux/module.h> | 47 | #include <linux/module.h> |
48 | #include <linux/types.h> | 48 | #include <linux/types.h> |
49 | #include <linux/kernel.h> | 49 | #include <linux/kernel.h> |
50 | #include <linux/mm.h> | 50 | #include <linux/mm.h> |
51 | #include <linux/string.h> | 51 | #include <linux/string.h> |
52 | #include <linux/errno.h> | 52 | #include <linux/errno.h> |
53 | #include <linux/highmem.h> | 53 | #include <linux/highmem.h> |
54 | #include <linux/slab.h> | 54 | #include <linux/slab.h> |
55 | 55 | ||
56 | #include <linux/socket.h> | 56 | #include <linux/socket.h> |
57 | #include <linux/sockios.h> | 57 | #include <linux/sockios.h> |
58 | #include <linux/in.h> | 58 | #include <linux/in.h> |
59 | #include <linux/inet.h> | 59 | #include <linux/inet.h> |
60 | #include <linux/netdevice.h> | 60 | #include <linux/netdevice.h> |
61 | #include <linux/etherdevice.h> | 61 | #include <linux/etherdevice.h> |
62 | #include <linux/proc_fs.h> | 62 | #include <linux/proc_fs.h> |
63 | #include <linux/stat.h> | 63 | #include <linux/stat.h> |
64 | #include <linux/init.h> | 64 | #include <linux/init.h> |
65 | 65 | ||
66 | #include <net/snmp.h> | 66 | #include <net/snmp.h> |
67 | #include <net/ip.h> | 67 | #include <net/ip.h> |
68 | #include <net/protocol.h> | 68 | #include <net/protocol.h> |
69 | #include <net/route.h> | 69 | #include <net/route.h> |
70 | #include <net/xfrm.h> | 70 | #include <net/xfrm.h> |
71 | #include <linux/skbuff.h> | 71 | #include <linux/skbuff.h> |
72 | #include <net/sock.h> | 72 | #include <net/sock.h> |
73 | #include <net/arp.h> | 73 | #include <net/arp.h> |
74 | #include <net/icmp.h> | 74 | #include <net/icmp.h> |
75 | #include <net/checksum.h> | 75 | #include <net/checksum.h> |
76 | #include <net/inetpeer.h> | 76 | #include <net/inetpeer.h> |
77 | #include <linux/igmp.h> | 77 | #include <linux/igmp.h> |
78 | #include <linux/netfilter_ipv4.h> | 78 | #include <linux/netfilter_ipv4.h> |
79 | #include <linux/netfilter_bridge.h> | 79 | #include <linux/netfilter_bridge.h> |
80 | #include <linux/mroute.h> | 80 | #include <linux/mroute.h> |
81 | #include <linux/netlink.h> | 81 | #include <linux/netlink.h> |
82 | #include <linux/tcp.h> | 82 | #include <linux/tcp.h> |
83 | 83 | ||
84 | int sysctl_ip_default_ttl __read_mostly = IPDEFTTL; | 84 | int sysctl_ip_default_ttl __read_mostly = IPDEFTTL; |
85 | EXPORT_SYMBOL(sysctl_ip_default_ttl); | 85 | EXPORT_SYMBOL(sysctl_ip_default_ttl); |
86 | 86 | ||
/* Generate a checksum for an outgoing IP datagram. */
__inline__ void ip_send_check(struct iphdr *iph)
{
	/* The check field must be zero while the sum is computed:
	 * the one's-complement sum is taken over the whole header,
	 * including this field.
	 */
	iph->check = 0;
	iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
}
EXPORT_SYMBOL(ip_send_check);
94 | 94 | ||
/*
 * Finalize the IP header of a locally generated packet (total length
 * and checksum) and run the LOCAL_OUT netfilter hook.
 *
 * Returns 1 when the caller should continue with transmission (the
 * value ip_local_out() tests for), otherwise the hook's verdict/error.
 */
int __ip_local_out(struct sk_buff *skb)
{
	struct iphdr *iph = ip_hdr(skb);

	/* tot_len could not be filled in before the payload was complete. */
	iph->tot_len = htons(skb->len);
	ip_send_check(iph);
	return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, skb, NULL,
		       skb_dst(skb)->dev, dst_output);
}
104 | 104 | ||
/*
 * Send a locally generated packet: finalize its header, traverse the
 * LOCAL_OUT hook, and — unless netfilter stole or dropped it — hand it
 * to the attached dst's output function.
 */
int ip_local_out(struct sk_buff *skb)
{
	int verdict = __ip_local_out(skb);

	/* Any value other than 1 means the hook consumed the packet
	 * (queued, dropped, or errored); propagate it unchanged.
	 */
	if (verdict != 1)
		return verdict;

	return dst_output(skb);
}
EXPORT_SYMBOL_GPL(ip_local_out);
116 | 116 | ||
/* dev_loopback_xmit for use with netfilter.
 *
 * Re-injects a copy of an outgoing multicast/broadcast frame into the
 * local receive path so local listeners see it too.
 */
static int ip_dev_loopback_xmit(struct sk_buff *newskb)
{
	skb_reset_mac_header(newskb);
	__skb_pull(newskb, skb_network_offset(newskb));
	/* Mark the frame as looped back; the data originated locally so
	 * no checksum verification is needed on receive.
	 */
	newskb->pkt_type = PACKET_LOOPBACK;
	newskb->ip_summed = CHECKSUM_UNNECESSARY;
	WARN_ON(!skb_dst(newskb));
	netif_rx_ni(newskb);
	return 0;
}
128 | 128 | ||
129 | static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst) | 129 | static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst) |
130 | { | 130 | { |
131 | int ttl = inet->uc_ttl; | 131 | int ttl = inet->uc_ttl; |
132 | 132 | ||
133 | if (ttl < 0) | 133 | if (ttl < 0) |
134 | ttl = ip4_dst_hoplimit(dst); | 134 | ttl = ip4_dst_hoplimit(dst); |
135 | return ttl; | 135 | return ttl; |
136 | } | 136 | } |
137 | 137 | ||
/*
 * Add an ip header to a skbuff and send it out.
 *
 * The caller must already have attached a route to the skb
 * (skb_rtable()) and supplies source/destination addresses and IP
 * options explicitly rather than taking them from the socket.
 */
int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
			  __be32 saddr, __be32 daddr, struct ip_options_rcu *opt)
{
	struct inet_sock *inet = inet_sk(sk);
	struct rtable *rt = skb_rtable(skb);
	struct iphdr *iph;

	/* Build the IP header. */
	skb_push(skb, sizeof(struct iphdr) + (opt ? opt->opt.optlen : 0));
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);
	iph->version = 4;
	iph->ihl = 5;	/* header length in 32-bit words, before options */
	iph->tos = inet->tos;
	/* Set DF when the socket/route policy forbids local fragmentation. */
	if (ip_dont_fragment(sk, &rt->dst))
		iph->frag_off = htons(IP_DF);
	else
		iph->frag_off = 0;
	iph->ttl = ip_select_ttl(inet, &rt->dst);
	/* With a source-route option the wire destination is the first hop
	 * of the route (faddr), not the final destination.
	 */
	iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr);
	iph->saddr = saddr;
	iph->protocol = sk->sk_protocol;
	ip_select_ident(iph, &rt->dst, sk);

	if (opt && opt->opt.optlen) {
		iph->ihl += opt->opt.optlen>>2;
		ip_options_build(skb, &opt->opt, daddr, rt, 0);
	}

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	/* Send it out. */
	return ip_local_out(skb);
}
EXPORT_SYMBOL_GPL(ip_build_and_send_pkt);
178 | 178 | ||
/*
 * Final hop of the IPv4 output path: resolve the dst's neighbour and
 * hand the packet to the link layer via neigh_output().
 *
 * The dst's neighbour pointer can be replaced concurrently (learned
 * redirects may change it), so the lookup and use of the neighbour
 * must happen inside an RCU read-side critical section; neighbours are
 * freed only after an RCU grace period.
 */
static inline int ip_finish_output2(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct rtable *rt = (struct rtable *)dst;
	struct net_device *dev = dst->dev;
	unsigned int hh_len = LL_RESERVED_SPACE(dev);
	struct neighbour *neigh;

	if (rt->rt_type == RTN_MULTICAST) {
		IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTMCAST, skb->len);
	} else if (rt->rt_type == RTN_BROADCAST)
		IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTBCAST, skb->len);

	/* Be paranoid, rather than too clever. */
	if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
		struct sk_buff *skb2;

		/* Not enough headroom for the link-layer header: make a
		 * copy with more room, preserving socket ownership.
		 */
		skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev));
		if (skb2 == NULL) {
			kfree_skb(skb);
			return -ENOMEM;
		}
		if (skb->sk)
			skb_set_owner_w(skb2, skb->sk);
		kfree_skb(skb);
		skb = skb2;
	}

	rcu_read_lock();
	neigh = dst_get_neighbour(dst);
	if (neigh) {
		/* Keep the RCU lock held across neigh_output() so the
		 * neighbour cannot be freed while we use it.
		 */
		int res = neigh_output(neigh, skb);

		rcu_read_unlock();
		return res;
	}
	rcu_read_unlock();

	if (net_ratelimit())
		printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n");
	kfree_skb(skb);
	return -EINVAL;
}
216 | 222 | ||
217 | static inline int ip_skb_dst_mtu(struct sk_buff *skb) | 223 | static inline int ip_skb_dst_mtu(struct sk_buff *skb) |
218 | { | 224 | { |
219 | struct inet_sock *inet = skb->sk ? inet_sk(skb->sk) : NULL; | 225 | struct inet_sock *inet = skb->sk ? inet_sk(skb->sk) : NULL; |
220 | 226 | ||
221 | return (inet && inet->pmtudisc == IP_PMTUDISC_PROBE) ? | 227 | return (inet && inet->pmtudisc == IP_PMTUDISC_PROBE) ? |
222 | skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb)); | 228 | skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb)); |
223 | } | 229 | } |
224 | 230 | ||
/*
 * POST_ROUTING continuation: re-route through xfrm if SNAT produced a
 * new policy, otherwise fragment oversized packets and pass them (or
 * the packet itself) to ip_finish_output2().
 */
static int ip_finish_output(struct sk_buff *skb)
{
#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
	/* Policy lookup after SNAT yielded a new policy */
	if (skb_dst(skb)->xfrm != NULL) {
		IPCB(skb)->flags |= IPSKB_REROUTED;
		return dst_output(skb);
	}
#endif
	/* GSO skbs skip fragmentation here; presumably they are segmented
	 * further down the stack — the check only covers non-GSO packets.
	 */
	if (skb->len > ip_skb_dst_mtu(skb) && !skb_is_gso(skb))
		return ip_fragment(skb, ip_finish_output2);
	else
		return ip_finish_output2(skb);
}
239 | 245 | ||
/*
 * Output routine for multicast/broadcast dsts: loop copies back to
 * local listeners where required, then send the original through the
 * POST_ROUTING hook to ip_finish_output().
 */
int ip_mc_output(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	struct rtable *rt = skb_rtable(skb);
	struct net_device *dev = rt->dst.dev;

	/*
	 *	If the indicated interface is up and running, send the packet.
	 */
	IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUT, skb->len);

	skb->dev = dev;
	skb->protocol = htons(ETH_P_IP);

	/*
	 *	Multicasts are looped back for other local users
	 */

	if (rt->rt_flags&RTCF_MULTICAST) {
		if (sk_mc_loop(sk)
#ifdef CONFIG_IP_MROUTE
		/* Small optimization: do not loopback not local frames,
		   which returned after forwarding; they will be dropped
		   by ip_mr_input in any case.
		   Note, that local frames are looped back to be delivered
		   to local recipients.

		   This check is duplicated in ip_mr_input at the moment.
		 */
		    &&
		    ((rt->rt_flags & RTCF_LOCAL) ||
		     !(IPCB(skb)->flags & IPSKB_FORWARDED))
#endif
		   ) {
			/* Clone rather than steal: the original still goes
			 * out on the wire below.
			 */
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
			if (newskb)
				NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING,
					newskb, NULL, newskb->dev,
					ip_dev_loopback_xmit);
		}

		/* Multicasts with ttl 0 must not go beyond the host */

		if (ip_hdr(skb)->ttl == 0) {
			kfree_skb(skb);
			return 0;
		}
	}

	if (rt->rt_flags&RTCF_BROADCAST) {
		/* Broadcasts are always looped back to local receivers. */
		struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
		if (newskb)
			NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, newskb,
				NULL, newskb->dev, ip_dev_loopback_xmit);
	}

	/* IPSKB_REROUTED packets bypass the hook and go straight to
	 * ip_finish_output (NF_HOOK_COND's condition argument).
	 */
	return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, NULL,
			    skb->dev, ip_finish_output,
			    !(IPCB(skb)->flags & IPSKB_REROUTED));
}
300 | 306 | ||
/*
 * Standard unicast output routine: account the packet and pass it
 * through the POST_ROUTING hook to ip_finish_output().  The hook is
 * skipped for packets already marked IPSKB_REROUTED.
 */
int ip_output(struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev;

	IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUT, skb->len);

	skb->dev = dev;
	skb->protocol = htons(ETH_P_IP);

	return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, NULL, dev,
			    ip_finish_output,
			    !(IPCB(skb)->flags & IPSKB_REROUTED));
}
314 | 320 | ||
/*
 * Queue a packet for transmission on a connected socket: find (or
 * reuse) a route, build the IP header from socket state, and send via
 * ip_local_out().
 *
 * The whole body runs under rcu_read_lock(): it protects the
 * rcu_dereference() of inet->inet_opt and allows the dst to be
 * attached to the skb without taking a reference (skb_dst_set_noref).
 *
 * Returns the ip_local_out() result, or -EHOSTUNREACH when no route
 * can be found.
 */
int ip_queue_xmit(struct sk_buff *skb, struct flowi *fl)
{
	struct sock *sk = skb->sk;
	struct inet_sock *inet = inet_sk(sk);
	struct ip_options_rcu *inet_opt;
	struct flowi4 *fl4;
	struct rtable *rt;
	struct iphdr *iph;
	int res;

	/* Skip all of this if the packet is already routed,
	 * f.e. by something like SCTP.
	 */
	rcu_read_lock();
	inet_opt = rcu_dereference(inet->inet_opt);
	fl4 = &fl->u.ip4;
	rt = skb_rtable(skb);
	if (rt != NULL)
		goto packet_routed;

	/* Make sure we can route this packet. */
	rt = (struct rtable *)__sk_dst_check(sk, 0);
	if (rt == NULL) {
		__be32 daddr;

		/* Use correct destination address if we have options. */
		daddr = inet->inet_daddr;
		if (inet_opt && inet_opt->opt.srr)
			daddr = inet_opt->opt.faddr;

		/* If this fails, retransmit mechanism of transport layer will
		 * keep trying until route appears or the connection times
		 * itself out.
		 */
		rt = ip_route_output_ports(sock_net(sk), fl4, sk,
					   daddr, inet->inet_saddr,
					   inet->inet_dport,
					   inet->inet_sport,
					   sk->sk_protocol,
					   RT_CONN_FLAGS(sk),
					   sk->sk_bound_dev_if);
		if (IS_ERR(rt))
			goto no_route;
		/* Cache the route on the socket for subsequent packets. */
		sk_setup_caps(sk, &rt->dst);
	}
	skb_dst_set_noref(skb, &rt->dst);

packet_routed:
	/* Strict source routing requires the next hop to be the gateway. */
	if (inet_opt && inet_opt->opt.is_strictroute && fl4->daddr != rt->rt_gateway)
		goto no_route;

	/* OK, we know where to send it, allocate and build IP header. */
	skb_push(skb, sizeof(struct iphdr) + (inet_opt ? inet_opt->opt.optlen : 0));
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);
	/* version=4, ihl=5 and tos written in one 16-bit store. */
	*((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
	if (ip_dont_fragment(sk, &rt->dst) && !skb->local_df)
		iph->frag_off = htons(IP_DF);
	else
		iph->frag_off = 0;
	iph->ttl = ip_select_ttl(inet, &rt->dst);
	iph->protocol = sk->sk_protocol;
	iph->saddr = fl4->saddr;
	iph->daddr = fl4->daddr;
	/* Transport layer set skb->h.foo itself. */

	if (inet_opt && inet_opt->opt.optlen) {
		iph->ihl += inet_opt->opt.optlen >> 2;
		ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0);
	}

	/* GSO packets consume several IDs, one per segment. */
	ip_select_ident_more(iph, &rt->dst, sk,
			     (skb_shinfo(skb)->gso_segs ?: 1) - 1);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	res = ip_local_out(skb);
	rcu_read_unlock();
	return res;

no_route:
	rcu_read_unlock();
	IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EHOSTUNREACH;
}
EXPORT_SYMBOL(ip_queue_xmit);
403 | 409 | ||
404 | 410 | ||
/*
 * Copy per-packet metadata (routing, priority, marks, netfilter and
 * scheduling state) from an original skb to one of its fragments so
 * each fragment is handled identically downstream.
 */
static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	/* Drop any dst already on the fragment before sharing from's. */
	skb_dst_drop(to);
	skb_dst_copy(to, from);
	to->dev = from->dev;
	to->mark = from->mark;

	/* Copy the flags to each fragment. */
	IPCB(to)->flags = IPCB(from)->flags;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
	to->nf_trace = from->nf_trace;
#endif
#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
	to->ipvs_property = from->ipvs_property;
#endif
	skb_copy_secmark(to, from);
}
431 | 437 | ||
432 | /* | 438 | /* |
433 | * This IP datagram is too large to be sent in one piece. Break it up into | 439 | * This IP datagram is too large to be sent in one piece. Break it up into |
434 | * smaller pieces (each of size equal to IP header plus | 440 | * smaller pieces (each of size equal to IP header plus |
435 | * a block of the data of the original IP data part) that will yet fit in a | 441 | * a block of the data of the original IP data part) that will yet fit in a |
436 | * single device frame, and queue such a frame for sending. | 442 | * single device frame, and queue such a frame for sending. |
437 | */ | 443 | */ |
438 | 444 | ||
439 | int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) | 445 | int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) |
440 | { | 446 | { |
441 | struct iphdr *iph; | 447 | struct iphdr *iph; |
442 | int ptr; | 448 | int ptr; |
443 | struct net_device *dev; | 449 | struct net_device *dev; |
444 | struct sk_buff *skb2; | 450 | struct sk_buff *skb2; |
445 | unsigned int mtu, hlen, left, len, ll_rs; | 451 | unsigned int mtu, hlen, left, len, ll_rs; |
446 | int offset; | 452 | int offset; |
447 | __be16 not_last_frag; | 453 | __be16 not_last_frag; |
448 | struct rtable *rt = skb_rtable(skb); | 454 | struct rtable *rt = skb_rtable(skb); |
449 | int err = 0; | 455 | int err = 0; |
450 | 456 | ||
451 | dev = rt->dst.dev; | 457 | dev = rt->dst.dev; |
452 | 458 | ||
453 | /* | 459 | /* |
454 | * Point into the IP datagram header. | 460 | * Point into the IP datagram header. |
455 | */ | 461 | */ |
456 | 462 | ||
457 | iph = ip_hdr(skb); | 463 | iph = ip_hdr(skb); |
458 | 464 | ||
459 | if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) { | 465 | if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) { |
460 | IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); | 466 | IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); |
461 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, | 467 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, |
462 | htonl(ip_skb_dst_mtu(skb))); | 468 | htonl(ip_skb_dst_mtu(skb))); |
463 | kfree_skb(skb); | 469 | kfree_skb(skb); |
464 | return -EMSGSIZE; | 470 | return -EMSGSIZE; |
465 | } | 471 | } |
466 | 472 | ||
467 | /* | 473 | /* |
468 | * Setup starting values. | 474 | * Setup starting values. |
469 | */ | 475 | */ |
470 | 476 | ||
471 | hlen = iph->ihl * 4; | 477 | hlen = iph->ihl * 4; |
472 | mtu = dst_mtu(&rt->dst) - hlen; /* Size of data space */ | 478 | mtu = dst_mtu(&rt->dst) - hlen; /* Size of data space */ |
473 | #ifdef CONFIG_BRIDGE_NETFILTER | 479 | #ifdef CONFIG_BRIDGE_NETFILTER |
474 | if (skb->nf_bridge) | 480 | if (skb->nf_bridge) |
475 | mtu -= nf_bridge_mtu_reduction(skb); | 481 | mtu -= nf_bridge_mtu_reduction(skb); |
476 | #endif | 482 | #endif |
477 | IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE; | 483 | IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE; |
478 | 484 | ||
479 | /* When frag_list is given, use it. First, check its validity: | 485 | /* When frag_list is given, use it. First, check its validity: |
480 | * some transformers could create wrong frag_list or break existing | 486 | * some transformers could create wrong frag_list or break existing |
481 | * one, it is not prohibited. In this case fall back to copying. | 487 | * one, it is not prohibited. In this case fall back to copying. |
482 | * | 488 | * |
483 | * LATER: this step can be merged to real generation of fragments, | 489 | * LATER: this step can be merged to real generation of fragments, |
484 | * we can switch to copy when see the first bad fragment. | 490 | * we can switch to copy when see the first bad fragment. |
485 | */ | 491 | */ |
486 | if (skb_has_frag_list(skb)) { | 492 | if (skb_has_frag_list(skb)) { |
487 | struct sk_buff *frag, *frag2; | 493 | struct sk_buff *frag, *frag2; |
488 | int first_len = skb_pagelen(skb); | 494 | int first_len = skb_pagelen(skb); |
489 | 495 | ||
490 | if (first_len - hlen > mtu || | 496 | if (first_len - hlen > mtu || |
491 | ((first_len - hlen) & 7) || | 497 | ((first_len - hlen) & 7) || |
492 | ip_is_fragment(iph) || | 498 | ip_is_fragment(iph) || |
493 | skb_cloned(skb)) | 499 | skb_cloned(skb)) |
494 | goto slow_path; | 500 | goto slow_path; |
495 | 501 | ||
496 | skb_walk_frags(skb, frag) { | 502 | skb_walk_frags(skb, frag) { |
497 | /* Correct geometry. */ | 503 | /* Correct geometry. */ |
498 | if (frag->len > mtu || | 504 | if (frag->len > mtu || |
499 | ((frag->len & 7) && frag->next) || | 505 | ((frag->len & 7) && frag->next) || |
500 | skb_headroom(frag) < hlen) | 506 | skb_headroom(frag) < hlen) |
501 | goto slow_path_clean; | 507 | goto slow_path_clean; |
502 | 508 | ||
503 | /* Partially cloned skb? */ | 509 | /* Partially cloned skb? */ |
504 | if (skb_shared(frag)) | 510 | if (skb_shared(frag)) |
505 | goto slow_path_clean; | 511 | goto slow_path_clean; |
506 | 512 | ||
507 | BUG_ON(frag->sk); | 513 | BUG_ON(frag->sk); |
508 | if (skb->sk) { | 514 | if (skb->sk) { |
509 | frag->sk = skb->sk; | 515 | frag->sk = skb->sk; |
510 | frag->destructor = sock_wfree; | 516 | frag->destructor = sock_wfree; |
511 | } | 517 | } |
512 | skb->truesize -= frag->truesize; | 518 | skb->truesize -= frag->truesize; |
513 | } | 519 | } |
514 | 520 | ||
515 | /* Everything is OK. Generate! */ | 521 | /* Everything is OK. Generate! */ |
516 | 522 | ||
517 | err = 0; | 523 | err = 0; |
518 | offset = 0; | 524 | offset = 0; |
519 | frag = skb_shinfo(skb)->frag_list; | 525 | frag = skb_shinfo(skb)->frag_list; |
520 | skb_frag_list_init(skb); | 526 | skb_frag_list_init(skb); |
521 | skb->data_len = first_len - skb_headlen(skb); | 527 | skb->data_len = first_len - skb_headlen(skb); |
522 | skb->len = first_len; | 528 | skb->len = first_len; |
523 | iph->tot_len = htons(first_len); | 529 | iph->tot_len = htons(first_len); |
524 | iph->frag_off = htons(IP_MF); | 530 | iph->frag_off = htons(IP_MF); |
525 | ip_send_check(iph); | 531 | ip_send_check(iph); |
526 | 532 | ||
527 | for (;;) { | 533 | for (;;) { |
528 | /* Prepare header of the next frame, | 534 | /* Prepare header of the next frame, |
529 | * before previous one went down. */ | 535 | * before previous one went down. */ |
530 | if (frag) { | 536 | if (frag) { |
531 | frag->ip_summed = CHECKSUM_NONE; | 537 | frag->ip_summed = CHECKSUM_NONE; |
532 | skb_reset_transport_header(frag); | 538 | skb_reset_transport_header(frag); |
533 | __skb_push(frag, hlen); | 539 | __skb_push(frag, hlen); |
534 | skb_reset_network_header(frag); | 540 | skb_reset_network_header(frag); |
535 | memcpy(skb_network_header(frag), iph, hlen); | 541 | memcpy(skb_network_header(frag), iph, hlen); |
536 | iph = ip_hdr(frag); | 542 | iph = ip_hdr(frag); |
537 | iph->tot_len = htons(frag->len); | 543 | iph->tot_len = htons(frag->len); |
538 | ip_copy_metadata(frag, skb); | 544 | ip_copy_metadata(frag, skb); |
539 | if (offset == 0) | 545 | if (offset == 0) |
540 | ip_options_fragment(frag); | 546 | ip_options_fragment(frag); |
541 | offset += skb->len - hlen; | 547 | offset += skb->len - hlen; |
542 | iph->frag_off = htons(offset>>3); | 548 | iph->frag_off = htons(offset>>3); |
543 | if (frag->next != NULL) | 549 | if (frag->next != NULL) |
544 | iph->frag_off |= htons(IP_MF); | 550 | iph->frag_off |= htons(IP_MF); |
545 | /* Ready, complete checksum */ | 551 | /* Ready, complete checksum */ |
546 | ip_send_check(iph); | 552 | ip_send_check(iph); |
547 | } | 553 | } |
548 | 554 | ||
549 | err = output(skb); | 555 | err = output(skb); |
550 | 556 | ||
551 | if (!err) | 557 | if (!err) |
552 | IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES); | 558 | IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES); |
553 | if (err || !frag) | 559 | if (err || !frag) |
554 | break; | 560 | break; |
555 | 561 | ||
556 | skb = frag; | 562 | skb = frag; |
557 | frag = skb->next; | 563 | frag = skb->next; |
558 | skb->next = NULL; | 564 | skb->next = NULL; |
559 | } | 565 | } |
560 | 566 | ||
561 | if (err == 0) { | 567 | if (err == 0) { |
562 | IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS); | 568 | IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS); |
563 | return 0; | 569 | return 0; |
564 | } | 570 | } |
565 | 571 | ||
566 | while (frag) { | 572 | while (frag) { |
567 | skb = frag->next; | 573 | skb = frag->next; |
568 | kfree_skb(frag); | 574 | kfree_skb(frag); |
569 | frag = skb; | 575 | frag = skb; |
570 | } | 576 | } |
571 | IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); | 577 | IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); |
572 | return err; | 578 | return err; |
573 | 579 | ||
574 | slow_path_clean: | 580 | slow_path_clean: |
575 | skb_walk_frags(skb, frag2) { | 581 | skb_walk_frags(skb, frag2) { |
576 | if (frag2 == frag) | 582 | if (frag2 == frag) |
577 | break; | 583 | break; |
578 | frag2->sk = NULL; | 584 | frag2->sk = NULL; |
579 | frag2->destructor = NULL; | 585 | frag2->destructor = NULL; |
580 | skb->truesize += frag2->truesize; | 586 | skb->truesize += frag2->truesize; |
581 | } | 587 | } |
582 | } | 588 | } |
583 | 589 | ||
584 | slow_path: | 590 | slow_path: |
585 | left = skb->len - hlen; /* Space per frame */ | 591 | left = skb->len - hlen; /* Space per frame */ |
586 | ptr = hlen; /* Where to start from */ | 592 | ptr = hlen; /* Where to start from */ |
587 | 593 | ||
588 | /* for bridged IP traffic encapsulated inside f.e. a vlan header, | 594 | /* for bridged IP traffic encapsulated inside f.e. a vlan header, |
589 | * we need to make room for the encapsulating header | 595 | * we need to make room for the encapsulating header |
590 | */ | 596 | */ |
591 | ll_rs = LL_RESERVED_SPACE_EXTRA(rt->dst.dev, nf_bridge_pad(skb)); | 597 | ll_rs = LL_RESERVED_SPACE_EXTRA(rt->dst.dev, nf_bridge_pad(skb)); |
592 | 598 | ||
593 | /* | 599 | /* |
594 | * Fragment the datagram. | 600 | * Fragment the datagram. |
595 | */ | 601 | */ |
596 | 602 | ||
597 | offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3; | 603 | offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3; |
598 | not_last_frag = iph->frag_off & htons(IP_MF); | 604 | not_last_frag = iph->frag_off & htons(IP_MF); |
599 | 605 | ||
600 | /* | 606 | /* |
601 | * Keep copying data until we run out. | 607 | * Keep copying data until we run out. |
602 | */ | 608 | */ |
603 | 609 | ||
604 | while (left > 0) { | 610 | while (left > 0) { |
605 | len = left; | 611 | len = left; |
606 | /* IF: it doesn't fit, use 'mtu' - the data space left */ | 612 | /* IF: it doesn't fit, use 'mtu' - the data space left */ |
607 | if (len > mtu) | 613 | if (len > mtu) |
608 | len = mtu; | 614 | len = mtu; |
609 | /* IF: we are not sending up to and including the packet end | 615 | /* IF: we are not sending up to and including the packet end |
610 | then align the next start on an eight byte boundary */ | 616 | then align the next start on an eight byte boundary */ |
611 | if (len < left) { | 617 | if (len < left) { |
612 | len &= ~7; | 618 | len &= ~7; |
613 | } | 619 | } |
614 | /* | 620 | /* |
615 | * Allocate buffer. | 621 | * Allocate buffer. |
616 | */ | 622 | */ |
617 | 623 | ||
618 | if ((skb2 = alloc_skb(len+hlen+ll_rs, GFP_ATOMIC)) == NULL) { | 624 | if ((skb2 = alloc_skb(len+hlen+ll_rs, GFP_ATOMIC)) == NULL) { |
619 | NETDEBUG(KERN_INFO "IP: frag: no memory for new fragment!\n"); | 625 | NETDEBUG(KERN_INFO "IP: frag: no memory for new fragment!\n"); |
620 | err = -ENOMEM; | 626 | err = -ENOMEM; |
621 | goto fail; | 627 | goto fail; |
622 | } | 628 | } |
623 | 629 | ||
624 | /* | 630 | /* |
625 | * Set up data on packet | 631 | * Set up data on packet |
626 | */ | 632 | */ |
627 | 633 | ||
628 | ip_copy_metadata(skb2, skb); | 634 | ip_copy_metadata(skb2, skb); |
629 | skb_reserve(skb2, ll_rs); | 635 | skb_reserve(skb2, ll_rs); |
630 | skb_put(skb2, len + hlen); | 636 | skb_put(skb2, len + hlen); |
631 | skb_reset_network_header(skb2); | 637 | skb_reset_network_header(skb2); |
632 | skb2->transport_header = skb2->network_header + hlen; | 638 | skb2->transport_header = skb2->network_header + hlen; |
633 | 639 | ||
634 | /* | 640 | /* |
635 | * Charge the memory for the fragment to any owner | 641 | * Charge the memory for the fragment to any owner |
636 | * it might possess | 642 | * it might possess |
637 | */ | 643 | */ |
638 | 644 | ||
639 | if (skb->sk) | 645 | if (skb->sk) |
640 | skb_set_owner_w(skb2, skb->sk); | 646 | skb_set_owner_w(skb2, skb->sk); |
641 | 647 | ||
642 | /* | 648 | /* |
643 | * Copy the packet header into the new buffer. | 649 | * Copy the packet header into the new buffer. |
644 | */ | 650 | */ |
645 | 651 | ||
646 | skb_copy_from_linear_data(skb, skb_network_header(skb2), hlen); | 652 | skb_copy_from_linear_data(skb, skb_network_header(skb2), hlen); |
647 | 653 | ||
648 | /* | 654 | /* |
649 | * Copy a block of the IP datagram. | 655 | * Copy a block of the IP datagram. |
650 | */ | 656 | */ |
651 | if (skb_copy_bits(skb, ptr, skb_transport_header(skb2), len)) | 657 | if (skb_copy_bits(skb, ptr, skb_transport_header(skb2), len)) |
652 | BUG(); | 658 | BUG(); |
653 | left -= len; | 659 | left -= len; |
654 | 660 | ||
655 | /* | 661 | /* |
656 | * Fill in the new header fields. | 662 | * Fill in the new header fields. |
657 | */ | 663 | */ |
658 | iph = ip_hdr(skb2); | 664 | iph = ip_hdr(skb2); |
659 | iph->frag_off = htons((offset >> 3)); | 665 | iph->frag_off = htons((offset >> 3)); |
660 | 666 | ||
661 | /* ANK: dirty, but effective trick. Upgrade options only if | 667 | /* ANK: dirty, but effective trick. Upgrade options only if |
662 | * the segment to be fragmented was THE FIRST (otherwise, | 668 | * the segment to be fragmented was THE FIRST (otherwise, |
663 | * options are already fixed) and make it ONCE | 669 | * options are already fixed) and make it ONCE |
664 | * on the initial skb, so that all the following fragments | 670 | * on the initial skb, so that all the following fragments |
665 | * will inherit fixed options. | 671 | * will inherit fixed options. |
666 | */ | 672 | */ |
667 | if (offset == 0) | 673 | if (offset == 0) |
668 | ip_options_fragment(skb); | 674 | ip_options_fragment(skb); |
669 | 675 | ||
670 | /* | 676 | /* |
671 | * Added AC : If we are fragmenting a fragment that's not the | 677 | * Added AC : If we are fragmenting a fragment that's not the |
672 | * last fragment then keep MF on each bit | 678 | * last fragment then keep MF on each bit |
673 | */ | 679 | */ |
674 | if (left > 0 || not_last_frag) | 680 | if (left > 0 || not_last_frag) |
675 | iph->frag_off |= htons(IP_MF); | 681 | iph->frag_off |= htons(IP_MF); |
676 | ptr += len; | 682 | ptr += len; |
677 | offset += len; | 683 | offset += len; |
678 | 684 | ||
679 | /* | 685 | /* |
680 | * Put this fragment into the sending queue. | 686 | * Put this fragment into the sending queue. |
681 | */ | 687 | */ |
682 | iph->tot_len = htons(len + hlen); | 688 | iph->tot_len = htons(len + hlen); |
683 | 689 | ||
684 | ip_send_check(iph); | 690 | ip_send_check(iph); |
685 | 691 | ||
686 | err = output(skb2); | 692 | err = output(skb2); |
687 | if (err) | 693 | if (err) |
688 | goto fail; | 694 | goto fail; |
689 | 695 | ||
690 | IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES); | 696 | IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES); |
691 | } | 697 | } |
692 | kfree_skb(skb); | 698 | kfree_skb(skb); |
693 | IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS); | 699 | IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS); |
694 | return err; | 700 | return err; |
695 | 701 | ||
696 | fail: | 702 | fail: |
697 | kfree_skb(skb); | 703 | kfree_skb(skb); |
698 | IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); | 704 | IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); |
699 | return err; | 705 | return err; |
700 | } | 706 | } |
701 | EXPORT_SYMBOL(ip_fragment); | 707 | EXPORT_SYMBOL(ip_fragment); |
702 | 708 | ||
703 | int | 709 | int |
704 | ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb) | 710 | ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb) |
705 | { | 711 | { |
706 | struct iovec *iov = from; | 712 | struct iovec *iov = from; |
707 | 713 | ||
708 | if (skb->ip_summed == CHECKSUM_PARTIAL) { | 714 | if (skb->ip_summed == CHECKSUM_PARTIAL) { |
709 | if (memcpy_fromiovecend(to, iov, offset, len) < 0) | 715 | if (memcpy_fromiovecend(to, iov, offset, len) < 0) |
710 | return -EFAULT; | 716 | return -EFAULT; |
711 | } else { | 717 | } else { |
712 | __wsum csum = 0; | 718 | __wsum csum = 0; |
713 | if (csum_partial_copy_fromiovecend(to, iov, offset, len, &csum) < 0) | 719 | if (csum_partial_copy_fromiovecend(to, iov, offset, len, &csum) < 0) |
714 | return -EFAULT; | 720 | return -EFAULT; |
715 | skb->csum = csum_block_add(skb->csum, csum, odd); | 721 | skb->csum = csum_block_add(skb->csum, csum, odd); |
716 | } | 722 | } |
717 | return 0; | 723 | return 0; |
718 | } | 724 | } |
719 | EXPORT_SYMBOL(ip_generic_getfrag); | 725 | EXPORT_SYMBOL(ip_generic_getfrag); |
720 | 726 | ||
721 | static inline __wsum | 727 | static inline __wsum |
722 | csum_page(struct page *page, int offset, int copy) | 728 | csum_page(struct page *page, int offset, int copy) |
723 | { | 729 | { |
724 | char *kaddr; | 730 | char *kaddr; |
725 | __wsum csum; | 731 | __wsum csum; |
726 | kaddr = kmap(page); | 732 | kaddr = kmap(page); |
727 | csum = csum_partial(kaddr + offset, copy, 0); | 733 | csum = csum_partial(kaddr + offset, copy, 0); |
728 | kunmap(page); | 734 | kunmap(page); |
729 | return csum; | 735 | return csum; |
730 | } | 736 | } |
731 | 737 | ||
/*
 * Fast path for UDP fragmentation offload (UFO): instead of emitting
 * one skb per IP fragment, append the whole payload to a single GSO
 * skb and let the device / GSO layer split it on transmit.
 *
 * Returns 0 on success, or a negative errno from skb allocation or
 * from the getfrag() user copy.
 */
static inline int ip_ufo_append_data(struct sock *sk,
			struct sk_buff_head *queue,
			int getfrag(void *from, char *to, int offset, int len,
			       int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int maxfraglen, unsigned int flags)
{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP fragmentation offload by network
	 * device, so create one single skb packet containing complete
	 * udp datagram
	 */
	if ((skb = skb_peek_tail(queue)) == NULL) {
		/* No pending skb yet: allocate one with room for the
		 * link-layer, IP and transport headers (+20 bytes slack).
		 */
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);

		if (skb == NULL)
			return err;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb, fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		/* Checksum is completed later by hardware / GSO. */
		skb->ip_summed = CHECKSUM_PARTIAL;
		skb->csum = 0;

		/* specify the length of each IP datagram fragment */
		skb_shinfo(skb)->gso_size = maxfraglen - fragheaderlen;
		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
		__skb_queue_tail(queue, skb);
	}

	/* Append the user payload as page fragments of the single skb;
	 * transhdrlen bytes were already accounted for in the header.
	 */
	return skb_append_datato_frags(sk, skb, getfrag, from,
				       (length - transhdrlen));
}
778 | 784 | ||
/*
 * Core of ip_append_data(): append @length bytes of payload (fetched
 * via @getfrag) to the queue of pending fragments for this cork.
 * Builds MTU-sized skbs, keeping an 8-byte-aligned fragment boundary
 * (maxfraglen) and carrying any tail overhang ("fraggap") from the
 * previous skb into the next one so checksums stay consistent.
 *
 * transhdrlen > 0 marks the first call for a datagram (transport
 * header still to be written).  Returns 0 on success or a negative
 * errno; on error the bytes not yet queued are removed from
 * cork->length and OUTDISCARDS is bumped.
 */
static int __ip_append_data(struct sock *sk,
			    struct flowi4 *fl4,
			    struct sk_buff_head *queue,
			    struct inet_cork *cork,
			    int getfrag(void *from, char *to, int offset,
					int len, int odd, struct sk_buff *skb),
			    void *from, int length, int transhdrlen,
			    unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct sk_buff *skb;

	struct ip_options *opt = cork->opt;
	int hh_len;
	int exthdrlen;
	int mtu;
	int copy;
	int err;
	int offset = 0;
	unsigned int maxfraglen, fragheaderlen;
	int csummode = CHECKSUM_NONE;
	struct rtable *rt = (struct rtable *)cork->dst;

	skb = skb_peek_tail(queue);

	/* Extra (e.g. IPsec) header space only matters for the first skb. */
	exthdrlen = !skb ? rt->dst.header_len : 0;
	mtu = cork->fragsize;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
	/* Largest payload per fragment, rounded down to 8-byte multiple. */
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;

	/* Total IP datagram length is a 16-bit field. */
	if (cork->length + length > 0xFFFF - fragheaderlen) {
		ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport,
			       mtu-exthdrlen);
		return -EMSGSIZE;
	}

	/*
	 * transhdrlen > 0 means that this is the first fragment and we wish
	 * it won't be fragmented in the future.
	 */
	if (transhdrlen &&
	    length + fragheaderlen <= mtu &&
	    rt->dst.dev->features & NETIF_F_V4_CSUM &&
	    !exthdrlen)
		csummode = CHECKSUM_PARTIAL;

	cork->length += length;
	/* UFO fast path: single GSO skb instead of per-fragment skbs. */
	if (((length > mtu) || (skb && skb_is_gso(skb))) &&
	    (sk->sk_protocol == IPPROTO_UDP) &&
	    (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len) {
		err = ip_ufo_append_data(sk, queue, getfrag, from, length,
					 hh_len, fragheaderlen, transhdrlen,
					 maxfraglen, flags);
		if (err)
			goto error;
		return 0;
	}

	/* So, what's going on in the loop below?
	 *
	 * We use calculated fragment length to generate chained skb,
	 * each of segments is IP fragment ready for sending to network after
	 * adding appropriate IP header.
	 */

	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = mtu - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;
		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			struct sk_buff *skb_prev;
alloc_new_skb:
			skb_prev = skb;
			/* Bytes past the aligned boundary in the previous skb
			 * are moved into the new one ("fraggap").
			 */
			if (skb_prev)
				fraggap = skb_prev->len - maxfraglen;
			else
				fraggap = 0;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;
			if (datalen > mtu - fragheaderlen)
				datalen = maxfraglen - fragheaderlen;
			fraglen = datalen + fragheaderlen;

			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = fraglen;

			alloclen += exthdrlen;

			/* The last fragment gets additional space at tail.
			 * Note, with MSG_MORE we overallocate on fragments,
			 * because we have no idea what fragment will be
			 * the last.
			 */
			if (datalen == length + fraggap)
				alloclen += rt->dst.trailer_len;

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len + 15,
						(flags & MSG_DONTWAIT), &err);
			} else {
				/* Subsequent fragments: charge the socket
				 * write allocation directly, bounded at
				 * twice the send buffer.
				 */
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len + 15, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
				else
					/* only the initial fragment is
					   time stamped */
					cork->tx_flags = 0;
			}
			if (skb == NULL)
				goto error;

			/*
			 * Fill in the control structures
			 */
			skb->ip_summed = csummode;
			skb->csum = 0;
			skb_reserve(skb, hh_len);
			skb_shinfo(skb)->tx_flags = cork->tx_flags;

			/*
			 * Find where to start putting bytes.
			 */
			data = skb_put(skb, fraglen + exthdrlen);
			skb_set_network_header(skb, exthdrlen);
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			data += fragheaderlen + exthdrlen;

			if (fraggap) {
				/* Move the overhang from the previous skb and
				 * transfer its checksum contribution here.
				 */
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}

			copy = datalen - transhdrlen - fraggap;
			if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			/* Header bookkeeping applies to the first skb only. */
			transhdrlen = 0;
			exthdrlen = 0;
			csummode = CHECKSUM_NONE;

			/*
			 * Put the packet on the pending queue.
			 */
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG)) {
			/* No scatter-gather: copy into the linear area. */
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
					offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			/* Scatter-gather: append into page fragments,
			 * reusing the cork's current page when possible.
			 */
			int i = skb_shinfo(skb)->nr_frags;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
			struct page *page = cork->page;
			int off = cork->off;
			unsigned int left;

			if (page && (left = PAGE_SIZE - off) > 0) {
				if (copy >= left)
					copy = left;
				if (page != frag->page) {
					if (i == MAX_SKB_FRAGS) {
						err = -EMSGSIZE;
						goto error;
					}
					get_page(page);
					skb_fill_page_desc(skb, i, page, off, 0);
					frag = &skb_shinfo(skb)->frags[i];
				}
			} else if (i < MAX_SKB_FRAGS) {
				if (copy > PAGE_SIZE)
					copy = PAGE_SIZE;
				page = alloc_pages(sk->sk_allocation, 0);
				if (page == NULL) {
					err = -ENOMEM;
					goto error;
				}
				cork->page = page;
				cork->off = 0;

				skb_fill_page_desc(skb, i, page, 0, 0);
				frag = &skb_shinfo(skb)->frags[i];
			} else {
				err = -EMSGSIZE;
				goto error;
			}
			if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
				err = -EFAULT;
				goto error;
			}
			cork->off += copy;
			frag->size += copy;
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}

	return 0;

error:
	cork->length -= length;
	IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
	return err;
}
1033 | 1039 | ||
/*
 * Initialise the cork state for a new corked datagram: snapshot the
 * IP options from @ipc, steal the caller's route reference from @rtp
 * and record the fragment size to use.
 *
 * Returns 0 on success, -ENOBUFS if the options buffer cannot be
 * allocated, or -EFAULT if no route was supplied.
 */
static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
			 struct ipcm_cookie *ipc, struct rtable **rtp)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ip_options_rcu *opt;
	struct rtable *rt;

	/*
	 * setup for corking.
	 */
	opt = ipc->opt;
	if (opt) {
		if (cork->opt == NULL) {
			/* +40 = maximum size of IPv4 options */
			cork->opt = kmalloc(sizeof(struct ip_options) + 40,
					    sk->sk_allocation);
			if (unlikely(cork->opt == NULL))
				return -ENOBUFS;
		}
		memcpy(cork->opt, &opt->opt, sizeof(struct ip_options) + opt->opt.optlen);
		cork->flags |= IPCORK_OPT;
		cork->addr = ipc->addr;
	}
	rt = *rtp;
	if (unlikely(!rt))
		return -EFAULT;
	/*
	 * We steal reference to this route, caller should not release it
	 */
	*rtp = NULL;
	/* IP_PMTUDISC_PROBE fragments on the device MTU, ignoring path MTU. */
	cork->fragsize = inet->pmtudisc == IP_PMTUDISC_PROBE ?
			 rt->dst.dev->mtu : dst_mtu(&rt->dst);
	cork->dst = &rt->dst;
	cork->length = 0;
	cork->tx_flags = ipc->tx_flags;
	cork->page = NULL;
	cork->off = 0;

	return 0;
}
1073 | 1079 | ||
/*
 *	ip_append_data() and ip_append_page() can make one large IP datagram
 *	from many pieces of data.  Each piece will be held on the socket
 *	until ip_push_pending_frames() is called.  Each piece can be a page
 *	or non-page data.
 *
 *	Not only UDP, other transport protocols - e.g. raw sockets - can use
 *	this interface potentially.
 *
 *	LATER: length must be adjusted by pad at tail, when it is required.
 */
1085 | int ip_append_data(struct sock *sk, struct flowi4 *fl4, | 1091 | int ip_append_data(struct sock *sk, struct flowi4 *fl4, |
1086 | int getfrag(void *from, char *to, int offset, int len, | 1092 | int getfrag(void *from, char *to, int offset, int len, |
1087 | int odd, struct sk_buff *skb), | 1093 | int odd, struct sk_buff *skb), |
1088 | void *from, int length, int transhdrlen, | 1094 | void *from, int length, int transhdrlen, |
1089 | struct ipcm_cookie *ipc, struct rtable **rtp, | 1095 | struct ipcm_cookie *ipc, struct rtable **rtp, |
1090 | unsigned int flags) | 1096 | unsigned int flags) |
1091 | { | 1097 | { |
1092 | struct inet_sock *inet = inet_sk(sk); | 1098 | struct inet_sock *inet = inet_sk(sk); |
1093 | int err; | 1099 | int err; |
1094 | 1100 | ||
1095 | if (flags&MSG_PROBE) | 1101 | if (flags&MSG_PROBE) |
1096 | return 0; | 1102 | return 0; |
1097 | 1103 | ||
1098 | if (skb_queue_empty(&sk->sk_write_queue)) { | 1104 | if (skb_queue_empty(&sk->sk_write_queue)) { |
1099 | err = ip_setup_cork(sk, &inet->cork.base, ipc, rtp); | 1105 | err = ip_setup_cork(sk, &inet->cork.base, ipc, rtp); |
1100 | if (err) | 1106 | if (err) |
1101 | return err; | 1107 | return err; |
1102 | } else { | 1108 | } else { |
1103 | transhdrlen = 0; | 1109 | transhdrlen = 0; |
1104 | } | 1110 | } |
1105 | 1111 | ||
1106 | return __ip_append_data(sk, fl4, &sk->sk_write_queue, &inet->cork.base, getfrag, | 1112 | return __ip_append_data(sk, fl4, &sk->sk_write_queue, &inet->cork.base, getfrag, |
1107 | from, length, transhdrlen, flags); | 1113 | from, length, transhdrlen, flags); |
1108 | } | 1114 | } |
1109 | 1115 | ||
/* Append the caller's page (zero-copy, by reference) to the pending
 * datagram on sk's write queue.  Requires that ip_append_data() has
 * already set up the cork and that the route's device supports
 * scatter/gather.  Returns 0 or a negative errno.
 */
ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
		       int offset, size_t size, int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct sk_buff *skb;
	struct rtable *rt;
	struct ip_options *opt = NULL;
	struct inet_cork *cork;
	int hh_len;
	int mtu;
	int len;
	int err;
	unsigned int maxfraglen, fragheaderlen, fraggap;

	/* IP_HDRINCL sockets build their own headers; page append
	 * would corrupt them.
	 */
	if (inet->hdrincl)
		return -EPERM;

	if (flags&MSG_PROBE)
		return 0;

	/* A prior append must have corked the socket already. */
	if (skb_queue_empty(&sk->sk_write_queue))
		return -EINVAL;

	cork = &inet->cork.base;
	rt = (struct rtable *)cork->dst;
	if (cork->flags & IPCORK_OPT)
		opt = cork->opt;

	/* Page frags need scatter/gather support on the device. */
	if (!(rt->dst.dev->features&NETIF_F_SG))
		return -EOPNOTSUPP;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);
	mtu = cork->fragsize;

	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
	/* Non-final fragment payloads must be multiples of 8 bytes. */
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;

	/* Total IP datagram length is a 16 bit field. */
	if (cork->length + size > 0xFFFF - fragheaderlen) {
		ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, mtu);
		return -EMSGSIZE;
	}

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		return -EINVAL;

	cork->length += size;
	/* Oversized UDP payloads on UFO-capable hardware are marked for
	 * device segmentation instead of being fragmented here.
	 */
	if ((size + skb->len > mtu) &&
	    (sk->sk_protocol == IPPROTO_UDP) &&
	    (rt->dst.dev->features & NETIF_F_UFO)) {
		skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
	}


	while (size > 0) {
		int i;

		/* GSO skbs take everything in one go; otherwise limit to
		 * what still fits in the current fragment.
		 */
		if (skb_is_gso(skb))
			len = size;
		else {

			/* Check if the remaining data fits into current packet. */
			len = mtu - skb->len;
			if (len < size)
				len = maxfraglen - skb->len;
		}
		if (len <= 0) {
			/* Current fragment is full: allocate a new skb and
			 * move any bytes past the 8-byte boundary (fraggap)
			 * from the previous skb into it.
			 */
			struct sk_buff *skb_prev;
			int alloclen;

			skb_prev = skb;
			fraggap = skb_prev->len - maxfraglen;

			alloclen = fragheaderlen + hh_len + fraggap + 15;
			skb = sock_wmalloc(sk, alloclen, 1, sk->sk_allocation);
			if (unlikely(!skb)) {
				err = -ENOBUFS;
				goto error;
			}

			/*
			 *	Fill in the control structures
			 */
			skb->ip_summed = CHECKSUM_NONE;
			skb->csum = 0;
			skb_reserve(skb, hh_len);

			/*
			 *	Find where to start putting bytes.
			 */
			skb_put(skb, fragheaderlen + fraggap);
			skb_reset_network_header(skb);
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				/* Copy the overflow bytes over and fix up
				 * the old skb's checksum and length.
				 */
				skb->csum = skb_copy_and_csum_bits(skb_prev,
								   maxfraglen,
						    skb_transport_header(skb),
								   fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				pskb_trim_unique(skb_prev, maxfraglen);
			}

			/*
			 * Put the packet on the pending queue.
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		i = skb_shinfo(skb)->nr_frags;
		if (len > size)
			len = size;
		/* Extend the last page frag when contiguous, otherwise add a
		 * new frag slot (taking an extra page reference).
		 */
		if (skb_can_coalesce(skb, i, page, offset)) {
			skb_shinfo(skb)->frags[i-1].size += len;
		} else if (i < MAX_SKB_FRAGS) {
			get_page(page);
			skb_fill_page_desc(skb, i, page, offset, len);
		} else {
			err = -EMSGSIZE;
			goto error;
		}

		/* Software checksum path: fold the new bytes in now. */
		if (skb->ip_summed == CHECKSUM_NONE) {
			__wsum csum;
			csum = csum_page(page, offset, len);
			skb->csum = csum_block_add(skb->csum, csum, skb->len);
		}

		skb->len += len;
		skb->data_len += len;
		skb->truesize += len;
		atomic_add(len, &sk->sk_wmem_alloc);
		offset += len;
		size -= len;
	}
	return 0;

error:
	/* Undo the optimistic length update and count the discard. */
	cork->length -= size;
	IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
	return err;
}
1254 | 1260 | ||
1255 | static void ip_cork_release(struct inet_cork *cork) | 1261 | static void ip_cork_release(struct inet_cork *cork) |
1256 | { | 1262 | { |
1257 | cork->flags &= ~IPCORK_OPT; | 1263 | cork->flags &= ~IPCORK_OPT; |
1258 | kfree(cork->opt); | 1264 | kfree(cork->opt); |
1259 | cork->opt = NULL; | 1265 | cork->opt = NULL; |
1260 | dst_release(cork->dst); | 1266 | dst_release(cork->dst); |
1261 | cork->dst = NULL; | 1267 | cork->dst = NULL; |
1262 | } | 1268 | } |
1263 | 1269 | ||
/*
 * Combined all pending IP fragments on the socket as one IP datagram
 * and push them out.
 *
 * Collapses the queued skbs into a single skb (frag_list), builds the
 * IPv4 header in place, transfers the cork's route reference to the
 * skb, and releases the cork.  Returns the skb, or NULL if the queue
 * was empty.
 */
struct sk_buff *__ip_make_skb(struct sock *sk,
			      struct flowi4 *fl4,
			      struct sk_buff_head *queue,
			      struct inet_cork *cork)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct inet_sock *inet = inet_sk(sk);
	struct net *net = sock_net(sk);
	struct ip_options *opt = NULL;
	struct rtable *rt = (struct rtable *)cork->dst;
	struct iphdr *iph;
	__be16 df = 0;
	__u8 ttl;

	if ((skb = __skb_dequeue(queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	/* Chain all remaining skbs onto the head skb's frag_list,
	 * accounting their length/truesize into the head and dropping
	 * their socket ownership (the head now owns the memory).
	 */
	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Unless user demanded real pmtu discovery (IP_PMTUDISC_DO), we allow
	 * to fragment the frame generated here. No matter, what transforms
	 * how transforms change size of the packet, it will come out.
	 */
	if (inet->pmtudisc < IP_PMTUDISC_DO)
		skb->local_df = 1;

	/* DF bit is set when we want to see DF on outgoing frames.
	 * If local_df is set too, we still allow to fragment this frame
	 * locally. */
	if (inet->pmtudisc >= IP_PMTUDISC_DO ||
	    (skb->len <= dst_mtu(&rt->dst) &&
	     ip_dont_fragment(sk, &rt->dst)))
		df = htons(IP_DF);

	if (cork->flags & IPCORK_OPT)
		opt = cork->opt;

	if (rt->rt_type == RTN_MULTICAST)
		ttl = inet->mc_ttl;
	else
		ttl = ip_select_ttl(inet, &rt->dst);

	/* Build the IPv4 header in place at the head of the skb. */
	iph = (struct iphdr *)skb->data;
	iph->version = 4;
	iph->ihl = 5;
	iph->tos = inet->tos;
	iph->frag_off = df;
	ip_select_ident(iph, &rt->dst, sk);
	iph->ttl = ttl;
	iph->protocol = sk->sk_protocol;
	iph->saddr = fl4->saddr;
	iph->daddr = fl4->daddr;

	if (opt) {
		iph->ihl += opt->optlen>>2;
		ip_options_build(skb, opt, cork->addr, rt, 0);
	}

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;
	/*
	 * Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec
	 * on dst refcount
	 */
	cork->dst = NULL;
	skb_dst_set(skb, &rt->dst);

	if (iph->protocol == IPPROTO_ICMP)
		icmp_out_count(net, ((struct icmphdr *)
			skb_transport_header(skb))->type);

	ip_cork_release(cork);
out:
	return skb;
}
1357 | 1363 | ||
1358 | int ip_send_skb(struct sk_buff *skb) | 1364 | int ip_send_skb(struct sk_buff *skb) |
1359 | { | 1365 | { |
1360 | struct net *net = sock_net(skb->sk); | 1366 | struct net *net = sock_net(skb->sk); |
1361 | int err; | 1367 | int err; |
1362 | 1368 | ||
1363 | err = ip_local_out(skb); | 1369 | err = ip_local_out(skb); |
1364 | if (err) { | 1370 | if (err) { |
1365 | if (err > 0) | 1371 | if (err > 0) |
1366 | err = net_xmit_errno(err); | 1372 | err = net_xmit_errno(err); |
1367 | if (err) | 1373 | if (err) |
1368 | IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS); | 1374 | IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS); |
1369 | } | 1375 | } |
1370 | 1376 | ||
1371 | return err; | 1377 | return err; |
1372 | } | 1378 | } |
1373 | 1379 | ||
/* Finalize the socket's pending fragments into one datagram and send
 * it.  An empty queue is not an error.
 */
int ip_push_pending_frames(struct sock *sk, struct flowi4 *fl4)
{
	struct sk_buff *skb = ip_finish_skb(sk, fl4);

	/* Netfilter sees the whole, not yet fragmented skb. */
	return skb ? ip_send_skb(skb) : 0;
}
1385 | 1391 | ||
1386 | /* | 1392 | /* |
1387 | * Throw away all pending data on the socket. | 1393 | * Throw away all pending data on the socket. |
1388 | */ | 1394 | */ |
1389 | static void __ip_flush_pending_frames(struct sock *sk, | 1395 | static void __ip_flush_pending_frames(struct sock *sk, |
1390 | struct sk_buff_head *queue, | 1396 | struct sk_buff_head *queue, |
1391 | struct inet_cork *cork) | 1397 | struct inet_cork *cork) |
1392 | { | 1398 | { |
1393 | struct sk_buff *skb; | 1399 | struct sk_buff *skb; |
1394 | 1400 | ||
1395 | while ((skb = __skb_dequeue_tail(queue)) != NULL) | 1401 | while ((skb = __skb_dequeue_tail(queue)) != NULL) |
1396 | kfree_skb(skb); | 1402 | kfree_skb(skb); |
1397 | 1403 | ||
1398 | ip_cork_release(cork); | 1404 | ip_cork_release(cork); |
1399 | } | 1405 | } |
1400 | 1406 | ||
1401 | void ip_flush_pending_frames(struct sock *sk) | 1407 | void ip_flush_pending_frames(struct sock *sk) |
1402 | { | 1408 | { |
1403 | __ip_flush_pending_frames(sk, &sk->sk_write_queue, &inet_sk(sk)->cork.base); | 1409 | __ip_flush_pending_frames(sk, &sk->sk_write_queue, &inet_sk(sk)->cork.base); |
1404 | } | 1410 | } |
1405 | 1411 | ||
1406 | struct sk_buff *ip_make_skb(struct sock *sk, | 1412 | struct sk_buff *ip_make_skb(struct sock *sk, |
1407 | struct flowi4 *fl4, | 1413 | struct flowi4 *fl4, |
1408 | int getfrag(void *from, char *to, int offset, | 1414 | int getfrag(void *from, char *to, int offset, |
1409 | int len, int odd, struct sk_buff *skb), | 1415 | int len, int odd, struct sk_buff *skb), |
1410 | void *from, int length, int transhdrlen, | 1416 | void *from, int length, int transhdrlen, |
1411 | struct ipcm_cookie *ipc, struct rtable **rtp, | 1417 | struct ipcm_cookie *ipc, struct rtable **rtp, |
1412 | unsigned int flags) | 1418 | unsigned int flags) |
1413 | { | 1419 | { |
1414 | struct inet_cork cork; | 1420 | struct inet_cork cork; |
1415 | struct sk_buff_head queue; | 1421 | struct sk_buff_head queue; |
1416 | int err; | 1422 | int err; |
1417 | 1423 | ||
1418 | if (flags & MSG_PROBE) | 1424 | if (flags & MSG_PROBE) |
1419 | return NULL; | 1425 | return NULL; |
1420 | 1426 | ||
1421 | __skb_queue_head_init(&queue); | 1427 | __skb_queue_head_init(&queue); |
1422 | 1428 | ||
1423 | cork.flags = 0; | 1429 | cork.flags = 0; |
1424 | cork.addr = 0; | 1430 | cork.addr = 0; |
1425 | cork.opt = NULL; | 1431 | cork.opt = NULL; |
1426 | err = ip_setup_cork(sk, &cork, ipc, rtp); | 1432 | err = ip_setup_cork(sk, &cork, ipc, rtp); |
1427 | if (err) | 1433 | if (err) |
1428 | return ERR_PTR(err); | 1434 | return ERR_PTR(err); |
1429 | 1435 | ||
1430 | err = __ip_append_data(sk, fl4, &queue, &cork, getfrag, | 1436 | err = __ip_append_data(sk, fl4, &queue, &cork, getfrag, |
1431 | from, length, transhdrlen, flags); | 1437 | from, length, transhdrlen, flags); |
1432 | if (err) { | 1438 | if (err) { |
1433 | __ip_flush_pending_frames(sk, &queue, &cork); | 1439 | __ip_flush_pending_frames(sk, &queue, &cork); |
1434 | return ERR_PTR(err); | 1440 | return ERR_PTR(err); |
1435 | } | 1441 | } |
1436 | 1442 | ||
1437 | return __ip_make_skb(sk, fl4, &queue, &cork); | 1443 | return __ip_make_skb(sk, fl4, &queue, &cork); |
1438 | } | 1444 | } |
1439 | 1445 | ||
1440 | /* | 1446 | /* |
1441 | * Fetch data from kernel space and fill in checksum if needed. | 1447 | * Fetch data from kernel space and fill in checksum if needed. |
1442 | */ | 1448 | */ |
1443 | static int ip_reply_glue_bits(void *dptr, char *to, int offset, | 1449 | static int ip_reply_glue_bits(void *dptr, char *to, int offset, |
1444 | int len, int odd, struct sk_buff *skb) | 1450 | int len, int odd, struct sk_buff *skb) |
1445 | { | 1451 | { |
1446 | __wsum csum; | 1452 | __wsum csum; |
1447 | 1453 | ||
1448 | csum = csum_partial_copy_nocheck(dptr+offset, to, len, 0); | 1454 | csum = csum_partial_copy_nocheck(dptr+offset, to, len, 0); |
1449 | skb->csum = csum_block_add(skb->csum, csum, odd); | 1455 | skb->csum = csum_block_add(skb->csum, csum, odd); |
1450 | return 0; | 1456 | return 0; |
1451 | } | 1457 | } |
1452 | 1458 | ||
/*
 *	Generic function to send a packet as reply to another packet.
 *	Used to send TCP resets so far. ICMP should use this function too.
 *
 *	Should run single threaded per socket because it uses the sock
 *     	structure to pass arguments.
 */
void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr,
		   struct ip_reply_arg *arg, unsigned int len)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ip_options_data replyopts;
	struct ipcm_cookie ipc;
	struct flowi4 fl4;
	struct rtable *rt = skb_rtable(skb);

	/* Mirror the incoming packet's IP options into the reply. */
	if (ip_options_echo(&replyopts.opt.opt, skb))
		return;

	ipc.addr = daddr;
	ipc.opt = NULL;
	ipc.tx_flags = 0;

	if (replyopts.opt.opt.optlen) {
		ipc.opt = &replyopts.opt;

		/* With source routing, route via the first hop instead. */
		if (replyopts.opt.opt.srr)
			daddr = replyopts.opt.opt.faddr;
	}

	/* Route the reply back toward the sender of the original skb. */
	flowi4_init_output(&fl4, arg->bound_dev_if, 0,
			   RT_TOS(ip_hdr(skb)->tos),
			   RT_SCOPE_UNIVERSE, sk->sk_protocol,
			   ip_reply_arg_flowi_flags(arg),
			   daddr, rt->rt_spec_dst,
			   tcp_hdr(skb)->source, tcp_hdr(skb)->dest);
	security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
	rt = ip_route_output_key(sock_net(sk), &fl4);
	if (IS_ERR(rt))
		return;

	/* And let IP do all the hard work.

	   This chunk is not reenterable, hence spinlock.
	   Note that it uses the fact, that this function is called
	   with locally disabled BH and that sk cannot be already spinlocked.
	 */
	bh_lock_sock(sk);
	inet->tos = ip_hdr(skb)->tos;
	sk->sk_priority = skb->priority;
	sk->sk_protocol = ip_hdr(skb)->protocol;
	sk->sk_bound_dev_if = arg->bound_dev_if;
	ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base, len, 0,
		       &ipc, &rt, MSG_DONTWAIT);
	if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
		/* Patch the caller-requested checksum slot before pushing. */
		if (arg->csumoffset >= 0)
			*((__sum16 *)skb_transport_header(skb) +
			  arg->csumoffset) = csum_fold(csum_add(skb->csum,
								arg->csum));
		skb->ip_summed = CHECKSUM_NONE;
		ip_push_pending_frames(sk, &fl4);
	}

	bh_unlock_sock(sk);

	ip_rt_put(rt);
}
1520 | 1526 | ||
/* Boot-time IP layer initialization: routing cache, inet peer cache,
 * and (when configured) the IGMP multicast /proc entries.
 */
void __init ip_init(void)
{
	ip_rt_init();
	inet_initpeers();

#if defined(CONFIG_IP_MULTICAST) && defined(CONFIG_PROC_FS)
	igmp_mc_proc_init();
#endif
}
1530 | 1536 |
1 | /* | 1 | /* |
2 | * INET An implementation of the TCP/IP protocol suite for the LINUX | 2 | * INET An implementation of the TCP/IP protocol suite for the LINUX |
3 | * operating system. INET is implemented using the BSD Socket | 3 | * operating system. INET is implemented using the BSD Socket |
4 | * interface as the means of communication with the user level. | 4 | * interface as the means of communication with the user level. |
5 | * | 5 | * |
6 | * ROUTE - implementation of the IP router. | 6 | * ROUTE - implementation of the IP router. |
7 | * | 7 | * |
8 | * Authors: Ross Biro | 8 | * Authors: Ross Biro |
9 | * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> | 9 | * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> |
10 | * Alan Cox, <gw4pts@gw4pts.ampr.org> | 10 | * Alan Cox, <gw4pts@gw4pts.ampr.org> |
11 | * Linus Torvalds, <Linus.Torvalds@helsinki.fi> | 11 | * Linus Torvalds, <Linus.Torvalds@helsinki.fi> |
12 | * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> | 12 | * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> |
13 | * | 13 | * |
14 | * Fixes: | 14 | * Fixes: |
15 | * Alan Cox : Verify area fixes. | 15 | * Alan Cox : Verify area fixes. |
16 | * Alan Cox : cli() protects routing changes | 16 | * Alan Cox : cli() protects routing changes |
17 | * Rui Oliveira : ICMP routing table updates | 17 | * Rui Oliveira : ICMP routing table updates |
18 | * (rco@di.uminho.pt) Routing table insertion and update | 18 | * (rco@di.uminho.pt) Routing table insertion and update |
19 | * Linus Torvalds : Rewrote bits to be sensible | 19 | * Linus Torvalds : Rewrote bits to be sensible |
20 | * Alan Cox : Added BSD route gw semantics | 20 | * Alan Cox : Added BSD route gw semantics |
21 | * Alan Cox : Super /proc >4K | 21 | * Alan Cox : Super /proc >4K |
22 | * Alan Cox : MTU in route table | 22 | * Alan Cox : MTU in route table |
23 | * Alan Cox : MSS actually. Also added the window | 23 | * Alan Cox : MSS actually. Also added the window |
24 | * clamper. | 24 | * clamper. |
25 | * Sam Lantinga : Fixed route matching in rt_del() | 25 | * Sam Lantinga : Fixed route matching in rt_del() |
26 | * Alan Cox : Routing cache support. | 26 | * Alan Cox : Routing cache support. |
27 | * Alan Cox : Removed compatibility cruft. | 27 | * Alan Cox : Removed compatibility cruft. |
28 | * Alan Cox : RTF_REJECT support. | 28 | * Alan Cox : RTF_REJECT support. |
29 | * Alan Cox : TCP irtt support. | 29 | * Alan Cox : TCP irtt support. |
30 | * Jonathan Naylor : Added Metric support. | 30 | * Jonathan Naylor : Added Metric support. |
31 | * Miquel van Smoorenburg : BSD API fixes. | 31 | * Miquel van Smoorenburg : BSD API fixes. |
32 | * Miquel van Smoorenburg : Metrics. | 32 | * Miquel van Smoorenburg : Metrics. |
33 | * Alan Cox : Use __u32 properly | 33 | * Alan Cox : Use __u32 properly |
34 | * Alan Cox : Aligned routing errors more closely with BSD | 34 | * Alan Cox : Aligned routing errors more closely with BSD |
35 | * our system is still very different. | 35 | * our system is still very different. |
36 | * Alan Cox : Faster /proc handling | 36 | * Alan Cox : Faster /proc handling |
37 | * Alexey Kuznetsov : Massive rework to support tree based routing, | 37 | * Alexey Kuznetsov : Massive rework to support tree based routing, |
38 | * routing caches and better behaviour. | 38 | * routing caches and better behaviour. |
39 | * | 39 | * |
40 | * Olaf Erb : irtt wasn't being copied right. | 40 | * Olaf Erb : irtt wasn't being copied right. |
41 | * Bjorn Ekwall : Kerneld route support. | 41 | * Bjorn Ekwall : Kerneld route support. |
42 | * Alan Cox : Multicast fixed (I hope) | 42 | * Alan Cox : Multicast fixed (I hope) |
43 | * Pavel Krauz : Limited broadcast fixed | 43 | * Pavel Krauz : Limited broadcast fixed |
44 | * Mike McLagan : Routing by source | 44 | * Mike McLagan : Routing by source |
45 | * Alexey Kuznetsov : End of old history. Split to fib.c and | 45 | * Alexey Kuznetsov : End of old history. Split to fib.c and |
46 | * route.c and rewritten from scratch. | 46 | * route.c and rewritten from scratch. |
47 | * Andi Kleen : Load-limit warning messages. | 47 | * Andi Kleen : Load-limit warning messages. |
48 | * Vitaly E. Lavrov : Transparent proxy revived after year coma. | 48 | * Vitaly E. Lavrov : Transparent proxy revived after year coma. |
49 | * Vitaly E. Lavrov : Race condition in ip_route_input_slow. | 49 | * Vitaly E. Lavrov : Race condition in ip_route_input_slow. |
50 | * Tobias Ringstrom : Uninitialized res.type in ip_route_output_slow. | 50 | * Tobias Ringstrom : Uninitialized res.type in ip_route_output_slow. |
51 | * Vladimir V. Ivanov : IP rule info (flowid) is really useful. | 51 | * Vladimir V. Ivanov : IP rule info (flowid) is really useful. |
52 | * Marc Boucher : routing by fwmark | 52 | * Marc Boucher : routing by fwmark |
53 | * Robert Olsson : Added rt_cache statistics | 53 | * Robert Olsson : Added rt_cache statistics |
54 | * Arnaldo C. Melo : Convert proc stuff to seq_file | 54 | * Arnaldo C. Melo : Convert proc stuff to seq_file |
55 | * Eric Dumazet : hashed spinlocks and rt_check_expire() fixes. | 55 | * Eric Dumazet : hashed spinlocks and rt_check_expire() fixes. |
56 | * Ilia Sotnikov : Ignore TOS on PMTUD and Redirect | 56 | * Ilia Sotnikov : Ignore TOS on PMTUD and Redirect |
57 | * Ilia Sotnikov : Removed TOS from hash calculations | 57 | * Ilia Sotnikov : Removed TOS from hash calculations |
58 | * | 58 | * |
59 | * This program is free software; you can redistribute it and/or | 59 | * This program is free software; you can redistribute it and/or |
60 | * modify it under the terms of the GNU General Public License | 60 | * modify it under the terms of the GNU General Public License |
61 | * as published by the Free Software Foundation; either version | 61 | * as published by the Free Software Foundation; either version |
62 | * 2 of the License, or (at your option) any later version. | 62 | * 2 of the License, or (at your option) any later version. |
63 | */ | 63 | */ |
64 | 64 | ||
65 | #include <linux/module.h> | 65 | #include <linux/module.h> |
66 | #include <asm/uaccess.h> | 66 | #include <asm/uaccess.h> |
67 | #include <asm/system.h> | 67 | #include <asm/system.h> |
68 | #include <linux/bitops.h> | 68 | #include <linux/bitops.h> |
69 | #include <linux/types.h> | 69 | #include <linux/types.h> |
70 | #include <linux/kernel.h> | 70 | #include <linux/kernel.h> |
71 | #include <linux/mm.h> | 71 | #include <linux/mm.h> |
72 | #include <linux/bootmem.h> | 72 | #include <linux/bootmem.h> |
73 | #include <linux/string.h> | 73 | #include <linux/string.h> |
74 | #include <linux/socket.h> | 74 | #include <linux/socket.h> |
75 | #include <linux/sockios.h> | 75 | #include <linux/sockios.h> |
76 | #include <linux/errno.h> | 76 | #include <linux/errno.h> |
77 | #include <linux/in.h> | 77 | #include <linux/in.h> |
78 | #include <linux/inet.h> | 78 | #include <linux/inet.h> |
79 | #include <linux/netdevice.h> | 79 | #include <linux/netdevice.h> |
80 | #include <linux/proc_fs.h> | 80 | #include <linux/proc_fs.h> |
81 | #include <linux/init.h> | 81 | #include <linux/init.h> |
82 | #include <linux/workqueue.h> | 82 | #include <linux/workqueue.h> |
83 | #include <linux/skbuff.h> | 83 | #include <linux/skbuff.h> |
84 | #include <linux/inetdevice.h> | 84 | #include <linux/inetdevice.h> |
85 | #include <linux/igmp.h> | 85 | #include <linux/igmp.h> |
86 | #include <linux/pkt_sched.h> | 86 | #include <linux/pkt_sched.h> |
87 | #include <linux/mroute.h> | 87 | #include <linux/mroute.h> |
88 | #include <linux/netfilter_ipv4.h> | 88 | #include <linux/netfilter_ipv4.h> |
89 | #include <linux/random.h> | 89 | #include <linux/random.h> |
90 | #include <linux/jhash.h> | 90 | #include <linux/jhash.h> |
91 | #include <linux/rcupdate.h> | 91 | #include <linux/rcupdate.h> |
92 | #include <linux/times.h> | 92 | #include <linux/times.h> |
93 | #include <linux/slab.h> | 93 | #include <linux/slab.h> |
94 | #include <net/dst.h> | 94 | #include <net/dst.h> |
95 | #include <net/net_namespace.h> | 95 | #include <net/net_namespace.h> |
96 | #include <net/protocol.h> | 96 | #include <net/protocol.h> |
97 | #include <net/ip.h> | 97 | #include <net/ip.h> |
98 | #include <net/route.h> | 98 | #include <net/route.h> |
99 | #include <net/inetpeer.h> | 99 | #include <net/inetpeer.h> |
100 | #include <net/sock.h> | 100 | #include <net/sock.h> |
101 | #include <net/ip_fib.h> | 101 | #include <net/ip_fib.h> |
102 | #include <net/arp.h> | 102 | #include <net/arp.h> |
103 | #include <net/tcp.h> | 103 | #include <net/tcp.h> |
104 | #include <net/icmp.h> | 104 | #include <net/icmp.h> |
105 | #include <net/xfrm.h> | 105 | #include <net/xfrm.h> |
106 | #include <net/netevent.h> | 106 | #include <net/netevent.h> |
107 | #include <net/rtnetlink.h> | 107 | #include <net/rtnetlink.h> |
108 | #ifdef CONFIG_SYSCTL | 108 | #ifdef CONFIG_SYSCTL |
109 | #include <linux/sysctl.h> | 109 | #include <linux/sysctl.h> |
110 | #endif | 110 | #endif |
111 | #include <net/atmclip.h> | 111 | #include <net/atmclip.h> |
112 | 112 | ||
/* Mask a flowi4 TOS down to the bits relevant for route lookup. */
#define RT_FL_TOS(oldflp4) \
	((u32)(oldflp4->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK)))

/* Upper bound enforced on an IPv4 route MTU. */
#define IP_MAX_MTU	0xFFF0

#define RT_GC_TIMEOUT (300*HZ)

/*
 * Route cache tuning knobs (timeouts in jiffies).  Presumably exposed
 * via sysctl — the sysctl table itself is not in view here.
 */
static int ip_rt_max_size;
static int ip_rt_gc_timeout __read_mostly	= RT_GC_TIMEOUT;
static int ip_rt_gc_interval __read_mostly	= 60 * HZ;
static int ip_rt_gc_min_interval __read_mostly	= HZ / 2;
static int ip_rt_redirect_number __read_mostly	= 9;
static int ip_rt_redirect_load __read_mostly	= HZ / 50;
static int ip_rt_redirect_silence __read_mostly	= ((HZ / 50) << (9 + 1));
static int ip_rt_error_cost __read_mostly	= HZ;
static int ip_rt_error_burst __read_mostly	= 5 * HZ;
static int ip_rt_gc_elasticity __read_mostly	= 8;
static int ip_rt_mtu_expires __read_mostly	= 10 * 60 * HZ;
static int ip_rt_min_pmtu __read_mostly		= 512 + 20 + 20;
static int ip_rt_min_advmss __read_mostly	= 256;
static int rt_chain_length_max __read_mostly	= 20;
134 | 134 | ||
135 | /* | 135 | /* |
136 | * Interface to generic destination cache. | 136 | * Interface to generic destination cache. |
137 | */ | 137 | */ |
138 | 138 | ||
139 | static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie); | 139 | static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie); |
140 | static unsigned int ipv4_default_advmss(const struct dst_entry *dst); | 140 | static unsigned int ipv4_default_advmss(const struct dst_entry *dst); |
141 | static unsigned int ipv4_default_mtu(const struct dst_entry *dst); | 141 | static unsigned int ipv4_default_mtu(const struct dst_entry *dst); |
142 | static void ipv4_dst_destroy(struct dst_entry *dst); | 142 | static void ipv4_dst_destroy(struct dst_entry *dst); |
143 | static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); | 143 | static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); |
144 | static void ipv4_link_failure(struct sk_buff *skb); | 144 | static void ipv4_link_failure(struct sk_buff *skb); |
145 | static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu); | 145 | static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu); |
146 | static int rt_garbage_collect(struct dst_ops *ops); | 146 | static int rt_garbage_collect(struct dst_ops *ops); |
147 | 147 | ||
/* dst_ops->ifdown hook: intentionally a no-op for IPv4 routes. */
static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			    int how)
{
}
152 | 152 | ||
/*
 * dst_ops->cow_metrics: give this route writable metrics.
 *
 * @old is the current dst->_metrics word (read-only).  Writable storage
 * lives in the bound inet_peer; a peer is bound on demand.  Returns a
 * pointer to the writable metrics array, or NULL when no peer could be
 * bound or a concurrent writer installed a read-only value first.
 */
static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
{
	struct rtable *rt = (struct rtable *) dst;
	struct inet_peer *peer;
	u32 *p = NULL;

	/* Metrics live in the inet_peer; bind one if not done yet. */
	if (!rt->peer)
		rt_bind_peer(rt, rt->rt_dst, 1);

	peer = rt->peer;
	if (peer) {
		u32 *old_p = __DST_METRICS_PTR(old);
		unsigned long prev, new;

		p = peer->metrics;
		/* First user of this peer's metrics: seed from the old array. */
		if (inet_metrics_new(peer))
			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);

		/* Atomically swing dst->_metrics from @old to the peer array. */
		new = (unsigned long) p;
		prev = cmpxchg(&dst->_metrics, old, new);

		if (prev != old) {
			/* Lost the race: use whatever the winner installed,
			 * unless that value is marked read-only.
			 */
			p = __DST_METRICS_PTR(prev);
			if (prev & DST_METRICS_READ_ONLY)
				p = NULL;
		} else {
			/* We won: metrics now come from the peer, so the
			 * fib_info reference can be dropped.
			 */
			if (rt->fi) {
				fib_info_put(rt->fi);
				rt->fi = NULL;
			}
		}
	}
	return p;
}
187 | 187 | ||
188 | static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const void *daddr); | 188 | static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const void *daddr); |
189 | 189 | ||
/*
 * dst_ops for IPv4: wires the generic destination-cache core to the
 * IPv4-specific handlers defined in this file.
 */
static struct dst_ops ipv4_dst_ops = {
	.family =		AF_INET,
	.protocol =		cpu_to_be16(ETH_P_IP),
	.gc =			rt_garbage_collect,
	.check =		ipv4_dst_check,
	.default_advmss =	ipv4_default_advmss,
	.default_mtu =		ipv4_default_mtu,
	.cow_metrics =		ipv4_cow_metrics,
	.destroy =		ipv4_dst_destroy,
	.ifdown =		ipv4_dst_ifdown,
	.negative_advice =	ipv4_negative_advice,
	.link_failure =		ipv4_link_failure,
	.update_pmtu =		ip_rt_update_pmtu,
	.local_out =		__ip_local_out,
	.neigh_lookup =		ipv4_neigh_lookup,
};
206 | 206 | ||
207 | #define ECN_OR_COST(class) TC_PRIO_##class | 207 | #define ECN_OR_COST(class) TC_PRIO_##class |
208 | 208 | ||
/*
 * 16-entry TOS -> packet-scheduler priority table.  ECN_OR_COST(c)
 * expands to TC_PRIO_##c, so each ECN variant shares the band of its
 * non-ECN neighbour.
 */
const __u8 ip_tos2prio[16] = {
	TC_PRIO_BESTEFFORT,
	ECN_OR_COST(BESTEFFORT),
	TC_PRIO_BESTEFFORT,
	ECN_OR_COST(BESTEFFORT),
	TC_PRIO_BULK,
	ECN_OR_COST(BULK),
	TC_PRIO_BULK,
	ECN_OR_COST(BULK),
	TC_PRIO_INTERACTIVE,
	ECN_OR_COST(INTERACTIVE),
	TC_PRIO_INTERACTIVE,
	ECN_OR_COST(INTERACTIVE),
	TC_PRIO_INTERACTIVE_BULK,
	ECN_OR_COST(INTERACTIVE_BULK),
	TC_PRIO_INTERACTIVE_BULK,
	ECN_OR_COST(INTERACTIVE_BULK)
};
227 | 227 | ||
228 | 228 | ||
229 | /* | 229 | /* |
230 | * Route cache. | 230 | * Route cache. |
231 | */ | 231 | */ |
232 | 232 | ||
/* The locking scheme is rather straightforward:
234 | * | 234 | * |
235 | * 1) Read-Copy Update protects the buckets of the central route hash. | 235 | * 1) Read-Copy Update protects the buckets of the central route hash. |
236 | * 2) Only writers remove entries, and they hold the lock | 236 | * 2) Only writers remove entries, and they hold the lock |
237 | * as they look at rtable reference counts. | 237 | * as they look at rtable reference counts. |
238 | * 3) Only readers acquire references to rtable entries, | 238 | * 3) Only readers acquire references to rtable entries, |
239 | * they do so with atomic increments and with the | 239 | * they do so with atomic increments and with the |
240 | * lock held. | 240 | * lock held. |
241 | */ | 241 | */ |
242 | 242 | ||
/*
 * One bucket of the central route-cache hash: an RCU-protected chain
 * of rtable entries (linked through dst.rt_next).
 */
struct rt_hash_bucket {
	struct rtable __rcu	*chain;
};
246 | 246 | ||
247 | #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \ | 247 | #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \ |
248 | defined(CONFIG_PROVE_LOCKING) | 248 | defined(CONFIG_PROVE_LOCKING) |
249 | /* | 249 | /* |
250 | * Instead of using one spinlock for each rt_hash_bucket, we use a table of spinlocks | 250 | * Instead of using one spinlock for each rt_hash_bucket, we use a table of spinlocks |
251 | * The size of this table is a power of two and depends on the number of CPUS. | 251 | * The size of this table is a power of two and depends on the number of CPUS. |
252 | * (on lockdep we have a quite big spinlock_t, so keep the size down there) | 252 | * (on lockdep we have a quite big spinlock_t, so keep the size down there) |
253 | */ | 253 | */ |
254 | #ifdef CONFIG_LOCKDEP | 254 | #ifdef CONFIG_LOCKDEP |
255 | # define RT_HASH_LOCK_SZ 256 | 255 | # define RT_HASH_LOCK_SZ 256 |
256 | #else | 256 | #else |
257 | # if NR_CPUS >= 32 | 257 | # if NR_CPUS >= 32 |
258 | # define RT_HASH_LOCK_SZ 4096 | 258 | # define RT_HASH_LOCK_SZ 4096 |
259 | # elif NR_CPUS >= 16 | 259 | # elif NR_CPUS >= 16 |
260 | # define RT_HASH_LOCK_SZ 2048 | 260 | # define RT_HASH_LOCK_SZ 2048 |
261 | # elif NR_CPUS >= 8 | 261 | # elif NR_CPUS >= 8 |
262 | # define RT_HASH_LOCK_SZ 1024 | 262 | # define RT_HASH_LOCK_SZ 1024 |
263 | # elif NR_CPUS >= 4 | 263 | # elif NR_CPUS >= 4 |
264 | # define RT_HASH_LOCK_SZ 512 | 264 | # define RT_HASH_LOCK_SZ 512 |
265 | # else | 265 | # else |
266 | # define RT_HASH_LOCK_SZ 256 | 266 | # define RT_HASH_LOCK_SZ 256 |
267 | # endif | 267 | # endif |
268 | #endif | 268 | #endif |
269 | 269 | ||
270 | static spinlock_t *rt_hash_locks; | 270 | static spinlock_t *rt_hash_locks; |
271 | # define rt_hash_lock_addr(slot) &rt_hash_locks[(slot) & (RT_HASH_LOCK_SZ - 1)] | 271 | # define rt_hash_lock_addr(slot) &rt_hash_locks[(slot) & (RT_HASH_LOCK_SZ - 1)] |
272 | 272 | ||
273 | static __init void rt_hash_lock_init(void) | 273 | static __init void rt_hash_lock_init(void) |
274 | { | 274 | { |
275 | int i; | 275 | int i; |
276 | 276 | ||
277 | rt_hash_locks = kmalloc(sizeof(spinlock_t) * RT_HASH_LOCK_SZ, | 277 | rt_hash_locks = kmalloc(sizeof(spinlock_t) * RT_HASH_LOCK_SZ, |
278 | GFP_KERNEL); | 278 | GFP_KERNEL); |
279 | if (!rt_hash_locks) | 279 | if (!rt_hash_locks) |
280 | panic("IP: failed to allocate rt_hash_locks\n"); | 280 | panic("IP: failed to allocate rt_hash_locks\n"); |
281 | 281 | ||
282 | for (i = 0; i < RT_HASH_LOCK_SZ; i++) | 282 | for (i = 0; i < RT_HASH_LOCK_SZ; i++) |
283 | spin_lock_init(&rt_hash_locks[i]); | 283 | spin_lock_init(&rt_hash_locks[i]); |
284 | } | 284 | } |
285 | #else | 285 | #else |
286 | # define rt_hash_lock_addr(slot) NULL | 286 | # define rt_hash_lock_addr(slot) NULL |
287 | 287 | ||
/* No lock table on UP / non-debug builds: nothing to initialize. */
static inline void rt_hash_lock_init(void)
{
}
291 | #endif | 291 | #endif |
292 | 292 | ||
/* The central route-cache hash table; mask and log2 size kept alongside. */
static struct rt_hash_bucket 	*rt_hash_table __read_mostly;
static unsigned			rt_hash_mask __read_mostly;
static unsigned int		rt_hash_log  __read_mostly;

/* Per-cpu route-cache statistics counters. */
static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
#define RT_CACHE_STAT_INC(field) __this_cpu_inc(rt_cache_stat.field)
299 | 299 | ||
300 | static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx, | 300 | static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx, |
301 | int genid) | 301 | int genid) |
302 | { | 302 | { |
303 | return jhash_3words((__force u32)daddr, (__force u32)saddr, | 303 | return jhash_3words((__force u32)daddr, (__force u32)saddr, |
304 | idx, genid) | 304 | idx, genid) |
305 | & rt_hash_mask; | 305 | & rt_hash_mask; |
306 | } | 306 | } |
307 | 307 | ||
308 | static inline int rt_genid(struct net *net) | 308 | static inline int rt_genid(struct net *net) |
309 | { | 309 | { |
310 | return atomic_read(&net->ipv4.rt_genid); | 310 | return atomic_read(&net->ipv4.rt_genid); |
311 | } | 311 | } |
312 | 312 | ||
313 | #ifdef CONFIG_PROC_FS | 313 | #ifdef CONFIG_PROC_FS |
/* Per-open iterator state for the route-cache seq_file dump. */
struct rt_cache_iter_state {
	struct seq_net_private p;
	int bucket;		/* current hash bucket index */
	int genid;		/* generation snapshotted at seq_start */
};
319 | 319 | ||
/*
 * Find the first matching route-cache entry, scanning buckets from the
 * top of the hash table downwards.
 *
 * Locking contract: on a non-NULL return, rcu_read_lock_bh() is held and
 * stays held while the walk continues (released in __rt_cache_get_next()
 * when moving on, or in rt_cache_seq_stop()).  On NULL return no lock is
 * held.
 */
static struct rtable *rt_cache_get_first(struct seq_file *seq)
{
	struct rt_cache_iter_state *st = seq->private;
	struct rtable *r = NULL;

	for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) {
		/* Lockless peek to skip empty buckets cheaply. */
		if (!rcu_dereference_raw(rt_hash_table[st->bucket].chain))
			continue;
		rcu_read_lock_bh();
		r = rcu_dereference_bh(rt_hash_table[st->bucket].chain);
		while (r) {
			/* Only entries of this netns and of the generation
			 * snapshotted at seq_start time are reported.
			 */
			if (dev_net(r->dst.dev) == seq_file_net(seq) &&
			    r->rt_genid == st->genid)
				return r;
			r = rcu_dereference_bh(r->dst.rt_next);
		}
		rcu_read_unlock_bh();
	}
	return r;
}
340 | 340 | ||
/*
 * Advance to the next entry after @r, dropping into lower buckets as
 * chains are exhausted (no netns/genid filtering here — the caller does
 * that).
 *
 * Locking contract: entered with rcu_read_lock_bh() held (taken by
 * rt_cache_get_first() or a previous call).  The lock is dropped while
 * hunting for the next non-empty bucket and re-taken before walking it;
 * on NULL return (iteration finished) no lock is held.
 */
static struct rtable *__rt_cache_get_next(struct seq_file *seq,
					  struct rtable *r)
{
	struct rt_cache_iter_state *st = seq->private;

	r = rcu_dereference_bh(r->dst.rt_next);
	while (!r) {
		rcu_read_unlock_bh();
		do {
			if (--st->bucket < 0)
				return NULL;
		} while (!rcu_dereference_raw(rt_hash_table[st->bucket].chain));
		rcu_read_lock_bh();
		r = rcu_dereference_bh(rt_hash_table[st->bucket].chain);
	}
	return r;
}
358 | 358 | ||
359 | static struct rtable *rt_cache_get_next(struct seq_file *seq, | 359 | static struct rtable *rt_cache_get_next(struct seq_file *seq, |
360 | struct rtable *r) | 360 | struct rtable *r) |
361 | { | 361 | { |
362 | struct rt_cache_iter_state *st = seq->private; | 362 | struct rt_cache_iter_state *st = seq->private; |
363 | while ((r = __rt_cache_get_next(seq, r)) != NULL) { | 363 | while ((r = __rt_cache_get_next(seq, r)) != NULL) { |
364 | if (dev_net(r->dst.dev) != seq_file_net(seq)) | 364 | if (dev_net(r->dst.dev) != seq_file_net(seq)) |
365 | continue; | 365 | continue; |
366 | if (r->rt_genid == st->genid) | 366 | if (r->rt_genid == st->genid) |
367 | break; | 367 | break; |
368 | } | 368 | } |
369 | return r; | 369 | return r; |
370 | } | 370 | } |
371 | 371 | ||
372 | static struct rtable *rt_cache_get_idx(struct seq_file *seq, loff_t pos) | 372 | static struct rtable *rt_cache_get_idx(struct seq_file *seq, loff_t pos) |
373 | { | 373 | { |
374 | struct rtable *r = rt_cache_get_first(seq); | 374 | struct rtable *r = rt_cache_get_first(seq); |
375 | 375 | ||
376 | if (r) | 376 | if (r) |
377 | while (pos && (r = rt_cache_get_next(seq, r))) | 377 | while (pos && (r = rt_cache_get_next(seq, r))) |
378 | --pos; | 378 | --pos; |
379 | return pos ? NULL : r; | 379 | return pos ? NULL : r; |
380 | } | 380 | } |
381 | 381 | ||
382 | static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos) | 382 | static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos) |
383 | { | 383 | { |
384 | struct rt_cache_iter_state *st = seq->private; | 384 | struct rt_cache_iter_state *st = seq->private; |
385 | if (*pos) | 385 | if (*pos) |
386 | return rt_cache_get_idx(seq, *pos - 1); | 386 | return rt_cache_get_idx(seq, *pos - 1); |
387 | st->genid = rt_genid(seq_file_net(seq)); | 387 | st->genid = rt_genid(seq_file_net(seq)); |
388 | return SEQ_START_TOKEN; | 388 | return SEQ_START_TOKEN; |
389 | } | 389 | } |
390 | 390 | ||
391 | static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos) | 391 | static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos) |
392 | { | 392 | { |
393 | struct rtable *r; | 393 | struct rtable *r; |
394 | 394 | ||
395 | if (v == SEQ_START_TOKEN) | 395 | if (v == SEQ_START_TOKEN) |
396 | r = rt_cache_get_first(seq); | 396 | r = rt_cache_get_first(seq); |
397 | else | 397 | else |
398 | r = rt_cache_get_next(seq, v); | 398 | r = rt_cache_get_next(seq, v); |
399 | ++*pos; | 399 | ++*pos; |
400 | return r; | 400 | return r; |
401 | } | 401 | } |
402 | 402 | ||
/*
 * seq_file stop: release the rcu_read_lock_bh() still held by
 * rt_cache_get_first()/__rt_cache_get_next() when iteration stopped on a
 * real entry (not the header token and not past the end).
 */
static void rt_cache_seq_stop(struct seq_file *seq, void *v)
{
	if (v && v != SEQ_START_TOKEN)
		rcu_read_unlock_bh();
}
408 | 408 | ||
409 | static int rt_cache_seq_show(struct seq_file *seq, void *v) | 409 | static int rt_cache_seq_show(struct seq_file *seq, void *v) |
410 | { | 410 | { |
411 | if (v == SEQ_START_TOKEN) | 411 | if (v == SEQ_START_TOKEN) |
412 | seq_printf(seq, "%-127s\n", | 412 | seq_printf(seq, "%-127s\n", |
413 | "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t" | 413 | "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t" |
414 | "Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t" | 414 | "Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t" |
415 | "HHUptod\tSpecDst"); | 415 | "HHUptod\tSpecDst"); |
416 | else { | 416 | else { |
417 | struct rtable *r = v; | 417 | struct rtable *r = v; |
418 | struct neighbour *n; | 418 | struct neighbour *n; |
419 | int len; | 419 | int len; |
420 | 420 | ||
421 | n = dst_get_neighbour(&r->dst); | 421 | n = dst_get_neighbour(&r->dst); |
422 | seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t" | 422 | seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t" |
423 | "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n", | 423 | "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n", |
424 | r->dst.dev ? r->dst.dev->name : "*", | 424 | r->dst.dev ? r->dst.dev->name : "*", |
425 | (__force u32)r->rt_dst, | 425 | (__force u32)r->rt_dst, |
426 | (__force u32)r->rt_gateway, | 426 | (__force u32)r->rt_gateway, |
427 | r->rt_flags, atomic_read(&r->dst.__refcnt), | 427 | r->rt_flags, atomic_read(&r->dst.__refcnt), |
428 | r->dst.__use, 0, (__force u32)r->rt_src, | 428 | r->dst.__use, 0, (__force u32)r->rt_src, |
429 | dst_metric_advmss(&r->dst) + 40, | 429 | dst_metric_advmss(&r->dst) + 40, |
430 | dst_metric(&r->dst, RTAX_WINDOW), | 430 | dst_metric(&r->dst, RTAX_WINDOW), |
431 | (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) + | 431 | (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) + |
432 | dst_metric(&r->dst, RTAX_RTTVAR)), | 432 | dst_metric(&r->dst, RTAX_RTTVAR)), |
433 | r->rt_key_tos, | 433 | r->rt_key_tos, |
434 | -1, | 434 | -1, |
435 | (n && (n->nud_state & NUD_CONNECTED)) ? 1 : 0, | 435 | (n && (n->nud_state & NUD_CONNECTED)) ? 1 : 0, |
436 | r->rt_spec_dst, &len); | 436 | r->rt_spec_dst, &len); |
437 | 437 | ||
438 | seq_printf(seq, "%*s\n", 127 - len, ""); | 438 | seq_printf(seq, "%*s\n", 127 - len, ""); |
439 | } | 439 | } |
440 | return 0; | 440 | return 0; |
441 | } | 441 | } |
442 | 442 | ||
/* seq_file operations for the route-cache dump. */
static const struct seq_operations rt_cache_seq_ops = {
	.start  = rt_cache_seq_start,
	.next   = rt_cache_seq_next,
	.stop   = rt_cache_seq_stop,
	.show   = rt_cache_seq_show,
};
449 | 449 | ||
/* Open handler: per-netns seq_file with rt_cache_iter_state as private. */
static int rt_cache_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &rt_cache_seq_ops,
			sizeof(struct rt_cache_iter_state));
}
455 | 455 | ||
/* File operations for the route-cache proc entry. */
static const struct file_operations rt_cache_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt_cache_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release_net,
};
463 | 463 | ||
464 | 464 | ||
465 | static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos) | 465 | static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos) |
466 | { | 466 | { |
467 | int cpu; | 467 | int cpu; |
468 | 468 | ||
469 | if (*pos == 0) | 469 | if (*pos == 0) |
470 | return SEQ_START_TOKEN; | 470 | return SEQ_START_TOKEN; |
471 | 471 | ||
472 | for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) { | 472 | for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) { |
473 | if (!cpu_possible(cpu)) | 473 | if (!cpu_possible(cpu)) |
474 | continue; | 474 | continue; |
475 | *pos = cpu+1; | 475 | *pos = cpu+1; |
476 | return &per_cpu(rt_cache_stat, cpu); | 476 | return &per_cpu(rt_cache_stat, cpu); |
477 | } | 477 | } |
478 | return NULL; | 478 | return NULL; |
479 | } | 479 | } |
480 | 480 | ||
481 | static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) | 481 | static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) |
482 | { | 482 | { |
483 | int cpu; | 483 | int cpu; |
484 | 484 | ||
485 | for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) { | 485 | for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) { |
486 | if (!cpu_possible(cpu)) | 486 | if (!cpu_possible(cpu)) |
487 | continue; | 487 | continue; |
488 | *pos = cpu+1; | 488 | *pos = cpu+1; |
489 | return &per_cpu(rt_cache_stat, cpu); | 489 | return &per_cpu(rt_cache_stat, cpu); |
490 | } | 490 | } |
491 | return NULL; | 491 | return NULL; |
492 | 492 | ||
493 | } | 493 | } |
494 | 494 | ||
/* Nothing to release: the iterator points into static per-cpu data. */
static void rt_cpu_seq_stop(struct seq_file *seq, void *v)
{

}
499 | 499 | ||
/*
 * seq_file show for per-cpu route-cache statistics: emit the header line
 * or one line of counters.  The leading "entries" field repeats the
 * global dst count on every cpu line; the rest are per-cpu counters.
 */
static int rt_cpu_seq_show(struct seq_file *seq, void *v)
{
	struct rt_cache_stat *st = v;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "entries  in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src  out_hit out_slow_tot out_slow_mc  gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
		return 0;
	}

	seq_printf(seq,"%08x  %08x %08x %08x %08x %08x %08x %08x "
		   " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
		   dst_entries_get_slow(&ipv4_dst_ops),
		   st->in_hit,
		   st->in_slow_tot,
		   st->in_slow_mc,
		   st->in_no_route,
		   st->in_brd,
		   st->in_martian_dst,
		   st->in_martian_src,

		   st->out_hit,
		   st->out_slow_tot,
		   st->out_slow_mc,

		   st->gc_total,
		   st->gc_ignored,
		   st->gc_goal_miss,
		   st->gc_dst_overflow,
		   st->in_hlist_search,
		   st->out_hlist_search
		);
	return 0;
}
533 | 533 | ||
/* seq_file iterator for the per-cpu statistics file. */
static const struct seq_operations rt_cpu_seq_ops = {
	.start  = rt_cpu_seq_start,
	.next   = rt_cpu_seq_next,
	.stop   = rt_cpu_seq_stop,
	.show   = rt_cpu_seq_show,
};
540 | 540 | ||
541 | 541 | ||
/* ->open() for /proc/net/stat/rt_cache: attach the seq iterator. */
static int rt_cpu_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &rt_cpu_seq_ops);
}
546 | 546 | ||
/* file_operations for /proc/net/stat/rt_cache (read-only seq_file). */
static const struct file_operations rt_cpu_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt_cpu_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release,
};
554 | 554 | ||
555 | #ifdef CONFIG_IP_ROUTE_CLASSID | 555 | #ifdef CONFIG_IP_ROUTE_CLASSID |
556 | static int rt_acct_proc_show(struct seq_file *m, void *v) | 556 | static int rt_acct_proc_show(struct seq_file *m, void *v) |
557 | { | 557 | { |
558 | struct ip_rt_acct *dst, *src; | 558 | struct ip_rt_acct *dst, *src; |
559 | unsigned int i, j; | 559 | unsigned int i, j; |
560 | 560 | ||
561 | dst = kcalloc(256, sizeof(struct ip_rt_acct), GFP_KERNEL); | 561 | dst = kcalloc(256, sizeof(struct ip_rt_acct), GFP_KERNEL); |
562 | if (!dst) | 562 | if (!dst) |
563 | return -ENOMEM; | 563 | return -ENOMEM; |
564 | 564 | ||
565 | for_each_possible_cpu(i) { | 565 | for_each_possible_cpu(i) { |
566 | src = (struct ip_rt_acct *)per_cpu_ptr(ip_rt_acct, i); | 566 | src = (struct ip_rt_acct *)per_cpu_ptr(ip_rt_acct, i); |
567 | for (j = 0; j < 256; j++) { | 567 | for (j = 0; j < 256; j++) { |
568 | dst[j].o_bytes += src[j].o_bytes; | 568 | dst[j].o_bytes += src[j].o_bytes; |
569 | dst[j].o_packets += src[j].o_packets; | 569 | dst[j].o_packets += src[j].o_packets; |
570 | dst[j].i_bytes += src[j].i_bytes; | 570 | dst[j].i_bytes += src[j].i_bytes; |
571 | dst[j].i_packets += src[j].i_packets; | 571 | dst[j].i_packets += src[j].i_packets; |
572 | } | 572 | } |
573 | } | 573 | } |
574 | 574 | ||
575 | seq_write(m, dst, 256 * sizeof(struct ip_rt_acct)); | 575 | seq_write(m, dst, 256 * sizeof(struct ip_rt_acct)); |
576 | kfree(dst); | 576 | kfree(dst); |
577 | return 0; | 577 | return 0; |
578 | } | 578 | } |
579 | 579 | ||
/* ->open() for /proc/net/rt_acct: single-shot seq_file. */
static int rt_acct_proc_open(struct inode *inode, struct file *file)
{
	return single_open(file, rt_acct_proc_show, NULL);
}
584 | 584 | ||
/* file_operations for /proc/net/rt_acct (CONFIG_IP_ROUTE_CLASSID). */
static const struct file_operations rt_acct_proc_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt_acct_proc_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release,
};
592 | #endif | 592 | #endif |
593 | 593 | ||
/*
 * Create this namespace's routing-cache proc entries:
 *   /proc/net/rt_cache      - cache dump (rt_cache_seq_fops)
 *   /proc/net/stat/rt_cache - per-cpu stats (rt_cpu_seq_fops)
 *   /proc/net/rt_acct       - classid accounting (CONFIG_IP_ROUTE_CLASSID)
 * On failure, entries created so far are removed in reverse order via
 * the goto ladder.  Returns 0 on success, -ENOMEM otherwise.
 */
static int __net_init ip_rt_do_proc_init(struct net *net)
{
	struct proc_dir_entry *pde;

	pde = proc_net_fops_create(net, "rt_cache", S_IRUGO,
			&rt_cache_seq_fops);
	if (!pde)
		goto err1;

	pde = proc_create("rt_cache", S_IRUGO,
			  net->proc_net_stat, &rt_cpu_seq_fops);
	if (!pde)
		goto err2;

#ifdef CONFIG_IP_ROUTE_CLASSID
	pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops);
	if (!pde)
		goto err3;
#endif
	return 0;

#ifdef CONFIG_IP_ROUTE_CLASSID
err3:
	remove_proc_entry("rt_cache", net->proc_net_stat);
#endif
err2:
	remove_proc_entry("rt_cache", net->proc_net);
err1:
	return -ENOMEM;
}
624 | 624 | ||
/* Tear down the proc entries created by ip_rt_do_proc_init(). */
static void __net_exit ip_rt_do_proc_exit(struct net *net)
{
	remove_proc_entry("rt_cache", net->proc_net_stat);
	remove_proc_entry("rt_cache", net->proc_net);
#ifdef CONFIG_IP_ROUTE_CLASSID
	remove_proc_entry("rt_acct", net->proc_net);
#endif
}
633 | 633 | ||
/* Per-netns lifecycle hooks for the routing-cache proc files. */
static struct pernet_operations ip_rt_proc_ops __net_initdata = {
	.init = ip_rt_do_proc_init,
	.exit = ip_rt_do_proc_exit,
};
638 | 638 | ||
/* Register the per-netns proc hooks (CONFIG_PROC_FS builds only). */
static int __init ip_rt_proc_init(void)
{
	return register_pernet_subsys(&ip_rt_proc_ops);
}

#else
/* No procfs: nothing to register. */
static inline int ip_rt_proc_init(void)
{
	return 0;
}
#endif /* CONFIG_PROC_FS */
650 | 650 | ||
/* Free a cache entry after an RCU-bh grace period (readers walk the
 * hash chains under rcu_read_lock_bh). */
static inline void rt_free(struct rtable *rt)
{
	call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free);
}
655 | 655 | ||
/* Drop our reference and schedule the entry for RCU-bh freeing. */
static inline void rt_drop(struct rtable *rt)
{
	ip_rt_put(rt);
	call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free);
}
661 | 661 | ||
static inline int rt_fast_clean(struct rtable *rth)
{
	/* Kill broadcast/multicast entries very aggressively, if they
	   collide in hash table with more useful entries */
	return (rth->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) &&
		rt_is_input_route(rth) && rth->dst.rt_next;
}
669 | 669 | ||
670 | static inline int rt_valuable(struct rtable *rth) | 670 | static inline int rt_valuable(struct rtable *rth) |
671 | { | 671 | { |
672 | return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) || | 672 | return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) || |
673 | (rth->peer && rth->peer->pmtu_expires); | 673 | (rth->peer && rth->peer->pmtu_expires); |
674 | } | 674 | } |
675 | 675 | ||
676 | static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2) | 676 | static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2) |
677 | { | 677 | { |
678 | unsigned long age; | 678 | unsigned long age; |
679 | int ret = 0; | 679 | int ret = 0; |
680 | 680 | ||
681 | if (atomic_read(&rth->dst.__refcnt)) | 681 | if (atomic_read(&rth->dst.__refcnt)) |
682 | goto out; | 682 | goto out; |
683 | 683 | ||
684 | age = jiffies - rth->dst.lastuse; | 684 | age = jiffies - rth->dst.lastuse; |
685 | if ((age <= tmo1 && !rt_fast_clean(rth)) || | 685 | if ((age <= tmo1 && !rt_fast_clean(rth)) || |
686 | (age <= tmo2 && rt_valuable(rth))) | 686 | (age <= tmo2 && rt_valuable(rth))) |
687 | goto out; | 687 | goto out; |
688 | ret = 1; | 688 | ret = 1; |
689 | out: return ret; | 689 | out: return ret; |
690 | } | 690 | } |
691 | 691 | ||
692 | /* Bits of score are: | 692 | /* Bits of score are: |
693 | * 31: very valuable | 693 | * 31: very valuable |
694 | * 30: not quite useless | 694 | * 30: not quite useless |
695 | * 29..0: usage counter | 695 | * 29..0: usage counter |
696 | */ | 696 | */ |
697 | static inline u32 rt_score(struct rtable *rt) | 697 | static inline u32 rt_score(struct rtable *rt) |
698 | { | 698 | { |
699 | u32 score = jiffies - rt->dst.lastuse; | 699 | u32 score = jiffies - rt->dst.lastuse; |
700 | 700 | ||
701 | score = ~score & ~(3<<30); | 701 | score = ~score & ~(3<<30); |
702 | 702 | ||
703 | if (rt_valuable(rt)) | 703 | if (rt_valuable(rt)) |
704 | score |= (1<<31); | 704 | score |= (1<<31); |
705 | 705 | ||
706 | if (rt_is_output_route(rt) || | 706 | if (rt_is_output_route(rt) || |
707 | !(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL))) | 707 | !(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL))) |
708 | score |= (1<<30); | 708 | score |= (1<<30); |
709 | 709 | ||
710 | return score; | 710 | return score; |
711 | } | 711 | } |
712 | 712 | ||
/* True while this netns has not exceeded its allowed number of cache
 * rebuilds (sysctl rt_cache_rebuild_count); false disables caching. */
static inline bool rt_caching(const struct net *net)
{
	return net->ipv4.current_rt_cache_rebuild_count <=
		net->ipv4.sysctl_rt_cache_rebuild_count;
}
718 | 718 | ||
/* Compare only the fields that feed the hash function (key dst, key
 * src, iif).  The XOR-and-OR form is branch-free: the result is zero
 * iff all three pairs are equal. */
static inline bool compare_hash_inputs(const struct rtable *rt1,
				       const struct rtable *rt2)
{
	return ((((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) |
		((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) |
		(rt1->rt_iif ^ rt2->rt_iif)) == 0);
}
726 | 726 | ||
/* Full routing-key comparison (dst, src, mark, tos, oif, iif), using
 * the same branch-free XOR-and-OR idiom as compare_hash_inputs(). */
static inline int compare_keys(struct rtable *rt1, struct rtable *rt2)
{
	return (((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) |
		((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) |
		(rt1->rt_mark ^ rt2->rt_mark) |
		(rt1->rt_key_tos ^ rt2->rt_key_tos) |
		(rt1->rt_oif ^ rt2->rt_oif) |
		(rt1->rt_iif ^ rt2->rt_iif)) == 0;
}
736 | 736 | ||
/* True if both entries' devices belong to the same network namespace. */
static inline int compare_netns(struct rtable *rt1, struct rtable *rt2)
{
	return net_eq(dev_net(rt1->dst.dev), dev_net(rt2->dst.dev));
}
741 | 741 | ||
/* An entry is stale if its generation id no longer matches the netns'
 * current one (bumped by rt_cache_invalidate()). */
static inline int rt_is_expired(struct rtable *rth)
{
	return rth->rt_genid != rt_genid(dev_net(rth->dst.dev));
}
746 | 746 | ||
747 | /* | 747 | /* |
748 | * Perform a full scan of hash table and free all entries. | 748 | * Perform a full scan of hash table and free all entries. |
749 | * Can be called by a softirq or a process. | 749 | * Can be called by a softirq or a process. |
 * In the latter case, we want to reschedule if necessary
751 | */ | 751 | */ |
static void rt_do_flush(struct net *net, int process_context)
{
	unsigned int i;
	struct rtable *rth, *next;

	for (i = 0; i <= rt_hash_mask; i++) {
		struct rtable __rcu **pprev;
		struct rtable *list;

		/* In process context we may hold the cpu for a while;
		 * yield when the scheduler asks for it. */
		if (process_context && need_resched())
			cond_resched();
		/* Lockless peek so empty buckets skip the lock entirely. */
		rth = rcu_dereference_raw(rt_hash_table[i].chain);
		if (!rth)
			continue;

		spin_lock_bh(rt_hash_lock_addr(i));

		list = NULL;
		pprev = &rt_hash_table[i].chain;
		rth = rcu_dereference_protected(*pprev,
			lockdep_is_held(rt_hash_lock_addr(i)));

		while (rth) {
			next = rcu_dereference_protected(rth->dst.rt_next,
				lockdep_is_held(rt_hash_lock_addr(i)));

			/* net == NULL means flush everything; otherwise
			 * only entries belonging to that namespace. */
			if (!net ||
			    net_eq(dev_net(rth->dst.dev), net)) {
				/* Unlink from the chain and push onto a
				 * private kill list. */
				rcu_assign_pointer(*pprev, next);
				rcu_assign_pointer(rth->dst.rt_next, list);
				list = rth;
			} else {
				pprev = &rth->dst.rt_next;
			}
			rth = next;
		}

		spin_unlock_bh(rt_hash_lock_addr(i));

		/* Free the victims after dropping the bucket lock; rt_free
		 * defers the actual release past an RCU-bh grace period. */
		for (; list; list = next) {
			next = rcu_dereference_protected(list->dst.rt_next, 1);
			rt_free(list);
		}
	}
}
797 | 797 | ||
798 | /* | 798 | /* |
799 | * While freeing expired entries, we compute average chain length | 799 | * While freeing expired entries, we compute average chain length |
800 | * and standard deviation, using fixed-point arithmetic. | 800 | * and standard deviation, using fixed-point arithmetic. |
801 | * This to have an estimation of rt_chain_length_max | 801 | * This to have an estimation of rt_chain_length_max |
802 | * rt_chain_length_max = max(elasticity, AVG + 4*SD) | 802 | * rt_chain_length_max = max(elasticity, AVG + 4*SD) |
 * We use 3 bits for fractional part, and 29 (or 61) for magnitude.
804 | */ | 804 | */ |
805 | 805 | ||
806 | #define FRACT_BITS 3 | 806 | #define FRACT_BITS 3 |
807 | #define ONE (1UL << FRACT_BITS) | 807 | #define ONE (1UL << FRACT_BITS) |
808 | 808 | ||
809 | /* | 809 | /* |
810 | * Given a hash chain and an item in this hash chain, | 810 | * Given a hash chain and an item in this hash chain, |
811 | * find if a previous entry has the same hash_inputs | 811 | * find if a previous entry has the same hash_inputs |
812 | * (but differs on tos, mark or oif) | 812 | * (but differs on tos, mark or oif) |
813 | * Returns 0 if an alias is found. | 813 | * Returns 0 if an alias is found. |
814 | * Returns ONE if rth has no alias before itself. | 814 | * Returns ONE if rth has no alias before itself. |
815 | */ | 815 | */ |
816 | static int has_noalias(const struct rtable *head, const struct rtable *rth) | 816 | static int has_noalias(const struct rtable *head, const struct rtable *rth) |
817 | { | 817 | { |
818 | const struct rtable *aux = head; | 818 | const struct rtable *aux = head; |
819 | 819 | ||
820 | while (aux != rth) { | 820 | while (aux != rth) { |
821 | if (compare_hash_inputs(aux, rth)) | 821 | if (compare_hash_inputs(aux, rth)) |
822 | return 0; | 822 | return 0; |
823 | aux = rcu_dereference_protected(aux->dst.rt_next, 1); | 823 | aux = rcu_dereference_protected(aux->dst.rt_next, 1); |
824 | } | 824 | } |
825 | return ONE; | 825 | return ONE; |
826 | } | 826 | } |
827 | 827 | ||
828 | /* | 828 | /* |
829 | * Perturbation of rt_genid by a small quantity [1..256] | 829 | * Perturbation of rt_genid by a small quantity [1..256] |
830 | * Using 8 bits of shuffling ensure we can call rt_cache_invalidate() | 830 | * Using 8 bits of shuffling ensure we can call rt_cache_invalidate() |
831 | * many times (2^24) without giving recent rt_genid. | 831 | * many times (2^24) without giving recent rt_genid. |
 * Jenkins hash is strong enough that little changes of rt_genid are OK.
833 | */ | 833 | */ |
static void rt_cache_invalidate(struct net *net)
{
	unsigned char shuffle;

	get_random_bytes(&shuffle, sizeof(shuffle));
	/* Bump genid by a random step in [1..256]; all existing cache
	 * entries now fail the rt_is_expired() check. */
	atomic_add(shuffle + 1U, &net->ipv4.rt_genid);
}
841 | 841 | ||
842 | /* | 842 | /* |
843 | * delay < 0 : invalidate cache (fast : entries will be deleted later) | 843 | * delay < 0 : invalidate cache (fast : entries will be deleted later) |
844 | * delay >= 0 : invalidate & flush cache (can be long) | 844 | * delay >= 0 : invalidate & flush cache (can be long) |
845 | */ | 845 | */ |
/* Invalidate the cache; when delay >= 0 also walk the table and free
 * entries synchronously (may be slow). */
void rt_cache_flush(struct net *net, int delay)
{
	rt_cache_invalidate(net);
	if (delay >= 0)
		rt_do_flush(net, !in_softirq());
}
852 | 852 | ||
853 | /* Flush previous cache invalidated entries from the cache */ | 853 | /* Flush previous cache invalidated entries from the cache */ |
/* Flush previously invalidated entries without bumping the genid. */
void rt_cache_flush_batch(struct net *net)
{
	rt_do_flush(net, !in_softirq());
}
858 | 858 | ||
/* Called when a hash chain grows pathologically long: warn (rate
 * limited) and invalidate the whole cache for this namespace. */
static void rt_emergency_hash_rebuild(struct net *net)
{
	if (net_ratelimit())
		printk(KERN_WARNING "Route hash chain too long!\n");
	rt_cache_invalidate(net);
}
865 | 865 | ||
866 | /* | 866 | /* |
867 | Short description of GC goals. | 867 | Short description of GC goals. |
868 | 868 | ||
869 | We want to build algorithm, which will keep routing cache | 869 | We want to build algorithm, which will keep routing cache |
870 | at some equilibrium point, when number of aged off entries | 870 | at some equilibrium point, when number of aged off entries |
871 | is kept approximately equal to newly generated ones. | 871 | is kept approximately equal to newly generated ones. |
872 | 872 | ||
873 | Current expiration strength is variable "expire". | 873 | Current expiration strength is variable "expire". |
874 | We try to adjust it dynamically, so that if networking | 874 | We try to adjust it dynamically, so that if networking |
875 | is idle expires is large enough to keep enough of warm entries, | 875 | is idle expires is large enough to keep enough of warm entries, |
876 | and when load increases it reduces to limit cache size. | 876 | and when load increases it reduces to limit cache size. |
877 | */ | 877 | */ |
878 | 878 | ||
/*
 * dst_ops ->gc() for IPv4.  Tries to bring the cache back toward an
 * equilibrium size by expiring entries, adapting the static "expire"
 * timeout between calls (see the comment block above).
 * Returns 0 on success, 1 if the cache is irreparably over ip_rt_max_size.
 */
static int rt_garbage_collect(struct dst_ops *ops)
{
	/* State shared across invocations; effectively serialized by the
	 * callers of dst_ops->gc(). */
	static unsigned long expire = RT_GC_TIMEOUT;
	static unsigned long last_gc;
	static int rover;
	static int equilibrium;
	struct rtable *rth;
	struct rtable __rcu **rthp;
	unsigned long now = jiffies;
	int goal;
	int entries = dst_entries_get_fast(&ipv4_dst_ops);

	/*
	 * Garbage collection is pretty expensive,
	 * do not make it too frequently.
	 */

	RT_CACHE_STAT_INC(gc_total);

	if (now - last_gc < ip_rt_gc_min_interval &&
	    entries < ip_rt_max_size) {
		RT_CACHE_STAT_INC(gc_ignored);
		goto out;
	}

	entries = dst_entries_get_slow(&ipv4_dst_ops);
	/* Calculate number of entries, which we want to expire now. */
	goal = entries - (ip_rt_gc_elasticity << rt_hash_log);
	if (goal <= 0) {
		if (equilibrium < ipv4_dst_ops.gc_thresh)
			equilibrium = ipv4_dst_ops.gc_thresh;
		goal = entries - equilibrium;
		if (goal > 0) {
			equilibrium += min_t(unsigned int, goal >> 1, rt_hash_mask + 1);
			goal = entries - equilibrium;
		}
	} else {
		/* We are in dangerous area. Try to reduce cache really
		 * aggressively.
		 */
		goal = max_t(unsigned int, goal >> 1, rt_hash_mask + 1);
		equilibrium = entries - goal;
	}

	if (now - last_gc >= ip_rt_gc_min_interval)
		last_gc = now;

	if (goal <= 0) {
		equilibrium += goal;
		goto work_done;
	}

	do {
		int i, k;

		/* Round-robin scan of the buckets starting at 'rover'.
		 * Within a bucket, 'tmo' halves per surviving entry so
		 * long chains get pruned harder. */
		for (i = rt_hash_mask, k = rover; i >= 0; i--) {
			unsigned long tmo = expire;

			k = (k + 1) & rt_hash_mask;
			rthp = &rt_hash_table[k].chain;
			spin_lock_bh(rt_hash_lock_addr(k));
			while ((rth = rcu_dereference_protected(*rthp,
					lockdep_is_held(rt_hash_lock_addr(k)))) != NULL) {
				if (!rt_is_expired(rth) &&
					!rt_may_expire(rth, tmo, expire)) {
					tmo >>= 1;
					rthp = &rth->dst.rt_next;
					continue;
				}
				*rthp = rth->dst.rt_next;
				rt_free(rth);
				goal--;
			}
			spin_unlock_bh(rt_hash_lock_addr(k));
			if (goal <= 0)
				break;
		}
		rover = k;

		if (goal <= 0)
			goto work_done;

		/* Goal is not achieved. We stop process if:

		   - if expire reduced to zero. Otherwise, expire is halved.
		   - if table is not full.
		   - if we are called from interrupt.
		   - jiffies check is just fallback/debug loop breaker.
		     We will not spin here for long time in any case.
		 */

		RT_CACHE_STAT_INC(gc_goal_miss);

		if (expire == 0)
			break;

		expire >>= 1;

		if (dst_entries_get_fast(&ipv4_dst_ops) < ip_rt_max_size)
			goto out;
	} while (!in_softirq() && time_before_eq(jiffies, now));

	if (dst_entries_get_fast(&ipv4_dst_ops) < ip_rt_max_size)
		goto out;
	if (dst_entries_get_slow(&ipv4_dst_ops) < ip_rt_max_size)
		goto out;
	if (net_ratelimit())
		printk(KERN_WARNING "dst cache overflow\n");
	RT_CACHE_STAT_INC(gc_dst_overflow);
	return 1;

work_done:
	/* Relax: grow the expire timeout again, capped once the cache is
	 * comfortably below the GC threshold. */
	expire += ip_rt_gc_min_interval;
	if (expire > ip_rt_gc_timeout ||
	    dst_entries_get_fast(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh ||
	    dst_entries_get_slow(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh)
		expire = ip_rt_gc_timeout;
out:	return 0;
}
998 | 998 | ||
999 | /* | 999 | /* |
1000 | * Returns number of entries in a hash chain that have different hash_inputs | 1000 | * Returns number of entries in a hash chain that have different hash_inputs |
1001 | */ | 1001 | */ |
1002 | static int slow_chain_length(const struct rtable *head) | 1002 | static int slow_chain_length(const struct rtable *head) |
1003 | { | 1003 | { |
1004 | int length = 0; | 1004 | int length = 0; |
1005 | const struct rtable *rth = head; | 1005 | const struct rtable *rth = head; |
1006 | 1006 | ||
1007 | while (rth) { | 1007 | while (rth) { |
1008 | length += has_noalias(head, rth); | 1008 | length += has_noalias(head, rth); |
1009 | rth = rcu_dereference_protected(rth->dst.rt_next, 1); | 1009 | rth = rcu_dereference_protected(rth->dst.rt_next, 1); |
1010 | } | 1010 | } |
1011 | return length >> FRACT_BITS; | 1011 | return length >> FRACT_BITS; |
1012 | } | 1012 | } |
1013 | 1013 | ||
/*
 * Find or create the neighbour entry for @daddr on dst->dev.
 * Loopback and point-to-point devices share one entry keyed on
 * INADDR_ANY; ATM clip devices use their own neighbour table when
 * that support is built in.  Returns a neighbour or an ERR_PTR from
 * neigh_create() (callers check with IS_ERR, see rt_bind_neighbour).
 */
static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const void *daddr)
{
	struct neigh_table *tbl = &arp_tbl;
	static const __be32 inaddr_any = 0;
	struct net_device *dev = dst->dev;
	const __be32 *pkey = daddr;
	struct neighbour *n;

#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE)
	if (dev->type == ARPHRD_ATM)
		tbl = clip_tbl_hook;
#endif
	if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT))
		pkey = &inaddr_any;

	/* Fast path: existing entry in the (ARP) neighbour hash. */
	n = __ipv4_neigh_lookup(tbl, dev, *(__force u32 *)pkey);
	if (n)
		return n;
	return neigh_create(tbl, pkey, dev);
}
1034 | 1034 | ||
/*
 * Resolve the neighbour for rt's gateway and attach it to rt->dst.
 * Returns 0 on success or the PTR_ERR() from the failed lookup.
 */
static int rt_bind_neighbour(struct rtable *rt)
{
	struct neighbour *n = ipv4_neigh_lookup(&rt->dst, &rt->rt_gateway);
	if (IS_ERR(n))
		return PTR_ERR(n);
	/* Publishes the neighbour pointer for (RCU) readers of the dst. */
	dst_set_neighbour(&rt->dst, n);

	return 0;
}
1044 | 1044 | ||
/*
 * Insert @rt into hash bucket @hash, or return an already-cached entry
 * that matches it.  On success the returned route is also attached to
 * @skb (when @skb is non-NULL).  On neighbour-binding failure @rt is
 * dropped and an ERR_PTR is returned.  Lookups on the chain are
 * lockless (RCU), so all chain updates below are ordered with
 * rcu_assign_pointer() under the per-bucket spinlock.
 */
static struct rtable *rt_intern_hash(unsigned hash, struct rtable *rt,
				     struct sk_buff *skb, int ifindex)
{
	struct rtable *rth, *cand;
	struct rtable __rcu **rthp, **candp;
	unsigned long now;
	u32 min_score;
	int chain_length;
	/* Allow one GC-and-retry on neighbour overflow, but only when we
	 * are not running in softirq context.
	 */
	int attempts = !in_softirq();

restart:
	chain_length = 0;
	min_score = ~(u32)0;
	cand = NULL;
	candp = NULL;
	now = jiffies;

	if (!rt_caching(dev_net(rt->dst.dev))) {
		/*
		 * If we're not caching, just tell the caller we
		 * were successful and don't touch the route. The
		 * caller hold the sole reference to the cache entry, and
		 * it will be released when the caller is done with it.
		 * If we drop it here, the callers have no way to resolve routes
		 * when we're not caching. Instead, just point *rp at rt, so
		 * the caller gets a single use out of the route
		 * Note that we do rt_free on this new route entry, so that
		 * once its refcount hits zero, we are still able to reap it
		 * (Thanks Alexey)
		 * Note: To avoid expensive rcu stuff for this uncached dst,
		 * we set DST_NOCACHE so that dst_release() can free dst without
		 * waiting a grace period.
		 */

		rt->dst.flags |= DST_NOCACHE;
		if (rt->rt_type == RTN_UNICAST || rt_is_output_route(rt)) {
			int err = rt_bind_neighbour(rt);
			if (err) {
				if (net_ratelimit())
					printk(KERN_WARNING
					    "Neighbour table failure & not caching routes.\n");
				ip_rt_put(rt);
				return ERR_PTR(err);
			}
		}

		goto skip_hashing;
	}

	rthp = &rt_hash_table[hash].chain;

	spin_lock_bh(rt_hash_lock_addr(hash));
	/* Walk the chain: reap expired entries, look for a duplicate of
	 * @rt, and remember the lowest-scoring unreferenced entry as an
	 * eviction candidate in case the chain is too long.
	 */
	while ((rth = rcu_dereference_protected(*rthp,
			lockdep_is_held(rt_hash_lock_addr(hash)))) != NULL) {
		if (rt_is_expired(rth)) {
			*rthp = rth->dst.rt_next;
			rt_free(rth);
			continue;
		}
		if (compare_keys(rth, rt) && compare_netns(rth, rt)) {
			/* Put it first */
			*rthp = rth->dst.rt_next;
			/*
			 * Since lookup is lockfree, the deletion
			 * must be visible to another weakly ordered CPU before
			 * the insertion at the start of the hash chain.
			 */
			rcu_assign_pointer(rth->dst.rt_next,
					   rt_hash_table[hash].chain);
			/*
			 * Since lookup is lockfree, the update writes
			 * must be ordered for consistency on SMP.
			 */
			rcu_assign_pointer(rt_hash_table[hash].chain, rth);

			dst_use(&rth->dst, now);
			spin_unlock_bh(rt_hash_lock_addr(hash));

			/* Existing entry wins; drop the new route. */
			rt_drop(rt);
			if (skb)
				skb_dst_set(skb, &rth->dst);
			return rth;
		}

		if (!atomic_read(&rth->dst.__refcnt)) {
			u32 score = rt_score(rth);

			if (score <= min_score) {
				cand = rth;
				candp = rthp;
				min_score = score;
			}
		}

		chain_length++;

		rthp = &rth->dst.rt_next;
	}

	if (cand) {
		/* ip_rt_gc_elasticity used to be average length of chain
		 * length, when exceeded gc becomes really aggressive.
		 *
		 * The second limit is less certain. At the moment it allows
		 * only 2 entries per bucket. We will see.
		 */
		if (chain_length > ip_rt_gc_elasticity) {
			*candp = cand->dst.rt_next;
			rt_free(cand);
		}
	} else {
		/* No evictable candidate but the chain is overlong: the
		 * bucket is probably under attack, trigger an emergency
		 * rehash and retry with the new genid-based hash.
		 */
		if (chain_length > rt_chain_length_max &&
		    slow_chain_length(rt_hash_table[hash].chain) > rt_chain_length_max) {
			struct net *net = dev_net(rt->dst.dev);
			int num = ++net->ipv4.current_rt_cache_rebuild_count;
			if (!rt_caching(net)) {
				printk(KERN_WARNING "%s: %d rebuilds is over limit, route caching disabled\n",
					rt->dst.dev->name, num);
			}
			rt_emergency_hash_rebuild(net);
			spin_unlock_bh(rt_hash_lock_addr(hash));

			hash = rt_hash(rt->rt_key_dst, rt->rt_key_src,
					ifindex, rt_genid(net));
			goto restart;
		}
	}

	/* Try to bind route to arp only if it is output
	   route or unicast forwarding path.
	 */
	if (rt->rt_type == RTN_UNICAST || rt_is_output_route(rt)) {
		int err = rt_bind_neighbour(rt);
		if (err) {
			spin_unlock_bh(rt_hash_lock_addr(hash));

			if (err != -ENOBUFS) {
				rt_drop(rt);
				return ERR_PTR(err);
			}

			/* Neighbour tables are full and nothing
			   can be released. Try to shrink route cache,
			   it is most likely it holds some neighbour records.
			 */
			if (attempts-- > 0) {
				/* Temporarily force aggressive GC settings
				 * for one collection pass, then restore.
				 */
				int saved_elasticity = ip_rt_gc_elasticity;
				int saved_int = ip_rt_gc_min_interval;
				ip_rt_gc_elasticity = 1;
				ip_rt_gc_min_interval = 0;
				rt_garbage_collect(&ipv4_dst_ops);
				ip_rt_gc_min_interval = saved_int;
				ip_rt_gc_elasticity = saved_elasticity;
				goto restart;
			}

			if (net_ratelimit())
				printk(KERN_WARNING "ipv4: Neighbour table overflow.\n");
			rt_drop(rt);
			return ERR_PTR(-ENOBUFS);
		}
	}

	rt->dst.rt_next = rt_hash_table[hash].chain;

	/*
	 * Since lookup is lockfree, we must make sure
	 * previous writes to rt are committed to memory
	 * before making rt visible to other CPUS.
	 */
	rcu_assign_pointer(rt_hash_table[hash].chain, rt);

	spin_unlock_bh(rt_hash_lock_addr(hash));

skip_hashing:
	if (skb)
		skb_dst_set(skb, &rt->dst);
	return rt;
}
1224 | 1224 | ||
/* Global peer generation counter, bumped when peer-held routing data
 * changes (e.g. a learned redirect in ip_rt_redirect()); cached routes
 * compare their rt_peer_genid snapshot against it to detect staleness.
 */
static atomic_t __rt_peer_genid = ATOMIC_INIT(0);

/* Current value of the global peer generation counter. */
static u32 rt_peer_genid(void)
{
	return atomic_read(&__rt_peer_genid);
}
1231 | 1231 | ||
/*
 * Attach the inet_peer for @daddr to @rt.  @create is passed through to
 * inet_getpeer_v4() and may be 0 for lookup-only.  Concurrent binders
 * race via cmpxchg on rt->peer; the loser just drops its extra peer
 * reference.  The winner (or a NULL-peer outcome) also refreshes the
 * route's peer genid snapshot.
 */
void rt_bind_peer(struct rtable *rt, __be32 daddr, int create)
{
	struct inet_peer *peer;

	peer = inet_getpeer_v4(daddr, create);

	/* Only the first binder installs its peer; a loser puts its ref. */
	if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL)
		inet_putpeer(peer);
	else
		rt->rt_peer_genid = rt_peer_genid();
}
1243 | 1243 | ||
1244 | /* | 1244 | /* |
1245 | * Peer allocation may fail only in serious out-of-memory conditions. However | 1245 | * Peer allocation may fail only in serious out-of-memory conditions. However |
1246 | * we still can generate some output. | 1246 | * we still can generate some output. |
1247 | * Random ID selection looks a bit dangerous because we have no chances to | 1247 | * Random ID selection looks a bit dangerous because we have no chances to |
1248 | * select ID being unique in a reasonable period of time. | 1248 | * select ID being unique in a reasonable period of time. |
1249 | * But broken packet identifier may be better than no packet at all. | 1249 | * But broken packet identifier may be better than no packet at all. |
1250 | */ | 1250 | */ |
1251 | static void ip_select_fb_ident(struct iphdr *iph) | 1251 | static void ip_select_fb_ident(struct iphdr *iph) |
1252 | { | 1252 | { |
1253 | static DEFINE_SPINLOCK(ip_fb_id_lock); | 1253 | static DEFINE_SPINLOCK(ip_fb_id_lock); |
1254 | static u32 ip_fallback_id; | 1254 | static u32 ip_fallback_id; |
1255 | u32 salt; | 1255 | u32 salt; |
1256 | 1256 | ||
1257 | spin_lock_bh(&ip_fb_id_lock); | 1257 | spin_lock_bh(&ip_fb_id_lock); |
1258 | salt = secure_ip_id((__force __be32)ip_fallback_id ^ iph->daddr); | 1258 | salt = secure_ip_id((__force __be32)ip_fallback_id ^ iph->daddr); |
1259 | iph->id = htons(salt & 0xFFFF); | 1259 | iph->id = htons(salt & 0xFFFF); |
1260 | ip_fallback_id = salt; | 1260 | ip_fallback_id = salt; |
1261 | spin_unlock_bh(&ip_fb_id_lock); | 1261 | spin_unlock_bh(&ip_fb_id_lock); |
1262 | } | 1262 | } |
1263 | 1263 | ||
1264 | void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more) | 1264 | void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more) |
1265 | { | 1265 | { |
1266 | struct rtable *rt = (struct rtable *) dst; | 1266 | struct rtable *rt = (struct rtable *) dst; |
1267 | 1267 | ||
1268 | if (rt) { | 1268 | if (rt) { |
1269 | if (rt->peer == NULL) | 1269 | if (rt->peer == NULL) |
1270 | rt_bind_peer(rt, rt->rt_dst, 1); | 1270 | rt_bind_peer(rt, rt->rt_dst, 1); |
1271 | 1271 | ||
1272 | /* If peer is attached to destination, it is never detached, | 1272 | /* If peer is attached to destination, it is never detached, |
1273 | so that we need not to grab a lock to dereference it. | 1273 | so that we need not to grab a lock to dereference it. |
1274 | */ | 1274 | */ |
1275 | if (rt->peer) { | 1275 | if (rt->peer) { |
1276 | iph->id = htons(inet_getid(rt->peer, more)); | 1276 | iph->id = htons(inet_getid(rt->peer, more)); |
1277 | return; | 1277 | return; |
1278 | } | 1278 | } |
1279 | } else | 1279 | } else |
1280 | printk(KERN_DEBUG "rt_bind_peer(0) @%p\n", | 1280 | printk(KERN_DEBUG "rt_bind_peer(0) @%p\n", |
1281 | __builtin_return_address(0)); | 1281 | __builtin_return_address(0)); |
1282 | 1282 | ||
1283 | ip_select_fb_ident(iph); | 1283 | ip_select_fb_ident(iph); |
1284 | } | 1284 | } |
1285 | EXPORT_SYMBOL(__ip_select_ident); | 1285 | EXPORT_SYMBOL(__ip_select_ident); |
1286 | 1286 | ||
/*
 * Remove @rt (and any expired entries found along the way) from hash
 * bucket @hash.  The caller's reference on @rt is dropped here via
 * ip_rt_put(); unlinked entries are handed to RCU-deferred freeing.
 */
static void rt_del(unsigned hash, struct rtable *rt)
{
	struct rtable __rcu **rthp;
	struct rtable *aux;

	rthp = &rt_hash_table[hash].chain;
	spin_lock_bh(rt_hash_lock_addr(hash));
	ip_rt_put(rt);
	while ((aux = rcu_dereference_protected(*rthp,
			lockdep_is_held(rt_hash_lock_addr(hash)))) != NULL) {
		if (aux == rt || rt_is_expired(aux)) {
			/* Unlink under the bucket lock; rt_free defers the
			 * actual free past an RCU grace period.
			 */
			*rthp = aux->dst.rt_next;
			rt_free(aux);
			continue;
		}
		rthp = &aux->dst.rt_next;
	}
	spin_unlock_bh(rt_hash_lock_addr(hash));
}
1306 | 1306 | ||
/* called in rcu_read_lock() section.
 *
 * Handle an incoming ICMP redirect: validate the advertised gateway,
 * then record it on the destination's inet_peer and bump the global
 * peer genid so cached routes revalidate on next use.
 */
void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
		    __be32 saddr, struct net_device *dev)
{
	struct in_device *in_dev = __in_dev_get_rcu(dev);
	struct inet_peer *peer;
	struct net *net;

	if (!in_dev)
		return;

	net = dev_net(dev);
	/* Sanity checks: ignore no-op redirects, interfaces configured to
	 * refuse redirects, and gateways that cannot be unicast next hops.
	 */
	if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) ||
	    ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) ||
	    ipv4_is_zeronet(new_gw))
		goto reject_redirect;

	if (!IN_DEV_SHARED_MEDIA(in_dev)) {
		/* Non-shared media: the new gateway must be on-link, and
		 * with secure redirects enabled it must also be one of our
		 * known default gateways.
		 */
		if (!inet_addr_onlink(in_dev, new_gw, old_gw))
			goto reject_redirect;
		if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev))
			goto reject_redirect;
	} else {
		if (inet_addr_type(net, new_gw) != RTN_UNICAST)
			goto reject_redirect;
	}

	peer = inet_getpeer_v4(daddr, 1);
	if (peer) {
		peer->redirect_learned.a4 = new_gw;

		inet_putpeer(peer);

		/* Invalidate all cached routes' peer snapshots. */
		atomic_inc(&__rt_peer_genid);
	}
	return;

reject_redirect:
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit())
		printk(KERN_INFO "Redirect from %pI4 on %s about %pI4 ignored.\n"
			" Advised path = %pI4 -> %pI4\n",
		       &old_gw, dev->name, &new_gw,
		       &saddr, &daddr);
#endif
	;
}
1354 | 1354 | ||
1355 | static bool peer_pmtu_expired(struct inet_peer *peer) | 1355 | static bool peer_pmtu_expired(struct inet_peer *peer) |
1356 | { | 1356 | { |
1357 | unsigned long orig = ACCESS_ONCE(peer->pmtu_expires); | 1357 | unsigned long orig = ACCESS_ONCE(peer->pmtu_expires); |
1358 | 1358 | ||
1359 | return orig && | 1359 | return orig && |
1360 | time_after_eq(jiffies, orig) && | 1360 | time_after_eq(jiffies, orig) && |
1361 | cmpxchg(&peer->pmtu_expires, orig, 0) == orig; | 1361 | cmpxchg(&peer->pmtu_expires, orig, 0) == orig; |
1362 | } | 1362 | } |
1363 | 1363 | ||
1364 | static bool peer_pmtu_cleaned(struct inet_peer *peer) | 1364 | static bool peer_pmtu_cleaned(struct inet_peer *peer) |
1365 | { | 1365 | { |
1366 | unsigned long orig = ACCESS_ONCE(peer->pmtu_expires); | 1366 | unsigned long orig = ACCESS_ONCE(peer->pmtu_expires); |
1367 | 1367 | ||
1368 | return orig && | 1368 | return orig && |
1369 | cmpxchg(&peer->pmtu_expires, orig, 0) == orig; | 1369 | cmpxchg(&peer->pmtu_expires, orig, 0) == orig; |
1370 | } | 1370 | } |
1371 | 1371 | ||
1372 | static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) | 1372 | static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) |
1373 | { | 1373 | { |
1374 | struct rtable *rt = (struct rtable *)dst; | 1374 | struct rtable *rt = (struct rtable *)dst; |
1375 | struct dst_entry *ret = dst; | 1375 | struct dst_entry *ret = dst; |
1376 | 1376 | ||
1377 | if (rt) { | 1377 | if (rt) { |
1378 | if (dst->obsolete > 0) { | 1378 | if (dst->obsolete > 0) { |
1379 | ip_rt_put(rt); | 1379 | ip_rt_put(rt); |
1380 | ret = NULL; | 1380 | ret = NULL; |
1381 | } else if (rt->rt_flags & RTCF_REDIRECTED) { | 1381 | } else if (rt->rt_flags & RTCF_REDIRECTED) { |
1382 | unsigned hash = rt_hash(rt->rt_key_dst, rt->rt_key_src, | 1382 | unsigned hash = rt_hash(rt->rt_key_dst, rt->rt_key_src, |
1383 | rt->rt_oif, | 1383 | rt->rt_oif, |
1384 | rt_genid(dev_net(dst->dev))); | 1384 | rt_genid(dev_net(dst->dev))); |
1385 | rt_del(hash, rt); | 1385 | rt_del(hash, rt); |
1386 | ret = NULL; | 1386 | ret = NULL; |
1387 | } else if (rt->peer && peer_pmtu_expired(rt->peer)) { | 1387 | } else if (rt->peer && peer_pmtu_expired(rt->peer)) { |
1388 | dst_metric_set(dst, RTAX_MTU, rt->peer->pmtu_orig); | 1388 | dst_metric_set(dst, RTAX_MTU, rt->peer->pmtu_orig); |
1389 | } | 1389 | } |
1390 | } | 1390 | } |
1391 | return ret; | 1391 | return ret; |
1392 | } | 1392 | } |
1393 | 1393 | ||
1394 | /* | 1394 | /* |
1395 | * Algorithm: | 1395 | * Algorithm: |
1396 | * 1. The first ip_rt_redirect_number redirects are sent | 1396 | * 1. The first ip_rt_redirect_number redirects are sent |
1397 | * with exponential backoff, then we stop sending them at all, | 1397 | * with exponential backoff, then we stop sending them at all, |
1398 | * assuming that the host ignores our redirects. | 1398 | * assuming that the host ignores our redirects. |
1399 | * 2. If we did not see packets requiring redirects | 1399 | * 2. If we did not see packets requiring redirects |
1400 | * during ip_rt_redirect_silence, we assume that the host | 1400 | * during ip_rt_redirect_silence, we assume that the host |
1401 | * forgot redirected route and start to send redirects again. | 1401 | * forgot redirected route and start to send redirects again. |
1402 | * | 1402 | * |
1403 | * This algorithm is much cheaper and more intelligent than dumb load limiting | 1403 | * This algorithm is much cheaper and more intelligent than dumb load limiting |
1404 | * in icmp.c. | 1404 | * in icmp.c. |
1405 | * | 1405 | * |
1406 | * NOTE. Do not forget to inhibit load limiting for redirects (redundant) | 1406 | * NOTE. Do not forget to inhibit load limiting for redirects (redundant) |
1407 | * and "frag. need" (breaks PMTU discovery) in icmp.c. | 1407 | * and "frag. need" (breaks PMTU discovery) in icmp.c. |
1408 | */ | 1408 | */ |
1409 | 1409 | ||
/*
 * Send an ICMP redirect for @skb, rate limited per destination using
 * the inet_peer token/backoff state (see the algorithm comment above).
 * Without a peer the redirect is sent unthrottled.
 */
void ip_rt_send_redirect(struct sk_buff *skb)
{
	struct rtable *rt = skb_rtable(skb);
	struct in_device *in_dev;
	struct inet_peer *peer;
	int log_martians;

	/* Snapshot the device's log_martians setting under RCU; the
	 * in_device itself is not needed after this.
	 */
	rcu_read_lock();
	in_dev = __in_dev_get_rcu(rt->dst.dev);
	if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) {
		rcu_read_unlock();
		return;
	}
	log_martians = IN_DEV_LOG_MARTIANS(in_dev);
	rcu_read_unlock();

	if (!rt->peer)
		rt_bind_peer(rt, rt->rt_dst, 1);
	peer = rt->peer;
	if (!peer) {
		/* No rate-limit state available; send unconditionally. */
		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
		return;
	}

	/* No redirected packets during ip_rt_redirect_silence;
	 * reset the algorithm.
	 */
	if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence))
		peer->rate_tokens = 0;

	/* Too many ignored redirects; do not send anything
	 * set dst.rate_last to the last seen redirected packet.
	 */
	if (peer->rate_tokens >= ip_rt_redirect_number) {
		peer->rate_last = jiffies;
		return;
	}

	/* Check for load limit; set rate_last to the latest sent
	 * redirect.
	 */
	if (peer->rate_tokens == 0 ||
	    time_after(jiffies,
		       (peer->rate_last +
			(ip_rt_redirect_load << peer->rate_tokens)))) {
		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
		peer->rate_last = jiffies;
		++peer->rate_tokens;
#ifdef CONFIG_IP_ROUTE_VERBOSE
		if (log_martians &&
		    peer->rate_tokens == ip_rt_redirect_number &&
		    net_ratelimit())
			printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n",
			       &ip_hdr(skb)->saddr, rt->rt_iif,
			       &rt->rt_dst, &rt->rt_gateway);
#endif
	}
}
1468 | 1468 | ||
/*
 * Input-path handler for error routes: translate the dst's error code
 * into an ICMP_DEST_UNREACH message, rate limited per destination by a
 * token bucket kept on the inet_peer.  Always consumes @skb.
 */
static int ip_error(struct sk_buff *skb)
{
	struct rtable *rt = skb_rtable(skb);
	struct inet_peer *peer;
	unsigned long now;
	bool send;
	int code;

	switch (rt->dst.error) {
	case EINVAL:
	default:
		/* No ICMP equivalent; just drop the packet. */
		goto out;
	case EHOSTUNREACH:
		code = ICMP_HOST_UNREACH;
		break;
	case ENETUNREACH:
		code = ICMP_NET_UNREACH;
		IP_INC_STATS_BH(dev_net(rt->dst.dev),
				IPSTATS_MIB_INNOROUTES);
		break;
	case EACCES:
		code = ICMP_PKT_FILTERED;
		break;
	}

	if (!rt->peer)
		rt_bind_peer(rt, rt->rt_dst, 1);
	peer = rt->peer;

	send = true;
	if (peer) {
		/* Token bucket: refill by elapsed jiffies (capped at
		 * ip_rt_error_burst), spend ip_rt_error_cost per ICMP.
		 */
		now = jiffies;
		peer->rate_tokens += now - peer->rate_last;
		if (peer->rate_tokens > ip_rt_error_burst)
			peer->rate_tokens = ip_rt_error_burst;
		peer->rate_last = now;
		if (peer->rate_tokens >= ip_rt_error_cost)
			peer->rate_tokens -= ip_rt_error_cost;
		else
			send = false;
	}
	if (send)
		icmp_send(skb, ICMP_DEST_UNREACH, code, 0);

out:	kfree_skb(skb);
	return 0;
}
1516 | 1516 | ||
1517 | /* | 1517 | /* |
1518 | * The last two values are not from the RFC but | 1518 | * The last two values are not from the RFC but |
1519 | * are needed for AMPRnet AX.25 paths. | 1519 | * are needed for AMPRnet AX.25 paths. |
1520 | */ | 1520 | */ |
1521 | 1521 | ||
static const unsigned short mtu_plateau[] =
{32000, 17914, 8166, 4352, 2002, 1492, 576, 296, 216, 128 };

/* Return the largest plateau value strictly below @old_mtu (the
 * RFC 1191 plateau-table search), or 68 -- the minimum IPv4 MTU --
 * when none of the plateaus fits.
 */
static inline unsigned short guess_mtu(unsigned short old_mtu)
{
	size_t idx;

	for (idx = 0; idx < sizeof(mtu_plateau) / sizeof(mtu_plateau[0]); idx++)
		if (old_mtu > mtu_plateau[idx])
			return mtu_plateau[idx];

	return 68;
}
1534 | 1534 | ||
/*
 * Handle an ICMP FRAG_NEEDED report for the flow described by @iph.
 * Learn the reported path MTU into the destination's inet_peer entry
 * and return the MTU to use (new_mtu when nothing better was learned).
 */
unsigned short ip_rt_frag_needed(struct net *net, const struct iphdr *iph,
				 unsigned short new_mtu,
				 struct net_device *dev)
{
	unsigned short old_mtu = ntohs(iph->tot_len);
	unsigned short est_mtu = 0;
	struct inet_peer *peer;

	/* create=1: allocate a peer entry if none is cached yet */
	peer = inet_getpeer_v4(iph->daddr, 1);
	if (peer) {
		unsigned short mtu = new_mtu;

		/* Reported MTU below the IPv4 minimum or not actually
		 * smaller than the packet that triggered the error:
		 * the report is unusable, estimate instead.
		 */
		if (new_mtu < 68 || new_mtu >= old_mtu) {
			/* BSD 4.2 derived systems incorrectly adjust
			 * tot_len by the IP header length, and report
			 * a zero MTU in the ICMP message.
			 */
			if (mtu == 0 &&
			    old_mtu >= 68 + (iph->ihl << 2))
				old_mtu -= iph->ihl << 2;
			mtu = guess_mtu(old_mtu);
		}

		if (mtu < ip_rt_min_pmtu)
			mtu = ip_rt_min_pmtu;
		/* Only record an MTU that lowers the currently learned
		 * one, or when no learned PMTU is active (!pmtu_expires).
		 */
		if (!peer->pmtu_expires || mtu < peer->pmtu_learned) {
			unsigned long pmtu_expires;

			pmtu_expires = jiffies + ip_rt_mtu_expires;
			/* 0 means "no expiry pending"; never store it */
			if (!pmtu_expires)
				pmtu_expires = 1UL;

			est_mtu = mtu;
			peer->pmtu_learned = mtu;
			peer->pmtu_expires = pmtu_expires;
		}

		inet_putpeer(peer);

		/* Bump the generation so cached routes revalidate
		 * against the peer on next use.
		 */
		atomic_inc(&__rt_peer_genid);
	}
	return est_mtu ? : new_mtu;
}
1578 | 1578 | ||
/*
 * Sync @dst's MTU metric with the PMTU learned on @peer.  While the
 * learned value has not expired it caps the dst MTU; once expired, the
 * first CPU to clear pmtu_expires (the cmpxchg guards against
 * concurrent callers) restores the saved original metric.
 */
static void check_peer_pmtu(struct dst_entry *dst, struct inet_peer *peer)
{
	unsigned long expires = ACCESS_ONCE(peer->pmtu_expires);

	if (!expires)
		return;
	if (time_before(jiffies, expires)) {
		u32 orig_dst_mtu = dst_mtu(dst);
		if (peer->pmtu_learned < orig_dst_mtu) {
			/* Save the pristine metric once, for restoration */
			if (!peer->pmtu_orig)
				peer->pmtu_orig = dst_metric_raw(dst, RTAX_MTU);
			dst_metric_set(dst, RTAX_MTU, peer->pmtu_learned);
		}
	} else if (cmpxchg(&peer->pmtu_expires, expires, 0) == expires)
		dst_metric_set(dst, RTAX_MTU, peer->pmtu_orig);
}
1595 | 1595 | ||
/*
 * dst_ops->update_pmtu handler: record a newly discovered path MTU on
 * the route's inet_peer entry and apply it to the dst metrics.
 */
static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
{
	struct rtable *rt = (struct rtable *) dst;
	struct inet_peer *peer;

	dst_confirm(dst);

	/* Lazily bind a peer entry (create=1) to hold the learned PMTU */
	if (!rt->peer)
		rt_bind_peer(rt, rt->rt_dst, 1);
	peer = rt->peer;
	if (peer) {
		unsigned long pmtu_expires = ACCESS_ONCE(peer->pmtu_expires);

		if (mtu < ip_rt_min_pmtu)
			mtu = ip_rt_min_pmtu;
		/* Record only when it lowers the learned PMTU, or when
		 * no learned PMTU is currently active.
		 */
		if (!pmtu_expires || mtu < peer->pmtu_learned) {

			pmtu_expires = jiffies + ip_rt_mtu_expires;
			/* 0 means "not set"; never store it as an expiry */
			if (!pmtu_expires)
				pmtu_expires = 1UL;

			peer->pmtu_learned = mtu;
			peer->pmtu_expires = pmtu_expires;

			/* Invalidate other cached routes to this peer;
			 * this route is already up to date.
			 */
			atomic_inc(&__rt_peer_genid);
			rt->rt_peer_genid = rt_peer_genid();
		}
		check_peer_pmtu(dst, peer);
	}
}
1626 | 1626 | ||
/*
 * Switch @dst over to the redirect gateway learned on @peer.
 *
 * The replacement neighbour is resolved first and only then published
 * with xchg(), so concurrent readers of the dst neighbour always see
 * either the old or the new pointer -- never a transient NULL (the
 * previous release-then-rebind sequence allowed a NULL dereference
 * race between CPUs).
 */
static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer)
{
	struct rtable *rt = (struct rtable *) dst;
	__be32 orig_gw = rt->rt_gateway;
	struct neighbour *n, *old_n;

	dst_confirm(&rt->dst);

	rt->rt_gateway = peer->redirect_learned.a4;

	n = ipv4_neigh_lookup(&rt->dst, &rt->rt_gateway);
	if (IS_ERR(n))
		return PTR_ERR(n);
	/* Atomically publish the new neighbour and drop our reference on
	 * the old one (it is freed after an RCU grace period, so readers
	 * still traversing it remain safe).
	 */
	old_n = xchg(&rt->dst._neighbour, n);
	if (old_n)
		neigh_release(old_n);
	if (!n || !(n->nud_state & NUD_VALID)) {
		if (n)
			neigh_event_send(n, NULL);
		/* New gateway not usable yet: revert and ask to retry */
		rt->rt_gateway = orig_gw;
		return -EAGAIN;
	} else {
		rt->rt_flags |= RTCF_REDIRECTED;
		call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
	}
	return 0;
}
1652 | 1654 | ||
/*
 * dst_ops->check handler: revalidate a cached route.  Returns @dst if
 * it is still usable, or NULL to force the caller to relookup.  When
 * the peer generation changed, resync PMTU and redirect state from the
 * bound inet_peer entry.
 */
static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
{
	struct rtable *rt = (struct rtable *) dst;

	if (rt_is_expired(rt))
		return NULL;
	if (rt->rt_peer_genid != rt_peer_genid()) {
		struct inet_peer *peer;

		/* create=0: only pick up an already-existing peer entry */
		if (!rt->peer)
			rt_bind_peer(rt, rt->rt_dst, 0);

		peer = rt->peer;
		if (peer) {
			check_peer_pmtu(dst, peer);

			if (peer->redirect_learned.a4 &&
			    peer->redirect_learned.a4 != rt->rt_gateway) {
				/* Redirect not applicable yet: drop route */
				if (check_peer_redir(dst, peer))
					return NULL;
			}
		}

		rt->rt_peer_genid = rt_peer_genid();
	}
	return dst;
}
1680 | 1682 | ||
1681 | static void ipv4_dst_destroy(struct dst_entry *dst) | 1683 | static void ipv4_dst_destroy(struct dst_entry *dst) |
1682 | { | 1684 | { |
1683 | struct rtable *rt = (struct rtable *) dst; | 1685 | struct rtable *rt = (struct rtable *) dst; |
1684 | struct inet_peer *peer = rt->peer; | 1686 | struct inet_peer *peer = rt->peer; |
1685 | 1687 | ||
1686 | if (rt->fi) { | 1688 | if (rt->fi) { |
1687 | fib_info_put(rt->fi); | 1689 | fib_info_put(rt->fi); |
1688 | rt->fi = NULL; | 1690 | rt->fi = NULL; |
1689 | } | 1691 | } |
1690 | if (peer) { | 1692 | if (peer) { |
1691 | rt->peer = NULL; | 1693 | rt->peer = NULL; |
1692 | inet_putpeer(peer); | 1694 | inet_putpeer(peer); |
1693 | } | 1695 | } |
1694 | } | 1696 | } |
1695 | 1697 | ||
1696 | 1698 | ||
1697 | static void ipv4_link_failure(struct sk_buff *skb) | 1699 | static void ipv4_link_failure(struct sk_buff *skb) |
1698 | { | 1700 | { |
1699 | struct rtable *rt; | 1701 | struct rtable *rt; |
1700 | 1702 | ||
1701 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); | 1703 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); |
1702 | 1704 | ||
1703 | rt = skb_rtable(skb); | 1705 | rt = skb_rtable(skb); |
1704 | if (rt && rt->peer && peer_pmtu_cleaned(rt->peer)) | 1706 | if (rt && rt->peer && peer_pmtu_cleaned(rt->peer)) |
1705 | dst_metric_set(&rt->dst, RTAX_MTU, rt->peer->pmtu_orig); | 1707 | dst_metric_set(&rt->dst, RTAX_MTU, rt->peer->pmtu_orig); |
1706 | } | 1708 | } |
1707 | 1709 | ||
1708 | static int ip_rt_bug(struct sk_buff *skb) | 1710 | static int ip_rt_bug(struct sk_buff *skb) |
1709 | { | 1711 | { |
1710 | printk(KERN_DEBUG "ip_rt_bug: %pI4 -> %pI4, %s\n", | 1712 | printk(KERN_DEBUG "ip_rt_bug: %pI4 -> %pI4, %s\n", |
1711 | &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, | 1713 | &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, |
1712 | skb->dev ? skb->dev->name : "?"); | 1714 | skb->dev ? skb->dev->name : "?"); |
1713 | kfree_skb(skb); | 1715 | kfree_skb(skb); |
1714 | WARN_ON(1); | 1716 | WARN_ON(1); |
1715 | return 0; | 1717 | return 0; |
1716 | } | 1718 | } |
1717 | 1719 | ||
/*
   We do not cache the source address of the outgoing interface,
   because it is used only by the IP RR, TS and SRR options,
   so it is out of the fast path.

   BTW remember: "addr" is allowed to be unaligned
   in IP options!
 */
1726 | 1728 | ||
/*
 * Write the preferred 4-byte source address for @rt into @addr,
 * for use by the IP RR/TS/SRR options.  For input routes the source
 * must be recomputed via a reverse fib lookup.  @addr may be
 * unaligned, hence the memcpy at the end.
 */
void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
{
	__be32 src;

	if (rt_is_output_route(rt))
		src = ip_hdr(skb)->saddr;
	else {
		struct fib_result res;
		struct flowi4 fl4;
		struct iphdr *iph;

		iph = ip_hdr(skb);

		/* Rebuild the flow key from the packet for the lookup */
		memset(&fl4, 0, sizeof(fl4));
		fl4.daddr = iph->daddr;
		fl4.saddr = iph->saddr;
		fl4.flowi4_tos = RT_TOS(iph->tos);
		fl4.flowi4_oif = rt->dst.dev->ifindex;
		fl4.flowi4_iif = skb->dev->ifindex;
		fl4.flowi4_mark = skb->mark;

		rcu_read_lock();
		if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0)
			src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res);
		else
			/* No matching route: pick any global address
			 * on the output device.
			 */
			src = inet_select_addr(rt->dst.dev, rt->rt_gateway,
					RT_SCOPE_UNIVERSE);
		rcu_read_unlock();
	}
	memcpy(addr, &src, 4);
}
1758 | 1760 | ||
#ifdef CONFIG_IP_ROUTE_CLASSID
/*
 * Fill each still-empty 16-bit half of the route's classid from the
 * corresponding half of @tag; halves that are already set are kept.
 */
static void set_class_tag(struct rtable *rt, u32 tag)
{
	u32 classid = rt->dst.tclassid;

	if ((classid & 0xFFFF) == 0)
		classid |= tag & 0xFFFF;
	if ((classid & 0xFFFF0000) == 0)
		classid |= tag & 0xFFFF0000;
	rt->dst.tclassid = classid;
}
#endif
1768 | 1770 | ||
1769 | static unsigned int ipv4_default_advmss(const struct dst_entry *dst) | 1771 | static unsigned int ipv4_default_advmss(const struct dst_entry *dst) |
1770 | { | 1772 | { |
1771 | unsigned int advmss = dst_metric_raw(dst, RTAX_ADVMSS); | 1773 | unsigned int advmss = dst_metric_raw(dst, RTAX_ADVMSS); |
1772 | 1774 | ||
1773 | if (advmss == 0) { | 1775 | if (advmss == 0) { |
1774 | advmss = max_t(unsigned int, dst->dev->mtu - 40, | 1776 | advmss = max_t(unsigned int, dst->dev->mtu - 40, |
1775 | ip_rt_min_advmss); | 1777 | ip_rt_min_advmss); |
1776 | if (advmss > 65535 - 40) | 1778 | if (advmss > 65535 - 40) |
1777 | advmss = 65535 - 40; | 1779 | advmss = 65535 - 40; |
1778 | } | 1780 | } |
1779 | return advmss; | 1781 | return advmss; |
1780 | } | 1782 | } |
1781 | 1783 | ||
1782 | static unsigned int ipv4_default_mtu(const struct dst_entry *dst) | 1784 | static unsigned int ipv4_default_mtu(const struct dst_entry *dst) |
1783 | { | 1785 | { |
1784 | unsigned int mtu = dst->dev->mtu; | 1786 | unsigned int mtu = dst->dev->mtu; |
1785 | 1787 | ||
1786 | if (unlikely(dst_metric_locked(dst, RTAX_MTU))) { | 1788 | if (unlikely(dst_metric_locked(dst, RTAX_MTU))) { |
1787 | const struct rtable *rt = (const struct rtable *) dst; | 1789 | const struct rtable *rt = (const struct rtable *) dst; |
1788 | 1790 | ||
1789 | if (rt->rt_gateway != rt->rt_dst && mtu > 576) | 1791 | if (rt->rt_gateway != rt->rt_dst && mtu > 576) |
1790 | mtu = 576; | 1792 | mtu = 576; |
1791 | } | 1793 | } |
1792 | 1794 | ||
1793 | if (mtu > IP_MAX_MTU) | 1795 | if (mtu > IP_MAX_MTU) |
1794 | mtu = IP_MAX_MTU; | 1796 | mtu = IP_MAX_MTU; |
1795 | 1797 | ||
1796 | return mtu; | 1798 | return mtu; |
1797 | } | 1799 | } |
1798 | 1800 | ||
/*
 * Initialise @rt's metrics, preferring the per-destination inet_peer
 * metrics over the fib_info ones so learned PMTU/redirect state is
 * shared across routes to the same host.
 */
static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4,
			    struct fib_info *fi)
{
	struct inet_peer *peer;
	int create = 0;

	/* If a peer entry exists for this destination, we must hook
	 * it up in order to get at cached metrics.
	 */
	if (fl4 && (fl4->flowi4_flags & FLOWI_FLAG_PRECOW_METRICS))
		create = 1;

	rt->peer = peer = inet_getpeer_v4(rt->rt_dst, create);
	if (peer) {
		rt->rt_peer_genid = rt_peer_genid();
		/* Seed fresh peer metrics from the fib entry */
		if (inet_metrics_new(peer))
			memcpy(peer->metrics, fi->fib_metrics,
			       sizeof(u32) * RTAX_MAX);
		/* read_only=false: peer metrics may be written */
		dst_init_metrics(&rt->dst, peer->metrics, false);

		check_peer_pmtu(&rt->dst, peer);
		/* Apply a previously learned redirect immediately */
		if (peer->redirect_learned.a4 &&
		    peer->redirect_learned.a4 != rt->rt_gateway) {
			rt->rt_gateway = peer->redirect_learned.a4;
			rt->rt_flags |= RTCF_REDIRECTED;
		}
	} else {
		/* No peer: borrow the fib metrics read-only, taking a
		 * fib_info reference unless they are the shared defaults.
		 */
		if (fi->fib_metrics != (u32 *) dst_default_metrics) {
			rt->fi = fi;
			atomic_inc(&fi->fib_clntref);
		}
		dst_init_metrics(&rt->dst, fi->fib_metrics, true);
	}
}
1833 | 1835 | ||
/*
 * Finish setting up a new route: pick the gateway from the fib result,
 * initialise metrics, clamp MTU/ADVMSS to sane bounds and apply
 * traffic-classification tags where configured.
 */
static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *fl4,
			   const struct fib_result *res,
			   struct fib_info *fi, u16 type, u32 itag)
{
	struct dst_entry *dst = &rt->dst;

	if (fi) {
		/* Only link-scope nexthops are real gateways */
		if (FIB_RES_GW(*res) &&
		    FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
			rt->rt_gateway = FIB_RES_GW(*res);
		rt_init_metrics(rt, fl4, fi);
#ifdef CONFIG_IP_ROUTE_CLASSID
		dst->tclassid = FIB_RES_NH(*res).nh_tclassid;
#endif
	}

	/* Clamp inherited metrics to protocol limits */
	if (dst_mtu(dst) > IP_MAX_MTU)
		dst_metric_set(dst, RTAX_MTU, IP_MAX_MTU);
	if (dst_metric_raw(dst, RTAX_ADVMSS) > 65535 - 40)
		dst_metric_set(dst, RTAX_ADVMSS, 65535 - 40);

#ifdef CONFIG_IP_ROUTE_CLASSID
#ifdef CONFIG_IP_MULTIPLE_TABLES
	set_class_tag(rt, fib_rules_tclass(res));
#endif
	set_class_tag(rt, itag);
#endif
}
1862 | 1864 | ||
1863 | static struct rtable *rt_dst_alloc(struct net_device *dev, | 1865 | static struct rtable *rt_dst_alloc(struct net_device *dev, |
1864 | bool nopolicy, bool noxfrm) | 1866 | bool nopolicy, bool noxfrm) |
1865 | { | 1867 | { |
1866 | return dst_alloc(&ipv4_dst_ops, dev, 1, -1, | 1868 | return dst_alloc(&ipv4_dst_ops, dev, 1, -1, |
1867 | DST_HOST | | 1869 | DST_HOST | |
1868 | (nopolicy ? DST_NOPOLICY : 0) | | 1870 | (nopolicy ? DST_NOPOLICY : 0) | |
1869 | (noxfrm ? DST_NOXFRM : 0)); | 1871 | (noxfrm ? DST_NOXFRM : 0)); |
1870 | } | 1872 | } |
1871 | 1873 | ||
/* called in rcu_read_lock() section */
/*
 * Build and cache an input route for a multicast packet arriving on
 * @dev.  Returns 0 on success or a negative errno.  @our selects local
 * delivery (the host is a member of the group).
 */
static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			     u8 tos, struct net_device *dev, int our)
{
	unsigned int hash;
	struct rtable *rth;
	__be32 spec_dst;
	struct in_device *in_dev = __in_dev_get_rcu(dev);
	u32 itag = 0;
	int err;

	/* Primary sanity checks. */

	if (in_dev == NULL)
		return -EINVAL;

	/* Multicast, broadcast or loopback sources are never valid,
	 * nor is anything but IP at this point.
	 */
	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
	    ipv4_is_loopback(saddr) || skb->protocol != htons(ETH_P_IP))
		goto e_inval;

	if (ipv4_is_zeronet(saddr)) {
		/* A 0.0.0.0 source is only acceptable for link-local
		 * multicast (e.g. IGMP from a not-yet-configured host).
		 */
		if (!ipv4_is_local_multicast(daddr))
			goto e_inval;
		spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
	} else {
		err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &spec_dst,
					  &itag);
		if (err < 0)
			goto e_err;
	}
	rth = rt_dst_alloc(init_net.loopback_dev,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false);
	if (!rth)
		goto e_nobufs;

#ifdef CONFIG_IP_ROUTE_CLASSID
	rth->dst.tclassid = itag;
#endif
	/* Multicast is never forwarded via output; trap it */
	rth->dst.output = ip_rt_bug;

	rth->rt_key_dst = daddr;
	rth->rt_key_src = saddr;
	rth->rt_genid = rt_genid(dev_net(dev));
	rth->rt_flags = RTCF_MULTICAST;
	rth->rt_type = RTN_MULTICAST;
	rth->rt_key_tos = tos;
	rth->rt_dst = daddr;
	rth->rt_src = saddr;
	rth->rt_route_iif = dev->ifindex;
	rth->rt_iif = dev->ifindex;
	rth->rt_oif = 0;
	rth->rt_mark = skb->mark;
	rth->rt_gateway = daddr;
	rth->rt_spec_dst= spec_dst;
	rth->rt_peer_genid = 0;
	rth->peer = NULL;
	rth->fi = NULL;
	if (our) {
		rth->dst.input= ip_local_deliver;
		rth->rt_flags |= RTCF_LOCAL;
	}

#ifdef CONFIG_IP_MROUTE
	/* Non-link-local groups may need multicast routing */
	if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev))
		rth->dst.input = ip_mr_input;
#endif
	RT_CACHE_STAT_INC(in_slow_mc);

	hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev)));
	rth = rt_intern_hash(hash, rth, skb, dev->ifindex);
	return IS_ERR(rth) ? PTR_ERR(rth) : 0;

e_nobufs:
	return -ENOBUFS;
e_inval:
	return -EINVAL;
e_err:
	return err;
}
1951 | 1953 | ||
1952 | 1954 | ||
1953 | static void ip_handle_martian_source(struct net_device *dev, | 1955 | static void ip_handle_martian_source(struct net_device *dev, |
1954 | struct in_device *in_dev, | 1956 | struct in_device *in_dev, |
1955 | struct sk_buff *skb, | 1957 | struct sk_buff *skb, |
1956 | __be32 daddr, | 1958 | __be32 daddr, |
1957 | __be32 saddr) | 1959 | __be32 saddr) |
1958 | { | 1960 | { |
1959 | RT_CACHE_STAT_INC(in_martian_src); | 1961 | RT_CACHE_STAT_INC(in_martian_src); |
1960 | #ifdef CONFIG_IP_ROUTE_VERBOSE | 1962 | #ifdef CONFIG_IP_ROUTE_VERBOSE |
1961 | if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) { | 1963 | if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) { |
1962 | /* | 1964 | /* |
1963 | * RFC1812 recommendation, if source is martian, | 1965 | * RFC1812 recommendation, if source is martian, |
1964 | * the only hint is MAC header. | 1966 | * the only hint is MAC header. |
1965 | */ | 1967 | */ |
1966 | printk(KERN_WARNING "martian source %pI4 from %pI4, on dev %s\n", | 1968 | printk(KERN_WARNING "martian source %pI4 from %pI4, on dev %s\n", |
1967 | &daddr, &saddr, dev->name); | 1969 | &daddr, &saddr, dev->name); |
1968 | if (dev->hard_header_len && skb_mac_header_was_set(skb)) { | 1970 | if (dev->hard_header_len && skb_mac_header_was_set(skb)) { |
1969 | int i; | 1971 | int i; |
1970 | const unsigned char *p = skb_mac_header(skb); | 1972 | const unsigned char *p = skb_mac_header(skb); |
1971 | printk(KERN_WARNING "ll header: "); | 1973 | printk(KERN_WARNING "ll header: "); |
1972 | for (i = 0; i < dev->hard_header_len; i++, p++) { | 1974 | for (i = 0; i < dev->hard_header_len; i++, p++) { |
1973 | printk("%02x", *p); | 1975 | printk("%02x", *p); |
1974 | if (i < (dev->hard_header_len - 1)) | 1976 | if (i < (dev->hard_header_len - 1)) |
1975 | printk(":"); | 1977 | printk(":"); |
1976 | } | 1978 | } |
1977 | printk("\n"); | 1979 | printk("\n"); |
1978 | } | 1980 | } |
1979 | } | 1981 | } |
1980 | #endif | 1982 | #endif |
1981 | } | 1983 | } |
1982 | 1984 | ||
/* called in rcu_read_lock() section */
/*
 * Build a forwarding route for a validated input packet and return it
 * via @result.  Returns 0 on success or a negative errno.
 */
static int __mkroute_input(struct sk_buff *skb,
			   const struct fib_result *res,
			   struct in_device *in_dev,
			   __be32 daddr, __be32 saddr, u32 tos,
			   struct rtable **result)
{
	struct rtable *rth;
	int err;
	struct in_device *out_dev;
	unsigned int flags = 0;
	__be32 spec_dst;
	u32 itag;

	/* get a working reference to the output device */
	out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res));
	if (out_dev == NULL) {
		/* Should never happen for a valid fib result */
		if (net_ratelimit())
			printk(KERN_CRIT "Bug in ip_route_input" \
			       "_slow(). Please, report\n");
		return -EINVAL;
	}


	err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res),
				  in_dev->dev, &spec_dst, &itag);
	if (err < 0) {
		ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
					 saddr);

		goto cleanup;
	}

	/* err > 0 means the source is directly reachable on this link */
	if (err)
		flags |= RTCF_DIRECTSRC;

	/* Forwarding back out the input interface: candidate for an
	 * ICMP redirect to the sender.
	 */
	if (out_dev == in_dev && err &&
	    (IN_DEV_SHARED_MEDIA(out_dev) ||
	     inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
		flags |= RTCF_DOREDIRECT;

	if (skb->protocol != htons(ETH_P_IP)) {
		/* Not IP (i.e. ARP). Do not create route, if it is
		 * invalid for proxy arp. DNAT routes are always valid.
		 *
		 * Proxy arp feature have been extended to allow, ARP
		 * replies back to the same interface, to support
		 * Private VLAN switch technologies. See arp.c.
		 */
		if (out_dev == in_dev &&
		    IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) {
			err = -EINVAL;
			goto cleanup;
		}
	}

	rth = rt_dst_alloc(out_dev->dev,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY),
			   IN_DEV_CONF_GET(out_dev, NOXFRM));
	if (!rth) {
		err = -ENOBUFS;
		goto cleanup;
	}

	rth->rt_key_dst = daddr;
	rth->rt_key_src = saddr;
	rth->rt_genid = rt_genid(dev_net(rth->dst.dev));
	rth->rt_flags = flags;
	rth->rt_type = res->type;
	rth->rt_key_tos = tos;
	rth->rt_dst = daddr;
	rth->rt_src = saddr;
	rth->rt_route_iif = in_dev->dev->ifindex;
	rth->rt_iif = in_dev->dev->ifindex;
	rth->rt_oif = 0;
	rth->rt_mark = skb->mark;
	rth->rt_gateway = daddr;
	rth->rt_spec_dst= spec_dst;
	rth->rt_peer_genid = 0;
	rth->peer = NULL;
	rth->fi = NULL;

	rth->dst.input = ip_forward;
	rth->dst.output = ip_output;

	rt_set_nexthop(rth, NULL, res, res->fi, res->type, itag);

	*result = rth;
	err = 0;
cleanup:
	return err;
}
2075 | 2077 | ||
2076 | static int ip_mkroute_input(struct sk_buff *skb, | 2078 | static int ip_mkroute_input(struct sk_buff *skb, |
2077 | struct fib_result *res, | 2079 | struct fib_result *res, |
2078 | const struct flowi4 *fl4, | 2080 | const struct flowi4 *fl4, |
2079 | struct in_device *in_dev, | 2081 | struct in_device *in_dev, |
2080 | __be32 daddr, __be32 saddr, u32 tos) | 2082 | __be32 daddr, __be32 saddr, u32 tos) |
2081 | { | 2083 | { |
2082 | struct rtable* rth = NULL; | 2084 | struct rtable* rth = NULL; |
2083 | int err; | 2085 | int err; |
2084 | unsigned hash; | 2086 | unsigned hash; |
2085 | 2087 | ||
2086 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 2088 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
2087 | if (res->fi && res->fi->fib_nhs > 1) | 2089 | if (res->fi && res->fi->fib_nhs > 1) |
2088 | fib_select_multipath(res); | 2090 | fib_select_multipath(res); |
2089 | #endif | 2091 | #endif |
2090 | 2092 | ||
2091 | /* create a routing cache entry */ | 2093 | /* create a routing cache entry */ |
2092 | err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, &rth); | 2094 | err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, &rth); |
2093 | if (err) | 2095 | if (err) |
2094 | return err; | 2096 | return err; |
2095 | 2097 | ||
2096 | /* put it into the cache */ | 2098 | /* put it into the cache */ |
2097 | hash = rt_hash(daddr, saddr, fl4->flowi4_iif, | 2099 | hash = rt_hash(daddr, saddr, fl4->flowi4_iif, |
2098 | rt_genid(dev_net(rth->dst.dev))); | 2100 | rt_genid(dev_net(rth->dst.dev))); |
2099 | rth = rt_intern_hash(hash, rth, skb, fl4->flowi4_iif); | 2101 | rth = rt_intern_hash(hash, rth, skb, fl4->flowi4_iif); |
2100 | if (IS_ERR(rth)) | 2102 | if (IS_ERR(rth)) |
2101 | return PTR_ERR(rth); | 2103 | return PTR_ERR(rth); |
2102 | return 0; | 2104 | return 0; |
2103 | } | 2105 | } |
2104 | 2106 | ||
2105 | /* | 2107 | /* |
2106 | * NOTE. We drop all the packets that has local source | 2108 | * NOTE. We drop all the packets that has local source |
2107 | * addresses, because every properly looped back packet | 2109 | * addresses, because every properly looped back packet |
2108 | * must have correct destination already attached by output routine. | 2110 | * must have correct destination already attached by output routine. |
2109 | * | 2111 | * |
2110 | * Such approach solves two big problems: | 2112 | * Such approach solves two big problems: |
2111 | * 1. Not simplex devices are handled properly. | 2113 | * 1. Not simplex devices are handled properly. |
2112 | * 2. IP spoofing attempts are filtered with 100% of guarantee. | 2114 | * 2. IP spoofing attempts are filtered with 100% of guarantee. |
2113 | * called with rcu_read_lock() | 2115 | * called with rcu_read_lock() |
2114 | */ | 2116 | */ |
2115 | 2117 | ||
/* Slow-path input route resolution: classify the packet (martian,
 * broadcast, local, forward), consult the FIB, and build + cache the
 * resulting rtable entry.  Returns 0 or a negative errno.
 * Called with rcu_read_lock() held (see ip_route_input_common()).
 */
static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			       u8 tos, struct net_device *dev)
{
	struct fib_result res;
	struct in_device *in_dev = __in_dev_get_rcu(dev);
	struct flowi4 fl4;
	unsigned flags = 0;
	u32 itag = 0;
	struct rtable * rth;
	unsigned hash;
	__be32 spec_dst;
	int err = -EINVAL;
	struct net * net = dev_net(dev);

	/* IP on this device is disabled. */

	if (!in_dev)
		goto out;

	/* Check for the most weird martians, which can be not detected
	   by fib_lookup.
	 */

	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
	    ipv4_is_loopback(saddr))
		goto martian_source;

	if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
		goto brd_input;

	/* Accept zero addresses only to limited broadcast;
	 * I even do not know to fix it or not. Waiting for complains :-)
	 */
	if (ipv4_is_zeronet(saddr))
		goto martian_source;

	if (ipv4_is_zeronet(daddr) || ipv4_is_loopback(daddr))
		goto martian_destination;

	/*
	 *	Now we are ready to route packet.
	 */
	/* Build the flow key for the FIB lookup from the packet. */
	fl4.flowi4_oif = 0;
	fl4.flowi4_iif = dev->ifindex;
	fl4.flowi4_mark = skb->mark;
	fl4.flowi4_tos = tos;
	fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
	fl4.daddr = daddr;
	fl4.saddr = saddr;
	err = fib_lookup(net, &fl4, &res);
	if (err != 0) {
		if (!IN_DEV_FORWARD(in_dev))
			goto e_hostunreach;
		goto no_route;
	}

	RT_CACHE_STAT_INC(in_slow_tot);

	if (res.type == RTN_BROADCAST)
		goto brd_input;

	if (res.type == RTN_LOCAL) {
		/* Destination is one of our own addresses: validate the
		 * source against the loopback device and deliver locally.
		 */
		err = fib_validate_source(skb, saddr, daddr, tos,
					  net->loopback_dev->ifindex,
					  dev, &spec_dst, &itag);
		if (err < 0)
			goto martian_source_keep_err;
		if (err)
			flags |= RTCF_DIRECTSRC;
		spec_dst = daddr;
		goto local_input;
	}

	if (!IN_DEV_FORWARD(in_dev))
		goto e_hostunreach;
	if (res.type != RTN_UNICAST)
		goto martian_destination;

	/* Unicast forwarding: build and cache a forward route. */
	err = ip_mkroute_input(skb, &res, &fl4, in_dev, daddr, saddr, tos);
out:	return err;

brd_input:
	if (skb->protocol != htons(ETH_P_IP))
		goto e_inval;

	if (ipv4_is_zeronet(saddr))
		spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
	else {
		err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &spec_dst,
					  &itag);
		if (err < 0)
			goto martian_source_keep_err;
		if (err)
			flags |= RTCF_DIRECTSRC;
	}
	flags |= RTCF_BROADCAST;
	res.type = RTN_BROADCAST;
	RT_CACHE_STAT_INC(in_brd);

local_input:
	/* Local delivery (also reached from no_route with RTN_UNREACHABLE):
	 * allocate a dst bound to the loopback device.
	 */
	rth = rt_dst_alloc(net->loopback_dev,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false);
	if (!rth)
		goto e_nobufs;

	rth->dst.input= ip_local_deliver;
	rth->dst.output= ip_rt_bug;
#ifdef CONFIG_IP_ROUTE_CLASSID
	rth->dst.tclassid = itag;
#endif

	rth->rt_key_dst	= daddr;
	rth->rt_key_src	= saddr;
	rth->rt_genid = rt_genid(net);
	rth->rt_flags	= flags|RTCF_LOCAL;
	rth->rt_type	= res.type;
	rth->rt_key_tos	= tos;
	rth->rt_dst	= daddr;
	rth->rt_src	= saddr;
#ifdef CONFIG_IP_ROUTE_CLASSID
	rth->dst.tclassid = itag;
#endif
	rth->rt_route_iif = dev->ifindex;
	rth->rt_iif	= dev->ifindex;
	rth->rt_oif	= 0;
	rth->rt_mark    = skb->mark;
	rth->rt_gateway	= daddr;
	rth->rt_spec_dst= spec_dst;
	rth->rt_peer_genid = 0;
	rth->peer = NULL;
	rth->fi = NULL;
	if (res.type == RTN_UNREACHABLE) {
		/* Cache the negative result so ip_error() generates the
		 * proper ICMP error for subsequent packets.
		 */
		rth->dst.input= ip_error;
		rth->dst.error= -err;
		rth->rt_flags	&= ~RTCF_LOCAL;
	}
	hash = rt_hash(daddr, saddr, fl4.flowi4_iif, rt_genid(net));
	rth = rt_intern_hash(hash, rth, skb, fl4.flowi4_iif);
	err = 0;
	if (IS_ERR(rth))
		err = PTR_ERR(rth);
	goto out;

no_route:
	RT_CACHE_STAT_INC(in_no_route);
	spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
	res.type = RTN_UNREACHABLE;
	if (err == -ESRCH)
		err = -ENETUNREACH;
	goto local_input;

	/*
	 *	Do not cache martian addresses: they should be logged (RFC1812)
	 */
martian_destination:
	RT_CACHE_STAT_INC(in_martian_dst);
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit())
		printk(KERN_WARNING "martian destination %pI4 from %pI4, dev %s\n",
			&daddr, &saddr, dev->name);
#endif
	/* fall through: martian destinations are reported as -EHOSTUNREACH */

e_hostunreach:
	err = -EHOSTUNREACH;
	goto out;

e_inval:
	err = -EINVAL;
	goto out;

e_nobufs:
	err = -ENOBUFS;
	goto out;

martian_source:
	err = -EINVAL;
martian_source_keep_err:
	ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
	goto out;
}
2296 | 2298 | ||
/* Input route lookup entry point: try the route cache first, handle
 * multicast specially, and fall back to ip_route_input_slow().
 * On a cache hit the dst is attached to @skb (with or without a
 * reference, per @noref).  Returns 0 or a negative errno.
 */
int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			   u8 tos, struct net_device *dev, bool noref)
{
	struct rtable * rth;
	unsigned hash;
	int iif = dev->ifindex;
	struct net *net;
	int res;

	net = dev_net(dev);

	rcu_read_lock();

	if (!rt_caching(net))
		goto skip_cache;

	tos &= IPTOS_RT_MASK;
	hash = rt_hash(daddr, saddr, iif, rt_genid(net));

	/* Walk the hash chain under RCU; the xor/or fold compares all
	 * key fields in one branch-free expression (zero iff all match).
	 */
	for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
	     rth = rcu_dereference(rth->dst.rt_next)) {
		if ((((__force u32)rth->rt_key_dst ^ (__force u32)daddr) |
		     ((__force u32)rth->rt_key_src ^ (__force u32)saddr) |
		     (rth->rt_iif ^ iif) |
		     rth->rt_oif |
		     (rth->rt_key_tos ^ tos)) == 0 &&
		    rth->rt_mark == skb->mark &&
		    net_eq(dev_net(rth->dst.dev), net) &&
		    !rt_is_expired(rth)) {
			if (noref) {
				dst_use_noref(&rth->dst, jiffies);
				skb_dst_set_noref(skb, &rth->dst);
			} else {
				dst_use(&rth->dst, jiffies);
				skb_dst_set(skb, &rth->dst);
			}
			RT_CACHE_STAT_INC(in_hit);
			rcu_read_unlock();
			return 0;
		}
		RT_CACHE_STAT_INC(in_hlist_search);
	}

skip_cache:
	/* Multicast recognition logic is moved from route cache to here.
	   The problem was that too many Ethernet cards have broken/missing
	   hardware multicast filters :-( As result the host on multicasting
	   network acquires a lot of useless route cache entries, sort of
	   SDR messages from all the world. Now we try to get rid of them.
	   Really, provided software IP multicast filter is organized
	   reasonably (at least, hashed), it does not result in a slowdown
	   comparing with route cache reject entries.
	   Note, that multicast routers are not affected, because
	   route cache entry is created eventually.
	 */
	if (ipv4_is_multicast(daddr)) {
		struct in_device *in_dev = __in_dev_get_rcu(dev);

		if (in_dev) {
			/* Accept if we are a member of the group, or (with
			 * CONFIG_IP_MROUTE) if multicast forwarding applies.
			 */
			int our = ip_check_mc_rcu(in_dev, daddr, saddr,
						  ip_hdr(skb)->protocol);
			if (our
#ifdef CONFIG_IP_MROUTE
				||
			    (!ipv4_is_local_multicast(daddr) &&
			     IN_DEV_MFORWARD(in_dev))
#endif
			   ) {
				int res = ip_route_input_mc(skb, daddr, saddr,
							    tos, dev, our);
				rcu_read_unlock();
				return res;
			}
		}
		rcu_read_unlock();
		return -EINVAL;
	}
	res = ip_route_input_slow(skb, daddr, saddr, tos, dev);
	rcu_read_unlock();
	return res;
}
EXPORT_SYMBOL(ip_route_input_common);
2379 | 2381 | ||
2380 | /* called with rcu_read_lock() */ | 2382 | /* called with rcu_read_lock() */ |
/* Build an output rtable for the FIB result @res on device @dev_out.
 * The orig_* values are the caller's pre-rewrite flow key fields and are
 * stored as the cache lookup key (rt_key_*); fl4 holds the resolved flow.
 * Returns the new rtable or an ERR_PTR() errno.
 * Called with rcu_read_lock() held (see comment above).
 */
static struct rtable *__mkroute_output(const struct fib_result *res,
				       const struct flowi4 *fl4,
				       __be32 orig_daddr, __be32 orig_saddr,
				       int orig_oif, struct net_device *dev_out,
				       unsigned int flags)
{
	struct fib_info *fi = res->fi;
	u32 tos = RT_FL_TOS(fl4);
	struct in_device *in_dev;
	u16 type = res->type;
	struct rtable *rth;

	/* Loopback source addresses may only leave via a loopback device. */
	if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK))
		return ERR_PTR(-EINVAL);

	/* Reclassify by destination address class. */
	if (ipv4_is_lbcast(fl4->daddr))
		type = RTN_BROADCAST;
	else if (ipv4_is_multicast(fl4->daddr))
		type = RTN_MULTICAST;
	else if (ipv4_is_zeronet(fl4->daddr))
		return ERR_PTR(-EINVAL);

	if (dev_out->flags & IFF_LOOPBACK)
		flags |= RTCF_LOCAL;

	in_dev = __in_dev_get_rcu(dev_out);
	if (!in_dev)
		return ERR_PTR(-EINVAL);

	if (type == RTN_BROADCAST) {
		flags |= RTCF_BROADCAST | RTCF_LOCAL;
		fi = NULL;
	} else if (type == RTN_MULTICAST) {
		flags |= RTCF_MULTICAST | RTCF_LOCAL;
		/* Drop RTCF_LOCAL if we are not a member of the group. */
		if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr,
				     fl4->flowi4_proto))
			flags &= ~RTCF_LOCAL;
		/* If multicast route do not exist use
		 * default one, but do not gateway in this case.
		 * Yes, it is hack.
		 */
		if (fi && res->prefixlen < 4)
			fi = NULL;
	}

	rth = rt_dst_alloc(dev_out,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY),
			   IN_DEV_CONF_GET(in_dev, NOXFRM));
	if (!rth)
		return ERR_PTR(-ENOBUFS);

	rth->dst.output = ip_output;

	rth->rt_key_dst	= orig_daddr;
	rth->rt_key_src	= orig_saddr;
	rth->rt_genid = rt_genid(dev_net(dev_out));
	rth->rt_flags	= flags;
	rth->rt_type	= type;
	rth->rt_key_tos	= tos;
	rth->rt_dst	= fl4->daddr;
	rth->rt_src	= fl4->saddr;
	rth->rt_route_iif = 0;
	rth->rt_iif	= orig_oif ? : dev_out->ifindex;
	rth->rt_oif	= orig_oif;
	rth->rt_mark    = fl4->flowi4_mark;
	rth->rt_gateway = fl4->daddr;
	rth->rt_spec_dst= fl4->saddr;
	rth->rt_peer_genid = 0;
	rth->peer = NULL;
	rth->fi = NULL;

	RT_CACHE_STAT_INC(out_slow_tot);

	if (flags & RTCF_LOCAL) {
		rth->dst.input = ip_local_deliver;
		rth->rt_spec_dst = fl4->daddr;
	}
	if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
		rth->rt_spec_dst = fl4->saddr;
		/* Broadcast/multicast that is also locally delivered still
		 * goes on the wire unless the device is loopback.
		 */
		if (flags & RTCF_LOCAL &&
		    !(dev_out->flags & IFF_LOOPBACK)) {
			rth->dst.output = ip_mc_output;
			RT_CACHE_STAT_INC(out_slow_mc);
		}
#ifdef CONFIG_IP_MROUTE
		if (type == RTN_MULTICAST) {
			if (IN_DEV_MFORWARD(in_dev) &&
			    !ipv4_is_local_multicast(fl4->daddr)) {
				rth->dst.input = ip_mr_input;
				rth->dst.output = ip_mc_output;
			}
		}
#endif
	}

	rt_set_nexthop(rth, fl4, res, fi, type, 0);

	return rth;
}
2480 | 2482 | ||
2481 | /* | 2483 | /* |
2482 | * Major route resolver routine. | 2484 | * Major route resolver routine. |
2483 | * called with rcu_read_lock(); | 2485 | * called with rcu_read_lock(); |
2484 | */ | 2486 | */ |
2485 | 2487 | ||
2486 | static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4) | 2488 | static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4) |
2487 | { | 2489 | { |
2488 | struct net_device *dev_out = NULL; | 2490 | struct net_device *dev_out = NULL; |
2489 | u32 tos = RT_FL_TOS(fl4); | 2491 | u32 tos = RT_FL_TOS(fl4); |
2490 | unsigned int flags = 0; | 2492 | unsigned int flags = 0; |
2491 | struct fib_result res; | 2493 | struct fib_result res; |
2492 | struct rtable *rth; | 2494 | struct rtable *rth; |
2493 | __be32 orig_daddr; | 2495 | __be32 orig_daddr; |
2494 | __be32 orig_saddr; | 2496 | __be32 orig_saddr; |
2495 | int orig_oif; | 2497 | int orig_oif; |
2496 | 2498 | ||
2497 | res.fi = NULL; | 2499 | res.fi = NULL; |
2498 | #ifdef CONFIG_IP_MULTIPLE_TABLES | 2500 | #ifdef CONFIG_IP_MULTIPLE_TABLES |
2499 | res.r = NULL; | 2501 | res.r = NULL; |
2500 | #endif | 2502 | #endif |
2501 | 2503 | ||
2502 | orig_daddr = fl4->daddr; | 2504 | orig_daddr = fl4->daddr; |
2503 | orig_saddr = fl4->saddr; | 2505 | orig_saddr = fl4->saddr; |
2504 | orig_oif = fl4->flowi4_oif; | 2506 | orig_oif = fl4->flowi4_oif; |
2505 | 2507 | ||
2506 | fl4->flowi4_iif = net->loopback_dev->ifindex; | 2508 | fl4->flowi4_iif = net->loopback_dev->ifindex; |
2507 | fl4->flowi4_tos = tos & IPTOS_RT_MASK; | 2509 | fl4->flowi4_tos = tos & IPTOS_RT_MASK; |
2508 | fl4->flowi4_scope = ((tos & RTO_ONLINK) ? | 2510 | fl4->flowi4_scope = ((tos & RTO_ONLINK) ? |
2509 | RT_SCOPE_LINK : RT_SCOPE_UNIVERSE); | 2511 | RT_SCOPE_LINK : RT_SCOPE_UNIVERSE); |
2510 | 2512 | ||
2511 | rcu_read_lock(); | 2513 | rcu_read_lock(); |
2512 | if (fl4->saddr) { | 2514 | if (fl4->saddr) { |
2513 | rth = ERR_PTR(-EINVAL); | 2515 | rth = ERR_PTR(-EINVAL); |
2514 | if (ipv4_is_multicast(fl4->saddr) || | 2516 | if (ipv4_is_multicast(fl4->saddr) || |
2515 | ipv4_is_lbcast(fl4->saddr) || | 2517 | ipv4_is_lbcast(fl4->saddr) || |
2516 | ipv4_is_zeronet(fl4->saddr)) | 2518 | ipv4_is_zeronet(fl4->saddr)) |
2517 | goto out; | 2519 | goto out; |
2518 | 2520 | ||
2519 | /* I removed check for oif == dev_out->oif here. | 2521 | /* I removed check for oif == dev_out->oif here. |
2520 | It was wrong for two reasons: | 2522 | It was wrong for two reasons: |
2521 | 1. ip_dev_find(net, saddr) can return wrong iface, if saddr | 2523 | 1. ip_dev_find(net, saddr) can return wrong iface, if saddr |
2522 | is assigned to multiple interfaces. | 2524 | is assigned to multiple interfaces. |
2523 | 2. Moreover, we are allowed to send packets with saddr | 2525 | 2. Moreover, we are allowed to send packets with saddr |
2524 | of another iface. --ANK | 2526 | of another iface. --ANK |
2525 | */ | 2527 | */ |
2526 | 2528 | ||
2527 | if (fl4->flowi4_oif == 0 && | 2529 | if (fl4->flowi4_oif == 0 && |
2528 | (ipv4_is_multicast(fl4->daddr) || | 2530 | (ipv4_is_multicast(fl4->daddr) || |
2529 | ipv4_is_lbcast(fl4->daddr))) { | 2531 | ipv4_is_lbcast(fl4->daddr))) { |
2530 | /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ | 2532 | /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ |
2531 | dev_out = __ip_dev_find(net, fl4->saddr, false); | 2533 | dev_out = __ip_dev_find(net, fl4->saddr, false); |
2532 | if (dev_out == NULL) | 2534 | if (dev_out == NULL) |
2533 | goto out; | 2535 | goto out; |
2534 | 2536 | ||
2535 | /* Special hack: user can direct multicasts | 2537 | /* Special hack: user can direct multicasts |
2536 | and limited broadcast via necessary interface | 2538 | and limited broadcast via necessary interface |
2537 | without fiddling with IP_MULTICAST_IF or IP_PKTINFO. | 2539 | without fiddling with IP_MULTICAST_IF or IP_PKTINFO. |
2538 | This hack is not just for fun, it allows | 2540 | This hack is not just for fun, it allows |
2539 | vic,vat and friends to work. | 2541 | vic,vat and friends to work. |
2540 | They bind socket to loopback, set ttl to zero | 2542 | They bind socket to loopback, set ttl to zero |
2541 | and expect that it will work. | 2543 | and expect that it will work. |
2542 | From the viewpoint of routing cache they are broken, | 2544 | From the viewpoint of routing cache they are broken, |
2543 | because we are not allowed to build multicast path | 2545 | because we are not allowed to build multicast path |
2544 | with loopback source addr (look, routing cache | 2546 | with loopback source addr (look, routing cache |
2545 | cannot know, that ttl is zero, so that packet | 2547 | cannot know, that ttl is zero, so that packet |
2546 | will not leave this host and route is valid). | 2548 | will not leave this host and route is valid). |
2547 | Luckily, this hack is good workaround. | 2549 | Luckily, this hack is good workaround. |
2548 | */ | 2550 | */ |
2549 | 2551 | ||
2550 | fl4->flowi4_oif = dev_out->ifindex; | 2552 | fl4->flowi4_oif = dev_out->ifindex; |
2551 | goto make_route; | 2553 | goto make_route; |
2552 | } | 2554 | } |
2553 | 2555 | ||
2554 | if (!(fl4->flowi4_flags & FLOWI_FLAG_ANYSRC)) { | 2556 | if (!(fl4->flowi4_flags & FLOWI_FLAG_ANYSRC)) { |
2555 | /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ | 2557 | /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ |
2556 | if (!__ip_dev_find(net, fl4->saddr, false)) | 2558 | if (!__ip_dev_find(net, fl4->saddr, false)) |
2557 | goto out; | 2559 | goto out; |
2558 | } | 2560 | } |
2559 | } | 2561 | } |
2560 | 2562 | ||
2561 | 2563 | ||
2562 | if (fl4->flowi4_oif) { | 2564 | if (fl4->flowi4_oif) { |
2563 | dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif); | 2565 | dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif); |
2564 | rth = ERR_PTR(-ENODEV); | 2566 | rth = ERR_PTR(-ENODEV); |
2565 | if (dev_out == NULL) | 2567 | if (dev_out == NULL) |
2566 | goto out; | 2568 | goto out; |
2567 | 2569 | ||
2568 | /* RACE: Check return value of inet_select_addr instead. */ | 2570 | /* RACE: Check return value of inet_select_addr instead. */ |
2569 | if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) { | 2571 | if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) { |
2570 | rth = ERR_PTR(-ENETUNREACH); | 2572 | rth = ERR_PTR(-ENETUNREACH); |
2571 | goto out; | 2573 | goto out; |
2572 | } | 2574 | } |
2573 | if (ipv4_is_local_multicast(fl4->daddr) || | 2575 | if (ipv4_is_local_multicast(fl4->daddr) || |
2574 | ipv4_is_lbcast(fl4->daddr)) { | 2576 | ipv4_is_lbcast(fl4->daddr)) { |
2575 | if (!fl4->saddr) | 2577 | if (!fl4->saddr) |
2576 | fl4->saddr = inet_select_addr(dev_out, 0, | 2578 | fl4->saddr = inet_select_addr(dev_out, 0, |
2577 | RT_SCOPE_LINK); | 2579 | RT_SCOPE_LINK); |
2578 | goto make_route; | 2580 | goto make_route; |
2579 | } | 2581 | } |
2580 | if (fl4->saddr) { | 2582 | if (fl4->saddr) { |
2581 | if (ipv4_is_multicast(fl4->daddr)) | 2583 | if (ipv4_is_multicast(fl4->daddr)) |
2582 | fl4->saddr = inet_select_addr(dev_out, 0, | 2584 | fl4->saddr = inet_select_addr(dev_out, 0, |
2583 | fl4->flowi4_scope); | 2585 | fl4->flowi4_scope); |
2584 | else if (!fl4->daddr) | 2586 | else if (!fl4->daddr) |
2585 | fl4->saddr = inet_select_addr(dev_out, 0, | 2587 | fl4->saddr = inet_select_addr(dev_out, 0, |
2586 | RT_SCOPE_HOST); | 2588 | RT_SCOPE_HOST); |
2587 | } | 2589 | } |
2588 | } | 2590 | } |
2589 | 2591 | ||
2590 | if (!fl4->daddr) { | 2592 | if (!fl4->daddr) { |
2591 | fl4->daddr = fl4->saddr; | 2593 | fl4->daddr = fl4->saddr; |
2592 | if (!fl4->daddr) | 2594 | if (!fl4->daddr) |
2593 | fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK); | 2595 | fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK); |
2594 | dev_out = net->loopback_dev; | 2596 | dev_out = net->loopback_dev; |
2595 | fl4->flowi4_oif = net->loopback_dev->ifindex; | 2597 | fl4->flowi4_oif = net->loopback_dev->ifindex; |
2596 | res.type = RTN_LOCAL; | 2598 | res.type = RTN_LOCAL; |
2597 | flags |= RTCF_LOCAL; | 2599 | flags |= RTCF_LOCAL; |
2598 | goto make_route; | 2600 | goto make_route; |
2599 | } | 2601 | } |
2600 | 2602 | ||
2601 | if (fib_lookup(net, fl4, &res)) { | 2603 | if (fib_lookup(net, fl4, &res)) { |
2602 | res.fi = NULL; | 2604 | res.fi = NULL; |
2603 | if (fl4->flowi4_oif) { | 2605 | if (fl4->flowi4_oif) { |
2604 | /* Apparently, routing tables are wrong. Assume, | 2606 | /* Apparently, routing tables are wrong. Assume, |
2605 | that the destination is on link. | 2607 | that the destination is on link. |
2606 | 2608 | ||
2607 | WHY? DW. | 2609 | WHY? DW. |
2608 | Because we are allowed to send to iface | 2610 | Because we are allowed to send to iface |
2609 | even if it has NO routes and NO assigned | 2611 | even if it has NO routes and NO assigned |
2610 | addresses. When oif is specified, routing | 2612 | addresses. When oif is specified, routing |
2611 | tables are looked up with only one purpose: | 2613 | tables are looked up with only one purpose: |
2612 | to catch if destination is gatewayed, rather than | 2614 | to catch if destination is gatewayed, rather than |
2613 | direct. Moreover, if MSG_DONTROUTE is set, | 2615 | direct. Moreover, if MSG_DONTROUTE is set, |
2614 | we send packet, ignoring both routing tables | 2616 | we send packet, ignoring both routing tables |
2615 | and ifaddr state. --ANK | 2617 | and ifaddr state. --ANK |
2616 | 2618 | ||
2617 | 2619 | ||
2618 | We could make it even if oif is unknown, | 2620 | We could make it even if oif is unknown, |
2619 | likely IPv6, but we do not. | 2621 | likely IPv6, but we do not. |
2620 | */ | 2622 | */ |
2621 | 2623 | ||
2622 | if (fl4->saddr == 0) | 2624 | if (fl4->saddr == 0) |
2623 | fl4->saddr = inet_select_addr(dev_out, 0, | 2625 | fl4->saddr = inet_select_addr(dev_out, 0, |
2624 | RT_SCOPE_LINK); | 2626 | RT_SCOPE_LINK); |
2625 | res.type = RTN_UNICAST; | 2627 | res.type = RTN_UNICAST; |
2626 | goto make_route; | 2628 | goto make_route; |
2627 | } | 2629 | } |
2628 | rth = ERR_PTR(-ENETUNREACH); | 2630 | rth = ERR_PTR(-ENETUNREACH); |
2629 | goto out; | 2631 | goto out; |
2630 | } | 2632 | } |
2631 | 2633 | ||
2632 | if (res.type == RTN_LOCAL) { | 2634 | if (res.type == RTN_LOCAL) { |
2633 | if (!fl4->saddr) { | 2635 | if (!fl4->saddr) { |
2634 | if (res.fi->fib_prefsrc) | 2636 | if (res.fi->fib_prefsrc) |
2635 | fl4->saddr = res.fi->fib_prefsrc; | 2637 | fl4->saddr = res.fi->fib_prefsrc; |
2636 | else | 2638 | else |
2637 | fl4->saddr = fl4->daddr; | 2639 | fl4->saddr = fl4->daddr; |
2638 | } | 2640 | } |
2639 | dev_out = net->loopback_dev; | 2641 | dev_out = net->loopback_dev; |
2640 | fl4->flowi4_oif = dev_out->ifindex; | 2642 | fl4->flowi4_oif = dev_out->ifindex; |
2641 | res.fi = NULL; | 2643 | res.fi = NULL; |
2642 | flags |= RTCF_LOCAL; | 2644 | flags |= RTCF_LOCAL; |
2643 | goto make_route; | 2645 | goto make_route; |
2644 | } | 2646 | } |
2645 | 2647 | ||
2646 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 2648 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
2647 | if (res.fi->fib_nhs > 1 && fl4->flowi4_oif == 0) | 2649 | if (res.fi->fib_nhs > 1 && fl4->flowi4_oif == 0) |
2648 | fib_select_multipath(&res); | 2650 | fib_select_multipath(&res); |
2649 | else | 2651 | else |
2650 | #endif | 2652 | #endif |
2651 | if (!res.prefixlen && | 2653 | if (!res.prefixlen && |
2652 | res.table->tb_num_default > 1 && | 2654 | res.table->tb_num_default > 1 && |
2653 | res.type == RTN_UNICAST && !fl4->flowi4_oif) | 2655 | res.type == RTN_UNICAST && !fl4->flowi4_oif) |
2654 | fib_select_default(&res); | 2656 | fib_select_default(&res); |
2655 | 2657 | ||
2656 | if (!fl4->saddr) | 2658 | if (!fl4->saddr) |
2657 | fl4->saddr = FIB_RES_PREFSRC(net, res); | 2659 | fl4->saddr = FIB_RES_PREFSRC(net, res); |
2658 | 2660 | ||
2659 | dev_out = FIB_RES_DEV(res); | 2661 | dev_out = FIB_RES_DEV(res); |
2660 | fl4->flowi4_oif = dev_out->ifindex; | 2662 | fl4->flowi4_oif = dev_out->ifindex; |
2661 | 2663 | ||
2662 | 2664 | ||
2663 | make_route: | 2665 | make_route: |
2664 | rth = __mkroute_output(&res, fl4, orig_daddr, orig_saddr, orig_oif, | 2666 | rth = __mkroute_output(&res, fl4, orig_daddr, orig_saddr, orig_oif, |
2665 | dev_out, flags); | 2667 | dev_out, flags); |
2666 | if (!IS_ERR(rth)) { | 2668 | if (!IS_ERR(rth)) { |
2667 | unsigned int hash; | 2669 | unsigned int hash; |
2668 | 2670 | ||
2669 | hash = rt_hash(orig_daddr, orig_saddr, orig_oif, | 2671 | hash = rt_hash(orig_daddr, orig_saddr, orig_oif, |
2670 | rt_genid(dev_net(dev_out))); | 2672 | rt_genid(dev_net(dev_out))); |
2671 | rth = rt_intern_hash(hash, rth, NULL, orig_oif); | 2673 | rth = rt_intern_hash(hash, rth, NULL, orig_oif); |
2672 | } | 2674 | } |
2673 | 2675 | ||
2674 | out: | 2676 | out: |
2675 | rcu_read_unlock(); | 2677 | rcu_read_unlock(); |
2676 | return rth; | 2678 | return rth; |
2677 | } | 2679 | } |
2678 | 2680 | ||
2679 | struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *flp4) | 2681 | struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *flp4) |
2680 | { | 2682 | { |
2681 | struct rtable *rth; | 2683 | struct rtable *rth; |
2682 | unsigned int hash; | 2684 | unsigned int hash; |
2683 | 2685 | ||
2684 | if (!rt_caching(net)) | 2686 | if (!rt_caching(net)) |
2685 | goto slow_output; | 2687 | goto slow_output; |
2686 | 2688 | ||
2687 | hash = rt_hash(flp4->daddr, flp4->saddr, flp4->flowi4_oif, rt_genid(net)); | 2689 | hash = rt_hash(flp4->daddr, flp4->saddr, flp4->flowi4_oif, rt_genid(net)); |
2688 | 2690 | ||
2689 | rcu_read_lock_bh(); | 2691 | rcu_read_lock_bh(); |
2690 | for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth; | 2692 | for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth; |
2691 | rth = rcu_dereference_bh(rth->dst.rt_next)) { | 2693 | rth = rcu_dereference_bh(rth->dst.rt_next)) { |
2692 | if (rth->rt_key_dst == flp4->daddr && | 2694 | if (rth->rt_key_dst == flp4->daddr && |
2693 | rth->rt_key_src == flp4->saddr && | 2695 | rth->rt_key_src == flp4->saddr && |
2694 | rt_is_output_route(rth) && | 2696 | rt_is_output_route(rth) && |
2695 | rth->rt_oif == flp4->flowi4_oif && | 2697 | rth->rt_oif == flp4->flowi4_oif && |
2696 | rth->rt_mark == flp4->flowi4_mark && | 2698 | rth->rt_mark == flp4->flowi4_mark && |
2697 | !((rth->rt_key_tos ^ flp4->flowi4_tos) & | 2699 | !((rth->rt_key_tos ^ flp4->flowi4_tos) & |
2698 | (IPTOS_RT_MASK | RTO_ONLINK)) && | 2700 | (IPTOS_RT_MASK | RTO_ONLINK)) && |
2699 | net_eq(dev_net(rth->dst.dev), net) && | 2701 | net_eq(dev_net(rth->dst.dev), net) && |
2700 | !rt_is_expired(rth)) { | 2702 | !rt_is_expired(rth)) { |
2701 | dst_use(&rth->dst, jiffies); | 2703 | dst_use(&rth->dst, jiffies); |
2702 | RT_CACHE_STAT_INC(out_hit); | 2704 | RT_CACHE_STAT_INC(out_hit); |
2703 | rcu_read_unlock_bh(); | 2705 | rcu_read_unlock_bh(); |
2704 | if (!flp4->saddr) | 2706 | if (!flp4->saddr) |
2705 | flp4->saddr = rth->rt_src; | 2707 | flp4->saddr = rth->rt_src; |
2706 | if (!flp4->daddr) | 2708 | if (!flp4->daddr) |
2707 | flp4->daddr = rth->rt_dst; | 2709 | flp4->daddr = rth->rt_dst; |
2708 | return rth; | 2710 | return rth; |
2709 | } | 2711 | } |
2710 | RT_CACHE_STAT_INC(out_hlist_search); | 2712 | RT_CACHE_STAT_INC(out_hlist_search); |
2711 | } | 2713 | } |
2712 | rcu_read_unlock_bh(); | 2714 | rcu_read_unlock_bh(); |
2713 | 2715 | ||
2714 | slow_output: | 2716 | slow_output: |
2715 | return ip_route_output_slow(net, flp4); | 2717 | return ip_route_output_slow(net, flp4); |
2716 | } | 2718 | } |
2717 | EXPORT_SYMBOL_GPL(__ip_route_output_key); | 2719 | EXPORT_SYMBOL_GPL(__ip_route_output_key); |
2718 | 2720 | ||
2719 | static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie) | 2721 | static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie) |
2720 | { | 2722 | { |
2721 | return NULL; | 2723 | return NULL; |
2722 | } | 2724 | } |
2723 | 2725 | ||
/*
 * .default_mtu callback for blackhole dsts: nothing is ever transmitted
 * through a blackhole, so report an MTU of zero.
 */
static unsigned int ipv4_blackhole_default_mtu(const struct dst_entry *dst)
{
	return 0;
}
2728 | 2730 | ||
2729 | static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) | 2731 | static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) |
2730 | { | 2732 | { |
2731 | } | 2733 | } |
2732 | 2734 | ||
2733 | static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst, | 2735 | static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst, |
2734 | unsigned long old) | 2736 | unsigned long old) |
2735 | { | 2737 | { |
2736 | return NULL; | 2738 | return NULL; |
2737 | } | 2739 | } |
2738 | 2740 | ||
/*
 * dst_ops for IPv4 "blackhole" routes, as produced by
 * ipv4_blackhole_route().  All forwarding-related callbacks are stubs:
 * check always invalidates, default_mtu is 0, PMTU updates are dropped
 * and metrics are never made writable.  destroy/default_advmss/
 * neigh_lookup are shared with the regular IPv4 dst_ops.
 */
static struct dst_ops ipv4_dst_blackhole_ops = {
	.family			=	AF_INET,
	.protocol		=	cpu_to_be16(ETH_P_IP),
	.destroy		=	ipv4_dst_destroy,
	.check			=	ipv4_blackhole_dst_check,
	.default_mtu		=	ipv4_blackhole_default_mtu,
	.default_advmss		=	ipv4_default_advmss,
	.update_pmtu		=	ipv4_rt_blackhole_update_pmtu,
	.cow_metrics		=	ipv4_rt_blackhole_cow_metrics,
	.neigh_lookup		=	ipv4_neigh_lookup,
};
2750 | 2752 | ||
2751 | struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig) | 2753 | struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig) |
2752 | { | 2754 | { |
2753 | struct rtable *rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, 0, 0); | 2755 | struct rtable *rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, 0, 0); |
2754 | struct rtable *ort = (struct rtable *) dst_orig; | 2756 | struct rtable *ort = (struct rtable *) dst_orig; |
2755 | 2757 | ||
2756 | if (rt) { | 2758 | if (rt) { |
2757 | struct dst_entry *new = &rt->dst; | 2759 | struct dst_entry *new = &rt->dst; |
2758 | 2760 | ||
2759 | new->__use = 1; | 2761 | new->__use = 1; |
2760 | new->input = dst_discard; | 2762 | new->input = dst_discard; |
2761 | new->output = dst_discard; | 2763 | new->output = dst_discard; |
2762 | dst_copy_metrics(new, &ort->dst); | 2764 | dst_copy_metrics(new, &ort->dst); |
2763 | 2765 | ||
2764 | new->dev = ort->dst.dev; | 2766 | new->dev = ort->dst.dev; |
2765 | if (new->dev) | 2767 | if (new->dev) |
2766 | dev_hold(new->dev); | 2768 | dev_hold(new->dev); |
2767 | 2769 | ||
2768 | rt->rt_key_dst = ort->rt_key_dst; | 2770 | rt->rt_key_dst = ort->rt_key_dst; |
2769 | rt->rt_key_src = ort->rt_key_src; | 2771 | rt->rt_key_src = ort->rt_key_src; |
2770 | rt->rt_key_tos = ort->rt_key_tos; | 2772 | rt->rt_key_tos = ort->rt_key_tos; |
2771 | rt->rt_route_iif = ort->rt_route_iif; | 2773 | rt->rt_route_iif = ort->rt_route_iif; |
2772 | rt->rt_iif = ort->rt_iif; | 2774 | rt->rt_iif = ort->rt_iif; |
2773 | rt->rt_oif = ort->rt_oif; | 2775 | rt->rt_oif = ort->rt_oif; |
2774 | rt->rt_mark = ort->rt_mark; | 2776 | rt->rt_mark = ort->rt_mark; |
2775 | 2777 | ||
2776 | rt->rt_genid = rt_genid(net); | 2778 | rt->rt_genid = rt_genid(net); |
2777 | rt->rt_flags = ort->rt_flags; | 2779 | rt->rt_flags = ort->rt_flags; |
2778 | rt->rt_type = ort->rt_type; | 2780 | rt->rt_type = ort->rt_type; |
2779 | rt->rt_dst = ort->rt_dst; | 2781 | rt->rt_dst = ort->rt_dst; |
2780 | rt->rt_src = ort->rt_src; | 2782 | rt->rt_src = ort->rt_src; |
2781 | rt->rt_gateway = ort->rt_gateway; | 2783 | rt->rt_gateway = ort->rt_gateway; |
2782 | rt->rt_spec_dst = ort->rt_spec_dst; | 2784 | rt->rt_spec_dst = ort->rt_spec_dst; |
2783 | rt->peer = ort->peer; | 2785 | rt->peer = ort->peer; |
2784 | if (rt->peer) | 2786 | if (rt->peer) |
2785 | atomic_inc(&rt->peer->refcnt); | 2787 | atomic_inc(&rt->peer->refcnt); |
2786 | rt->fi = ort->fi; | 2788 | rt->fi = ort->fi; |
2787 | if (rt->fi) | 2789 | if (rt->fi) |
2788 | atomic_inc(&rt->fi->fib_clntref); | 2790 | atomic_inc(&rt->fi->fib_clntref); |
2789 | 2791 | ||
2790 | dst_free(new); | 2792 | dst_free(new); |
2791 | } | 2793 | } |
2792 | 2794 | ||
2793 | dst_release(dst_orig); | 2795 | dst_release(dst_orig); |
2794 | 2796 | ||
2795 | return rt ? &rt->dst : ERR_PTR(-ENOMEM); | 2797 | return rt ? &rt->dst : ERR_PTR(-ENOMEM); |
2796 | } | 2798 | } |
2797 | 2799 | ||
2798 | struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4, | 2800 | struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4, |
2799 | struct sock *sk) | 2801 | struct sock *sk) |
2800 | { | 2802 | { |
2801 | struct rtable *rt = __ip_route_output_key(net, flp4); | 2803 | struct rtable *rt = __ip_route_output_key(net, flp4); |
2802 | 2804 | ||
2803 | if (IS_ERR(rt)) | 2805 | if (IS_ERR(rt)) |
2804 | return rt; | 2806 | return rt; |
2805 | 2807 | ||
2806 | if (flp4->flowi4_proto) | 2808 | if (flp4->flowi4_proto) |
2807 | rt = (struct rtable *) xfrm_lookup(net, &rt->dst, | 2809 | rt = (struct rtable *) xfrm_lookup(net, &rt->dst, |
2808 | flowi4_to_flowi(flp4), | 2810 | flowi4_to_flowi(flp4), |
2809 | sk, 0); | 2811 | sk, 0); |
2810 | 2812 | ||
2811 | return rt; | 2813 | return rt; |
2812 | } | 2814 | } |
2813 | EXPORT_SYMBOL_GPL(ip_route_output_flow); | 2815 | EXPORT_SYMBOL_GPL(ip_route_output_flow); |
2814 | 2816 | ||
2815 | static int rt_fill_info(struct net *net, | 2817 | static int rt_fill_info(struct net *net, |
2816 | struct sk_buff *skb, u32 pid, u32 seq, int event, | 2818 | struct sk_buff *skb, u32 pid, u32 seq, int event, |
2817 | int nowait, unsigned int flags) | 2819 | int nowait, unsigned int flags) |
2818 | { | 2820 | { |
2819 | struct rtable *rt = skb_rtable(skb); | 2821 | struct rtable *rt = skb_rtable(skb); |
2820 | struct rtmsg *r; | 2822 | struct rtmsg *r; |
2821 | struct nlmsghdr *nlh; | 2823 | struct nlmsghdr *nlh; |
2822 | long expires = 0; | 2824 | long expires = 0; |
2823 | const struct inet_peer *peer = rt->peer; | 2825 | const struct inet_peer *peer = rt->peer; |
2824 | u32 id = 0, ts = 0, tsage = 0, error; | 2826 | u32 id = 0, ts = 0, tsage = 0, error; |
2825 | 2827 | ||
2826 | nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags); | 2828 | nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags); |
2827 | if (nlh == NULL) | 2829 | if (nlh == NULL) |
2828 | return -EMSGSIZE; | 2830 | return -EMSGSIZE; |
2829 | 2831 | ||
2830 | r = nlmsg_data(nlh); | 2832 | r = nlmsg_data(nlh); |
2831 | r->rtm_family = AF_INET; | 2833 | r->rtm_family = AF_INET; |
2832 | r->rtm_dst_len = 32; | 2834 | r->rtm_dst_len = 32; |
2833 | r->rtm_src_len = 0; | 2835 | r->rtm_src_len = 0; |
2834 | r->rtm_tos = rt->rt_key_tos; | 2836 | r->rtm_tos = rt->rt_key_tos; |
2835 | r->rtm_table = RT_TABLE_MAIN; | 2837 | r->rtm_table = RT_TABLE_MAIN; |
2836 | NLA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN); | 2838 | NLA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN); |
2837 | r->rtm_type = rt->rt_type; | 2839 | r->rtm_type = rt->rt_type; |
2838 | r->rtm_scope = RT_SCOPE_UNIVERSE; | 2840 | r->rtm_scope = RT_SCOPE_UNIVERSE; |
2839 | r->rtm_protocol = RTPROT_UNSPEC; | 2841 | r->rtm_protocol = RTPROT_UNSPEC; |
2840 | r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED; | 2842 | r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED; |
2841 | if (rt->rt_flags & RTCF_NOTIFY) | 2843 | if (rt->rt_flags & RTCF_NOTIFY) |
2842 | r->rtm_flags |= RTM_F_NOTIFY; | 2844 | r->rtm_flags |= RTM_F_NOTIFY; |
2843 | 2845 | ||
2844 | NLA_PUT_BE32(skb, RTA_DST, rt->rt_dst); | 2846 | NLA_PUT_BE32(skb, RTA_DST, rt->rt_dst); |
2845 | 2847 | ||
2846 | if (rt->rt_key_src) { | 2848 | if (rt->rt_key_src) { |
2847 | r->rtm_src_len = 32; | 2849 | r->rtm_src_len = 32; |
2848 | NLA_PUT_BE32(skb, RTA_SRC, rt->rt_key_src); | 2850 | NLA_PUT_BE32(skb, RTA_SRC, rt->rt_key_src); |
2849 | } | 2851 | } |
2850 | if (rt->dst.dev) | 2852 | if (rt->dst.dev) |
2851 | NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex); | 2853 | NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex); |
2852 | #ifdef CONFIG_IP_ROUTE_CLASSID | 2854 | #ifdef CONFIG_IP_ROUTE_CLASSID |
2853 | if (rt->dst.tclassid) | 2855 | if (rt->dst.tclassid) |
2854 | NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid); | 2856 | NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid); |
2855 | #endif | 2857 | #endif |
2856 | if (rt_is_input_route(rt)) | 2858 | if (rt_is_input_route(rt)) |
2857 | NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst); | 2859 | NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst); |
2858 | else if (rt->rt_src != rt->rt_key_src) | 2860 | else if (rt->rt_src != rt->rt_key_src) |
2859 | NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_src); | 2861 | NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_src); |
2860 | 2862 | ||
2861 | if (rt->rt_dst != rt->rt_gateway) | 2863 | if (rt->rt_dst != rt->rt_gateway) |
2862 | NLA_PUT_BE32(skb, RTA_GATEWAY, rt->rt_gateway); | 2864 | NLA_PUT_BE32(skb, RTA_GATEWAY, rt->rt_gateway); |
2863 | 2865 | ||
2864 | if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) | 2866 | if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) |
2865 | goto nla_put_failure; | 2867 | goto nla_put_failure; |
2866 | 2868 | ||
2867 | if (rt->rt_mark) | 2869 | if (rt->rt_mark) |
2868 | NLA_PUT_BE32(skb, RTA_MARK, rt->rt_mark); | 2870 | NLA_PUT_BE32(skb, RTA_MARK, rt->rt_mark); |
2869 | 2871 | ||
2870 | error = rt->dst.error; | 2872 | error = rt->dst.error; |
2871 | if (peer) { | 2873 | if (peer) { |
2872 | inet_peer_refcheck(rt->peer); | 2874 | inet_peer_refcheck(rt->peer); |
2873 | id = atomic_read(&peer->ip_id_count) & 0xffff; | 2875 | id = atomic_read(&peer->ip_id_count) & 0xffff; |
2874 | if (peer->tcp_ts_stamp) { | 2876 | if (peer->tcp_ts_stamp) { |
2875 | ts = peer->tcp_ts; | 2877 | ts = peer->tcp_ts; |
2876 | tsage = get_seconds() - peer->tcp_ts_stamp; | 2878 | tsage = get_seconds() - peer->tcp_ts_stamp; |
2877 | } | 2879 | } |
2878 | expires = ACCESS_ONCE(peer->pmtu_expires); | 2880 | expires = ACCESS_ONCE(peer->pmtu_expires); |
2879 | if (expires) | 2881 | if (expires) |
2880 | expires -= jiffies; | 2882 | expires -= jiffies; |
2881 | } | 2883 | } |
2882 | 2884 | ||
2883 | if (rt_is_input_route(rt)) { | 2885 | if (rt_is_input_route(rt)) { |
2884 | #ifdef CONFIG_IP_MROUTE | 2886 | #ifdef CONFIG_IP_MROUTE |
2885 | __be32 dst = rt->rt_dst; | 2887 | __be32 dst = rt->rt_dst; |
2886 | 2888 | ||
2887 | if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) && | 2889 | if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) && |
2888 | IPV4_DEVCONF_ALL(net, MC_FORWARDING)) { | 2890 | IPV4_DEVCONF_ALL(net, MC_FORWARDING)) { |
2889 | int err = ipmr_get_route(net, skb, | 2891 | int err = ipmr_get_route(net, skb, |
2890 | rt->rt_src, rt->rt_dst, | 2892 | rt->rt_src, rt->rt_dst, |
2891 | r, nowait); | 2893 | r, nowait); |
2892 | if (err <= 0) { | 2894 | if (err <= 0) { |
2893 | if (!nowait) { | 2895 | if (!nowait) { |
2894 | if (err == 0) | 2896 | if (err == 0) |
2895 | return 0; | 2897 | return 0; |
2896 | goto nla_put_failure; | 2898 | goto nla_put_failure; |
2897 | } else { | 2899 | } else { |
2898 | if (err == -EMSGSIZE) | 2900 | if (err == -EMSGSIZE) |
2899 | goto nla_put_failure; | 2901 | goto nla_put_failure; |
2900 | error = err; | 2902 | error = err; |
2901 | } | 2903 | } |
2902 | } | 2904 | } |
2903 | } else | 2905 | } else |
2904 | #endif | 2906 | #endif |
2905 | NLA_PUT_U32(skb, RTA_IIF, rt->rt_iif); | 2907 | NLA_PUT_U32(skb, RTA_IIF, rt->rt_iif); |
2906 | } | 2908 | } |
2907 | 2909 | ||
2908 | if (rtnl_put_cacheinfo(skb, &rt->dst, id, ts, tsage, | 2910 | if (rtnl_put_cacheinfo(skb, &rt->dst, id, ts, tsage, |
2909 | expires, error) < 0) | 2911 | expires, error) < 0) |
2910 | goto nla_put_failure; | 2912 | goto nla_put_failure; |
2911 | 2913 | ||
2912 | return nlmsg_end(skb, nlh); | 2914 | return nlmsg_end(skb, nlh); |
2913 | 2915 | ||
2914 | nla_put_failure: | 2916 | nla_put_failure: |
2915 | nlmsg_cancel(skb, nlh); | 2917 | nlmsg_cancel(skb, nlh); |
2916 | return -EMSGSIZE; | 2918 | return -EMSGSIZE; |
2917 | } | 2919 | } |
2918 | 2920 | ||
/*
 * RTM_GETROUTE handler: answer a userspace "what route would this packet
 * take?" query.  With RTA_IIF set the input path (ip_route_input) is
 * simulated on a dummy skb; otherwise an output route is looked up from
 * the RTA_SRC/RTA_DST/RTA_OIF/RTA_MARK attributes.  The resolved route
 * is serialized with rt_fill_info() and unicast back to the requester.
 *
 * Returns 0 on success or a negative errno.
 */
static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
{
	struct net *net = sock_net(in_skb->sk);
	struct rtmsg *rtm;
	struct nlattr *tb[RTA_MAX+1];
	struct rtable *rt = NULL;
	__be32 dst = 0;
	__be32 src = 0;
	u32 iif;
	int err;
	int mark;
	struct sk_buff *skb;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
	if (err < 0)
		goto errout;

	rtm = nlmsg_data(nlh);

	/* Dummy skb used both to drive the input routing path and as the
	 * reply buffer for rt_fill_info().
	 */
	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (skb == NULL) {
		err = -ENOBUFS;
		goto errout;
	}

	/* Reserve room for dummy headers, this skb can pass
	   through good chunk of routing engine.
	 */
	skb_reset_mac_header(skb);
	skb_reset_network_header(skb);

	/* Bugfix: need to give ip_route_input enough of an IP header to not gag. */
	ip_hdr(skb)->protocol = IPPROTO_ICMP;
	skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));

	/* Attributes are optional; absent ones default to 0 / wildcard. */
	src = tb[RTA_SRC] ? nla_get_be32(tb[RTA_SRC]) : 0;
	dst = tb[RTA_DST] ? nla_get_be32(tb[RTA_DST]) : 0;
	iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
	mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0;

	if (iif) {
		/* Input-path query: emulate reception on device @iif. */
		struct net_device *dev;

		dev = __dev_get_by_index(net, iif);
		if (dev == NULL) {
			err = -ENODEV;
			goto errout_free;
		}

		skb->protocol	= htons(ETH_P_IP);
		skb->dev	= dev;
		skb->mark	= mark;
		/* ip_route_input() expects softirq context to be excluded. */
		local_bh_disable();
		err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
		local_bh_enable();

		rt = skb_rtable(skb);
		if (err == 0 && rt->dst.error)
			err = -rt->dst.error;
	} else {
		/* Output-path query: plain flow-based route lookup. */
		struct flowi4 fl4 = {
			.daddr = dst,
			.saddr = src,
			.flowi4_tos = rtm->rtm_tos,
			.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0,
			.flowi4_mark = mark,
		};
		rt = ip_route_output_key(net, &fl4);

		err = 0;
		if (IS_ERR(rt))
			err = PTR_ERR(rt);
	}

	if (err)
		goto errout_free;

	skb_dst_set(skb, &rt->dst);
	if (rtm->rtm_flags & RTM_F_NOTIFY)
		rt->rt_flags |= RTCF_NOTIFY;

	err = rt_fill_info(net, skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
			   RTM_NEWROUTE, 0, 0);
	if (err <= 0)
		goto errout_free;

	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
errout:
	return err;

errout_free:
	kfree_skb(skb);
	goto errout;
}
3013 | 3015 | ||
/*
 * Netlink dump callback: walk the whole routing cache hash table and
 * emit one RTM_NEWROUTE message per live entry belonging to the
 * requester's netns.  Resume state (hash bucket, index within chain) is
 * kept in cb->args[0]/args[1] so the dump can continue across multiple
 * recvmsg() calls when the skb fills up.
 *
 * Returns skb->len, per the standard dump-callback contract.
 */
int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct rtable *rt;
	int h, s_h;
	int idx, s_idx;
	struct net *net;

	net = sock_net(skb->sk);

	s_h = cb->args[0];
	if (s_h < 0)
		s_h = 0;
	s_idx = idx = cb->args[1];
	for (h = s_h; h <= rt_hash_mask; h++, s_idx = 0) {
		/* Skip empty buckets without taking the RCU read lock. */
		if (!rt_hash_table[h].chain)
			continue;
		rcu_read_lock_bh();
		for (rt = rcu_dereference_bh(rt_hash_table[h].chain), idx = 0; rt;
		     rt = rcu_dereference_bh(rt->dst.rt_next), idx++) {
			/* idx < s_idx skips entries already dumped in a
			 * previous pass over this bucket.
			 */
			if (!net_eq(dev_net(rt->dst.dev), net) || idx < s_idx)
				continue;
			if (rt_is_expired(rt))
				continue;
			/* Borrow the dst without refcounting: we stay
			 * inside the RCU read-side section.
			 */
			skb_dst_set_noref(skb, &rt->dst);
			if (rt_fill_info(net, skb, NETLINK_CB(cb->skb).pid,
					 cb->nlh->nlmsg_seq, RTM_NEWROUTE,
					 1, NLM_F_MULTI) <= 0) {
				/* skb full: record position and stop. */
				skb_dst_drop(skb);
				rcu_read_unlock_bh();
				goto done;
			}
			skb_dst_drop(skb);
		}
		rcu_read_unlock_bh();
	}

done:
	cb->args[0] = h;
	cb->args[1] = idx;
	return skb->len;
}
3055 | 3057 | ||
3056 | void ip_rt_multicast_event(struct in_device *in_dev) | 3058 | void ip_rt_multicast_event(struct in_device *in_dev) |
3057 | { | 3059 | { |
3058 | rt_cache_flush(dev_net(in_dev->dev), 0); | 3060 | rt_cache_flush(dev_net(in_dev->dev), 0); |
3059 | } | 3061 | } |
3060 | 3062 | ||
3061 | #ifdef CONFIG_SYSCTL | 3063 | #ifdef CONFIG_SYSCTL |
3062 | static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write, | 3064 | static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write, |
3063 | void __user *buffer, | 3065 | void __user *buffer, |
3064 | size_t *lenp, loff_t *ppos) | 3066 | size_t *lenp, loff_t *ppos) |
3065 | { | 3067 | { |
3066 | if (write) { | 3068 | if (write) { |
3067 | int flush_delay; | 3069 | int flush_delay; |
3068 | ctl_table ctl; | 3070 | ctl_table ctl; |
3069 | struct net *net; | 3071 | struct net *net; |
3070 | 3072 | ||
3071 | memcpy(&ctl, __ctl, sizeof(ctl)); | 3073 | memcpy(&ctl, __ctl, sizeof(ctl)); |
3072 | ctl.data = &flush_delay; | 3074 | ctl.data = &flush_delay; |
3073 | proc_dointvec(&ctl, write, buffer, lenp, ppos); | 3075 | proc_dointvec(&ctl, write, buffer, lenp, ppos); |
3074 | 3076 | ||
3075 | net = (struct net *)__ctl->extra1; | 3077 | net = (struct net *)__ctl->extra1; |
3076 | rt_cache_flush(net, flush_delay); | 3078 | rt_cache_flush(net, flush_delay); |
3077 | return 0; | 3079 | return 0; |
3078 | } | 3080 | } |
3079 | 3081 | ||
3080 | return -EINVAL; | 3082 | return -EINVAL; |
3081 | } | 3083 | } |
3082 | 3084 | ||
3083 | static ctl_table ipv4_route_table[] = { | 3085 | static ctl_table ipv4_route_table[] = { |
3084 | { | 3086 | { |
3085 | .procname = "gc_thresh", | 3087 | .procname = "gc_thresh", |
3086 | .data = &ipv4_dst_ops.gc_thresh, | 3088 | .data = &ipv4_dst_ops.gc_thresh, |
3087 | .maxlen = sizeof(int), | 3089 | .maxlen = sizeof(int), |
3088 | .mode = 0644, | 3090 | .mode = 0644, |
3089 | .proc_handler = proc_dointvec, | 3091 | .proc_handler = proc_dointvec, |
3090 | }, | 3092 | }, |
3091 | { | 3093 | { |
3092 | .procname = "max_size", | 3094 | .procname = "max_size", |
3093 | .data = &ip_rt_max_size, | 3095 | .data = &ip_rt_max_size, |
3094 | .maxlen = sizeof(int), | 3096 | .maxlen = sizeof(int), |
3095 | .mode = 0644, | 3097 | .mode = 0644, |
3096 | .proc_handler = proc_dointvec, | 3098 | .proc_handler = proc_dointvec, |
3097 | }, | 3099 | }, |
3098 | { | 3100 | { |
3099 | /* Deprecated. Use gc_min_interval_ms */ | 3101 | /* Deprecated. Use gc_min_interval_ms */ |
3100 | 3102 | ||
3101 | .procname = "gc_min_interval", | 3103 | .procname = "gc_min_interval", |
3102 | .data = &ip_rt_gc_min_interval, | 3104 | .data = &ip_rt_gc_min_interval, |
3103 | .maxlen = sizeof(int), | 3105 | .maxlen = sizeof(int), |
3104 | .mode = 0644, | 3106 | .mode = 0644, |
3105 | .proc_handler = proc_dointvec_jiffies, | 3107 | .proc_handler = proc_dointvec_jiffies, |
3106 | }, | 3108 | }, |
3107 | { | 3109 | { |
3108 | .procname = "gc_min_interval_ms", | 3110 | .procname = "gc_min_interval_ms", |
3109 | .data = &ip_rt_gc_min_interval, | 3111 | .data = &ip_rt_gc_min_interval, |
3110 | .maxlen = sizeof(int), | 3112 | .maxlen = sizeof(int), |
3111 | .mode = 0644, | 3113 | .mode = 0644, |
3112 | .proc_handler = proc_dointvec_ms_jiffies, | 3114 | .proc_handler = proc_dointvec_ms_jiffies, |
3113 | }, | 3115 | }, |
3114 | { | 3116 | { |
3115 | .procname = "gc_timeout", | 3117 | .procname = "gc_timeout", |
3116 | .data = &ip_rt_gc_timeout, | 3118 | .data = &ip_rt_gc_timeout, |
3117 | .maxlen = sizeof(int), | 3119 | .maxlen = sizeof(int), |
3118 | .mode = 0644, | 3120 | .mode = 0644, |
3119 | .proc_handler = proc_dointvec_jiffies, | 3121 | .proc_handler = proc_dointvec_jiffies, |
3120 | }, | 3122 | }, |
3121 | { | 3123 | { |
3122 | .procname = "gc_interval", | 3124 | .procname = "gc_interval", |
3123 | .data = &ip_rt_gc_interval, | 3125 | .data = &ip_rt_gc_interval, |
3124 | .maxlen = sizeof(int), | 3126 | .maxlen = sizeof(int), |
3125 | .mode = 0644, | 3127 | .mode = 0644, |
3126 | .proc_handler = proc_dointvec_jiffies, | 3128 | .proc_handler = proc_dointvec_jiffies, |
3127 | }, | 3129 | }, |
3128 | { | 3130 | { |
3129 | .procname = "redirect_load", | 3131 | .procname = "redirect_load", |
3130 | .data = &ip_rt_redirect_load, | 3132 | .data = &ip_rt_redirect_load, |
3131 | .maxlen = sizeof(int), | 3133 | .maxlen = sizeof(int), |
3132 | .mode = 0644, | 3134 | .mode = 0644, |
3133 | .proc_handler = proc_dointvec, | 3135 | .proc_handler = proc_dointvec, |
3134 | }, | 3136 | }, |
3135 | { | 3137 | { |
3136 | .procname = "redirect_number", | 3138 | .procname = "redirect_number", |
3137 | .data = &ip_rt_redirect_number, | 3139 | .data = &ip_rt_redirect_number, |
3138 | .maxlen = sizeof(int), | 3140 | .maxlen = sizeof(int), |
3139 | .mode = 0644, | 3141 | .mode = 0644, |
3140 | .proc_handler = proc_dointvec, | 3142 | .proc_handler = proc_dointvec, |
3141 | }, | 3143 | }, |
3142 | { | 3144 | { |
3143 | .procname = "redirect_silence", | 3145 | .procname = "redirect_silence", |
3144 | .data = &ip_rt_redirect_silence, | 3146 | .data = &ip_rt_redirect_silence, |
3145 | .maxlen = sizeof(int), | 3147 | .maxlen = sizeof(int), |
3146 | .mode = 0644, | 3148 | .mode = 0644, |
3147 | .proc_handler = proc_dointvec, | 3149 | .proc_handler = proc_dointvec, |
3148 | }, | 3150 | }, |
3149 | { | 3151 | { |
3150 | .procname = "error_cost", | 3152 | .procname = "error_cost", |
3151 | .data = &ip_rt_error_cost, | 3153 | .data = &ip_rt_error_cost, |
3152 | .maxlen = sizeof(int), | 3154 | .maxlen = sizeof(int), |
3153 | .mode = 0644, | 3155 | .mode = 0644, |
3154 | .proc_handler = proc_dointvec, | 3156 | .proc_handler = proc_dointvec, |
3155 | }, | 3157 | }, |
3156 | { | 3158 | { |
3157 | .procname = "error_burst", | 3159 | .procname = "error_burst", |
3158 | .data = &ip_rt_error_burst, | 3160 | .data = &ip_rt_error_burst, |
3159 | .maxlen = sizeof(int), | 3161 | .maxlen = sizeof(int), |
3160 | .mode = 0644, | 3162 | .mode = 0644, |
3161 | .proc_handler = proc_dointvec, | 3163 | .proc_handler = proc_dointvec, |
3162 | }, | 3164 | }, |
3163 | { | 3165 | { |
3164 | .procname = "gc_elasticity", | 3166 | .procname = "gc_elasticity", |
3165 | .data = &ip_rt_gc_elasticity, | 3167 | .data = &ip_rt_gc_elasticity, |
3166 | .maxlen = sizeof(int), | 3168 | .maxlen = sizeof(int), |
3167 | .mode = 0644, | 3169 | .mode = 0644, |
3168 | .proc_handler = proc_dointvec, | 3170 | .proc_handler = proc_dointvec, |
3169 | }, | 3171 | }, |
3170 | { | 3172 | { |
3171 | .procname = "mtu_expires", | 3173 | .procname = "mtu_expires", |
3172 | .data = &ip_rt_mtu_expires, | 3174 | .data = &ip_rt_mtu_expires, |
3173 | .maxlen = sizeof(int), | 3175 | .maxlen = sizeof(int), |
3174 | .mode = 0644, | 3176 | .mode = 0644, |
3175 | .proc_handler = proc_dointvec_jiffies, | 3177 | .proc_handler = proc_dointvec_jiffies, |
3176 | }, | 3178 | }, |
3177 | { | 3179 | { |
3178 | .procname = "min_pmtu", | 3180 | .procname = "min_pmtu", |
3179 | .data = &ip_rt_min_pmtu, | 3181 | .data = &ip_rt_min_pmtu, |
3180 | .maxlen = sizeof(int), | 3182 | .maxlen = sizeof(int), |
3181 | .mode = 0644, | 3183 | .mode = 0644, |
3182 | .proc_handler = proc_dointvec, | 3184 | .proc_handler = proc_dointvec, |
3183 | }, | 3185 | }, |
3184 | { | 3186 | { |
3185 | .procname = "min_adv_mss", | 3187 | .procname = "min_adv_mss", |
3186 | .data = &ip_rt_min_advmss, | 3188 | .data = &ip_rt_min_advmss, |
3187 | .maxlen = sizeof(int), | 3189 | .maxlen = sizeof(int), |
3188 | .mode = 0644, | 3190 | .mode = 0644, |
3189 | .proc_handler = proc_dointvec, | 3191 | .proc_handler = proc_dointvec, |
3190 | }, | 3192 | }, |
3191 | { } | 3193 | { } |
3192 | }; | 3194 | }; |
3193 | 3195 | ||
3194 | static struct ctl_table empty[1]; | 3196 | static struct ctl_table empty[1]; |
3195 | 3197 | ||
3196 | static struct ctl_table ipv4_skeleton[] = | 3198 | static struct ctl_table ipv4_skeleton[] = |
3197 | { | 3199 | { |
3198 | { .procname = "route", | 3200 | { .procname = "route", |
3199 | .mode = 0555, .child = ipv4_route_table}, | 3201 | .mode = 0555, .child = ipv4_route_table}, |
3200 | { .procname = "neigh", | 3202 | { .procname = "neigh", |
3201 | .mode = 0555, .child = empty}, | 3203 | .mode = 0555, .child = empty}, |
3202 | { } | 3204 | { } |
3203 | }; | 3205 | }; |
3204 | 3206 | ||
3205 | static __net_initdata struct ctl_path ipv4_path[] = { | 3207 | static __net_initdata struct ctl_path ipv4_path[] = { |
3206 | { .procname = "net", }, | 3208 | { .procname = "net", }, |
3207 | { .procname = "ipv4", }, | 3209 | { .procname = "ipv4", }, |
3208 | { }, | 3210 | { }, |
3209 | }; | 3211 | }; |
3210 | 3212 | ||
3211 | static struct ctl_table ipv4_route_flush_table[] = { | 3213 | static struct ctl_table ipv4_route_flush_table[] = { |
3212 | { | 3214 | { |
3213 | .procname = "flush", | 3215 | .procname = "flush", |
3214 | .maxlen = sizeof(int), | 3216 | .maxlen = sizeof(int), |
3215 | .mode = 0200, | 3217 | .mode = 0200, |
3216 | .proc_handler = ipv4_sysctl_rtcache_flush, | 3218 | .proc_handler = ipv4_sysctl_rtcache_flush, |
3217 | }, | 3219 | }, |
3218 | { }, | 3220 | { }, |
3219 | }; | 3221 | }; |
3220 | 3222 | ||
3221 | static __net_initdata struct ctl_path ipv4_route_path[] = { | 3223 | static __net_initdata struct ctl_path ipv4_route_path[] = { |
3222 | { .procname = "net", }, | 3224 | { .procname = "net", }, |
3223 | { .procname = "ipv4", }, | 3225 | { .procname = "ipv4", }, |
3224 | { .procname = "route", }, | 3226 | { .procname = "route", }, |
3225 | { }, | 3227 | { }, |
3226 | }; | 3228 | }; |
3227 | 3229 | ||
3228 | static __net_init int sysctl_route_net_init(struct net *net) | 3230 | static __net_init int sysctl_route_net_init(struct net *net) |
3229 | { | 3231 | { |
3230 | struct ctl_table *tbl; | 3232 | struct ctl_table *tbl; |
3231 | 3233 | ||
3232 | tbl = ipv4_route_flush_table; | 3234 | tbl = ipv4_route_flush_table; |
3233 | if (!net_eq(net, &init_net)) { | 3235 | if (!net_eq(net, &init_net)) { |
3234 | tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL); | 3236 | tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL); |
3235 | if (tbl == NULL) | 3237 | if (tbl == NULL) |
3236 | goto err_dup; | 3238 | goto err_dup; |
3237 | } | 3239 | } |
3238 | tbl[0].extra1 = net; | 3240 | tbl[0].extra1 = net; |
3239 | 3241 | ||
3240 | net->ipv4.route_hdr = | 3242 | net->ipv4.route_hdr = |
3241 | register_net_sysctl_table(net, ipv4_route_path, tbl); | 3243 | register_net_sysctl_table(net, ipv4_route_path, tbl); |
3242 | if (net->ipv4.route_hdr == NULL) | 3244 | if (net->ipv4.route_hdr == NULL) |
3243 | goto err_reg; | 3245 | goto err_reg; |
3244 | return 0; | 3246 | return 0; |
3245 | 3247 | ||
3246 | err_reg: | 3248 | err_reg: |
3247 | if (tbl != ipv4_route_flush_table) | 3249 | if (tbl != ipv4_route_flush_table) |
3248 | kfree(tbl); | 3250 | kfree(tbl); |
3249 | err_dup: | 3251 | err_dup: |
3250 | return -ENOMEM; | 3252 | return -ENOMEM; |
3251 | } | 3253 | } |
3252 | 3254 | ||
3253 | static __net_exit void sysctl_route_net_exit(struct net *net) | 3255 | static __net_exit void sysctl_route_net_exit(struct net *net) |
3254 | { | 3256 | { |
3255 | struct ctl_table *tbl; | 3257 | struct ctl_table *tbl; |
3256 | 3258 | ||
3257 | tbl = net->ipv4.route_hdr->ctl_table_arg; | 3259 | tbl = net->ipv4.route_hdr->ctl_table_arg; |
3258 | unregister_net_sysctl_table(net->ipv4.route_hdr); | 3260 | unregister_net_sysctl_table(net->ipv4.route_hdr); |
3259 | BUG_ON(tbl == ipv4_route_flush_table); | 3261 | BUG_ON(tbl == ipv4_route_flush_table); |
3260 | kfree(tbl); | 3262 | kfree(tbl); |
3261 | } | 3263 | } |
3262 | 3264 | ||
3263 | static __net_initdata struct pernet_operations sysctl_route_ops = { | 3265 | static __net_initdata struct pernet_operations sysctl_route_ops = { |
3264 | .init = sysctl_route_net_init, | 3266 | .init = sysctl_route_net_init, |
3265 | .exit = sysctl_route_net_exit, | 3267 | .exit = sysctl_route_net_exit, |
3266 | }; | 3268 | }; |
3267 | #endif | 3269 | #endif |
3268 | 3270 | ||
3269 | static __net_init int rt_genid_init(struct net *net) | 3271 | static __net_init int rt_genid_init(struct net *net) |
3270 | { | 3272 | { |
3271 | get_random_bytes(&net->ipv4.rt_genid, | 3273 | get_random_bytes(&net->ipv4.rt_genid, |
3272 | sizeof(net->ipv4.rt_genid)); | 3274 | sizeof(net->ipv4.rt_genid)); |
3273 | get_random_bytes(&net->ipv4.dev_addr_genid, | 3275 | get_random_bytes(&net->ipv4.dev_addr_genid, |
3274 | sizeof(net->ipv4.dev_addr_genid)); | 3276 | sizeof(net->ipv4.dev_addr_genid)); |
3275 | return 0; | 3277 | return 0; |
3276 | } | 3278 | } |
3277 | 3279 | ||
3278 | static __net_initdata struct pernet_operations rt_genid_ops = { | 3280 | static __net_initdata struct pernet_operations rt_genid_ops = { |
3279 | .init = rt_genid_init, | 3281 | .init = rt_genid_init, |
3280 | }; | 3282 | }; |
3281 | 3283 | ||
3282 | 3284 | ||
#ifdef CONFIG_IP_ROUTE_CLASSID
/* Per-cpu route classid accounting; allocated in ip_rt_init(). */
struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
#endif /* CONFIG_IP_ROUTE_CLASSID */
3286 | 3288 | ||
3287 | static __initdata unsigned long rhash_entries; | 3289 | static __initdata unsigned long rhash_entries; |
3288 | static int __init set_rhash_entries(char *str) | 3290 | static int __init set_rhash_entries(char *str) |
3289 | { | 3291 | { |
3290 | if (!str) | 3292 | if (!str) |
3291 | return 0; | 3293 | return 0; |
3292 | rhash_entries = simple_strtoul(str, &str, 0); | 3294 | rhash_entries = simple_strtoul(str, &str, 0); |
3293 | return 1; | 3295 | return 1; |
3294 | } | 3296 | } |
3295 | __setup("rhash_entries=", set_rhash_entries); | 3297 | __setup("rhash_entries=", set_rhash_entries); |
3296 | 3298 | ||
3297 | int __init ip_rt_init(void) | 3299 | int __init ip_rt_init(void) |
3298 | { | 3300 | { |
3299 | int rc = 0; | 3301 | int rc = 0; |
3300 | 3302 | ||
3301 | #ifdef CONFIG_IP_ROUTE_CLASSID | 3303 | #ifdef CONFIG_IP_ROUTE_CLASSID |
3302 | ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct)); | 3304 | ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct)); |
3303 | if (!ip_rt_acct) | 3305 | if (!ip_rt_acct) |
3304 | panic("IP: failed to allocate ip_rt_acct\n"); | 3306 | panic("IP: failed to allocate ip_rt_acct\n"); |
3305 | #endif | 3307 | #endif |
3306 | 3308 | ||
3307 | ipv4_dst_ops.kmem_cachep = | 3309 | ipv4_dst_ops.kmem_cachep = |
3308 | kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0, | 3310 | kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0, |
3309 | SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); | 3311 | SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); |
3310 | 3312 | ||
3311 | ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep; | 3313 | ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep; |
3312 | 3314 | ||
3313 | if (dst_entries_init(&ipv4_dst_ops) < 0) | 3315 | if (dst_entries_init(&ipv4_dst_ops) < 0) |
3314 | panic("IP: failed to allocate ipv4_dst_ops counter\n"); | 3316 | panic("IP: failed to allocate ipv4_dst_ops counter\n"); |
3315 | 3317 | ||
3316 | if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0) | 3318 | if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0) |
3317 | panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n"); | 3319 | panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n"); |
3318 | 3320 | ||
3319 | rt_hash_table = (struct rt_hash_bucket *) | 3321 | rt_hash_table = (struct rt_hash_bucket *) |
3320 | alloc_large_system_hash("IP route cache", | 3322 | alloc_large_system_hash("IP route cache", |
3321 | sizeof(struct rt_hash_bucket), | 3323 | sizeof(struct rt_hash_bucket), |
3322 | rhash_entries, | 3324 | rhash_entries, |
3323 | (totalram_pages >= 128 * 1024) ? | 3325 | (totalram_pages >= 128 * 1024) ? |
3324 | 15 : 17, | 3326 | 15 : 17, |
3325 | 0, | 3327 | 0, |
3326 | &rt_hash_log, | 3328 | &rt_hash_log, |
3327 | &rt_hash_mask, | 3329 | &rt_hash_mask, |
3328 | rhash_entries ? 0 : 512 * 1024); | 3330 | rhash_entries ? 0 : 512 * 1024); |
3329 | memset(rt_hash_table, 0, (rt_hash_mask + 1) * sizeof(struct rt_hash_bucket)); | 3331 | memset(rt_hash_table, 0, (rt_hash_mask + 1) * sizeof(struct rt_hash_bucket)); |
3330 | rt_hash_lock_init(); | 3332 | rt_hash_lock_init(); |
3331 | 3333 | ||
3332 | ipv4_dst_ops.gc_thresh = (rt_hash_mask + 1); | 3334 | ipv4_dst_ops.gc_thresh = (rt_hash_mask + 1); |
3333 | ip_rt_max_size = (rt_hash_mask + 1) * 16; | 3335 | ip_rt_max_size = (rt_hash_mask + 1) * 16; |
3334 | 3336 | ||
3335 | devinet_init(); | 3337 | devinet_init(); |
3336 | ip_fib_init(); | 3338 | ip_fib_init(); |
3337 | 3339 | ||
3338 | if (ip_rt_proc_init()) | 3340 | if (ip_rt_proc_init()) |
3339 | printk(KERN_ERR "Unable to create route proc files\n"); | 3341 | printk(KERN_ERR "Unable to create route proc files\n"); |
3340 | #ifdef CONFIG_XFRM | 3342 | #ifdef CONFIG_XFRM |
3341 | xfrm_init(); | 3343 | xfrm_init(); |
3342 | xfrm4_init(ip_rt_max_size); | 3344 | xfrm4_init(ip_rt_max_size); |
3343 | #endif | 3345 | #endif |
3344 | rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, NULL); | 3346 | rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, NULL); |
3345 | 3347 | ||
3346 | #ifdef CONFIG_SYSCTL | 3348 | #ifdef CONFIG_SYSCTL |
3347 | register_pernet_subsys(&sysctl_route_ops); | 3349 | register_pernet_subsys(&sysctl_route_ops); |
3348 | #endif | 3350 | #endif |
3349 | register_pernet_subsys(&rt_genid_ops); | 3351 | register_pernet_subsys(&rt_genid_ops); |
3350 | return rc; | 3352 | return rc; |
3351 | } | 3353 | } |
3352 | 3354 | ||
3353 | #ifdef CONFIG_SYSCTL | 3355 | #ifdef CONFIG_SYSCTL |
3354 | /* | 3356 | /* |
3355 | * We really need to sanitize the damn ipv4 init order, then all | 3357 | * We really need to sanitize the damn ipv4 init order, then all |
3356 | * this nonsense will go away. | 3358 | * this nonsense will go away. |
3357 | */ | 3359 | */ |
3358 | void __init ip_static_sysctl_init(void) | 3360 | void __init ip_static_sysctl_init(void) |
3359 | { | 3361 | { |
3360 | register_sysctl_paths(ipv4_path, ipv4_skeleton); | 3362 | register_sysctl_paths(ipv4_path, ipv4_skeleton); |
1 | /* | 1 | /* |
2 | * IPv6 Address [auto]configuration | 2 | * IPv6 Address [auto]configuration |
3 | * Linux INET6 implementation | 3 | * Linux INET6 implementation |
4 | * | 4 | * |
5 | * Authors: | 5 | * Authors: |
6 | * Pedro Roque <roque@di.fc.ul.pt> | 6 | * Pedro Roque <roque@di.fc.ul.pt> |
7 | * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> | 7 | * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> |
8 | * | 8 | * |
9 | * This program is free software; you can redistribute it and/or | 9 | * This program is free software; you can redistribute it and/or |
10 | * modify it under the terms of the GNU General Public License | 10 | * modify it under the terms of the GNU General Public License |
11 | * as published by the Free Software Foundation; either version | 11 | * as published by the Free Software Foundation; either version |
12 | * 2 of the License, or (at your option) any later version. | 12 | * 2 of the License, or (at your option) any later version. |
13 | */ | 13 | */ |
14 | 14 | ||
15 | /* | 15 | /* |
16 | * Changes: | 16 | * Changes: |
17 | * | 17 | * |
18 | * Janos Farkas : delete timer on ifdown | 18 | * Janos Farkas : delete timer on ifdown |
19 | * <chexum@bankinf.banki.hu> | 19 | * <chexum@bankinf.banki.hu> |
20 | * Andi Kleen : kill double kfree on module | 20 | * Andi Kleen : kill double kfree on module |
21 | * unload. | 21 | * unload. |
22 | * Maciej W. Rozycki : FDDI support | 22 | * Maciej W. Rozycki : FDDI support |
23 | * sekiya@USAGI : Don't send too many RS | 23 | * sekiya@USAGI : Don't send too many RS |
24 | * packets. | 24 | * packets. |
25 | * yoshfuji@USAGI : Fixed interval between DAD | 25 | * yoshfuji@USAGI : Fixed interval between DAD |
26 | * packets. | 26 | * packets. |
27 | * YOSHIFUJI Hideaki @USAGI : improved accuracy of | 27 | * YOSHIFUJI Hideaki @USAGI : improved accuracy of |
28 | * address validation timer. | 28 | * address validation timer. |
29 | * YOSHIFUJI Hideaki @USAGI : Privacy Extensions (RFC3041) | 29 | * YOSHIFUJI Hideaki @USAGI : Privacy Extensions (RFC3041) |
30 | * support. | 30 | * support. |
31 | * Yuji SEKIYA @USAGI : Don't assign a same IPv6 | 31 | * Yuji SEKIYA @USAGI : Don't assign a same IPv6 |
32 | * address on a same interface. | 32 | * address on a same interface. |
33 | * YOSHIFUJI Hideaki @USAGI : ARCnet support | 33 | * YOSHIFUJI Hideaki @USAGI : ARCnet support |
34 | * YOSHIFUJI Hideaki @USAGI : convert /proc/net/if_inet6 to | 34 | * YOSHIFUJI Hideaki @USAGI : convert /proc/net/if_inet6 to |
35 | * seq_file. | 35 | * seq_file. |
36 | * YOSHIFUJI Hideaki @USAGI : improved source address | 36 | * YOSHIFUJI Hideaki @USAGI : improved source address |
37 | * selection; consider scope, | 37 | * selection; consider scope, |
38 | * status etc. | 38 | * status etc. |
39 | */ | 39 | */ |
40 | 40 | ||
41 | #include <linux/errno.h> | 41 | #include <linux/errno.h> |
42 | #include <linux/types.h> | 42 | #include <linux/types.h> |
43 | #include <linux/kernel.h> | 43 | #include <linux/kernel.h> |
44 | #include <linux/socket.h> | 44 | #include <linux/socket.h> |
45 | #include <linux/sockios.h> | 45 | #include <linux/sockios.h> |
46 | #include <linux/net.h> | 46 | #include <linux/net.h> |
47 | #include <linux/in6.h> | 47 | #include <linux/in6.h> |
48 | #include <linux/netdevice.h> | 48 | #include <linux/netdevice.h> |
49 | #include <linux/if_addr.h> | 49 | #include <linux/if_addr.h> |
50 | #include <linux/if_arp.h> | 50 | #include <linux/if_arp.h> |
51 | #include <linux/if_arcnet.h> | 51 | #include <linux/if_arcnet.h> |
52 | #include <linux/if_infiniband.h> | 52 | #include <linux/if_infiniband.h> |
53 | #include <linux/route.h> | 53 | #include <linux/route.h> |
54 | #include <linux/inetdevice.h> | 54 | #include <linux/inetdevice.h> |
55 | #include <linux/init.h> | 55 | #include <linux/init.h> |
56 | #include <linux/slab.h> | 56 | #include <linux/slab.h> |
57 | #ifdef CONFIG_SYSCTL | 57 | #ifdef CONFIG_SYSCTL |
58 | #include <linux/sysctl.h> | 58 | #include <linux/sysctl.h> |
59 | #endif | 59 | #endif |
60 | #include <linux/capability.h> | 60 | #include <linux/capability.h> |
61 | #include <linux/delay.h> | 61 | #include <linux/delay.h> |
62 | #include <linux/notifier.h> | 62 | #include <linux/notifier.h> |
63 | #include <linux/string.h> | 63 | #include <linux/string.h> |
64 | 64 | ||
65 | #include <net/net_namespace.h> | 65 | #include <net/net_namespace.h> |
66 | #include <net/sock.h> | 66 | #include <net/sock.h> |
67 | #include <net/snmp.h> | 67 | #include <net/snmp.h> |
68 | 68 | ||
69 | #include <net/ipv6.h> | 69 | #include <net/ipv6.h> |
70 | #include <net/protocol.h> | 70 | #include <net/protocol.h> |
71 | #include <net/ndisc.h> | 71 | #include <net/ndisc.h> |
72 | #include <net/ip6_route.h> | 72 | #include <net/ip6_route.h> |
73 | #include <net/addrconf.h> | 73 | #include <net/addrconf.h> |
74 | #include <net/tcp.h> | 74 | #include <net/tcp.h> |
75 | #include <net/ip.h> | 75 | #include <net/ip.h> |
76 | #include <net/netlink.h> | 76 | #include <net/netlink.h> |
77 | #include <net/pkt_sched.h> | 77 | #include <net/pkt_sched.h> |
78 | #include <linux/if_tunnel.h> | 78 | #include <linux/if_tunnel.h> |
79 | #include <linux/rtnetlink.h> | 79 | #include <linux/rtnetlink.h> |
80 | 80 | ||
81 | #ifdef CONFIG_IPV6_PRIVACY | 81 | #ifdef CONFIG_IPV6_PRIVACY |
82 | #include <linux/random.h> | 82 | #include <linux/random.h> |
83 | #endif | 83 | #endif |
84 | 84 | ||
85 | #include <linux/uaccess.h> | 85 | #include <linux/uaccess.h> |
86 | #include <asm/unaligned.h> | 86 | #include <asm/unaligned.h> |
87 | 87 | ||
88 | #include <linux/proc_fs.h> | 88 | #include <linux/proc_fs.h> |
89 | #include <linux/seq_file.h> | 89 | #include <linux/seq_file.h> |
90 | 90 | ||
91 | /* Set to 3 to get tracing... */ | 91 | /* Set to 3 to get tracing... */ |
92 | #define ACONF_DEBUG 2 | 92 | #define ACONF_DEBUG 2 |
93 | 93 | ||
94 | #if ACONF_DEBUG >= 3 | 94 | #if ACONF_DEBUG >= 3 |
95 | #define ADBG(x) printk x | 95 | #define ADBG(x) printk x |
96 | #else | 96 | #else |
97 | #define ADBG(x) | 97 | #define ADBG(x) |
98 | #endif | 98 | #endif |
99 | 99 | ||
100 | #define INFINITY_LIFE_TIME 0xFFFFFFFF | 100 | #define INFINITY_LIFE_TIME 0xFFFFFFFF |
101 | 101 | ||
102 | static inline u32 cstamp_delta(unsigned long cstamp) | 102 | static inline u32 cstamp_delta(unsigned long cstamp) |
103 | { | 103 | { |
104 | return (cstamp - INITIAL_JIFFIES) * 100UL / HZ; | 104 | return (cstamp - INITIAL_JIFFIES) * 100UL / HZ; |
105 | } | 105 | } |
106 | 106 | ||
107 | #define ADDRCONF_TIMER_FUZZ_MINUS (HZ > 50 ? HZ/50 : 1) | 107 | #define ADDRCONF_TIMER_FUZZ_MINUS (HZ > 50 ? HZ/50 : 1) |
108 | #define ADDRCONF_TIMER_FUZZ (HZ / 4) | 108 | #define ADDRCONF_TIMER_FUZZ (HZ / 4) |
109 | #define ADDRCONF_TIMER_FUZZ_MAX (HZ) | 109 | #define ADDRCONF_TIMER_FUZZ_MAX (HZ) |
110 | 110 | ||
#ifdef CONFIG_SYSCTL
static void addrconf_sysctl_register(struct inet6_dev *idev);
static void addrconf_sysctl_unregister(struct inet6_dev *idev);
#else
/* No-op stubs keep callers ifdef-free when sysctl is disabled. */
static inline void addrconf_sysctl_register(struct inet6_dev *idev)
{
}

static inline void addrconf_sysctl_unregister(struct inet6_dev *idev)
{
}
#endif
123 | 123 | ||
124 | #ifdef CONFIG_IPV6_PRIVACY | 124 | #ifdef CONFIG_IPV6_PRIVACY |
125 | static int __ipv6_regen_rndid(struct inet6_dev *idev); | 125 | static int __ipv6_regen_rndid(struct inet6_dev *idev); |
126 | static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr); | 126 | static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr); |
127 | static void ipv6_regen_rndid(unsigned long data); | 127 | static void ipv6_regen_rndid(unsigned long data); |
128 | #endif | 128 | #endif |
129 | 129 | ||
130 | static int ipv6_generate_eui64(u8 *eui, struct net_device *dev); | 130 | static int ipv6_generate_eui64(u8 *eui, struct net_device *dev); |
131 | static int ipv6_count_addresses(struct inet6_dev *idev); | 131 | static int ipv6_count_addresses(struct inet6_dev *idev); |
132 | 132 | ||
133 | /* | 133 | /* |
134 | * Configured unicast address hash table | 134 | * Configured unicast address hash table |
135 | */ | 135 | */ |
136 | static struct hlist_head inet6_addr_lst[IN6_ADDR_HSIZE]; | 136 | static struct hlist_head inet6_addr_lst[IN6_ADDR_HSIZE]; |
137 | static DEFINE_SPINLOCK(addrconf_hash_lock); | 137 | static DEFINE_SPINLOCK(addrconf_hash_lock); |
138 | 138 | ||
139 | static void addrconf_verify(unsigned long); | 139 | static void addrconf_verify(unsigned long); |
140 | 140 | ||
141 | static DEFINE_TIMER(addr_chk_timer, addrconf_verify, 0, 0); | 141 | static DEFINE_TIMER(addr_chk_timer, addrconf_verify, 0, 0); |
142 | static DEFINE_SPINLOCK(addrconf_verify_lock); | 142 | static DEFINE_SPINLOCK(addrconf_verify_lock); |
143 | 143 | ||
144 | static void addrconf_join_anycast(struct inet6_ifaddr *ifp); | 144 | static void addrconf_join_anycast(struct inet6_ifaddr *ifp); |
145 | static void addrconf_leave_anycast(struct inet6_ifaddr *ifp); | 145 | static void addrconf_leave_anycast(struct inet6_ifaddr *ifp); |
146 | 146 | ||
147 | static void addrconf_type_change(struct net_device *dev, | 147 | static void addrconf_type_change(struct net_device *dev, |
148 | unsigned long event); | 148 | unsigned long event); |
149 | static int addrconf_ifdown(struct net_device *dev, int how); | 149 | static int addrconf_ifdown(struct net_device *dev, int how); |
150 | 150 | ||
151 | static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags); | 151 | static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags); |
152 | static void addrconf_dad_timer(unsigned long data); | 152 | static void addrconf_dad_timer(unsigned long data); |
153 | static void addrconf_dad_completed(struct inet6_ifaddr *ifp); | 153 | static void addrconf_dad_completed(struct inet6_ifaddr *ifp); |
154 | static void addrconf_dad_run(struct inet6_dev *idev); | 154 | static void addrconf_dad_run(struct inet6_dev *idev); |
155 | static void addrconf_rs_timer(unsigned long data); | 155 | static void addrconf_rs_timer(unsigned long data); |
156 | static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa); | 156 | static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa); |
157 | static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa); | 157 | static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa); |
158 | 158 | ||
159 | static void inet6_prefix_notify(int event, struct inet6_dev *idev, | 159 | static void inet6_prefix_notify(int event, struct inet6_dev *idev, |
160 | struct prefix_info *pinfo); | 160 | struct prefix_info *pinfo); |
161 | static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, | 161 | static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, |
162 | struct net_device *dev); | 162 | struct net_device *dev); |
163 | 163 | ||
164 | static ATOMIC_NOTIFIER_HEAD(inet6addr_chain); | 164 | static ATOMIC_NOTIFIER_HEAD(inet6addr_chain); |
165 | 165 | ||
/*
 * Template per-interface IPv6 configuration, copied into every newly
 * created inet6_dev (host defaults: forwarding off, accept RAs and
 * redirects, perform autoconfiguration and DAD).
 */
static struct ipv6_devconf ipv6_devconf __read_mostly = {
	.forwarding		= 0,
	.hop_limit		= IPV6_DEFAULT_HOPLIMIT,
	.mtu6			= IPV6_MIN_MTU,
	.accept_ra		= 1,
	.accept_redirects	= 1,
	.autoconf		= 1,
	.force_mld_version	= 0,
	.dad_transmits		= 1,
	.rtr_solicits		= MAX_RTR_SOLICITATIONS,
	.rtr_solicit_interval	= RTR_SOLICITATION_INTERVAL,
	.rtr_solicit_delay	= MAX_RTR_SOLICITATION_DELAY,
#ifdef CONFIG_IPV6_PRIVACY
	.use_tempaddr		= 0,	/* RFC 4941 temporary addresses off by default */
	.temp_valid_lft		= TEMP_VALID_LIFETIME,
	.temp_prefered_lft	= TEMP_PREFERRED_LIFETIME,
	.regen_max_retry	= REGEN_MAX_RETRY,
	.max_desync_factor	= MAX_DESYNC_FACTOR,
#endif
	.max_addresses		= IPV6_MAX_ADDRESSES,
	.accept_ra_defrtr	= 1,
	.accept_ra_pinfo	= 1,
#ifdef CONFIG_IPV6_ROUTER_PREF
	.accept_ra_rtr_pref	= 1,
	.rtr_probe_interval	= 60 * HZ,
#ifdef CONFIG_IPV6_ROUTE_INFO
	.accept_ra_rt_info_max_plen = 0,
#endif
#endif
	.proxy_ndp		= 0,
	.accept_source_route	= 0,	/* we do not accept RH0 by default. */
	.disable_ipv6		= 0,
	.accept_dad		= 1,
};
200 | 200 | ||
/*
 * Per-namespace "default" devconf exposed via sysctl; same values as
 * the interface template except force_mld_version is not set here.
 */
static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
	.forwarding		= 0,
	.hop_limit		= IPV6_DEFAULT_HOPLIMIT,
	.mtu6			= IPV6_MIN_MTU,
	.accept_ra		= 1,
	.accept_redirects	= 1,
	.autoconf		= 1,
	.dad_transmits		= 1,
	.rtr_solicits		= MAX_RTR_SOLICITATIONS,
	.rtr_solicit_interval	= RTR_SOLICITATION_INTERVAL,
	.rtr_solicit_delay	= MAX_RTR_SOLICITATION_DELAY,
#ifdef CONFIG_IPV6_PRIVACY
	.use_tempaddr		= 0,	/* RFC 4941 temporary addresses off by default */
	.temp_valid_lft		= TEMP_VALID_LIFETIME,
	.temp_prefered_lft	= TEMP_PREFERRED_LIFETIME,
	.regen_max_retry	= REGEN_MAX_RETRY,
	.max_desync_factor	= MAX_DESYNC_FACTOR,
#endif
	.max_addresses		= IPV6_MAX_ADDRESSES,
	.accept_ra_defrtr	= 1,
	.accept_ra_pinfo	= 1,
#ifdef CONFIG_IPV6_ROUTER_PREF
	.accept_ra_rtr_pref	= 1,
	.rtr_probe_interval	= 60 * HZ,
#ifdef CONFIG_IPV6_ROUTE_INFO
	.accept_ra_rt_info_max_plen = 0,
#endif
#endif
	.proxy_ndp		= 0,
	.accept_source_route	= 0,	/* we do not accept RH0 by default. */
	.disable_ipv6		= 0,
	.accept_dad		= 1,
};
234 | 234 | ||
/* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */
const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT;
/* Well-known link-local multicast groups ff02::1 / ff02::2 (RFC 4291) */
const struct in6_addr in6addr_linklocal_allnodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
const struct in6_addr in6addr_linklocal_allrouters = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT;
240 | 240 | ||
241 | /* Check if a valid qdisc is available */ | 241 | /* Check if a valid qdisc is available */ |
242 | static inline bool addrconf_qdisc_ok(const struct net_device *dev) | 242 | static inline bool addrconf_qdisc_ok(const struct net_device *dev) |
243 | { | 243 | { |
244 | return !qdisc_tx_is_noop(dev); | 244 | return !qdisc_tx_is_noop(dev); |
245 | } | 245 | } |
246 | 246 | ||
247 | /* Check if a route is valid prefix route */ | 247 | /* Check if a route is valid prefix route */ |
248 | static inline int addrconf_is_prefix_route(const struct rt6_info *rt) | 248 | static inline int addrconf_is_prefix_route(const struct rt6_info *rt) |
249 | { | 249 | { |
250 | return (rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0; | 250 | return (rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0; |
251 | } | 251 | } |
252 | 252 | ||
/*
 * Stop a pending per-address (DAD/RS) timer.  An armed timer holds a
 * reference on @ifp, so drop that reference when deactivation succeeds.
 */
static void addrconf_del_timer(struct inet6_ifaddr *ifp)
{
	if (del_timer(&ifp->timer))
		__in6_ifa_put(ifp);
}
258 | 258 | ||
/* Selects which handler addrconf_mod_timer() installs on ifp->timer. */
enum addrconf_timer_t {
	AC_NONE,	/* leave the timer function unchanged */
	AC_DAD,		/* duplicate address detection */
	AC_RS,		/* router solicitation */
};
264 | 264 | ||
265 | static void addrconf_mod_timer(struct inet6_ifaddr *ifp, | 265 | static void addrconf_mod_timer(struct inet6_ifaddr *ifp, |
266 | enum addrconf_timer_t what, | 266 | enum addrconf_timer_t what, |
267 | unsigned long when) | 267 | unsigned long when) |
268 | { | 268 | { |
269 | if (!del_timer(&ifp->timer)) | 269 | if (!del_timer(&ifp->timer)) |
270 | in6_ifa_hold(ifp); | 270 | in6_ifa_hold(ifp); |
271 | 271 | ||
272 | switch (what) { | 272 | switch (what) { |
273 | case AC_DAD: | 273 | case AC_DAD: |
274 | ifp->timer.function = addrconf_dad_timer; | 274 | ifp->timer.function = addrconf_dad_timer; |
275 | break; | 275 | break; |
276 | case AC_RS: | 276 | case AC_RS: |
277 | ifp->timer.function = addrconf_rs_timer; | 277 | ifp->timer.function = addrconf_rs_timer; |
278 | break; | 278 | break; |
279 | default: | 279 | default: |
280 | break; | 280 | break; |
281 | } | 281 | } |
282 | ifp->timer.expires = jiffies + when; | 282 | ifp->timer.expires = jiffies + when; |
283 | add_timer(&ifp->timer); | 283 | add_timer(&ifp->timer); |
284 | } | 284 | } |
285 | 285 | ||
/*
 * Allocate the per-device SNMP counter blocks (IPv6 per-cpu MIB,
 * ICMPv6 device MIB, ICMPv6 per-message MIB).  Returns 0 on success
 * or -ENOMEM; partially-allocated state is released via the goto
 * unwind, in reverse order of allocation.
 */
static int snmp6_alloc_dev(struct inet6_dev *idev)
{
	if (snmp_mib_init((void __percpu **)idev->stats.ipv6,
			  sizeof(struct ipstats_mib),
			  __alignof__(struct ipstats_mib)) < 0)
		goto err_ip;
	idev->stats.icmpv6dev = kzalloc(sizeof(struct icmpv6_mib_device),
					GFP_KERNEL);
	if (!idev->stats.icmpv6dev)
		goto err_icmp;
	idev->stats.icmpv6msgdev = kzalloc(sizeof(struct icmpv6msg_mib_device),
					   GFP_KERNEL);
	if (!idev->stats.icmpv6msgdev)
		goto err_icmpmsg;

	return 0;

err_icmpmsg:
	kfree(idev->stats.icmpv6dev);
err_icmp:
	snmp_mib_free((void __percpu **)idev->stats.ipv6);
err_ip:
	return -ENOMEM;
}
310 | 310 | ||
311 | static void snmp6_free_dev(struct inet6_dev *idev) | 311 | static void snmp6_free_dev(struct inet6_dev *idev) |
312 | { | 312 | { |
313 | kfree(idev->stats.icmpv6msgdev); | 313 | kfree(idev->stats.icmpv6msgdev); |
314 | kfree(idev->stats.icmpv6dev); | 314 | kfree(idev->stats.icmpv6dev); |
315 | snmp_mib_free((void __percpu **)idev->stats.ipv6); | 315 | snmp_mib_free((void __percpu **)idev->stats.ipv6); |
316 | } | 316 | } |
317 | 317 | ||
/* Nobody refers to this device, we may destroy it. */

void in6_dev_finish_destroy(struct inet6_dev *idev)
{
	struct net_device *dev = idev->dev;

	/* All addresses and multicast state must already be gone. */
	WARN_ON(!list_empty(&idev->addr_list));
	WARN_ON(idev->mc_list != NULL);

#ifdef NET_REFCNT_DEBUG
	printk(KERN_DEBUG "in6_dev_finish_destroy: %s\n", dev ? dev->name : "NIL");
#endif
	dev_put(dev);
	if (!idev->dead) {
		/* Refcounting bug: deliberately leak rather than free live state. */
		pr_warning("Freeing alive inet6 device %p\n", idev);
		return;
	}
	snmp6_free_dev(idev);
	/* RCU readers may still hold idev; defer the actual free. */
	kfree_rcu(idev, rcu);
}

EXPORT_SYMBOL(in6_dev_finish_destroy);
340 | 340 | ||
341 | static struct inet6_dev * ipv6_add_dev(struct net_device *dev) | 341 | static struct inet6_dev * ipv6_add_dev(struct net_device *dev) |
342 | { | 342 | { |
343 | struct inet6_dev *ndev; | 343 | struct inet6_dev *ndev; |
344 | 344 | ||
345 | ASSERT_RTNL(); | 345 | ASSERT_RTNL(); |
346 | 346 | ||
347 | if (dev->mtu < IPV6_MIN_MTU) | 347 | if (dev->mtu < IPV6_MIN_MTU) |
348 | return NULL; | 348 | return NULL; |
349 | 349 | ||
350 | ndev = kzalloc(sizeof(struct inet6_dev), GFP_KERNEL); | 350 | ndev = kzalloc(sizeof(struct inet6_dev), GFP_KERNEL); |
351 | 351 | ||
352 | if (ndev == NULL) | 352 | if (ndev == NULL) |
353 | return NULL; | 353 | return NULL; |
354 | 354 | ||
355 | rwlock_init(&ndev->lock); | 355 | rwlock_init(&ndev->lock); |
356 | ndev->dev = dev; | 356 | ndev->dev = dev; |
357 | INIT_LIST_HEAD(&ndev->addr_list); | 357 | INIT_LIST_HEAD(&ndev->addr_list); |
358 | 358 | ||
359 | memcpy(&ndev->cnf, dev_net(dev)->ipv6.devconf_dflt, sizeof(ndev->cnf)); | 359 | memcpy(&ndev->cnf, dev_net(dev)->ipv6.devconf_dflt, sizeof(ndev->cnf)); |
360 | ndev->cnf.mtu6 = dev->mtu; | 360 | ndev->cnf.mtu6 = dev->mtu; |
361 | ndev->cnf.sysctl = NULL; | 361 | ndev->cnf.sysctl = NULL; |
362 | ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl); | 362 | ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl); |
363 | if (ndev->nd_parms == NULL) { | 363 | if (ndev->nd_parms == NULL) { |
364 | kfree(ndev); | 364 | kfree(ndev); |
365 | return NULL; | 365 | return NULL; |
366 | } | 366 | } |
367 | if (ndev->cnf.forwarding) | 367 | if (ndev->cnf.forwarding) |
368 | dev_disable_lro(dev); | 368 | dev_disable_lro(dev); |
369 | /* We refer to the device */ | 369 | /* We refer to the device */ |
370 | dev_hold(dev); | 370 | dev_hold(dev); |
371 | 371 | ||
372 | if (snmp6_alloc_dev(ndev) < 0) { | 372 | if (snmp6_alloc_dev(ndev) < 0) { |
373 | ADBG((KERN_WARNING | 373 | ADBG((KERN_WARNING |
374 | "%s(): cannot allocate memory for statistics; dev=%s.\n", | 374 | "%s(): cannot allocate memory for statistics; dev=%s.\n", |
375 | __func__, dev->name)); | 375 | __func__, dev->name)); |
376 | neigh_parms_release(&nd_tbl, ndev->nd_parms); | 376 | neigh_parms_release(&nd_tbl, ndev->nd_parms); |
377 | ndev->dead = 1; | 377 | ndev->dead = 1; |
378 | in6_dev_finish_destroy(ndev); | 378 | in6_dev_finish_destroy(ndev); |
379 | return NULL; | 379 | return NULL; |
380 | } | 380 | } |
381 | 381 | ||
382 | if (snmp6_register_dev(ndev) < 0) { | 382 | if (snmp6_register_dev(ndev) < 0) { |
383 | ADBG((KERN_WARNING | 383 | ADBG((KERN_WARNING |
384 | "%s(): cannot create /proc/net/dev_snmp6/%s\n", | 384 | "%s(): cannot create /proc/net/dev_snmp6/%s\n", |
385 | __func__, dev->name)); | 385 | __func__, dev->name)); |
386 | neigh_parms_release(&nd_tbl, ndev->nd_parms); | 386 | neigh_parms_release(&nd_tbl, ndev->nd_parms); |
387 | ndev->dead = 1; | 387 | ndev->dead = 1; |
388 | in6_dev_finish_destroy(ndev); | 388 | in6_dev_finish_destroy(ndev); |
389 | return NULL; | 389 | return NULL; |
390 | } | 390 | } |
391 | 391 | ||
392 | /* One reference from device. We must do this before | 392 | /* One reference from device. We must do this before |
393 | * we invoke __ipv6_regen_rndid(). | 393 | * we invoke __ipv6_regen_rndid(). |
394 | */ | 394 | */ |
395 | in6_dev_hold(ndev); | 395 | in6_dev_hold(ndev); |
396 | 396 | ||
397 | if (dev->flags & (IFF_NOARP | IFF_LOOPBACK)) | 397 | if (dev->flags & (IFF_NOARP | IFF_LOOPBACK)) |
398 | ndev->cnf.accept_dad = -1; | 398 | ndev->cnf.accept_dad = -1; |
399 | 399 | ||
400 | #if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) | 400 | #if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) |
401 | if (dev->type == ARPHRD_SIT && (dev->priv_flags & IFF_ISATAP)) { | 401 | if (dev->type == ARPHRD_SIT && (dev->priv_flags & IFF_ISATAP)) { |
402 | printk(KERN_INFO | 402 | printk(KERN_INFO |
403 | "%s: Disabled Multicast RS\n", | 403 | "%s: Disabled Multicast RS\n", |
404 | dev->name); | 404 | dev->name); |
405 | ndev->cnf.rtr_solicits = 0; | 405 | ndev->cnf.rtr_solicits = 0; |
406 | } | 406 | } |
407 | #endif | 407 | #endif |
408 | 408 | ||
409 | #ifdef CONFIG_IPV6_PRIVACY | 409 | #ifdef CONFIG_IPV6_PRIVACY |
410 | INIT_LIST_HEAD(&ndev->tempaddr_list); | 410 | INIT_LIST_HEAD(&ndev->tempaddr_list); |
411 | setup_timer(&ndev->regen_timer, ipv6_regen_rndid, (unsigned long)ndev); | 411 | setup_timer(&ndev->regen_timer, ipv6_regen_rndid, (unsigned long)ndev); |
412 | if ((dev->flags&IFF_LOOPBACK) || | 412 | if ((dev->flags&IFF_LOOPBACK) || |
413 | dev->type == ARPHRD_TUNNEL || | 413 | dev->type == ARPHRD_TUNNEL || |
414 | dev->type == ARPHRD_TUNNEL6 || | 414 | dev->type == ARPHRD_TUNNEL6 || |
415 | dev->type == ARPHRD_SIT || | 415 | dev->type == ARPHRD_SIT || |
416 | dev->type == ARPHRD_NONE) { | 416 | dev->type == ARPHRD_NONE) { |
417 | ndev->cnf.use_tempaddr = -1; | 417 | ndev->cnf.use_tempaddr = -1; |
418 | } else { | 418 | } else { |
419 | in6_dev_hold(ndev); | 419 | in6_dev_hold(ndev); |
420 | ipv6_regen_rndid((unsigned long) ndev); | 420 | ipv6_regen_rndid((unsigned long) ndev); |
421 | } | 421 | } |
422 | #endif | 422 | #endif |
423 | 423 | ||
424 | if (netif_running(dev) && addrconf_qdisc_ok(dev)) | 424 | if (netif_running(dev) && addrconf_qdisc_ok(dev)) |
425 | ndev->if_flags |= IF_READY; | 425 | ndev->if_flags |= IF_READY; |
426 | 426 | ||
427 | ipv6_mc_init_dev(ndev); | 427 | ipv6_mc_init_dev(ndev); |
428 | ndev->tstamp = jiffies; | 428 | ndev->tstamp = jiffies; |
429 | addrconf_sysctl_register(ndev); | 429 | addrconf_sysctl_register(ndev); |
430 | /* protected by rtnl_lock */ | 430 | /* protected by rtnl_lock */ |
431 | rcu_assign_pointer(dev->ip6_ptr, ndev); | 431 | rcu_assign_pointer(dev->ip6_ptr, ndev); |
432 | 432 | ||
433 | /* Join all-node multicast group */ | 433 | /* Join all-node multicast group */ |
434 | ipv6_dev_mc_inc(dev, &in6addr_linklocal_allnodes); | 434 | ipv6_dev_mc_inc(dev, &in6addr_linklocal_allnodes); |
435 | 435 | ||
436 | return ndev; | 436 | return ndev; |
437 | } | 437 | } |
438 | 438 | ||
439 | static struct inet6_dev * ipv6_find_idev(struct net_device *dev) | 439 | static struct inet6_dev * ipv6_find_idev(struct net_device *dev) |
440 | { | 440 | { |
441 | struct inet6_dev *idev; | 441 | struct inet6_dev *idev; |
442 | 442 | ||
443 | ASSERT_RTNL(); | 443 | ASSERT_RTNL(); |
444 | 444 | ||
445 | idev = __in6_dev_get(dev); | 445 | idev = __in6_dev_get(dev); |
446 | if (!idev) { | 446 | if (!idev) { |
447 | idev = ipv6_add_dev(dev); | 447 | idev = ipv6_add_dev(dev); |
448 | if (!idev) | 448 | if (!idev) |
449 | return NULL; | 449 | return NULL; |
450 | } | 450 | } |
451 | 451 | ||
452 | if (dev->flags&IFF_UP) | 452 | if (dev->flags&IFF_UP) |
453 | ipv6_mc_up(idev); | 453 | ipv6_mc_up(idev); |
454 | return idev; | 454 | return idev; |
455 | } | 455 | } |
456 | 456 | ||
457 | #ifdef CONFIG_SYSCTL | 457 | #ifdef CONFIG_SYSCTL |
/*
 * Bring a single device's state in line with its (just written)
 * cnf.forwarding value: disable LRO for routers, join or leave the
 * all-routers multicast group, and join/leave the subnet-router
 * anycast address for every usable (non-tentative) unicast address.
 */
static void dev_forward_change(struct inet6_dev *idev)
{
	struct net_device *dev;
	struct inet6_ifaddr *ifa;

	if (!idev)
		return;
	dev = idev->dev;
	/* LRO merges segments, which a forwarding box must not do. */
	if (idev->cnf.forwarding)
		dev_disable_lro(dev);
	if (dev && (dev->flags & IFF_MULTICAST)) {
		if (idev->cnf.forwarding)
			ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters);
		else
			ipv6_dev_mc_dec(dev, &in6addr_linklocal_allrouters);
	}

	list_for_each_entry(ifa, &idev->addr_list, if_list) {
		if (ifa->flags&IFA_F_TENTATIVE)
			continue;
		if (idev->cnf.forwarding)
			addrconf_join_anycast(ifa);
		else
			addrconf_leave_anycast(ifa);
	}
}
484 | 484 | ||
485 | 485 | ||
486 | static void addrconf_forward_change(struct net *net, __s32 newf) | 486 | static void addrconf_forward_change(struct net *net, __s32 newf) |
487 | { | 487 | { |
488 | struct net_device *dev; | 488 | struct net_device *dev; |
489 | struct inet6_dev *idev; | 489 | struct inet6_dev *idev; |
490 | 490 | ||
491 | rcu_read_lock(); | 491 | rcu_read_lock(); |
492 | for_each_netdev_rcu(net, dev) { | 492 | for_each_netdev_rcu(net, dev) { |
493 | idev = __in6_dev_get(dev); | 493 | idev = __in6_dev_get(dev); |
494 | if (idev) { | 494 | if (idev) { |
495 | int changed = (!idev->cnf.forwarding) ^ (!newf); | 495 | int changed = (!idev->cnf.forwarding) ^ (!newf); |
496 | idev->cnf.forwarding = newf; | 496 | idev->cnf.forwarding = newf; |
497 | if (changed) | 497 | if (changed) |
498 | dev_forward_change(idev); | 498 | dev_forward_change(idev); |
499 | } | 499 | } |
500 | } | 500 | } |
501 | rcu_read_unlock(); | 501 | rcu_read_unlock(); |
502 | } | 502 | } |
503 | 503 | ||
/*
 * sysctl write helper for "forwarding".  @p points at the conf field
 * that was just updated, @old holds its previous value.  Returns 0
 * when only the per-net default changed, 1 after device/route state
 * was updated, or restart_syscall() when RTNL could not be acquired
 * (the old value is restored first so the retry starts clean).
 */
static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old)
{
	struct net *net;

	net = (struct net *)table->extra2;
	/* The default only affects future devices: nothing more to do. */
	if (p == &net->ipv6.devconf_dflt->forwarding)
		return 0;

	if (!rtnl_trylock()) {
		/* Restore the original values before restarting */
		*p = old;
		return restart_syscall();
	}

	if (p == &net->ipv6.devconf_all->forwarding) {
		/* Writing "all" also rewrites the default and every device. */
		__s32 newf = net->ipv6.devconf_all->forwarding;
		net->ipv6.devconf_dflt->forwarding = newf;
		addrconf_forward_change(net, newf);
	} else if ((!*p) ^ (!old))
		dev_forward_change((struct inet6_dev *)table->extra1);
	rtnl_unlock();

	/* Becoming a router invalidates learned default routers. */
	if (*p)
		rt6_purge_dflt_routers(net);
	return 1;
}
530 | #endif | 530 | #endif |
531 | 531 | ||
/* Nobody refers to this ifaddr, destroy it */
void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
{
	/* Must already be removed from the global address hash. */
	WARN_ON(!hlist_unhashed(&ifp->addr_lst));

#ifdef NET_REFCNT_DEBUG
	printk(KERN_DEBUG "inet6_ifa_finish_destroy\n");
#endif

	in6_dev_put(ifp->idev);

	/* An armed DAD/RS timer would hold a reference we no longer have. */
	if (del_timer(&ifp->timer))
		pr_notice("Timer is still running, when freeing ifa=%p\n", ifp);

	if (ifp->state != INET6_IFADDR_STATE_DEAD) {
		/* Refcount imbalance: leak rather than free a live address. */
		pr_warning("Freeing alive inet6 address %p\n", ifp);
		return;
	}
	dst_release(&ifp->rt->dst);

	/* RCU readers may still reach ifp; defer the actual free. */
	kfree_rcu(ifp, rcu);
}
554 | 554 | ||
555 | static void | 555 | static void |
556 | ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp) | 556 | ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp) |
557 | { | 557 | { |
558 | struct list_head *p; | 558 | struct list_head *p; |
559 | int ifp_scope = ipv6_addr_src_scope(&ifp->addr); | 559 | int ifp_scope = ipv6_addr_src_scope(&ifp->addr); |
560 | 560 | ||
561 | /* | 561 | /* |
562 | * Each device address list is sorted in order of scope - | 562 | * Each device address list is sorted in order of scope - |
563 | * global before linklocal. | 563 | * global before linklocal. |
564 | */ | 564 | */ |
565 | list_for_each(p, &idev->addr_list) { | 565 | list_for_each(p, &idev->addr_list) { |
566 | struct inet6_ifaddr *ifa | 566 | struct inet6_ifaddr *ifa |
567 | = list_entry(p, struct inet6_ifaddr, if_list); | 567 | = list_entry(p, struct inet6_ifaddr, if_list); |
568 | if (ifp_scope >= ipv6_addr_src_scope(&ifa->addr)) | 568 | if (ifp_scope >= ipv6_addr_src_scope(&ifa->addr)) |
569 | break; | 569 | break; |
570 | } | 570 | } |
571 | 571 | ||
572 | list_add_tail(&ifp->if_list, p); | 572 | list_add_tail(&ifp->if_list, p); |
573 | } | 573 | } |
574 | 574 | ||
575 | static u32 ipv6_addr_hash(const struct in6_addr *addr) | 575 | static u32 ipv6_addr_hash(const struct in6_addr *addr) |
576 | { | 576 | { |
577 | /* | 577 | /* |
578 | * We perform the hash function over the last 64 bits of the address | 578 | * We perform the hash function over the last 64 bits of the address |
579 | * This will include the IEEE address token on links that support it. | 579 | * This will include the IEEE address token on links that support it. |
580 | */ | 580 | */ |
581 | return jhash_2words((__force u32)addr->s6_addr32[2], | 581 | return jhash_2words((__force u32)addr->s6_addr32[2], |
582 | (__force u32)addr->s6_addr32[3], 0) | 582 | (__force u32)addr->s6_addr32[3], 0) |
583 | & (IN6_ADDR_HSIZE - 1); | 583 | & (IN6_ADDR_HSIZE - 1); |
584 | } | 584 | } |
585 | 585 | ||
/* On success it returns ifp with increased reference count */

/*
 * Allocate, initialize and publish a new interface address on @idev.
 * The address is created tentative (DAD pending), linked into both
 * the global hash (RCU) and the per-device list, and the inet6addr
 * notifier chain is told about it.  Returns the new inet6_ifaddr with
 * a reference held for the caller, or ERR_PTR(-errno).
 */
static struct inet6_ifaddr *
ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
	      int scope, u32 flags)
{
	struct inet6_ifaddr *ifa = NULL;
	struct rt6_info *rt;
	unsigned int hash;
	int err = 0;
	int addr_type = ipv6_addr_type(addr);

	/* Reject unspecified/multicast, and loopback on non-loopback devs. */
	if (addr_type == IPV6_ADDR_ANY ||
	    addr_type & IPV6_ADDR_MULTICAST ||
	    (!(idev->dev->flags & IFF_LOOPBACK) &&
	     addr_type & IPV6_ADDR_LOOPBACK))
		return ERR_PTR(-EADDRNOTAVAIL);

	rcu_read_lock_bh();
	if (idev->dead) {
		err = -ENODEV;	/*XXX*/
		goto out2;
	}

	if (idev->cnf.disable_ipv6) {
		err = -EACCES;
		goto out2;
	}

	spin_lock(&addrconf_hash_lock);

	/* Ignore adding duplicate addresses on an interface */
	if (ipv6_chk_same_addr(dev_net(idev->dev), addr, idev->dev)) {
		ADBG(("ipv6_add_addr: already assigned\n"));
		err = -EEXIST;
		goto out;
	}

	/* GFP_ATOMIC: we hold addrconf_hash_lock and run in BH context. */
	ifa = kzalloc(sizeof(struct inet6_ifaddr), GFP_ATOMIC);

	if (ifa == NULL) {
		ADBG(("ipv6_add_addr: malloc failed\n"));
		err = -ENOBUFS;
		goto out;
	}

	rt = addrconf_dst_alloc(idev, addr, 0);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		goto out;
	}

	ipv6_addr_copy(&ifa->addr, addr);

	spin_lock_init(&ifa->lock);
	spin_lock_init(&ifa->state_lock);
	init_timer(&ifa->timer);
	INIT_HLIST_NODE(&ifa->addr_lst);
	ifa->timer.data = (unsigned long) ifa;
	ifa->scope = scope;
	ifa->prefix_len = pfxlen;
	ifa->flags = flags | IFA_F_TENTATIVE;
	ifa->cstamp = ifa->tstamp = jiffies;

	ifa->rt = rt;

	/*
	 * part one of RFC 4429, section 3.3
	 * We should not configure an address as
	 * optimistic if we do not yet know the link
	 * layer address of our nexhop router
	 */

	/* Raw (lockless) accessor: we are inside rcu_read_lock_bh(). */
	if (dst_get_neighbour_raw(&rt->dst) == NULL)
		ifa->flags &= ~IFA_F_OPTIMISTIC;

	ifa->idev = idev;
	in6_dev_hold(idev);
	/* For caller */
	in6_ifa_hold(ifa);

	/* Add to big hash table */
	hash = ipv6_addr_hash(addr);

	hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]);
	spin_unlock(&addrconf_hash_lock);

	write_lock(&idev->lock);
	/* Add to inet6_dev unicast addr list. */
	ipv6_link_dev_addr(idev, ifa);

#ifdef CONFIG_IPV6_PRIVACY
	if (ifa->flags&IFA_F_TEMPORARY) {
		list_add(&ifa->tmp_list, &idev->tempaddr_list);
		in6_ifa_hold(ifa);
	}
#endif

	in6_ifa_hold(ifa);
	write_unlock(&idev->lock);
out2:
	rcu_read_unlock_bh();

	if (likely(err == 0))
		atomic_notifier_call_chain(&inet6addr_chain, NETDEV_UP, ifa);
	else {
		/* ifa is either NULL or not yet published; plain kfree is safe. */
		kfree(ifa);
		ifa = ERR_PTR(err);
	}

	return ifa;
out:
	spin_unlock(&addrconf_hash_lock);
	goto out2;
}
701 | 701 | ||
/* This function wants to get referenced ifp and releases it before return */

/*
 * Tear down one IPv6 address: mark it dead, unhash it, unlink it from its
 * device, notify listeners (RTM_DELADDR + inet6addr_chain), and purge or
 * update the corresponding prefix route.
 *
 * The caller must hold a reference on @ifp; it is dropped before return.
 */
static void ipv6_del_addr(struct inet6_ifaddr *ifp)
{
	struct inet6_ifaddr *ifa, *ifn;
	struct inet6_dev *idev = ifp->idev;
	int state;
	/*
	 * onlink: 0  = no other address on the device shares ifp's prefix,
	 *         1  = another PERMANENT address shares the prefix,
	 *         -1 = only non-permanent addresses share the prefix.
	 * deleted: set once ifp itself has been unlinked from the list.
	 */
	int deleted = 0, onlink = 0;
	unsigned long expires = jiffies;

	/* Claim the address: whoever flips state to DEAD does the teardown.
	 * If it was already DEAD, someone else is handling it - just put. */
	spin_lock_bh(&ifp->state_lock);
	state = ifp->state;
	ifp->state = INET6_IFADDR_STATE_DEAD;
	spin_unlock_bh(&ifp->state_lock);

	if (state == INET6_IFADDR_STATE_DEAD)
		goto out;

	/* Remove from the global address hash table */
	spin_lock_bh(&addrconf_hash_lock);
	hlist_del_init_rcu(&ifp->addr_lst);
	spin_unlock_bh(&addrconf_hash_lock);

	write_lock_bh(&idev->lock);
#ifdef CONFIG_IPV6_PRIVACY
	if (ifp->flags&IFA_F_TEMPORARY) {
		/* Temporary (privacy) address: drop it from the tmp list and
		 * release the reference it held on its public counterpart. */
		list_del(&ifp->tmp_list);
		if (ifp->ifpub) {
			in6_ifa_put(ifp->ifpub);
			ifp->ifpub = NULL;
		}
		__in6_ifa_put(ifp);
	}
#endif

	/* Unlink ifp from the device list.  For a permanent address, also
	 * scan the other addresses sharing its prefix to decide the fate of
	 * the prefix route (see the big comment further down), collecting
	 * the longest remaining valid lifetime into 'expires'. */
	list_for_each_entry_safe(ifa, ifn, &idev->addr_list, if_list) {
		if (ifa == ifp) {
			list_del_init(&ifp->if_list);
			__in6_ifa_put(ifp);

			if (!(ifp->flags & IFA_F_PERMANENT) || onlink > 0)
				break;
			deleted = 1;
			continue;
		} else if (ifp->flags & IFA_F_PERMANENT) {
			if (ipv6_prefix_equal(&ifa->addr, &ifp->addr,
					      ifp->prefix_len)) {
				if (ifa->flags & IFA_F_PERMANENT) {
					onlink = 1;
					if (deleted)
						break;
				} else {
					unsigned long lifetime;

					if (!onlink)
						onlink = -1;

					spin_lock(&ifa->lock);

					lifetime = addrconf_timeout_fixup(ifa->valid_lft, HZ);
					/*
					 * Note: Because this address is
					 * not permanent, lifetime <
					 * LONG_MAX / HZ here.
					 */
					if (time_before(expires,
							ifa->tstamp + lifetime * HZ))
						expires = ifa->tstamp + lifetime * HZ;
					spin_unlock(&ifa->lock);
				}
			}
		}
	}
	write_unlock_bh(&idev->lock);

	addrconf_del_timer(ifp);

	ipv6_ifa_notify(RTM_DELADDR, ifp);

	atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifp);

	/*
	 * Purge or update corresponding prefix
	 *
	 * 1) we don't purge prefix here if address was not permanent.
	 *    prefix is managed by its own lifetime.
	 * 2) if there're no addresses, delete prefix.
	 * 3) if there're still other permanent address(es),
	 *    corresponding prefix is still permanent.
	 * 4) otherwise, update prefix lifetime to the
	 *    longest valid lifetime among the corresponding
	 *    addresses on the device.
	 *    Note: subsequent RA will update lifetime.
	 *
	 * --yoshfuji
	 */
	if ((ifp->flags & IFA_F_PERMANENT) && onlink < 1) {
		struct in6_addr prefix;
		struct rt6_info *rt;
		struct net *net = dev_net(ifp->idev->dev);
		ipv6_addr_prefix(&prefix, &ifp->addr, ifp->prefix_len);
		rt = rt6_lookup(net, &prefix, NULL, ifp->idev->dev->ifindex, 1);

		if (rt && addrconf_is_prefix_route(rt)) {
			if (onlink == 0) {
				ip6_del_rt(rt);
				rt = NULL;
			} else if (!(rt->rt6i_flags & RTF_EXPIRES)) {
				rt->rt6i_expires = expires;
				rt->rt6i_flags |= RTF_EXPIRES;
			}
		}
		/* NOTE(review): rt may be NULL here (lookup miss or after
		 * ip6_del_rt); this appears to rely on 'dst' being the first
		 * member of rt6_info so that &rt->dst is also NULL - verify. */
		dst_release(&rt->dst);
	}

	/* clean up prefsrc entries */
	rt6_remove_prefsrc(ifp);
out:
	in6_ifa_put(ifp);
}
821 | 821 | ||
#ifdef CONFIG_IPV6_PRIVACY
/*
 * Create a new temporary (privacy) address derived from the public
 * address @ifp.  If @ift is non-NULL, the interface identifier (low 64
 * bits) of that old temporary address seeds the regeneration; otherwise
 * a fresh randomized identifier is used.
 *
 * Returns 0 on success, -1 on failure (use_tempaddr disabled, retry
 * budget exhausted, rndid regeneration failed, or the computed preferred
 * lifetime is too short to be useful).
 */
static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *ift)
{
	struct inet6_dev *idev = ifp->idev;
	struct in6_addr addr, *tmpaddr;
	unsigned long tmp_prefered_lft, tmp_valid_lft, tmp_cstamp, tmp_tstamp, age;
	unsigned long regen_advance;
	int tmp_plen;
	int ret = 0;
	int max_addresses;
	u32 addr_flags;

	write_lock(&idev->lock);
	if (ift) {
		/* Seed regeneration with the old temporary address's
		 * interface identifier (low 64 bits). */
		spin_lock_bh(&ift->lock);
		memcpy(&addr.s6_addr[8], &ift->addr.s6_addr[8], 8);
		spin_unlock_bh(&ift->lock);
		tmpaddr = &addr;
	} else {
		tmpaddr = NULL;
	}
retry:
	/* idev->lock is held (write) on every pass through this label */
	in6_dev_hold(idev);
	if (idev->cnf.use_tempaddr <= 0) {
		write_unlock(&idev->lock);
		printk(KERN_INFO
			"ipv6_create_tempaddr(): use_tempaddr is disabled.\n");
		in6_dev_put(idev);
		ret = -1;
		goto out;
	}
	spin_lock_bh(&ifp->lock);
	if (ifp->regen_count++ >= idev->cnf.regen_max_retry) {
		/* Retry budget exhausted: disable temporary addresses for
		 * this device entirely. */
		idev->cnf.use_tempaddr = -1;	/*XXX*/
		spin_unlock_bh(&ifp->lock);
		write_unlock(&idev->lock);
		printk(KERN_WARNING
			"ipv6_create_tempaddr(): regeneration time exceeded. disabled temporary address support.\n");
		in6_dev_put(idev);
		ret = -1;
		goto out;
	}
	in6_ifa_hold(ifp);
	/* Prefix (high 64 bits) comes from the public address... */
	memcpy(addr.s6_addr, ifp->addr.s6_addr, 8);
	if (__ipv6_try_regen_rndid(idev, tmpaddr) < 0) {
		spin_unlock_bh(&ifp->lock);
		write_unlock(&idev->lock);
		printk(KERN_WARNING
			"ipv6_create_tempaddr(): regeneration of randomized interface id failed.\n");
		in6_ifa_put(ifp);
		in6_dev_put(idev);
		ret = -1;
		goto out;
	}
	/* ...and the interface id from the freshly regenerated rndid. */
	memcpy(&addr.s6_addr[8], idev->rndid, 8);
	age = (jiffies - ifp->tstamp) / HZ;
	/* The temporary lifetimes are capped by the public address's own
	 * remaining lifetimes (both measured from ifp->tstamp). */
	tmp_valid_lft = min_t(__u32,
			      ifp->valid_lft,
			      idev->cnf.temp_valid_lft + age);
	tmp_prefered_lft = min_t(__u32,
				 ifp->prefered_lft,
				 idev->cnf.temp_prefered_lft + age -
				 idev->cnf.max_desync_factor);
	tmp_plen = ifp->prefix_len;
	max_addresses = idev->cnf.max_addresses;
	tmp_cstamp = ifp->cstamp;
	tmp_tstamp = ifp->tstamp;
	spin_unlock_bh(&ifp->lock);

	regen_advance = idev->cnf.regen_max_retry *
			idev->cnf.dad_transmits *
			idev->nd_parms->retrans_time / HZ;
	write_unlock(&idev->lock);

	/* A temporary address is created only if this calculated Preferred
	 * Lifetime is greater than REGEN_ADVANCE time units. In particular,
	 * an implementation must not create a temporary address with a zero
	 * Preferred Lifetime.
	 */
	if (tmp_prefered_lft <= regen_advance) {
		in6_ifa_put(ifp);
		in6_dev_put(idev);
		ret = -1;
		goto out;
	}

	addr_flags = IFA_F_TEMPORARY;
	/* set in addrconf_prefix_rcv() */
	if (ifp->flags & IFA_F_OPTIMISTIC)
		addr_flags |= IFA_F_OPTIMISTIC;

	/* Register the new address unless the per-device limit
	 * (max_addresses, 0 = unlimited) would be exceeded. */
	ift = !max_addresses ||
	      ipv6_count_addresses(idev) < max_addresses ?
		ipv6_add_addr(idev, &addr, tmp_plen,
			      ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK,
			      addr_flags) : NULL;
	if (!ift || IS_ERR(ift)) {
		in6_ifa_put(ifp);
		in6_dev_put(idev);
		printk(KERN_INFO
			"ipv6_create_tempaddr(): retry temporary address regeneration.\n");
		tmpaddr = &addr;
		write_lock(&idev->lock);
		goto retry;
	}

	spin_lock_bh(&ift->lock);
	/* ift->ifpub keeps the reference taken on ifp via in6_ifa_hold()
	 * above; it is released when the temporary address is deleted. */
	ift->ifpub = ifp;
	ift->valid_lft = tmp_valid_lft;
	ift->prefered_lft = tmp_prefered_lft;
	ift->cstamp = tmp_cstamp;
	ift->tstamp = tmp_tstamp;
	spin_unlock_bh(&ift->lock);

	/* Kick off duplicate address detection on the new address */
	addrconf_dad_start(ift, 0);
	in6_ifa_put(ift);
	in6_dev_put(idev);
out:
	return ret;
}
#endif
943 | 943 | ||
/*
 * Choose an appropriate source address (RFC3484)
 *
 * Rule indices for source address selection; evaluated in increasing
 * order by ipv6_get_saddr_eval().  Comments give the rule as applied
 * in that function.
 */
enum {
	IPV6_SADDR_RULE_INIT = 0,	/* rule 0: is a candidate recorded yet? */
	IPV6_SADDR_RULE_LOCAL,		/* rule 1: prefer same address as dst */
	IPV6_SADDR_RULE_SCOPE,		/* rule 2: prefer appropriate scope */
	IPV6_SADDR_RULE_PREFERRED,	/* rule 3: avoid deprecated/optimistic */
#ifdef CONFIG_IPV6_MIP6
	IPV6_SADDR_RULE_HOA,		/* rule 4: prefer home address */
#endif
	IPV6_SADDR_RULE_OIF,		/* rule 5: prefer outgoing interface */
	IPV6_SADDR_RULE_LABEL,		/* rule 6: prefer matching label */
#ifdef CONFIG_IPV6_PRIVACY
	IPV6_SADDR_RULE_PRIVACY,	/* rule 7: public vs temporary address */
#endif
	IPV6_SADDR_RULE_ORCHID,		/* rule 8-: ORCHID vs non-ORCHID */
	IPV6_SADDR_RULE_PREFIX,		/* rule 8: longest matching prefix */
	IPV6_SADDR_RULE_MAX
};
964 | 964 | ||
/* Running evaluation state for one candidate source address
 * (see ipv6_get_saddr_eval() / ipv6_dev_get_saddr()). */
struct ipv6_saddr_score {
	int rule;		/* highest rule index evaluated so far (-1 = none) */
	int addr_type;		/* __ipv6_addr_type() of ifa->addr */
	struct inet6_ifaddr *ifa;	/* the candidate address itself */
	DECLARE_BITMAP(scorebits, IPV6_SADDR_RULE_MAX);	/* per-rule pass/fail bits */
	int scopedist;		/* cached result of the scope rule */
	int matchlen;		/* cached common-prefix length with dst */
};
973 | 973 | ||
/* Destination-side parameters driving source address selection */
struct ipv6_saddr_dst {
	const struct in6_addr *addr;	/* destination address */
	int ifindex;		/* outgoing interface index (0 = unspecified) */
	int scope;		/* destination address scope */
	int label;		/* destination address label */
	unsigned int prefs;	/* IPV6_PREFER_SRC_* preference flags */
};
981 | 981 | ||
982 | static inline int ipv6_saddr_preferred(int type) | 982 | static inline int ipv6_saddr_preferred(int type) |
983 | { | 983 | { |
984 | if (type & (IPV6_ADDR_MAPPED|IPV6_ADDR_COMPATv4|IPV6_ADDR_LOOPBACK)) | 984 | if (type & (IPV6_ADDR_MAPPED|IPV6_ADDR_COMPATv4|IPV6_ADDR_LOOPBACK)) |
985 | return 1; | 985 | return 1; |
986 | return 0; | 986 | return 0; |
987 | } | 987 | } |
988 | 988 | ||
/*
 * Evaluate source-address selection rule @i (RFC 3484) for the candidate
 * in @score against destination @dst.  Rules must be evaluated in
 * increasing order of @i: for a rule already evaluated on this candidate
 * (i <= score->rule) the cached result is replayed instead of being
 * recomputed.  Returns the rule's score (higher is better) and records a
 * pass in score->scorebits.
 */
static int ipv6_get_saddr_eval(struct net *net,
			       struct ipv6_saddr_score *score,
			       struct ipv6_saddr_dst *dst,
			       int i)
{
	int ret;

	if (i <= score->rule) {
		/* Replay a previously computed result for this rule */
		switch (i) {
		case IPV6_SADDR_RULE_SCOPE:
			ret = score->scopedist;
			break;
		case IPV6_SADDR_RULE_PREFIX:
			ret = score->matchlen;
			break;
		default:
			ret = !!test_bit(i, score->scorebits);
		}
		goto out;
	}

	switch (i) {
	case IPV6_SADDR_RULE_INIT:
		/* Rule 0: remember if hiscore is not ready yet */
		ret = !!score->ifa;
		break;
	case IPV6_SADDR_RULE_LOCAL:
		/* Rule 1: Prefer same address */
		ret = ipv6_addr_equal(&score->ifa->addr, dst->addr);
		break;
	case IPV6_SADDR_RULE_SCOPE:
		/* Rule 2: Prefer appropriate scope
		 *
		 *      ret
		 *       ^
		 *    -1 |  d 15
		 *  ---+--+-+---> scope
		 *     |
		 *     |             d is scope of the destination.
		 *  B-d  |  \
		 *     |   \      <- smaller scope is better if
		 *  B-15 |    \        if scope is enough for destinaion.
		 *     |             ret = B - scope (-1 <= scope >= d <= 15).
		 * d-C-1 |  /
		 *     |/         <- greater is better
		 *   -C /              if scope is not enough for destination.
		 *    /|               ret = scope - C (-1 <= d < scope <= 15).
		 *
		 * d - C - 1 < B -15 (for all -1 <= d <= 15).
		 * C > d + 14 - B >= 15 + 14 - B = 29 - B.
		 * Assume B = 0 and we get C > 29.
		 */
		ret = __ipv6_addr_src_scope(score->addr_type);
		if (ret >= dst->scope)
			ret = -ret;
		else
			ret -= 128;	/* 30 is enough */
		score->scopedist = ret;
		break;
	case IPV6_SADDR_RULE_PREFERRED:
		/* Rule 3: Avoid deprecated and optimistic addresses */
		ret = ipv6_saddr_preferred(score->addr_type) ||
		      !(score->ifa->flags & (IFA_F_DEPRECATED|IFA_F_OPTIMISTIC));
		break;
#ifdef CONFIG_IPV6_MIP6
	case IPV6_SADDR_RULE_HOA:
	    {
		/* Rule 4: Prefer home address */
		int prefhome = !(dst->prefs & IPV6_PREFER_SRC_COA);
		ret = !(score->ifa->flags & IFA_F_HOMEADDRESS) ^ prefhome;
		break;
	    }
#endif
	case IPV6_SADDR_RULE_OIF:
		/* Rule 5: Prefer outgoing interface */
		ret = (!dst->ifindex ||
		       dst->ifindex == score->ifa->idev->dev->ifindex);
		break;
	case IPV6_SADDR_RULE_LABEL:
		/* Rule 6: Prefer matching label */
		ret = ipv6_addr_label(net,
				      &score->ifa->addr, score->addr_type,
				      score->ifa->idev->dev->ifindex) == dst->label;
		break;
#ifdef CONFIG_IPV6_PRIVACY
	case IPV6_SADDR_RULE_PRIVACY:
	    {
		/* Rule 7: Prefer public address
		 * Note: prefer temporary address if use_tempaddr >= 2
		 */
		int preftmp = dst->prefs & (IPV6_PREFER_SRC_PUBLIC|IPV6_PREFER_SRC_TMP) ?
				!!(dst->prefs & IPV6_PREFER_SRC_TMP) :
				score->ifa->idev->cnf.use_tempaddr >= 2;
		ret = (!(score->ifa->flags & IFA_F_TEMPORARY)) ^ preftmp;
		break;
	    }
#endif
	case IPV6_SADDR_RULE_ORCHID:
		/* Rule 8-: Prefer ORCHID vs ORCHID or
		 *	    non-ORCHID vs non-ORCHID
		 */
		ret = !(ipv6_addr_orchid(&score->ifa->addr) ^
			ipv6_addr_orchid(dst->addr));
		break;
	case IPV6_SADDR_RULE_PREFIX:
		/* Rule 8: Use longest matching prefix */
		score->matchlen = ret = ipv6_addr_diff(&score->ifa->addr,
						       dst->addr);
		break;
	default:
		ret = 0;
	}

	if (ret)
		__set_bit(i, score->scorebits);
	score->rule = i;
out:
	return ret;
}
1108 | 1108 | ||
/*
 * Select the best source address (RFC 3484) in namespace @net for
 * destination @daddr, preferring the outgoing device @dst_dev (may be
 * NULL) and honouring the IPV6_PREFER_SRC_* bits in @prefs.
 *
 * On success the chosen address is copied into @saddr and 0 is
 * returned; -EADDRNOTAVAIL if no usable candidate exists.
 *
 * Two ipv6_saddr_score slots are used in ping-pong fashion: 'hiscore'
 * tracks the best candidate so far (with a held reference on its ifa),
 * 'score' the candidate currently being evaluated.
 */
int ipv6_dev_get_saddr(struct net *net, struct net_device *dst_dev,
		       const struct in6_addr *daddr, unsigned int prefs,
		       struct in6_addr *saddr)
{
	struct ipv6_saddr_score scores[2],
				*score = &scores[0], *hiscore = &scores[1];
	struct ipv6_saddr_dst dst;
	struct net_device *dev;
	int dst_type;

	dst_type = __ipv6_addr_type(daddr);
	dst.addr = daddr;
	dst.ifindex = dst_dev ? dst_dev->ifindex : 0;
	dst.scope = __ipv6_addr_src_scope(dst_type);
	dst.label = ipv6_addr_label(net, daddr, dst_type, dst.ifindex);
	dst.prefs = prefs;

	hiscore->rule = -1;
	hiscore->ifa = NULL;

	rcu_read_lock();

	for_each_netdev_rcu(net, dev) {
		struct inet6_dev *idev;

		/* Candidate Source Address (section 4)
		 *  - multicast and link-local destination address,
		 *    the set of candidate source address MUST only
		 *    include addresses assigned to interfaces
		 *    belonging to the same link as the outgoing
		 *    interface.
		 * (- For site-local destination addresses, the
		 *    set of candidate source addresses MUST only
		 *    include addresses assigned to interfaces
		 *    belonging to the same site as the outgoing
		 *    interface.)
		 */
		if (((dst_type & IPV6_ADDR_MULTICAST) ||
		     dst.scope <= IPV6_ADDR_SCOPE_LINKLOCAL) &&
		    dst.ifindex && dev->ifindex != dst.ifindex)
			continue;

		idev = __in6_dev_get(dev);
		if (!idev)
			continue;

		read_lock_bh(&idev->lock);
		list_for_each_entry(score->ifa, &idev->addr_list, if_list) {
			int i;

			/*
			 * - Tentative Address (RFC2462 section 5.4)
			 *  - A tentative address is not considered
			 *    "assigned to an interface" in the traditional
			 *    sense, unless it is also flagged as optimistic.
			 * - Candidate Source Address (section 4)
			 *  - In any case, anycast addresses, multicast
			 *    addresses, and the unspecified address MUST
			 *    NOT be included in a candidate set.
			 */
			if ((score->ifa->flags & IFA_F_TENTATIVE) &&
			    (!(score->ifa->flags & IFA_F_OPTIMISTIC)))
				continue;

			score->addr_type = __ipv6_addr_type(&score->ifa->addr);

			if (unlikely(score->addr_type == IPV6_ADDR_ANY ||
				     score->addr_type & IPV6_ADDR_MULTICAST)) {
				LIMIT_NETDEBUG(KERN_DEBUG
					       "ADDRCONF: unspecified / multicast address "
					       "assigned as unicast address on %s",
					       dev->name);
				continue;
			}

			score->rule = -1;
			bitmap_zero(score->scorebits, IPV6_SADDR_RULE_MAX);

			/* Apply the rules in order until one of them
			 * decides between 'score' and 'hiscore'. */
			for (i = 0; i < IPV6_SADDR_RULE_MAX; i++) {
				int minihiscore, miniscore;

				minihiscore = ipv6_get_saddr_eval(net, hiscore, &dst, i);
				miniscore = ipv6_get_saddr_eval(net, score, &dst, i);

				if (minihiscore > miniscore) {
					if (i == IPV6_SADDR_RULE_SCOPE &&
					    score->scopedist > 0) {
						/*
						 * special case:
						 * each remaining entry
						 * has too small (not enough)
						 * scope, because ifa entries
						 * are sorted by their scope
						 * values.
						 */
						goto try_nextdev;
					}
					break;
				} else if (minihiscore < miniscore) {
					/* New best candidate: take a ref and
					 * swap the score slots. */
					if (hiscore->ifa)
						in6_ifa_put(hiscore->ifa);

					in6_ifa_hold(score->ifa);

					swap(hiscore, score);

					/* restore our iterator */
					score->ifa = hiscore->ifa;

					break;
				}
			}
		}
try_nextdev:
		read_unlock_bh(&idev->lock);
	}
	rcu_read_unlock();

	if (!hiscore->ifa)
		return -EADDRNOTAVAIL;

	ipv6_addr_copy(saddr, &hiscore->ifa->addr);
	in6_ifa_put(hiscore->ifa);
	return 0;
}
EXPORT_SYMBOL(ipv6_dev_get_saddr);
1235 | 1235 | ||
/*
 * Copy @dev's link-local address (scope IFA_LINK) into *addr, skipping
 * entries whose flags intersect @banned_flags (callers typically ban
 * tentative addresses still undergoing DAD).
 * Returns 0 on success, -EADDRNOTAVAIL when the device has no usable
 * link-local address or no inet6_dev attached at all.
 */
int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
		    unsigned char banned_flags)
{
	struct inet6_dev *idev;
	int err = -EADDRNOTAVAIL;

	rcu_read_lock();
	idev = __in6_dev_get(dev);	/* RCU-protected; may be NULL */
	if (idev) {
		struct inet6_ifaddr *ifp;

		read_lock_bh(&idev->lock);
		list_for_each_entry(ifp, &idev->addr_list, if_list) {
			/* first link-local entry without a banned flag wins */
			if (ifp->scope == IFA_LINK &&
			    !(ifp->flags & banned_flags)) {
				ipv6_addr_copy(addr, &ifp->addr);
				err = 0;
				break;
			}
		}
		read_unlock_bh(&idev->lock);
	}
	rcu_read_unlock();
	return err;
}
1261 | 1261 | ||
1262 | static int ipv6_count_addresses(struct inet6_dev *idev) | 1262 | static int ipv6_count_addresses(struct inet6_dev *idev) |
1263 | { | 1263 | { |
1264 | int cnt = 0; | 1264 | int cnt = 0; |
1265 | struct inet6_ifaddr *ifp; | 1265 | struct inet6_ifaddr *ifp; |
1266 | 1266 | ||
1267 | read_lock_bh(&idev->lock); | 1267 | read_lock_bh(&idev->lock); |
1268 | list_for_each_entry(ifp, &idev->addr_list, if_list) | 1268 | list_for_each_entry(ifp, &idev->addr_list, if_list) |
1269 | cnt++; | 1269 | cnt++; |
1270 | read_unlock_bh(&idev->lock); | 1270 | read_unlock_bh(&idev->lock); |
1271 | return cnt; | 1271 | return cnt; |
1272 | } | 1272 | } |
1273 | 1273 | ||
/*
 * Test whether @addr is configured and past DAD on @dev (or, when @dev
 * is NULL, on any device in @net).  With strict == 0 a match on a
 * *different* device is also accepted, unless the matching address has
 * link or host scope.  Returns 1 on match, 0 otherwise.
 */
int ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
		  struct net_device *dev, int strict)
{
	struct inet6_ifaddr *ifp;
	struct hlist_node *node;
	unsigned int hash = ipv6_addr_hash(addr);

	rcu_read_lock_bh();
	hlist_for_each_entry_rcu(ifp, node, &inet6_addr_lst[hash], addr_lst) {
		if (!net_eq(dev_net(ifp->idev->dev), net))
			continue;
		/* tentative addresses (DAD in progress) never match here */
		if (ipv6_addr_equal(&ifp->addr, addr) &&
		    !(ifp->flags&IFA_F_TENTATIVE) &&
		    (dev == NULL || ifp->idev->dev == dev ||
		     !(ifp->scope&(IFA_LINK|IFA_HOST) || strict))) {
			rcu_read_unlock_bh();
			return 1;
		}
	}

	rcu_read_unlock_bh();
	return 0;
}
EXPORT_SYMBOL(ipv6_chk_addr);
1298 | 1298 | ||
1299 | static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, | 1299 | static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, |
1300 | struct net_device *dev) | 1300 | struct net_device *dev) |
1301 | { | 1301 | { |
1302 | unsigned int hash = ipv6_addr_hash(addr); | 1302 | unsigned int hash = ipv6_addr_hash(addr); |
1303 | struct inet6_ifaddr *ifp; | 1303 | struct inet6_ifaddr *ifp; |
1304 | struct hlist_node *node; | 1304 | struct hlist_node *node; |
1305 | 1305 | ||
1306 | hlist_for_each_entry(ifp, node, &inet6_addr_lst[hash], addr_lst) { | 1306 | hlist_for_each_entry(ifp, node, &inet6_addr_lst[hash], addr_lst) { |
1307 | if (!net_eq(dev_net(ifp->idev->dev), net)) | 1307 | if (!net_eq(dev_net(ifp->idev->dev), net)) |
1308 | continue; | 1308 | continue; |
1309 | if (ipv6_addr_equal(&ifp->addr, addr)) { | 1309 | if (ipv6_addr_equal(&ifp->addr, addr)) { |
1310 | if (dev == NULL || ifp->idev->dev == dev) | 1310 | if (dev == NULL || ifp->idev->dev == dev) |
1311 | return true; | 1311 | return true; |
1312 | } | 1312 | } |
1313 | } | 1313 | } |
1314 | return false; | 1314 | return false; |
1315 | } | 1315 | } |
1316 | 1316 | ||
/*
 * Return nonzero when @addr falls inside any prefix configured on
 * @dev, i.e. the destination is considered on-link for that interface.
 */
int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev)
{
	struct inet6_dev *idev;
	struct inet6_ifaddr *ifa;
	int onlink;

	onlink = 0;
	rcu_read_lock();
	idev = __in6_dev_get(dev);
	if (idev) {
		read_lock_bh(&idev->lock);
		list_for_each_entry(ifa, &idev->addr_list, if_list) {
			/* compare only the leading ifa->prefix_len bits */
			onlink = ipv6_prefix_equal(addr, &ifa->addr,
						   ifa->prefix_len);
			if (onlink)
				break;
		}
		read_unlock_bh(&idev->lock);
	}
	rcu_read_unlock();
	return onlink;
}

EXPORT_SYMBOL(ipv6_chk_prefix);
1341 | 1341 | ||
/*
 * Look up the inet6_ifaddr for @addr in @net, applying the same
 * device/scope matching rules as ipv6_chk_addr(); unlike that helper,
 * tentative (DAD-pending) addresses DO match here.
 * On success the entry is returned with an extra reference held — the
 * caller must drop it with in6_ifa_put().  Returns NULL if not found.
 */
struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *addr,
				     struct net_device *dev, int strict)
{
	struct inet6_ifaddr *ifp, *result = NULL;
	unsigned int hash = ipv6_addr_hash(addr);
	struct hlist_node *node;

	rcu_read_lock_bh();
	hlist_for_each_entry_rcu_bh(ifp, node, &inet6_addr_lst[hash], addr_lst) {
		if (!net_eq(dev_net(ifp->idev->dev), net))
			continue;
		if (ipv6_addr_equal(&ifp->addr, addr)) {
			/* accept: any-device lookup, exact device match, or
			 * a non-strict lookup for an address that is neither
			 * link- nor host-scoped */
			if (dev == NULL || ifp->idev->dev == dev ||
			    !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) {
				result = ifp;
				in6_ifa_hold(ifp);
				break;
			}
		}
	}
	rcu_read_unlock_bh();

	return result;
}
1366 | 1366 | ||
1367 | /* Gets referenced address, destroys ifaddr */ | 1367 | /* Gets referenced address, destroys ifaddr */ |
1368 | 1368 | ||
/*
 * Stop DAD processing for @ifp and decide the address's fate:
 * - permanent addresses are kept but parked back in tentative state
 *   (marked IFA_F_DADFAILED when a duplicate was detected);
 * - temporary (privacy) addresses get a replacement generated from
 *   their public address, then are deleted;
 * - everything else is simply deleted.
 * Consumes the caller's reference on @ifp on the permanent path.
 */
static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
{
	if (ifp->flags&IFA_F_PERMANENT) {
		spin_lock_bh(&ifp->lock);
		addrconf_del_timer(ifp);
		/* keep the address, but it is no longer usable until
		 * DAD is re-run successfully */
		ifp->flags |= IFA_F_TENTATIVE;
		if (dad_failed)
			ifp->flags |= IFA_F_DADFAILED;
		spin_unlock_bh(&ifp->lock);
		if (dad_failed)
			ipv6_ifa_notify(0, ifp);
		in6_ifa_put(ifp);
#ifdef CONFIG_IPV6_PRIVACY
	} else if (ifp->flags&IFA_F_TEMPORARY) {
		struct inet6_ifaddr *ifpub;
		spin_lock_bh(&ifp->lock);
		ifpub = ifp->ifpub;
		if (ifpub) {
			/* hold the public address across the unlock so it
			 * cannot vanish while we derive a replacement */
			in6_ifa_hold(ifpub);
			spin_unlock_bh(&ifp->lock);
			ipv6_create_tempaddr(ifpub, ifp);
			in6_ifa_put(ifpub);
		} else {
			spin_unlock_bh(&ifp->lock);
		}
		ipv6_del_addr(ifp);
#endif
	} else
		ipv6_del_addr(ifp);
}
1399 | 1399 | ||
1400 | static int addrconf_dad_end(struct inet6_ifaddr *ifp) | 1400 | static int addrconf_dad_end(struct inet6_ifaddr *ifp) |
1401 | { | 1401 | { |
1402 | int err = -ENOENT; | 1402 | int err = -ENOENT; |
1403 | 1403 | ||
1404 | spin_lock(&ifp->state_lock); | 1404 | spin_lock(&ifp->state_lock); |
1405 | if (ifp->state == INET6_IFADDR_STATE_DAD) { | 1405 | if (ifp->state == INET6_IFADDR_STATE_DAD) { |
1406 | ifp->state = INET6_IFADDR_STATE_POSTDAD; | 1406 | ifp->state = INET6_IFADDR_STATE_POSTDAD; |
1407 | err = 0; | 1407 | err = 0; |
1408 | } | 1408 | } |
1409 | spin_unlock(&ifp->state_lock); | 1409 | spin_unlock(&ifp->state_lock); |
1410 | 1410 | ||
1411 | return err; | 1411 | return err; |
1412 | } | 1412 | } |
1413 | 1413 | ||
/*
 * Handle a failed duplicate-address-detection for @ifp: log the
 * duplicate and tear the address down via addrconf_dad_stop().
 * As a special hardening measure (accept_dad > 1), a duplicate of the
 * EUI-64-derived link-local address means another node is using our
 * hardware address, so IPv6 is disabled on the interface entirely.
 * Consumes the caller's reference on @ifp.
 */
void addrconf_dad_failure(struct inet6_ifaddr *ifp)
{
	struct inet6_dev *idev = ifp->idev;

	if (addrconf_dad_end(ifp)) {
		/* DAD already ended elsewhere; just drop our reference */
		in6_ifa_put(ifp);
		return;
	}

	if (net_ratelimit())
		printk(KERN_INFO "%s: IPv6 duplicate address %pI6c detected!\n",
			ifp->idev->dev->name, &ifp->addr);

	if (idev->cnf.accept_dad > 1 && !idev->cnf.disable_ipv6) {
		struct in6_addr addr;

		/* rebuild fe80:: + EUI-64 to compare with the failed addr */
		addr.s6_addr32[0] = htonl(0xfe800000);
		addr.s6_addr32[1] = 0;

		if (!ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) &&
		    ipv6_addr_equal(&ifp->addr, &addr)) {
			/* DAD failed for link-local based on MAC address */
			idev->cnf.disable_ipv6 = 1;

			printk(KERN_INFO "%s: IPv6 being disabled!\n",
				ifp->idev->dev->name);
		}
	}

	addrconf_dad_stop(ifp, 1);
}
1445 | 1445 | ||
1446 | /* Join to solicited addr multicast group. */ | 1446 | /* Join to solicited addr multicast group. */ |
1447 | 1447 | ||
1448 | void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr) | 1448 | void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr) |
1449 | { | 1449 | { |
1450 | struct in6_addr maddr; | 1450 | struct in6_addr maddr; |
1451 | 1451 | ||
1452 | if (dev->flags&(IFF_LOOPBACK|IFF_NOARP)) | 1452 | if (dev->flags&(IFF_LOOPBACK|IFF_NOARP)) |
1453 | return; | 1453 | return; |
1454 | 1454 | ||
1455 | addrconf_addr_solict_mult(addr, &maddr); | 1455 | addrconf_addr_solict_mult(addr, &maddr); |
1456 | ipv6_dev_mc_inc(dev, &maddr); | 1456 | ipv6_dev_mc_inc(dev, &maddr); |
1457 | } | 1457 | } |
1458 | 1458 | ||
1459 | void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr) | 1459 | void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr) |
1460 | { | 1460 | { |
1461 | struct in6_addr maddr; | 1461 | struct in6_addr maddr; |
1462 | 1462 | ||
1463 | if (idev->dev->flags&(IFF_LOOPBACK|IFF_NOARP)) | 1463 | if (idev->dev->flags&(IFF_LOOPBACK|IFF_NOARP)) |
1464 | return; | 1464 | return; |
1465 | 1465 | ||
1466 | addrconf_addr_solict_mult(addr, &maddr); | 1466 | addrconf_addr_solict_mult(addr, &maddr); |
1467 | __ipv6_dev_mc_dec(idev, &maddr); | 1467 | __ipv6_dev_mc_dec(idev, &maddr); |
1468 | } | 1468 | } |
1469 | 1469 | ||
1470 | static void addrconf_join_anycast(struct inet6_ifaddr *ifp) | 1470 | static void addrconf_join_anycast(struct inet6_ifaddr *ifp) |
1471 | { | 1471 | { |
1472 | struct in6_addr addr; | 1472 | struct in6_addr addr; |
1473 | if (ifp->prefix_len == 127) /* RFC 6164 */ | 1473 | if (ifp->prefix_len == 127) /* RFC 6164 */ |
1474 | return; | 1474 | return; |
1475 | ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); | 1475 | ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); |
1476 | if (ipv6_addr_any(&addr)) | 1476 | if (ipv6_addr_any(&addr)) |
1477 | return; | 1477 | return; |
1478 | ipv6_dev_ac_inc(ifp->idev->dev, &addr); | 1478 | ipv6_dev_ac_inc(ifp->idev->dev, &addr); |
1479 | } | 1479 | } |
1480 | 1480 | ||
1481 | static void addrconf_leave_anycast(struct inet6_ifaddr *ifp) | 1481 | static void addrconf_leave_anycast(struct inet6_ifaddr *ifp) |
1482 | { | 1482 | { |
1483 | struct in6_addr addr; | 1483 | struct in6_addr addr; |
1484 | if (ifp->prefix_len == 127) /* RFC 6164 */ | 1484 | if (ifp->prefix_len == 127) /* RFC 6164 */ |
1485 | return; | 1485 | return; |
1486 | ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); | 1486 | ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); |
1487 | if (ipv6_addr_any(&addr)) | 1487 | if (ipv6_addr_any(&addr)) |
1488 | return; | 1488 | return; |
1489 | __ipv6_dev_ac_dec(ifp->idev, &addr); | 1489 | __ipv6_dev_ac_dec(ifp->idev, &addr); |
1490 | } | 1490 | } |
1491 | 1491 | ||
/*
 * Build a modified EUI-64 interface identifier from a 6-byte Ethernet
 * MAC: OUI in eui[0..2], NIC part in eui[5..7], 0xFFFE (or a per-OS
 * dev_id) in the middle.  Returns 0 on success, -1 for a wrong
 * address length.
 */
static int addrconf_ifid_eui48(u8 *eui, struct net_device *dev)
{
	if (dev->addr_len != ETH_ALEN)
		return -1;
	memcpy(eui, dev->dev_addr, 3);
	memcpy(eui + 5, dev->dev_addr + 3, 3);

	/*
	 * The zSeries OSA network cards can be shared among various
	 * OS instances, but the OSA cards have only one MAC address.
	 * This leads to duplicate address conflicts in conjunction
	 * with IPv6 if more than one instance uses the same card.
	 *
	 * The driver for these cards can deliver a unique 16-bit
	 * identifier for each instance sharing the same card. It is
	 * placed instead of 0xFFFE in the interface identifier. The
	 * "u" bit of the interface identifier is not inverted in this
	 * case. Hence the resulting interface identifier has local
	 * scope according to RFC2373.
	 */
	if (dev->dev_id) {
		eui[3] = (dev->dev_id >> 8) & 0xFF;
		eui[4] = dev->dev_id & 0xFF;
	} else {
		eui[3] = 0xFF;
		eui[4] = 0xFE;
		eui[0] ^= 2;	/* flip the universal/local bit */
	}
	return 0;
}
1522 | 1522 | ||
/*
 * Build an interface identifier from a 1-byte ARCnet hardware
 * address: bytes 0-6 zero, the hardware byte last.
 * Returns 0 on success, -1 for a wrong address length.
 */
static int addrconf_ifid_arcnet(u8 *eui, struct net_device *dev)
{
	/* XXX: inherit EUI-64 from other interface -- yoshfuji */
	if (dev->addr_len != ARCNET_ALEN)
		return -1;
	memset(eui, 0, 7);
	eui[7] = *(u8*)dev->dev_addr;
	return 0;
}
1532 | 1532 | ||
/*
 * Build an interface identifier from an InfiniBand hardware address:
 * the trailing 8 bytes (offset 12) with the universal/local bit set.
 * Returns 0 on success, -1 for a wrong address length.
 */
static int addrconf_ifid_infiniband(u8 *eui, struct net_device *dev)
{
	if (dev->addr_len != INFINIBAND_ALEN)
		return -1;
	memcpy(eui, dev->dev_addr + 12, 8);
	eui[0] |= 2;	/* mark the identifier as locally administered */
	return 0;
}
1541 | 1541 | ||
/*
 * Build an ISATAP (RFC 4214) interface identifier from IPv4 address
 * @addr: 00-00-5E-FE-<v4addr>, with the "u" bit in eui[0] set only
 * when the IPv4 address is globally unique (i.e. not in any of the
 * private/special-use ranges tested below).
 * Returns 0 on success, -1 for the unspecified address 0.0.0.0.
 */
static int __ipv6_isatap_ifid(u8 *eui, __be32 addr)
{
	if (addr == 0)
		return -1;
	eui[0] = (ipv4_is_zeronet(addr) || ipv4_is_private_10(addr) ||
		  ipv4_is_loopback(addr) || ipv4_is_linklocal_169(addr) ||
		  ipv4_is_private_172(addr) || ipv4_is_test_192(addr) ||
		  ipv4_is_anycast_6to4(addr) || ipv4_is_private_192(addr) ||
		  ipv4_is_test_198(addr) || ipv4_is_multicast(addr) ||
		  ipv4_is_lbcast(addr)) ? 0x00 : 0x02;
	eui[1] = 0;
	eui[2] = 0x5E;
	eui[3] = 0xFE;
	memcpy(eui + 4, &addr, 4);
	return 0;
}
1558 | 1558 | ||
1559 | static int addrconf_ifid_sit(u8 *eui, struct net_device *dev) | 1559 | static int addrconf_ifid_sit(u8 *eui, struct net_device *dev) |
1560 | { | 1560 | { |
1561 | if (dev->priv_flags & IFF_ISATAP) | 1561 | if (dev->priv_flags & IFF_ISATAP) |
1562 | return __ipv6_isatap_ifid(eui, *(__be32 *)dev->dev_addr); | 1562 | return __ipv6_isatap_ifid(eui, *(__be32 *)dev->dev_addr); |
1563 | return -1; | 1563 | return -1; |
1564 | } | 1564 | } |
1565 | 1565 | ||
/*
 * GRE devices use the ISATAP identifier format keyed off dev_addr
 * (presumably the tunnel's local IPv4 address — confirm with the
 * ipgre driver).
 */
static int addrconf_ifid_gre(u8 *eui, struct net_device *dev)
{
	return __ipv6_isatap_ifid(eui, *(__be32 *)dev->dev_addr);
}
1570 | 1570 | ||
/*
 * Fill eui[0..7] with an EUI-64 interface identifier derived from
 * @dev's hardware address, dispatching on the link-layer type.
 * Returns 0 on success, -1 for unsupported types or unusable
 * addresses.
 */
static int ipv6_generate_eui64(u8 *eui, struct net_device *dev)
{
	switch (dev->type) {
	case ARPHRD_ETHER:
	case ARPHRD_FDDI:
	case ARPHRD_IEEE802_TR:
		return addrconf_ifid_eui48(eui, dev);
	case ARPHRD_ARCNET:
		return addrconf_ifid_arcnet(eui, dev);
	case ARPHRD_INFINIBAND:
		return addrconf_ifid_infiniband(eui, dev);
	case ARPHRD_SIT:
		return addrconf_ifid_sit(eui, dev);
	case ARPHRD_IPGRE:
		return addrconf_ifid_gre(eui, dev);
	}
	return -1;
}
1589 | 1589 | ||
1590 | static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev) | 1590 | static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev) |
1591 | { | 1591 | { |
1592 | int err = -1; | 1592 | int err = -1; |
1593 | struct inet6_ifaddr *ifp; | 1593 | struct inet6_ifaddr *ifp; |
1594 | 1594 | ||
1595 | read_lock_bh(&idev->lock); | 1595 | read_lock_bh(&idev->lock); |
1596 | list_for_each_entry(ifp, &idev->addr_list, if_list) { | 1596 | list_for_each_entry(ifp, &idev->addr_list, if_list) { |
1597 | if (ifp->scope == IFA_LINK && !(ifp->flags&IFA_F_TENTATIVE)) { | 1597 | if (ifp->scope == IFA_LINK && !(ifp->flags&IFA_F_TENTATIVE)) { |
1598 | memcpy(eui, ifp->addr.s6_addr+8, 8); | 1598 | memcpy(eui, ifp->addr.s6_addr+8, 8); |
1599 | err = 0; | 1599 | err = 0; |
1600 | break; | 1600 | break; |
1601 | } | 1601 | } |
1602 | } | 1602 | } |
1603 | read_unlock_bh(&idev->lock); | 1603 | read_unlock_bh(&idev->lock); |
1604 | return err; | 1604 | return err; |
1605 | } | 1605 | } |
1606 | 1606 | ||
1607 | #ifdef CONFIG_IPV6_PRIVACY | 1607 | #ifdef CONFIG_IPV6_PRIVACY |
1608 | /* (re)generation of randomized interface identifier (RFC 3041 3.2, 3.5) */ | 1608 | /* (re)generation of randomized interface identifier (RFC 3041 3.2, 3.5) */ |
/*
 * Generate a fresh randomized interface identifier into idev->rndid,
 * looping until the result avoids the reserved patterns listed below.
 * Always returns 0.
 * NOTE(review): callers appear to serialize writes to idev->rndid via
 * idev->lock (see ipv6_regen_rndid()) — confirm for any new caller.
 */
static int __ipv6_regen_rndid(struct inet6_dev *idev)
{
regen:
	get_random_bytes(idev->rndid, sizeof(idev->rndid));
	idev->rndid[0] &= ~0x02;	/* clear "u" bit: locally scoped id */

	/*
	 * <draft-ietf-ipngwg-temp-addresses-v2-00.txt>:
	 * check if generated address is not inappropriate
	 *
	 * - Reserved subnet anycast (RFC 2526)
	 *	11111101 11....11 1xxxxxxx
	 * - ISATAP (RFC4214) 6.1
	 *	00-00-5E-FE-xx-xx-xx-xx
	 * - value 0
	 * - XXX: already assigned to an address on the device
	 */
	if (idev->rndid[0] == 0xfd &&
	    (idev->rndid[1]&idev->rndid[2]&idev->rndid[3]&idev->rndid[4]&idev->rndid[5]&idev->rndid[6]) == 0xff &&
	    (idev->rndid[7]&0x80))
		goto regen;
	if ((idev->rndid[0]|idev->rndid[1]) == 0) {
		if (idev->rndid[2] == 0x5e && idev->rndid[3] == 0xfe)
			goto regen;
		if ((idev->rndid[2]|idev->rndid[3]|idev->rndid[4]|idev->rndid[5]|idev->rndid[6]|idev->rndid[7]) == 0x00)
			goto regen;
	}

	return 0;
}
1639 | 1639 | ||
/*
 * Timer callback: periodically regenerate the randomized interface
 * identifier used for privacy (temporary) addresses.  Regenerates
 * rndid under idev->lock, then re-arms the timer so the next
 * regeneration happens before the temporary addresses' preferred
 * lifetime runs out.  Drops the idev reference that was taken when
 * the timer was armed.
 */
static void ipv6_regen_rndid(unsigned long data)
{
	struct inet6_dev *idev = (struct inet6_dev *) data;
	unsigned long expires;

	rcu_read_lock_bh();
	write_lock_bh(&idev->lock);

	if (idev->dead)
		goto out;

	if (__ipv6_regen_rndid(idev) < 0)
		goto out;

	/* leave room for DAD retries and the desynchronization factor */
	expires = jiffies +
		idev->cnf.temp_prefered_lft * HZ -
		idev->cnf.regen_max_retry * idev->cnf.dad_transmits * idev->nd_parms->retrans_time -
		idev->cnf.max_desync_factor * HZ;
	if (time_before(expires, jiffies)) {
		printk(KERN_WARNING
			"ipv6_regen_rndid(): too short regeneration interval; timer disabled for %s.\n",
			idev->dev->name);
		goto out;
	}

	/* mod_timer() returns 0 when the timer was idle: take a
	 * reference for the newly pending expiry */
	if (!mod_timer(&idev->regen_timer, expires))
		in6_dev_hold(idev);

out:
	write_unlock_bh(&idev->lock);
	rcu_read_unlock_bh();
	in6_dev_put(idev);
}
1673 | 1673 | ||
1674 | static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr) { | 1674 | static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr) { |
1675 | int ret = 0; | 1675 | int ret = 0; |
1676 | 1676 | ||
1677 | if (tmpaddr && memcmp(idev->rndid, &tmpaddr->s6_addr[8], 8) == 0) | 1677 | if (tmpaddr && memcmp(idev->rndid, &tmpaddr->s6_addr[8], 8) == 0) |
1678 | ret = __ipv6_regen_rndid(idev); | 1678 | ret = __ipv6_regen_rndid(idev); |
1679 | return ret; | 1679 | return ret; |
1680 | } | 1680 | } |
1681 | #endif | 1681 | #endif |
1682 | 1682 | ||
1683 | /* | 1683 | /* |
1684 | * Add prefix route. | 1684 | * Add prefix route. |
1685 | */ | 1685 | */ |
1686 | 1686 | ||
/*
 * Install a kernel-originated (RTPROT_KERNEL) route for pfx/plen via
 * @dev into the prefix table, with the given expiry and extra RTF_*
 * flags.  Errors from ip6_route_add() are ignored.
 */
static void
addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
		      unsigned long expires, u32 flags)
{
	struct fib6_config cfg = {
		.fc_table = RT6_TABLE_PREFIX,
		.fc_metric = IP6_RT_PRIO_ADDRCONF,
		.fc_ifindex = dev->ifindex,
		.fc_expires = expires,
		.fc_dst_len = plen,
		.fc_flags = RTF_UP | flags,
		.fc_nlinfo.nl_net = dev_net(dev),
		.fc_protocol = RTPROT_KERNEL,
	};

	ipv6_addr_copy(&cfg.fc_dst, pfx);

	/* Prevent useless cloning on PtP SIT.
	   This thing is done here expecting that the whole
	   class of non-broadcast devices need not cloning.
	 */
#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
	if (dev->type == ARPHRD_SIT && (dev->flags & IFF_POINTOPOINT))
		cfg.fc_flags |= RTF_NONEXTHOP;
#endif

	ip6_route_add(&cfg);
}
1715 | 1715 | ||
1716 | /* Create "default" multicast route to the interface */ | 1716 | /* Create "default" multicast route to the interface */ |
1717 | 1717 | ||
/*
 * Install the ff00::/8 route (the whole IPv6 multicast range) into
 * the local table so multicast traffic is delivered through @dev.
 */
static void addrconf_add_mroute(struct net_device *dev)
{
	struct fib6_config cfg = {
		.fc_table = RT6_TABLE_LOCAL,
		.fc_metric = IP6_RT_PRIO_ADDRCONF,
		.fc_ifindex = dev->ifindex,
		.fc_dst_len = 8,
		.fc_flags = RTF_UP,
		.fc_nlinfo.nl_net = dev_net(dev),
	};

	ipv6_addr_set(&cfg.fc_dst, htonl(0xFF000000), 0, 0, 0);

	ip6_route_add(&cfg);
}
1733 | 1733 | ||
#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
/* Install the IPv4-compatible route (::/96) used by SIT tunnels */
static void sit_route_add(struct net_device *dev)
{
	struct fib6_config cfg = {
		.fc_nlinfo.nl_net = dev_net(dev),
		.fc_table = RT6_TABLE_MAIN,
		.fc_ifindex = dev->ifindex,
		.fc_metric = IP6_RT_PRIO_ADDRCONF,
		/* prefix length - 96 bits "::d.d.d.d" */
		.fc_dst_len = 96,
		.fc_flags = RTF_UP | RTF_NONEXTHOP,
	};

	ip6_route_add(&cfg);
}
#endif
1750 | 1750 | ||
1751 | static void addrconf_add_lroute(struct net_device *dev) | 1751 | static void addrconf_add_lroute(struct net_device *dev) |
1752 | { | 1752 | { |
1753 | struct in6_addr addr; | 1753 | struct in6_addr addr; |
1754 | 1754 | ||
1755 | ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0); | 1755 | ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0); |
1756 | addrconf_prefix_route(&addr, 64, dev, 0, 0); | 1756 | addrconf_prefix_route(&addr, 64, dev, 0, 0); |
1757 | } | 1757 | } |
1758 | 1758 | ||
1759 | static struct inet6_dev *addrconf_add_dev(struct net_device *dev) | 1759 | static struct inet6_dev *addrconf_add_dev(struct net_device *dev) |
1760 | { | 1760 | { |
1761 | struct inet6_dev *idev; | 1761 | struct inet6_dev *idev; |
1762 | 1762 | ||
1763 | ASSERT_RTNL(); | 1763 | ASSERT_RTNL(); |
1764 | 1764 | ||
1765 | idev = ipv6_find_idev(dev); | 1765 | idev = ipv6_find_idev(dev); |
1766 | if (!idev) | 1766 | if (!idev) |
1767 | return ERR_PTR(-ENOBUFS); | 1767 | return ERR_PTR(-ENOBUFS); |
1768 | 1768 | ||
1769 | if (idev->cnf.disable_ipv6) | 1769 | if (idev->cnf.disable_ipv6) |
1770 | return ERR_PTR(-EACCES); | 1770 | return ERR_PTR(-EACCES); |
1771 | 1771 | ||
1772 | /* Add default multicast route */ | 1772 | /* Add default multicast route */ |
1773 | addrconf_add_mroute(dev); | 1773 | addrconf_add_mroute(dev); |
1774 | 1774 | ||
1775 | /* Add link local route */ | 1775 | /* Add link local route */ |
1776 | addrconf_add_lroute(dev); | 1776 | addrconf_add_lroute(dev); |
1777 | return idev; | 1777 | return idev; |
1778 | } | 1778 | } |
1779 | 1779 | ||
/*
 * addrconf_prefix_rcv - process a Prefix Information option from a
 * received Router Advertisement (RFC 4862, section 5.5.3).
 * @dev: device the advertisement arrived on
 * @opt: raw prefix_info option
 * @len: length of @opt in bytes
 *
 * Installs/refreshes an on-link prefix route when the L flag is set,
 * and autoconfigures an address (plus, with CONFIG_IPV6_PRIVACY,
 * temporary addresses) when the A flag is set.
 */
void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
{
	struct prefix_info *pinfo;
	__u32 valid_lft;
	__u32 prefered_lft;
	int addr_type;
	struct inet6_dev *in6_dev;
	struct net *net = dev_net(dev);

	pinfo = (struct prefix_info *) opt;

	if (len < sizeof(struct prefix_info)) {
		ADBG(("addrconf: prefix option too short\n"));
		return;
	}

	/*
	 *	Validation checks ([ADDRCONF], page 19)
	 */

	addr_type = ipv6_addr_type(&pinfo->prefix);

	/* Multicast and link-local prefixes are never autoconfigured */
	if (addr_type & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL))
		return;

	valid_lft = ntohl(pinfo->valid);
	prefered_lft = ntohl(pinfo->prefered);

	if (prefered_lft > valid_lft) {
		if (net_ratelimit())
			printk(KERN_WARNING "addrconf: prefix option has invalid lifetime\n");
		return;
	}

	/* Takes a reference on the inet6_dev; dropped on every exit path below */
	in6_dev = in6_dev_get(dev);

	if (in6_dev == NULL) {
		if (net_ratelimit())
			printk(KERN_DEBUG "addrconf: device %s not configured\n", dev->name);
		return;
	}

	/*
	 *	Two things going on here:
	 *	1) Add routes for on-link prefixes
	 *	2) Configure prefixes with the auto flag set
	 */

	if (pinfo->onlink) {
		struct rt6_info *rt;
		unsigned long rt_expires;

		/* Avoid arithmetic overflow. Really, we could
		 * save rt_expires in seconds, likely valid_lft,
		 * but it would require division in fib gc, that it
		 * not good.
		 */
		if (HZ > USER_HZ)
			rt_expires = addrconf_timeout_fixup(valid_lft, HZ);
		else
			rt_expires = addrconf_timeout_fixup(valid_lft, USER_HZ);

		if (addrconf_finite_timeout(rt_expires))
			rt_expires *= HZ;

		/* rt6_lookup() returns a held route; released via dst_release() */
		rt = rt6_lookup(net, &pinfo->prefix, NULL,
				dev->ifindex, 1);

		if (rt && addrconf_is_prefix_route(rt)) {
			/* Autoconf prefix route */
			if (valid_lft == 0) {
				/* A zero valid lifetime withdraws the prefix */
				ip6_del_rt(rt);
				rt = NULL;
			} else if (addrconf_finite_timeout(rt_expires)) {
				/* not infinity */
				rt->rt6i_expires = jiffies + rt_expires;
				rt->rt6i_flags |= RTF_EXPIRES;
			} else {
				rt->rt6i_flags &= ~RTF_EXPIRES;
				rt->rt6i_expires = 0;
			}
		} else if (valid_lft) {
			/* No prefix route yet: install a fresh one */
			clock_t expires = 0;
			int flags = RTF_ADDRCONF | RTF_PREFIX_RT;
			if (addrconf_finite_timeout(rt_expires)) {
				/* not infinity */
				flags |= RTF_EXPIRES;
				expires = jiffies_to_clock_t(rt_expires);
			}
			addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len,
					      dev, expires, flags);
		}
		if (rt)
			dst_release(&rt->dst);
	}

	/* Try to figure out our local address for this prefix */

	if (pinfo->autoconf && in6_dev->cnf.autoconf) {
		struct inet6_ifaddr * ifp;
		struct in6_addr addr;
		int create = 0, update_lft = 0;

		if (pinfo->prefix_len == 64) {
			/* Interface ID: EUI-64 from the device itself, or
			 * inherited from another interface as a fallback.
			 */
			memcpy(&addr, &pinfo->prefix, 8);
			if (ipv6_generate_eui64(addr.s6_addr + 8, dev) &&
			    ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) {
				in6_dev_put(in6_dev);
				return;
			}
			goto ok;
		}
		if (net_ratelimit())
			printk(KERN_DEBUG "IPv6 addrconf: prefix with wrong length %d\n",
			       pinfo->prefix_len);
		in6_dev_put(in6_dev);
		return;

ok:

		/* Returns a held inet6_ifaddr if the address already exists */
		ifp = ipv6_get_ifaddr(net, &addr, dev, 1);

		if (ifp == NULL && valid_lft) {
			int max_addresses = in6_dev->cnf.max_addresses;
			u32 addr_flags = 0;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
			if (in6_dev->cnf.optimistic_dad &&
			    !net->ipv6.devconf_all->forwarding)
				addr_flags = IFA_F_OPTIMISTIC;
#endif

			/* Do not allow to create too much of autoconfigured
			 * addresses; this would be too easy way to crash kernel.
			 */
			if (!max_addresses ||
			    ipv6_count_addresses(in6_dev) < max_addresses)
				ifp = ipv6_add_addr(in6_dev, &addr, pinfo->prefix_len,
						    addr_type&IPV6_ADDR_SCOPE_MASK,
						    addr_flags);

			if (!ifp || IS_ERR(ifp)) {
				in6_dev_put(in6_dev);
				return;
			}

			update_lft = create = 1;
			ifp->cstamp = jiffies;
			addrconf_dad_start(ifp, RTF_ADDRCONF|RTF_PREFIX_RT);
		}

		if (ifp) {
			int flags;
			unsigned long now;
#ifdef CONFIG_IPV6_PRIVACY
			struct inet6_ifaddr *ift;
#endif
			u32 stored_lft;

			/* update lifetime (RFC2462 5.5.3 e) */
			spin_lock(&ifp->lock);
			now = jiffies;
			/* Remaining lifetime of the existing address, in seconds */
			if (ifp->valid_lft > (now - ifp->tstamp) / HZ)
				stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ;
			else
				stored_lft = 0;
			if (!update_lft && stored_lft) {
				if (valid_lft > MIN_VALID_LIFETIME ||
				    valid_lft > stored_lft)
					update_lft = 1;
				else if (stored_lft <= MIN_VALID_LIFETIME) {
					/* valid_lft <= stored_lft is always true */
					/*
					 * RFC 4862 Section 5.5.3e:
					 * "Note that the preferred lifetime of
					 *  the corresponding address is always
					 *  reset to the Preferred Lifetime in
					 *  the received Prefix Information
					 *  option, regardless of whether the
					 *  valid lifetime is also reset or
					 *  ignored."
					 *
					 * So if the preferred lifetime in
					 * this advertisement is different
					 * than what we have stored, but the
					 * valid lifetime is invalid, just
					 * reset prefered_lft.
					 *
					 * We must set the valid lifetime
					 * to the stored lifetime since we'll
					 * be updating the timestamp below,
					 * else we'll set it back to the
					 * minimum.
					 */
					if (prefered_lft != ifp->prefered_lft) {
						valid_lft = stored_lft;
						update_lft = 1;
					}
				} else {
					valid_lft = MIN_VALID_LIFETIME;
					if (valid_lft < prefered_lft)
						prefered_lft = valid_lft;
					update_lft = 1;
				}
			}

			if (update_lft) {
				ifp->valid_lft = valid_lft;
				ifp->prefered_lft = prefered_lft;
				ifp->tstamp = now;
				flags = ifp->flags;
				ifp->flags &= ~IFA_F_DEPRECATED;
				spin_unlock(&ifp->lock);

				/* Only notify once DAD has completed */
				if (!(flags&IFA_F_TENTATIVE))
					ipv6_ifa_notify(0, ifp);
			} else
				spin_unlock(&ifp->lock);

#ifdef CONFIG_IPV6_PRIVACY
			read_lock_bh(&in6_dev->lock);
			/* update all temporary addresses in the list */
			list_for_each_entry(ift, &in6_dev->tempaddr_list, tmp_list) {
				/*
				 * When adjusting the lifetimes of an existing
				 * temporary address, only lower the lifetimes.
				 * Implementations must not increase the
				 * lifetimes of an existing temporary address
				 * when processing a Prefix Information Option.
				 */
				if (ifp != ift->ifpub)
					continue;

				spin_lock(&ift->lock);
				flags = ift->flags;
				if (ift->valid_lft > valid_lft &&
				    ift->valid_lft - valid_lft > (jiffies - ift->tstamp) / HZ)
					ift->valid_lft = valid_lft + (jiffies - ift->tstamp) / HZ;
				if (ift->prefered_lft > prefered_lft &&
				    ift->prefered_lft - prefered_lft > (jiffies - ift->tstamp) / HZ)
					ift->prefered_lft = prefered_lft + (jiffies - ift->tstamp) / HZ;
				spin_unlock(&ift->lock);
				if (!(flags&IFA_F_TENTATIVE))
					ipv6_ifa_notify(0, ift);
			}

			if ((create || list_empty(&in6_dev->tempaddr_list)) && in6_dev->cnf.use_tempaddr > 0) {
				/*
				 * When a new public address is created as described in [ADDRCONF],
				 * also create a new temporary address. Also create a temporary
				 * address if it's enabled but no temporary address currently exists.
				 */
				read_unlock_bh(&in6_dev->lock);
				ipv6_create_tempaddr(ifp, NULL);
			} else {
				read_unlock_bh(&in6_dev->lock);
			}
#endif
			in6_ifa_put(ifp);
			addrconf_verify(0);
		}
	}
	inet6_prefix_notify(RTM_NEWPREFIX, in6_dev, pinfo);
	in6_dev_put(in6_dev);
}
2045 | 2045 | ||
/*
 *	Set destination address.
 *	Special case for SIT interfaces where we create a new "virtual"
 *	device.
 */
int addrconf_set_dstaddr(struct net *net, void __user *arg)
{
	struct in6_ifreq ireq;
	struct net_device *dev;
	int err = -EINVAL;

	/* All work below is serialized under RTNL */
	rtnl_lock();

	err = -EFAULT;
	if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
		goto err_exit;

	dev = __dev_get_by_index(net, ireq.ifr6_ifindex);

	err = -ENODEV;
	if (dev == NULL)
		goto err_exit;

#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
	if (dev->type == ARPHRD_SIT) {
		const struct net_device_ops *ops = dev->netdev_ops;
		struct ifreq ifr;
		struct ip_tunnel_parm p;

		/* SIT needs an IPv4-compatible destination (::a.b.c.d) */
		err = -EADDRNOTAVAIL;
		if (!(ipv6_addr_type(&ireq.ifr6_addr) & IPV6_ADDR_COMPATv4))
			goto err_exit;

		/* Build the tunnel parameters: IPv6-in-IPv4, embedded v4 daddr */
		memset(&p, 0, sizeof(p));
		p.iph.daddr = ireq.ifr6_addr.s6_addr32[3];
		p.iph.saddr = 0;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPV6;
		p.iph.ttl = 64;
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			/* The ioctl expects a user pointer; temporarily widen
			 * the address-limit so it accepts our kernel buffer.
			 */
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
			set_fs(oldfs);
		} else
			err = -EOPNOTSUPP;

		if (err == 0) {
			/* Bring up the freshly created tunnel device */
			err = -ENOBUFS;
			dev = __dev_get_by_name(net, p.name);
			if (!dev)
				goto err_exit;
			err = dev_open(dev);
		}
	}
#endif

err_exit:
	rtnl_unlock();
	return err;
}
2111 | 2111 | ||
/*
 *	Manual configuration of address on an interface
 */
/*
 * inet6_addr_add - add an IPv6 address to the device with index @ifindex.
 * @pfx/@plen: address and prefix length (plen <= 128)
 * @ifa_flags: IFA_F_* flags for the new address
 * @prefered_lft/@valid_lft: lifetimes in seconds (must satisfy
 *	valid_lft != 0 and prefered_lft <= valid_lft)
 *
 * Caller must hold RTNL.  Returns 0 on success or a negative errno.
 */
static int inet6_addr_add(struct net *net, int ifindex, const struct in6_addr *pfx,
			  unsigned int plen, __u8 ifa_flags, __u32 prefered_lft,
			  __u32 valid_lft)
{
	struct inet6_ifaddr *ifp;
	struct inet6_dev *idev;
	struct net_device *dev;
	int scope;
	u32 flags;
	clock_t expires;
	unsigned long timeout;

	ASSERT_RTNL();

	if (plen > 128)
		return -EINVAL;

	/* check the lifetime */
	if (!valid_lft || prefered_lft > valid_lft)
		return -EINVAL;

	dev = __dev_get_by_index(net, ifindex);
	if (!dev)
		return -ENODEV;

	/* Ensures the inet6_dev exists and default routes are installed */
	idev = addrconf_add_dev(dev);
	if (IS_ERR(idev))
		return PTR_ERR(idev);

	scope = ipv6_addr_scope(pfx);

	/* Valid lifetime: finite → route expires; infinite → permanent flag */
	timeout = addrconf_timeout_fixup(valid_lft, HZ);
	if (addrconf_finite_timeout(timeout)) {
		expires = jiffies_to_clock_t(timeout * HZ);
		valid_lft = timeout;
		flags = RTF_EXPIRES;
	} else {
		expires = 0;
		flags = 0;
		ifa_flags |= IFA_F_PERMANENT;
	}

	/* Preferred lifetime: zero means the address starts deprecated */
	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
	if (addrconf_finite_timeout(timeout)) {
		if (timeout == 0)
			ifa_flags |= IFA_F_DEPRECATED;
		prefered_lft = timeout;
	}

	ifp = ipv6_add_addr(idev, pfx, plen, scope, ifa_flags);

	if (!IS_ERR(ifp)) {
		spin_lock_bh(&ifp->lock);
		ifp->valid_lft = valid_lft;
		ifp->prefered_lft = prefered_lft;
		ifp->tstamp = jiffies;
		spin_unlock_bh(&ifp->lock);

		addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev,
				      expires, flags);
		/*
		 * Note that section 3.1 of RFC 4429 indicates
		 * that the Optimistic flag should not be set for
		 * manually configured addresses
		 */
		addrconf_dad_start(ifp, 0);
		in6_ifa_put(ifp);
		addrconf_verify(0);
		return 0;
	}

	return PTR_ERR(ifp);
}
2188 | 2188 | ||
/*
 * inet6_addr_del - delete the address @pfx/@plen from device @ifindex.
 *
 * Returns 0 on success, -EINVAL/-ENODEV/-ENXIO on bad arguments, or
 * -EADDRNOTAVAIL if the address is not configured on the device.
 */
static int inet6_addr_del(struct net *net, int ifindex, const struct in6_addr *pfx,
			  unsigned int plen)
{
	struct inet6_ifaddr *ifp;
	struct inet6_dev *idev;
	struct net_device *dev;

	if (plen > 128)
		return -EINVAL;

	dev = __dev_get_by_index(net, ifindex);
	if (!dev)
		return -ENODEV;

	if ((idev = __in6_dev_get(dev)) == NULL)
		return -ENXIO;

	read_lock_bh(&idev->lock);
	list_for_each_entry(ifp, &idev->addr_list, if_list) {
		if (ifp->prefix_len == plen &&
		    ipv6_addr_equal(pfx, &ifp->addr)) {
			/* Hold the address so it stays valid once the
			 * idev lock is dropped for ipv6_del_addr().
			 */
			in6_ifa_hold(ifp);
			read_unlock_bh(&idev->lock);

			ipv6_del_addr(ifp);

			/* If the last address is deleted administratively,
			   disable IPv6 on this interface.
			   NOTE(review): addr_list is inspected here without
			   idev->lock — presumably safe because callers hold
			   RTNL; verify against the call sites. */
			if (list_empty(&idev->addr_list))
				addrconf_ifdown(idev->dev, 1);
			return 0;
		}
	}
	read_unlock_bh(&idev->lock);
	return -EADDRNOTAVAIL;
}
2226 | 2226 | ||
2227 | 2227 | ||
2228 | int addrconf_add_ifaddr(struct net *net, void __user *arg) | 2228 | int addrconf_add_ifaddr(struct net *net, void __user *arg) |
2229 | { | 2229 | { |
2230 | struct in6_ifreq ireq; | 2230 | struct in6_ifreq ireq; |
2231 | int err; | 2231 | int err; |
2232 | 2232 | ||
2233 | if (!capable(CAP_NET_ADMIN)) | 2233 | if (!capable(CAP_NET_ADMIN)) |
2234 | return -EPERM; | 2234 | return -EPERM; |
2235 | 2235 | ||
2236 | if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq))) | 2236 | if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq))) |
2237 | return -EFAULT; | 2237 | return -EFAULT; |
2238 | 2238 | ||
2239 | rtnl_lock(); | 2239 | rtnl_lock(); |
2240 | err = inet6_addr_add(net, ireq.ifr6_ifindex, &ireq.ifr6_addr, | 2240 | err = inet6_addr_add(net, ireq.ifr6_ifindex, &ireq.ifr6_addr, |
2241 | ireq.ifr6_prefixlen, IFA_F_PERMANENT, | 2241 | ireq.ifr6_prefixlen, IFA_F_PERMANENT, |
2242 | INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); | 2242 | INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); |
2243 | rtnl_unlock(); | 2243 | rtnl_unlock(); |
2244 | return err; | 2244 | return err; |
2245 | } | 2245 | } |
2246 | 2246 | ||
2247 | int addrconf_del_ifaddr(struct net *net, void __user *arg) | 2247 | int addrconf_del_ifaddr(struct net *net, void __user *arg) |
2248 | { | 2248 | { |
2249 | struct in6_ifreq ireq; | 2249 | struct in6_ifreq ireq; |
2250 | int err; | 2250 | int err; |
2251 | 2251 | ||
2252 | if (!capable(CAP_NET_ADMIN)) | 2252 | if (!capable(CAP_NET_ADMIN)) |
2253 | return -EPERM; | 2253 | return -EPERM; |
2254 | 2254 | ||
2255 | if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq))) | 2255 | if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq))) |
2256 | return -EFAULT; | 2256 | return -EFAULT; |
2257 | 2257 | ||
2258 | rtnl_lock(); | 2258 | rtnl_lock(); |
2259 | err = inet6_addr_del(net, ireq.ifr6_ifindex, &ireq.ifr6_addr, | 2259 | err = inet6_addr_del(net, ireq.ifr6_ifindex, &ireq.ifr6_addr, |
2260 | ireq.ifr6_prefixlen); | 2260 | ireq.ifr6_prefixlen); |
2261 | rtnl_unlock(); | 2261 | rtnl_unlock(); |
2262 | return err; | 2262 | return err; |
2263 | } | 2263 | } |
2264 | 2264 | ||
2265 | static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr, | 2265 | static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr, |
2266 | int plen, int scope) | 2266 | int plen, int scope) |
2267 | { | 2267 | { |
2268 | struct inet6_ifaddr *ifp; | 2268 | struct inet6_ifaddr *ifp; |
2269 | 2269 | ||
2270 | ifp = ipv6_add_addr(idev, addr, plen, scope, IFA_F_PERMANENT); | 2270 | ifp = ipv6_add_addr(idev, addr, plen, scope, IFA_F_PERMANENT); |
2271 | if (!IS_ERR(ifp)) { | 2271 | if (!IS_ERR(ifp)) { |
2272 | spin_lock_bh(&ifp->lock); | 2272 | spin_lock_bh(&ifp->lock); |
2273 | ifp->flags &= ~IFA_F_TENTATIVE; | 2273 | ifp->flags &= ~IFA_F_TENTATIVE; |
2274 | spin_unlock_bh(&ifp->lock); | 2274 | spin_unlock_bh(&ifp->lock); |
2275 | ipv6_ifa_notify(RTM_NEWADDR, ifp); | 2275 | ipv6_ifa_notify(RTM_NEWADDR, ifp); |
2276 | in6_ifa_put(ifp); | 2276 | in6_ifa_put(ifp); |
2277 | } | 2277 | } |
2278 | } | 2278 | } |
2279 | 2279 | ||
2280 | #if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) | 2280 | #if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) |
/*
 * Derive IPv6 addresses for a SIT (IPv6-in-IPv4) tunnel from IPv4
 * addresses.  If the tunnel device itself carries an IPv4 endpoint in
 * dev_addr, only that one is mapped; otherwise every IPv4 address of
 * every UP device in the namespace is mapped.  Point-to-point tunnels
 * get link-local (fe80::) forms, others get IPv4-compatible (::a.b.c.d)
 * forms.  Runs under RTNL (required by __in_dev_get_rtnl()).
 */
static void sit_add_v4_addrs(struct inet6_dev *idev)
{
	struct in6_addr addr;
	struct net_device *dev;
	struct net *net = dev_net(idev->dev);
	int scope;

	ASSERT_RTNL();

	memset(&addr, 0, sizeof(struct in6_addr));
	/* Embed the tunnel's local IPv4 endpoint in the low 32 bits. */
	memcpy(&addr.s6_addr32[3], idev->dev->dev_addr, 4);

	if (idev->dev->flags&IFF_POINTOPOINT) {
		addr.s6_addr32[0] = htonl(0xfe800000);
		scope = IFA_LINK;
	} else {
		scope = IPV6_ADDR_COMPATv4;
	}

	if (addr.s6_addr32[3]) {
		/* Tunnel has an explicit v4 endpoint: one /128 and done. */
		add_addr(idev, &addr, 128, scope);
		return;
	}

	/* No endpoint configured: map the v4 addresses of all UP devices. */
	for_each_netdev(net, dev) {
		struct in_device * in_dev = __in_dev_get_rtnl(dev);
		if (in_dev && (dev->flags & IFF_UP)) {
			struct in_ifaddr * ifa;

			int flag = scope;

			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
				int plen;

				addr.s6_addr32[3] = ifa->ifa_local;

				/* Link-scoped v4 addresses don't map usefully. */
				if (ifa->ifa_scope == RT_SCOPE_LINK)
					continue;
				if (ifa->ifa_scope >= RT_SCOPE_HOST) {
					if (idev->dev->flags&IFF_POINTOPOINT)
						continue;
					flag |= IFA_HOST;
				}
				/* /64 on p2p links, /96 for compat addresses. */
				if (idev->dev->flags&IFF_POINTOPOINT)
					plen = 64;
				else
					plen = 96;

				add_addr(idev, &addr, plen, flag);
			}
		}
	}
}
2334 | #endif | 2334 | #endif |
2335 | 2335 | ||
2336 | static void init_loopback(struct net_device *dev) | 2336 | static void init_loopback(struct net_device *dev) |
2337 | { | 2337 | { |
2338 | struct inet6_dev *idev; | 2338 | struct inet6_dev *idev; |
2339 | 2339 | ||
2340 | /* ::1 */ | 2340 | /* ::1 */ |
2341 | 2341 | ||
2342 | ASSERT_RTNL(); | 2342 | ASSERT_RTNL(); |
2343 | 2343 | ||
2344 | if ((idev = ipv6_find_idev(dev)) == NULL) { | 2344 | if ((idev = ipv6_find_idev(dev)) == NULL) { |
2345 | printk(KERN_DEBUG "init loopback: add_dev failed\n"); | 2345 | printk(KERN_DEBUG "init loopback: add_dev failed\n"); |
2346 | return; | 2346 | return; |
2347 | } | 2347 | } |
2348 | 2348 | ||
2349 | add_addr(idev, &in6addr_loopback, 128, IFA_HOST); | 2349 | add_addr(idev, &in6addr_loopback, 128, IFA_HOST); |
2350 | } | 2350 | } |
2351 | 2351 | ||
/*
 * Install a link-local address (/64, link scope) on @idev and start
 * Duplicate Address Detection on it.  The address is permanent; with
 * optimistic DAD enabled (and forwarding disabled namespace-wide) it
 * is additionally marked IFA_F_OPTIMISTIC so it may be used before
 * DAD completes.  Errors from ipv6_add_addr() are silently ignored.
 */
static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr *addr)
{
	struct inet6_ifaddr * ifp;
	u32 addr_flags = IFA_F_PERMANENT;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	if (idev->cnf.optimistic_dad &&
	    !dev_net(idev->dev)->ipv6.devconf_all->forwarding)
		addr_flags |= IFA_F_OPTIMISTIC;
#endif


	ifp = ipv6_add_addr(idev, addr, 64, IFA_LINK, addr_flags);
	if (!IS_ERR(ifp)) {
		/* Route the /64 prefix via this device, then kick off DAD. */
		addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, 0, 0);
		addrconf_dad_start(ifp, 0);
		in6_ifa_put(ifp);
	}
}
2371 | 2371 | ||
2372 | static void addrconf_dev_config(struct net_device *dev) | 2372 | static void addrconf_dev_config(struct net_device *dev) |
2373 | { | 2373 | { |
2374 | struct in6_addr addr; | 2374 | struct in6_addr addr; |
2375 | struct inet6_dev * idev; | 2375 | struct inet6_dev * idev; |
2376 | 2376 | ||
2377 | ASSERT_RTNL(); | 2377 | ASSERT_RTNL(); |
2378 | 2378 | ||
2379 | if ((dev->type != ARPHRD_ETHER) && | 2379 | if ((dev->type != ARPHRD_ETHER) && |
2380 | (dev->type != ARPHRD_FDDI) && | 2380 | (dev->type != ARPHRD_FDDI) && |
2381 | (dev->type != ARPHRD_IEEE802_TR) && | 2381 | (dev->type != ARPHRD_IEEE802_TR) && |
2382 | (dev->type != ARPHRD_ARCNET) && | 2382 | (dev->type != ARPHRD_ARCNET) && |
2383 | (dev->type != ARPHRD_INFINIBAND)) { | 2383 | (dev->type != ARPHRD_INFINIBAND)) { |
2384 | /* Alas, we support only Ethernet autoconfiguration. */ | 2384 | /* Alas, we support only Ethernet autoconfiguration. */ |
2385 | return; | 2385 | return; |
2386 | } | 2386 | } |
2387 | 2387 | ||
2388 | idev = addrconf_add_dev(dev); | 2388 | idev = addrconf_add_dev(dev); |
2389 | if (IS_ERR(idev)) | 2389 | if (IS_ERR(idev)) |
2390 | return; | 2390 | return; |
2391 | 2391 | ||
2392 | memset(&addr, 0, sizeof(struct in6_addr)); | 2392 | memset(&addr, 0, sizeof(struct in6_addr)); |
2393 | addr.s6_addr32[0] = htonl(0xFE800000); | 2393 | addr.s6_addr32[0] = htonl(0xFE800000); |
2394 | 2394 | ||
2395 | if (ipv6_generate_eui64(addr.s6_addr + 8, dev) == 0) | 2395 | if (ipv6_generate_eui64(addr.s6_addr + 8, dev) == 0) |
2396 | addrconf_add_linklocal(idev, &addr); | 2396 | addrconf_add_linklocal(idev, &addr); |
2397 | } | 2397 | } |
2398 | 2398 | ||
2399 | #if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) | 2399 | #if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) |
/*
 * Configure IPv6 on a SIT tunnel when it comes up.  ISATAP tunnels
 * get an fe80::/64 prefix route and an EUI-64 link-local address;
 * plain SIT tunnels get v4-derived addresses via sit_add_v4_addrs()
 * plus multicast/link-local or generic tunnel routes.  Called under
 * RTNL.
 */
static void addrconf_sit_config(struct net_device *dev)
{
	struct inet6_dev *idev;

	ASSERT_RTNL();

	/*
	 * Configure the tunnel with one of our IPv4
	 * addresses... we should configure all of
	 * our v4 addrs in the tunnel
	 */

	if ((idev = ipv6_find_idev(dev)) == NULL) {
		printk(KERN_DEBUG "init sit: add_dev failed\n");
		return;
	}

	if (dev->priv_flags & IFF_ISATAP) {
		struct in6_addr addr;

		ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0);
		addrconf_prefix_route(&addr, 64, dev, 0, 0);
		if (!ipv6_generate_eui64(addr.s6_addr + 8, dev))
			addrconf_add_linklocal(idev, &addr);
		return;
	}

	sit_add_v4_addrs(idev);

	if (dev->flags&IFF_POINTOPOINT) {
		/* Point-to-point: add multicast and link-local routes. */
		addrconf_add_mroute(dev);
		addrconf_add_lroute(dev);
	} else
		sit_route_add(dev);
}
2435 | #endif | 2435 | #endif |
2436 | 2436 | ||
2437 | #if defined(CONFIG_NET_IPGRE) || defined(CONFIG_NET_IPGRE_MODULE) | 2437 | #if defined(CONFIG_NET_IPGRE) || defined(CONFIG_NET_IPGRE_MODULE) |
/*
 * Configure IPv6 on a GRE tunnel when it comes up: install the
 * fe80::/64 prefix route and, if an EUI-64 can be generated for the
 * device, a link-local address.  Called under RTNL.
 */
static void addrconf_gre_config(struct net_device *dev)
{
	struct inet6_dev *idev;
	struct in6_addr addr;

	/* NOTE(review): unconditional pr_info looks like leftover debug
	 * tracing - candidate for pr_debug or removal.
	 */
	pr_info("ipv6: addrconf_gre_config(%s)\n", dev->name);

	ASSERT_RTNL();

	if ((idev = ipv6_find_idev(dev)) == NULL) {
		printk(KERN_DEBUG "init gre: add_dev failed\n");
		return;
	}

	ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0);
	addrconf_prefix_route(&addr, 64, dev, 0, 0);

	if (!ipv6_generate_eui64(addr.s6_addr + 8, dev))
		addrconf_add_linklocal(idev, &addr);
}
2459 | 2459 | ||
2460 | static inline int | 2460 | static inline int |
2461 | ipv6_inherit_linklocal(struct inet6_dev *idev, struct net_device *link_dev) | 2461 | ipv6_inherit_linklocal(struct inet6_dev *idev, struct net_device *link_dev) |
2462 | { | 2462 | { |
2463 | struct in6_addr lladdr; | 2463 | struct in6_addr lladdr; |
2464 | 2464 | ||
2465 | if (!ipv6_get_lladdr(link_dev, &lladdr, IFA_F_TENTATIVE)) { | 2465 | if (!ipv6_get_lladdr(link_dev, &lladdr, IFA_F_TENTATIVE)) { |
2466 | addrconf_add_linklocal(idev, &lladdr); | 2466 | addrconf_add_linklocal(idev, &lladdr); |
2467 | return 0; | 2467 | return 0; |
2468 | } | 2468 | } |
2469 | return -1; | 2469 | return -1; |
2470 | } | 2470 | } |
2471 | 2471 | ||
/*
 * Give an ip6 tunnel a link-local address inherited from another
 * device: prefer the tunnel's underlying link device (iflink), then
 * fall back to the first device in the namespace that has a usable
 * link-local address.  Logs a debug message if none is found.
 */
static void ip6_tnl_add_linklocal(struct inet6_dev *idev)
{
	struct net_device *link_dev;
	struct net *net = dev_net(idev->dev);

	/* first try to inherit the link-local address from the link device */
	if (idev->dev->iflink &&
	    (link_dev = __dev_get_by_index(net, idev->dev->iflink))) {
		if (!ipv6_inherit_linklocal(idev, link_dev))
			return;
	}
	/* then try to inherit it from any device */
	for_each_netdev(net, link_dev) {
		if (!ipv6_inherit_linklocal(idev, link_dev))
			return;
	}
	printk(KERN_DEBUG "init ip6-ip6: add_linklocal failed\n");
}
2490 | 2490 | ||
2491 | /* | 2491 | /* |
2492 | * Autoconfigure tunnel with a link-local address so routing protocols, | 2492 | * Autoconfigure tunnel with a link-local address so routing protocols, |
2493 | * DHCPv6, MLD etc. can be run over the virtual link | 2493 | * DHCPv6, MLD etc. can be run over the virtual link |
2494 | */ | 2494 | */ |
2495 | 2495 | ||
/*
 * Bring up IPv6 on an ip6 tunnel device (ARPHRD_TUNNEL6): create its
 * inet6_dev and inherit a link-local address so control protocols can
 * run over the virtual link.  Called under RTNL.
 */
static void addrconf_ip6_tnl_config(struct net_device *dev)
{
	struct inet6_dev *idev;

	ASSERT_RTNL();

	idev = addrconf_add_dev(dev);
	if (IS_ERR(idev)) {
		printk(KERN_DEBUG "init ip6-ip6: add_dev failed\n");
		return;
	}
	ip6_tnl_add_linklocal(idev);
}
2509 | 2509 | ||
/*
 * Netdevice event handler: keeps per-device IPv6 state (inet6_dev,
 * addresses, routes, sysctl and snmp registrations) in sync with the
 * life cycle of the underlying net_device.  Returns NOTIFY_OK, or a
 * notifier-encoded errno when REGISTER/CHANGENAME setup fails.
 */
static int addrconf_notify(struct notifier_block *this, unsigned long event,
			   void * data)
{
	struct net_device *dev = (struct net_device *) data;
	struct inet6_dev *idev = __in6_dev_get(dev);
	int run_pending = 0;	/* set when queued DAD work should run */
	int err;

	switch (event) {
	case NETDEV_REGISTER:
		/* Create the inet6_dev early, but only for sane MTUs. */
		if (!idev && dev->mtu >= IPV6_MIN_MTU) {
			idev = ipv6_add_dev(dev);
			if (!idev)
				return notifier_from_errno(-ENOMEM);
		}
		break;

	case NETDEV_UP:
	case NETDEV_CHANGE:
		/* Bonding slaves don't get their own IPv6 config. */
		if (dev->flags & IFF_SLAVE)
			break;

		if (event == NETDEV_UP) {
			if (!addrconf_qdisc_ok(dev)) {
				/* device is not ready yet. */
				printk(KERN_INFO
					"ADDRCONF(NETDEV_UP): %s: "
					"link is not ready\n",
					dev->name);
				break;
			}

			if (!idev && dev->mtu >= IPV6_MIN_MTU)
				idev = ipv6_add_dev(dev);

			if (idev) {
				idev->if_flags |= IF_READY;
				run_pending = 1;
			}
		} else {
			if (!addrconf_qdisc_ok(dev)) {
				/* device is still not ready. */
				break;
			}

			if (idev) {
				if (idev->if_flags & IF_READY)
					/* device is already configured. */
					break;
				idev->if_flags |= IF_READY;
			}

			printk(KERN_INFO
				"ADDRCONF(NETDEV_CHANGE): %s: "
				"link becomes ready\n",
				dev->name);

			run_pending = 1;
		}

		/* Per-link-type address autoconfiguration. */
		switch (dev->type) {
#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
		case ARPHRD_SIT:
			addrconf_sit_config(dev);
			break;
#endif
#if defined(CONFIG_NET_IPGRE) || defined(CONFIG_NET_IPGRE_MODULE)
		case ARPHRD_IPGRE:
			addrconf_gre_config(dev);
			break;
#endif
		case ARPHRD_TUNNEL6:
			addrconf_ip6_tnl_config(dev);
			break;
		case ARPHRD_LOOPBACK:
			init_loopback(dev);
			break;

		default:
			addrconf_dev_config(dev);
			break;
		}

		if (idev) {
			if (run_pending)
				addrconf_dad_run(idev);

			/*
			 * If the MTU changed during the interface down,
			 * when the interface up, the changed MTU must be
			 * reflected in the idev as well as routers.
			 */
			if (idev->cnf.mtu6 != dev->mtu &&
			    dev->mtu >= IPV6_MIN_MTU) {
				rt6_mtu_change(dev, dev->mtu);
				idev->cnf.mtu6 = dev->mtu;
			}
			idev->tstamp = jiffies;
			inet6_ifinfo_notify(RTM_NEWLINK, idev);

			/*
			 * If the changed mtu during down is lower than
			 * IPV6_MIN_MTU stop IPv6 on this interface.
			 */
			if (dev->mtu < IPV6_MIN_MTU)
				addrconf_ifdown(dev, 1);
		}
		break;

	case NETDEV_CHANGEMTU:
		if (idev && dev->mtu >= IPV6_MIN_MTU) {
			rt6_mtu_change(dev, dev->mtu);
			idev->cnf.mtu6 = dev->mtu;
			break;
		}

		/* MTU grew back to a usable size: (re)create the idev. */
		if (!idev && dev->mtu >= IPV6_MIN_MTU) {
			idev = ipv6_add_dev(dev);
			if (idev)
				break;
		}

		/*
		 * MTU falled under IPV6_MIN_MTU.
		 * Stop IPv6 on this interface.
		 */
		/* fall through */

	case NETDEV_DOWN:
	case NETDEV_UNREGISTER:
		/*
		 * Remove all addresses from this interface;
		 * UNREGISTER additionally destroys the inet6_dev.
		 */
		addrconf_ifdown(dev, event != NETDEV_DOWN);
		break;

	case NETDEV_CHANGENAME:
		/* Re-register name-keyed sysctl and snmp entries. */
		if (idev) {
			snmp6_unregister_dev(idev);
			addrconf_sysctl_unregister(idev);
			addrconf_sysctl_register(idev);
			err = snmp6_register_dev(idev);
			if (err)
				return notifier_from_errno(err);
		}
		break;

	case NETDEV_PRE_TYPE_CHANGE:
	case NETDEV_POST_TYPE_CHANGE:
		addrconf_type_change(dev, event);
		break;
	}

	return NOTIFY_OK;
}
2664 | 2664 | ||
/*
 * Netdevice notifier block: routes REGISTER/UP/DOWN/CHANGEMTU/... and
 * related events into addrconf_notify() so addrconf learns of devices
 * going up (and every other state change it must track).
 */
static struct notifier_block ipv6_dev_notf = {
	.notifier_call = addrconf_notify,
};
2671 | 2671 | ||
2672 | static void addrconf_type_change(struct net_device *dev, unsigned long event) | 2672 | static void addrconf_type_change(struct net_device *dev, unsigned long event) |
2673 | { | 2673 | { |
2674 | struct inet6_dev *idev; | 2674 | struct inet6_dev *idev; |
2675 | ASSERT_RTNL(); | 2675 | ASSERT_RTNL(); |
2676 | 2676 | ||
2677 | idev = __in6_dev_get(dev); | 2677 | idev = __in6_dev_get(dev); |
2678 | 2678 | ||
2679 | if (event == NETDEV_POST_TYPE_CHANGE) | 2679 | if (event == NETDEV_POST_TYPE_CHANGE) |
2680 | ipv6_mc_remap(idev); | 2680 | ipv6_mc_remap(idev); |
2681 | else if (event == NETDEV_PRE_TYPE_CHANGE) | 2681 | else if (event == NETDEV_PRE_TYPE_CHANGE) |
2682 | ipv6_mc_unmap(idev); | 2682 | ipv6_mc_unmap(idev); |
2683 | } | 2683 | } |
2684 | 2684 | ||
/*
 * Take IPv6 down on @dev.  @how == 0 is a soft down (NETDEV_DOWN:
 * addresses are removed but the inet6_dev survives for when the
 * device comes back up); @how != 0 means the device is going away
 * (the inet6_dev is marked dead, detached from the netdevice and
 * released).  Runs under RTNL.  Returns 0, or -ENODEV if the device
 * has no inet6_dev.
 */
static int addrconf_ifdown(struct net_device *dev, int how)
{
	struct net *net = dev_net(dev);
	struct inet6_dev *idev;
	struct inet6_ifaddr *ifa;
	int state, i;

	ASSERT_RTNL();

	/* Flush routes and neighbour entries for this device first. */
	rt6_ifdown(net, dev);
	neigh_ifdown(&nd_tbl, dev);

	idev = __in6_dev_get(dev);
	if (idev == NULL)
		return -ENODEV;

	/*
	 * Step 1: remove reference to ipv6 device from parent device.
	 * Do not dev_put!
	 */
	if (how) {
		idev->dead = 1;

		/* protected by rtnl_lock */
		rcu_assign_pointer(dev->ip6_ptr, NULL);

		/* Step 1.5: remove snmp6 entry */
		snmp6_unregister_dev(idev);

	}

	/* Step 2: clear hash table */
	for (i = 0; i < IN6_ADDR_HSIZE; i++) {
		struct hlist_head *h = &inet6_addr_lst[i];
		struct hlist_node *n;

		spin_lock_bh(&addrconf_hash_lock);
	restart:
		/* Restart after each removal: deletion invalidates the
		 * iteration position. */
		hlist_for_each_entry_rcu(ifa, n, h, addr_lst) {
			if (ifa->idev == idev) {
				hlist_del_init_rcu(&ifa->addr_lst);
				addrconf_del_timer(ifa);
				goto restart;
			}
		}
		spin_unlock_bh(&addrconf_hash_lock);
	}

	write_lock_bh(&idev->lock);

	/* Step 2: clear flags for stateless addrconf */
	if (!how)
		idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY);

#ifdef CONFIG_IPV6_PRIVACY
	/* del_timer() returning nonzero means the timer held a ref. */
	if (how && del_timer(&idev->regen_timer))
		in6_dev_put(idev);

	/* Step 3: clear tempaddr list */
	while (!list_empty(&idev->tempaddr_list)) {
		ifa = list_first_entry(&idev->tempaddr_list,
				       struct inet6_ifaddr, tmp_list);
		list_del(&ifa->tmp_list);
		/* Drop idev->lock before taking ifa->lock (lock order). */
		write_unlock_bh(&idev->lock);
		spin_lock_bh(&ifa->lock);

		if (ifa->ifpub) {
			in6_ifa_put(ifa->ifpub);
			ifa->ifpub = NULL;
		}
		spin_unlock_bh(&ifa->lock);
		in6_ifa_put(ifa);
		write_lock_bh(&idev->lock);
	}
#endif

	/* Step 4: remove all remaining addresses, notifying listeners
	 * for each one that was not already dead. */
	while (!list_empty(&idev->addr_list)) {
		ifa = list_first_entry(&idev->addr_list,
				       struct inet6_ifaddr, if_list);
		addrconf_del_timer(ifa);

		list_del(&ifa->if_list);

		write_unlock_bh(&idev->lock);

		spin_lock_bh(&ifa->state_lock);
		state = ifa->state;
		ifa->state = INET6_IFADDR_STATE_DEAD;
		spin_unlock_bh(&ifa->state_lock);

		if (state != INET6_IFADDR_STATE_DEAD) {
			__ipv6_ifa_notify(RTM_DELADDR, ifa);
			atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa);
		}
		in6_ifa_put(ifa);

		write_lock_bh(&idev->lock);
	}

	write_unlock_bh(&idev->lock);

	/* Step 5: Discard multicast list */
	if (how)
		ipv6_mc_destroy_dev(idev);
	else
		ipv6_mc_down(idev);

	idev->tstamp = jiffies;

	/* Last: Shot the device (if unregistered) */
	if (how) {
		addrconf_sysctl_unregister(idev);
		neigh_parms_release(&nd_tbl, idev->nd_parms);
		/* NOTE(review): neigh_ifdown() was already called near the
		 * top of this function; this second call looks redundant -
		 * confirm before removing. */
		neigh_ifdown(&nd_tbl, dev);
		in6_dev_put(idev);
	}
	return 0;
}
2803 | 2803 | ||
/*
 * Router-solicitation retransmit timer.  Re-sends an RS from @data's
 * address until a router advertisement is received (IF_RA_RCVD) or the
 * configured probe budget (cnf.rtr_solicits) is exhausted.
 */
static void addrconf_rs_timer(unsigned long data)
{
	struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data;
	struct inet6_dev *idev = ifp->idev;

	read_lock(&idev->lock);
	if (idev->dead || !(idev->if_flags & IF_READY))
		goto out;

	/* A forwarding node does not solicit routers. */
	if (idev->cnf.forwarding)
		goto out;

	/* Announcement received after solicitation was sent */
	if (idev->if_flags & IF_RA_RCVD)
		goto out;

	spin_lock(&ifp->lock);
	if (ifp->probes++ < idev->cnf.rtr_solicits) {
		/* The wait after the last probe can be shorter */
		addrconf_mod_timer(ifp, AC_RS,
				   (ifp->probes == idev->cnf.rtr_solicits) ?
				   idev->cnf.rtr_solicit_delay :
				   idev->cnf.rtr_solicit_interval);
		spin_unlock(&ifp->lock);

		/* Transmit outside ifp->lock; idev->lock is still held. */
		ndisc_send_rs(idev->dev, &ifp->addr, &in6addr_linklocal_allrouters);
	} else {
		spin_unlock(&ifp->lock);
		/*
		 * Note: we do not support deprecated "all on-link"
		 * assumption any longer.
		 */
		printk(KERN_DEBUG "%s: no IPv6 routers present\n",
		       idev->dev->name);
	}

out:
	read_unlock(&idev->lock);
	/* NOTE(review): presumably pairs with a hold taken when the timer
	 * was armed — confirm against the arming site. */
	in6_ifa_put(ifp);
}
2844 | 2844 | ||
2845 | /* | 2845 | /* |
2846 | * Duplicate Address Detection | 2846 | * Duplicate Address Detection |
2847 | */ | 2847 | */ |
2848 | static void addrconf_dad_kick(struct inet6_ifaddr *ifp) | 2848 | static void addrconf_dad_kick(struct inet6_ifaddr *ifp) |
2849 | { | 2849 | { |
2850 | unsigned long rand_num; | 2850 | unsigned long rand_num; |
2851 | struct inet6_dev *idev = ifp->idev; | 2851 | struct inet6_dev *idev = ifp->idev; |
2852 | 2852 | ||
2853 | if (ifp->flags & IFA_F_OPTIMISTIC) | 2853 | if (ifp->flags & IFA_F_OPTIMISTIC) |
2854 | rand_num = 0; | 2854 | rand_num = 0; |
2855 | else | 2855 | else |
2856 | rand_num = net_random() % (idev->cnf.rtr_solicit_delay ? : 1); | 2856 | rand_num = net_random() % (idev->cnf.rtr_solicit_delay ? : 1); |
2857 | 2857 | ||
2858 | ifp->probes = idev->cnf.dad_transmits; | 2858 | ifp->probes = idev->cnf.dad_transmits; |
2859 | addrconf_mod_timer(ifp, AC_DAD, rand_num); | 2859 | addrconf_mod_timer(ifp, AC_DAD, rand_num); |
2860 | } | 2860 | } |
2861 | 2861 | ||
/*
 * Begin Duplicate Address Detection for @ifp: join the solicited-node
 * multicast group, then either complete immediately (devices/flags that
 * are exempt from DAD), abort (device not ready), or arm the DAD timer.
 */
static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
{
	struct inet6_dev *idev = ifp->idev;
	struct net_device *dev = idev->dev;

	addrconf_join_solict(dev, &ifp->addr);

	/* Seed the PRNG with the low 32 bits of the address. */
	net_srandom(ifp->addr.s6_addr32[3]);

	read_lock_bh(&idev->lock);
	spin_lock(&ifp->lock);
	if (ifp->state == INET6_IFADDR_STATE_DEAD)
		goto out;

	/* DAD is skipped for NOARP/loopback devices, when disabled via
	 * accept_dad, for non-tentative addresses, or on IFA_F_NODAD. */
	if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) ||
	    idev->cnf.accept_dad < 1 ||
	    !(ifp->flags&IFA_F_TENTATIVE) ||
	    ifp->flags & IFA_F_NODAD) {
		ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED);
		spin_unlock(&ifp->lock);
		read_unlock_bh(&idev->lock);

		addrconf_dad_completed(ifp);
		return;
	}

	if (!(idev->if_flags & IF_READY)) {
		spin_unlock(&ifp->lock);
		read_unlock_bh(&idev->lock);
		/*
		 * If the device is not ready:
		 * - keep it tentative if it is a permanent address.
		 * - otherwise, kill it.
		 */
		in6_ifa_hold(ifp);
		addrconf_dad_stop(ifp, 0);
		return;
	}

	/*
	 * Optimistic nodes can start receiving
	 * Frames right away
	 */
	if (ifp->flags & IFA_F_OPTIMISTIC)
		ip6_ins_rt(ifp->rt);

	addrconf_dad_kick(ifp);
out:
	spin_unlock(&ifp->lock);
	read_unlock_bh(&idev->lock);
}
2913 | 2913 | ||
/*
 * DAD probe timer.  When the probe budget is exhausted without a
 * conflict, the address becomes valid; otherwise transmit the next
 * neighbour solicitation and re-arm.  @data is the tentative
 * inet6_ifaddr.
 */
static void addrconf_dad_timer(unsigned long data)
{
	struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data;
	struct inet6_dev *idev = ifp->idev;
	struct in6_addr mcaddr;

	if (!ifp->probes && addrconf_dad_end(ifp))
		goto out;

	read_lock(&idev->lock);
	if (idev->dead || !(idev->if_flags & IF_READY)) {
		read_unlock(&idev->lock);
		goto out;
	}

	spin_lock(&ifp->lock);
	if (ifp->state == INET6_IFADDR_STATE_DEAD) {
		spin_unlock(&ifp->lock);
		read_unlock(&idev->lock);
		goto out;
	}

	if (ifp->probes == 0) {
		/*
		 * DAD was successful
		 */

		ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED);
		spin_unlock(&ifp->lock);
		read_unlock(&idev->lock);

		addrconf_dad_completed(ifp);

		goto out;
	}

	/* Re-arm before transmitting; both locks dropped for the send. */
	ifp->probes--;
	addrconf_mod_timer(ifp, AC_DAD, ifp->idev->nd_parms->retrans_time);
	spin_unlock(&ifp->lock);
	read_unlock(&idev->lock);

	/* send a neighbour solicitation for our addr */
	addrconf_addr_solict_mult(&ifp->addr, &mcaddr);
	ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &in6addr_any);
out:
	/* NOTE(review): presumably drops the reference held for the timer
	 * callback — confirm against the arming site. */
	in6_ifa_put(ifp);
}
2961 | 2961 | ||
/*
 * Finalize a validated address: announce it (RTM_NEWADDR), and on
 * non-forwarding configurations start router solicitation when the
 * address is link-local on a non-loopback device.
 */
static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
{
	struct net_device *dev = ifp->idev->dev;

	/*
	 * Configure the address for reception. Now it is valid.
	 */

	ipv6_ifa_notify(RTM_NEWADDR, ifp);

	/* If added prefix is link local and forwarding is off,
	   start sending router solicitations.
	 */

	if ((ifp->idev->cnf.forwarding == 0 ||
	     ifp->idev->cnf.forwarding == 2) &&
	    ifp->idev->cnf.rtr_solicits > 0 &&
	    (dev->flags&IFF_LOOPBACK) == 0 &&
	    (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) {
		/*
		 * If a host as already performed a random delay
		 * [...] as part of DAD [...] there is no need
		 * to delay again before sending the first RS
		 */
		ndisc_send_rs(ifp->idev->dev, &ifp->addr, &in6addr_linklocal_allrouters);

		spin_lock_bh(&ifp->lock);
		ifp->probes = 1;	/* the first RS was just sent above */
		ifp->idev->if_flags |= IF_RS_SENT;
		addrconf_mod_timer(ifp, AC_RS, ifp->idev->cnf.rtr_solicit_interval);
		spin_unlock_bh(&ifp->lock);
	}
}
2995 | 2995 | ||
2996 | static void addrconf_dad_run(struct inet6_dev *idev) | 2996 | static void addrconf_dad_run(struct inet6_dev *idev) |
2997 | { | 2997 | { |
2998 | struct inet6_ifaddr *ifp; | 2998 | struct inet6_ifaddr *ifp; |
2999 | 2999 | ||
3000 | read_lock_bh(&idev->lock); | 3000 | read_lock_bh(&idev->lock); |
3001 | list_for_each_entry(ifp, &idev->addr_list, if_list) { | 3001 | list_for_each_entry(ifp, &idev->addr_list, if_list) { |
3002 | spin_lock(&ifp->lock); | 3002 | spin_lock(&ifp->lock); |
3003 | if (ifp->flags & IFA_F_TENTATIVE && | 3003 | if (ifp->flags & IFA_F_TENTATIVE && |
3004 | ifp->state == INET6_IFADDR_STATE_DAD) | 3004 | ifp->state == INET6_IFADDR_STATE_DAD) |
3005 | addrconf_dad_kick(ifp); | 3005 | addrconf_dad_kick(ifp); |
3006 | spin_unlock(&ifp->lock); | 3006 | spin_unlock(&ifp->lock); |
3007 | } | 3007 | } |
3008 | read_unlock_bh(&idev->lock); | 3008 | read_unlock_bh(&idev->lock); |
3009 | } | 3009 | } |
3010 | 3010 | ||
3011 | #ifdef CONFIG_PROC_FS | 3011 | #ifdef CONFIG_PROC_FS |
/* Iterator cursor for walking inet6_addr_lst in /proc/net/if_inet6. */
struct if6_iter_state {
	struct seq_net_private p;	/* per-netns seq_file private data */
	int bucket;			/* current inet6_addr_lst hash bucket */
};
3016 | 3016 | ||
3017 | static struct inet6_ifaddr *if6_get_first(struct seq_file *seq) | 3017 | static struct inet6_ifaddr *if6_get_first(struct seq_file *seq) |
3018 | { | 3018 | { |
3019 | struct inet6_ifaddr *ifa = NULL; | 3019 | struct inet6_ifaddr *ifa = NULL; |
3020 | struct if6_iter_state *state = seq->private; | 3020 | struct if6_iter_state *state = seq->private; |
3021 | struct net *net = seq_file_net(seq); | 3021 | struct net *net = seq_file_net(seq); |
3022 | 3022 | ||
3023 | for (state->bucket = 0; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) { | 3023 | for (state->bucket = 0; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) { |
3024 | struct hlist_node *n; | 3024 | struct hlist_node *n; |
3025 | hlist_for_each_entry_rcu_bh(ifa, n, &inet6_addr_lst[state->bucket], | 3025 | hlist_for_each_entry_rcu_bh(ifa, n, &inet6_addr_lst[state->bucket], |
3026 | addr_lst) | 3026 | addr_lst) |
3027 | if (net_eq(dev_net(ifa->idev->dev), net)) | 3027 | if (net_eq(dev_net(ifa->idev->dev), net)) |
3028 | return ifa; | 3028 | return ifa; |
3029 | } | 3029 | } |
3030 | return NULL; | 3030 | return NULL; |
3031 | } | 3031 | } |
3032 | 3032 | ||
/*
 * Return the address after @ifa in iteration order: first finish the
 * current hash chain via the RCU "continue" helper, then scan the
 * remaining buckets.  Entries from other namespaces are skipped.
 * Caller holds rcu_read_lock_bh().
 */
static struct inet6_ifaddr *if6_get_next(struct seq_file *seq,
					 struct inet6_ifaddr *ifa)
{
	struct if6_iter_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct hlist_node *n = &ifa->addr_lst;

	hlist_for_each_entry_continue_rcu_bh(ifa, n, addr_lst)
		if (net_eq(dev_net(ifa->idev->dev), net))
			return ifa;

	while (++state->bucket < IN6_ADDR_HSIZE) {
		hlist_for_each_entry_rcu_bh(ifa, n,
				     &inet6_addr_lst[state->bucket], addr_lst) {
			if (net_eq(dev_net(ifa->idev->dev), net))
				return ifa;
		}
	}

	return NULL;
}
3054 | 3054 | ||
3055 | static struct inet6_ifaddr *if6_get_idx(struct seq_file *seq, loff_t pos) | 3055 | static struct inet6_ifaddr *if6_get_idx(struct seq_file *seq, loff_t pos) |
3056 | { | 3056 | { |
3057 | struct inet6_ifaddr *ifa = if6_get_first(seq); | 3057 | struct inet6_ifaddr *ifa = if6_get_first(seq); |
3058 | 3058 | ||
3059 | if (ifa) | 3059 | if (ifa) |
3060 | while (pos && (ifa = if6_get_next(seq, ifa)) != NULL) | 3060 | while (pos && (ifa = if6_get_next(seq, ifa)) != NULL) |
3061 | --pos; | 3061 | --pos; |
3062 | return pos ? NULL : ifa; | 3062 | return pos ? NULL : ifa; |
3063 | } | 3063 | } |
3064 | 3064 | ||
/* seq_file .start: take the RCU-bh read lock and seek to entry *pos. */
static void *if6_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(rcu_bh)
{
	rcu_read_lock_bh();
	return if6_get_idx(seq, *pos);
}
3071 | 3071 | ||
3072 | static void *if6_seq_next(struct seq_file *seq, void *v, loff_t *pos) | 3072 | static void *if6_seq_next(struct seq_file *seq, void *v, loff_t *pos) |
3073 | { | 3073 | { |
3074 | struct inet6_ifaddr *ifa; | 3074 | struct inet6_ifaddr *ifa; |
3075 | 3075 | ||
3076 | ifa = if6_get_next(seq, v); | 3076 | ifa = if6_get_next(seq, v); |
3077 | ++*pos; | 3077 | ++*pos; |
3078 | return ifa; | 3078 | return ifa; |
3079 | } | 3079 | } |
3080 | 3080 | ||
/* seq_file .stop: release the RCU-bh lock taken in if6_seq_start(). */
static void if6_seq_stop(struct seq_file *seq, void *v)
	__releases(rcu_bh)
{
	rcu_read_unlock_bh();
}
3086 | 3086 | ||
3087 | static int if6_seq_show(struct seq_file *seq, void *v) | 3087 | static int if6_seq_show(struct seq_file *seq, void *v) |
3088 | { | 3088 | { |
3089 | struct inet6_ifaddr *ifp = (struct inet6_ifaddr *)v; | 3089 | struct inet6_ifaddr *ifp = (struct inet6_ifaddr *)v; |
3090 | seq_printf(seq, "%pi6 %02x %02x %02x %02x %8s\n", | 3090 | seq_printf(seq, "%pi6 %02x %02x %02x %02x %8s\n", |
3091 | &ifp->addr, | 3091 | &ifp->addr, |
3092 | ifp->idev->dev->ifindex, | 3092 | ifp->idev->dev->ifindex, |
3093 | ifp->prefix_len, | 3093 | ifp->prefix_len, |
3094 | ifp->scope, | 3094 | ifp->scope, |
3095 | ifp->flags, | 3095 | ifp->flags, |
3096 | ifp->idev->dev->name); | 3096 | ifp->idev->dev->name); |
3097 | return 0; | 3097 | return 0; |
3098 | } | 3098 | } |
3099 | 3099 | ||
/* seq_file iteration callbacks for /proc/net/if_inet6. */
static const struct seq_operations if6_seq_ops = {
	.start	= if6_seq_start,
	.next	= if6_seq_next,
	.show	= if6_seq_show,
	.stop	= if6_seq_stop,
};
3106 | 3106 | ||
/* Open /proc/net/if_inet6: per-netns seq_file with an if6_iter_state. */
static int if6_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &if6_seq_ops,
			    sizeof(struct if6_iter_state));
}
3112 | 3112 | ||
/* File operations for the read-only, seq_file-backed if_inet6 entry. */
static const struct file_operations if6_fops = {
	.owner		= THIS_MODULE,
	.open		= if6_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};
3120 | 3120 | ||
3121 | static int __net_init if6_proc_net_init(struct net *net) | 3121 | static int __net_init if6_proc_net_init(struct net *net) |
3122 | { | 3122 | { |
3123 | if (!proc_net_fops_create(net, "if_inet6", S_IRUGO, &if6_fops)) | 3123 | if (!proc_net_fops_create(net, "if_inet6", S_IRUGO, &if6_fops)) |
3124 | return -ENOMEM; | 3124 | return -ENOMEM; |
3125 | return 0; | 3125 | return 0; |
3126 | } | 3126 | } |
3127 | 3127 | ||
/* Remove /proc/net/if_inet6 when the network namespace goes away. */
static void __net_exit if6_proc_net_exit(struct net *net)
{
	proc_net_remove(net, "if_inet6");
}
3132 | 3132 | ||
/* Per-netns lifecycle hooks for the if_inet6 proc entry. */
static struct pernet_operations if6_proc_net_ops = {
	.init = if6_proc_net_init,
	.exit = if6_proc_net_exit,
};
3137 | 3137 | ||
/* Register the per-netns /proc/net/if_inet6 machinery at boot. */
int __init if6_proc_init(void)
{
	return register_pernet_subsys(&if6_proc_net_ops);
}
3142 | 3142 | ||
/* Tear down the per-netns /proc/net/if_inet6 machinery. */
void if6_proc_exit(void)
{
	unregister_pernet_subsys(&if6_proc_net_ops);
}
3147 | #endif /* CONFIG_PROC_FS */ | 3147 | #endif /* CONFIG_PROC_FS */ |
3148 | 3148 | ||
3149 | #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) | 3149 | #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) |
3150 | /* Check if address is a home address configured on any interface. */ | 3150 | /* Check if address is a home address configured on any interface. */ |
3151 | int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr) | 3151 | int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr) |
3152 | { | 3152 | { |
3153 | int ret = 0; | 3153 | int ret = 0; |
3154 | struct inet6_ifaddr *ifp = NULL; | 3154 | struct inet6_ifaddr *ifp = NULL; |
3155 | struct hlist_node *n; | 3155 | struct hlist_node *n; |
3156 | unsigned int hash = ipv6_addr_hash(addr); | 3156 | unsigned int hash = ipv6_addr_hash(addr); |
3157 | 3157 | ||
3158 | rcu_read_lock_bh(); | 3158 | rcu_read_lock_bh(); |
3159 | hlist_for_each_entry_rcu_bh(ifp, n, &inet6_addr_lst[hash], addr_lst) { | 3159 | hlist_for_each_entry_rcu_bh(ifp, n, &inet6_addr_lst[hash], addr_lst) { |
3160 | if (!net_eq(dev_net(ifp->idev->dev), net)) | 3160 | if (!net_eq(dev_net(ifp->idev->dev), net)) |
3161 | continue; | 3161 | continue; |
3162 | if (ipv6_addr_equal(&ifp->addr, addr) && | 3162 | if (ipv6_addr_equal(&ifp->addr, addr) && |
3163 | (ifp->flags & IFA_F_HOMEADDRESS)) { | 3163 | (ifp->flags & IFA_F_HOMEADDRESS)) { |
3164 | ret = 1; | 3164 | ret = 1; |
3165 | break; | 3165 | break; |
3166 | } | 3166 | } |
3167 | } | 3167 | } |
3168 | rcu_read_unlock_bh(); | 3168 | rcu_read_unlock_bh(); |
3169 | return ret; | 3169 | return ret; |
3170 | } | 3170 | } |
3171 | #endif | 3171 | #endif |
3172 | 3172 | ||
3173 | /* | 3173 | /* |
3174 | * Periodic address status verification | 3174 | * Periodic address status verification |
3175 | */ | 3175 | */ |
3176 | 3176 | ||
/*
 * Age every configured address: delete those whose valid lifetime
 * expired, deprecate those past their preferred lifetime, and (with
 * CONFIG_IPV6_PRIVACY) regenerate temporary addresses shortly before
 * they stop being preferred.  Finally re-arms addr_chk_timer for the
 * earliest upcoming event.
 */
static void addrconf_verify(unsigned long foo)
{
	unsigned long now, next, next_sec, next_sched;
	struct inet6_ifaddr *ifp;
	struct hlist_node *node;
	int i;

	rcu_read_lock_bh();
	spin_lock(&addrconf_verify_lock);
	now = jiffies;
	/* Worst-case rescan time; tightened below per-address. */
	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);

	del_timer(&addr_chk_timer);

	for (i = 0; i < IN6_ADDR_HSIZE; i++) {
restart:
		hlist_for_each_entry_rcu_bh(ifp, node,
					 &inet6_addr_lst[i], addr_lst) {
			unsigned long age;

			/* Permanent addresses never age out. */
			if (ifp->flags & IFA_F_PERMANENT)
				continue;

			spin_lock(&ifp->lock);
			/* We try to batch several events at once. */
			age = (now - ifp->tstamp + ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

			if (ifp->valid_lft != INFINITY_LIFE_TIME &&
			    age >= ifp->valid_lft) {
				spin_unlock(&ifp->lock);
				in6_ifa_hold(ifp);
				ipv6_del_addr(ifp);
				/* Hash chain may have changed; rescan bucket. */
				goto restart;
			} else if (ifp->prefered_lft == INFINITY_LIFE_TIME) {
				spin_unlock(&ifp->lock);
				continue;
			} else if (age >= ifp->prefered_lft) {
				/* jiffies - ifp->tstamp > age >= ifp->prefered_lft */
				int deprecate = 0;

				if (!(ifp->flags&IFA_F_DEPRECATED)) {
					deprecate = 1;
					ifp->flags |= IFA_F_DEPRECATED;
				}

				if (time_before(ifp->tstamp + ifp->valid_lft * HZ, next))
					next = ifp->tstamp + ifp->valid_lft * HZ;

				spin_unlock(&ifp->lock);

				if (deprecate) {
					in6_ifa_hold(ifp);

					ipv6_ifa_notify(0, ifp);
					in6_ifa_put(ifp);
					goto restart;
				}
#ifdef CONFIG_IPV6_PRIVACY
			} else if ((ifp->flags&IFA_F_TEMPORARY) &&
				   !(ifp->flags&IFA_F_TENTATIVE)) {
				/* regen_advance = time (in seconds) for
				 * regen_max_retry full DAD rounds of
				 * dad_transmits probes each. */
				unsigned long regen_advance = ifp->idev->cnf.regen_max_retry *
					ifp->idev->cnf.dad_transmits *
					ifp->idev->nd_parms->retrans_time / HZ;

				if (age >= ifp->prefered_lft - regen_advance) {
					struct inet6_ifaddr *ifpub = ifp->ifpub;
					if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next))
						next = ifp->tstamp + ifp->prefered_lft * HZ;
					if (!ifp->regen_count && ifpub) {
						ifp->regen_count++;
						in6_ifa_hold(ifp);
						in6_ifa_hold(ifpub);
						spin_unlock(&ifp->lock);

						spin_lock(&ifpub->lock);
						ifpub->regen_count = 0;
						spin_unlock(&ifpub->lock);
						ipv6_create_tempaddr(ifpub, ifp);
						in6_ifa_put(ifpub);
						in6_ifa_put(ifp);
						goto restart;
					}
				} else if (time_before(ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ, next))
					next = ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ;
				spin_unlock(&ifp->lock);
#endif
			} else {
				/* ifp->prefered_lft <= ifp->valid_lft */
				if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next))
					next = ifp->tstamp + ifp->prefered_lft * HZ;
				spin_unlock(&ifp->lock);
			}
		}
	}

	next_sec = round_jiffies_up(next);
	next_sched = next;

	/* If rounded timeout is accurate enough, accept it. */
	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
		next_sched = next_sec;

	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
	if (time_before(next_sched, jiffies + ADDRCONF_TIMER_FUZZ_MAX))
		next_sched = jiffies + ADDRCONF_TIMER_FUZZ_MAX;

	ADBG((KERN_DEBUG "now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n",
	      now, next, next_sec, next_sched));

	addr_chk_timer.expires = next_sched;
	add_timer(&addr_chk_timer);
	spin_unlock(&addrconf_verify_lock);
	rcu_read_unlock_bh();
}
3291 | 3291 | ||
3292 | static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local) | 3292 | static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local) |
3293 | { | 3293 | { |
3294 | struct in6_addr *pfx = NULL; | 3294 | struct in6_addr *pfx = NULL; |
3295 | 3295 | ||
3296 | if (addr) | 3296 | if (addr) |
3297 | pfx = nla_data(addr); | 3297 | pfx = nla_data(addr); |
3298 | 3298 | ||
3299 | if (local) { | 3299 | if (local) { |
3300 | if (pfx && nla_memcmp(local, pfx, sizeof(*pfx))) | 3300 | if (pfx && nla_memcmp(local, pfx, sizeof(*pfx))) |
3301 | pfx = NULL; | 3301 | pfx = NULL; |
3302 | else | 3302 | else |
3303 | pfx = nla_data(local); | 3303 | pfx = nla_data(local); |
3304 | } | 3304 | } |
3305 | 3305 | ||
3306 | return pfx; | 3306 | return pfx; |
3307 | } | 3307 | } |
3308 | 3308 | ||
/* Netlink attribute validation policy for IPv6 RTM_*ADDR requests. */
static const struct nla_policy ifa_ipv6_policy[IFA_MAX+1] = {
	[IFA_ADDRESS]		= { .len = sizeof(struct in6_addr) },
	[IFA_LOCAL]		= { .len = sizeof(struct in6_addr) },
	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
};
3314 | 3314 | ||
3315 | static int | 3315 | static int |
3316 | inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) | 3316 | inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) |
3317 | { | 3317 | { |
3318 | struct net *net = sock_net(skb->sk); | 3318 | struct net *net = sock_net(skb->sk); |
3319 | struct ifaddrmsg *ifm; | 3319 | struct ifaddrmsg *ifm; |
3320 | struct nlattr *tb[IFA_MAX+1]; | 3320 | struct nlattr *tb[IFA_MAX+1]; |
3321 | struct in6_addr *pfx; | 3321 | struct in6_addr *pfx; |
3322 | int err; | 3322 | int err; |
3323 | 3323 | ||
3324 | err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); | 3324 | err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); |
3325 | if (err < 0) | 3325 | if (err < 0) |
3326 | return err; | 3326 | return err; |
3327 | 3327 | ||
3328 | ifm = nlmsg_data(nlh); | 3328 | ifm = nlmsg_data(nlh); |
3329 | pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]); | 3329 | pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]); |
3330 | if (pfx == NULL) | 3330 | if (pfx == NULL) |
3331 | return -EINVAL; | 3331 | return -EINVAL; |
3332 | 3332 | ||
3333 | return inet6_addr_del(net, ifm->ifa_index, pfx, ifm->ifa_prefixlen); | 3333 | return inet6_addr_del(net, ifm->ifa_index, pfx, ifm->ifa_prefixlen); |
3334 | } | 3334 | } |
3335 | 3335 | ||
/*
 * Apply new lifetimes/flags to an existing address (RTM_NEWADDR with
 * NLM_F_REPLACE).
 *
 * @ifp:          address being modified
 * @ifa_flags:    user-settable flags (IFA_F_NODAD / IFA_F_HOMEADDRESS)
 * @prefered_lft: preferred lifetime in seconds (INFINITY_LIFE_TIME = forever)
 * @valid_lft:    valid lifetime in seconds; must be non-zero and
 *                >= prefered_lft
 *
 * Returns 0 on success, -EINVAL on inconsistent lifetimes.
 */
static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags,
			     u32 prefered_lft, u32 valid_lft)
{
	u32 flags;
	clock_t expires;
	unsigned long timeout;

	/* A zero valid lifetime, or preferred outliving valid, is invalid. */
	if (!valid_lft || (prefered_lft > valid_lft))
		return -EINVAL;

	timeout = addrconf_timeout_fixup(valid_lft, HZ);
	if (addrconf_finite_timeout(timeout)) {
		expires = jiffies_to_clock_t(timeout * HZ);
		valid_lft = timeout;
		flags = RTF_EXPIRES;
	} else {
		/* Infinite valid lifetime: the address becomes permanent. */
		expires = 0;
		flags = 0;
		ifa_flags |= IFA_F_PERMANENT;
	}

	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
	if (addrconf_finite_timeout(timeout)) {
		if (timeout == 0)
			ifa_flags |= IFA_F_DEPRECATED;
		prefered_lft = timeout;
	}

	spin_lock_bh(&ifp->lock);
	/* Replace only the user-settable flag bits; keep everything else. */
	ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED | IFA_F_PERMANENT | IFA_F_NODAD | IFA_F_HOMEADDRESS)) | ifa_flags;
	ifp->tstamp = jiffies;
	ifp->valid_lft = valid_lft;
	ifp->prefered_lft = prefered_lft;

	spin_unlock_bh(&ifp->lock);
	/* NOTE(review): ifp->flags is re-read here outside ifp->lock. */
	if (!(ifp->flags&IFA_F_TENTATIVE))
		ipv6_ifa_notify(0, ifp);

	/* Refresh the on-link prefix route with the new expiry/flags. */
	addrconf_prefix_route(&ifp->addr, ifp->prefix_len, ifp->idev->dev,
			      expires, flags);
	addrconf_verify(0);

	return 0;
}
3380 | 3380 | ||
/*
 * RTM_NEWADDR handler: add a new IPv6 address, or - with NLM_F_REPLACE -
 * modify the lifetimes/flags of an existing one.  Returns 0 or a
 * negative errno.
 */
static int
inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct ifaddrmsg *ifm;
	struct nlattr *tb[IFA_MAX+1];
	struct in6_addr *pfx;
	struct inet6_ifaddr *ifa;
	struct net_device *dev;
	u32 valid_lft = INFINITY_LIFE_TIME, preferred_lft = INFINITY_LIFE_TIME;
	u8 ifa_flags;
	int err;

	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
	if (err < 0)
		return err;

	ifm = nlmsg_data(nlh);
	pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]);
	if (pfx == NULL)
		return -EINVAL;

	/* Lifetimes come from IFA_CACHEINFO; default is "forever". */
	if (tb[IFA_CACHEINFO]) {
		struct ifa_cacheinfo *ci;

		ci = nla_data(tb[IFA_CACHEINFO]);
		valid_lft = ci->ifa_valid;
		preferred_lft = ci->ifa_prefered;
	} else {
		preferred_lft = INFINITY_LIFE_TIME;
		valid_lft = INFINITY_LIFE_TIME;
	}

	dev = __dev_get_by_index(net, ifm->ifa_index);
	if (dev == NULL)
		return -ENODEV;

	/* We ignore other flags so far. */
	ifa_flags = ifm->ifa_flags & (IFA_F_NODAD | IFA_F_HOMEADDRESS);

	ifa = ipv6_get_ifaddr(net, pfx, dev, 1);
	if (ifa == NULL) {
		/*
		 * It would be best to check for !NLM_F_CREATE here but
		 * userspace already relies on not having to provide this.
		 */
		return inet6_addr_add(net, ifm->ifa_index, pfx,
				      ifm->ifa_prefixlen, ifa_flags,
				      preferred_lft, valid_lft);
	}

	/* Address exists: only proceed when the caller asked to replace. */
	if (nlh->nlmsg_flags & NLM_F_EXCL ||
	    !(nlh->nlmsg_flags & NLM_F_REPLACE))
		err = -EEXIST;
	else
		err = inet6_addr_modify(ifa, ifa_flags, preferred_lft, valid_lft);

	/* Drop the reference taken by ipv6_get_ifaddr(). */
	in6_ifa_put(ifa);

	return err;
}
3442 | 3442 | ||
3443 | static void put_ifaddrmsg(struct nlmsghdr *nlh, u8 prefixlen, u8 flags, | 3443 | static void put_ifaddrmsg(struct nlmsghdr *nlh, u8 prefixlen, u8 flags, |
3444 | u8 scope, int ifindex) | 3444 | u8 scope, int ifindex) |
3445 | { | 3445 | { |
3446 | struct ifaddrmsg *ifm; | 3446 | struct ifaddrmsg *ifm; |
3447 | 3447 | ||
3448 | ifm = nlmsg_data(nlh); | 3448 | ifm = nlmsg_data(nlh); |
3449 | ifm->ifa_family = AF_INET6; | 3449 | ifm->ifa_family = AF_INET6; |
3450 | ifm->ifa_prefixlen = prefixlen; | 3450 | ifm->ifa_prefixlen = prefixlen; |
3451 | ifm->ifa_flags = flags; | 3451 | ifm->ifa_flags = flags; |
3452 | ifm->ifa_scope = scope; | 3452 | ifm->ifa_scope = scope; |
3453 | ifm->ifa_index = ifindex; | 3453 | ifm->ifa_index = ifindex; |
3454 | } | 3454 | } |
3455 | 3455 | ||
3456 | static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp, | 3456 | static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp, |
3457 | unsigned long tstamp, u32 preferred, u32 valid) | 3457 | unsigned long tstamp, u32 preferred, u32 valid) |
3458 | { | 3458 | { |
3459 | struct ifa_cacheinfo ci; | 3459 | struct ifa_cacheinfo ci; |
3460 | 3460 | ||
3461 | ci.cstamp = cstamp_delta(cstamp); | 3461 | ci.cstamp = cstamp_delta(cstamp); |
3462 | ci.tstamp = cstamp_delta(tstamp); | 3462 | ci.tstamp = cstamp_delta(tstamp); |
3463 | ci.ifa_prefered = preferred; | 3463 | ci.ifa_prefered = preferred; |
3464 | ci.ifa_valid = valid; | 3464 | ci.ifa_valid = valid; |
3465 | 3465 | ||
3466 | return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci); | 3466 | return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci); |
3467 | } | 3467 | } |
3468 | 3468 | ||
3469 | static inline int rt_scope(int ifa_scope) | 3469 | static inline int rt_scope(int ifa_scope) |
3470 | { | 3470 | { |
3471 | if (ifa_scope & IFA_HOST) | 3471 | if (ifa_scope & IFA_HOST) |
3472 | return RT_SCOPE_HOST; | 3472 | return RT_SCOPE_HOST; |
3473 | else if (ifa_scope & IFA_LINK) | 3473 | else if (ifa_scope & IFA_LINK) |
3474 | return RT_SCOPE_LINK; | 3474 | return RT_SCOPE_LINK; |
3475 | else if (ifa_scope & IFA_SITE) | 3475 | else if (ifa_scope & IFA_SITE) |
3476 | return RT_SCOPE_SITE; | 3476 | return RT_SCOPE_SITE; |
3477 | else | 3477 | else |
3478 | return RT_SCOPE_UNIVERSE; | 3478 | return RT_SCOPE_UNIVERSE; |
3479 | } | 3479 | } |
3480 | 3480 | ||
/* Worst-case netlink message size for a single address notification. */
static inline int inet6_ifaddr_msgsize(void)
{
	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
	       + nla_total_size(16) /* IFA_ADDRESS */
	       + nla_total_size(sizeof(struct ifa_cacheinfo));
}
3487 | 3487 | ||
/*
 * Fill one RTM_NEWADDR message describing a unicast address.
 * Returns nlmsg_end()'s result, or -EMSGSIZE if @skb ran out of room.
 */
static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
			     u32 pid, u32 seq, int event, unsigned int flags)
{
	struct nlmsghdr *nlh;
	u32 preferred, valid;

	nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	put_ifaddrmsg(nlh, ifa->prefix_len, ifa->flags, rt_scope(ifa->scope),
		      ifa->idev->dev->ifindex);

	if (!(ifa->flags&IFA_F_PERMANENT)) {
		/* Report remaining lifetimes, aged by time since tstamp. */
		preferred = ifa->prefered_lft;
		valid = ifa->valid_lft;
		if (preferred != INFINITY_LIFE_TIME) {
			long tval = (jiffies - ifa->tstamp)/HZ;
			if (preferred > tval)
				preferred -= tval;
			else
				preferred = 0;
			/*
			 * NOTE(review): valid is only aged inside this
			 * branch; an address with infinite preferred but
			 * finite valid lifetime reports its unadjusted
			 * valid_lft - confirm whether that is intended.
			 */
			if (valid != INFINITY_LIFE_TIME) {
				if (valid > tval)
					valid -= tval;
				else
					valid = 0;
			}
		}
	} else {
		preferred = INFINITY_LIFE_TIME;
		valid = INFINITY_LIFE_TIME;
	}

	if (nla_put(skb, IFA_ADDRESS, 16, &ifa->addr) < 0 ||
	    put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0) {
		nlmsg_cancel(skb, nlh);
		return -EMSGSIZE;
	}

	return nlmsg_end(skb, nlh);
}
3530 | 3530 | ||
3531 | static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, | 3531 | static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, |
3532 | u32 pid, u32 seq, int event, u16 flags) | 3532 | u32 pid, u32 seq, int event, u16 flags) |
3533 | { | 3533 | { |
3534 | struct nlmsghdr *nlh; | 3534 | struct nlmsghdr *nlh; |
3535 | u8 scope = RT_SCOPE_UNIVERSE; | 3535 | u8 scope = RT_SCOPE_UNIVERSE; |
3536 | int ifindex = ifmca->idev->dev->ifindex; | 3536 | int ifindex = ifmca->idev->dev->ifindex; |
3537 | 3537 | ||
3538 | if (ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE) | 3538 | if (ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE) |
3539 | scope = RT_SCOPE_SITE; | 3539 | scope = RT_SCOPE_SITE; |
3540 | 3540 | ||
3541 | nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); | 3541 | nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); |
3542 | if (nlh == NULL) | 3542 | if (nlh == NULL) |
3543 | return -EMSGSIZE; | 3543 | return -EMSGSIZE; |
3544 | 3544 | ||
3545 | put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex); | 3545 | put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex); |
3546 | if (nla_put(skb, IFA_MULTICAST, 16, &ifmca->mca_addr) < 0 || | 3546 | if (nla_put(skb, IFA_MULTICAST, 16, &ifmca->mca_addr) < 0 || |
3547 | put_cacheinfo(skb, ifmca->mca_cstamp, ifmca->mca_tstamp, | 3547 | put_cacheinfo(skb, ifmca->mca_cstamp, ifmca->mca_tstamp, |
3548 | INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) { | 3548 | INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) { |
3549 | nlmsg_cancel(skb, nlh); | 3549 | nlmsg_cancel(skb, nlh); |
3550 | return -EMSGSIZE; | 3550 | return -EMSGSIZE; |
3551 | } | 3551 | } |
3552 | 3552 | ||
3553 | return nlmsg_end(skb, nlh); | 3553 | return nlmsg_end(skb, nlh); |
3554 | } | 3554 | } |
3555 | 3555 | ||
3556 | static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, | 3556 | static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, |
3557 | u32 pid, u32 seq, int event, unsigned int flags) | 3557 | u32 pid, u32 seq, int event, unsigned int flags) |
3558 | { | 3558 | { |
3559 | struct nlmsghdr *nlh; | 3559 | struct nlmsghdr *nlh; |
3560 | u8 scope = RT_SCOPE_UNIVERSE; | 3560 | u8 scope = RT_SCOPE_UNIVERSE; |
3561 | int ifindex = ifaca->aca_idev->dev->ifindex; | 3561 | int ifindex = ifaca->aca_idev->dev->ifindex; |
3562 | 3562 | ||
3563 | if (ipv6_addr_scope(&ifaca->aca_addr) & IFA_SITE) | 3563 | if (ipv6_addr_scope(&ifaca->aca_addr) & IFA_SITE) |
3564 | scope = RT_SCOPE_SITE; | 3564 | scope = RT_SCOPE_SITE; |
3565 | 3565 | ||
3566 | nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); | 3566 | nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); |
3567 | if (nlh == NULL) | 3567 | if (nlh == NULL) |
3568 | return -EMSGSIZE; | 3568 | return -EMSGSIZE; |
3569 | 3569 | ||
3570 | put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex); | 3570 | put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex); |
3571 | if (nla_put(skb, IFA_ANYCAST, 16, &ifaca->aca_addr) < 0 || | 3571 | if (nla_put(skb, IFA_ANYCAST, 16, &ifaca->aca_addr) < 0 || |
3572 | put_cacheinfo(skb, ifaca->aca_cstamp, ifaca->aca_tstamp, | 3572 | put_cacheinfo(skb, ifaca->aca_cstamp, ifaca->aca_tstamp, |
3573 | INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) { | 3573 | INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) { |
3574 | nlmsg_cancel(skb, nlh); | 3574 | nlmsg_cancel(skb, nlh); |
3575 | return -EMSGSIZE; | 3575 | return -EMSGSIZE; |
3576 | } | 3576 | } |
3577 | 3577 | ||
3578 | return nlmsg_end(skb, nlh); | 3578 | return nlmsg_end(skb, nlh); |
3579 | } | 3579 | } |
3580 | 3580 | ||
/* Address classes that can be dumped via the RTM_GET* dump handlers. */
enum addr_type_t {
	UNICAST_ADDR,	/* regular and temporary unicast addresses */
	MULTICAST_ADDR,	/* joined multicast groups */
	ANYCAST_ADDR,	/* configured anycast addresses */
};
3586 | 3586 | ||
/*
 * Dump the addresses of one kind (@type) on @idev into @skb, skipping
 * the first @s_ip_idx entries (already sent in a previous dump pass)
 * and reporting how far we got through @p_ip_idx.
 * Called with rcu_read_lock() held.
 * Returns <= 0 when @skb filled up (dump will be resumed), > 0 when done.
 */
static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
			  struct netlink_callback *cb, enum addr_type_t type,
			  int s_ip_idx, int *p_ip_idx)
{
	struct ifmcaddr6 *ifmca;
	struct ifacaddr6 *ifaca;
	int err = 1;
	int ip_idx = *p_ip_idx;

	read_lock_bh(&idev->lock);
	switch (type) {
	case UNICAST_ADDR: {
		struct inet6_ifaddr *ifa;

		/* unicast address incl. temp addr */
		list_for_each_entry(ifa, &idev->addr_list, if_list) {
			if (++ip_idx < s_ip_idx)
				continue;
			err = inet6_fill_ifaddr(skb, ifa,
						NETLINK_CB(cb->skb).pid,
						cb->nlh->nlmsg_seq,
						RTM_NEWADDR,
						NLM_F_MULTI);
			if (err <= 0)
				break;
		}
		break;
	}
	case MULTICAST_ADDR:
		/* multicast address */
		for (ifmca = idev->mc_list; ifmca;
		     ifmca = ifmca->next, ip_idx++) {
			if (ip_idx < s_ip_idx)
				continue;
			err = inet6_fill_ifmcaddr(skb, ifmca,
						  NETLINK_CB(cb->skb).pid,
						  cb->nlh->nlmsg_seq,
						  RTM_GETMULTICAST,
						  NLM_F_MULTI);
			if (err <= 0)
				break;
		}
		break;
	case ANYCAST_ADDR:
		/* anycast address */
		for (ifaca = idev->ac_list; ifaca;
		     ifaca = ifaca->aca_next, ip_idx++) {
			if (ip_idx < s_ip_idx)
				continue;
			err = inet6_fill_ifacaddr(skb, ifaca,
						  NETLINK_CB(cb->skb).pid,
						  cb->nlh->nlmsg_seq,
						  RTM_GETANYCAST,
						  NLM_F_MULTI);
			if (err <= 0)
				break;
		}
		break;
	default:
		break;
	}
	read_unlock_bh(&idev->lock);
	/* Hand the resume index back to the caller for the next pass. */
	*p_ip_idx = ip_idx;
	return err;
}
3653 | 3653 | ||
/*
 * Walk every net_device hash chain under RCU and dump addresses of
 * @type for each device that has IPv6 enabled.  Resume state is kept
 * in cb->args: args[0] = hash bucket, args[1] = device index within
 * the bucket, args[2] = address index within the device.
 */
static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
			   enum addr_type_t type)
{
	struct net *net = sock_net(skb->sk);
	int h, s_h;
	int idx, ip_idx;
	int s_idx, s_ip_idx;
	struct net_device *dev;
	struct inet6_dev *idev;
	struct hlist_head *head;
	struct hlist_node *node;

	/* Pick up where the previous dump pass left off. */
	s_h = cb->args[0];
	s_idx = idx = cb->args[1];
	s_ip_idx = ip_idx = cb->args[2];

	rcu_read_lock();
	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
		head = &net->dev_index_head[h];
		hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
			if (idx < s_idx)
				goto cont;
			/* Past the resume point: restart address index. */
			if (h > s_h || idx > s_idx)
				s_ip_idx = 0;
			ip_idx = 0;
			idev = __in6_dev_get(dev);
			if (!idev)
				goto cont;

			if (in6_dump_addrs(idev, skb, cb, type,
					   s_ip_idx, &ip_idx) <= 0)
				goto done;	/* skb full: stop and save state */
cont:
			idx++;
		}
	}
done:
	rcu_read_unlock();
	/* Save resume state for the next invocation. */
	cb->args[0] = h;
	cb->args[1] = idx;
	cb->args[2] = ip_idx;

	return skb->len;
}
3699 | 3699 | ||
3700 | static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) | 3700 | static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) |
3701 | { | 3701 | { |
3702 | enum addr_type_t type = UNICAST_ADDR; | 3702 | enum addr_type_t type = UNICAST_ADDR; |
3703 | 3703 | ||
3704 | return inet6_dump_addr(skb, cb, type); | 3704 | return inet6_dump_addr(skb, cb, type); |
3705 | } | 3705 | } |
3706 | 3706 | ||
3707 | static int inet6_dump_ifmcaddr(struct sk_buff *skb, struct netlink_callback *cb) | 3707 | static int inet6_dump_ifmcaddr(struct sk_buff *skb, struct netlink_callback *cb) |
3708 | { | 3708 | { |
3709 | enum addr_type_t type = MULTICAST_ADDR; | 3709 | enum addr_type_t type = MULTICAST_ADDR; |
3710 | 3710 | ||
3711 | return inet6_dump_addr(skb, cb, type); | 3711 | return inet6_dump_addr(skb, cb, type); |
3712 | } | 3712 | } |
3713 | 3713 | ||
3714 | 3714 | ||
3715 | static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb) | 3715 | static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb) |
3716 | { | 3716 | { |
3717 | enum addr_type_t type = ANYCAST_ADDR; | 3717 | enum addr_type_t type = ANYCAST_ADDR; |
3718 | 3718 | ||
3719 | return inet6_dump_addr(skb, cb, type); | 3719 | return inet6_dump_addr(skb, cb, type); |
3720 | } | 3720 | } |
3721 | 3721 | ||
/*
 * RTM_GETADDR (non-dump) handler: look up a single address, optionally
 * restricted to the device named by ifa_index, and unicast a
 * RTM_NEWADDR reply back to the requester.  Returns 0 or a negative
 * errno.
 */
static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh,
			     void *arg)
{
	struct net *net = sock_net(in_skb->sk);
	struct ifaddrmsg *ifm;
	struct nlattr *tb[IFA_MAX+1];
	struct in6_addr *addr = NULL;
	struct net_device *dev = NULL;
	struct inet6_ifaddr *ifa;
	struct sk_buff *skb;
	int err;

	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
	if (err < 0)
		goto errout;

	addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]);
	if (addr == NULL) {
		err = -EINVAL;
		goto errout;
	}

	/* ifa_index == 0 means "any device". */
	ifm = nlmsg_data(nlh);
	if (ifm->ifa_index)
		dev = __dev_get_by_index(net, ifm->ifa_index);

	/* Takes a reference on the returned ifa; dropped at errout_ifa. */
	ifa = ipv6_get_ifaddr(net, addr, dev, 1);
	if (!ifa) {
		err = -EADDRNOTAVAIL;
		goto errout;
	}

	skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_KERNEL);
	if (!skb) {
		err = -ENOBUFS;
		goto errout_ifa;
	}

	err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).pid,
				nlh->nlmsg_seq, RTM_NEWADDR, 0);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in inet6_ifaddr_msgsize() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout_ifa;
	}
	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
errout_ifa:
	in6_ifa_put(ifa);
errout:
	return err;
}
3774 | 3774 | ||
/*
 * Broadcast an address event (@event, e.g. RTM_NEWADDR/RTM_DELADDR)
 * to RTNLGRP_IPV6_IFADDR listeners.  Allocation/fill failures are
 * reported via rtnl_set_sk_err() rather than returned.
 */
static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
{
	struct sk_buff *skb;
	struct net *net = dev_net(ifa->idev->dev);
	int err = -ENOBUFS;

	skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_ATOMIC);
	if (skb == NULL)
		goto errout;

	err = inet6_fill_ifaddr(skb, ifa, 0, 0, event, 0);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in inet6_ifaddr_msgsize() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV6_IFADDR, err);
}
3798 | 3798 | ||
/*
 * ipv6_store_devconf - flatten an ipv6_devconf into an __s32 array
 *
 * Copies every tunable from @cnf into @array, indexed by the DEVCONF_*
 * constants, for export via the IFLA_INET6_CONF netlink attribute.
 * @bytes is the size of @array in bytes and must hold at least
 * DEVCONF_MAX 32-bit slots.  Slots belonging to features that are
 * compiled out are left zeroed.
 */
static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
				      __s32 *array, int bytes)
{
	BUG_ON(bytes < (DEVCONF_MAX * 4));

	/* Zero first so conditionally-compiled entries read back as 0. */
	memset(array, 0, bytes);
	array[DEVCONF_FORWARDING] = cnf->forwarding;
	array[DEVCONF_HOPLIMIT] = cnf->hop_limit;
	array[DEVCONF_MTU6] = cnf->mtu6;
	array[DEVCONF_ACCEPT_RA] = cnf->accept_ra;
	array[DEVCONF_ACCEPT_REDIRECTS] = cnf->accept_redirects;
	array[DEVCONF_AUTOCONF] = cnf->autoconf;
	array[DEVCONF_DAD_TRANSMITS] = cnf->dad_transmits;
	array[DEVCONF_RTR_SOLICITS] = cnf->rtr_solicits;
	/* Interval/delay are kept in jiffies; export as milliseconds. */
	array[DEVCONF_RTR_SOLICIT_INTERVAL] =
		jiffies_to_msecs(cnf->rtr_solicit_interval);
	array[DEVCONF_RTR_SOLICIT_DELAY] =
		jiffies_to_msecs(cnf->rtr_solicit_delay);
	array[DEVCONF_FORCE_MLD_VERSION] = cnf->force_mld_version;
#ifdef CONFIG_IPV6_PRIVACY
	array[DEVCONF_USE_TEMPADDR] = cnf->use_tempaddr;
	array[DEVCONF_TEMP_VALID_LFT] = cnf->temp_valid_lft;
	array[DEVCONF_TEMP_PREFERED_LFT] = cnf->temp_prefered_lft;
	array[DEVCONF_REGEN_MAX_RETRY] = cnf->regen_max_retry;
	array[DEVCONF_MAX_DESYNC_FACTOR] = cnf->max_desync_factor;
#endif
	array[DEVCONF_MAX_ADDRESSES] = cnf->max_addresses;
	array[DEVCONF_ACCEPT_RA_DEFRTR] = cnf->accept_ra_defrtr;
	array[DEVCONF_ACCEPT_RA_PINFO] = cnf->accept_ra_pinfo;
#ifdef CONFIG_IPV6_ROUTER_PREF
	array[DEVCONF_ACCEPT_RA_RTR_PREF] = cnf->accept_ra_rtr_pref;
	array[DEVCONF_RTR_PROBE_INTERVAL] =
		jiffies_to_msecs(cnf->rtr_probe_interval);
#ifdef CONFIG_IPV6_ROUTE_INFO
	array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen;
#endif
#endif
	array[DEVCONF_PROXY_NDP] = cnf->proxy_ndp;
	array[DEVCONF_ACCEPT_SOURCE_ROUTE] = cnf->accept_source_route;
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	array[DEVCONF_OPTIMISTIC_DAD] = cnf->optimistic_dad;
#endif
#ifdef CONFIG_IPV6_MROUTE
	array[DEVCONF_MC_FORWARDING] = cnf->mc_forwarding;
#endif
	array[DEVCONF_DISABLE_IPV6] = cnf->disable_ipv6;
	array[DEVCONF_ACCEPT_DAD] = cnf->accept_dad;
	array[DEVCONF_FORCE_TLLAO] = cnf->force_tllao;
}
3848 | 3848 | ||
3849 | static inline size_t inet6_ifla6_size(void) | 3849 | static inline size_t inet6_ifla6_size(void) |
3850 | { | 3850 | { |
3851 | return nla_total_size(4) /* IFLA_INET6_FLAGS */ | 3851 | return nla_total_size(4) /* IFLA_INET6_FLAGS */ |
3852 | + nla_total_size(sizeof(struct ifla_cacheinfo)) | 3852 | + nla_total_size(sizeof(struct ifla_cacheinfo)) |
3853 | + nla_total_size(DEVCONF_MAX * 4) /* IFLA_INET6_CONF */ | 3853 | + nla_total_size(DEVCONF_MAX * 4) /* IFLA_INET6_CONF */ |
3854 | + nla_total_size(IPSTATS_MIB_MAX * 8) /* IFLA_INET6_STATS */ | 3854 | + nla_total_size(IPSTATS_MIB_MAX * 8) /* IFLA_INET6_STATS */ |
3855 | + nla_total_size(ICMP6_MIB_MAX * 8); /* IFLA_INET6_ICMP6STATS */ | 3855 | + nla_total_size(ICMP6_MIB_MAX * 8); /* IFLA_INET6_ICMP6STATS */ |
3856 | } | 3856 | } |
3857 | 3857 | ||
3858 | static inline size_t inet6_if_nlmsg_size(void) | 3858 | static inline size_t inet6_if_nlmsg_size(void) |
3859 | { | 3859 | { |
3860 | return NLMSG_ALIGN(sizeof(struct ifinfomsg)) | 3860 | return NLMSG_ALIGN(sizeof(struct ifinfomsg)) |
3861 | + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ | 3861 | + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ |
3862 | + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */ | 3862 | + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */ |
3863 | + nla_total_size(4) /* IFLA_MTU */ | 3863 | + nla_total_size(4) /* IFLA_MTU */ |
3864 | + nla_total_size(4) /* IFLA_LINK */ | 3864 | + nla_total_size(4) /* IFLA_LINK */ |
3865 | + nla_total_size(inet6_ifla6_size()); /* IFLA_PROTINFO */ | 3865 | + nla_total_size(inet6_ifla6_size()); /* IFLA_PROTINFO */ |
3866 | } | 3866 | } |
3867 | 3867 | ||
3868 | static inline void __snmp6_fill_statsdev(u64 *stats, atomic_long_t *mib, | 3868 | static inline void __snmp6_fill_statsdev(u64 *stats, atomic_long_t *mib, |
3869 | int items, int bytes) | 3869 | int items, int bytes) |
3870 | { | 3870 | { |
3871 | int i; | 3871 | int i; |
3872 | int pad = bytes - sizeof(u64) * items; | 3872 | int pad = bytes - sizeof(u64) * items; |
3873 | BUG_ON(pad < 0); | 3873 | BUG_ON(pad < 0); |
3874 | 3874 | ||
3875 | /* Use put_unaligned() because stats may not be aligned for u64. */ | 3875 | /* Use put_unaligned() because stats may not be aligned for u64. */ |
3876 | put_unaligned(items, &stats[0]); | 3876 | put_unaligned(items, &stats[0]); |
3877 | for (i = 1; i < items; i++) | 3877 | for (i = 1; i < items; i++) |
3878 | put_unaligned(atomic_long_read(&mib[i]), &stats[i]); | 3878 | put_unaligned(atomic_long_read(&mib[i]), &stats[i]); |
3879 | 3879 | ||
3880 | memset(&stats[items], 0, pad); | 3880 | memset(&stats[items], 0, pad); |
3881 | } | 3881 | } |
3882 | 3882 | ||
3883 | static inline void __snmp6_fill_stats64(u64 *stats, void __percpu **mib, | 3883 | static inline void __snmp6_fill_stats64(u64 *stats, void __percpu **mib, |
3884 | int items, int bytes, size_t syncpoff) | 3884 | int items, int bytes, size_t syncpoff) |
3885 | { | 3885 | { |
3886 | int i; | 3886 | int i; |
3887 | int pad = bytes - sizeof(u64) * items; | 3887 | int pad = bytes - sizeof(u64) * items; |
3888 | BUG_ON(pad < 0); | 3888 | BUG_ON(pad < 0); |
3889 | 3889 | ||
3890 | /* Use put_unaligned() because stats may not be aligned for u64. */ | 3890 | /* Use put_unaligned() because stats may not be aligned for u64. */ |
3891 | put_unaligned(items, &stats[0]); | 3891 | put_unaligned(items, &stats[0]); |
3892 | for (i = 1; i < items; i++) | 3892 | for (i = 1; i < items; i++) |
3893 | put_unaligned(snmp_fold_field64(mib, i, syncpoff), &stats[i]); | 3893 | put_unaligned(snmp_fold_field64(mib, i, syncpoff), &stats[i]); |
3894 | 3894 | ||
3895 | memset(&stats[items], 0, pad); | 3895 | memset(&stats[items], 0, pad); |
3896 | } | 3896 | } |
3897 | 3897 | ||
3898 | static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, | 3898 | static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, |
3899 | int bytes) | 3899 | int bytes) |
3900 | { | 3900 | { |
3901 | switch (attrtype) { | 3901 | switch (attrtype) { |
3902 | case IFLA_INET6_STATS: | 3902 | case IFLA_INET6_STATS: |
3903 | __snmp6_fill_stats64(stats, (void __percpu **)idev->stats.ipv6, | 3903 | __snmp6_fill_stats64(stats, (void __percpu **)idev->stats.ipv6, |
3904 | IPSTATS_MIB_MAX, bytes, offsetof(struct ipstats_mib, syncp)); | 3904 | IPSTATS_MIB_MAX, bytes, offsetof(struct ipstats_mib, syncp)); |
3905 | break; | 3905 | break; |
3906 | case IFLA_INET6_ICMP6STATS: | 3906 | case IFLA_INET6_ICMP6STATS: |
3907 | __snmp6_fill_statsdev(stats, idev->stats.icmpv6dev->mibs, ICMP6_MIB_MAX, bytes); | 3907 | __snmp6_fill_statsdev(stats, idev->stats.icmpv6dev->mibs, ICMP6_MIB_MAX, bytes); |
3908 | break; | 3908 | break; |
3909 | } | 3909 | } |
3910 | } | 3910 | } |
3911 | 3911 | ||
/*
 * Fill the AF_INET6 attribute set for @idev into @skb:
 * IFLA_INET6_FLAGS, IFLA_INET6_CACHEINFO, IFLA_INET6_CONF,
 * IFLA_INET6_STATS and IFLA_INET6_ICMP6STATS.
 *
 * Returns 0 on success, -EMSGSIZE if the skb runs out of tailroom.
 * Note: the NLA_PUT* macros branch to the nla_put_failure label on
 * failure.
 */
static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev)
{
	struct nlattr *nla;
	struct ifla_cacheinfo ci;

	NLA_PUT_U32(skb, IFLA_INET6_FLAGS, idev->if_flags);

	ci.max_reasm_len = IPV6_MAXPLEN;
	/* tstamp goes through cstamp_delta(); the ND timers are exported
	 * in milliseconds. */
	ci.tstamp = cstamp_delta(idev->tstamp);
	ci.reachable_time = jiffies_to_msecs(idev->nd_parms->reachable_time);
	ci.retrans_time = jiffies_to_msecs(idev->nd_parms->retrans_time);
	NLA_PUT(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci);

	/* Device configuration exported as a flat DEVCONF_MAX s32 array. */
	nla = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32));
	if (nla == NULL)
		goto nla_put_failure;
	ipv6_store_devconf(&idev->cnf, nla_data(nla), nla_len(nla));

	/* XXX - MC not implemented */

	nla = nla_reserve(skb, IFLA_INET6_STATS, IPSTATS_MIB_MAX * sizeof(u64));
	if (nla == NULL)
		goto nla_put_failure;
	snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_STATS, nla_len(nla));

	nla = nla_reserve(skb, IFLA_INET6_ICMP6STATS, ICMP6_MIB_MAX * sizeof(u64));
	if (nla == NULL)
		goto nla_put_failure;
	snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla));

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}
3947 | 3947 | ||
3948 | static size_t inet6_get_link_af_size(const struct net_device *dev) | 3948 | static size_t inet6_get_link_af_size(const struct net_device *dev) |
3949 | { | 3949 | { |
3950 | if (!__in6_dev_get(dev)) | 3950 | if (!__in6_dev_get(dev)) |
3951 | return 0; | 3951 | return 0; |
3952 | 3952 | ||
3953 | return inet6_ifla6_size(); | 3953 | return inet6_ifla6_size(); |
3954 | } | 3954 | } |
3955 | 3955 | ||
3956 | static int inet6_fill_link_af(struct sk_buff *skb, const struct net_device *dev) | 3956 | static int inet6_fill_link_af(struct sk_buff *skb, const struct net_device *dev) |
3957 | { | 3957 | { |
3958 | struct inet6_dev *idev = __in6_dev_get(dev); | 3958 | struct inet6_dev *idev = __in6_dev_get(dev); |
3959 | 3959 | ||
3960 | if (!idev) | 3960 | if (!idev) |
3961 | return -ENODATA; | 3961 | return -ENODATA; |
3962 | 3962 | ||
3963 | if (inet6_fill_ifla6_attrs(skb, idev) < 0) | 3963 | if (inet6_fill_ifla6_attrs(skb, idev) < 0) |
3964 | return -EMSGSIZE; | 3964 | return -EMSGSIZE; |
3965 | 3965 | ||
3966 | return 0; | 3966 | return 0; |
3967 | } | 3967 | } |
3968 | 3968 | ||
/*
 * Build one AF_INET6 RTM_NEWLINK message for @idev: the ifinfomsg
 * header, name/address/MTU attributes and a nested IFLA_PROTINFO
 * holding the inet6-specific attributes.
 *
 * Returns the result of nlmsg_end() on success, -EMSGSIZE when the
 * skb runs out of room (the NLA_PUT* macros jump to nla_put_failure,
 * which cancels the partially-built message).
 */
static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
			     u32 pid, u32 seq, int event, unsigned int flags)
{
	struct net_device *dev = idev->dev;
	struct ifinfomsg *hdr;
	struct nlmsghdr *nlh;
	void *protoinfo;

	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*hdr), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	hdr = nlmsg_data(nlh);
	hdr->ifi_family = AF_INET6;
	hdr->__ifi_pad = 0;
	hdr->ifi_type = dev->type;
	hdr->ifi_index = dev->ifindex;
	hdr->ifi_flags = dev_get_flags(dev);
	hdr->ifi_change = 0;

	NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name);

	/* Some devices (e.g. tunnels) have no link-layer address. */
	if (dev->addr_len)
		NLA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr);

	NLA_PUT_U32(skb, IFLA_MTU, dev->mtu);
	/* Only report the lower device for stacked/virtual devices. */
	if (dev->ifindex != dev->iflink)
		NLA_PUT_U32(skb, IFLA_LINK, dev->iflink);

	protoinfo = nla_nest_start(skb, IFLA_PROTINFO);
	if (protoinfo == NULL)
		goto nla_put_failure;

	if (inet6_fill_ifla6_attrs(skb, idev) < 0)
		goto nla_put_failure;

	nla_nest_end(skb, protoinfo);
	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
4012 | 4012 | ||
/*
 * Netlink dump callback for AF_INET6 link info: walk every device hash
 * chain under RCU and emit one ifinfo message per device that has an
 * inet6_dev.
 *
 * cb->args[0] (hash bucket) and cb->args[1] (index within the bucket)
 * persist the position between invocations so a dump that fills the
 * skb can resume where it left off.
 */
static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int h, s_h;
	int idx = 0, s_idx;
	struct net_device *dev;
	struct inet6_dev *idev;
	struct hlist_head *head;
	struct hlist_node *node;

	s_h = cb->args[0];
	s_idx = cb->args[1];

	rcu_read_lock();
	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
		head = &net->dev_index_head[h];
		hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
			/* Skip entries already dumped by a previous call. */
			if (idx < s_idx)
				goto cont;
			idev = __in6_dev_get(dev);
			if (!idev)
				goto cont;
			if (inet6_fill_ifinfo(skb, idev,
					      NETLINK_CB(cb->skb).pid,
					      cb->nlh->nlmsg_seq,
					      RTM_NEWLINK, NLM_F_MULTI) <= 0)
				goto out; /* skb full: stop and resume later */
cont:
			idx++;
		}
	}
out:
	rcu_read_unlock();
	/* Record where to resume on the next invocation. */
	cb->args[1] = idx;
	cb->args[0] = h;

	return skb->len;
}
4052 | 4052 | ||
4053 | void inet6_ifinfo_notify(int event, struct inet6_dev *idev) | 4053 | void inet6_ifinfo_notify(int event, struct inet6_dev *idev) |
4054 | { | 4054 | { |
4055 | struct sk_buff *skb; | 4055 | struct sk_buff *skb; |
4056 | struct net *net = dev_net(idev->dev); | 4056 | struct net *net = dev_net(idev->dev); |
4057 | int err = -ENOBUFS; | 4057 | int err = -ENOBUFS; |
4058 | 4058 | ||
4059 | skb = nlmsg_new(inet6_if_nlmsg_size(), GFP_ATOMIC); | 4059 | skb = nlmsg_new(inet6_if_nlmsg_size(), GFP_ATOMIC); |
4060 | if (skb == NULL) | 4060 | if (skb == NULL) |
4061 | goto errout; | 4061 | goto errout; |
4062 | 4062 | ||
4063 | err = inet6_fill_ifinfo(skb, idev, 0, 0, event, 0); | 4063 | err = inet6_fill_ifinfo(skb, idev, 0, 0, event, 0); |
4064 | if (err < 0) { | 4064 | if (err < 0) { |
4065 | /* -EMSGSIZE implies BUG in inet6_if_nlmsg_size() */ | 4065 | /* -EMSGSIZE implies BUG in inet6_if_nlmsg_size() */ |
4066 | WARN_ON(err == -EMSGSIZE); | 4066 | WARN_ON(err == -EMSGSIZE); |
4067 | kfree_skb(skb); | 4067 | kfree_skb(skb); |
4068 | goto errout; | 4068 | goto errout; |
4069 | } | 4069 | } |
4070 | rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFINFO, NULL, GFP_ATOMIC); | 4070 | rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFINFO, NULL, GFP_ATOMIC); |
4071 | return; | 4071 | return; |
4072 | errout: | 4072 | errout: |
4073 | if (err < 0) | 4073 | if (err < 0) |
4074 | rtnl_set_sk_err(net, RTNLGRP_IPV6_IFINFO, err); | 4074 | rtnl_set_sk_err(net, RTNLGRP_IPV6_IFINFO, err); |
4075 | } | 4075 | } |
4076 | 4076 | ||
4077 | static inline size_t inet6_prefix_nlmsg_size(void) | 4077 | static inline size_t inet6_prefix_nlmsg_size(void) |
4078 | { | 4078 | { |
4079 | return NLMSG_ALIGN(sizeof(struct prefixmsg)) | 4079 | return NLMSG_ALIGN(sizeof(struct prefixmsg)) |
4080 | + nla_total_size(sizeof(struct in6_addr)) | 4080 | + nla_total_size(sizeof(struct in6_addr)) |
4081 | + nla_total_size(sizeof(struct prefix_cacheinfo)); | 4081 | + nla_total_size(sizeof(struct prefix_cacheinfo)); |
4082 | } | 4082 | } |
4083 | 4083 | ||
/*
 * Build one prefix message describing the router-advertised prefix
 * @pinfo as seen on @idev.
 *
 * Returns the result of nlmsg_end() on success, -EMSGSIZE when the skb
 * runs out of room (the NLA_PUT macros jump to nla_put_failure, which
 * cancels the partially-built message).
 */
static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev,
			     struct prefix_info *pinfo, u32 pid, u32 seq,
			     int event, unsigned int flags)
{
	struct prefixmsg *pmsg;
	struct nlmsghdr *nlh;
	struct prefix_cacheinfo ci;

	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*pmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	pmsg = nlmsg_data(nlh);
	pmsg->prefix_family = AF_INET6;
	pmsg->prefix_pad1 = 0;
	pmsg->prefix_pad2 = 0;
	pmsg->prefix_ifindex = idev->dev->ifindex;
	pmsg->prefix_len = pinfo->prefix_len;
	pmsg->prefix_type = pinfo->type;
	pmsg->prefix_pad3 = 0;
	/* Translate the RA on-link/autonomous bits into IF_PREFIX_* flags. */
	pmsg->prefix_flags = 0;
	if (pinfo->onlink)
		pmsg->prefix_flags |= IF_PREFIX_ONLINK;
	if (pinfo->autoconf)
		pmsg->prefix_flags |= IF_PREFIX_AUTOCONF;

	NLA_PUT(skb, PREFIX_ADDRESS, sizeof(pinfo->prefix), &pinfo->prefix);

	/* Lifetimes arrive in network byte order from the RA option. */
	ci.preferred_time = ntohl(pinfo->prefered);
	ci.valid_time = ntohl(pinfo->valid);
	NLA_PUT(skb, PREFIX_CACHEINFO, sizeof(ci), &ci);

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
4122 | 4122 | ||
4123 | static void inet6_prefix_notify(int event, struct inet6_dev *idev, | 4123 | static void inet6_prefix_notify(int event, struct inet6_dev *idev, |
4124 | struct prefix_info *pinfo) | 4124 | struct prefix_info *pinfo) |
4125 | { | 4125 | { |
4126 | struct sk_buff *skb; | 4126 | struct sk_buff *skb; |
4127 | struct net *net = dev_net(idev->dev); | 4127 | struct net *net = dev_net(idev->dev); |
4128 | int err = -ENOBUFS; | 4128 | int err = -ENOBUFS; |
4129 | 4129 | ||
4130 | skb = nlmsg_new(inet6_prefix_nlmsg_size(), GFP_ATOMIC); | 4130 | skb = nlmsg_new(inet6_prefix_nlmsg_size(), GFP_ATOMIC); |
4131 | if (skb == NULL) | 4131 | if (skb == NULL) |
4132 | goto errout; | 4132 | goto errout; |
4133 | 4133 | ||
4134 | err = inet6_fill_prefix(skb, idev, pinfo, 0, 0, event, 0); | 4134 | err = inet6_fill_prefix(skb, idev, pinfo, 0, 0, event, 0); |
4135 | if (err < 0) { | 4135 | if (err < 0) { |
4136 | /* -EMSGSIZE implies BUG in inet6_prefix_nlmsg_size() */ | 4136 | /* -EMSGSIZE implies BUG in inet6_prefix_nlmsg_size() */ |
4137 | WARN_ON(err == -EMSGSIZE); | 4137 | WARN_ON(err == -EMSGSIZE); |
4138 | kfree_skb(skb); | 4138 | kfree_skb(skb); |
4139 | goto errout; | 4139 | goto errout; |
4140 | } | 4140 | } |
4141 | rtnl_notify(skb, net, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC); | 4141 | rtnl_notify(skb, net, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC); |
4142 | return; | 4142 | return; |
4143 | errout: | 4143 | errout: |
4144 | if (err < 0) | 4144 | if (err < 0) |
4145 | rtnl_set_sk_err(net, RTNLGRP_IPV6_PREFIX, err); | 4145 | rtnl_set_sk_err(net, RTNLGRP_IPV6_PREFIX, err); |
4146 | } | 4146 | } |
4147 | 4147 | ||
/*
 * Emit the address netlink notification for @ifp and keep the
 * associated host route and anycast membership in sync with the
 * address's lifetime.  An @event of 0 defaults to RTM_NEWADDR.
 * Called via ipv6_ifa_notify(), which checks that the idev is alive.
 */
static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
{
	inet6_ifa_notify(event ? : RTM_NEWADDR, ifp);

	switch (event) {
	case RTM_NEWADDR:
		/*
		 * If the address was optimistic
		 * we inserted the route at the start of
		 * our DAD process, so we don't need
		 * to do it again
		 */
		if (!(ifp->rt->rt6i_node))
			ip6_ins_rt(ifp->rt);
		if (ifp->idev->cnf.forwarding)
			addrconf_join_anycast(ifp);
		break;
	case RTM_DELADDR:
		if (ifp->idev->cnf.forwarding)
			addrconf_leave_anycast(ifp);
		addrconf_leave_solict(ifp->idev, &ifp->addr);
		/* Take an extra reference on the route's dst so it is
		 * still valid across ip6_del_rt(); if deletion fails
		 * (route apparently not in the tree), release it here. */
		dst_hold(&ifp->rt->dst);

		if (ip6_del_rt(ifp->rt))
			dst_free(&ifp->rt->dst);
		break;
	}
}
4176 | 4176 | ||
4177 | static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) | 4177 | static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) |
4178 | { | 4178 | { |
4179 | rcu_read_lock_bh(); | 4179 | rcu_read_lock_bh(); |
4180 | if (likely(ifp->idev->dead == 0)) | 4180 | if (likely(ifp->idev->dead == 0)) |
4181 | __ipv6_ifa_notify(event, ifp); | 4181 | __ipv6_ifa_notify(event, ifp); |
4182 | rcu_read_unlock_bh(); | 4182 | rcu_read_unlock_bh(); |
4183 | } | 4183 | } |
4184 | 4184 | ||
4185 | #ifdef CONFIG_SYSCTL | 4185 | #ifdef CONFIG_SYSCTL |
4186 | 4186 | ||
4187 | static | 4187 | static |
4188 | int addrconf_sysctl_forward(ctl_table *ctl, int write, | 4188 | int addrconf_sysctl_forward(ctl_table *ctl, int write, |
4189 | void __user *buffer, size_t *lenp, loff_t *ppos) | 4189 | void __user *buffer, size_t *lenp, loff_t *ppos) |
4190 | { | 4190 | { |
4191 | int *valp = ctl->data; | 4191 | int *valp = ctl->data; |
4192 | int val = *valp; | 4192 | int val = *valp; |
4193 | loff_t pos = *ppos; | 4193 | loff_t pos = *ppos; |
4194 | int ret; | 4194 | int ret; |
4195 | 4195 | ||
4196 | ret = proc_dointvec(ctl, write, buffer, lenp, ppos); | 4196 | ret = proc_dointvec(ctl, write, buffer, lenp, ppos); |
4197 | 4197 | ||
4198 | if (write) | 4198 | if (write) |
4199 | ret = addrconf_fixup_forwarding(ctl, valp, val); | 4199 | ret = addrconf_fixup_forwarding(ctl, valp, val); |
4200 | if (ret) | 4200 | if (ret) |
4201 | *ppos = pos; | 4201 | *ppos = pos; |
4202 | return ret; | 4202 | return ret; |
4203 | } | 4203 | } |
4204 | 4204 | ||
4205 | static void dev_disable_change(struct inet6_dev *idev) | 4205 | static void dev_disable_change(struct inet6_dev *idev) |
4206 | { | 4206 | { |
4207 | if (!idev || !idev->dev) | 4207 | if (!idev || !idev->dev) |
4208 | return; | 4208 | return; |
4209 | 4209 | ||
4210 | if (idev->cnf.disable_ipv6) | 4210 | if (idev->cnf.disable_ipv6) |
4211 | addrconf_notify(NULL, NETDEV_DOWN, idev->dev); | 4211 | addrconf_notify(NULL, NETDEV_DOWN, idev->dev); |
4212 | else | 4212 | else |
4213 | addrconf_notify(NULL, NETDEV_UP, idev->dev); | 4213 | addrconf_notify(NULL, NETDEV_UP, idev->dev); |
4214 | } | 4214 | } |
4215 | 4215 | ||
4216 | static void addrconf_disable_change(struct net *net, __s32 newf) | 4216 | static void addrconf_disable_change(struct net *net, __s32 newf) |
4217 | { | 4217 | { |
4218 | struct net_device *dev; | 4218 | struct net_device *dev; |
4219 | struct inet6_dev *idev; | 4219 | struct inet6_dev *idev; |
4220 | 4220 | ||
4221 | rcu_read_lock(); | 4221 | rcu_read_lock(); |
4222 | for_each_netdev_rcu(net, dev) { | 4222 | for_each_netdev_rcu(net, dev) { |
4223 | idev = __in6_dev_get(dev); | 4223 | idev = __in6_dev_get(dev); |
4224 | if (idev) { | 4224 | if (idev) { |
4225 | int changed = (!idev->cnf.disable_ipv6) ^ (!newf); | 4225 | int changed = (!idev->cnf.disable_ipv6) ^ (!newf); |
4226 | idev->cnf.disable_ipv6 = newf; | 4226 | idev->cnf.disable_ipv6 = newf; |
4227 | if (changed) | 4227 | if (changed) |
4228 | dev_disable_change(idev); | 4228 | dev_disable_change(idev); |
4229 | } | 4229 | } |
4230 | } | 4230 | } |
4231 | rcu_read_unlock(); | 4231 | rcu_read_unlock(); |
4232 | } | 4232 | } |
4233 | 4233 | ||
/*
 * Apply a write to a disable_ipv6 sysctl.  @p points at the value that
 * proc_dointvec() already updated, @old is the previous value.
 *
 * Returns 0 on success; if the RTNL lock cannot be taken without
 * blocking, the old value is restored and the syscall is restarted.
 */
static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int old)
{
	struct net *net;

	net = (struct net *)table->extra2;

	/* The namespace default only seeds future devices; nothing to
	 * propagate right now. */
	if (p == &net->ipv6.devconf_dflt->disable_ipv6)
		return 0;

	/* trylock + restart rather than a blocking rtnl_lock() here —
	 * NOTE(review): presumably to avoid a deadlock against code
	 * holding RTNL while tearing down this sysctl; confirm. */
	if (!rtnl_trylock()) {
		/* Restore the original values before restarting */
		*p = old;
		return restart_syscall();
	}

	if (p == &net->ipv6.devconf_all->disable_ipv6) {
		/* "all" overwrites the default too and is pushed to
		 * every device in the namespace. */
		__s32 newf = net->ipv6.devconf_all->disable_ipv6;
		net->ipv6.devconf_dflt->disable_ipv6 = newf;
		addrconf_disable_change(net, newf);
	} else if ((!*p) ^ (!old)) /* boolean state actually changed */
		dev_disable_change((struct inet6_dev *)table->extra1);

	rtnl_unlock();
	return 0;
}
4259 | 4259 | ||
4260 | static | 4260 | static |
4261 | int addrconf_sysctl_disable(ctl_table *ctl, int write, | 4261 | int addrconf_sysctl_disable(ctl_table *ctl, int write, |
4262 | void __user *buffer, size_t *lenp, loff_t *ppos) | 4262 | void __user *buffer, size_t *lenp, loff_t *ppos) |
4263 | { | 4263 | { |
4264 | int *valp = ctl->data; | 4264 | int *valp = ctl->data; |
4265 | int val = *valp; | 4265 | int val = *valp; |
4266 | loff_t pos = *ppos; | 4266 | loff_t pos = *ppos; |
4267 | int ret; | 4267 | int ret; |
4268 | 4268 | ||
4269 | ret = proc_dointvec(ctl, write, buffer, lenp, ppos); | 4269 | ret = proc_dointvec(ctl, write, buffer, lenp, ppos); |
4270 | 4270 | ||
4271 | if (write) | 4271 | if (write) |
4272 | ret = addrconf_disable_ipv6(ctl, valp, val); | 4272 | ret = addrconf_disable_ipv6(ctl, valp, val); |
4273 | if (ret) | 4273 | if (ret) |
4274 | *ppos = pos; | 4274 | *ppos = pos; |
4275 | return ret; | 4275 | return ret; |
4276 | } | 4276 | } |
4277 | 4277 | ||
/*
 * Template for the per-device IPv6 sysctl tree
 * (/proc/sys/net/ipv6/conf/<dev>/...).
 *
 * __addrconf_sysctl_register() kmemdup()s this whole structure for every
 * device (and for the per-namespace "all"/"default" entries) and then
 * rebases each .data pointer from the global ipv6_devconf template onto
 * the target ipv6_devconf instance.
 */
static struct addrconf_sysctl_table
{
	struct ctl_table_header *sysctl_header;	/* handle from register_net_sysctl_table() */
	ctl_table addrconf_vars[DEVCONF_MAX+1];	/* one slot per knob plus sentinel */
	char *dev_name;				/* private copy of the device name */
} addrconf_sysctl __read_mostly = {
	.sysctl_header = NULL,
	.addrconf_vars = {
		{
			.procname	= "forwarding",
			.data		= &ipv6_devconf.forwarding,
			.maxlen		= sizeof(int),
			.mode		= 0644,
			/* dedicated handler: flipping forwarding has
			 * subsystem-wide side effects */
			.proc_handler	= addrconf_sysctl_forward,
		},
		{
			.procname	= "hop_limit",
			.data		= &ipv6_devconf.hop_limit,
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{
			.procname	= "mtu",
			.data		= &ipv6_devconf.mtu6,
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{
			.procname	= "accept_ra",
			.data		= &ipv6_devconf.accept_ra,
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{
			.procname	= "accept_redirects",
			.data		= &ipv6_devconf.accept_redirects,
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{
			.procname	= "autoconf",
			.data		= &ipv6_devconf.autoconf,
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{
			.procname	= "dad_transmits",
			.data		= &ipv6_devconf.dad_transmits,
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{
			.procname	= "router_solicitations",
			.data		= &ipv6_devconf.rtr_solicits,
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{
			/* interval values are stored in jiffies */
			.procname	= "router_solicitation_interval",
			.data		= &ipv6_devconf.rtr_solicit_interval,
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		{
			.procname	= "router_solicitation_delay",
			.data		= &ipv6_devconf.rtr_solicit_delay,
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		{
			.procname	= "force_mld_version",
			.data		= &ipv6_devconf.force_mld_version,
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
#ifdef CONFIG_IPV6_PRIVACY
		/* RFC 3041 privacy-extension knobs */
		{
			.procname	= "use_tempaddr",
			.data		= &ipv6_devconf.use_tempaddr,
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{
			.procname	= "temp_valid_lft",
			.data		= &ipv6_devconf.temp_valid_lft,
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{
			.procname	= "temp_prefered_lft",
			.data		= &ipv6_devconf.temp_prefered_lft,
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{
			.procname	= "regen_max_retry",
			.data		= &ipv6_devconf.regen_max_retry,
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{
			.procname	= "max_desync_factor",
			.data		= &ipv6_devconf.max_desync_factor,
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
#endif
		{
			.procname	= "max_addresses",
			.data		= &ipv6_devconf.max_addresses,
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{
			.procname	= "accept_ra_defrtr",
			.data		= &ipv6_devconf.accept_ra_defrtr,
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{
			.procname	= "accept_ra_pinfo",
			.data		= &ipv6_devconf.accept_ra_pinfo,
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
#ifdef CONFIG_IPV6_ROUTER_PREF
		{
			.procname	= "accept_ra_rtr_pref",
			.data		= &ipv6_devconf.accept_ra_rtr_pref,
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{
			.procname	= "router_probe_interval",
			.data		= &ipv6_devconf.rtr_probe_interval,
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
#ifdef CONFIG_IPV6_ROUTE_INFO
		{
			.procname	= "accept_ra_rt_info_max_plen",
			.data		= &ipv6_devconf.accept_ra_rt_info_max_plen,
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
#endif
#endif
		{
			.procname	= "proxy_ndp",
			.data		= &ipv6_devconf.proxy_ndp,
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{
			.procname	= "accept_source_route",
			.data		= &ipv6_devconf.accept_source_route,
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
		{
			.procname	= "optimistic_dad",
			.data		= &ipv6_devconf.optimistic_dad,
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,

		},
#endif
#ifdef CONFIG_IPV6_MROUTE
		{
			/* read-only: state owned by the multicast
			 * routing code, not settable via sysctl */
			.procname	= "mc_forwarding",
			.data		= &ipv6_devconf.mc_forwarding,
			.maxlen		= sizeof(int),
			.mode		= 0444,
			.proc_handler	= proc_dointvec,
		},
#endif
		{
			/* dedicated handler: disabling IPv6 tears down
			 * addresses/routes, see addrconf_sysctl_disable */
			.procname	= "disable_ipv6",
			.data		= &ipv6_devconf.disable_ipv6,
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= addrconf_sysctl_disable,
		},
		{
			.procname	= "accept_dad",
			.data		= &ipv6_devconf.accept_dad,
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{
			.procname	= "force_tllao",
			.data		= &ipv6_devconf.force_tllao,
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec
		},
		{
			/* sentinel */
		}
	},
};
4505 | 4505 | ||
4506 | static int __addrconf_sysctl_register(struct net *net, char *dev_name, | 4506 | static int __addrconf_sysctl_register(struct net *net, char *dev_name, |
4507 | struct inet6_dev *idev, struct ipv6_devconf *p) | 4507 | struct inet6_dev *idev, struct ipv6_devconf *p) |
4508 | { | 4508 | { |
4509 | int i; | 4509 | int i; |
4510 | struct addrconf_sysctl_table *t; | 4510 | struct addrconf_sysctl_table *t; |
4511 | 4511 | ||
4512 | #define ADDRCONF_CTL_PATH_DEV 3 | 4512 | #define ADDRCONF_CTL_PATH_DEV 3 |
4513 | 4513 | ||
4514 | struct ctl_path addrconf_ctl_path[] = { | 4514 | struct ctl_path addrconf_ctl_path[] = { |
4515 | { .procname = "net", }, | 4515 | { .procname = "net", }, |
4516 | { .procname = "ipv6", }, | 4516 | { .procname = "ipv6", }, |
4517 | { .procname = "conf", }, | 4517 | { .procname = "conf", }, |
4518 | { /* to be set */ }, | 4518 | { /* to be set */ }, |
4519 | { }, | 4519 | { }, |
4520 | }; | 4520 | }; |
4521 | 4521 | ||
4522 | 4522 | ||
4523 | t = kmemdup(&addrconf_sysctl, sizeof(*t), GFP_KERNEL); | 4523 | t = kmemdup(&addrconf_sysctl, sizeof(*t), GFP_KERNEL); |
4524 | if (t == NULL) | 4524 | if (t == NULL) |
4525 | goto out; | 4525 | goto out; |
4526 | 4526 | ||
4527 | for (i = 0; t->addrconf_vars[i].data; i++) { | 4527 | for (i = 0; t->addrconf_vars[i].data; i++) { |
4528 | t->addrconf_vars[i].data += (char *)p - (char *)&ipv6_devconf; | 4528 | t->addrconf_vars[i].data += (char *)p - (char *)&ipv6_devconf; |
4529 | t->addrconf_vars[i].extra1 = idev; /* embedded; no ref */ | 4529 | t->addrconf_vars[i].extra1 = idev; /* embedded; no ref */ |
4530 | t->addrconf_vars[i].extra2 = net; | 4530 | t->addrconf_vars[i].extra2 = net; |
4531 | } | 4531 | } |
4532 | 4532 | ||
4533 | /* | 4533 | /* |
4534 | * Make a copy of dev_name, because '.procname' is regarded as const | 4534 | * Make a copy of dev_name, because '.procname' is regarded as const |
4535 | * by sysctl and we wouldn't want anyone to change it under our feet | 4535 | * by sysctl and we wouldn't want anyone to change it under our feet |
4536 | * (see SIOCSIFNAME). | 4536 | * (see SIOCSIFNAME). |
4537 | */ | 4537 | */ |
4538 | t->dev_name = kstrdup(dev_name, GFP_KERNEL); | 4538 | t->dev_name = kstrdup(dev_name, GFP_KERNEL); |
4539 | if (!t->dev_name) | 4539 | if (!t->dev_name) |
4540 | goto free; | 4540 | goto free; |
4541 | 4541 | ||
4542 | addrconf_ctl_path[ADDRCONF_CTL_PATH_DEV].procname = t->dev_name; | 4542 | addrconf_ctl_path[ADDRCONF_CTL_PATH_DEV].procname = t->dev_name; |
4543 | 4543 | ||
4544 | t->sysctl_header = register_net_sysctl_table(net, addrconf_ctl_path, | 4544 | t->sysctl_header = register_net_sysctl_table(net, addrconf_ctl_path, |
4545 | t->addrconf_vars); | 4545 | t->addrconf_vars); |
4546 | if (t->sysctl_header == NULL) | 4546 | if (t->sysctl_header == NULL) |
4547 | goto free_procname; | 4547 | goto free_procname; |
4548 | 4548 | ||
4549 | p->sysctl = t; | 4549 | p->sysctl = t; |
4550 | return 0; | 4550 | return 0; |
4551 | 4551 | ||
4552 | free_procname: | 4552 | free_procname: |
4553 | kfree(t->dev_name); | 4553 | kfree(t->dev_name); |
4554 | free: | 4554 | free: |
4555 | kfree(t); | 4555 | kfree(t); |
4556 | out: | 4556 | out: |
4557 | return -ENOBUFS; | 4557 | return -ENOBUFS; |
4558 | } | 4558 | } |
4559 | 4559 | ||
4560 | static void __addrconf_sysctl_unregister(struct ipv6_devconf *p) | 4560 | static void __addrconf_sysctl_unregister(struct ipv6_devconf *p) |
4561 | { | 4561 | { |
4562 | struct addrconf_sysctl_table *t; | 4562 | struct addrconf_sysctl_table *t; |
4563 | 4563 | ||
4564 | if (p->sysctl == NULL) | 4564 | if (p->sysctl == NULL) |
4565 | return; | 4565 | return; |
4566 | 4566 | ||
4567 | t = p->sysctl; | 4567 | t = p->sysctl; |
4568 | p->sysctl = NULL; | 4568 | p->sysctl = NULL; |
4569 | unregister_net_sysctl_table(t->sysctl_header); | 4569 | unregister_net_sysctl_table(t->sysctl_header); |
4570 | kfree(t->dev_name); | 4570 | kfree(t->dev_name); |
4571 | kfree(t); | 4571 | kfree(t); |
4572 | } | 4572 | } |
4573 | 4573 | ||
/*
 * Register both the neighbour-discovery and addrconf sysctl trees for a
 * device's inet6_dev.  Return values are deliberately ignored here: the
 * device keeps working even if its sysctl entries could not be created.
 */
static void addrconf_sysctl_register(struct inet6_dev *idev)
{
	neigh_sysctl_register(idev->dev, idev->nd_parms, "ipv6",
			      &ndisc_ifinfo_sysctl_change);
	__addrconf_sysctl_register(dev_net(idev->dev), idev->dev->name,
				   idev, &idev->cnf);
}
4581 | 4581 | ||
/* Tear down the sysctl trees in reverse registration order. */
static void addrconf_sysctl_unregister(struct inet6_dev *idev)
{
	__addrconf_sysctl_unregister(&idev->cnf);
	neigh_sysctl_unregister(idev->nd_parms);
}
4587 | 4587 | ||
4588 | 4588 | ||
4589 | #endif | 4589 | #endif |
4590 | 4590 | ||
4591 | static int __net_init addrconf_init_net(struct net *net) | 4591 | static int __net_init addrconf_init_net(struct net *net) |
4592 | { | 4592 | { |
4593 | int err; | 4593 | int err; |
4594 | struct ipv6_devconf *all, *dflt; | 4594 | struct ipv6_devconf *all, *dflt; |
4595 | 4595 | ||
4596 | err = -ENOMEM; | 4596 | err = -ENOMEM; |
4597 | all = &ipv6_devconf; | 4597 | all = &ipv6_devconf; |
4598 | dflt = &ipv6_devconf_dflt; | 4598 | dflt = &ipv6_devconf_dflt; |
4599 | 4599 | ||
4600 | if (!net_eq(net, &init_net)) { | 4600 | if (!net_eq(net, &init_net)) { |
4601 | all = kmemdup(all, sizeof(ipv6_devconf), GFP_KERNEL); | 4601 | all = kmemdup(all, sizeof(ipv6_devconf), GFP_KERNEL); |
4602 | if (all == NULL) | 4602 | if (all == NULL) |
4603 | goto err_alloc_all; | 4603 | goto err_alloc_all; |
4604 | 4604 | ||
4605 | dflt = kmemdup(dflt, sizeof(ipv6_devconf_dflt), GFP_KERNEL); | 4605 | dflt = kmemdup(dflt, sizeof(ipv6_devconf_dflt), GFP_KERNEL); |
4606 | if (dflt == NULL) | 4606 | if (dflt == NULL) |
4607 | goto err_alloc_dflt; | 4607 | goto err_alloc_dflt; |
4608 | } else { | 4608 | } else { |
4609 | /* these will be inherited by all namespaces */ | 4609 | /* these will be inherited by all namespaces */ |
4610 | dflt->autoconf = ipv6_defaults.autoconf; | 4610 | dflt->autoconf = ipv6_defaults.autoconf; |
4611 | dflt->disable_ipv6 = ipv6_defaults.disable_ipv6; | 4611 | dflt->disable_ipv6 = ipv6_defaults.disable_ipv6; |
4612 | } | 4612 | } |
4613 | 4613 | ||
4614 | net->ipv6.devconf_all = all; | 4614 | net->ipv6.devconf_all = all; |
4615 | net->ipv6.devconf_dflt = dflt; | 4615 | net->ipv6.devconf_dflt = dflt; |
4616 | 4616 | ||
4617 | #ifdef CONFIG_SYSCTL | 4617 | #ifdef CONFIG_SYSCTL |
4618 | err = __addrconf_sysctl_register(net, "all", NULL, all); | 4618 | err = __addrconf_sysctl_register(net, "all", NULL, all); |
4619 | if (err < 0) | 4619 | if (err < 0) |
4620 | goto err_reg_all; | 4620 | goto err_reg_all; |
4621 | 4621 | ||
4622 | err = __addrconf_sysctl_register(net, "default", NULL, dflt); | 4622 | err = __addrconf_sysctl_register(net, "default", NULL, dflt); |
4623 | if (err < 0) | 4623 | if (err < 0) |
4624 | goto err_reg_dflt; | 4624 | goto err_reg_dflt; |
4625 | #endif | 4625 | #endif |
4626 | return 0; | 4626 | return 0; |
4627 | 4627 | ||
4628 | #ifdef CONFIG_SYSCTL | 4628 | #ifdef CONFIG_SYSCTL |
4629 | err_reg_dflt: | 4629 | err_reg_dflt: |
4630 | __addrconf_sysctl_unregister(all); | 4630 | __addrconf_sysctl_unregister(all); |
4631 | err_reg_all: | 4631 | err_reg_all: |
4632 | kfree(dflt); | 4632 | kfree(dflt); |
4633 | #endif | 4633 | #endif |
4634 | err_alloc_dflt: | 4634 | err_alloc_dflt: |
4635 | kfree(all); | 4635 | kfree(all); |
4636 | err_alloc_all: | 4636 | err_alloc_all: |
4637 | return err; | 4637 | return err; |
4638 | } | 4638 | } |
4639 | 4639 | ||
4640 | static void __net_exit addrconf_exit_net(struct net *net) | 4640 | static void __net_exit addrconf_exit_net(struct net *net) |
4641 | { | 4641 | { |
4642 | #ifdef CONFIG_SYSCTL | 4642 | #ifdef CONFIG_SYSCTL |
4643 | __addrconf_sysctl_unregister(net->ipv6.devconf_dflt); | 4643 | __addrconf_sysctl_unregister(net->ipv6.devconf_dflt); |
4644 | __addrconf_sysctl_unregister(net->ipv6.devconf_all); | 4644 | __addrconf_sysctl_unregister(net->ipv6.devconf_all); |
4645 | #endif | 4645 | #endif |
4646 | if (!net_eq(net, &init_net)) { | 4646 | if (!net_eq(net, &init_net)) { |
4647 | kfree(net->ipv6.devconf_dflt); | 4647 | kfree(net->ipv6.devconf_dflt); |
4648 | kfree(net->ipv6.devconf_all); | 4648 | kfree(net->ipv6.devconf_all); |
4649 | } | 4649 | } |
4650 | } | 4650 | } |
4651 | 4651 | ||
/* Per-network-namespace init/exit hooks for the addrconf subsystem. */
static struct pernet_operations addrconf_ops = {
	.init = addrconf_init_net,
	.exit = addrconf_exit_net,
};
4656 | 4656 | ||
4657 | /* | 4657 | /* |
4658 | * Device notifier | 4658 | * Device notifier |
4659 | */ | 4659 | */ |
4660 | 4660 | ||
/* Subscribe @nb to IPv6 address add/delete notifications. */
int register_inet6addr_notifier(struct notifier_block *nb)
{
	return atomic_notifier_chain_register(&inet6addr_chain, nb);
}
EXPORT_SYMBOL(register_inet6addr_notifier);
4666 | 4666 | ||
/* Remove @nb from the IPv6 address notification chain. */
int unregister_inet6addr_notifier(struct notifier_block *nb)
{
	return atomic_notifier_chain_unregister(&inet6addr_chain, nb);
}
EXPORT_SYMBOL(unregister_inet6addr_notifier);
4672 | 4672 | ||
/* rtnetlink per-address-family ops: IFLA_AF_SPEC fill/size for AF_INET6. */
static struct rtnl_af_ops inet6_ops = {
	.family		  = AF_INET6,
	.fill_link_af	  = inet6_fill_link_af,
	.get_link_af_size = inet6_get_link_af_size,
};
4678 | 4678 | ||
4679 | /* | 4679 | /* |
4680 | * Init / cleanup code | 4680 | * Init / cleanup code |
4681 | */ | 4681 | */ |
4682 | 4682 | ||
4683 | int __init addrconf_init(void) | 4683 | int __init addrconf_init(void) |
4684 | { | 4684 | { |
4685 | int i, err; | 4685 | int i, err; |
4686 | 4686 | ||
4687 | err = ipv6_addr_label_init(); | 4687 | err = ipv6_addr_label_init(); |
4688 | if (err < 0) { | 4688 | if (err < 0) { |
4689 | printk(KERN_CRIT "IPv6 Addrconf:" | 4689 | printk(KERN_CRIT "IPv6 Addrconf:" |
4690 | " cannot initialize default policy table: %d.\n", err); | 4690 | " cannot initialize default policy table: %d.\n", err); |
4691 | goto out; | 4691 | goto out; |
4692 | } | 4692 | } |
4693 | 4693 | ||
4694 | err = register_pernet_subsys(&addrconf_ops); | 4694 | err = register_pernet_subsys(&addrconf_ops); |
4695 | if (err < 0) | 4695 | if (err < 0) |
4696 | goto out_addrlabel; | 4696 | goto out_addrlabel; |
4697 | 4697 | ||
4698 | /* The addrconf netdev notifier requires that loopback_dev | 4698 | /* The addrconf netdev notifier requires that loopback_dev |
4699 | * has it's ipv6 private information allocated and setup | 4699 | * has it's ipv6 private information allocated and setup |
4700 | * before it can bring up and give link-local addresses | 4700 | * before it can bring up and give link-local addresses |
4701 | * to other devices which are up. | 4701 | * to other devices which are up. |
4702 | * | 4702 | * |
4703 | * Unfortunately, loopback_dev is not necessarily the first | 4703 | * Unfortunately, loopback_dev is not necessarily the first |
4704 | * entry in the global dev_base list of net devices. In fact, | 4704 | * entry in the global dev_base list of net devices. In fact, |
4705 | * it is likely to be the very last entry on that list. | 4705 | * it is likely to be the very last entry on that list. |
4706 | * So this causes the notifier registry below to try and | 4706 | * So this causes the notifier registry below to try and |
4707 | * give link-local addresses to all devices besides loopback_dev | 4707 | * give link-local addresses to all devices besides loopback_dev |
4708 | * first, then loopback_dev, which cases all the non-loopback_dev | 4708 | * first, then loopback_dev, which cases all the non-loopback_dev |
4709 | * devices to fail to get a link-local address. | 4709 | * devices to fail to get a link-local address. |
4710 | * | 4710 | * |
4711 | * So, as a temporary fix, allocate the ipv6 structure for | 4711 | * So, as a temporary fix, allocate the ipv6 structure for |
4712 | * loopback_dev first by hand. | 4712 | * loopback_dev first by hand. |
4713 | * Longer term, all of the dependencies ipv6 has upon the loopback | 4713 | * Longer term, all of the dependencies ipv6 has upon the loopback |
4714 | * device and it being up should be removed. | 4714 | * device and it being up should be removed. |
4715 | */ | 4715 | */ |
4716 | rtnl_lock(); | 4716 | rtnl_lock(); |
4717 | if (!ipv6_add_dev(init_net.loopback_dev)) | 4717 | if (!ipv6_add_dev(init_net.loopback_dev)) |
4718 | err = -ENOMEM; | 4718 | err = -ENOMEM; |
4719 | rtnl_unlock(); | 4719 | rtnl_unlock(); |
4720 | if (err) | 4720 | if (err) |
4721 | goto errlo; | 4721 | goto errlo; |
4722 | 4722 | ||
4723 | for (i = 0; i < IN6_ADDR_HSIZE; i++) | 4723 | for (i = 0; i < IN6_ADDR_HSIZE; i++) |
4724 | INIT_HLIST_HEAD(&inet6_addr_lst[i]); | 4724 | INIT_HLIST_HEAD(&inet6_addr_lst[i]); |
4725 | 4725 | ||
4726 | register_netdevice_notifier(&ipv6_dev_notf); | 4726 | register_netdevice_notifier(&ipv6_dev_notf); |
4727 | 4727 | ||
4728 | addrconf_verify(0); | 4728 | addrconf_verify(0); |
4729 | 4729 | ||
4730 | err = rtnl_af_register(&inet6_ops); | 4730 | err = rtnl_af_register(&inet6_ops); |
4731 | if (err < 0) | 4731 | if (err < 0) |
4732 | goto errout_af; | 4732 | goto errout_af; |
4733 | 4733 | ||
4734 | err = __rtnl_register(PF_INET6, RTM_GETLINK, NULL, inet6_dump_ifinfo, | 4734 | err = __rtnl_register(PF_INET6, RTM_GETLINK, NULL, inet6_dump_ifinfo, |
4735 | NULL); | 4735 | NULL); |
4736 | if (err < 0) | 4736 | if (err < 0) |
4737 | goto errout; | 4737 | goto errout; |
4738 | 4738 | ||
4739 | /* Only the first call to __rtnl_register can fail */ | 4739 | /* Only the first call to __rtnl_register can fail */ |
4740 | __rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL, NULL); | 4740 | __rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL, NULL); |
4741 | __rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL, NULL); | 4741 | __rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL, NULL); |
4742 | __rtnl_register(PF_INET6, RTM_GETADDR, inet6_rtm_getaddr, | 4742 | __rtnl_register(PF_INET6, RTM_GETADDR, inet6_rtm_getaddr, |
4743 | inet6_dump_ifaddr, NULL); | 4743 | inet6_dump_ifaddr, NULL); |
4744 | __rtnl_register(PF_INET6, RTM_GETMULTICAST, NULL, | 4744 | __rtnl_register(PF_INET6, RTM_GETMULTICAST, NULL, |
4745 | inet6_dump_ifmcaddr, NULL); | 4745 | inet6_dump_ifmcaddr, NULL); |
4746 | __rtnl_register(PF_INET6, RTM_GETANYCAST, NULL, | 4746 | __rtnl_register(PF_INET6, RTM_GETANYCAST, NULL, |
4747 | inet6_dump_ifacaddr, NULL); | 4747 | inet6_dump_ifacaddr, NULL); |
4748 | 4748 | ||
4749 | ipv6_addr_label_rtnl_register(); | 4749 | ipv6_addr_label_rtnl_register(); |
4750 | 4750 | ||
4751 | return 0; | 4751 | return 0; |
4752 | errout: | 4752 | errout: |
4753 | rtnl_af_unregister(&inet6_ops); | 4753 | rtnl_af_unregister(&inet6_ops); |
4754 | errout_af: | 4754 | errout_af: |
4755 | unregister_netdevice_notifier(&ipv6_dev_notf); | 4755 | unregister_netdevice_notifier(&ipv6_dev_notf); |
4756 | errlo: | 4756 | errlo: |
4757 | unregister_pernet_subsys(&addrconf_ops); | 4757 | unregister_pernet_subsys(&addrconf_ops); |
4758 | out_addrlabel: | 4758 | out_addrlabel: |
4759 | ipv6_addr_label_cleanup(); | 4759 | ipv6_addr_label_cleanup(); |
4760 | out: | 4760 | out: |
4761 | return err; | 4761 | return err; |
4762 | } | 4762 | } |
4763 | 4763 | ||
/*
 * Module unload path: undo addrconf_init().  The notifier and pernet
 * subsystem are detached first so no new events arrive, then every
 * device with an inet6_dev is brought down under RTNL; by that point
 * the global address hash table is expected to be empty.
 */
void addrconf_cleanup(void)
{
	struct net_device *dev;
	int i;

	unregister_netdevice_notifier(&ipv6_dev_notf);
	unregister_pernet_subsys(&addrconf_ops);
	ipv6_addr_label_cleanup();

	rtnl_lock();

	__rtnl_af_unregister(&inet6_ops);

	/* clean dev list */
	for_each_netdev(&init_net, dev) {
		if (__in6_dev_get(dev) == NULL)
			continue;
		/* NOTE(review): second argument selects the teardown
		 * depth (1 for ordinary devices, 2 for loopback) —
		 * confirm exact semantics against addrconf_ifdown() */
		addrconf_ifdown(dev, 1);
	}
	addrconf_ifdown(init_net.loopback_dev, 2);

	/*
	 * Check hash table: warn about any address entry that survived
	 * the interface teardown above.
	 */
	spin_lock_bh(&addrconf_hash_lock);
	for (i = 0; i < IN6_ADDR_HSIZE; i++)
		WARN_ON(!hlist_empty(&inet6_addr_lst[i]));
	spin_unlock_bh(&addrconf_hash_lock);

	del_timer(&addr_chk_timer);
	rtnl_unlock();
}
4796 | 4796 |
1 | /* | 1 | /* |
2 | * Linux INET6 implementation | 2 | * Linux INET6 implementation |
3 | * Forwarding Information Database | 3 | * Forwarding Information Database |
4 | * | 4 | * |
5 | * Authors: | 5 | * Authors: |
6 | * Pedro Roque <roque@di.fc.ul.pt> | 6 | * Pedro Roque <roque@di.fc.ul.pt> |
7 | * | 7 | * |
8 | * This program is free software; you can redistribute it and/or | 8 | * This program is free software; you can redistribute it and/or |
9 | * modify it under the terms of the GNU General Public License | 9 | * modify it under the terms of the GNU General Public License |
10 | * as published by the Free Software Foundation; either version | 10 | * as published by the Free Software Foundation; either version |
11 | * 2 of the License, or (at your option) any later version. | 11 | * 2 of the License, or (at your option) any later version. |
12 | */ | 12 | */ |
13 | 13 | ||
14 | /* | 14 | /* |
15 | * Changes: | 15 | * Changes: |
16 | * Yuji SEKIYA @USAGI: Support default route on router node; | 16 | * Yuji SEKIYA @USAGI: Support default route on router node; |
17 | * remove ip6_null_entry from the top of | 17 | * remove ip6_null_entry from the top of |
18 | * routing table. | 18 | * routing table. |
19 | * Ville Nuorvala: Fixed routing subtrees. | 19 | * Ville Nuorvala: Fixed routing subtrees. |
20 | */ | 20 | */ |
21 | #include <linux/errno.h> | 21 | #include <linux/errno.h> |
22 | #include <linux/types.h> | 22 | #include <linux/types.h> |
23 | #include <linux/net.h> | 23 | #include <linux/net.h> |
24 | #include <linux/route.h> | 24 | #include <linux/route.h> |
25 | #include <linux/netdevice.h> | 25 | #include <linux/netdevice.h> |
26 | #include <linux/in6.h> | 26 | #include <linux/in6.h> |
27 | #include <linux/init.h> | 27 | #include <linux/init.h> |
28 | #include <linux/list.h> | 28 | #include <linux/list.h> |
29 | #include <linux/slab.h> | 29 | #include <linux/slab.h> |
30 | 30 | ||
31 | #ifdef CONFIG_PROC_FS | 31 | #ifdef CONFIG_PROC_FS |
32 | #include <linux/proc_fs.h> | 32 | #include <linux/proc_fs.h> |
33 | #endif | 33 | #endif |
34 | 34 | ||
35 | #include <net/ipv6.h> | 35 | #include <net/ipv6.h> |
36 | #include <net/ndisc.h> | 36 | #include <net/ndisc.h> |
37 | #include <net/addrconf.h> | 37 | #include <net/addrconf.h> |
38 | 38 | ||
39 | #include <net/ip6_fib.h> | 39 | #include <net/ip6_fib.h> |
40 | #include <net/ip6_route.h> | 40 | #include <net/ip6_route.h> |
41 | 41 | ||
#define RT6_DEBUG 2

/* RT6_TRACE() emits debug output only when RT6_DEBUG >= 3;
 * otherwise it compiles away to nothing. */
#if RT6_DEBUG >= 3
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#else
#define RT6_TRACE(x...) do { ; } while (0)
#endif

/* Slab cache — presumably for fib6_node allocations (name-based;
 * confirm at the kmem_cache_create() call later in this file). */
static struct kmem_cache * fib6_node_kmem __read_mostly;
51 | 51 | ||
/*
 * State of an in-progress tree walk: which part of the current node
 * the walker should visit next (FWS_U = go back up to the parent).
 */
enum fib_walk_state_t
{
#ifdef CONFIG_IPV6_SUBTREES
	FWS_S,	/* descend into the node's subtree (subtrees enabled only) */
#endif
	FWS_L,	/* visit the left child next */
	FWS_R,	/* visit the right child next */
	FWS_C,	/* process the current node itself */
	FWS_U	/* move back up to the parent */
};
62 | 62 | ||
/*
 * Bundles a generic tree walker with the per-route cleanup callback
 * and its opaque argument, for route-pruning walks over a table.
 */
struct fib6_cleaner_t
{
	struct fib6_walker_t w;			/* embedded tree walker */
	struct net *net;			/* namespace being cleaned */
	int (*func)(struct rt6_info *, void *arg); /* per-route callback */
	void *arg;				/* opaque argument for func */
};
70 | 70 | ||
71 | static DEFINE_RWLOCK(fib6_walker_lock); | 71 | static DEFINE_RWLOCK(fib6_walker_lock); |
72 | 72 | ||
73 | #ifdef CONFIG_IPV6_SUBTREES | 73 | #ifdef CONFIG_IPV6_SUBTREES |
74 | #define FWS_INIT FWS_S | 74 | #define FWS_INIT FWS_S |
75 | #else | 75 | #else |
76 | #define FWS_INIT FWS_L | 76 | #define FWS_INIT FWS_L |
77 | #endif | 77 | #endif |
78 | 78 | ||
79 | static void fib6_prune_clones(struct net *net, struct fib6_node *fn, | 79 | static void fib6_prune_clones(struct net *net, struct fib6_node *fn, |
80 | struct rt6_info *rt); | 80 | struct rt6_info *rt); |
81 | static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn); | 81 | static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn); |
82 | static struct fib6_node *fib6_repair_tree(struct net *net, struct fib6_node *fn); | 82 | static struct fib6_node *fib6_repair_tree(struct net *net, struct fib6_node *fn); |
83 | static int fib6_walk(struct fib6_walker_t *w); | 83 | static int fib6_walk(struct fib6_walker_t *w); |
84 | static int fib6_walk_continue(struct fib6_walker_t *w); | 84 | static int fib6_walk_continue(struct fib6_walker_t *w); |
85 | 85 | ||
86 | /* | 86 | /* |
87 | * A routing update causes an increase of the serial number on the | 87 | * A routing update causes an increase of the serial number on the |
88 | * affected subtree. This allows for cached routes to be asynchronously | 88 | * affected subtree. This allows for cached routes to be asynchronously |
89 | * tested when modifications are made to the destination cache as a | 89 | * tested when modifications are made to the destination cache as a |
90 | * result of redirects, path MTU changes, etc. | 90 | * result of redirects, path MTU changes, etc. |
91 | */ | 91 | */ |
92 | 92 | ||
93 | static __u32 rt_sernum; | 93 | static __u32 rt_sernum; |
94 | 94 | ||
95 | static void fib6_gc_timer_cb(unsigned long arg); | 95 | static void fib6_gc_timer_cb(unsigned long arg); |
96 | 96 | ||
97 | static LIST_HEAD(fib6_walkers); | 97 | static LIST_HEAD(fib6_walkers); |
98 | #define FOR_WALKERS(w) list_for_each_entry(w, &fib6_walkers, lh) | 98 | #define FOR_WALKERS(w) list_for_each_entry(w, &fib6_walkers, lh) |
99 | 99 | ||
/*
 * Register @w on the global list of active tree walkers, under the
 * BH-safe write side of fib6_walker_lock.
 */
static inline void fib6_walker_link(struct fib6_walker_t *w)
{
	write_lock_bh(&fib6_walker_lock);
	list_add(&w->lh, &fib6_walkers);
	write_unlock_bh(&fib6_walker_lock);
}
106 | 106 | ||
/*
 * Remove @w from the global walker list, under the BH-safe write side
 * of fib6_walker_lock.
 */
static inline void fib6_walker_unlink(struct fib6_walker_t *w)
{
	write_lock_bh(&fib6_walker_lock);
	list_del(&w->lh);
	write_unlock_bh(&fib6_walker_lock);
}
/*
 * Hand out the next route serial number.  The counter skips values
 * that are non-positive when viewed as signed, wrapping back to 1.
 * NOTE(review): rt_sernum is bumped without atomics or a visible lock
 * here -- presumably callers serialize via the table lock; confirm.
 */
static __inline__ u32 fib6_new_sernum(void)
{
	u32 n = ++rt_sernum;
	if ((__s32)n <= 0)
		rt_sernum = n = 1;
	return n;
}
120 | 120 | ||
121 | /* | 121 | /* |
122 | * Auxiliary address test functions for the radix tree. | 122 | * Auxiliary address test functions for the radix tree. |
123 | * | 123 | * |
124 | * These assume a 32bit processor (although it will work on | 124 | * These assume a 32bit processor (although it will work on |
125 | * 64bit processors) | 125 | * 64bit processors) |
126 | */ | 126 | */ |
127 | 127 | ||
128 | /* | 128 | /* |
129 | * test bit | 129 | * test bit |
130 | */ | 130 | */ |
131 | #if defined(__LITTLE_ENDIAN) | 131 | #if defined(__LITTLE_ENDIAN) |
132 | # define BITOP_BE32_SWIZZLE (0x1F & ~7) | 132 | # define BITOP_BE32_SWIZZLE (0x1F & ~7) |
133 | #else | 133 | #else |
134 | # define BITOP_BE32_SWIZZLE 0 | 134 | # define BITOP_BE32_SWIZZLE 0 |
135 | #endif | 135 | #endif |
136 | 136 | ||
/*
 * Test bit @fn_bit of the big-endian address at @token, where bit 0 is
 * the most significant bit of the first byte.  Returns non-zero iff the
 * bit is set.
 */
static __inline__ __be32 addr_bit_set(const void *token, int fn_bit)
{
	const __be32 *addr = token;
	/*
	 * Here,
	 *	1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)
	 * is optimized version of
	 *	htonl(1 << ((~fn_bit)&0x1F))
	 * See include/asm-generic/bitops/le.h.
	 */
	return (__force __be32)(1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)) &
	       addr[fn_bit >> 5];
}
150 | 150 | ||
151 | static __inline__ struct fib6_node * node_alloc(void) | 151 | static __inline__ struct fib6_node * node_alloc(void) |
152 | { | 152 | { |
153 | struct fib6_node *fn; | 153 | struct fib6_node *fn; |
154 | 154 | ||
155 | fn = kmem_cache_zalloc(fib6_node_kmem, GFP_ATOMIC); | 155 | fn = kmem_cache_zalloc(fib6_node_kmem, GFP_ATOMIC); |
156 | 156 | ||
157 | return fn; | 157 | return fn; |
158 | } | 158 | } |
159 | 159 | ||
160 | static __inline__ void node_free(struct fib6_node * fn) | 160 | static __inline__ void node_free(struct fib6_node * fn) |
161 | { | 161 | { |
162 | kmem_cache_free(fib6_node_kmem, fn); | 162 | kmem_cache_free(fib6_node_kmem, fn); |
163 | } | 163 | } |
164 | 164 | ||
165 | static __inline__ void rt6_release(struct rt6_info *rt) | 165 | static __inline__ void rt6_release(struct rt6_info *rt) |
166 | { | 166 | { |
167 | if (atomic_dec_and_test(&rt->rt6i_ref)) | 167 | if (atomic_dec_and_test(&rt->rt6i_ref)) |
168 | dst_free(&rt->dst); | 168 | dst_free(&rt->dst); |
169 | } | 169 | } |
170 | 170 | ||
/*
 * Make @tb visible in the namespace's table hash.  Only called while a
 * table is being created, so no locking is needed for the hash insert
 * itself (see comment below).
 */
static void fib6_link_table(struct net *net, struct fib6_table *tb)
{
	unsigned int h;

	/*
	 * Initialize table lock at a single place to give lockdep a key,
	 * tables aren't visible prior to being linked to the list.
	 */
	rwlock_init(&tb->tb6_lock);

	h = tb->tb6_id & (FIB6_TABLE_HASHSZ - 1);

	/*
	 * No protection necessary, this is the only list mutation
	 * operation, tables never disappear once they exist.
	 */
	hlist_add_head_rcu(&tb->tb6_hlist, &net->ipv6.fib_table_hash[h]);
}
189 | 189 | ||
190 | #ifdef CONFIG_IPV6_MULTIPLE_TABLES | 190 | #ifdef CONFIG_IPV6_MULTIPLE_TABLES |
191 | 191 | ||
192 | static struct fib6_table *fib6_alloc_table(struct net *net, u32 id) | 192 | static struct fib6_table *fib6_alloc_table(struct net *net, u32 id) |
193 | { | 193 | { |
194 | struct fib6_table *table; | 194 | struct fib6_table *table; |
195 | 195 | ||
196 | table = kzalloc(sizeof(*table), GFP_ATOMIC); | 196 | table = kzalloc(sizeof(*table), GFP_ATOMIC); |
197 | if (table != NULL) { | 197 | if (table != NULL) { |
198 | table->tb6_id = id; | 198 | table->tb6_id = id; |
199 | table->tb6_root.leaf = net->ipv6.ip6_null_entry; | 199 | table->tb6_root.leaf = net->ipv6.ip6_null_entry; |
200 | table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; | 200 | table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; |
201 | } | 201 | } |
202 | 202 | ||
203 | return table; | 203 | return table; |
204 | } | 204 | } |
205 | 205 | ||
206 | struct fib6_table *fib6_new_table(struct net *net, u32 id) | 206 | struct fib6_table *fib6_new_table(struct net *net, u32 id) |
207 | { | 207 | { |
208 | struct fib6_table *tb; | 208 | struct fib6_table *tb; |
209 | 209 | ||
210 | if (id == 0) | 210 | if (id == 0) |
211 | id = RT6_TABLE_MAIN; | 211 | id = RT6_TABLE_MAIN; |
212 | tb = fib6_get_table(net, id); | 212 | tb = fib6_get_table(net, id); |
213 | if (tb) | 213 | if (tb) |
214 | return tb; | 214 | return tb; |
215 | 215 | ||
216 | tb = fib6_alloc_table(net, id); | 216 | tb = fib6_alloc_table(net, id); |
217 | if (tb != NULL) | 217 | if (tb != NULL) |
218 | fib6_link_table(net, tb); | 218 | fib6_link_table(net, tb); |
219 | 219 | ||
220 | return tb; | 220 | return tb; |
221 | } | 221 | } |
222 | 222 | ||
/*
 * RCU-protected lookup of the table with id @id (0 is an alias for
 * RT6_TABLE_MAIN).  Returns the table, or NULL if it does not exist.
 * Tables are never removed once linked (see fib6_link_table), so the
 * returned pointer stays valid after rcu_read_unlock().
 */
struct fib6_table *fib6_get_table(struct net *net, u32 id)
{
	struct fib6_table *tb;
	struct hlist_head *head;
	struct hlist_node *node;
	unsigned int h;

	if (id == 0)
		id = RT6_TABLE_MAIN;
	h = id & (FIB6_TABLE_HASHSZ - 1);	/* hash bucket index */
	rcu_read_lock();
	head = &net->ipv6.fib_table_hash[h];
	hlist_for_each_entry_rcu(tb, node, head, tb6_hlist) {
		if (tb->tb6_id == id) {
			rcu_read_unlock();
			return tb;
		}
	}
	rcu_read_unlock();

	return NULL;
}
245 | 245 | ||
/* Per-namespace init: link the preallocated main and local tables. */
static void __net_init fib6_tables_init(struct net *net)
{
	fib6_link_table(net, net->ipv6.fib6_main_tbl);
	fib6_link_table(net, net->ipv6.fib6_local_tbl);
}
251 | #else | 251 | #else |
252 | 252 | ||
/* Single-table build: any id resolves to the one existing table. */
struct fib6_table *fib6_new_table(struct net *net, u32 id)
{
	return fib6_get_table(net, id);
}
257 | 257 | ||
/* Single-table build: @id is ignored, always return the main table. */
struct fib6_table *fib6_get_table(struct net *net, u32 id)
{
	return net->ipv6.fib6_main_tbl;
}
262 | 262 | ||
/*
 * Single-table build: no policy rules exist, so dispatch straight to
 * @lookup against the main table.
 */
struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
				   int flags, pol_lookup_t lookup)
{
	return (struct dst_entry *) lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
}
268 | 268 | ||
/* Per-namespace init (single-table build): link the main table only. */
static void __net_init fib6_tables_init(struct net *net)
{
	fib6_link_table(net, net->ipv6.fib6_main_tbl);
}
273 | 273 | ||
274 | #endif | 274 | #endif |
275 | 275 | ||
/*
 * Walker callback for netlink dumps: emit every route on the current
 * node's leaf chain.  Returns 1 (remembering the first unsent route in
 * w->leaf) when the skb fills up so the walk can be suspended, or 0
 * once the whole chain has been dumped.
 */
static int fib6_dump_node(struct fib6_walker_t *w)
{
	int res;
	struct rt6_info *rt;

	for (rt = w->leaf; rt; rt = rt->dst.rt6_next) {
		res = rt6_dump_route(rt, w->args);
		if (res < 0) {
			/* Frame is full, suspend walking */
			w->leaf = rt;
			return 1;
		}
		WARN_ON(res == 0);	/* a route must either fit or fail */
	}
	w->leaf = NULL;
	return 0;
}
293 | 293 | ||
/*
 * Tear down per-dump state hung off the netlink callback: free the
 * walker stashed in args[2] (unlinking it from the walker list first
 * if args[4] says it is still linked) and restore the caller's
 * original ->done callback saved in args[3].
 */
static void fib6_dump_end(struct netlink_callback *cb)
{
	struct fib6_walker_t *w = (void*)cb->args[2];

	if (w) {
		if (cb->args[4]) {
			cb->args[4] = 0;
			fib6_walker_unlink(w);
		}
		cb->args[2] = 0;
		kfree(w);
	}
	cb->done = (void*)cb->args[3];
	/* NOTE(review): args[1] = 3 apparently marks the dump finished
	 * for any subsequent inet6_dump_fib invocation -- confirm. */
	cb->args[1] = 3;
}
309 | 309 | ||
310 | static int fib6_dump_done(struct netlink_callback *cb) | 310 | static int fib6_dump_done(struct netlink_callback *cb) |
311 | { | 311 | { |
312 | fib6_dump_end(cb); | 312 | fib6_dump_end(cb); |
313 | return cb->done ? cb->done(cb) : 0; | 313 | return cb->done ? cb->done(cb) : 0; |
314 | } | 314 | } |
315 | 315 | ||
/*
 * Dump one table into @skb.  cb->args[4] records whether a walk of
 * this table is already in progress; cb->args[5] holds the root serial
 * number captured when the walk was (re)started, so a tree change can
 * be detected and the walk restarted from the root while skipping the
 * w->count routes already dumped.  Returns >0 if the skb filled up
 * (walk suspended), <=0 when this table is finished.
 */
static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
			   struct netlink_callback *cb)
{
	struct fib6_walker_t *w;
	int res;

	w = (void *)cb->args[2];
	w->root = &table->tb6_root;

	if (cb->args[4] == 0) {
		/* first pass over this table: start a fresh walk */
		w->count = 0;
		w->skip = 0;

		read_lock_bh(&table->tb6_lock);
		res = fib6_walk(w);
		read_unlock_bh(&table->tb6_lock);
		if (res > 0) {
			/* suspended: remember walk state and tree version */
			cb->args[4] = 1;
			cb->args[5] = w->root->fn_sernum;
		}
	} else {
		if (cb->args[5] != w->root->fn_sernum) {
			/* Begin at the root if the tree changed */
			cb->args[5] = w->root->fn_sernum;
			w->state = FWS_INIT;
			w->node = w->root;
			w->skip = w->count;
		} else
			w->skip = 0;

		read_lock_bh(&table->tb6_lock);
		res = fib6_walk_continue(w);
		read_unlock_bh(&table->tb6_lock);
		if (res <= 0) {
			/* table done (or error): drop the walker */
			fib6_walker_unlink(w);
			cb->args[4] = 0;
		}
	}

	return res;
}
357 | 357 | ||
/*
 * Netlink entry point for dumping the IPv6 FIB.  Iterates the table
 * hash under RCU, resuming at the bucket/entry saved in cb->args[0]
 * and cb->args[1] by a previous invocation.  On the first call a
 * walker is allocated and stashed in cb->args[2]; it is torn down by
 * fib6_dump_end() once the dump completes or aborts.
 */
static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	unsigned int h, s_h;
	unsigned int e = 0, s_e;
	struct rt6_rtnl_dump_arg arg;
	struct fib6_walker_t *w;
	struct fib6_table *tb;
	struct hlist_node *node;
	struct hlist_head *head;
	int res = 0;

	/* resume point from the previous invocation (0 on first call) */
	s_h = cb->args[0];
	s_e = cb->args[1];

	w = (void *)cb->args[2];
	if (w == NULL) {
		/* New dump:
		 *
		 * 1. hook callback destructor.
		 */
		cb->args[3] = (long)cb->done;
		cb->done = fib6_dump_done;

		/*
		 * 2. allocate and initialize walker.
		 */
		w = kzalloc(sizeof(*w), GFP_ATOMIC);
		if (w == NULL)
			return -ENOMEM;
		w->func = fib6_dump_node;
		cb->args[2] = (long)w;
	}

	/* arg lives on the stack; w->args is only used within this call */
	arg.skb = skb;
	arg.cb = cb;
	arg.net = net;
	w->args = &arg;

	rcu_read_lock();
	for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) {
		e = 0;
		head = &net->ipv6.fib_table_hash[h];
		hlist_for_each_entry_rcu(tb, node, head, tb6_hlist) {
			if (e < s_e)
				goto next;	/* dumped on a previous call */
			res = fib6_dump_table(tb, skb, cb);
			if (res != 0)
				goto out;	/* skb full or error */
next:
			e++;
		}
	}
out:
	rcu_read_unlock();
	/* save resume point for the next invocation */
	cb->args[1] = e;
	cb->args[0] = h;

	res = res < 0 ? res : skb->len;
	if (res <= 0)
		fib6_dump_end(cb);
	return res;
}
421 | 421 | ||
/*
 *	Routing Table
 *
 *	return the appropriate node for a routing tree "add" operation
 *	by either creating and inserting or by returning an existing
 *	node.
 *
 *	@addr/@plen is the new prefix; @offset locates the rt6key inside
 *	struct rt6_info so the same code serves main tree and subtrees.
 *	Returns NULL if a needed node could not be allocated.
 */

static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr,
				     int addrlen, int plen,
				     int offset)
{
	struct fib6_node *fn, *in, *ln;
	struct fib6_node *pn = NULL;
	struct rt6key *key;
	int	bit;
	__be32	dir = 0;
	__u32	sernum = fib6_new_sernum();

	RT6_TRACE("fib6_add_1\n");

	/* insert node in tree */

	fn = root;

	do {
		/* prefix key of this node's routes, @offset bytes in */
		key = (struct rt6key *)((u8 *)fn->leaf + offset);

		/*
		 *	Prefix match
		 */
		if (plen < fn->fn_bit ||
		    !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit))
			goto insert_above;

		/*
		 *	Exact match ?
		 */

		if (plen == fn->fn_bit) {
			/* clean up an intermediate node */
			if ((fn->fn_flags & RTN_RTINFO) == 0) {
				rt6_release(fn->leaf);
				fn->leaf = NULL;
			}

			fn->fn_sernum = sernum;

			return fn;
		}

		/*
		 *	We have more bits to go
		 */

		/* Try to walk down on tree. */
		fn->fn_sernum = sernum;
		dir = addr_bit_set(addr, fn->fn_bit);
		pn = fn;
		fn = dir ? fn->right: fn->left;
	} while (fn);

	/*
	 *	We walked to the bottom of tree.
	 *	Create new leaf node without children.
	 */

	ln = node_alloc();

	if (ln == NULL)
		return NULL;
	ln->fn_bit = plen;

	ln->parent = pn;
	ln->fn_sernum = sernum;

	if (dir)
		pn->right = ln;
	else
		pn->left = ln;

	return ln;


insert_above:
	/*
	 * split since we don't have a common prefix anymore or
	 * we have a less significant route.
	 * we've to insert an intermediate node on the list
	 * this new node will point to the one we need to create
	 * and the current
	 */

	pn = fn->parent;

	/* find 1st bit in difference between the 2 addrs.

	   See comment in __ipv6_addr_diff: bit may be an invalid value,
	   but if it is >= plen, the value is ignored in any case.
	 */

	bit = __ipv6_addr_diff(addr, &key->addr, addrlen);

	/*
	 *		(intermediate)[in]
	 *	          /	   \
	 *	(new leaf node)[ln] (old node)[fn]
	 */
	if (plen > bit) {
		in = node_alloc();
		ln = node_alloc();

		if (in == NULL || ln == NULL) {
			if (in)
				node_free(in);
			if (ln)
				node_free(ln);
			return NULL;
		}

		/*
		 * new intermediate node.
		 * RTN_RTINFO will
		 * be off since that an address that chooses one of
		 * the branches would not match less specific routes
		 * in the other branch
		 */

		in->fn_bit = bit;

		in->parent = pn;
		in->leaf = fn->leaf;
		atomic_inc(&in->leaf->rt6i_ref);

		in->fn_sernum = sernum;

		/* update parent pointer */
		if (dir)
			pn->right = in;
		else
			pn->left = in;

		ln->fn_bit = plen;

		ln->parent = in;
		fn->parent = in;

		ln->fn_sernum = sernum;

		/* the diverging bit decides which side each child goes */
		if (addr_bit_set(addr, bit)) {
			in->right = ln;
			in->left = fn;
		} else {
			in->left = ln;
			in->right = fn;
		}
	} else { /* plen <= bit */

		/*
		 *		(new leaf node)[ln]
		 *	          /	   \
		 *	     (old node)[fn] NULL
		 */

		ln = node_alloc();

		if (ln == NULL)
			return NULL;

		ln->fn_bit = plen;

		ln->parent = pn;

		ln->fn_sernum = sernum;

		if (dir)
			pn->right = ln;
		else
			pn->left = ln;

		/* old node hangs off whichever side its key selects */
		if (addr_bit_set(&key->addr, plen))
			ln->right = fn;
		else
			ln->left = fn;

		fn->parent = ln;
	}
	return ln;
}
611 | 611 | ||
/*
 *	Insert routing information in a node.
 *
 *	The leaf chain stays sorted by rt6i_metric.  A route equivalent
 *	to an existing one (same metric, device, idev and gateway) is
 *	rejected with -EEXIST after refreshing the existing entry's
 *	expiry state from @rt.  Returns 0 on successful insertion.
 */

static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
			    struct nl_info *info)
{
	struct rt6_info *iter = NULL;
	struct rt6_info **ins;

	ins = &fn->leaf;

	for (iter = fn->leaf; iter; iter=iter->dst.rt6_next) {
		/*
		 *	Search for duplicates
		 */

		if (iter->rt6i_metric == rt->rt6i_metric) {
			/*
			 *	Same priority level
			 */

			if (iter->rt6i_dev == rt->rt6i_dev &&
			    iter->rt6i_idev == rt->rt6i_idev &&
			    ipv6_addr_equal(&iter->rt6i_gateway,
					    &rt->rt6i_gateway)) {
				/* permanent duplicate: nothing to refresh */
				if (!(iter->rt6i_flags&RTF_EXPIRES))
					return -EEXIST;
				/* refresh (or clear) the expiry of the
				 * existing route from the new one */
				iter->rt6i_expires = rt->rt6i_expires;
				if (!(rt->rt6i_flags&RTF_EXPIRES)) {
					iter->rt6i_flags &= ~RTF_EXPIRES;
					iter->rt6i_expires = 0;
				}
				return -EEXIST;
			}
		}

		if (iter->rt6i_metric > rt->rt6i_metric)
			break;	/* insertion point found */

		ins = &iter->dst.rt6_next;
	}

	/* Reset round-robin state, if necessary */
	if (ins == &fn->leaf)
		fn->rr_ptr = NULL;

	/*
	 *	insert node
	 */

	rt->dst.rt6_next = iter;
	*ins = rt;
	rt->rt6i_node = fn;
	atomic_inc(&rt->rt6i_ref);
	inet6_rt_notify(RTM_NEWROUTE, rt, info);
	info->nl_net->ipv6.rt6_stats->fib_rt_entries++;

	if ((fn->fn_flags & RTN_RTINFO) == 0) {
		/* first route attached to this node */
		info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
		fn->fn_flags |= RTN_RTINFO;
	}

	return 0;
}
677 | 677 | ||
678 | static __inline__ void fib6_start_gc(struct net *net, struct rt6_info *rt) | 678 | static __inline__ void fib6_start_gc(struct net *net, struct rt6_info *rt) |
679 | { | 679 | { |
680 | if (!timer_pending(&net->ipv6.ip6_fib_timer) && | 680 | if (!timer_pending(&net->ipv6.ip6_fib_timer) && |
681 | (rt->rt6i_flags & (RTF_EXPIRES|RTF_CACHE))) | 681 | (rt->rt6i_flags & (RTF_EXPIRES|RTF_CACHE))) |
682 | mod_timer(&net->ipv6.ip6_fib_timer, | 682 | mod_timer(&net->ipv6.ip6_fib_timer, |
683 | jiffies + net->ipv6.sysctl.ip6_rt_gc_interval); | 683 | jiffies + net->ipv6.sysctl.ip6_rt_gc_interval); |
684 | } | 684 | } |
685 | 685 | ||
686 | void fib6_force_start_gc(struct net *net) | 686 | void fib6_force_start_gc(struct net *net) |
687 | { | 687 | { |
688 | if (!timer_pending(&net->ipv6.ip6_fib_timer)) | 688 | if (!timer_pending(&net->ipv6.ip6_fib_timer)) |
689 | mod_timer(&net->ipv6.ip6_fib_timer, | 689 | mod_timer(&net->ipv6.ip6_fib_timer, |
690 | jiffies + net->ipv6.sysctl.ip6_rt_gc_interval); | 690 | jiffies + net->ipv6.sysctl.ip6_rt_gc_interval); |
691 | } | 691 | } |
692 | 692 | ||
693 | /* | 693 | /* |
694 | * Add routing information to the routing tree. | 694 | * Add routing information to the routing tree. |
695 | * <destination addr>/<source addr> | 695 | * <destination addr>/<source addr> |
696 | * with source addr info in sub-trees | 696 | * with source addr info in sub-trees |
697 | */ | 697 | */ |
698 | 698 | ||
/* Insert @rt into the tree rooted at @root, creating intermediate nodes
 * (and, with CONFIG_IPV6_SUBTREES, a source-prefix subtree) as needed.
 * Returns 0 on success or a negative errno; on any failure the route's
 * dst is released with dst_free() before returning. */
int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
{
	struct fib6_node *fn, *pn = NULL;
	int err = -ENOMEM;

	/* Find or create the node matching the destination prefix. */
	fn = fib6_add_1(root, &rt->rt6i_dst.addr, sizeof(struct in6_addr),
			rt->rt6i_dst.plen, offsetof(struct rt6_info, rt6i_dst));

	if (fn == NULL)
		goto out;

	pn = fn;	/* remember the main-tree node for pruning/cleanup */

#ifdef CONFIG_IPV6_SUBTREES
	if (rt->rt6i_src.plen) {
		struct fib6_node *sn;

		if (fn->subtree == NULL) {
			struct fib6_node *sfn;

			/*
			 * Create subtree.
			 *
			 *		fn[main tree]
			 *		|
			 *		sfn[subtree root]
			 *		   \
			 *		    sn[new leaf node]
			 */

			/* Create subtree root node */
			sfn = node_alloc();
			if (sfn == NULL)
				goto st_failure;

			/* A subtree root keeps the null entry as its leaf
			 * placeholder; take a reference on it. */
			sfn->leaf = info->nl_net->ipv6.ip6_null_entry;
			atomic_inc(&info->nl_net->ipv6.ip6_null_entry->rt6i_ref);
			sfn->fn_flags = RTN_ROOT;
			sfn->fn_sernum = fib6_new_sernum();

			/* Now add the first leaf node to new subtree */

			sn = fib6_add_1(sfn, &rt->rt6i_src.addr,
					sizeof(struct in6_addr), rt->rt6i_src.plen,
					offsetof(struct rt6_info, rt6i_src));

			if (sn == NULL) {
				/* If it is failed, discard just allocated
				   root, and then (in st_failure) stale node
				   in main tree.
				 */
				node_free(sfn);
				goto st_failure;
			}

			/* Now link new subtree to main tree */
			sfn->parent = fn;
			fn->subtree = sfn;
		} else {
			sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr,
					sizeof(struct in6_addr), rt->rt6i_src.plen,
					offsetof(struct rt6_info, rt6i_src));

			if (sn == NULL)
				goto st_failure;
		}

		/* The main-tree node may have been created leaf-less; pin
		 * this route there so the node stays valid. */
		if (fn->leaf == NULL) {
			fn->leaf = rt;
			atomic_inc(&rt->rt6i_ref);
		}
		fn = sn;	/* the route itself lives in the subtree */
	}
#endif

	err = fib6_add_rt2node(fn, rt, info);

	if (err == 0) {
		fib6_start_gc(info->nl_net, rt);
		if (!(rt->rt6i_flags&RTF_CACHE))
			fib6_prune_clones(info->nl_net, pn, rt);
	}

out:
	if (err) {
#ifdef CONFIG_IPV6_SUBTREES
		/*
		 * If fib6_add_1 has cleared the old leaf pointer in the
		 * super-tree leaf node we have to find a new one for it.
		 */
		if (pn != fn && pn->leaf == rt) {
			pn->leaf = NULL;
			atomic_dec(&rt->rt6i_ref);
		}
		if (pn != fn && !pn->leaf && !(pn->fn_flags & RTN_RTINFO)) {
			pn->leaf = fib6_find_prefix(info->nl_net, pn);
#if RT6_DEBUG >= 2
			if (!pn->leaf) {
				WARN_ON(pn->leaf == NULL);
				pn->leaf = info->nl_net->ipv6.ip6_null_entry;
			}
#endif
			atomic_inc(&pn->leaf->rt6i_ref);
		}
#endif
		dst_free(&rt->dst);
	}
	return err;

#ifdef CONFIG_IPV6_SUBTREES
	/* Subtree creation failed, probably main tree node
	   is orphan. If it is, shoot it.
	 */
st_failure:
	if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)))
		fib6_repair_tree(info->nl_net, fn);
	dst_free(&rt->dst);
	return err;
#endif
}
819 | 819 | ||
820 | /* | 820 | /* |
821 | * Routing tree lookup | 821 | * Routing tree lookup |
822 | * | 822 | * |
823 | */ | 823 | */ |
824 | 824 | ||
825 | struct lookup_args { | 825 | struct lookup_args { |
826 | int offset; /* key offset on rt6_info */ | 826 | int offset; /* key offset on rt6_info */ |
827 | const struct in6_addr *addr; /* search key */ | 827 | const struct in6_addr *addr; /* search key */ |
828 | }; | 828 | }; |
829 | 829 | ||
/* Longest-prefix lookup on one key from @args (destination first; with
 * CONFIG_IPV6_SUBTREES, recurses with args+1 for the source key).
 * Returns a node carrying routing info, or NULL when nothing in this
 * (sub)tree matches. */
static struct fib6_node * fib6_lookup_1(struct fib6_node *root,
					struct lookup_args *args)
{
	struct fib6_node *fn;
	__be32 dir;

	/* The zero-offset sentinel terminates the args array. */
	if (unlikely(args->offset == 0))
		return NULL;

	/*
	 *	Descend on a tree
	 */

	fn = root;

	for (;;) {
		struct fib6_node *next;

		dir = addr_bit_set(args->addr, fn->fn_bit);

		next = dir ? fn->right : fn->left;

		if (next) {
			fn = next;
			continue;
		}

		break;
	}

	/* Backtrack towards the root, returning the first (i.e. longest)
	 * matching prefix that actually holds route info. */
	while(fn) {
		if (FIB6_SUBTREE(fn) || fn->fn_flags & RTN_RTINFO) {
			struct rt6key *key;

			/* args->offset selects rt6i_dst or rt6i_src
			 * inside the leaf's rt6_info. */
			key = (struct rt6key *) ((u8 *) fn->leaf +
						 args->offset);

			if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) {
#ifdef CONFIG_IPV6_SUBTREES
				/* Destination matched: continue with the
				 * source key inside the subtree. */
				if (fn->subtree)
					fn = fib6_lookup_1(fn->subtree, args + 1);
#endif
				if (!fn || fn->fn_flags & RTN_RTINFO)
					return fn;
			}
		}

		if (fn->fn_flags & RTN_ROOT)
			break;

		fn = fn->parent;
	}

	return NULL;
}
885 | 885 | ||
886 | struct fib6_node * fib6_lookup(struct fib6_node *root, const struct in6_addr *daddr, | 886 | struct fib6_node * fib6_lookup(struct fib6_node *root, const struct in6_addr *daddr, |
887 | const struct in6_addr *saddr) | 887 | const struct in6_addr *saddr) |
888 | { | 888 | { |
889 | struct fib6_node *fn; | 889 | struct fib6_node *fn; |
890 | struct lookup_args args[] = { | 890 | struct lookup_args args[] = { |
891 | { | 891 | { |
892 | .offset = offsetof(struct rt6_info, rt6i_dst), | 892 | .offset = offsetof(struct rt6_info, rt6i_dst), |
893 | .addr = daddr, | 893 | .addr = daddr, |
894 | }, | 894 | }, |
895 | #ifdef CONFIG_IPV6_SUBTREES | 895 | #ifdef CONFIG_IPV6_SUBTREES |
896 | { | 896 | { |
897 | .offset = offsetof(struct rt6_info, rt6i_src), | 897 | .offset = offsetof(struct rt6_info, rt6i_src), |
898 | .addr = saddr, | 898 | .addr = saddr, |
899 | }, | 899 | }, |
900 | #endif | 900 | #endif |
901 | { | 901 | { |
902 | .offset = 0, /* sentinel */ | 902 | .offset = 0, /* sentinel */ |
903 | } | 903 | } |
904 | }; | 904 | }; |
905 | 905 | ||
906 | fn = fib6_lookup_1(root, daddr ? args : args + 1); | 906 | fn = fib6_lookup_1(root, daddr ? args : args + 1); |
907 | 907 | ||
908 | if (fn == NULL || fn->fn_flags & RTN_TL_ROOT) | 908 | if (fn == NULL || fn->fn_flags & RTN_TL_ROOT) |
909 | fn = root; | 909 | fn = root; |
910 | 910 | ||
911 | return fn; | 911 | return fn; |
912 | } | 912 | } |
913 | 913 | ||
914 | /* | 914 | /* |
915 | * Get node with specified destination prefix (and source prefix, | 915 | * Get node with specified destination prefix (and source prefix, |
916 | * if subtrees are used) | 916 | * if subtrees are used) |
917 | */ | 917 | */ |
918 | 918 | ||
919 | 919 | ||
920 | static struct fib6_node * fib6_locate_1(struct fib6_node *root, | 920 | static struct fib6_node * fib6_locate_1(struct fib6_node *root, |
921 | const struct in6_addr *addr, | 921 | const struct in6_addr *addr, |
922 | int plen, int offset) | 922 | int plen, int offset) |
923 | { | 923 | { |
924 | struct fib6_node *fn; | 924 | struct fib6_node *fn; |
925 | 925 | ||
926 | for (fn = root; fn ; ) { | 926 | for (fn = root; fn ; ) { |
927 | struct rt6key *key = (struct rt6key *)((u8 *)fn->leaf + offset); | 927 | struct rt6key *key = (struct rt6key *)((u8 *)fn->leaf + offset); |
928 | 928 | ||
929 | /* | 929 | /* |
930 | * Prefix match | 930 | * Prefix match |
931 | */ | 931 | */ |
932 | if (plen < fn->fn_bit || | 932 | if (plen < fn->fn_bit || |
933 | !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit)) | 933 | !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit)) |
934 | return NULL; | 934 | return NULL; |
935 | 935 | ||
936 | if (plen == fn->fn_bit) | 936 | if (plen == fn->fn_bit) |
937 | return fn; | 937 | return fn; |
938 | 938 | ||
939 | /* | 939 | /* |
940 | * We have more bits to go | 940 | * We have more bits to go |
941 | */ | 941 | */ |
942 | if (addr_bit_set(addr, fn->fn_bit)) | 942 | if (addr_bit_set(addr, fn->fn_bit)) |
943 | fn = fn->right; | 943 | fn = fn->right; |
944 | else | 944 | else |
945 | fn = fn->left; | 945 | fn = fn->left; |
946 | } | 946 | } |
947 | return NULL; | 947 | return NULL; |
948 | } | 948 | } |
949 | 949 | ||
950 | struct fib6_node * fib6_locate(struct fib6_node *root, | 950 | struct fib6_node * fib6_locate(struct fib6_node *root, |
951 | const struct in6_addr *daddr, int dst_len, | 951 | const struct in6_addr *daddr, int dst_len, |
952 | const struct in6_addr *saddr, int src_len) | 952 | const struct in6_addr *saddr, int src_len) |
953 | { | 953 | { |
954 | struct fib6_node *fn; | 954 | struct fib6_node *fn; |
955 | 955 | ||
956 | fn = fib6_locate_1(root, daddr, dst_len, | 956 | fn = fib6_locate_1(root, daddr, dst_len, |
957 | offsetof(struct rt6_info, rt6i_dst)); | 957 | offsetof(struct rt6_info, rt6i_dst)); |
958 | 958 | ||
959 | #ifdef CONFIG_IPV6_SUBTREES | 959 | #ifdef CONFIG_IPV6_SUBTREES |
960 | if (src_len) { | 960 | if (src_len) { |
961 | WARN_ON(saddr == NULL); | 961 | WARN_ON(saddr == NULL); |
962 | if (fn && fn->subtree) | 962 | if (fn && fn->subtree) |
963 | fn = fib6_locate_1(fn->subtree, saddr, src_len, | 963 | fn = fib6_locate_1(fn->subtree, saddr, src_len, |
964 | offsetof(struct rt6_info, rt6i_src)); | 964 | offsetof(struct rt6_info, rt6i_src)); |
965 | } | 965 | } |
966 | #endif | 966 | #endif |
967 | 967 | ||
968 | if (fn && fn->fn_flags&RTN_RTINFO) | 968 | if (fn && fn->fn_flags&RTN_RTINFO) |
969 | return fn; | 969 | return fn; |
970 | 970 | ||
971 | return NULL; | 971 | return NULL; |
972 | } | 972 | } |
973 | 973 | ||
974 | 974 | ||
975 | /* | 975 | /* |
976 | * Deletion | 976 | * Deletion |
977 | * | 977 | * |
978 | */ | 978 | */ |
979 | 979 | ||
980 | static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn) | 980 | static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn) |
981 | { | 981 | { |
982 | if (fn->fn_flags&RTN_ROOT) | 982 | if (fn->fn_flags&RTN_ROOT) |
983 | return net->ipv6.ip6_null_entry; | 983 | return net->ipv6.ip6_null_entry; |
984 | 984 | ||
985 | while(fn) { | 985 | while(fn) { |
986 | if(fn->left) | 986 | if(fn->left) |
987 | return fn->left->leaf; | 987 | return fn->left->leaf; |
988 | 988 | ||
989 | if(fn->right) | 989 | if(fn->right) |
990 | return fn->right->leaf; | 990 | return fn->right->leaf; |
991 | 991 | ||
992 | fn = FIB6_SUBTREE(fn); | 992 | fn = FIB6_SUBTREE(fn); |
993 | } | 993 | } |
994 | return NULL; | 994 | return NULL; |
995 | } | 995 | } |
996 | 996 | ||
997 | /* | 997 | /* |
998 | * Called to trim the tree of intermediate nodes when possible. "fn" | 998 | * Called to trim the tree of intermediate nodes when possible. "fn" |
999 | * is the node we want to try and remove. | 999 | * is the node we want to try and remove. |
1000 | */ | 1000 | */ |
1001 | 1001 | ||
/* Trim @fn, a now route-less intermediate node, out of the tree.
 * Iterates upward while parents become empty, fixing parent/child
 * links, subtree pointers and any concurrent tree walkers. Returns
 * the node the caller should continue from. */
static struct fib6_node *fib6_repair_tree(struct net *net,
					   struct fib6_node *fn)
{
	int children;
	int nstate;
	struct fib6_node *child, *pn;
	struct fib6_walker_t *w;
	int iter = 0;

	for (;;) {
		RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter);
		iter++;

		/* Only empty intermediate nodes may be removed. */
		WARN_ON(fn->fn_flags & RTN_RTINFO);
		WARN_ON(fn->fn_flags & RTN_TL_ROOT);
		WARN_ON(fn->leaf != NULL);

		/* children bitmask: 1 = right present, 2 = left present. */
		children = 0;
		child = NULL;
		if (fn->right) child = fn->right, children |= 1;
		if (fn->left) child = fn->left, children |= 2;

		if (children == 3 || FIB6_SUBTREE(fn)
#ifdef CONFIG_IPV6_SUBTREES
		    /* Subtree root (i.e. fn) may have one child */
		    || (children && fn->fn_flags&RTN_ROOT)
#endif
		    ) {
			/* Node must stay: give it a surrogate leaf. */
			fn->leaf = fib6_find_prefix(net, fn);
#if RT6_DEBUG >= 2
			if (fn->leaf==NULL) {
				WARN_ON(!fn->leaf);
				fn->leaf = net->ipv6.ip6_null_entry;
			}
#endif
			atomic_inc(&fn->leaf->rt6i_ref);
			return fn->parent;
		}

		pn = fn->parent;
#ifdef CONFIG_IPV6_SUBTREES
		if (FIB6_SUBTREE(pn) == fn) {
			/* fn was a subtree root: detach the whole subtree. */
			WARN_ON(!(fn->fn_flags & RTN_ROOT));
			FIB6_SUBTREE(pn) = NULL;
			nstate = FWS_L;
		} else {
			WARN_ON(fn->fn_flags & RTN_ROOT);
#endif
			/* Splice fn's single child (or NULL) into pn. */
			if (pn->right == fn) pn->right = child;
			else if (pn->left == fn) pn->left = child;
#if RT6_DEBUG >= 2
			else
				WARN_ON(1);
#endif
			if (child)
				child->parent = pn;
			nstate = FWS_R;
#ifdef CONFIG_IPV6_SUBTREES
		}
#endif

		/* Redirect any in-progress walkers off the dying node. */
		read_lock(&fib6_walker_lock);
		FOR_WALKERS(w) {
			if (child == NULL) {
				if (w->root == fn) {
					w->root = w->node = NULL;
					RT6_TRACE("W %p adjusted by delroot 1\n", w);
				} else if (w->node == fn) {
					RT6_TRACE("W %p adjusted by delnode 1, s=%d/%d\n", w, w->state, nstate);
					w->node = pn;
					w->state = nstate;
				}
			} else {
				if (w->root == fn) {
					w->root = child;
					RT6_TRACE("W %p adjusted by delroot 2\n", w);
				}
				if (w->node == fn) {
					w->node = child;
					if (children&2) {
						RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
						w->state = w->state>=FWS_R ? FWS_U : FWS_INIT;
					} else {
						RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
						w->state = w->state>=FWS_C ? FWS_U : FWS_INIT;
					}
				}
			}
		}
		read_unlock(&fib6_walker_lock);

		node_free(fn);
		if (pn->fn_flags&RTN_RTINFO || FIB6_SUBTREE(pn))
			return pn;

		/* Parent became empty too: drop its leaf and repeat. */
		rt6_release(pn->leaf);
		pn->leaf = NULL;
		fn = pn;
	}
}
1102 | 1102 | ||
/* Unlink the route *rtp from node @fn's leaf chain and release it.
 * Adjusts concurrent walkers, replaces dummy leaf references held by
 * ancestor split nodes, and emits an RTM_DELROUTE notification. */
static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
			   struct nl_info *info)
{
	struct fib6_walker_t *w;
	struct rt6_info *rt = *rtp;
	struct net *net = info->nl_net;

	RT6_TRACE("fib6_del_route\n");

	/* Unlink it */
	*rtp = rt->dst.rt6_next;
	rt->rt6i_node = NULL;
	net->ipv6.rt6_stats->fib_rt_entries--;
	net->ipv6.rt6_stats->fib_discarded_routes++;

	/* Reset round-robin state, if necessary */
	if (fn->rr_ptr == rt)
		fn->rr_ptr = NULL;

	/* Adjust walkers */
	read_lock(&fib6_walker_lock);
	FOR_WALKERS(w) {
		/* Walkers parked on this leaf must step past it. */
		if (w->state == FWS_C && w->leaf == rt) {
			RT6_TRACE("walker %p adjusted by delroute\n", w);
			w->leaf = rt->dst.rt6_next;
			if (w->leaf == NULL)
				w->state = FWS_U;
		}
	}
	read_unlock(&fib6_walker_lock);

	rt->dst.rt6_next = NULL;

	/* If it was last route, expunge its radix tree node */
	if (fn->leaf == NULL) {
		fn->fn_flags &= ~RTN_RTINFO;
		net->ipv6.rt6_stats->fib_route_nodes--;
		fn = fib6_repair_tree(net, fn);
	}

	if (atomic_read(&rt->rt6i_ref) != 1) {
		/* This route is used as dummy address holder in some split
		 * nodes. It is not leaked, but it still holds other resources,
		 * which must be released in time. So, scan ascendant nodes
		 * and replace dummy references to this route with references
		 * to still alive ones.
		 */
		while (fn) {
			if (!(fn->fn_flags&RTN_RTINFO) && fn->leaf == rt) {
				fn->leaf = fib6_find_prefix(net, fn);
				atomic_inc(&fn->leaf->rt6i_ref);
				rt6_release(rt);
			}
			fn = fn->parent;
		}
		/* No more references are possible at this point. */
		BUG_ON(atomic_read(&rt->rt6i_ref) != 1);
	}

	inet6_rt_notify(RTM_DELROUTE, rt, info);
	rt6_release(rt);
}
1165 | 1165 | ||
/* Delete @rt from the FIB. Prunes cached clones first (for non-cache
 * routes), then unlinks the route from its node's leaf chain.
 * Returns 0 on success, -ENOENT when the route is not in the tree or
 * is the per-netns null entry. */
int fib6_del(struct rt6_info *rt, struct nl_info *info)
{
	struct net *net = info->nl_net;
	struct fib6_node *fn = rt->rt6i_node;
	struct rt6_info **rtp;

#if RT6_DEBUG >= 2
	/* Obsolete dst entries must already be detached from the tree. */
	if (rt->dst.obsolete>0) {
		WARN_ON(fn != NULL);
		return -ENOENT;
	}
#endif
	if (fn == NULL || rt == net->ipv6.ip6_null_entry)
		return -ENOENT;

	WARN_ON(!(fn->fn_flags & RTN_RTINFO));

	if (!(rt->rt6i_flags&RTF_CACHE)) {
		struct fib6_node *pn = fn;
#ifdef CONFIG_IPV6_SUBTREES
		/* clones of this route might be in another subtree */
		if (rt->rt6i_src.plen) {
			/* Climb out of the subtree to its super-tree node
			 * so pruning covers all sibling subtrees. */
			while (!(pn->fn_flags&RTN_ROOT))
				pn = pn->parent;
			pn = pn->parent;
		}
#endif
		fib6_prune_clones(info->nl_net, pn, rt);
	}

	/*
	 *	Walk the leaf entries looking for ourself
	 */

	for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->dst.rt6_next) {
		if (*rtp == rt) {
			fib6_del_route(fn, rtp, info);
			return 0;
		}
	}
	return -ENOENT;
}
1208 | 1208 | ||
1209 | /* | 1209 | /* |
1210 | * Tree traversal function. | 1210 | * Tree traversal function. |
1211 | * | 1211 | * |
1212 | * Certainly, it is not interrupt safe. | 1212 | * Certainly, it is not interrupt safe. |
1213 | * However, it is internally reenterable wrt itself and fib6_add/fib6_del. | 1213 | * However, it is internally reenterable wrt itself and fib6_add/fib6_del. |
1214 | * It means, that we can modify tree during walking | 1214 | * It means, that we can modify tree during walking |
1215 | * and use this function for garbage collection, clone pruning, | 1215 | * and use this function for garbage collection, clone pruning, |
1216 | * cleaning tree when a device goes down etc. etc. | 1216 | * cleaning tree when a device goes down etc. etc. |
1217 | * | 1217 | * |
1218 | * It guarantees that every node will be traversed, | 1218 | * It guarantees that every node will be traversed, |
1219 | * and that it will be traversed only once. | 1219 | * and that it will be traversed only once. |
1220 | * | 1220 | * |
1221 | * Callback function w->func may return: | 1221 | * Callback function w->func may return: |
1222 | * 0 -> continue walking. | 1222 | * 0 -> continue walking. |
1223 | * positive value -> walking is suspended (used by tree dumps, | 1223 | * positive value -> walking is suspended (used by tree dumps, |
1224 | * and probably by gc, if it will be split to several slices) | 1224 | * and probably by gc, if it will be split to several slices) |
1225 | * negative value -> terminate walking. | 1225 | * negative value -> terminate walking. |
1226 | * | 1226 | * |
1227 | * The function itself returns: | 1227 | * The function itself returns: |
1228 | * 0 -> walk is complete. | 1228 | * 0 -> walk is complete. |
1229 | * >0 -> walk is incomplete (i.e. suspended) | 1229 | * >0 -> walk is incomplete (i.e. suspended) |
1230 | * <0 -> walk is terminated by an error. | 1230 | * <0 -> walk is terminated by an error. |
1231 | */ | 1231 | */ |
1232 | 1232 | ||
/*
 * Advance the tree walk described by @w by one or more steps.
 *
 * The walker is a small state machine (FWS_S/L/R/C/U) recording which
 * part of the current node has been visited; the switch cases below
 * deliberately fall through so that a node's subtree, left child, right
 * child, own routes and parent are handled in that order.
 *
 * Returns 0 when the walk is complete, a positive value when w->func
 * suspended the walk, or a negative value when w->func aborted it.
 */
static int fib6_walk_continue(struct fib6_walker_t *w)
{
	struct fib6_node *fn, *pn;

	for (;;) {
		fn = w->node;
		if (fn == NULL)
			return 0;

		/* In prune mode, stop descending below the root's children:
		 * jump straight to visiting this node's routes.
		 */
		if (w->prune && fn != w->root &&
		    fn->fn_flags&RTN_RTINFO && w->state < FWS_C) {
			w->state = FWS_C;
			w->leaf = fn->leaf;
		}
		switch (w->state) {
#ifdef CONFIG_IPV6_SUBTREES
		case FWS_S:
			if (FIB6_SUBTREE(fn)) {
				w->node = FIB6_SUBTREE(fn);
				continue;
			}
			w->state = FWS_L;
			/* fall through */
#endif
		case FWS_L:
			if (fn->left) {
				w->node = fn->left;
				w->state = FWS_INIT;
				continue;
			}
			w->state = FWS_R;
			/* fall through */
		case FWS_R:
			if (fn->right) {
				w->node = fn->right;
				w->state = FWS_INIT;
				continue;
			}
			w->state = FWS_C;
			w->leaf = fn->leaf;
			/* fall through */
		case FWS_C:
			if (w->leaf && fn->fn_flags&RTN_RTINFO) {
				int err;

				/* Skip the first w->skip nodes (used by
				 * suspended dumps resuming mid-walk).
				 */
				if (w->count < w->skip) {
					w->count++;
					continue;
				}

				err = w->func(w);
				if (err)
					return err;

				w->count++;
				continue;
			}
			w->state = FWS_U;
			/* fall through */
		case FWS_U:
			/* Done with this node; climb back to the parent. */
			if (fn == w->root)
				return 0;
			pn = fn->parent;
			w->node = pn;
#ifdef CONFIG_IPV6_SUBTREES
			if (FIB6_SUBTREE(pn) == fn) {
				WARN_ON(!(fn->fn_flags & RTN_ROOT));
				w->state = FWS_L;
				continue;
			}
#endif
			if (pn->left == fn) {
				w->state = FWS_R;
				continue;
			}
			if (pn->right == fn) {
				w->state = FWS_C;
				w->leaf = w->node->leaf;
				continue;
			}
#if RT6_DEBUG >= 2
			/* fn is neither child of its parent: tree corruption */
			WARN_ON(1);
#endif
		}
	}
}
1315 | 1315 | ||
1316 | static int fib6_walk(struct fib6_walker_t *w) | 1316 | static int fib6_walk(struct fib6_walker_t *w) |
1317 | { | 1317 | { |
1318 | int res; | 1318 | int res; |
1319 | 1319 | ||
1320 | w->state = FWS_INIT; | 1320 | w->state = FWS_INIT; |
1321 | w->node = w->root; | 1321 | w->node = w->root; |
1322 | 1322 | ||
1323 | fib6_walker_link(w); | 1323 | fib6_walker_link(w); |
1324 | res = fib6_walk_continue(w); | 1324 | res = fib6_walk_continue(w); |
1325 | if (res <= 0) | 1325 | if (res <= 0) |
1326 | fib6_walker_unlink(w); | 1326 | fib6_walker_unlink(w); |
1327 | return res; | 1327 | return res; |
1328 | } | 1328 | } |
1329 | 1329 | ||
/*
 * Walker callback used by fib6_clean_tree(): run the cleaner function
 * c->func on each route in the current node's leaf chain, deleting any
 * route for which it returns a negative value.
 *
 * Always returns 0 so the surrounding walk continues.
 */
static int fib6_clean_node(struct fib6_walker_t *w)
{
	int res;
	struct rt6_info *rt;
	struct fib6_cleaner_t *c = container_of(w, struct fib6_cleaner_t, w);
	struct nl_info info = {
		.nl_net = c->net,
	};

	for (rt = w->leaf; rt; rt = rt->dst.rt6_next) {
		res = c->func(rt, c->arg);
		if (res < 0) {
			/* Cleaner requested deletion of this route. */
			w->leaf = rt;
			res = fib6_del(rt, &info);
			if (res) {
#if RT6_DEBUG >= 2
				printk(KERN_DEBUG "fib6_clean_node: del failed: rt=%p@%p err=%d\n", rt, rt->rt6i_node, res);
#endif
				continue;
			}
			/* Deletion succeeded; w->leaf was updated above so
			 * the walk resumes from the right place.
			 */
			return 0;
		}
		WARN_ON(res != 0);
	}
	w->leaf = rt;
	return 0;
}
1357 | 1357 | ||
1358 | /* | 1358 | /* |
1359 | * Convenient frontend to tree walker. | 1359 | * Convenient frontend to tree walker. |
1360 | * | 1360 | * |
1361 | * func is called on each route. | 1361 | * func is called on each route. |
1362 | * It may return -1 -> delete this route. | 1362 | * It may return -1 -> delete this route. |
1363 | * 0 -> continue walking | 1363 | * 0 -> continue walking |
1364 | * | 1364 | * |
1365 | * prune==1 -> only immediate children of node (certainly, | 1365 | * prune==1 -> only immediate children of node (certainly, |
1366 | * ignoring pure split nodes) will be scanned. | 1366 | * ignoring pure split nodes) will be scanned. |
1367 | */ | 1367 | */ |
1368 | 1368 | ||
1369 | static void fib6_clean_tree(struct net *net, struct fib6_node *root, | 1369 | static void fib6_clean_tree(struct net *net, struct fib6_node *root, |
1370 | int (*func)(struct rt6_info *, void *arg), | 1370 | int (*func)(struct rt6_info *, void *arg), |
1371 | int prune, void *arg) | 1371 | int prune, void *arg) |
1372 | { | 1372 | { |
1373 | struct fib6_cleaner_t c; | 1373 | struct fib6_cleaner_t c; |
1374 | 1374 | ||
1375 | c.w.root = root; | 1375 | c.w.root = root; |
1376 | c.w.func = fib6_clean_node; | 1376 | c.w.func = fib6_clean_node; |
1377 | c.w.prune = prune; | 1377 | c.w.prune = prune; |
1378 | c.w.count = 0; | 1378 | c.w.count = 0; |
1379 | c.w.skip = 0; | 1379 | c.w.skip = 0; |
1380 | c.func = func; | 1380 | c.func = func; |
1381 | c.arg = arg; | 1381 | c.arg = arg; |
1382 | c.net = net; | 1382 | c.net = net; |
1383 | 1383 | ||
1384 | fib6_walk(&c.w); | 1384 | fib6_walk(&c.w); |
1385 | } | 1385 | } |
1386 | 1386 | ||
1387 | void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg), | 1387 | void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg), |
1388 | int prune, void *arg) | 1388 | int prune, void *arg) |
1389 | { | 1389 | { |
1390 | struct fib6_table *table; | 1390 | struct fib6_table *table; |
1391 | struct hlist_node *node; | 1391 | struct hlist_node *node; |
1392 | struct hlist_head *head; | 1392 | struct hlist_head *head; |
1393 | unsigned int h; | 1393 | unsigned int h; |
1394 | 1394 | ||
1395 | rcu_read_lock(); | 1395 | rcu_read_lock(); |
1396 | for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { | 1396 | for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { |
1397 | head = &net->ipv6.fib_table_hash[h]; | 1397 | head = &net->ipv6.fib_table_hash[h]; |
1398 | hlist_for_each_entry_rcu(table, node, head, tb6_hlist) { | 1398 | hlist_for_each_entry_rcu(table, node, head, tb6_hlist) { |
1399 | write_lock_bh(&table->tb6_lock); | 1399 | write_lock_bh(&table->tb6_lock); |
1400 | fib6_clean_tree(net, &table->tb6_root, | 1400 | fib6_clean_tree(net, &table->tb6_root, |
1401 | func, prune, arg); | 1401 | func, prune, arg); |
1402 | write_unlock_bh(&table->tb6_lock); | 1402 | write_unlock_bh(&table->tb6_lock); |
1403 | } | 1403 | } |
1404 | } | 1404 | } |
1405 | rcu_read_unlock(); | 1405 | rcu_read_unlock(); |
1406 | } | 1406 | } |
1407 | 1407 | ||
1408 | static int fib6_prune_clone(struct rt6_info *rt, void *arg) | 1408 | static int fib6_prune_clone(struct rt6_info *rt, void *arg) |
1409 | { | 1409 | { |
1410 | if (rt->rt6i_flags & RTF_CACHE) { | 1410 | if (rt->rt6i_flags & RTF_CACHE) { |
1411 | RT6_TRACE("pruning clone %p\n", rt); | 1411 | RT6_TRACE("pruning clone %p\n", rt); |
1412 | return -1; | 1412 | return -1; |
1413 | } | 1413 | } |
1414 | 1414 | ||
1415 | return 0; | 1415 | return 0; |
1416 | } | 1416 | } |
1417 | 1417 | ||
/*
 * Remove all cached clones below @fn (prune mode: only the node's
 * immediate children are scanned).  @rt is passed through as the
 * cleaner argument.
 */
static void fib6_prune_clones(struct net *net, struct fib6_node *fn,
			      struct rt6_info *rt)
{
	fib6_clean_tree(net, fn, fib6_prune_clone, 1, rt);
}
1423 | 1423 | ||
1424 | /* | 1424 | /* |
1425 | * Garbage collection | 1425 | * Garbage collection |
1426 | */ | 1426 | */ |
1427 | 1427 | ||
/* Parameters shared between fib6_run_gc() and the fib6_age() callback.
 * Written only while fib6_gc_lock is held.
 */
static struct fib6_gc_args
{
	int timeout;	/* age threshold (jiffies) for unused cached routes */
	int more;	/* count of entries that still need a future gc pass */
} gc_args;
1433 | 1433 | ||
/*
 * Per-route aging callback, run via fib6_clean_all() from fib6_run_gc().
 * Returns -1 to delete the route, 0 to keep it; bumps gc_args.more when
 * a kept entry will need another pass later.
 *
 * NOTE(review): the raw (non-RCU) neighbour accessor is used below;
 * fib6_clean_all() holds the table's tb6_lock while calling us — confirm
 * that is what makes the un-protected neighbour dereference safe.
 */
static int fib6_age(struct rt6_info *rt, void *arg)
{
	unsigned long now = jiffies;

	/*
	 * check addrconf expiration here.
	 * Routes are expired even if they are in use.
	 *
	 * Also age clones. Note, that clones are aged out
	 * only if they are not in use now.
	 */

	if (rt->rt6i_flags&RTF_EXPIRES && rt->rt6i_expires) {
		if (time_after(now, rt->rt6i_expires)) {
			RT6_TRACE("expiring %p\n", rt);
			return -1;
		}
		/* Not yet expired: request another gc pass. */
		gc_args.more++;
	} else if (rt->rt6i_flags & RTF_CACHE) {
		/* Unreferenced clone past the timeout -> drop it. */
		if (atomic_read(&rt->dst.__refcnt) == 0 &&
		    time_after_eq(now, rt->dst.lastuse + gc_args.timeout)) {
			RT6_TRACE("aging clone %p\n", rt);
			return -1;
		} else if ((rt->rt6i_flags & RTF_GATEWAY) &&
			   (!(dst_get_neighbour_raw(&rt->dst)->flags & NTF_ROUTER))) {
			/* Gateway route whose neighbour lost router status. */
			RT6_TRACE("purging route %p via non-router but gateway\n",
				  rt);
			return -1;
		}
		gc_args.more++;
	}

	return 0;
}
1468 | 1468 | ||
/* Serializes gc runs and protects the shared gc_args above. */
static DEFINE_SPINLOCK(fib6_gc_lock);

/*
 * Run one round of IPv6 route garbage collection for @net.
 *
 * @expires == ~0UL means a timer-driven (best-effort) run: if another
 * gc is already in progress we back off and re-arm the timer instead
 * of blocking.  Any other value forces a run with that timeout
 * (0 selects the sysctl default).
 */
void fib6_run_gc(unsigned long expires, struct net *net)
{
	if (expires != ~0UL) {
		/* Forced run: wait for the lock. */
		spin_lock_bh(&fib6_gc_lock);
		gc_args.timeout = expires ? (int)expires :
			net->ipv6.sysctl.ip6_rt_gc_interval;
	} else {
		/* Timer run: don't block, just retry in one second. */
		if (!spin_trylock_bh(&fib6_gc_lock)) {
			mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ);
			return;
		}
		gc_args.timeout = net->ipv6.sysctl.ip6_rt_gc_interval;
	}

	gc_args.more = icmp6_dst_gc();

	fib6_clean_all(net, fib6_age, 0, NULL);

	/* Re-arm the timer only while entries remain to be aged out. */
	if (gc_args.more)
		mod_timer(&net->ipv6.ip6_fib_timer,
			  round_jiffies(jiffies
					+ net->ipv6.sysctl.ip6_rt_gc_interval));
	else
		del_timer(&net->ipv6.ip6_fib_timer);
	spin_unlock_bh(&fib6_gc_lock);
}
1497 | 1497 | ||
/* Timer callback: kick off a best-effort gc run (timeout 0 = default). */
static void fib6_gc_timer_cb(unsigned long arg)
{
	fib6_run_gc(0, (struct net *)arg);
}
1502 | 1502 | ||
/*
 * Per-namespace FIB initialization: allocate stats, the table hash and
 * the main (and optionally local) table, then arm the gc timer setup.
 * On failure, the goto ladder below unwinds in reverse allocation order
 * and -ENOMEM is returned.
 */
static int __net_init fib6_net_init(struct net *net)
{
	size_t size = sizeof(struct hlist_head) * FIB6_TABLE_HASHSZ;

	setup_timer(&net->ipv6.ip6_fib_timer, fib6_gc_timer_cb, (unsigned long)net);

	net->ipv6.rt6_stats = kzalloc(sizeof(*net->ipv6.rt6_stats), GFP_KERNEL);
	if (!net->ipv6.rt6_stats)
		goto out_timer;

	/* Avoid false sharing : Use at least a full cache line */
	size = max_t(size_t, size, L1_CACHE_BYTES);

	net->ipv6.fib_table_hash = kzalloc(size, GFP_KERNEL);
	if (!net->ipv6.fib_table_hash)
		goto out_rt6_stats;

	net->ipv6.fib6_main_tbl = kzalloc(sizeof(*net->ipv6.fib6_main_tbl),
					  GFP_KERNEL);
	if (!net->ipv6.fib6_main_tbl)
		goto out_fib_table_hash;

	/* Empty tables point at the per-namespace null entry. */
	net->ipv6.fib6_main_tbl->tb6_id = RT6_TABLE_MAIN;
	net->ipv6.fib6_main_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry;
	net->ipv6.fib6_main_tbl->tb6_root.fn_flags =
		RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	net->ipv6.fib6_local_tbl = kzalloc(sizeof(*net->ipv6.fib6_local_tbl),
					   GFP_KERNEL);
	if (!net->ipv6.fib6_local_tbl)
		goto out_fib6_main_tbl;
	net->ipv6.fib6_local_tbl->tb6_id = RT6_TABLE_LOCAL;
	net->ipv6.fib6_local_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry;
	net->ipv6.fib6_local_tbl->tb6_root.fn_flags =
		RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
#endif
	fib6_tables_init(net);

	return 0;

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
out_fib6_main_tbl:
	kfree(net->ipv6.fib6_main_tbl);
#endif
out_fib_table_hash:
	kfree(net->ipv6.fib_table_hash);
out_rt6_stats:
	kfree(net->ipv6.rt6_stats);
out_timer:
	return -ENOMEM;
}
1555 | 1555 | ||
/*
 * Per-namespace FIB teardown: flush all routes, stop the gc timer and
 * free everything allocated in fib6_net_init().
 */
static void fib6_net_exit(struct net *net)
{
	rt6_ifdown(net, NULL);
	del_timer_sync(&net->ipv6.ip6_fib_timer);

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	kfree(net->ipv6.fib6_local_tbl);
#endif
	kfree(net->ipv6.fib6_main_tbl);
	kfree(net->ipv6.fib_table_hash);
	kfree(net->ipv6.rt6_stats);
}
1568 | 1568 | ||
/* Per-network-namespace init/exit hooks for the IPv6 FIB. */
static struct pernet_operations fib6_net_ops = {
	.init = fib6_net_init,
	.exit = fib6_net_exit,
};
1573 | 1573 | ||
/*
 * Module init: create the fib6_node slab cache, register the pernet
 * operations and hook up the RTM_GETROUTE dump handler.  Unwinds via
 * the goto ladder on failure.
 */
int __init fib6_init(void)
{
	int ret = -ENOMEM;

	fib6_node_kmem = kmem_cache_create("fib6_nodes",
					   sizeof(struct fib6_node),
					   0, SLAB_HWCACHE_ALIGN,
					   NULL);
	if (!fib6_node_kmem)
		goto out;

	ret = register_pernet_subsys(&fib6_net_ops);
	if (ret)
		goto out_kmem_cache_create;

	ret = __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib,
			      NULL);
	if (ret)
		goto out_unregister_subsys;
out:
	return ret;

out_unregister_subsys:
	unregister_pernet_subsys(&fib6_net_ops);
out_kmem_cache_create:
	kmem_cache_destroy(fib6_node_kmem);
	goto out;
}
1602 | 1602 | ||
/* Module teardown counterpart of fib6_init(). */
void fib6_gc_cleanup(void)
{
	unregister_pernet_subsys(&fib6_net_ops);
	kmem_cache_destroy(fib6_node_kmem);
}
1608 | 1608 |
1 | /* | 1 | /* |
2 | * IPv6 output functions | 2 | * IPv6 output functions |
3 | * Linux INET6 implementation | 3 | * Linux INET6 implementation |
4 | * | 4 | * |
5 | * Authors: | 5 | * Authors: |
6 | * Pedro Roque <roque@di.fc.ul.pt> | 6 | * Pedro Roque <roque@di.fc.ul.pt> |
7 | * | 7 | * |
8 | * Based on linux/net/ipv4/ip_output.c | 8 | * Based on linux/net/ipv4/ip_output.c |
9 | * | 9 | * |
10 | * This program is free software; you can redistribute it and/or | 10 | * This program is free software; you can redistribute it and/or |
11 | * modify it under the terms of the GNU General Public License | 11 | * modify it under the terms of the GNU General Public License |
12 | * as published by the Free Software Foundation; either version | 12 | * as published by the Free Software Foundation; either version |
13 | * 2 of the License, or (at your option) any later version. | 13 | * 2 of the License, or (at your option) any later version. |
14 | * | 14 | * |
15 | * Changes: | 15 | * Changes: |
16 | * A.N.Kuznetsov : airthmetics in fragmentation. | 16 | * A.N.Kuznetsov : airthmetics in fragmentation. |
17 | * extension headers are implemented. | 17 | * extension headers are implemented. |
18 | * route changes now work. | 18 | * route changes now work. |
19 | * ip6_forward does not confuse sniffers. | 19 | * ip6_forward does not confuse sniffers. |
20 | * etc. | 20 | * etc. |
21 | * | 21 | * |
22 | * H. von Brand : Added missing #include <linux/string.h> | 22 | * H. von Brand : Added missing #include <linux/string.h> |
23 | * Imran Patel : frag id should be in NBO | 23 | * Imran Patel : frag id should be in NBO |
24 | * Kazunori MIYAZAWA @USAGI | 24 | * Kazunori MIYAZAWA @USAGI |
25 | * : add ip6_append_data and related functions | 25 | * : add ip6_append_data and related functions |
26 | * for datagram xmit | 26 | * for datagram xmit |
27 | */ | 27 | */ |
28 | 28 | ||
29 | #include <linux/errno.h> | 29 | #include <linux/errno.h> |
30 | #include <linux/kernel.h> | 30 | #include <linux/kernel.h> |
31 | #include <linux/string.h> | 31 | #include <linux/string.h> |
32 | #include <linux/socket.h> | 32 | #include <linux/socket.h> |
33 | #include <linux/net.h> | 33 | #include <linux/net.h> |
34 | #include <linux/netdevice.h> | 34 | #include <linux/netdevice.h> |
35 | #include <linux/if_arp.h> | 35 | #include <linux/if_arp.h> |
36 | #include <linux/in6.h> | 36 | #include <linux/in6.h> |
37 | #include <linux/tcp.h> | 37 | #include <linux/tcp.h> |
38 | #include <linux/route.h> | 38 | #include <linux/route.h> |
39 | #include <linux/module.h> | 39 | #include <linux/module.h> |
40 | #include <linux/slab.h> | 40 | #include <linux/slab.h> |
41 | 41 | ||
42 | #include <linux/netfilter.h> | 42 | #include <linux/netfilter.h> |
43 | #include <linux/netfilter_ipv6.h> | 43 | #include <linux/netfilter_ipv6.h> |
44 | 44 | ||
45 | #include <net/sock.h> | 45 | #include <net/sock.h> |
46 | #include <net/snmp.h> | 46 | #include <net/snmp.h> |
47 | 47 | ||
48 | #include <net/ipv6.h> | 48 | #include <net/ipv6.h> |
49 | #include <net/ndisc.h> | 49 | #include <net/ndisc.h> |
50 | #include <net/protocol.h> | 50 | #include <net/protocol.h> |
51 | #include <net/ip6_route.h> | 51 | #include <net/ip6_route.h> |
52 | #include <net/addrconf.h> | 52 | #include <net/addrconf.h> |
53 | #include <net/rawv6.h> | 53 | #include <net/rawv6.h> |
54 | #include <net/icmp.h> | 54 | #include <net/icmp.h> |
55 | #include <net/xfrm.h> | 55 | #include <net/xfrm.h> |
56 | #include <net/checksum.h> | 56 | #include <net/checksum.h> |
57 | #include <linux/mroute6.h> | 57 | #include <linux/mroute6.h> |
58 | 58 | ||
59 | int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)); | 59 | int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)); |
60 | 60 | ||
61 | int __ip6_local_out(struct sk_buff *skb) | 61 | int __ip6_local_out(struct sk_buff *skb) |
62 | { | 62 | { |
63 | int len; | 63 | int len; |
64 | 64 | ||
65 | len = skb->len - sizeof(struct ipv6hdr); | 65 | len = skb->len - sizeof(struct ipv6hdr); |
66 | if (len > IPV6_MAXPLEN) | 66 | if (len > IPV6_MAXPLEN) |
67 | len = 0; | 67 | len = 0; |
68 | ipv6_hdr(skb)->payload_len = htons(len); | 68 | ipv6_hdr(skb)->payload_len = htons(len); |
69 | 69 | ||
70 | return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, | 70 | return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, |
71 | skb_dst(skb)->dev, dst_output); | 71 | skb_dst(skb)->dev, dst_output); |
72 | } | 72 | } |
73 | 73 | ||
/*
 * Send a locally generated packet: run the LOCAL_OUT hook and, when it
 * accepts the packet (return value 1), hand it to dst_output().
 */
int ip6_local_out(struct sk_buff *skb)
{
	int ret = __ip6_local_out(skb);

	if (likely(ret == 1))
		ret = dst_output(skb);

	return ret;
}
EXPORT_SYMBOL_GPL(ip6_local_out);
85 | 85 | ||
/* dev_loopback_xmit for use with netfilter. */
static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
{
	/* Re-inject the cloned skb into the local receive path. */
	skb_reset_mac_header(newskb);
	__skb_pull(newskb, skb_network_offset(newskb));
	newskb->pkt_type = PACKET_LOOPBACK;
	newskb->ip_summed = CHECKSUM_UNNECESSARY;
	WARN_ON(!skb_dst(newskb));

	netif_rx_ni(newskb);
	return 0;
}
98 | 98 | ||
/*
 * Final IPv6 output step: handle multicast loopback/statistics, then
 * hand the packet to the dst's neighbour for transmission.
 *
 * The neighbour lookup is done under rcu_read_lock(): the dst's
 * neighbour pointer may be changed concurrently (e.g. by redirects),
 * and neighbours are freed only after an RCU grace period, so holding
 * the read lock across neigh_output() keeps the neighbour valid.
 */
static int ip6_finish_output2(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	struct neighbour *neigh;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		/* Loop a copy back to local listeners when required. */
		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
		    ((mroute6_socket(dev_net(dev), skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					newskb, NULL, newskb->dev,
					ip6_dev_loopback_xmit);

			/* hop_limit 0: packet was only meant for loopback. */
			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(dev_net(dev), idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
				skb->len);
	}

	rcu_read_lock();
	neigh = dst_get_neighbour(dst);
	if (neigh) {
		int res = neigh_output(neigh, skb);

		rcu_read_unlock();
		return res;
	}
	rcu_read_unlock();
	/* No neighbour attached to the dst: count and drop. */
	IP6_INC_STATS_BH(dev_net(dst->dev),
			 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}
148 | static int ip6_finish_output(struct sk_buff *skb) | 153 | static int ip6_finish_output(struct sk_buff *skb) |
149 | { | 154 | { |
150 | if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || | 155 | if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || |
151 | dst_allfrag(skb_dst(skb))) | 156 | dst_allfrag(skb_dst(skb))) |
152 | return ip6_fragment(skb, ip6_finish_output2); | 157 | return ip6_fragment(skb, ip6_finish_output2); |
153 | else | 158 | else |
154 | return ip6_finish_output2(skb); | 159 | return ip6_finish_output2(skb); |
155 | } | 160 | } |
156 | 161 | ||
/*
 * dst_output() entry point for IPv6: drop the packet if IPv6 is
 * administratively disabled on the output device, otherwise run the
 * POST_ROUTING hook (skipped for re-routed packets) before
 * ip6_finish_output().
 */
int ip6_output(struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(dev_net(dev), idev,
			      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
172 | 177 | ||
173 | /* | 178 | /* |
174 | * xmit an sk_buff (used by TCP, SCTP and DCCP) | 179 | * xmit an sk_buff (used by TCP, SCTP and DCCP) |
175 | */ | 180 | */ |
176 | 181 | ||
177 | int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, | 182 | int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, |
178 | struct ipv6_txoptions *opt) | 183 | struct ipv6_txoptions *opt) |
179 | { | 184 | { |
180 | struct net *net = sock_net(sk); | 185 | struct net *net = sock_net(sk); |
181 | struct ipv6_pinfo *np = inet6_sk(sk); | 186 | struct ipv6_pinfo *np = inet6_sk(sk); |
182 | struct in6_addr *first_hop = &fl6->daddr; | 187 | struct in6_addr *first_hop = &fl6->daddr; |
183 | struct dst_entry *dst = skb_dst(skb); | 188 | struct dst_entry *dst = skb_dst(skb); |
184 | struct ipv6hdr *hdr; | 189 | struct ipv6hdr *hdr; |
185 | u8 proto = fl6->flowi6_proto; | 190 | u8 proto = fl6->flowi6_proto; |
186 | int seg_len = skb->len; | 191 | int seg_len = skb->len; |
187 | int hlimit = -1; | 192 | int hlimit = -1; |
188 | int tclass = 0; | 193 | int tclass = 0; |
189 | u32 mtu; | 194 | u32 mtu; |
190 | 195 | ||
191 | if (opt) { | 196 | if (opt) { |
192 | unsigned int head_room; | 197 | unsigned int head_room; |
193 | 198 | ||
194 | /* First: exthdrs may take lots of space (~8K for now) | 199 | /* First: exthdrs may take lots of space (~8K for now) |
195 | MAX_HEADER is not enough. | 200 | MAX_HEADER is not enough. |
196 | */ | 201 | */ |
197 | head_room = opt->opt_nflen + opt->opt_flen; | 202 | head_room = opt->opt_nflen + opt->opt_flen; |
198 | seg_len += head_room; | 203 | seg_len += head_room; |
199 | head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev); | 204 | head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev); |
200 | 205 | ||
201 | if (skb_headroom(skb) < head_room) { | 206 | if (skb_headroom(skb) < head_room) { |
202 | struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room); | 207 | struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room); |
203 | if (skb2 == NULL) { | 208 | if (skb2 == NULL) { |
204 | IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), | 209 | IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), |
205 | IPSTATS_MIB_OUTDISCARDS); | 210 | IPSTATS_MIB_OUTDISCARDS); |
206 | kfree_skb(skb); | 211 | kfree_skb(skb); |
207 | return -ENOBUFS; | 212 | return -ENOBUFS; |
208 | } | 213 | } |
209 | kfree_skb(skb); | 214 | kfree_skb(skb); |
210 | skb = skb2; | 215 | skb = skb2; |
211 | skb_set_owner_w(skb, sk); | 216 | skb_set_owner_w(skb, sk); |
212 | } | 217 | } |
213 | if (opt->opt_flen) | 218 | if (opt->opt_flen) |
214 | ipv6_push_frag_opts(skb, opt, &proto); | 219 | ipv6_push_frag_opts(skb, opt, &proto); |
215 | if (opt->opt_nflen) | 220 | if (opt->opt_nflen) |
216 | ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop); | 221 | ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop); |
217 | } | 222 | } |
218 | 223 | ||
219 | skb_push(skb, sizeof(struct ipv6hdr)); | 224 | skb_push(skb, sizeof(struct ipv6hdr)); |
220 | skb_reset_network_header(skb); | 225 | skb_reset_network_header(skb); |
221 | hdr = ipv6_hdr(skb); | 226 | hdr = ipv6_hdr(skb); |
222 | 227 | ||
223 | /* | 228 | /* |
224 | * Fill in the IPv6 header | 229 | * Fill in the IPv6 header |
225 | */ | 230 | */ |
226 | if (np) { | 231 | if (np) { |
227 | tclass = np->tclass; | 232 | tclass = np->tclass; |
228 | hlimit = np->hop_limit; | 233 | hlimit = np->hop_limit; |
229 | } | 234 | } |
230 | if (hlimit < 0) | 235 | if (hlimit < 0) |
231 | hlimit = ip6_dst_hoplimit(dst); | 236 | hlimit = ip6_dst_hoplimit(dst); |
232 | 237 | ||
233 | *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl6->flowlabel; | 238 | *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl6->flowlabel; |
234 | 239 | ||
235 | hdr->payload_len = htons(seg_len); | 240 | hdr->payload_len = htons(seg_len); |
236 | hdr->nexthdr = proto; | 241 | hdr->nexthdr = proto; |
237 | hdr->hop_limit = hlimit; | 242 | hdr->hop_limit = hlimit; |
238 | 243 | ||
239 | ipv6_addr_copy(&hdr->saddr, &fl6->saddr); | 244 | ipv6_addr_copy(&hdr->saddr, &fl6->saddr); |
240 | ipv6_addr_copy(&hdr->daddr, first_hop); | 245 | ipv6_addr_copy(&hdr->daddr, first_hop); |
241 | 246 | ||
242 | skb->priority = sk->sk_priority; | 247 | skb->priority = sk->sk_priority; |
243 | skb->mark = sk->sk_mark; | 248 | skb->mark = sk->sk_mark; |
244 | 249 | ||
245 | mtu = dst_mtu(dst); | 250 | mtu = dst_mtu(dst); |
246 | if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) { | 251 | if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) { |
247 | IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)), | 252 | IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)), |
248 | IPSTATS_MIB_OUT, skb->len); | 253 | IPSTATS_MIB_OUT, skb->len); |
249 | return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, | 254 | return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, |
250 | dst->dev, dst_output); | 255 | dst->dev, dst_output); |
251 | } | 256 | } |
252 | 257 | ||
253 | if (net_ratelimit()) | 258 | if (net_ratelimit()) |
254 | printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n"); | 259 | printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n"); |
255 | skb->dev = dst->dev; | 260 | skb->dev = dst->dev; |
256 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); | 261 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); |
257 | IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); | 262 | IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); |
258 | kfree_skb(skb); | 263 | kfree_skb(skb); |
259 | return -EMSGSIZE; | 264 | return -EMSGSIZE; |
260 | } | 265 | } |
261 | 266 | ||
262 | EXPORT_SYMBOL(ip6_xmit); | 267 | EXPORT_SYMBOL(ip6_xmit); |
263 | 268 | ||
264 | /* | 269 | /* |
265 | * To avoid extra problems ND packets are send through this | 270 | * To avoid extra problems ND packets are send through this |
266 | * routine. It's code duplication but I really want to avoid | 271 | * routine. It's code duplication but I really want to avoid |
267 | * extra checks since ipv6_build_header is used by TCP (which | 272 | * extra checks since ipv6_build_header is used by TCP (which |
268 | * is for us performance critical) | 273 | * is for us performance critical) |
269 | */ | 274 | */ |
270 | 275 | ||
271 | int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev, | 276 | int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev, |
272 | const struct in6_addr *saddr, const struct in6_addr *daddr, | 277 | const struct in6_addr *saddr, const struct in6_addr *daddr, |
273 | int proto, int len) | 278 | int proto, int len) |
274 | { | 279 | { |
275 | struct ipv6_pinfo *np = inet6_sk(sk); | 280 | struct ipv6_pinfo *np = inet6_sk(sk); |
276 | struct ipv6hdr *hdr; | 281 | struct ipv6hdr *hdr; |
277 | 282 | ||
278 | skb->protocol = htons(ETH_P_IPV6); | 283 | skb->protocol = htons(ETH_P_IPV6); |
279 | skb->dev = dev; | 284 | skb->dev = dev; |
280 | 285 | ||
281 | skb_reset_network_header(skb); | 286 | skb_reset_network_header(skb); |
282 | skb_put(skb, sizeof(struct ipv6hdr)); | 287 | skb_put(skb, sizeof(struct ipv6hdr)); |
283 | hdr = ipv6_hdr(skb); | 288 | hdr = ipv6_hdr(skb); |
284 | 289 | ||
285 | *(__be32*)hdr = htonl(0x60000000); | 290 | *(__be32*)hdr = htonl(0x60000000); |
286 | 291 | ||
287 | hdr->payload_len = htons(len); | 292 | hdr->payload_len = htons(len); |
288 | hdr->nexthdr = proto; | 293 | hdr->nexthdr = proto; |
289 | hdr->hop_limit = np->hop_limit; | 294 | hdr->hop_limit = np->hop_limit; |
290 | 295 | ||
291 | ipv6_addr_copy(&hdr->saddr, saddr); | 296 | ipv6_addr_copy(&hdr->saddr, saddr); |
292 | ipv6_addr_copy(&hdr->daddr, daddr); | 297 | ipv6_addr_copy(&hdr->daddr, daddr); |
293 | 298 | ||
294 | return 0; | 299 | return 0; |
295 | } | 300 | } |
296 | 301 | ||
297 | static int ip6_call_ra_chain(struct sk_buff *skb, int sel) | 302 | static int ip6_call_ra_chain(struct sk_buff *skb, int sel) |
298 | { | 303 | { |
299 | struct ip6_ra_chain *ra; | 304 | struct ip6_ra_chain *ra; |
300 | struct sock *last = NULL; | 305 | struct sock *last = NULL; |
301 | 306 | ||
302 | read_lock(&ip6_ra_lock); | 307 | read_lock(&ip6_ra_lock); |
303 | for (ra = ip6_ra_chain; ra; ra = ra->next) { | 308 | for (ra = ip6_ra_chain; ra; ra = ra->next) { |
304 | struct sock *sk = ra->sk; | 309 | struct sock *sk = ra->sk; |
305 | if (sk && ra->sel == sel && | 310 | if (sk && ra->sel == sel && |
306 | (!sk->sk_bound_dev_if || | 311 | (!sk->sk_bound_dev_if || |
307 | sk->sk_bound_dev_if == skb->dev->ifindex)) { | 312 | sk->sk_bound_dev_if == skb->dev->ifindex)) { |
308 | if (last) { | 313 | if (last) { |
309 | struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); | 314 | struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); |
310 | if (skb2) | 315 | if (skb2) |
311 | rawv6_rcv(last, skb2); | 316 | rawv6_rcv(last, skb2); |
312 | } | 317 | } |
313 | last = sk; | 318 | last = sk; |
314 | } | 319 | } |
315 | } | 320 | } |
316 | 321 | ||
317 | if (last) { | 322 | if (last) { |
318 | rawv6_rcv(last, skb); | 323 | rawv6_rcv(last, skb); |
319 | read_unlock(&ip6_ra_lock); | 324 | read_unlock(&ip6_ra_lock); |
320 | return 1; | 325 | return 1; |
321 | } | 326 | } |
322 | read_unlock(&ip6_ra_lock); | 327 | read_unlock(&ip6_ra_lock); |
323 | return 0; | 328 | return 0; |
324 | } | 329 | } |
325 | 330 | ||
326 | static int ip6_forward_proxy_check(struct sk_buff *skb) | 331 | static int ip6_forward_proxy_check(struct sk_buff *skb) |
327 | { | 332 | { |
328 | struct ipv6hdr *hdr = ipv6_hdr(skb); | 333 | struct ipv6hdr *hdr = ipv6_hdr(skb); |
329 | u8 nexthdr = hdr->nexthdr; | 334 | u8 nexthdr = hdr->nexthdr; |
330 | int offset; | 335 | int offset; |
331 | 336 | ||
332 | if (ipv6_ext_hdr(nexthdr)) { | 337 | if (ipv6_ext_hdr(nexthdr)) { |
333 | offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr); | 338 | offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr); |
334 | if (offset < 0) | 339 | if (offset < 0) |
335 | return 0; | 340 | return 0; |
336 | } else | 341 | } else |
337 | offset = sizeof(struct ipv6hdr); | 342 | offset = sizeof(struct ipv6hdr); |
338 | 343 | ||
339 | if (nexthdr == IPPROTO_ICMPV6) { | 344 | if (nexthdr == IPPROTO_ICMPV6) { |
340 | struct icmp6hdr *icmp6; | 345 | struct icmp6hdr *icmp6; |
341 | 346 | ||
342 | if (!pskb_may_pull(skb, (skb_network_header(skb) + | 347 | if (!pskb_may_pull(skb, (skb_network_header(skb) + |
343 | offset + 1 - skb->data))) | 348 | offset + 1 - skb->data))) |
344 | return 0; | 349 | return 0; |
345 | 350 | ||
346 | icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset); | 351 | icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset); |
347 | 352 | ||
348 | switch (icmp6->icmp6_type) { | 353 | switch (icmp6->icmp6_type) { |
349 | case NDISC_ROUTER_SOLICITATION: | 354 | case NDISC_ROUTER_SOLICITATION: |
350 | case NDISC_ROUTER_ADVERTISEMENT: | 355 | case NDISC_ROUTER_ADVERTISEMENT: |
351 | case NDISC_NEIGHBOUR_SOLICITATION: | 356 | case NDISC_NEIGHBOUR_SOLICITATION: |
352 | case NDISC_NEIGHBOUR_ADVERTISEMENT: | 357 | case NDISC_NEIGHBOUR_ADVERTISEMENT: |
353 | case NDISC_REDIRECT: | 358 | case NDISC_REDIRECT: |
354 | /* For reaction involving unicast neighbor discovery | 359 | /* For reaction involving unicast neighbor discovery |
355 | * message destined to the proxied address, pass it to | 360 | * message destined to the proxied address, pass it to |
356 | * input function. | 361 | * input function. |
357 | */ | 362 | */ |
358 | return 1; | 363 | return 1; |
359 | default: | 364 | default: |
360 | break; | 365 | break; |
361 | } | 366 | } |
362 | } | 367 | } |
363 | 368 | ||
364 | /* | 369 | /* |
365 | * The proxying router can't forward traffic sent to a link-local | 370 | * The proxying router can't forward traffic sent to a link-local |
366 | * address, so signal the sender and discard the packet. This | 371 | * address, so signal the sender and discard the packet. This |
367 | * behavior is clarified by the MIPv6 specification. | 372 | * behavior is clarified by the MIPv6 specification. |
368 | */ | 373 | */ |
369 | if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) { | 374 | if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) { |
370 | dst_link_failure(skb); | 375 | dst_link_failure(skb); |
371 | return -1; | 376 | return -1; |
372 | } | 377 | } |
373 | 378 | ||
374 | return 0; | 379 | return 0; |
375 | } | 380 | } |
376 | 381 | ||
377 | static inline int ip6_forward_finish(struct sk_buff *skb) | 382 | static inline int ip6_forward_finish(struct sk_buff *skb) |
378 | { | 383 | { |
379 | return dst_output(skb); | 384 | return dst_output(skb); |
380 | } | 385 | } |
381 | 386 | ||
382 | int ip6_forward(struct sk_buff *skb) | 387 | int ip6_forward(struct sk_buff *skb) |
383 | { | 388 | { |
384 | struct dst_entry *dst = skb_dst(skb); | 389 | struct dst_entry *dst = skb_dst(skb); |
385 | struct ipv6hdr *hdr = ipv6_hdr(skb); | 390 | struct ipv6hdr *hdr = ipv6_hdr(skb); |
386 | struct inet6_skb_parm *opt = IP6CB(skb); | 391 | struct inet6_skb_parm *opt = IP6CB(skb); |
387 | struct net *net = dev_net(dst->dev); | 392 | struct net *net = dev_net(dst->dev); |
388 | struct neighbour *n; | 393 | struct neighbour *n; |
389 | u32 mtu; | 394 | u32 mtu; |
390 | 395 | ||
391 | if (net->ipv6.devconf_all->forwarding == 0) | 396 | if (net->ipv6.devconf_all->forwarding == 0) |
392 | goto error; | 397 | goto error; |
393 | 398 | ||
394 | if (skb_warn_if_lro(skb)) | 399 | if (skb_warn_if_lro(skb)) |
395 | goto drop; | 400 | goto drop; |
396 | 401 | ||
397 | if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { | 402 | if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { |
398 | IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); | 403 | IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); |
399 | goto drop; | 404 | goto drop; |
400 | } | 405 | } |
401 | 406 | ||
402 | if (skb->pkt_type != PACKET_HOST) | 407 | if (skb->pkt_type != PACKET_HOST) |
403 | goto drop; | 408 | goto drop; |
404 | 409 | ||
405 | skb_forward_csum(skb); | 410 | skb_forward_csum(skb); |
406 | 411 | ||
407 | /* | 412 | /* |
408 | * We DO NOT make any processing on | 413 | * We DO NOT make any processing on |
409 | * RA packets, pushing them to user level AS IS | 414 | * RA packets, pushing them to user level AS IS |
410 | * without ane WARRANTY that application will be able | 415 | * without ane WARRANTY that application will be able |
411 | * to interpret them. The reason is that we | 416 | * to interpret them. The reason is that we |
412 | * cannot make anything clever here. | 417 | * cannot make anything clever here. |
413 | * | 418 | * |
414 | * We are not end-node, so that if packet contains | 419 | * We are not end-node, so that if packet contains |
415 | * AH/ESP, we cannot make anything. | 420 | * AH/ESP, we cannot make anything. |
416 | * Defragmentation also would be mistake, RA packets | 421 | * Defragmentation also would be mistake, RA packets |
417 | * cannot be fragmented, because there is no warranty | 422 | * cannot be fragmented, because there is no warranty |
418 | * that different fragments will go along one path. --ANK | 423 | * that different fragments will go along one path. --ANK |
419 | */ | 424 | */ |
420 | if (opt->ra) { | 425 | if (opt->ra) { |
421 | u8 *ptr = skb_network_header(skb) + opt->ra; | 426 | u8 *ptr = skb_network_header(skb) + opt->ra; |
422 | if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3])) | 427 | if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3])) |
423 | return 0; | 428 | return 0; |
424 | } | 429 | } |
425 | 430 | ||
426 | /* | 431 | /* |
427 | * check and decrement ttl | 432 | * check and decrement ttl |
428 | */ | 433 | */ |
429 | if (hdr->hop_limit <= 1) { | 434 | if (hdr->hop_limit <= 1) { |
430 | /* Force OUTPUT device used as source address */ | 435 | /* Force OUTPUT device used as source address */ |
431 | skb->dev = dst->dev; | 436 | skb->dev = dst->dev; |
432 | icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0); | 437 | icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0); |
433 | IP6_INC_STATS_BH(net, | 438 | IP6_INC_STATS_BH(net, |
434 | ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); | 439 | ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); |
435 | 440 | ||
436 | kfree_skb(skb); | 441 | kfree_skb(skb); |
437 | return -ETIMEDOUT; | 442 | return -ETIMEDOUT; |
438 | } | 443 | } |
439 | 444 | ||
440 | /* XXX: idev->cnf.proxy_ndp? */ | 445 | /* XXX: idev->cnf.proxy_ndp? */ |
441 | if (net->ipv6.devconf_all->proxy_ndp && | 446 | if (net->ipv6.devconf_all->proxy_ndp && |
442 | pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) { | 447 | pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) { |
443 | int proxied = ip6_forward_proxy_check(skb); | 448 | int proxied = ip6_forward_proxy_check(skb); |
444 | if (proxied > 0) | 449 | if (proxied > 0) |
445 | return ip6_input(skb); | 450 | return ip6_input(skb); |
446 | else if (proxied < 0) { | 451 | else if (proxied < 0) { |
447 | IP6_INC_STATS(net, ip6_dst_idev(dst), | 452 | IP6_INC_STATS(net, ip6_dst_idev(dst), |
448 | IPSTATS_MIB_INDISCARDS); | 453 | IPSTATS_MIB_INDISCARDS); |
449 | goto drop; | 454 | goto drop; |
450 | } | 455 | } |
451 | } | 456 | } |
452 | 457 | ||
453 | if (!xfrm6_route_forward(skb)) { | 458 | if (!xfrm6_route_forward(skb)) { |
454 | IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); | 459 | IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); |
455 | goto drop; | 460 | goto drop; |
456 | } | 461 | } |
457 | dst = skb_dst(skb); | 462 | dst = skb_dst(skb); |
458 | 463 | ||
459 | /* IPv6 specs say nothing about it, but it is clear that we cannot | 464 | /* IPv6 specs say nothing about it, but it is clear that we cannot |
460 | send redirects to source routed frames. | 465 | send redirects to source routed frames. |
461 | We don't send redirects to frames decapsulated from IPsec. | 466 | We don't send redirects to frames decapsulated from IPsec. |
462 | */ | 467 | */ |
463 | n = dst_get_neighbour(dst); | 468 | n = dst_get_neighbour(dst); |
464 | if (skb->dev == dst->dev && n && opt->srcrt == 0 && !skb_sec_path(skb)) { | 469 | if (skb->dev == dst->dev && n && opt->srcrt == 0 && !skb_sec_path(skb)) { |
465 | struct in6_addr *target = NULL; | 470 | struct in6_addr *target = NULL; |
466 | struct rt6_info *rt; | 471 | struct rt6_info *rt; |
467 | 472 | ||
468 | /* | 473 | /* |
469 | * incoming and outgoing devices are the same | 474 | * incoming and outgoing devices are the same |
470 | * send a redirect. | 475 | * send a redirect. |
471 | */ | 476 | */ |
472 | 477 | ||
473 | rt = (struct rt6_info *) dst; | 478 | rt = (struct rt6_info *) dst; |
474 | if ((rt->rt6i_flags & RTF_GATEWAY)) | 479 | if ((rt->rt6i_flags & RTF_GATEWAY)) |
475 | target = (struct in6_addr*)&n->primary_key; | 480 | target = (struct in6_addr*)&n->primary_key; |
476 | else | 481 | else |
477 | target = &hdr->daddr; | 482 | target = &hdr->daddr; |
478 | 483 | ||
479 | if (!rt->rt6i_peer) | 484 | if (!rt->rt6i_peer) |
480 | rt6_bind_peer(rt, 1); | 485 | rt6_bind_peer(rt, 1); |
481 | 486 | ||
482 | /* Limit redirects both by destination (here) | 487 | /* Limit redirects both by destination (here) |
483 | and by source (inside ndisc_send_redirect) | 488 | and by source (inside ndisc_send_redirect) |
484 | */ | 489 | */ |
485 | if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ)) | 490 | if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ)) |
486 | ndisc_send_redirect(skb, n, target); | 491 | ndisc_send_redirect(skb, n, target); |
487 | } else { | 492 | } else { |
488 | int addrtype = ipv6_addr_type(&hdr->saddr); | 493 | int addrtype = ipv6_addr_type(&hdr->saddr); |
489 | 494 | ||
490 | /* This check is security critical. */ | 495 | /* This check is security critical. */ |
491 | if (addrtype == IPV6_ADDR_ANY || | 496 | if (addrtype == IPV6_ADDR_ANY || |
492 | addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK)) | 497 | addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK)) |
493 | goto error; | 498 | goto error; |
494 | if (addrtype & IPV6_ADDR_LINKLOCAL) { | 499 | if (addrtype & IPV6_ADDR_LINKLOCAL) { |
495 | icmpv6_send(skb, ICMPV6_DEST_UNREACH, | 500 | icmpv6_send(skb, ICMPV6_DEST_UNREACH, |
496 | ICMPV6_NOT_NEIGHBOUR, 0); | 501 | ICMPV6_NOT_NEIGHBOUR, 0); |
497 | goto error; | 502 | goto error; |
498 | } | 503 | } |
499 | } | 504 | } |
500 | 505 | ||
501 | mtu = dst_mtu(dst); | 506 | mtu = dst_mtu(dst); |
502 | if (mtu < IPV6_MIN_MTU) | 507 | if (mtu < IPV6_MIN_MTU) |
503 | mtu = IPV6_MIN_MTU; | 508 | mtu = IPV6_MIN_MTU; |
504 | 509 | ||
505 | if (skb->len > mtu && !skb_is_gso(skb)) { | 510 | if (skb->len > mtu && !skb_is_gso(skb)) { |
506 | /* Again, force OUTPUT device used as source address */ | 511 | /* Again, force OUTPUT device used as source address */ |
507 | skb->dev = dst->dev; | 512 | skb->dev = dst->dev; |
508 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); | 513 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); |
509 | IP6_INC_STATS_BH(net, | 514 | IP6_INC_STATS_BH(net, |
510 | ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS); | 515 | ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS); |
511 | IP6_INC_STATS_BH(net, | 516 | IP6_INC_STATS_BH(net, |
512 | ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS); | 517 | ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS); |
513 | kfree_skb(skb); | 518 | kfree_skb(skb); |
514 | return -EMSGSIZE; | 519 | return -EMSGSIZE; |
515 | } | 520 | } |
516 | 521 | ||
517 | if (skb_cow(skb, dst->dev->hard_header_len)) { | 522 | if (skb_cow(skb, dst->dev->hard_header_len)) { |
518 | IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS); | 523 | IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS); |
519 | goto drop; | 524 | goto drop; |
520 | } | 525 | } |
521 | 526 | ||
522 | hdr = ipv6_hdr(skb); | 527 | hdr = ipv6_hdr(skb); |
523 | 528 | ||
524 | /* Mangling hops number delayed to point after skb COW */ | 529 | /* Mangling hops number delayed to point after skb COW */ |
525 | 530 | ||
526 | hdr->hop_limit--; | 531 | hdr->hop_limit--; |
527 | 532 | ||
528 | IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); | 533 | IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); |
529 | return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev, | 534 | return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev, |
530 | ip6_forward_finish); | 535 | ip6_forward_finish); |
531 | 536 | ||
532 | error: | 537 | error: |
533 | IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS); | 538 | IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS); |
534 | drop: | 539 | drop: |
535 | kfree_skb(skb); | 540 | kfree_skb(skb); |
536 | return -EINVAL; | 541 | return -EINVAL; |
537 | } | 542 | } |
538 | 543 | ||
539 | static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) | 544 | static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) |
540 | { | 545 | { |
541 | to->pkt_type = from->pkt_type; | 546 | to->pkt_type = from->pkt_type; |
542 | to->priority = from->priority; | 547 | to->priority = from->priority; |
543 | to->protocol = from->protocol; | 548 | to->protocol = from->protocol; |
544 | skb_dst_drop(to); | 549 | skb_dst_drop(to); |
545 | skb_dst_set(to, dst_clone(skb_dst(from))); | 550 | skb_dst_set(to, dst_clone(skb_dst(from))); |
546 | to->dev = from->dev; | 551 | to->dev = from->dev; |
547 | to->mark = from->mark; | 552 | to->mark = from->mark; |
548 | 553 | ||
549 | #ifdef CONFIG_NET_SCHED | 554 | #ifdef CONFIG_NET_SCHED |
550 | to->tc_index = from->tc_index; | 555 | to->tc_index = from->tc_index; |
551 | #endif | 556 | #endif |
552 | nf_copy(to, from); | 557 | nf_copy(to, from); |
553 | #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ | 558 | #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ |
554 | defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) | 559 | defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) |
555 | to->nf_trace = from->nf_trace; | 560 | to->nf_trace = from->nf_trace; |
556 | #endif | 561 | #endif |
557 | skb_copy_secmark(to, from); | 562 | skb_copy_secmark(to, from); |
558 | } | 563 | } |
559 | 564 | ||
560 | int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) | 565 | int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) |
561 | { | 566 | { |
562 | u16 offset = sizeof(struct ipv6hdr); | 567 | u16 offset = sizeof(struct ipv6hdr); |
563 | struct ipv6_opt_hdr *exthdr = | 568 | struct ipv6_opt_hdr *exthdr = |
564 | (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1); | 569 | (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1); |
565 | unsigned int packet_len = skb->tail - skb->network_header; | 570 | unsigned int packet_len = skb->tail - skb->network_header; |
566 | int found_rhdr = 0; | 571 | int found_rhdr = 0; |
567 | *nexthdr = &ipv6_hdr(skb)->nexthdr; | 572 | *nexthdr = &ipv6_hdr(skb)->nexthdr; |
568 | 573 | ||
569 | while (offset + 1 <= packet_len) { | 574 | while (offset + 1 <= packet_len) { |
570 | 575 | ||
571 | switch (**nexthdr) { | 576 | switch (**nexthdr) { |
572 | 577 | ||
573 | case NEXTHDR_HOP: | 578 | case NEXTHDR_HOP: |
574 | break; | 579 | break; |
575 | case NEXTHDR_ROUTING: | 580 | case NEXTHDR_ROUTING: |
576 | found_rhdr = 1; | 581 | found_rhdr = 1; |
577 | break; | 582 | break; |
578 | case NEXTHDR_DEST: | 583 | case NEXTHDR_DEST: |
579 | #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) | 584 | #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) |
580 | if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) | 585 | if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) |
581 | break; | 586 | break; |
582 | #endif | 587 | #endif |
583 | if (found_rhdr) | 588 | if (found_rhdr) |
584 | return offset; | 589 | return offset; |
585 | break; | 590 | break; |
586 | default : | 591 | default : |
587 | return offset; | 592 | return offset; |
588 | } | 593 | } |
589 | 594 | ||
590 | offset += ipv6_optlen(exthdr); | 595 | offset += ipv6_optlen(exthdr); |
591 | *nexthdr = &exthdr->nexthdr; | 596 | *nexthdr = &exthdr->nexthdr; |
592 | exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) + | 597 | exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) + |
593 | offset); | 598 | offset); |
594 | } | 599 | } |
595 | 600 | ||
596 | return offset; | 601 | return offset; |
597 | } | 602 | } |
598 | 603 | ||
599 | void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) | 604 | void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) |
600 | { | 605 | { |
601 | static atomic_t ipv6_fragmentation_id; | 606 | static atomic_t ipv6_fragmentation_id; |
602 | int old, new; | 607 | int old, new; |
603 | 608 | ||
604 | if (rt) { | 609 | if (rt) { |
605 | struct inet_peer *peer; | 610 | struct inet_peer *peer; |
606 | 611 | ||
607 | if (!rt->rt6i_peer) | 612 | if (!rt->rt6i_peer) |
608 | rt6_bind_peer(rt, 1); | 613 | rt6_bind_peer(rt, 1); |
609 | peer = rt->rt6i_peer; | 614 | peer = rt->rt6i_peer; |
610 | if (peer) { | 615 | if (peer) { |
611 | fhdr->identification = htonl(inet_getid(peer, 0)); | 616 | fhdr->identification = htonl(inet_getid(peer, 0)); |
612 | return; | 617 | return; |
613 | } | 618 | } |
614 | } | 619 | } |
615 | do { | 620 | do { |
616 | old = atomic_read(&ipv6_fragmentation_id); | 621 | old = atomic_read(&ipv6_fragmentation_id); |
617 | new = old + 1; | 622 | new = old + 1; |
618 | if (!new) | 623 | if (!new) |
619 | new = 1; | 624 | new = 1; |
620 | } while (atomic_cmpxchg(&ipv6_fragmentation_id, old, new) != old); | 625 | } while (atomic_cmpxchg(&ipv6_fragmentation_id, old, new) != old); |
621 | fhdr->identification = htonl(new); | 626 | fhdr->identification = htonl(new); |
622 | } | 627 | } |
623 | 628 | ||
624 | int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) | 629 | int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) |
625 | { | 630 | { |
626 | struct sk_buff *frag; | 631 | struct sk_buff *frag; |
627 | struct rt6_info *rt = (struct rt6_info*)skb_dst(skb); | 632 | struct rt6_info *rt = (struct rt6_info*)skb_dst(skb); |
628 | struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL; | 633 | struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL; |
629 | struct ipv6hdr *tmp_hdr; | 634 | struct ipv6hdr *tmp_hdr; |
630 | struct frag_hdr *fh; | 635 | struct frag_hdr *fh; |
631 | unsigned int mtu, hlen, left, len; | 636 | unsigned int mtu, hlen, left, len; |
632 | __be32 frag_id = 0; | 637 | __be32 frag_id = 0; |
633 | int ptr, offset = 0, err=0; | 638 | int ptr, offset = 0, err=0; |
634 | u8 *prevhdr, nexthdr = 0; | 639 | u8 *prevhdr, nexthdr = 0; |
635 | struct net *net = dev_net(skb_dst(skb)->dev); | 640 | struct net *net = dev_net(skb_dst(skb)->dev); |
636 | 641 | ||
637 | hlen = ip6_find_1stfragopt(skb, &prevhdr); | 642 | hlen = ip6_find_1stfragopt(skb, &prevhdr); |
638 | nexthdr = *prevhdr; | 643 | nexthdr = *prevhdr; |
639 | 644 | ||
640 | mtu = ip6_skb_dst_mtu(skb); | 645 | mtu = ip6_skb_dst_mtu(skb); |
641 | 646 | ||
642 | /* We must not fragment if the socket is set to force MTU discovery | 647 | /* We must not fragment if the socket is set to force MTU discovery |
643 | * or if the skb it not generated by a local socket. | 648 | * or if the skb it not generated by a local socket. |
644 | */ | 649 | */ |
645 | if (!skb->local_df && skb->len > mtu) { | 650 | if (!skb->local_df && skb->len > mtu) { |
646 | skb->dev = skb_dst(skb)->dev; | 651 | skb->dev = skb_dst(skb)->dev; |
647 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); | 652 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); |
648 | IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), | 653 | IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), |
649 | IPSTATS_MIB_FRAGFAILS); | 654 | IPSTATS_MIB_FRAGFAILS); |
650 | kfree_skb(skb); | 655 | kfree_skb(skb); |
651 | return -EMSGSIZE; | 656 | return -EMSGSIZE; |
652 | } | 657 | } |
653 | 658 | ||
654 | if (np && np->frag_size < mtu) { | 659 | if (np && np->frag_size < mtu) { |
655 | if (np->frag_size) | 660 | if (np->frag_size) |
656 | mtu = np->frag_size; | 661 | mtu = np->frag_size; |
657 | } | 662 | } |
658 | mtu -= hlen + sizeof(struct frag_hdr); | 663 | mtu -= hlen + sizeof(struct frag_hdr); |
659 | 664 | ||
660 | if (skb_has_frag_list(skb)) { | 665 | if (skb_has_frag_list(skb)) { |
661 | int first_len = skb_pagelen(skb); | 666 | int first_len = skb_pagelen(skb); |
662 | struct sk_buff *frag2; | 667 | struct sk_buff *frag2; |
663 | 668 | ||
664 | if (first_len - hlen > mtu || | 669 | if (first_len - hlen > mtu || |
665 | ((first_len - hlen) & 7) || | 670 | ((first_len - hlen) & 7) || |
666 | skb_cloned(skb)) | 671 | skb_cloned(skb)) |
667 | goto slow_path; | 672 | goto slow_path; |
668 | 673 | ||
669 | skb_walk_frags(skb, frag) { | 674 | skb_walk_frags(skb, frag) { |
670 | /* Correct geometry. */ | 675 | /* Correct geometry. */ |
671 | if (frag->len > mtu || | 676 | if (frag->len > mtu || |
672 | ((frag->len & 7) && frag->next) || | 677 | ((frag->len & 7) && frag->next) || |
673 | skb_headroom(frag) < hlen) | 678 | skb_headroom(frag) < hlen) |
674 | goto slow_path_clean; | 679 | goto slow_path_clean; |
675 | 680 | ||
676 | /* Partially cloned skb? */ | 681 | /* Partially cloned skb? */ |
677 | if (skb_shared(frag)) | 682 | if (skb_shared(frag)) |
678 | goto slow_path_clean; | 683 | goto slow_path_clean; |
679 | 684 | ||
680 | BUG_ON(frag->sk); | 685 | BUG_ON(frag->sk); |
681 | if (skb->sk) { | 686 | if (skb->sk) { |
682 | frag->sk = skb->sk; | 687 | frag->sk = skb->sk; |
683 | frag->destructor = sock_wfree; | 688 | frag->destructor = sock_wfree; |
684 | } | 689 | } |
685 | skb->truesize -= frag->truesize; | 690 | skb->truesize -= frag->truesize; |
686 | } | 691 | } |
687 | 692 | ||
688 | err = 0; | 693 | err = 0; |
689 | offset = 0; | 694 | offset = 0; |
690 | frag = skb_shinfo(skb)->frag_list; | 695 | frag = skb_shinfo(skb)->frag_list; |
691 | skb_frag_list_init(skb); | 696 | skb_frag_list_init(skb); |
692 | /* BUILD HEADER */ | 697 | /* BUILD HEADER */ |
693 | 698 | ||
694 | *prevhdr = NEXTHDR_FRAGMENT; | 699 | *prevhdr = NEXTHDR_FRAGMENT; |
695 | tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC); | 700 | tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC); |
696 | if (!tmp_hdr) { | 701 | if (!tmp_hdr) { |
697 | IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), | 702 | IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), |
698 | IPSTATS_MIB_FRAGFAILS); | 703 | IPSTATS_MIB_FRAGFAILS); |
699 | return -ENOMEM; | 704 | return -ENOMEM; |
700 | } | 705 | } |
701 | 706 | ||
702 | __skb_pull(skb, hlen); | 707 | __skb_pull(skb, hlen); |
703 | fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr)); | 708 | fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr)); |
704 | __skb_push(skb, hlen); | 709 | __skb_push(skb, hlen); |
705 | skb_reset_network_header(skb); | 710 | skb_reset_network_header(skb); |
706 | memcpy(skb_network_header(skb), tmp_hdr, hlen); | 711 | memcpy(skb_network_header(skb), tmp_hdr, hlen); |
707 | 712 | ||
708 | ipv6_select_ident(fh, rt); | 713 | ipv6_select_ident(fh, rt); |
709 | fh->nexthdr = nexthdr; | 714 | fh->nexthdr = nexthdr; |
710 | fh->reserved = 0; | 715 | fh->reserved = 0; |
711 | fh->frag_off = htons(IP6_MF); | 716 | fh->frag_off = htons(IP6_MF); |
712 | frag_id = fh->identification; | 717 | frag_id = fh->identification; |
713 | 718 | ||
714 | first_len = skb_pagelen(skb); | 719 | first_len = skb_pagelen(skb); |
715 | skb->data_len = first_len - skb_headlen(skb); | 720 | skb->data_len = first_len - skb_headlen(skb); |
716 | skb->len = first_len; | 721 | skb->len = first_len; |
717 | ipv6_hdr(skb)->payload_len = htons(first_len - | 722 | ipv6_hdr(skb)->payload_len = htons(first_len - |
718 | sizeof(struct ipv6hdr)); | 723 | sizeof(struct ipv6hdr)); |
719 | 724 | ||
720 | dst_hold(&rt->dst); | 725 | dst_hold(&rt->dst); |
721 | 726 | ||
722 | for (;;) { | 727 | for (;;) { |
723 | /* Prepare header of the next frame, | 728 | /* Prepare header of the next frame, |
724 | * before previous one went down. */ | 729 | * before previous one went down. */ |
725 | if (frag) { | 730 | if (frag) { |
726 | frag->ip_summed = CHECKSUM_NONE; | 731 | frag->ip_summed = CHECKSUM_NONE; |
727 | skb_reset_transport_header(frag); | 732 | skb_reset_transport_header(frag); |
728 | fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr)); | 733 | fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr)); |
729 | __skb_push(frag, hlen); | 734 | __skb_push(frag, hlen); |
730 | skb_reset_network_header(frag); | 735 | skb_reset_network_header(frag); |
731 | memcpy(skb_network_header(frag), tmp_hdr, | 736 | memcpy(skb_network_header(frag), tmp_hdr, |
732 | hlen); | 737 | hlen); |
733 | offset += skb->len - hlen - sizeof(struct frag_hdr); | 738 | offset += skb->len - hlen - sizeof(struct frag_hdr); |
734 | fh->nexthdr = nexthdr; | 739 | fh->nexthdr = nexthdr; |
735 | fh->reserved = 0; | 740 | fh->reserved = 0; |
736 | fh->frag_off = htons(offset); | 741 | fh->frag_off = htons(offset); |
737 | if (frag->next != NULL) | 742 | if (frag->next != NULL) |
738 | fh->frag_off |= htons(IP6_MF); | 743 | fh->frag_off |= htons(IP6_MF); |
739 | fh->identification = frag_id; | 744 | fh->identification = frag_id; |
740 | ipv6_hdr(frag)->payload_len = | 745 | ipv6_hdr(frag)->payload_len = |
741 | htons(frag->len - | 746 | htons(frag->len - |
742 | sizeof(struct ipv6hdr)); | 747 | sizeof(struct ipv6hdr)); |
743 | ip6_copy_metadata(frag, skb); | 748 | ip6_copy_metadata(frag, skb); |
744 | } | 749 | } |
745 | 750 | ||
746 | err = output(skb); | 751 | err = output(skb); |
747 | if(!err) | 752 | if(!err) |
748 | IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), | 753 | IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), |
749 | IPSTATS_MIB_FRAGCREATES); | 754 | IPSTATS_MIB_FRAGCREATES); |
750 | 755 | ||
751 | if (err || !frag) | 756 | if (err || !frag) |
752 | break; | 757 | break; |
753 | 758 | ||
754 | skb = frag; | 759 | skb = frag; |
755 | frag = skb->next; | 760 | frag = skb->next; |
756 | skb->next = NULL; | 761 | skb->next = NULL; |
757 | } | 762 | } |
758 | 763 | ||
759 | kfree(tmp_hdr); | 764 | kfree(tmp_hdr); |
760 | 765 | ||
761 | if (err == 0) { | 766 | if (err == 0) { |
762 | IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), | 767 | IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), |
763 | IPSTATS_MIB_FRAGOKS); | 768 | IPSTATS_MIB_FRAGOKS); |
764 | dst_release(&rt->dst); | 769 | dst_release(&rt->dst); |
765 | return 0; | 770 | return 0; |
766 | } | 771 | } |
767 | 772 | ||
768 | while (frag) { | 773 | while (frag) { |
769 | skb = frag->next; | 774 | skb = frag->next; |
770 | kfree_skb(frag); | 775 | kfree_skb(frag); |
771 | frag = skb; | 776 | frag = skb; |
772 | } | 777 | } |
773 | 778 | ||
774 | IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), | 779 | IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), |
775 | IPSTATS_MIB_FRAGFAILS); | 780 | IPSTATS_MIB_FRAGFAILS); |
776 | dst_release(&rt->dst); | 781 | dst_release(&rt->dst); |
777 | return err; | 782 | return err; |
778 | 783 | ||
779 | slow_path_clean: | 784 | slow_path_clean: |
780 | skb_walk_frags(skb, frag2) { | 785 | skb_walk_frags(skb, frag2) { |
781 | if (frag2 == frag) | 786 | if (frag2 == frag) |
782 | break; | 787 | break; |
783 | frag2->sk = NULL; | 788 | frag2->sk = NULL; |
784 | frag2->destructor = NULL; | 789 | frag2->destructor = NULL; |
785 | skb->truesize += frag2->truesize; | 790 | skb->truesize += frag2->truesize; |
786 | } | 791 | } |
787 | } | 792 | } |
788 | 793 | ||
789 | slow_path: | 794 | slow_path: |
790 | left = skb->len - hlen; /* Space per frame */ | 795 | left = skb->len - hlen; /* Space per frame */ |
791 | ptr = hlen; /* Where to start from */ | 796 | ptr = hlen; /* Where to start from */ |
792 | 797 | ||
793 | /* | 798 | /* |
794 | * Fragment the datagram. | 799 | * Fragment the datagram. |
795 | */ | 800 | */ |
796 | 801 | ||
797 | *prevhdr = NEXTHDR_FRAGMENT; | 802 | *prevhdr = NEXTHDR_FRAGMENT; |
798 | 803 | ||
799 | /* | 804 | /* |
800 | * Keep copying data until we run out. | 805 | * Keep copying data until we run out. |
801 | */ | 806 | */ |
802 | while(left > 0) { | 807 | while(left > 0) { |
803 | len = left; | 808 | len = left; |
804 | /* IF: it doesn't fit, use 'mtu' - the data space left */ | 809 | /* IF: it doesn't fit, use 'mtu' - the data space left */ |
805 | if (len > mtu) | 810 | if (len > mtu) |
806 | len = mtu; | 811 | len = mtu; |
807 | /* IF: we are not sending up to and including the packet end | 812 | /* IF: we are not sending up to and including the packet end |
808 | then align the next start on an eight byte boundary */ | 813 | then align the next start on an eight byte boundary */ |
809 | if (len < left) { | 814 | if (len < left) { |
810 | len &= ~7; | 815 | len &= ~7; |
811 | } | 816 | } |
812 | /* | 817 | /* |
813 | * Allocate buffer. | 818 | * Allocate buffer. |
814 | */ | 819 | */ |
815 | 820 | ||
816 | if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->dst.dev), GFP_ATOMIC)) == NULL) { | 821 | if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->dst.dev), GFP_ATOMIC)) == NULL) { |
817 | NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n"); | 822 | NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n"); |
818 | IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), | 823 | IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), |
819 | IPSTATS_MIB_FRAGFAILS); | 824 | IPSTATS_MIB_FRAGFAILS); |
820 | err = -ENOMEM; | 825 | err = -ENOMEM; |
821 | goto fail; | 826 | goto fail; |
822 | } | 827 | } |
823 | 828 | ||
824 | /* | 829 | /* |
825 | * Set up data on packet | 830 | * Set up data on packet |
826 | */ | 831 | */ |
827 | 832 | ||
828 | ip6_copy_metadata(frag, skb); | 833 | ip6_copy_metadata(frag, skb); |
829 | skb_reserve(frag, LL_RESERVED_SPACE(rt->dst.dev)); | 834 | skb_reserve(frag, LL_RESERVED_SPACE(rt->dst.dev)); |
830 | skb_put(frag, len + hlen + sizeof(struct frag_hdr)); | 835 | skb_put(frag, len + hlen + sizeof(struct frag_hdr)); |
831 | skb_reset_network_header(frag); | 836 | skb_reset_network_header(frag); |
832 | fh = (struct frag_hdr *)(skb_network_header(frag) + hlen); | 837 | fh = (struct frag_hdr *)(skb_network_header(frag) + hlen); |
833 | frag->transport_header = (frag->network_header + hlen + | 838 | frag->transport_header = (frag->network_header + hlen + |
834 | sizeof(struct frag_hdr)); | 839 | sizeof(struct frag_hdr)); |
835 | 840 | ||
836 | /* | 841 | /* |
837 | * Charge the memory for the fragment to any owner | 842 | * Charge the memory for the fragment to any owner |
838 | * it might possess | 843 | * it might possess |
839 | */ | 844 | */ |
840 | if (skb->sk) | 845 | if (skb->sk) |
841 | skb_set_owner_w(frag, skb->sk); | 846 | skb_set_owner_w(frag, skb->sk); |
842 | 847 | ||
843 | /* | 848 | /* |
844 | * Copy the packet header into the new buffer. | 849 | * Copy the packet header into the new buffer. |
845 | */ | 850 | */ |
846 | skb_copy_from_linear_data(skb, skb_network_header(frag), hlen); | 851 | skb_copy_from_linear_data(skb, skb_network_header(frag), hlen); |
847 | 852 | ||
848 | /* | 853 | /* |
849 | * Build fragment header. | 854 | * Build fragment header. |
850 | */ | 855 | */ |
851 | fh->nexthdr = nexthdr; | 856 | fh->nexthdr = nexthdr; |
852 | fh->reserved = 0; | 857 | fh->reserved = 0; |
853 | if (!frag_id) { | 858 | if (!frag_id) { |
854 | ipv6_select_ident(fh, rt); | 859 | ipv6_select_ident(fh, rt); |
855 | frag_id = fh->identification; | 860 | frag_id = fh->identification; |
856 | } else | 861 | } else |
857 | fh->identification = frag_id; | 862 | fh->identification = frag_id; |
858 | 863 | ||
859 | /* | 864 | /* |
860 | * Copy a block of the IP datagram. | 865 | * Copy a block of the IP datagram. |
861 | */ | 866 | */ |
862 | if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len)) | 867 | if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len)) |
863 | BUG(); | 868 | BUG(); |
864 | left -= len; | 869 | left -= len; |
865 | 870 | ||
866 | fh->frag_off = htons(offset); | 871 | fh->frag_off = htons(offset); |
867 | if (left > 0) | 872 | if (left > 0) |
868 | fh->frag_off |= htons(IP6_MF); | 873 | fh->frag_off |= htons(IP6_MF); |
869 | ipv6_hdr(frag)->payload_len = htons(frag->len - | 874 | ipv6_hdr(frag)->payload_len = htons(frag->len - |
870 | sizeof(struct ipv6hdr)); | 875 | sizeof(struct ipv6hdr)); |
871 | 876 | ||
872 | ptr += len; | 877 | ptr += len; |
873 | offset += len; | 878 | offset += len; |
874 | 879 | ||
875 | /* | 880 | /* |
876 | * Put this fragment into the sending queue. | 881 | * Put this fragment into the sending queue. |
877 | */ | 882 | */ |
878 | err = output(frag); | 883 | err = output(frag); |
879 | if (err) | 884 | if (err) |
880 | goto fail; | 885 | goto fail; |
881 | 886 | ||
882 | IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), | 887 | IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), |
883 | IPSTATS_MIB_FRAGCREATES); | 888 | IPSTATS_MIB_FRAGCREATES); |
884 | } | 889 | } |
885 | IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), | 890 | IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), |
886 | IPSTATS_MIB_FRAGOKS); | 891 | IPSTATS_MIB_FRAGOKS); |
887 | kfree_skb(skb); | 892 | kfree_skb(skb); |
888 | return err; | 893 | return err; |
889 | 894 | ||
890 | fail: | 895 | fail: |
891 | IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), | 896 | IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), |
892 | IPSTATS_MIB_FRAGFAILS); | 897 | IPSTATS_MIB_FRAGFAILS); |
893 | kfree_skb(skb); | 898 | kfree_skb(skb); |
894 | return err; | 899 | return err; |
895 | } | 900 | } |
896 | 901 | ||
897 | static inline int ip6_rt_check(const struct rt6key *rt_key, | 902 | static inline int ip6_rt_check(const struct rt6key *rt_key, |
898 | const struct in6_addr *fl_addr, | 903 | const struct in6_addr *fl_addr, |
899 | const struct in6_addr *addr_cache) | 904 | const struct in6_addr *addr_cache) |
900 | { | 905 | { |
901 | return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) && | 906 | return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) && |
902 | (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)); | 907 | (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)); |
903 | } | 908 | } |
904 | 909 | ||
905 | static struct dst_entry *ip6_sk_dst_check(struct sock *sk, | 910 | static struct dst_entry *ip6_sk_dst_check(struct sock *sk, |
906 | struct dst_entry *dst, | 911 | struct dst_entry *dst, |
907 | const struct flowi6 *fl6) | 912 | const struct flowi6 *fl6) |
908 | { | 913 | { |
909 | struct ipv6_pinfo *np = inet6_sk(sk); | 914 | struct ipv6_pinfo *np = inet6_sk(sk); |
910 | struct rt6_info *rt = (struct rt6_info *)dst; | 915 | struct rt6_info *rt = (struct rt6_info *)dst; |
911 | 916 | ||
912 | if (!dst) | 917 | if (!dst) |
913 | goto out; | 918 | goto out; |
914 | 919 | ||
915 | /* Yes, checking route validity in not connected | 920 | /* Yes, checking route validity in not connected |
916 | * case is not very simple. Take into account, | 921 | * case is not very simple. Take into account, |
917 | * that we do not support routing by source, TOS, | 922 | * that we do not support routing by source, TOS, |
918 | * and MSG_DONTROUTE --ANK (980726) | 923 | * and MSG_DONTROUTE --ANK (980726) |
919 | * | 924 | * |
920 | * 1. ip6_rt_check(): If route was host route, | 925 | * 1. ip6_rt_check(): If route was host route, |
921 | * check that cached destination is current. | 926 | * check that cached destination is current. |
922 | * If it is network route, we still may | 927 | * If it is network route, we still may |
923 | * check its validity using saved pointer | 928 | * check its validity using saved pointer |
924 | * to the last used address: daddr_cache. | 929 | * to the last used address: daddr_cache. |
925 | * We do not want to save whole address now, | 930 | * We do not want to save whole address now, |
926 | * (because main consumer of this service | 931 | * (because main consumer of this service |
927 | * is tcp, which has not this problem), | 932 | * is tcp, which has not this problem), |
928 | * so that the last trick works only on connected | 933 | * so that the last trick works only on connected |
929 | * sockets. | 934 | * sockets. |
930 | * 2. oif also should be the same. | 935 | * 2. oif also should be the same. |
931 | */ | 936 | */ |
932 | if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) || | 937 | if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) || |
933 | #ifdef CONFIG_IPV6_SUBTREES | 938 | #ifdef CONFIG_IPV6_SUBTREES |
934 | ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) || | 939 | ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) || |
935 | #endif | 940 | #endif |
936 | (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) { | 941 | (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) { |
937 | dst_release(dst); | 942 | dst_release(dst); |
938 | dst = NULL; | 943 | dst = NULL; |
939 | } | 944 | } |
940 | 945 | ||
941 | out: | 946 | out: |
942 | return dst; | 947 | return dst; |
943 | } | 948 | } |
944 | 949 | ||
/*
 * Core of the IPv6 route lookup: fill in *dst (and the flow's source
 * address if unset) for @fl6, releasing *dst and returning a negative
 * errno on failure.
 *
 * With CONFIG_IPV6_OPTIMISTIC_DAD, the dst's neighbour is inspected
 * under rcu_read_lock(): neighbours are RCU-freed, and the dst->neighbour
 * pointer may change concurrently (see commit f2c31e32, "net: fix NULL
 * dereferences in check_peer_redir()"), so a plain read is not safe.
 */
static int ip6_dst_lookup_tail(struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
	struct net *net = sock_net(sk);
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
#endif
	int err;

	/* Callers may pass a pre-resolved dst; only look up when absent. */
	if (*dst == NULL)
		*dst = ip6_route_output(net, sk, fl6);

	/* ip6_route_output() never returns NULL; errors live in ->error. */
	if ((err = (*dst)->error))
		goto out_err_release;

	/* Unspecified source: pick one appropriate for the route/daddr. */
	if (ipv6_addr_any(&fl6->saddr)) {
		struct rt6_info *rt = (struct rt6_info *) *dst;
		err = ip6_route_get_saddr(net, rt, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		if (err)
			goto out_err_release;
	}

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rcu_read_lock();
	n = dst_get_neighbour(*dst);
	if (n && !(n->nud_state & NUD_VALID)) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		/* Done with @n; drop RCU before the sleepable-ish work. */
		rcu_read_unlock();
		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			/* :: daddr forces selection of the default router. */
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			if ((err = (*dst)->error))
				goto out_err_release;
		}
	} else {
		rcu_read_unlock();
	}
#endif

	return 0;

out_err_release:
	if (err == -ENETUNREACH)
		IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	dst_release(*dst);
	*dst = NULL;
	return err;
}
1015 | 1024 | ||
1016 | /** | 1025 | /** |
1017 | * ip6_dst_lookup - perform route lookup on flow | 1026 | * ip6_dst_lookup - perform route lookup on flow |
1018 | * @sk: socket which provides route info | 1027 | * @sk: socket which provides route info |
1019 | * @dst: pointer to dst_entry * for result | 1028 | * @dst: pointer to dst_entry * for result |
1020 | * @fl6: flow to lookup | 1029 | * @fl6: flow to lookup |
1021 | * | 1030 | * |
1022 | * This function performs a route lookup on the given flow. | 1031 | * This function performs a route lookup on the given flow. |
1023 | * | 1032 | * |
1024 | * It returns zero on success, or a standard errno code on error. | 1033 | * It returns zero on success, or a standard errno code on error. |
1025 | */ | 1034 | */ |
1026 | int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6) | 1035 | int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6) |
1027 | { | 1036 | { |
1028 | *dst = NULL; | 1037 | *dst = NULL; |
1029 | return ip6_dst_lookup_tail(sk, dst, fl6); | 1038 | return ip6_dst_lookup_tail(sk, dst, fl6); |
1030 | } | 1039 | } |
1031 | EXPORT_SYMBOL_GPL(ip6_dst_lookup); | 1040 | EXPORT_SYMBOL_GPL(ip6_dst_lookup); |
1032 | 1041 | ||
1033 | /** | 1042 | /** |
1034 | * ip6_dst_lookup_flow - perform route lookup on flow with ipsec | 1043 | * ip6_dst_lookup_flow - perform route lookup on flow with ipsec |
1035 | * @sk: socket which provides route info | 1044 | * @sk: socket which provides route info |
1036 | * @fl6: flow to lookup | 1045 | * @fl6: flow to lookup |
1037 | * @final_dst: final destination address for ipsec lookup | 1046 | * @final_dst: final destination address for ipsec lookup |
1038 | * @can_sleep: we are in a sleepable context | 1047 | * @can_sleep: we are in a sleepable context |
1039 | * | 1048 | * |
1040 | * This function performs a route lookup on the given flow. | 1049 | * This function performs a route lookup on the given flow. |
1041 | * | 1050 | * |
1042 | * It returns a valid dst pointer on success, or a pointer encoded | 1051 | * It returns a valid dst pointer on success, or a pointer encoded |
1043 | * error code. | 1052 | * error code. |
1044 | */ | 1053 | */ |
1045 | struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, | 1054 | struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, |
1046 | const struct in6_addr *final_dst, | 1055 | const struct in6_addr *final_dst, |
1047 | bool can_sleep) | 1056 | bool can_sleep) |
1048 | { | 1057 | { |
1049 | struct dst_entry *dst = NULL; | 1058 | struct dst_entry *dst = NULL; |
1050 | int err; | 1059 | int err; |
1051 | 1060 | ||
1052 | err = ip6_dst_lookup_tail(sk, &dst, fl6); | 1061 | err = ip6_dst_lookup_tail(sk, &dst, fl6); |
1053 | if (err) | 1062 | if (err) |
1054 | return ERR_PTR(err); | 1063 | return ERR_PTR(err); |
1055 | if (final_dst) | 1064 | if (final_dst) |
1056 | ipv6_addr_copy(&fl6->daddr, final_dst); | 1065 | ipv6_addr_copy(&fl6->daddr, final_dst); |
1057 | if (can_sleep) | 1066 | if (can_sleep) |
1058 | fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP; | 1067 | fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP; |
1059 | 1068 | ||
1060 | return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); | 1069 | return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); |
1061 | } | 1070 | } |
1062 | EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow); | 1071 | EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow); |
1063 | 1072 | ||
1064 | /** | 1073 | /** |
1065 | * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow | 1074 | * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow |
1066 | * @sk: socket which provides the dst cache and route info | 1075 | * @sk: socket which provides the dst cache and route info |
1067 | * @fl6: flow to lookup | 1076 | * @fl6: flow to lookup |
1068 | * @final_dst: final destination address for ipsec lookup | 1077 | * @final_dst: final destination address for ipsec lookup |
1069 | * @can_sleep: we are in a sleepable context | 1078 | * @can_sleep: we are in a sleepable context |
1070 | * | 1079 | * |
1071 | * This function performs a route lookup on the given flow with the | 1080 | * This function performs a route lookup on the given flow with the |
1072 | * possibility of using the cached route in the socket if it is valid. | 1081 | * possibility of using the cached route in the socket if it is valid. |
1073 | * It will take the socket dst lock when operating on the dst cache. | 1082 | * It will take the socket dst lock when operating on the dst cache. |
1074 | * As a result, this function can only be used in process context. | 1083 | * As a result, this function can only be used in process context. |
1075 | * | 1084 | * |
1076 | * It returns a valid dst pointer on success, or a pointer encoded | 1085 | * It returns a valid dst pointer on success, or a pointer encoded |
1077 | * error code. | 1086 | * error code. |
1078 | */ | 1087 | */ |
1079 | struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, | 1088 | struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, |
1080 | const struct in6_addr *final_dst, | 1089 | const struct in6_addr *final_dst, |
1081 | bool can_sleep) | 1090 | bool can_sleep) |
1082 | { | 1091 | { |
1083 | struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); | 1092 | struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); |
1084 | int err; | 1093 | int err; |
1085 | 1094 | ||
1086 | dst = ip6_sk_dst_check(sk, dst, fl6); | 1095 | dst = ip6_sk_dst_check(sk, dst, fl6); |
1087 | 1096 | ||
1088 | err = ip6_dst_lookup_tail(sk, &dst, fl6); | 1097 | err = ip6_dst_lookup_tail(sk, &dst, fl6); |
1089 | if (err) | 1098 | if (err) |
1090 | return ERR_PTR(err); | 1099 | return ERR_PTR(err); |
1091 | if (final_dst) | 1100 | if (final_dst) |
1092 | ipv6_addr_copy(&fl6->daddr, final_dst); | 1101 | ipv6_addr_copy(&fl6->daddr, final_dst); |
1093 | if (can_sleep) | 1102 | if (can_sleep) |
1094 | fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP; | 1103 | fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP; |
1095 | 1104 | ||
1096 | return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); | 1105 | return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); |
1097 | } | 1106 | } |
1098 | EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); | 1107 | EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); |
1099 | 1108 | ||
/*
 * Append data for a UDP socket as a single oversized skb, relying on
 * UDP fragmentation offload (UFO) so the device (or software GSO)
 * segments it later.  Returns 0 on success or a negative errno; on
 * getfrag failure the caller is expected to fall back to the normal
 * (non-UFO) append path.
 */
static inline int ip6_ufo_append_data(struct sock *sk,
			int getfrag(void *from, char *to, int offset, int len,
			int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu,unsigned int flags,
			struct rt6_info *rt)

{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP large send offload by network
	 * device, so create one single skb packet containing complete
	 * udp datagram
	 */
	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
		/* No skb queued yet: allocate the head skb with room for
		 * link-layer, IPv6 and transport headers (+20 slack).
		 */
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (skb == NULL)
			return -ENOMEM;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb,fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		/* Checksum is finished by the segmenting layer/device. */
		skb->ip_summed = CHECKSUM_PARTIAL;
		skb->csum = 0;
	}

	/* Copy user payload (length minus transport header already
	 * accounted above) into the skb's paged fragments.
	 */
	err = skb_append_datato_frags(sk,skb, getfrag, from,
				      (length - transhdrlen));
	if (!err) {
		struct frag_hdr fhdr;

		/* Specify the length of each IPv6 datagram fragment.
		 * It has to be a multiple of 8.
		 */
		skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
					     sizeof(struct frag_hdr)) & ~7;
		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
		/* Pick the fragment ID now so every segment shares it. */
		ipv6_select_ident(&fhdr, rt);
		skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
		__skb_queue_tail(&sk->sk_write_queue, skb);

		return 0;
	}
	/* There is not enough support to do UDP LSO,
	 * so follow normal path
	 */
	kfree_skb(skb);

	return err;
}
1162 | 1171 | ||
1163 | static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src, | 1172 | static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src, |
1164 | gfp_t gfp) | 1173 | gfp_t gfp) |
1165 | { | 1174 | { |
1166 | return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL; | 1175 | return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL; |
1167 | } | 1176 | } |
1168 | 1177 | ||
1169 | static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src, | 1178 | static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src, |
1170 | gfp_t gfp) | 1179 | gfp_t gfp) |
1171 | { | 1180 | { |
1172 | return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL; | 1181 | return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL; |
1173 | } | 1182 | } |
1174 | 1183 | ||
1175 | int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, | 1184 | int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, |
1176 | int offset, int len, int odd, struct sk_buff *skb), | 1185 | int offset, int len, int odd, struct sk_buff *skb), |
1177 | void *from, int length, int transhdrlen, | 1186 | void *from, int length, int transhdrlen, |
1178 | int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6, | 1187 | int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6, |
1179 | struct rt6_info *rt, unsigned int flags, int dontfrag) | 1188 | struct rt6_info *rt, unsigned int flags, int dontfrag) |
1180 | { | 1189 | { |
1181 | struct inet_sock *inet = inet_sk(sk); | 1190 | struct inet_sock *inet = inet_sk(sk); |
1182 | struct ipv6_pinfo *np = inet6_sk(sk); | 1191 | struct ipv6_pinfo *np = inet6_sk(sk); |
1183 | struct inet_cork *cork; | 1192 | struct inet_cork *cork; |
1184 | struct sk_buff *skb; | 1193 | struct sk_buff *skb; |
1185 | unsigned int maxfraglen, fragheaderlen; | 1194 | unsigned int maxfraglen, fragheaderlen; |
1186 | int exthdrlen; | 1195 | int exthdrlen; |
1187 | int hh_len; | 1196 | int hh_len; |
1188 | int mtu; | 1197 | int mtu; |
1189 | int copy; | 1198 | int copy; |
1190 | int err; | 1199 | int err; |
1191 | int offset = 0; | 1200 | int offset = 0; |
1192 | int csummode = CHECKSUM_NONE; | 1201 | int csummode = CHECKSUM_NONE; |
1193 | __u8 tx_flags = 0; | 1202 | __u8 tx_flags = 0; |
1194 | 1203 | ||
1195 | if (flags&MSG_PROBE) | 1204 | if (flags&MSG_PROBE) |
1196 | return 0; | 1205 | return 0; |
1197 | cork = &inet->cork.base; | 1206 | cork = &inet->cork.base; |
1198 | if (skb_queue_empty(&sk->sk_write_queue)) { | 1207 | if (skb_queue_empty(&sk->sk_write_queue)) { |
1199 | /* | 1208 | /* |
1200 | * setup for corking | 1209 | * setup for corking |
1201 | */ | 1210 | */ |
1202 | if (opt) { | 1211 | if (opt) { |
1203 | if (WARN_ON(np->cork.opt)) | 1212 | if (WARN_ON(np->cork.opt)) |
1204 | return -EINVAL; | 1213 | return -EINVAL; |
1205 | 1214 | ||
1206 | np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation); | 1215 | np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation); |
1207 | if (unlikely(np->cork.opt == NULL)) | 1216 | if (unlikely(np->cork.opt == NULL)) |
1208 | return -ENOBUFS; | 1217 | return -ENOBUFS; |
1209 | 1218 | ||
1210 | np->cork.opt->tot_len = opt->tot_len; | 1219 | np->cork.opt->tot_len = opt->tot_len; |
1211 | np->cork.opt->opt_flen = opt->opt_flen; | 1220 | np->cork.opt->opt_flen = opt->opt_flen; |
1212 | np->cork.opt->opt_nflen = opt->opt_nflen; | 1221 | np->cork.opt->opt_nflen = opt->opt_nflen; |
1213 | 1222 | ||
1214 | np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt, | 1223 | np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt, |
1215 | sk->sk_allocation); | 1224 | sk->sk_allocation); |
1216 | if (opt->dst0opt && !np->cork.opt->dst0opt) | 1225 | if (opt->dst0opt && !np->cork.opt->dst0opt) |
1217 | return -ENOBUFS; | 1226 | return -ENOBUFS; |
1218 | 1227 | ||
1219 | np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt, | 1228 | np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt, |
1220 | sk->sk_allocation); | 1229 | sk->sk_allocation); |
1221 | if (opt->dst1opt && !np->cork.opt->dst1opt) | 1230 | if (opt->dst1opt && !np->cork.opt->dst1opt) |
1222 | return -ENOBUFS; | 1231 | return -ENOBUFS; |
1223 | 1232 | ||
1224 | np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt, | 1233 | np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt, |
1225 | sk->sk_allocation); | 1234 | sk->sk_allocation); |
1226 | if (opt->hopopt && !np->cork.opt->hopopt) | 1235 | if (opt->hopopt && !np->cork.opt->hopopt) |
1227 | return -ENOBUFS; | 1236 | return -ENOBUFS; |
1228 | 1237 | ||
1229 | np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt, | 1238 | np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt, |
1230 | sk->sk_allocation); | 1239 | sk->sk_allocation); |
1231 | if (opt->srcrt && !np->cork.opt->srcrt) | 1240 | if (opt->srcrt && !np->cork.opt->srcrt) |
1232 | return -ENOBUFS; | 1241 | return -ENOBUFS; |
1233 | 1242 | ||
1234 | /* need source address above miyazawa*/ | 1243 | /* need source address above miyazawa*/ |
1235 | } | 1244 | } |
1236 | dst_hold(&rt->dst); | 1245 | dst_hold(&rt->dst); |
1237 | cork->dst = &rt->dst; | 1246 | cork->dst = &rt->dst; |
1238 | inet->cork.fl.u.ip6 = *fl6; | 1247 | inet->cork.fl.u.ip6 = *fl6; |
1239 | np->cork.hop_limit = hlimit; | 1248 | np->cork.hop_limit = hlimit; |
1240 | np->cork.tclass = tclass; | 1249 | np->cork.tclass = tclass; |
1241 | mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? | 1250 | mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? |
1242 | rt->dst.dev->mtu : dst_mtu(rt->dst.path); | 1251 | rt->dst.dev->mtu : dst_mtu(rt->dst.path); |
1243 | if (np->frag_size < mtu) { | 1252 | if (np->frag_size < mtu) { |
1244 | if (np->frag_size) | 1253 | if (np->frag_size) |
1245 | mtu = np->frag_size; | 1254 | mtu = np->frag_size; |
1246 | } | 1255 | } |
1247 | cork->fragsize = mtu; | 1256 | cork->fragsize = mtu; |
1248 | if (dst_allfrag(rt->dst.path)) | 1257 | if (dst_allfrag(rt->dst.path)) |
1249 | cork->flags |= IPCORK_ALLFRAG; | 1258 | cork->flags |= IPCORK_ALLFRAG; |
1250 | cork->length = 0; | 1259 | cork->length = 0; |
1251 | sk->sk_sndmsg_page = NULL; | 1260 | sk->sk_sndmsg_page = NULL; |
1252 | sk->sk_sndmsg_off = 0; | 1261 | sk->sk_sndmsg_off = 0; |
1253 | exthdrlen = rt->dst.header_len + (opt ? opt->opt_flen : 0) - | 1262 | exthdrlen = rt->dst.header_len + (opt ? opt->opt_flen : 0) - |
1254 | rt->rt6i_nfheader_len; | 1263 | rt->rt6i_nfheader_len; |
1255 | length += exthdrlen; | 1264 | length += exthdrlen; |
1256 | transhdrlen += exthdrlen; | 1265 | transhdrlen += exthdrlen; |
1257 | } else { | 1266 | } else { |
1258 | rt = (struct rt6_info *)cork->dst; | 1267 | rt = (struct rt6_info *)cork->dst; |
1259 | fl6 = &inet->cork.fl.u.ip6; | 1268 | fl6 = &inet->cork.fl.u.ip6; |
1260 | opt = np->cork.opt; | 1269 | opt = np->cork.opt; |
1261 | transhdrlen = 0; | 1270 | transhdrlen = 0; |
1262 | exthdrlen = 0; | 1271 | exthdrlen = 0; |
1263 | mtu = cork->fragsize; | 1272 | mtu = cork->fragsize; |
1264 | } | 1273 | } |
1265 | 1274 | ||
1266 | hh_len = LL_RESERVED_SPACE(rt->dst.dev); | 1275 | hh_len = LL_RESERVED_SPACE(rt->dst.dev); |
1267 | 1276 | ||
1268 | fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len + | 1277 | fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len + |
1269 | (opt ? opt->opt_nflen : 0); | 1278 | (opt ? opt->opt_nflen : 0); |
1270 | maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); | 1279 | maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); |
1271 | 1280 | ||
1272 | if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) { | 1281 | if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) { |
1273 | if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) { | 1282 | if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) { |
1274 | ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen); | 1283 | ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen); |
1275 | return -EMSGSIZE; | 1284 | return -EMSGSIZE; |
1276 | } | 1285 | } |
1277 | } | 1286 | } |
1278 | 1287 | ||
1279 | /* For UDP, check if TX timestamp is enabled */ | 1288 | /* For UDP, check if TX timestamp is enabled */ |
1280 | if (sk->sk_type == SOCK_DGRAM) { | 1289 | if (sk->sk_type == SOCK_DGRAM) { |
1281 | err = sock_tx_timestamp(sk, &tx_flags); | 1290 | err = sock_tx_timestamp(sk, &tx_flags); |
1282 | if (err) | 1291 | if (err) |
1283 | goto error; | 1292 | goto error; |
1284 | } | 1293 | } |
1285 | 1294 | ||
1286 | /* | 1295 | /* |
1287 | * Let's try using as much space as possible. | 1296 | * Let's try using as much space as possible. |
1288 | * Use MTU if total length of the message fits into the MTU. | 1297 | * Use MTU if total length of the message fits into the MTU. |
1289 | * Otherwise, we need to reserve fragment header and | 1298 | * Otherwise, we need to reserve fragment header and |
1290 | * fragment alignment (= 8-15 octects, in total). | 1299 | * fragment alignment (= 8-15 octects, in total). |
1291 | * | 1300 | * |
1292 | * Note that we may need to "move" the data from the tail of | 1301 | * Note that we may need to "move" the data from the tail of |
1293 | * of the buffer to the new fragment when we split | 1302 | * of the buffer to the new fragment when we split |
1294 | * the message. | 1303 | * the message. |
1295 | * | 1304 | * |
1296 | * FIXME: It may be fragmented into multiple chunks | 1305 | * FIXME: It may be fragmented into multiple chunks |
1297 | * at once if non-fragmentable extension headers | 1306 | * at once if non-fragmentable extension headers |
1298 | * are too large. | 1307 | * are too large. |
1299 | * --yoshfuji | 1308 | * --yoshfuji |
1300 | */ | 1309 | */ |
1301 | 1310 | ||
1302 | cork->length += length; | 1311 | cork->length += length; |
1303 | if (length > mtu) { | 1312 | if (length > mtu) { |
1304 | int proto = sk->sk_protocol; | 1313 | int proto = sk->sk_protocol; |
1305 | if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){ | 1314 | if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){ |
1306 | ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen); | 1315 | ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen); |
1307 | return -EMSGSIZE; | 1316 | return -EMSGSIZE; |
1308 | } | 1317 | } |
1309 | 1318 | ||
1310 | if (proto == IPPROTO_UDP && | 1319 | if (proto == IPPROTO_UDP && |
1311 | (rt->dst.dev->features & NETIF_F_UFO)) { | 1320 | (rt->dst.dev->features & NETIF_F_UFO)) { |
1312 | 1321 | ||
1313 | err = ip6_ufo_append_data(sk, getfrag, from, length, | 1322 | err = ip6_ufo_append_data(sk, getfrag, from, length, |
1314 | hh_len, fragheaderlen, | 1323 | hh_len, fragheaderlen, |
1315 | transhdrlen, mtu, flags, rt); | 1324 | transhdrlen, mtu, flags, rt); |
1316 | if (err) | 1325 | if (err) |
1317 | goto error; | 1326 | goto error; |
1318 | return 0; | 1327 | return 0; |
1319 | } | 1328 | } |
1320 | } | 1329 | } |
1321 | 1330 | ||
1322 | if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) | 1331 | if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) |
1323 | goto alloc_new_skb; | 1332 | goto alloc_new_skb; |
1324 | 1333 | ||
1325 | while (length > 0) { | 1334 | while (length > 0) { |
1326 | /* Check if the remaining data fits into current packet. */ | 1335 | /* Check if the remaining data fits into current packet. */ |
1327 | copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len; | 1336 | copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len; |
1328 | if (copy < length) | 1337 | if (copy < length) |
1329 | copy = maxfraglen - skb->len; | 1338 | copy = maxfraglen - skb->len; |
1330 | 1339 | ||
1331 | if (copy <= 0) { | 1340 | if (copy <= 0) { |
1332 | char *data; | 1341 | char *data; |
1333 | unsigned int datalen; | 1342 | unsigned int datalen; |
1334 | unsigned int fraglen; | 1343 | unsigned int fraglen; |
1335 | unsigned int fraggap; | 1344 | unsigned int fraggap; |
1336 | unsigned int alloclen; | 1345 | unsigned int alloclen; |
1337 | struct sk_buff *skb_prev; | 1346 | struct sk_buff *skb_prev; |
1338 | alloc_new_skb: | 1347 | alloc_new_skb: |
1339 | skb_prev = skb; | 1348 | skb_prev = skb; |
1340 | 1349 | ||
1341 | /* There's no room in the current skb */ | 1350 | /* There's no room in the current skb */ |
1342 | if (skb_prev) | 1351 | if (skb_prev) |
1343 | fraggap = skb_prev->len - maxfraglen; | 1352 | fraggap = skb_prev->len - maxfraglen; |
1344 | else | 1353 | else |
1345 | fraggap = 0; | 1354 | fraggap = 0; |
1346 | 1355 | ||
1347 | /* | 1356 | /* |
1348 | * If remaining data exceeds the mtu, | 1357 | * If remaining data exceeds the mtu, |
1349 | * we know we need more fragment(s). | 1358 | * we know we need more fragment(s). |
1350 | */ | 1359 | */ |
1351 | datalen = length + fraggap; | 1360 | datalen = length + fraggap; |
1352 | if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen) | 1361 | if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen) |
1353 | datalen = maxfraglen - fragheaderlen; | 1362 | datalen = maxfraglen - fragheaderlen; |
1354 | 1363 | ||
1355 | fraglen = datalen + fragheaderlen; | 1364 | fraglen = datalen + fragheaderlen; |
1356 | if ((flags & MSG_MORE) && | 1365 | if ((flags & MSG_MORE) && |
1357 | !(rt->dst.dev->features&NETIF_F_SG)) | 1366 | !(rt->dst.dev->features&NETIF_F_SG)) |
1358 | alloclen = mtu; | 1367 | alloclen = mtu; |
1359 | else | 1368 | else |
1360 | alloclen = datalen + fragheaderlen; | 1369 | alloclen = datalen + fragheaderlen; |
1361 | 1370 | ||
1362 | /* | 1371 | /* |
1363 | * The last fragment gets additional space at tail. | 1372 | * The last fragment gets additional space at tail. |
1364 | * Note: we overallocate on fragments with MSG_MODE | 1373 | * Note: we overallocate on fragments with MSG_MODE |
1365 | * because we have no idea if we're the last one. | 1374 | * because we have no idea if we're the last one. |
1366 | */ | 1375 | */ |
1367 | if (datalen == length + fraggap) | 1376 | if (datalen == length + fraggap) |
1368 | alloclen += rt->dst.trailer_len; | 1377 | alloclen += rt->dst.trailer_len; |
1369 | 1378 | ||
1370 | /* | 1379 | /* |
1371 | * We just reserve space for fragment header. | 1380 | * We just reserve space for fragment header. |
1372 | * Note: this may be overallocation if the message | 1381 | * Note: this may be overallocation if the message |
1373 | * (without MSG_MORE) fits into the MTU. | 1382 | * (without MSG_MORE) fits into the MTU. |
1374 | */ | 1383 | */ |
1375 | alloclen += sizeof(struct frag_hdr); | 1384 | alloclen += sizeof(struct frag_hdr); |
1376 | 1385 | ||
1377 | if (transhdrlen) { | 1386 | if (transhdrlen) { |
1378 | skb = sock_alloc_send_skb(sk, | 1387 | skb = sock_alloc_send_skb(sk, |
1379 | alloclen + hh_len, | 1388 | alloclen + hh_len, |
1380 | (flags & MSG_DONTWAIT), &err); | 1389 | (flags & MSG_DONTWAIT), &err); |
1381 | } else { | 1390 | } else { |
1382 | skb = NULL; | 1391 | skb = NULL; |
1383 | if (atomic_read(&sk->sk_wmem_alloc) <= | 1392 | if (atomic_read(&sk->sk_wmem_alloc) <= |
1384 | 2 * sk->sk_sndbuf) | 1393 | 2 * sk->sk_sndbuf) |
1385 | skb = sock_wmalloc(sk, | 1394 | skb = sock_wmalloc(sk, |
1386 | alloclen + hh_len, 1, | 1395 | alloclen + hh_len, 1, |
1387 | sk->sk_allocation); | 1396 | sk->sk_allocation); |
1388 | if (unlikely(skb == NULL)) | 1397 | if (unlikely(skb == NULL)) |
1389 | err = -ENOBUFS; | 1398 | err = -ENOBUFS; |
1390 | else { | 1399 | else { |
1391 | /* Only the initial fragment | 1400 | /* Only the initial fragment |
1392 | * is time stamped. | 1401 | * is time stamped. |
1393 | */ | 1402 | */ |
1394 | tx_flags = 0; | 1403 | tx_flags = 0; |
1395 | } | 1404 | } |
1396 | } | 1405 | } |
1397 | if (skb == NULL) | 1406 | if (skb == NULL) |
1398 | goto error; | 1407 | goto error; |
1399 | /* | 1408 | /* |
1400 | * Fill in the control structures | 1409 | * Fill in the control structures |
1401 | */ | 1410 | */ |
1402 | skb->ip_summed = csummode; | 1411 | skb->ip_summed = csummode; |
1403 | skb->csum = 0; | 1412 | skb->csum = 0; |
1404 | /* reserve for fragmentation */ | 1413 | /* reserve for fragmentation */ |
1405 | skb_reserve(skb, hh_len+sizeof(struct frag_hdr)); | 1414 | skb_reserve(skb, hh_len+sizeof(struct frag_hdr)); |
1406 | 1415 | ||
1407 | if (sk->sk_type == SOCK_DGRAM) | 1416 | if (sk->sk_type == SOCK_DGRAM) |
1408 | skb_shinfo(skb)->tx_flags = tx_flags; | 1417 | skb_shinfo(skb)->tx_flags = tx_flags; |
1409 | 1418 | ||
1410 | /* | 1419 | /* |
1411 | * Find where to start putting bytes | 1420 | * Find where to start putting bytes |
1412 | */ | 1421 | */ |
1413 | data = skb_put(skb, fraglen); | 1422 | data = skb_put(skb, fraglen); |
1414 | skb_set_network_header(skb, exthdrlen); | 1423 | skb_set_network_header(skb, exthdrlen); |
1415 | data += fragheaderlen; | 1424 | data += fragheaderlen; |
1416 | skb->transport_header = (skb->network_header + | 1425 | skb->transport_header = (skb->network_header + |
1417 | fragheaderlen); | 1426 | fragheaderlen); |
1418 | if (fraggap) { | 1427 | if (fraggap) { |
1419 | skb->csum = skb_copy_and_csum_bits( | 1428 | skb->csum = skb_copy_and_csum_bits( |
1420 | skb_prev, maxfraglen, | 1429 | skb_prev, maxfraglen, |
1421 | data + transhdrlen, fraggap, 0); | 1430 | data + transhdrlen, fraggap, 0); |
1422 | skb_prev->csum = csum_sub(skb_prev->csum, | 1431 | skb_prev->csum = csum_sub(skb_prev->csum, |
1423 | skb->csum); | 1432 | skb->csum); |
1424 | data += fraggap; | 1433 | data += fraggap; |
1425 | pskb_trim_unique(skb_prev, maxfraglen); | 1434 | pskb_trim_unique(skb_prev, maxfraglen); |
1426 | } | 1435 | } |
1427 | copy = datalen - transhdrlen - fraggap; | 1436 | copy = datalen - transhdrlen - fraggap; |
1428 | if (copy < 0) { | 1437 | if (copy < 0) { |
1429 | err = -EINVAL; | 1438 | err = -EINVAL; |
1430 | kfree_skb(skb); | 1439 | kfree_skb(skb); |
1431 | goto error; | 1440 | goto error; |
1432 | } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) { | 1441 | } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) { |
1433 | err = -EFAULT; | 1442 | err = -EFAULT; |
1434 | kfree_skb(skb); | 1443 | kfree_skb(skb); |
1435 | goto error; | 1444 | goto error; |
1436 | } | 1445 | } |
1437 | 1446 | ||
1438 | offset += copy; | 1447 | offset += copy; |
1439 | length -= datalen - fraggap; | 1448 | length -= datalen - fraggap; |
1440 | transhdrlen = 0; | 1449 | transhdrlen = 0; |
1441 | exthdrlen = 0; | 1450 | exthdrlen = 0; |
1442 | csummode = CHECKSUM_NONE; | 1451 | csummode = CHECKSUM_NONE; |
1443 | 1452 | ||
1444 | /* | 1453 | /* |
1445 | * Put the packet on the pending queue | 1454 | * Put the packet on the pending queue |
1446 | */ | 1455 | */ |
1447 | __skb_queue_tail(&sk->sk_write_queue, skb); | 1456 | __skb_queue_tail(&sk->sk_write_queue, skb); |
1448 | continue; | 1457 | continue; |
1449 | } | 1458 | } |
1450 | 1459 | ||
1451 | if (copy > length) | 1460 | if (copy > length) |
1452 | copy = length; | 1461 | copy = length; |
1453 | 1462 | ||
1454 | if (!(rt->dst.dev->features&NETIF_F_SG)) { | 1463 | if (!(rt->dst.dev->features&NETIF_F_SG)) { |
1455 | unsigned int off; | 1464 | unsigned int off; |
1456 | 1465 | ||
1457 | off = skb->len; | 1466 | off = skb->len; |
1458 | if (getfrag(from, skb_put(skb, copy), | 1467 | if (getfrag(from, skb_put(skb, copy), |
1459 | offset, copy, off, skb) < 0) { | 1468 | offset, copy, off, skb) < 0) { |
1460 | __skb_trim(skb, off); | 1469 | __skb_trim(skb, off); |
1461 | err = -EFAULT; | 1470 | err = -EFAULT; |
1462 | goto error; | 1471 | goto error; |
1463 | } | 1472 | } |
1464 | } else { | 1473 | } else { |
1465 | int i = skb_shinfo(skb)->nr_frags; | 1474 | int i = skb_shinfo(skb)->nr_frags; |
1466 | skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1]; | 1475 | skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1]; |
1467 | struct page *page = sk->sk_sndmsg_page; | 1476 | struct page *page = sk->sk_sndmsg_page; |
1468 | int off = sk->sk_sndmsg_off; | 1477 | int off = sk->sk_sndmsg_off; |
1469 | unsigned int left; | 1478 | unsigned int left; |
1470 | 1479 | ||
1471 | if (page && (left = PAGE_SIZE - off) > 0) { | 1480 | if (page && (left = PAGE_SIZE - off) > 0) { |
1472 | if (copy >= left) | 1481 | if (copy >= left) |
1473 | copy = left; | 1482 | copy = left; |
1474 | if (page != frag->page) { | 1483 | if (page != frag->page) { |
1475 | if (i == MAX_SKB_FRAGS) { | 1484 | if (i == MAX_SKB_FRAGS) { |
1476 | err = -EMSGSIZE; | 1485 | err = -EMSGSIZE; |
1477 | goto error; | 1486 | goto error; |
1478 | } | 1487 | } |
1479 | get_page(page); | 1488 | get_page(page); |
1480 | skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0); | 1489 | skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0); |
1481 | frag = &skb_shinfo(skb)->frags[i]; | 1490 | frag = &skb_shinfo(skb)->frags[i]; |
1482 | } | 1491 | } |
1483 | } else if(i < MAX_SKB_FRAGS) { | 1492 | } else if(i < MAX_SKB_FRAGS) { |
1484 | if (copy > PAGE_SIZE) | 1493 | if (copy > PAGE_SIZE) |
1485 | copy = PAGE_SIZE; | 1494 | copy = PAGE_SIZE; |
1486 | page = alloc_pages(sk->sk_allocation, 0); | 1495 | page = alloc_pages(sk->sk_allocation, 0); |
1487 | if (page == NULL) { | 1496 | if (page == NULL) { |
1488 | err = -ENOMEM; | 1497 | err = -ENOMEM; |
1489 | goto error; | 1498 | goto error; |
1490 | } | 1499 | } |
1491 | sk->sk_sndmsg_page = page; | 1500 | sk->sk_sndmsg_page = page; |
1492 | sk->sk_sndmsg_off = 0; | 1501 | sk->sk_sndmsg_off = 0; |
1493 | 1502 | ||
1494 | skb_fill_page_desc(skb, i, page, 0, 0); | 1503 | skb_fill_page_desc(skb, i, page, 0, 0); |
1495 | frag = &skb_shinfo(skb)->frags[i]; | 1504 | frag = &skb_shinfo(skb)->frags[i]; |
1496 | } else { | 1505 | } else { |
1497 | err = -EMSGSIZE; | 1506 | err = -EMSGSIZE; |
1498 | goto error; | 1507 | goto error; |
1499 | } | 1508 | } |
1500 | if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) { | 1509 | if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) { |
1501 | err = -EFAULT; | 1510 | err = -EFAULT; |
1502 | goto error; | 1511 | goto error; |
1503 | } | 1512 | } |
1504 | sk->sk_sndmsg_off += copy; | 1513 | sk->sk_sndmsg_off += copy; |
1505 | frag->size += copy; | 1514 | frag->size += copy; |
1506 | skb->len += copy; | 1515 | skb->len += copy; |
1507 | skb->data_len += copy; | 1516 | skb->data_len += copy; |
1508 | skb->truesize += copy; | 1517 | skb->truesize += copy; |
1509 | atomic_add(copy, &sk->sk_wmem_alloc); | 1518 | atomic_add(copy, &sk->sk_wmem_alloc); |
1510 | } | 1519 | } |
1511 | offset += copy; | 1520 | offset += copy; |
1512 | length -= copy; | 1521 | length -= copy; |
1513 | } | 1522 | } |
1514 | return 0; | 1523 | return 0; |
1515 | error: | 1524 | error: |
1516 | cork->length -= length; | 1525 | cork->length -= length; |
1517 | IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); | 1526 | IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); |
1518 | return err; | 1527 | return err; |
1519 | } | 1528 | } |
1520 | 1529 | ||
1521 | static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np) | 1530 | static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np) |
1522 | { | 1531 | { |
1523 | if (np->cork.opt) { | 1532 | if (np->cork.opt) { |
1524 | kfree(np->cork.opt->dst0opt); | 1533 | kfree(np->cork.opt->dst0opt); |
1525 | kfree(np->cork.opt->dst1opt); | 1534 | kfree(np->cork.opt->dst1opt); |
1526 | kfree(np->cork.opt->hopopt); | 1535 | kfree(np->cork.opt->hopopt); |
1527 | kfree(np->cork.opt->srcrt); | 1536 | kfree(np->cork.opt->srcrt); |
1528 | kfree(np->cork.opt); | 1537 | kfree(np->cork.opt); |
1529 | np->cork.opt = NULL; | 1538 | np->cork.opt = NULL; |
1530 | } | 1539 | } |
1531 | 1540 | ||
1532 | if (inet->cork.base.dst) { | 1541 | if (inet->cork.base.dst) { |
1533 | dst_release(inet->cork.base.dst); | 1542 | dst_release(inet->cork.base.dst); |
1534 | inet->cork.base.dst = NULL; | 1543 | inet->cork.base.dst = NULL; |
1535 | inet->cork.base.flags &= ~IPCORK_ALLFRAG; | 1544 | inet->cork.base.flags &= ~IPCORK_ALLFRAG; |
1536 | } | 1545 | } |
1537 | memset(&inet->cork.fl, 0, sizeof(inet->cork.fl)); | 1546 | memset(&inet->cork.fl, 0, sizeof(inet->cork.fl)); |
1538 | } | 1547 | } |
1539 | 1548 | ||
1540 | int ip6_push_pending_frames(struct sock *sk) | 1549 | int ip6_push_pending_frames(struct sock *sk) |
1541 | { | 1550 | { |
1542 | struct sk_buff *skb, *tmp_skb; | 1551 | struct sk_buff *skb, *tmp_skb; |
1543 | struct sk_buff **tail_skb; | 1552 | struct sk_buff **tail_skb; |
1544 | struct in6_addr final_dst_buf, *final_dst = &final_dst_buf; | 1553 | struct in6_addr final_dst_buf, *final_dst = &final_dst_buf; |
1545 | struct inet_sock *inet = inet_sk(sk); | 1554 | struct inet_sock *inet = inet_sk(sk); |
1546 | struct ipv6_pinfo *np = inet6_sk(sk); | 1555 | struct ipv6_pinfo *np = inet6_sk(sk); |
1547 | struct net *net = sock_net(sk); | 1556 | struct net *net = sock_net(sk); |
1548 | struct ipv6hdr *hdr; | 1557 | struct ipv6hdr *hdr; |
1549 | struct ipv6_txoptions *opt = np->cork.opt; | 1558 | struct ipv6_txoptions *opt = np->cork.opt; |
1550 | struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst; | 1559 | struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst; |
1551 | struct flowi6 *fl6 = &inet->cork.fl.u.ip6; | 1560 | struct flowi6 *fl6 = &inet->cork.fl.u.ip6; |
1552 | unsigned char proto = fl6->flowi6_proto; | 1561 | unsigned char proto = fl6->flowi6_proto; |
1553 | int err = 0; | 1562 | int err = 0; |
1554 | 1563 | ||
1555 | if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL) | 1564 | if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL) |
1556 | goto out; | 1565 | goto out; |
1557 | tail_skb = &(skb_shinfo(skb)->frag_list); | 1566 | tail_skb = &(skb_shinfo(skb)->frag_list); |
1558 | 1567 | ||
1559 | /* move skb->data to ip header from ext header */ | 1568 | /* move skb->data to ip header from ext header */ |
1560 | if (skb->data < skb_network_header(skb)) | 1569 | if (skb->data < skb_network_header(skb)) |
1561 | __skb_pull(skb, skb_network_offset(skb)); | 1570 | __skb_pull(skb, skb_network_offset(skb)); |
1562 | while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) { | 1571 | while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) { |
1563 | __skb_pull(tmp_skb, skb_network_header_len(skb)); | 1572 | __skb_pull(tmp_skb, skb_network_header_len(skb)); |
1564 | *tail_skb = tmp_skb; | 1573 | *tail_skb = tmp_skb; |
1565 | tail_skb = &(tmp_skb->next); | 1574 | tail_skb = &(tmp_skb->next); |
1566 | skb->len += tmp_skb->len; | 1575 | skb->len += tmp_skb->len; |
1567 | skb->data_len += tmp_skb->len; | 1576 | skb->data_len += tmp_skb->len; |
1568 | skb->truesize += tmp_skb->truesize; | 1577 | skb->truesize += tmp_skb->truesize; |
1569 | tmp_skb->destructor = NULL; | 1578 | tmp_skb->destructor = NULL; |
1570 | tmp_skb->sk = NULL; | 1579 | tmp_skb->sk = NULL; |
1571 | } | 1580 | } |
1572 | 1581 | ||
1573 | /* Allow local fragmentation. */ | 1582 | /* Allow local fragmentation. */ |
1574 | if (np->pmtudisc < IPV6_PMTUDISC_DO) | 1583 | if (np->pmtudisc < IPV6_PMTUDISC_DO) |
1575 | skb->local_df = 1; | 1584 | skb->local_df = 1; |
1576 | 1585 | ||
1577 | ipv6_addr_copy(final_dst, &fl6->daddr); | 1586 | ipv6_addr_copy(final_dst, &fl6->daddr); |
1578 | __skb_pull(skb, skb_network_header_len(skb)); | 1587 | __skb_pull(skb, skb_network_header_len(skb)); |
1579 | if (opt && opt->opt_flen) | 1588 | if (opt && opt->opt_flen) |
1580 | ipv6_push_frag_opts(skb, opt, &proto); | 1589 | ipv6_push_frag_opts(skb, opt, &proto); |
1581 | if (opt && opt->opt_nflen) | 1590 | if (opt && opt->opt_nflen) |
1582 | ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst); | 1591 | ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst); |
1583 | 1592 | ||
1584 | skb_push(skb, sizeof(struct ipv6hdr)); | 1593 | skb_push(skb, sizeof(struct ipv6hdr)); |
1585 | skb_reset_network_header(skb); | 1594 | skb_reset_network_header(skb); |
1586 | hdr = ipv6_hdr(skb); | 1595 | hdr = ipv6_hdr(skb); |
1587 | 1596 | ||
1588 | *(__be32*)hdr = fl6->flowlabel | | 1597 | *(__be32*)hdr = fl6->flowlabel | |
1589 | htonl(0x60000000 | ((int)np->cork.tclass << 20)); | 1598 | htonl(0x60000000 | ((int)np->cork.tclass << 20)); |
1590 | 1599 | ||
1591 | hdr->hop_limit = np->cork.hop_limit; | 1600 | hdr->hop_limit = np->cork.hop_limit; |
1592 | hdr->nexthdr = proto; | 1601 | hdr->nexthdr = proto; |
1593 | ipv6_addr_copy(&hdr->saddr, &fl6->saddr); | 1602 | ipv6_addr_copy(&hdr->saddr, &fl6->saddr); |
1594 | ipv6_addr_copy(&hdr->daddr, final_dst); | 1603 | ipv6_addr_copy(&hdr->daddr, final_dst); |
1595 | 1604 | ||
1596 | skb->priority = sk->sk_priority; | 1605 | skb->priority = sk->sk_priority; |
1597 | skb->mark = sk->sk_mark; | 1606 | skb->mark = sk->sk_mark; |
1598 | 1607 | ||
1599 | skb_dst_set(skb, dst_clone(&rt->dst)); | 1608 | skb_dst_set(skb, dst_clone(&rt->dst)); |
1600 | IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); | 1609 | IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); |
1601 | if (proto == IPPROTO_ICMPV6) { | 1610 | if (proto == IPPROTO_ICMPV6) { |
1602 | struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); | 1611 | struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); |
1603 | 1612 | ||
1604 | ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type); | 1613 | ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type); |
1605 | ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); | 1614 | ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); |
1606 | } | 1615 | } |
1607 | 1616 | ||
1608 | err = ip6_local_out(skb); | 1617 | err = ip6_local_out(skb); |
1609 | if (err) { | 1618 | if (err) { |
1610 | if (err > 0) | 1619 | if (err > 0) |
1611 | err = net_xmit_errno(err); | 1620 | err = net_xmit_errno(err); |
1612 | if (err) | 1621 | if (err) |
1613 | goto error; | 1622 | goto error; |
1614 | } | 1623 | } |
1615 | 1624 | ||
1616 | out: | 1625 | out: |
1617 | ip6_cork_release(inet, np); | 1626 | ip6_cork_release(inet, np); |
1618 | return err; | 1627 | return err; |
1619 | error: | 1628 | error: |
1620 | IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); | 1629 | IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); |
1621 | goto out; | 1630 | goto out; |
1622 | } | 1631 | } |
1623 | 1632 | ||
1624 | void ip6_flush_pending_frames(struct sock *sk) | 1633 | void ip6_flush_pending_frames(struct sock *sk) |
1625 | { | 1634 | { |
1626 | struct sk_buff *skb; | 1635 | struct sk_buff *skb; |
1627 | 1636 | ||
1628 | while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) { | 1637 | while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) { |
1629 | if (skb_dst(skb)) | 1638 | if (skb_dst(skb)) |
1630 | IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)), | 1639 | IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)), |
1631 | IPSTATS_MIB_OUTDISCARDS); | 1640 | IPSTATS_MIB_OUTDISCARDS); |
1632 | kfree_skb(skb); | 1641 | kfree_skb(skb); |
1633 | } | 1642 | } |
1634 | 1643 | ||
1635 | ip6_cork_release(inet_sk(sk), inet6_sk(sk)); | 1644 | ip6_cork_release(inet_sk(sk), inet6_sk(sk)); |
1636 | } | 1645 | } |
1637 | 1646 |
1 | /* | 1 | /* |
2 | * Linux INET6 implementation | 2 | * Linux INET6 implementation |
3 | * FIB front-end. | 3 | * FIB front-end. |
4 | * | 4 | * |
5 | * Authors: | 5 | * Authors: |
6 | * Pedro Roque <roque@di.fc.ul.pt> | 6 | * Pedro Roque <roque@di.fc.ul.pt> |
7 | * | 7 | * |
8 | * This program is free software; you can redistribute it and/or | 8 | * This program is free software; you can redistribute it and/or |
9 | * modify it under the terms of the GNU General Public License | 9 | * modify it under the terms of the GNU General Public License |
10 | * as published by the Free Software Foundation; either version | 10 | * as published by the Free Software Foundation; either version |
11 | * 2 of the License, or (at your option) any later version. | 11 | * 2 of the License, or (at your option) any later version. |
12 | */ | 12 | */ |
13 | 13 | ||
14 | /* Changes: | 14 | /* Changes: |
15 | * | 15 | * |
16 | * YOSHIFUJI Hideaki @USAGI | 16 | * YOSHIFUJI Hideaki @USAGI |
17 | * reworked default router selection. | 17 | * reworked default router selection. |
18 | * - respect outgoing interface | 18 | * - respect outgoing interface |
19 | * - select from (probably) reachable routers (i.e. | 19 | * - select from (probably) reachable routers (i.e. |
20 | * routers in REACHABLE, STALE, DELAY or PROBE states). | 20 | * routers in REACHABLE, STALE, DELAY or PROBE states). |
21 | * - always select the same router if it is (probably) | 21 | * - always select the same router if it is (probably) |
22 | * reachable. otherwise, round-robin the list. | 22 | * reachable. otherwise, round-robin the list. |
23 | * Ville Nuorvala | 23 | * Ville Nuorvala |
24 | * Fixed routing subtrees. | 24 | * Fixed routing subtrees. |
25 | */ | 25 | */ |
26 | 26 | ||
27 | #include <linux/capability.h> | 27 | #include <linux/capability.h> |
28 | #include <linux/errno.h> | 28 | #include <linux/errno.h> |
29 | #include <linux/types.h> | 29 | #include <linux/types.h> |
30 | #include <linux/times.h> | 30 | #include <linux/times.h> |
31 | #include <linux/socket.h> | 31 | #include <linux/socket.h> |
32 | #include <linux/sockios.h> | 32 | #include <linux/sockios.h> |
33 | #include <linux/net.h> | 33 | #include <linux/net.h> |
34 | #include <linux/route.h> | 34 | #include <linux/route.h> |
35 | #include <linux/netdevice.h> | 35 | #include <linux/netdevice.h> |
36 | #include <linux/in6.h> | 36 | #include <linux/in6.h> |
37 | #include <linux/mroute6.h> | 37 | #include <linux/mroute6.h> |
38 | #include <linux/init.h> | 38 | #include <linux/init.h> |
39 | #include <linux/if_arp.h> | 39 | #include <linux/if_arp.h> |
40 | #include <linux/proc_fs.h> | 40 | #include <linux/proc_fs.h> |
41 | #include <linux/seq_file.h> | 41 | #include <linux/seq_file.h> |
42 | #include <linux/nsproxy.h> | 42 | #include <linux/nsproxy.h> |
43 | #include <linux/slab.h> | 43 | #include <linux/slab.h> |
44 | #include <net/net_namespace.h> | 44 | #include <net/net_namespace.h> |
45 | #include <net/snmp.h> | 45 | #include <net/snmp.h> |
46 | #include <net/ipv6.h> | 46 | #include <net/ipv6.h> |
47 | #include <net/ip6_fib.h> | 47 | #include <net/ip6_fib.h> |
48 | #include <net/ip6_route.h> | 48 | #include <net/ip6_route.h> |
49 | #include <net/ndisc.h> | 49 | #include <net/ndisc.h> |
50 | #include <net/addrconf.h> | 50 | #include <net/addrconf.h> |
51 | #include <net/tcp.h> | 51 | #include <net/tcp.h> |
52 | #include <linux/rtnetlink.h> | 52 | #include <linux/rtnetlink.h> |
53 | #include <net/dst.h> | 53 | #include <net/dst.h> |
54 | #include <net/xfrm.h> | 54 | #include <net/xfrm.h> |
55 | #include <net/netevent.h> | 55 | #include <net/netevent.h> |
56 | #include <net/netlink.h> | 56 | #include <net/netlink.h> |
57 | 57 | ||
58 | #include <asm/uaccess.h> | 58 | #include <asm/uaccess.h> |
59 | 59 | ||
60 | #ifdef CONFIG_SYSCTL | 60 | #ifdef CONFIG_SYSCTL |
61 | #include <linux/sysctl.h> | 61 | #include <linux/sysctl.h> |
62 | #endif | 62 | #endif |
63 | 63 | ||
64 | /* Set to 3 to get tracing. */ | 64 | /* Set to 3 to get tracing. */ |
65 | #define RT6_DEBUG 2 | 65 | #define RT6_DEBUG 2 |
66 | 66 | ||
67 | #if RT6_DEBUG >= 3 | 67 | #if RT6_DEBUG >= 3 |
68 | #define RDBG(x) printk x | 68 | #define RDBG(x) printk x |
69 | #define RT6_TRACE(x...) printk(KERN_DEBUG x) | 69 | #define RT6_TRACE(x...) printk(KERN_DEBUG x) |
70 | #else | 70 | #else |
71 | #define RDBG(x) | 71 | #define RDBG(x) |
72 | #define RT6_TRACE(x...) do { ; } while (0) | 72 | #define RT6_TRACE(x...) do { ; } while (0) |
73 | #endif | 73 | #endif |
74 | 74 | ||
75 | static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort, | 75 | static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort, |
76 | const struct in6_addr *dest); | 76 | const struct in6_addr *dest); |
77 | static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); | 77 | static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); |
78 | static unsigned int ip6_default_advmss(const struct dst_entry *dst); | 78 | static unsigned int ip6_default_advmss(const struct dst_entry *dst); |
79 | static unsigned int ip6_default_mtu(const struct dst_entry *dst); | 79 | static unsigned int ip6_default_mtu(const struct dst_entry *dst); |
80 | static struct dst_entry *ip6_negative_advice(struct dst_entry *); | 80 | static struct dst_entry *ip6_negative_advice(struct dst_entry *); |
81 | static void ip6_dst_destroy(struct dst_entry *); | 81 | static void ip6_dst_destroy(struct dst_entry *); |
82 | static void ip6_dst_ifdown(struct dst_entry *, | 82 | static void ip6_dst_ifdown(struct dst_entry *, |
83 | struct net_device *dev, int how); | 83 | struct net_device *dev, int how); |
84 | static int ip6_dst_gc(struct dst_ops *ops); | 84 | static int ip6_dst_gc(struct dst_ops *ops); |
85 | 85 | ||
86 | static int ip6_pkt_discard(struct sk_buff *skb); | 86 | static int ip6_pkt_discard(struct sk_buff *skb); |
87 | static int ip6_pkt_discard_out(struct sk_buff *skb); | 87 | static int ip6_pkt_discard_out(struct sk_buff *skb); |
88 | static void ip6_link_failure(struct sk_buff *skb); | 88 | static void ip6_link_failure(struct sk_buff *skb); |
89 | static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu); | 89 | static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu); |
90 | 90 | ||
91 | #ifdef CONFIG_IPV6_ROUTE_INFO | 91 | #ifdef CONFIG_IPV6_ROUTE_INFO |
92 | static struct rt6_info *rt6_add_route_info(struct net *net, | 92 | static struct rt6_info *rt6_add_route_info(struct net *net, |
93 | const struct in6_addr *prefix, int prefixlen, | 93 | const struct in6_addr *prefix, int prefixlen, |
94 | const struct in6_addr *gwaddr, int ifindex, | 94 | const struct in6_addr *gwaddr, int ifindex, |
95 | unsigned pref); | 95 | unsigned pref); |
96 | static struct rt6_info *rt6_get_route_info(struct net *net, | 96 | static struct rt6_info *rt6_get_route_info(struct net *net, |
97 | const struct in6_addr *prefix, int prefixlen, | 97 | const struct in6_addr *prefix, int prefixlen, |
98 | const struct in6_addr *gwaddr, int ifindex); | 98 | const struct in6_addr *gwaddr, int ifindex); |
99 | #endif | 99 | #endif |
100 | 100 | ||
/*
 * dst_ops->cow_metrics: switch a route's metrics from the shared
 * read-only template to the writable array held in its inet_peer.
 * Returns a pointer to writable metrics, or NULL when no peer could
 * be bound or another CPU won the install race with a read-only set.
 */
static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
{
	struct rt6_info *rt = (struct rt6_info *) dst;
	struct inet_peer *peer;
	u32 *p = NULL;

	/* Bind an inet_peer to the route if it has none yet. */
	if (!rt->rt6i_peer)
		rt6_bind_peer(rt, 1);

	peer = rt->rt6i_peer;
	if (peer) {
		u32 *old_p = __DST_METRICS_PTR(old);
		unsigned long prev, new;

		p = peer->metrics;
		/* First user of this peer's metrics: seed them from the
		 * previous (read-only) values. */
		if (inet_metrics_new(peer))
			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);

		/* Atomically publish the writable metrics pointer. */
		new = (unsigned long) p;
		prev = cmpxchg(&dst->_metrics, old, new);

		if (prev != old) {
			/* Lost the race: use whatever pointer won, unless
			 * it is still flagged read-only. */
			p = __DST_METRICS_PTR(prev);
			if (prev & DST_METRICS_READ_ONLY)
				p = NULL;
		}
	}
	return p;
}
130 | 130 | ||
/* dst_ops->neigh_lookup hook: resolve daddr in the IPv6 neighbour
 * (NDISC) table for this dst's device, creating the entry on demand. */
static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
{
	return __neigh_lookup_errno(&nd_tbl, daddr, dst->dev);
}
135 | 135 | ||
/* dst_ops for regular IPv6 routes; copied into each netns at init. */
static struct dst_ops ip6_dst_ops_template = {
	.family			=	AF_INET6,
	.protocol		=	cpu_to_be16(ETH_P_IPV6),
	.gc			=	ip6_dst_gc,
	.gc_thresh		=	1024,
	.check			=	ip6_dst_check,
	.default_advmss		=	ip6_default_advmss,
	.default_mtu		=	ip6_default_mtu,
	.cow_metrics		=	ipv6_cow_metrics,
	.destroy		=	ip6_dst_destroy,
	.ifdown			=	ip6_dst_ifdown,
	.negative_advice	=	ip6_negative_advice,
	.link_failure		=	ip6_link_failure,
	.update_pmtu		=	ip6_rt_update_pmtu,
	.local_out		=	__ip6_local_out,
	.neigh_lookup		=	ip6_neigh_lookup,
};
153 | 153 | ||
/* Blackhole dsts advertise no usable MTU. */
static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
{
	return 0;
}
158 | 158 | ||
/* PMTU updates are meaningless on a blackhole dst: deliberate no-op. */
static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
{
}
162 | 162 | ||
/* Blackhole dsts never get writable metrics: always report failure. */
static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
					 unsigned long old)
{
	return NULL;
}
168 | 168 | ||
/* dst_ops for blackhole dsts (e.g. cloned for xfrm): PMTU updates are
 * ignored and metrics are never made writable. */
static struct dst_ops ip6_dst_blackhole_ops = {
	.family			=	AF_INET6,
	.protocol		=	cpu_to_be16(ETH_P_IPV6),
	.destroy		=	ip6_dst_destroy,
	.check			=	ip6_dst_check,
	.default_mtu		=	ip6_blackhole_default_mtu,
	.default_advmss		=	ip6_default_advmss,
	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
	.neigh_lookup		=	ip6_neigh_lookup,
};
180 | 180 | ||
/* Default metrics for the template routes below: hop limit 255,
 * everything else zero. */
static const u32 ip6_template_metrics[RTAX_MAX] = {
	[RTAX_HOPLIMIT - 1] = 255,
};
184 | 184 | ||
/* Catch-all "null" route: matching packets are discarded with
 * -ENETUNREACH.  Refcounts start at 1 so the template is never freed. */
static struct rt6_info ip6_null_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -ENETUNREACH,
		.input		= ip6_pkt_discard,
		.output		= ip6_pkt_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};
199 | 199 | ||
200 | #ifdef CONFIG_IPV6_MULTIPLE_TABLES | 200 | #ifdef CONFIG_IPV6_MULTIPLE_TABLES |
201 | 201 | ||
202 | static int ip6_pkt_prohibit(struct sk_buff *skb); | 202 | static int ip6_pkt_prohibit(struct sk_buff *skb); |
203 | static int ip6_pkt_prohibit_out(struct sk_buff *skb); | 203 | static int ip6_pkt_prohibit_out(struct sk_buff *skb); |
204 | 204 | ||
/* Policy-routing "prohibit" route: packets are refused with -EACCES
 * via the ip6_pkt_prohibit* handlers. */
static struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};
219 | 219 | ||
/* Policy-routing "blackhole" route: packets are silently dropped
 * (dst_discard) with dst error -EINVAL. */
static struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};
234 | 234 | ||
235 | #endif | 235 | #endif |
236 | 236 | ||
/* allocate dst with ip6_dst_ops */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
					     struct net_device *dev,
					     int flags)
{
	struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);

	/* Zero everything after the embedded dst_entry.  NOTE(review):
	 * assumes rt6i_table is the first member following dst in
	 * struct rt6_info — confirm against the struct layout. */
	memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));

	return rt;
}
248 | 248 | ||
/* dst_ops->destroy: drop the references a rt6_info holds on its
 * inet6_dev and inet_peer when the dst is finally freed. */
static void ip6_dst_destroy(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct inet_peer *peer = rt->rt6i_peer;

	if (idev != NULL) {
		rt->rt6i_idev = NULL;
		in6_dev_put(idev);
	}
	if (peer) {
		rt->rt6i_peer = NULL;
		inet_putpeer(peer);
	}
}
264 | 264 | ||
265 | static atomic_t __rt6_peer_genid = ATOMIC_INIT(0); | 265 | static atomic_t __rt6_peer_genid = ATOMIC_INIT(0); |
266 | 266 | ||
/* Snapshot of the global peer generation counter; routes store this
 * at bind time so stale peer-derived state can be detected later. */
static u32 rt6_peer_genid(void)
{
	return atomic_read(&__rt6_peer_genid);
}
271 | 271 | ||
/*
 * Attach an inet_peer for this route's destination.  The cmpxchg()
 * makes the binding race-safe: if another CPU installed a peer first,
 * drop ours; otherwise record the current peer generation.
 */
void rt6_bind_peer(struct rt6_info *rt, int create)
{
	struct inet_peer *peer;

	peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
	if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
		inet_putpeer(peer);
	else
		rt->rt6i_peer_genid = rt6_peer_genid();
}
282 | 282 | ||
/* dst_ops->ifdown: the device is going away; re-home the route's
 * inet6_dev reference onto the netns loopback device so the dst can
 * outlive the original interface. */
static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			   int how)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *loopback_dev =
		dev_net(dev)->loopback_dev;

	if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
		struct inet6_dev *loopback_idev =
			in6_dev_get(loopback_dev);
		if (loopback_idev != NULL) {
			rt->rt6i_idev = loopback_idev;
			in6_dev_put(idev);
		}
	}
}
300 | 300 | ||
301 | static __inline__ int rt6_check_expired(const struct rt6_info *rt) | 301 | static __inline__ int rt6_check_expired(const struct rt6_info *rt) |
302 | { | 302 | { |
303 | return (rt->rt6i_flags & RTF_EXPIRES) && | 303 | return (rt->rt6i_flags & RTF_EXPIRES) && |
304 | time_after(jiffies, rt->rt6i_expires); | 304 | time_after(jiffies, rt->rt6i_expires); |
305 | } | 305 | } |
306 | 306 | ||
307 | static inline int rt6_need_strict(const struct in6_addr *daddr) | 307 | static inline int rt6_need_strict(const struct in6_addr *daddr) |
308 | { | 308 | { |
309 | return ipv6_addr_type(daddr) & | 309 | return ipv6_addr_type(daddr) & |
310 | (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK); | 310 | (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK); |
311 | } | 311 | } |
312 | 312 | ||
313 | /* | 313 | /* |
314 | * Route lookup. Any table->tb6_lock is implied. | 314 | * Route lookup. Any table->tb6_lock is implied. |
315 | */ | 315 | */ |
316 | 316 | ||
/*
 * Walk the routes sharing one fib6 node and pick the entry matching
 * the requested output interface (oif) and/or source address.  A
 * loopback route whose rt6i_idev points at oif may stand in for it.
 * Returns ip6_null_entry when RT6_LOOKUP_F_IFACE demands a device
 * match that cannot be satisfied; otherwise falls back to 'rt'.
 */
static inline struct rt6_info *rt6_device_match(struct net *net,
						    struct rt6_info *rt,
						    const struct in6_addr *saddr,
						    int oif,
						    int flags)
{
	struct rt6_info *local = NULL;
	struct rt6_info *sprt;

	/* Nothing to constrain on: return the head entry as-is. */
	if (!oif && ipv6_addr_any(saddr))
		goto out;

	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
		struct net_device *dev = sprt->rt6i_dev;

		if (oif) {
			/* Exact device match wins immediately. */
			if (dev->ifindex == oif)
				return sprt;
			if (dev->flags & IFF_LOOPBACK) {
				if (sprt->rt6i_idev == NULL ||
				    sprt->rt6i_idev->dev->ifindex != oif) {
					if (flags & RT6_LOOKUP_F_IFACE && oif)
						continue;
					if (local && (!oif ||
						      local->rt6i_idev->dev->ifindex == oif))
						continue;
				}
				/* Remember best loopback candidate. */
				local = sprt;
			}
		} else {
			/* No oif: match on source address ownership. */
			if (ipv6_chk_addr(net, saddr, dev,
					  flags & RT6_LOOKUP_F_IFACE))
				return sprt;
		}
	}

	if (oif) {
		if (local)
			return local;

		if (flags & RT6_LOOKUP_F_IFACE)
			return net->ipv6.ip6_null_entry;
	}
out:
	return rt;
}
363 | 363 | ||
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Send a Neighbour Solicitation towards rt's next hop when its
 * reachability is unknown.  The dst neighbour is read under
 * rcu_read_lock() because it may be replaced concurrently.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	rcu_read_lock();
	neigh = rt ? dst_get_neighbour(&rt->dst) : NULL;
	if (!neigh || (neigh->nud_state & NUD_VALID))
		goto out;
	read_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		/* Stamp 'updated' first so we rate-limit the probe. */
		neigh->updated = jiffies;
		read_unlock_bh(&neigh->lock);

		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
	} else {
		read_unlock_bh(&neigh->lock);
	}
out:
	rcu_read_unlock();
}
#else
/* Without router preference support, probing is compiled out. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
398 | 403 | ||
399 | /* | 404 | /* |
400 | * Default Router Selection (RFC 2461 6.3.6) | 405 | * Default Router Selection (RFC 2461 6.3.6) |
401 | */ | 406 | */ |
402 | static inline int rt6_check_dev(struct rt6_info *rt, int oif) | 407 | static inline int rt6_check_dev(struct rt6_info *rt, int oif) |
403 | { | 408 | { |
404 | struct net_device *dev = rt->rt6i_dev; | 409 | struct net_device *dev = rt->rt6i_dev; |
405 | if (!oif || dev->ifindex == oif) | 410 | if (!oif || dev->ifindex == oif) |
406 | return 2; | 411 | return 2; |
407 | if ((dev->flags & IFF_LOOPBACK) && | 412 | if ((dev->flags & IFF_LOOPBACK) && |
408 | rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif) | 413 | rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif) |
409 | return 1; | 414 | return 1; |
410 | return 0; | 415 | return 0; |
411 | } | 416 | } |
412 | 417 | ||
/*
 * Reachability score for a route's next hop: 2 = neighbour in a
 * NUD_VALID state, 1 = no gateway needed (or state indeterminate),
 * 0 = no neighbour (or NUD_FAILED with router preference enabled).
 * The dst neighbour is read under rcu_read_lock() because it may be
 * replaced concurrently.
 */
static inline int rt6_check_neigh(struct rt6_info *rt)
{
	struct neighbour *neigh;
	int m;

	rcu_read_lock();
	neigh = dst_get_neighbour(&rt->dst);
	if (rt->rt6i_flags & RTF_NONEXTHOP ||
	    !(rt->rt6i_flags & RTF_GATEWAY))
		m = 1;
	else if (neigh) {
		read_lock_bh(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
			m = 2;
#ifdef CONFIG_IPV6_ROUTER_PREF
		else if (neigh->nud_state & NUD_FAILED)
			m = 0;
#endif
		else
			m = 1;
		read_unlock_bh(&neigh->lock);
	} else
		m = 0;
	rcu_read_unlock();
	return m;
}
435 | 444 | ||
/*
 * Combined route score: device match in the low bits, decoded router
 * preference (when enabled) shifted above it.  Returns -1 when a
 * strict interface or reachability requirement is not met.
 */
static int rt6_score_route(struct rt6_info *rt, int oif,
			   int strict)
{
	int m, n;

	m = rt6_check_dev(rt, oif);
	if (!m && (strict & RT6_LOOKUP_F_IFACE))
		return -1;
#ifdef CONFIG_IPV6_ROUTER_PREF
	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
#endif
	n = rt6_check_neigh(rt);
	if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
		return -1;
	return m;
}
452 | 461 | ||
/*
 * Score 'rt' against the best candidate so far and return the new
 * best match; *mpri tracks the best score.  When reachability is
 * required, the entry that loses the comparison gets probed so its
 * neighbour state can recover.
 */
static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
				   int *mpri, struct rt6_info *match)
{
	int m;

	if (rt6_check_expired(rt))
		goto out;

	m = rt6_score_route(rt, oif, strict);
	if (m < 0)
		goto out;

	if (m > *mpri) {
		if (strict & RT6_LOOKUP_F_REACHABLE)
			rt6_probe(match);
		*mpri = m;
		match = rt;
	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
		rt6_probe(rt);
	}

out:
	return match;
}
477 | 486 | ||
/*
 * Scan all routes of the given metric, starting at the round-robin
 * head and wrapping around via fn->leaf, and return the best-scoring
 * match (NULL if none qualifies).
 */
static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
				     struct rt6_info *rr_head,
				     u32 metric, int oif, int strict)
{
	struct rt6_info *rt, *match;
	int mpri = -1;

	match = NULL;
	/* From the round-robin head to the end of the metric group... */
	for (rt = rr_head; rt && rt->rt6i_metric == metric;
	     rt = rt->dst.rt6_next)
		match = find_match(rt, oif, strict, &mpri, match);
	/* ...then wrap from the leaf back up to the head. */
	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
	     rt = rt->dst.rt6_next)
		match = find_match(rt, oif, strict, &mpri, match);

	return match;
}
495 | 504 | ||
/*
 * Select the route to use from a fib6 node.  Uses the node's
 * round-robin pointer (rr_ptr) as the scan start; when nothing
 * reachable matched, advances rr_ptr so the next lookup tries a
 * different router.  Falls back to ip6_null_entry when no route
 * qualifies.
 */
static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
{
	struct rt6_info *match, *rt0;
	struct net *net;

	RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
		  __func__, fn->leaf, oif);

	rt0 = fn->rr_ptr;
	if (!rt0)
		fn->rr_ptr = rt0 = fn->leaf;

	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);

	if (!match &&
	    (strict & RT6_LOOKUP_F_REACHABLE)) {
		struct rt6_info *next = rt0->dst.rt6_next;

		/* no entries matched; do round-robin */
		if (!next || next->rt6i_metric != rt0->rt6i_metric)
			next = fn->leaf;

		if (next != rt0)
			fn->rr_ptr = next;
	}

	RT6_TRACE("%s() => %p\n",
		  __func__, match);

	net = dev_net(rt0->rt6i_dev);
	return match ? match : net->ipv6.ip6_null_entry;
}
528 | 537 | ||
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information Option (RFC 4191) carried in a Router
 * Advertisement received on @dev from gateway @gwaddr.  @opt/@len delimit
 * the raw option bytes.  Depending on the advertised lifetime, the
 * corresponding RTF_ROUTEINFO route is added, refreshed or withdrawn.
 *
 * Returns 0 on success or -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length: a prefix longer than
	 * 64 bits needs at least two 8-octet units of option payload,
	 * a non-zero prefix needs at least one. */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	if (rt && !lifetime) {
		/* Zero lifetime: the previously learned route is withdrawn. */
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		/* Refresh the preference bits on an existing route. */
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime)) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		/* Drop the reference taken by the get/add helpers above. */
		dst_release(&rt->dst);
	}
	return 0;
}
#endif
602 | 611 | ||
/*
 * Walk back up the fib6 tree when the lookup landed on the null entry.
 * Relies on the surrounding function providing a local `rt`, a local
 * `fn`, and the `out`/`restart` labels.  On finding an RTN_RTINFO node
 * it jumps to `restart`; on reaching the tree root it jumps to `out`.
 */
#define BACKTRACK(__net, saddr) \
do { \
	if (rt == __net->ipv6.ip6_null_entry) { \
		struct fib6_node *pn; \
		while (1) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
620 | 629 | ||
/*
 * Simple (non-caching) policy lookup in one fib6 table: find the best
 * matching route for @fl6, filtered by device/flags, taking a reference
 * on the returned dst.  Never returns NULL (falls back to the null entry).
 */
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	rt = fn->leaf;
	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
	BACKTRACK(net, &fl6->saddr);	/* may jump back to restart or to out */
out:
	dst_use(&rt->dst, jiffies);	/* hold + update lastuse/__use */
	read_unlock_bh(&table->tb6_lock);
	return rt;

}
640 | 649 | ||
/*
 * Convenience route lookup by address pair.  @saddr may be NULL.
 * @strict forces an interface match on @oif.  Returns a referenced
 * rt6_info on success or NULL if only an error route was found
 * (the reference on the error dst is dropped here).
 */
struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
			    const struct in6_addr *saddr, int oif, int strict)
{
	struct flowi6 fl6 = {
		.flowi6_oif = oif,
		.daddr = *daddr,
	};
	struct dst_entry *dst;
	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;

	if (saddr) {
		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	}

	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
	if (dst->error == 0)
		return (struct rt6_info *) dst;

	dst_release(dst);

	return NULL;
}

EXPORT_SYMBOL(rt6_lookup);
666 | 675 | ||
/* ip6_ins_rt is called with FREE table->tb6_lock.
   It takes a new route entry; if the addition fails for any reason the
   route is freed.  In any case, if the caller does not hold a reference,
   the route may be destroyed.
 */

/* Insert @rt into its fib6 table under the table write lock.
 * Returns the fib6_add() result (0 on success, negative errno on failure). */
static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
{
	int err;
	struct fib6_table *table;

	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);
	err = fib6_add(&table->tb6_root, rt, info);
	write_unlock_bh(&table->tb6_lock);

	return err;
}
685 | 694 | ||
/* Insert @rt with default netlink info (netns derived from the route's
 * device, no notifying portid).  Thin wrapper around __ip6_ins_rt(). */
int ip6_ins_rt(struct rt6_info *rt)
{
	struct nl_info info = {
		.nl_net = dev_net(rt->rt6i_dev),
	};
	return __ip6_ins_rt(rt, &info);
}
693 | 702 | ||
/*
 * Clone @ort into a host (plen 128) RTF_CACHE route for @daddr/@saddr and
 * bind a neighbour entry to it.  On neighbour-table overflow the dst GC is
 * kicked once (with temporarily relaxed gc sysctls) and the neighbour
 * lookup retried, but only when not in softirq context.
 *
 * Returns the new route, or NULL on allocation/neighbour failure.
 */
static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
				      const struct in6_addr *daddr,
				      const struct in6_addr *saddr)
{
	struct rt6_info *rt;

	/*
	 *	Clone the route.
	 */

	rt = ip6_rt_copy(ort, daddr);

	if (rt) {
		struct neighbour *neigh;
		/* Retrying GC is only safe outside softirq context. */
		int attempts = !in_softirq();

		if (!(rt->rt6i_flags&RTF_GATEWAY)) {
			if (rt->rt6i_dst.plen != 128 &&
			    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
				rt->rt6i_flags |= RTF_ANYCAST;
			/* On-link route: next hop is the destination itself. */
			ipv6_addr_copy(&rt->rt6i_gateway, daddr);
		}

		rt->rt6i_dst.plen = 128;
		rt->rt6i_flags |= RTF_CACHE;
		rt->dst.flags |= DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
		if (rt->rt6i_src.plen && saddr) {
			ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
			rt->rt6i_src.plen = 128;
		}
#endif

	retry:
		neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
		if (IS_ERR(neigh)) {
			struct net *net = dev_net(rt->rt6i_dev);
			int saved_rt_min_interval =
				net->ipv6.sysctl.ip6_rt_gc_min_interval;
			int saved_rt_elasticity =
				net->ipv6.sysctl.ip6_rt_gc_elasticity;

			if (attempts-- > 0) {
				/* Force an aggressive GC pass, then restore
				 * the tunables before retrying. */
				net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
				net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;

				ip6_dst_gc(&net->ipv6.ip6_dst_ops);

				net->ipv6.sysctl.ip6_rt_gc_elasticity =
					saved_rt_elasticity;
				net->ipv6.sysctl.ip6_rt_gc_min_interval =
					saved_rt_min_interval;
				goto retry;
			}

			if (net_ratelimit())
				printk(KERN_WARNING
				       "ipv6: Neighbour table overflow.\n");
			dst_free(&rt->dst);
			return NULL;
		}
		dst_set_neighbour(&rt->dst, neigh);

	}

	return rt;
}
762 | 771 | ||
/*
 * Clone @ort into a host (plen 128) RTF_CACHE route for @daddr, sharing
 * (via neigh_clone) the parent's neighbour.  The raw neighbour accessor is
 * used because the caller holds a reference on @ort, keeping the neighbour
 * alive without an RCU read section.  Returns NULL if the copy fails.
 */
static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
					const struct in6_addr *daddr)
{
	struct rt6_info *rt = ip6_rt_copy(ort, daddr);

	if (rt) {
		rt->rt6i_dst.plen = 128;
		rt->rt6i_flags |= RTF_CACHE;
		rt->dst.flags |= DST_HOST;
		dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_raw(&ort->dst)));
	}
	return rt;
}
776 | 785 | ||
/*
 * Full policy route lookup with cloning: select the best route in @table
 * for @fl6 (preferring reachable next hops unless forwarding is enabled),
 * and if it is not already a cached host route, COW/clone it into an
 * RTF_CACHE entry and insert that into the table.  Handles the race where
 * another CPU inserts the same clone while tb6_lock is dropped by
 * relooking up (bounded number of attempts).
 *
 * Always returns a referenced route (possibly the null entry).
 */
static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
				      struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt, *nrt;
	int strict = 0;
	int attempts = 3;
	int err;
	int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;

	strict |= flags & RT6_LOOKUP_F_IFACE;

relookup:
	read_lock_bh(&table->tb6_lock);

restart_2:
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);

restart:
	rt = rt6_select(fn, oif, strict | reachable);

	BACKTRACK(net, &fl6->saddr);
	if (rt == net->ipv6.ip6_null_entry ||
	    rt->rt6i_flags & RTF_CACHE)
		goto out;

	/* Hold the route so it survives dropping the table lock. */
	dst_hold(&rt->dst);
	read_unlock_bh(&table->tb6_lock);

	if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
	else if (!(rt->dst.flags & DST_HOST))
		nrt = rt6_alloc_clone(rt, &fl6->daddr);
	else
		goto out2;

	dst_release(&rt->dst);
	rt = nrt ? : net->ipv6.ip6_null_entry;

	dst_hold(&rt->dst);
	if (nrt) {
		err = ip6_ins_rt(nrt);
		if (!err)
			goto out2;
	}

	if (--attempts <= 0)
		goto out2;

	/*
	 * Race condition! In the gap, when table->tb6_lock was
	 * released someone could insert this route.  Relookup.
	 */
	dst_release(&rt->dst);
	goto relookup;

out:
	if (reachable) {
		/* Nothing reachable found: retry without the restriction. */
		reachable = 0;
		goto restart_2;
	}
	dst_hold(&rt->dst);
	read_unlock_bh(&table->tb6_lock);
out2:
	rt->dst.lastuse = jiffies;
	rt->dst.__use++;

	return rt;
}
846 | 855 | ||
847 | static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table, | 856 | static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table, |
848 | struct flowi6 *fl6, int flags) | 857 | struct flowi6 *fl6, int flags) |
849 | { | 858 | { |
850 | return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags); | 859 | return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags); |
851 | } | 860 | } |
852 | 861 | ||
/*
 * Route an incoming IPv6 packet: build a flow key from the IPv6 header
 * and attach the looked-up dst to @skb.  Multicast/link-local destinations
 * force a strict interface match, except on PIM register devices.
 */
void ip6_route_input(struct sk_buff *skb)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	struct net *net = dev_net(skb->dev);
	int flags = RT6_LOOKUP_F_HAS_SADDR;
	struct flowi6 fl6 = {
		.flowi6_iif = skb->dev->ifindex,
		.daddr = iph->daddr,
		.saddr = iph->saddr,
		.flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
		.flowi6_mark = skb->mark,
		.flowi6_proto = iph->nexthdr,
	};

	if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
		flags |= RT6_LOOKUP_F_IFACE;

	skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
}
872 | 881 | ||
873 | static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table, | 882 | static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table, |
874 | struct flowi6 *fl6, int flags) | 883 | struct flowi6 *fl6, int flags) |
875 | { | 884 | { |
876 | return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags); | 885 | return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags); |
877 | } | 886 | } |
878 | 887 | ||
/*
 * Output route lookup for locally generated traffic.  @sk may be NULL.
 * A bound device or a strict destination forces an interface match; a
 * given source address enables source-based selection, otherwise the
 * socket's source-address preferences are used.
 */
struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
				    struct flowi6 *fl6)
{
	int flags = 0;

	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
		flags |= RT6_LOOKUP_F_IFACE;

	if (!ipv6_addr_any(&fl6->saddr))
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	else if (sk)
		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);

	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
}

EXPORT_SYMBOL(ip6_route_output);
896 | 905 | ||
/*
 * Convert @dst_orig into a "blackhole" route: a copy whose input/output
 * handlers silently discard packets, used e.g. while an xfrm lookup is
 * pending.  Consumes the caller's reference on @dst_orig.
 *
 * Returns the new dst, or ERR_PTR(-ENOMEM) on allocation failure.
 */
struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
	struct dst_entry *new = NULL;

	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
	if (rt) {
		/* Zero everything after the embedded dst_entry before
		 * copying selected fields from the original. */
		memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));

		new = &rt->dst;

		new->__use = 1;
		new->input = dst_discard;
		new->output = dst_discard;

		if (dst_metrics_read_only(&ort->dst))
			new->_metrics = ort->dst._metrics;
		else
			dst_copy_metrics(new, &ort->dst);
		rt->rt6i_idev = ort->rt6i_idev;
		if (rt->rt6i_idev)
			in6_dev_hold(rt->rt6i_idev);
		rt->rt6i_expires = 0;

		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
		rt->rt6i_metric = 0;

		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif

		/* NOTE(review): dst_free() here appears to hand the entry to
		 * the dst GC while the caller still uses it via the initial
		 * reference taken by dst_alloc() — confirm against dst_alloc/
		 * dst_free semantics for blackhole dsts. */
		dst_free(new);
	}

	dst_release(dst_orig);
	return new ? new : ERR_PTR(-ENOMEM);
}
936 | 945 | ||
937 | /* | 946 | /* |
938 | * Destination cache support functions | 947 | * Destination cache support functions |
939 | */ | 948 | */ |
940 | 949 | ||
/*
 * dst_ops->check: validate a cached dst.  The dst is still usable when its
 * fib6 node exists and the node's serial number matches @cookie (i.e. the
 * tree has not changed under it); in that case the inetpeer binding is
 * refreshed if the peer generation moved on.  Returns NULL when stale.
 */
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
{
	struct rt6_info *rt;

	rt = (struct rt6_info *) dst;

	if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
		if (rt->rt6i_peer_genid != rt6_peer_genid()) {
			if (!rt->rt6i_peer)
				rt6_bind_peer(rt, 0);
			rt->rt6i_peer_genid = rt6_peer_genid();
		}
		return dst;
	}
	return NULL;
}
957 | 966 | ||
/*
 * dst_ops->negative_advice: a caller reports the route seems bad.
 * Expired cached clones are deleted from the table; non-cache routes are
 * simply released so the caller re-looks up.  Returns the (possibly NULL)
 * dst the caller should keep.
 */
static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *) dst;

	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE) {
			if (rt6_check_expired(rt)) {
				ip6_del_rt(rt);
				dst = NULL;
			}
		} else {
			dst_release(dst);
			dst = NULL;
		}
	}
	return dst;
}
975 | 984 | ||
/*
 * dst_ops->link_failure: the next hop for @skb is unreachable.  Send an
 * ICMPv6 "address unreachable" back, immediately expire a cached clone,
 * or invalidate the fib6 node's serial number for a default route so
 * subsequent ip6_dst_check() calls fail and force a fresh lookup.
 */
static void ip6_link_failure(struct sk_buff *skb)
{
	struct rt6_info *rt;

	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);

	rt = (struct rt6_info *) skb_dst(skb);
	if (rt) {
		if (rt->rt6i_flags&RTF_CACHE) {
			dst_set_expires(&rt->dst, 0);
			rt->rt6i_flags |= RTF_EXPIRES;
		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
			rt->rt6i_node->fn_sernum = -1;
	}
}
991 | 1000 | ||
992 | static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) | 1001 | static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) |
993 | { | 1002 | { |
994 | struct rt6_info *rt6 = (struct rt6_info*)dst; | 1003 | struct rt6_info *rt6 = (struct rt6_info*)dst; |
995 | 1004 | ||
996 | if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) { | 1005 | if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) { |
997 | rt6->rt6i_flags |= RTF_MODIFIED; | 1006 | rt6->rt6i_flags |= RTF_MODIFIED; |
998 | if (mtu < IPV6_MIN_MTU) { | 1007 | if (mtu < IPV6_MIN_MTU) { |
999 | u32 features = dst_metric(dst, RTAX_FEATURES); | 1008 | u32 features = dst_metric(dst, RTAX_FEATURES); |
1000 | mtu = IPV6_MIN_MTU; | 1009 | mtu = IPV6_MIN_MTU; |
1001 | features |= RTAX_FEATURE_ALLFRAG; | 1010 | features |= RTAX_FEATURE_ALLFRAG; |
1002 | dst_metric_set(dst, RTAX_FEATURES, features); | 1011 | dst_metric_set(dst, RTAX_FEATURES, features); |
1003 | } | 1012 | } |
1004 | dst_metric_set(dst, RTAX_MTU, mtu); | 1013 | dst_metric_set(dst, RTAX_MTU, mtu); |
1005 | } | 1014 | } |
1006 | } | 1015 | } |
1007 | 1016 | ||
1008 | static unsigned int ip6_default_advmss(const struct dst_entry *dst) | 1017 | static unsigned int ip6_default_advmss(const struct dst_entry *dst) |
1009 | { | 1018 | { |
1010 | struct net_device *dev = dst->dev; | 1019 | struct net_device *dev = dst->dev; |
1011 | unsigned int mtu = dst_mtu(dst); | 1020 | unsigned int mtu = dst_mtu(dst); |
1012 | struct net *net = dev_net(dev); | 1021 | struct net *net = dev_net(dev); |
1013 | 1022 | ||
1014 | mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr); | 1023 | mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr); |
1015 | 1024 | ||
1016 | if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss) | 1025 | if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss) |
1017 | mtu = net->ipv6.sysctl.ip6_rt_min_advmss; | 1026 | mtu = net->ipv6.sysctl.ip6_rt_min_advmss; |
1018 | 1027 | ||
1019 | /* | 1028 | /* |
1020 | * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and | 1029 | * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and |
1021 | * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. | 1030 | * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. |
1022 | * IPV6_MAXPLEN is also valid and means: "any MSS, | 1031 | * IPV6_MAXPLEN is also valid and means: "any MSS, |
1023 | * rely only on pmtu discovery" | 1032 | * rely only on pmtu discovery" |
1024 | */ | 1033 | */ |
1025 | if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr)) | 1034 | if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr)) |
1026 | mtu = IPV6_MAXPLEN; | 1035 | mtu = IPV6_MAXPLEN; |
1027 | return mtu; | 1036 | return mtu; |
1028 | } | 1037 | } |
1029 | 1038 | ||
1030 | static unsigned int ip6_default_mtu(const struct dst_entry *dst) | 1039 | static unsigned int ip6_default_mtu(const struct dst_entry *dst) |
1031 | { | 1040 | { |
1032 | unsigned int mtu = IPV6_MIN_MTU; | 1041 | unsigned int mtu = IPV6_MIN_MTU; |
1033 | struct inet6_dev *idev; | 1042 | struct inet6_dev *idev; |
1034 | 1043 | ||
1035 | rcu_read_lock(); | 1044 | rcu_read_lock(); |
1036 | idev = __in6_dev_get(dst->dev); | 1045 | idev = __in6_dev_get(dst->dev); |
1037 | if (idev) | 1046 | if (idev) |
1038 | mtu = idev->cnf.mtu6; | 1047 | mtu = idev->cnf.mtu6; |
1039 | rcu_read_unlock(); | 1048 | rcu_read_unlock(); |
1040 | 1049 | ||
1041 | return mtu; | 1050 | return mtu; |
1042 | } | 1051 | } |
1043 | 1052 | ||
/* Singly linked list (via dst->next) of ICMPv6 dsts awaiting GC,
 * protected by icmp6_dst_lock. */
static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);

/*
 * Allocate a throwaway dst for sending an ICMPv6 packet to @addr on @dev.
 * If @neigh is given a reference is taken on it, otherwise a neighbour is
 * resolved via ndisc.  The dst is chained onto icmp6_dst_gc_list and
 * reclaimed by icmp6_dst_gc() once its refcount drops.
 *
 * NOTE(review): on allocation failure this returns &rt->dst with rt == NULL;
 * that only yields NULL because dst appears to be the first member of
 * rt6_info — callers must check for NULL.  Confirm the struct layout.
 */
struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
				  struct neighbour *neigh,
				  const struct in6_addr *addr)
{
	struct rt6_info *rt;
	struct inet6_dev *idev = in6_dev_get(dev);
	struct net *net = dev_net(dev);

	if (unlikely(idev == NULL))
		return NULL;

	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
	if (unlikely(rt == NULL)) {
		in6_dev_put(idev);
		goto out;
	}

	if (neigh)
		neigh_hold(neigh);
	else {
		neigh = ndisc_get_neigh(dev, addr);
		if (IS_ERR(neigh))
			neigh = NULL;	/* send without a resolved neighbour */
	}

	rt->rt6i_idev = idev;
	dst_set_neighbour(&rt->dst, neigh);
	atomic_set(&rt->dst.__refcnt, 1);
	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
	rt->dst.output = ip6_output;

	/* Queue for later reclamation by icmp6_dst_gc(). */
	spin_lock_bh(&icmp6_dst_lock);
	rt->dst.next = icmp6_dst_gc_list;
	icmp6_dst_gc_list = &rt->dst;
	spin_unlock_bh(&icmp6_dst_lock);

	fib6_force_start_gc(net);

out:
	return &rt->dst;
}
1089 | 1098 | ||
/*
 * Reclaim unreferenced entries from icmp6_dst_gc_list.
 * Returns the number of entries still referenced (and thus kept),
 * i.e. non-zero when another GC pass will be needed.
 */
int icmp6_dst_gc(void)
{
	struct dst_entry *dst, **pprev;
	int more = 0;

	spin_lock_bh(&icmp6_dst_lock);
	pprev = &icmp6_dst_gc_list;

	while ((dst = *pprev) != NULL) {
		if (!atomic_read(&dst->__refcnt)) {
			/* Unlink and free the dead entry. */
			*pprev = dst->next;
			dst_free(dst);
		} else {
			pprev = &dst->next;
			++more;
		}
	}

	spin_unlock_bh(&icmp6_dst_lock);

	return more;
}
1112 | 1121 | ||
1113 | static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg), | 1122 | static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg), |
1114 | void *arg) | 1123 | void *arg) |
1115 | { | 1124 | { |
1116 | struct dst_entry *dst, **pprev; | 1125 | struct dst_entry *dst, **pprev; |
1117 | 1126 | ||
1118 | spin_lock_bh(&icmp6_dst_lock); | 1127 | spin_lock_bh(&icmp6_dst_lock); |
1119 | pprev = &icmp6_dst_gc_list; | 1128 | pprev = &icmp6_dst_gc_list; |
1120 | while ((dst = *pprev) != NULL) { | 1129 | while ((dst = *pprev) != NULL) { |
1121 | struct rt6_info *rt = (struct rt6_info *) dst; | 1130 | struct rt6_info *rt = (struct rt6_info *) dst; |
1122 | if (func(rt, arg)) { | 1131 | if (func(rt, arg)) { |
1123 | *pprev = dst->next; | 1132 | *pprev = dst->next; |
1124 | dst_free(dst); | 1133 | dst_free(dst); |
1125 | } else { | 1134 | } else { |
1126 | pprev = &dst->next; | 1135 | pprev = &dst->next; |
1127 | } | 1136 | } |
1128 | } | 1137 | } |
1129 | spin_unlock_bh(&icmp6_dst_lock); | 1138 | spin_unlock_bh(&icmp6_dst_lock); |
1130 | } | 1139 | } |
1131 | 1140 | ||
1132 | static int ip6_dst_gc(struct dst_ops *ops) | 1141 | static int ip6_dst_gc(struct dst_ops *ops) |
1133 | { | 1142 | { |
1134 | unsigned long now = jiffies; | 1143 | unsigned long now = jiffies; |
1135 | struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops); | 1144 | struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops); |
1136 | int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval; | 1145 | int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval; |
1137 | int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size; | 1146 | int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size; |
1138 | int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity; | 1147 | int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity; |
1139 | int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout; | 1148 | int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout; |
1140 | unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc; | 1149 | unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc; |
1141 | int entries; | 1150 | int entries; |
1142 | 1151 | ||
1143 | entries = dst_entries_get_fast(ops); | 1152 | entries = dst_entries_get_fast(ops); |
1144 | if (time_after(rt_last_gc + rt_min_interval, now) && | 1153 | if (time_after(rt_last_gc + rt_min_interval, now) && |
1145 | entries <= rt_max_size) | 1154 | entries <= rt_max_size) |
1146 | goto out; | 1155 | goto out; |
1147 | 1156 | ||
1148 | net->ipv6.ip6_rt_gc_expire++; | 1157 | net->ipv6.ip6_rt_gc_expire++; |
1149 | fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net); | 1158 | fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net); |
1150 | net->ipv6.ip6_rt_last_gc = now; | 1159 | net->ipv6.ip6_rt_last_gc = now; |
1151 | entries = dst_entries_get_slow(ops); | 1160 | entries = dst_entries_get_slow(ops); |
1152 | if (entries < ops->gc_thresh) | 1161 | if (entries < ops->gc_thresh) |
1153 | net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1; | 1162 | net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1; |
1154 | out: | 1163 | out: |
1155 | net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity; | 1164 | net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity; |
1156 | return entries > rt_max_size; | 1165 | return entries > rt_max_size; |
1157 | } | 1166 | } |
1158 | 1167 | ||
1159 | /* Clean host part of a prefix. Not necessary in radix tree, | 1168 | /* Clean host part of a prefix. Not necessary in radix tree, |
1160 | but results in cleaner routing tables. | 1169 | but results in cleaner routing tables. |
1161 | 1170 | ||
1162 | Remove it only when all the things will work! | 1171 | Remove it only when all the things will work! |
1163 | */ | 1172 | */ |
1164 | 1173 | ||
1165 | int ip6_dst_hoplimit(struct dst_entry *dst) | 1174 | int ip6_dst_hoplimit(struct dst_entry *dst) |
1166 | { | 1175 | { |
1167 | int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT); | 1176 | int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT); |
1168 | if (hoplimit == 0) { | 1177 | if (hoplimit == 0) { |
1169 | struct net_device *dev = dst->dev; | 1178 | struct net_device *dev = dst->dev; |
1170 | struct inet6_dev *idev; | 1179 | struct inet6_dev *idev; |
1171 | 1180 | ||
1172 | rcu_read_lock(); | 1181 | rcu_read_lock(); |
1173 | idev = __in6_dev_get(dev); | 1182 | idev = __in6_dev_get(dev); |
1174 | if (idev) | 1183 | if (idev) |
1175 | hoplimit = idev->cnf.hop_limit; | 1184 | hoplimit = idev->cnf.hop_limit; |
1176 | else | 1185 | else |
1177 | hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit; | 1186 | hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit; |
1178 | rcu_read_unlock(); | 1187 | rcu_read_unlock(); |
1179 | } | 1188 | } |
1180 | return hoplimit; | 1189 | return hoplimit; |
1181 | } | 1190 | } |
1182 | EXPORT_SYMBOL(ip6_dst_hoplimit); | 1191 | EXPORT_SYMBOL(ip6_dst_hoplimit); |
1183 | 1192 | ||
1184 | /* | 1193 | /* |
1185 | * | 1194 | * |
1186 | */ | 1195 | */ |
1187 | 1196 | ||
/*
 * ip6_route_add - add an IPv6 route described by @cfg to the FIB.
 *
 * Validates the config, allocates and fills a struct rt6_info, resolves
 * the output device/idev (taking references on both), resolves the
 * next-hop neighbour for gateway routes, and inserts the route via
 * __ip6_ins_rt() (which consumes rt/dev/idev on success).
 *
 * Returns 0 on success or a negative errno; on any error path all
 * references taken here (dev, idev, rt) are dropped under "out:".
 */
int ip6_route_add(struct fib6_config *cfg)
{
	int err;
	struct net *net = cfg->fc_nlinfo.nl_net;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	struct fib6_table *table;
	int addr_type;

	/* Prefix lengths beyond 128 bits are malformed. */
	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
		return -EINVAL;
#ifndef CONFIG_IPV6_SUBTREES
	/* Source-routing prefixes require subtree support. */
	if (cfg->fc_src_len)
		return -EINVAL;
#endif
	if (cfg->fc_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(net, cfg->fc_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (cfg->fc_metric == 0)
		cfg->fc_metric = IP6_RT_PRIO_USER;

	table = fib6_new_table(net, cfg->fc_table);
	if (table == NULL) {
		err = -ENOBUFS;
		goto out;
	}

	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);

	if (rt == NULL) {
		err = -ENOMEM;
		goto out;
	}

	rt->dst.obsolete = -1;
	rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
				jiffies + clock_t_to_jiffies(cfg->fc_expires) :
				0;

	if (cfg->fc_protocol == RTPROT_UNSPEC)
		cfg->fc_protocol = RTPROT_BOOT;
	rt->rt6i_protocol = cfg->fc_protocol;

	addr_type = ipv6_addr_type(&cfg->fc_dst);

	/* Pick the input handler from the destination type / flags. */
	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->dst.input = ip6_mc_input;
	else if (cfg->fc_flags & RTF_LOCAL)
		rt->dst.input = ip6_input;
	else
		rt->dst.input = ip6_forward;

	rt->dst.output = ip6_output;

	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->rt6i_dst.plen = cfg->fc_dst_len;
	if (rt->rt6i_dst.plen == 128)
		rt->dst.flags |= DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
	rt->rt6i_src.plen = cfg->fc_src_len;
#endif

	rt->rt6i_metric = cfg->fc_metric;

	/* We cannot add true routes via loopback here,
	   they would result in kernel looping; promote them to reject routes
	 */
	if ((cfg->fc_flags & RTF_REJECT) ||
	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
					      && !(cfg->fc_flags&RTF_LOCAL))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != net->loopback_dev) {
			if (dev) {
				/* Swap the caller-specified device for loopback. */
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = net->loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->dst.output = ip6_pkt_discard_out;
		rt->dst.input = ip6_pkt_discard;
		rt->dst.error = -ENETUNREACH;
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		goto install_route;
	}

	if (cfg->fc_flags & RTF_GATEWAY) {
		const struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &cfg->fc_gateway;
		ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
		gwa_type = ipv6_addr_type(gw_addr);

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt;

			/* IPv6 strictly inhibits using not link-local
			   addresses as nexthop address.
			   Otherwise, router will not able to send redirects.
			   It is very good, but in some (rare!) circumstances
			   (SIT, PtP, NBMA NOARP links) it is handy to allow
			   some exceptions. --ANK
			 */
			err = -EINVAL;
			if (!(gwa_type&IPV6_ADDR_UNICAST))
				goto out;

			/* Find the route we would use to reach the gateway. */
			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);

			err = -EHOSTUNREACH;
			if (grt == NULL)
				goto out;
			if (dev) {
				if (dev != grt->rt6i_dev) {
					dst_release(&grt->dst);
					goto out;
				}
			} else {
				/* Inherit device/idev from the gateway route. */
				dev = grt->rt6i_dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			/* The gateway itself must be directly reachable. */
			if (!(grt->rt6i_flags&RTF_GATEWAY))
				err = 0;
			dst_release(&grt->dst);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (dev == NULL || (dev->flags&IFF_LOOPBACK))
			goto out;
	}

	err = -ENODEV;
	if (dev == NULL)
		goto out;

	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
		/* A preferred source address must be configured on @dev. */
		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
			err = -EINVAL;
			goto out;
		}
		ipv6_addr_copy(&rt->rt6i_prefsrc.addr, &cfg->fc_prefsrc);
		rt->rt6i_prefsrc.plen = 128;
	} else
		rt->rt6i_prefsrc.plen = 0;

	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
		/* Resolve (or create) the next-hop neighbour entry now. */
		struct neighbour *n = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
		if (IS_ERR(n)) {
			err = PTR_ERR(n);
			goto out;
		}
		dst_set_neighbour(&rt->dst, n);
	}

	rt->rt6i_flags = cfg->fc_flags;

install_route:
	if (cfg->fc_mx) {
		/* Copy route metrics (RTAX_*) from the netlink attributes. */
		struct nlattr *nla;
		int remaining;

		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
			int type = nla_type(nla);

			if (type) {
				if (type > RTAX_MAX) {
					err = -EINVAL;
					goto out;
				}

				dst_metric_set(&rt->dst, type, nla_get_u32(nla));
			}
		}
	}

	rt->dst.dev = dev;
	rt->rt6i_idev = idev;
	rt->rt6i_table = table;

	cfg->fc_nlinfo.nl_net = dev_net(dev);

	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);

out:
	/* Error path: drop every reference taken above. */
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_free(&rt->dst);
	return err;
}
1400 | 1409 | ||
1401 | static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) | 1410 | static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) |
1402 | { | 1411 | { |
1403 | int err; | 1412 | int err; |
1404 | struct fib6_table *table; | 1413 | struct fib6_table *table; |
1405 | struct net *net = dev_net(rt->rt6i_dev); | 1414 | struct net *net = dev_net(rt->rt6i_dev); |
1406 | 1415 | ||
1407 | if (rt == net->ipv6.ip6_null_entry) | 1416 | if (rt == net->ipv6.ip6_null_entry) |
1408 | return -ENOENT; | 1417 | return -ENOENT; |
1409 | 1418 | ||
1410 | table = rt->rt6i_table; | 1419 | table = rt->rt6i_table; |
1411 | write_lock_bh(&table->tb6_lock); | 1420 | write_lock_bh(&table->tb6_lock); |
1412 | 1421 | ||
1413 | err = fib6_del(rt, info); | 1422 | err = fib6_del(rt, info); |
1414 | dst_release(&rt->dst); | 1423 | dst_release(&rt->dst); |
1415 | 1424 | ||
1416 | write_unlock_bh(&table->tb6_lock); | 1425 | write_unlock_bh(&table->tb6_lock); |
1417 | 1426 | ||
1418 | return err; | 1427 | return err; |
1419 | } | 1428 | } |
1420 | 1429 | ||
/*
 * Convenience wrapper around __ip6_del_rt(): deletes @rt with a
 * minimal nl_info carrying only the route's network namespace.
 */
int ip6_del_rt(struct rt6_info *rt)
{
	struct nl_info info = {
		.nl_net = dev_net(rt->rt6i_dev),
	};
	return __ip6_del_rt(rt, &info);
}
1428 | 1437 | ||
/*
 * Delete the first route in the FIB matching @cfg (destination/source
 * prefix, and — when given — ifindex, gateway and metric).
 *
 * The table is walked under its read lock; on a match we take a hold
 * on the route and drop the read lock *before* calling __ip6_del_rt(),
 * which needs the write lock.  Returns -ESRCH when nothing matches.
 */
static int ip6_route_del(struct fib6_config *cfg)
{
	struct fib6_table *table;
	struct fib6_node *fn;
	struct rt6_info *rt;
	int err = -ESRCH;

	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
	if (table == NULL)
		return err;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_locate(&table->tb6_root,
			 &cfg->fc_dst, cfg->fc_dst_len,
			 &cfg->fc_src, cfg->fc_src_len);

	if (fn) {
		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
			/* Optional filters: skip on any mismatch. */
			if (cfg->fc_ifindex &&
			    (rt->rt6i_dev == NULL ||
			     rt->rt6i_dev->ifindex != cfg->fc_ifindex))
				continue;
			if (cfg->fc_flags & RTF_GATEWAY &&
			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
				continue;
			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
				continue;
			/* Pin the route, release the read lock, then delete. */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);

			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
		}
	}
	read_unlock_bh(&table->tb6_lock);

	return err;
}
1467 | 1476 | ||
1468 | /* | 1477 | /* |
1469 | * Handle redirects | 1478 | * Handle redirects |
1470 | */ | 1479 | */ |
/*
 * Flow key for redirect route lookups: a plain flowi6 extended with
 * the address of the router that sent the redirect.  fl6 must remain
 * the first member — __ip6_route_redirect() recovers this struct by
 * casting the flowi6 pointer it is handed.
 */
struct ip6rd_flowi {
	struct flowi6 fl6;
	struct in6_addr gateway;
};
1475 | 1484 | ||
/*
 * Table-lookup callback used by ip6_route_redirect(): find the route
 * whose next hop is the router that sent the redirect (@fl6 is really
 * a struct ip6rd_flowi carrying that gateway address).
 *
 * Falls back to the null entry when no candidate matches; BACKTRACK()
 * may retry the walk via the "restart" label and exits through "out".
 * Returns the chosen route with a hold taken on it.
 */
static struct rt6_info *__ip6_route_redirect(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6,
					     int flags)
{
	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
	struct rt6_info *rt;
	struct fib6_node *fn;

	/*
	 * Get the "current" route for this destination and
	 * check if the redirect has come from approriate router.
	 *
	 * RFC 2461 specifies that redirects should only be
	 * accepted if they come from the nexthop to the target.
	 * Due to the way the routes are chosen, this notion
	 * is a bit fuzzy and one might need to check all possible
	 * routes.
	 */

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		/*
		 * Current route is on-link; redirect is always invalid.
		 *
		 * Seems, previous statement is not true. It could
		 * be node, which looks for us as on-link (f.e. proxy ndisc)
		 * But then router serving it might decide, that we should
		 * know truth 8)8) --ANK (980726).
		 */
		if (rt6_check_expired(rt))
			continue;
		if (!(rt->rt6i_flags & RTF_GATEWAY))
			continue;
		if (fl6->flowi6_oif != rt->rt6i_dev->ifindex)
			continue;
		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
			continue;
		break;
	}

	if (!rt)
		rt = net->ipv6.ip6_null_entry;
	/* May jump back to "restart" or forward to "out" (hidden gotos). */
	BACKTRACK(net, &fl6->saddr);
out:
	dst_hold(&rt->dst);

	read_unlock_bh(&table->tb6_lock);

	return rt;
};
1529 | 1538 | ||
1530 | static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest, | 1539 | static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest, |
1531 | const struct in6_addr *src, | 1540 | const struct in6_addr *src, |
1532 | const struct in6_addr *gateway, | 1541 | const struct in6_addr *gateway, |
1533 | struct net_device *dev) | 1542 | struct net_device *dev) |
1534 | { | 1543 | { |
1535 | int flags = RT6_LOOKUP_F_HAS_SADDR; | 1544 | int flags = RT6_LOOKUP_F_HAS_SADDR; |
1536 | struct net *net = dev_net(dev); | 1545 | struct net *net = dev_net(dev); |
1537 | struct ip6rd_flowi rdfl = { | 1546 | struct ip6rd_flowi rdfl = { |
1538 | .fl6 = { | 1547 | .fl6 = { |
1539 | .flowi6_oif = dev->ifindex, | 1548 | .flowi6_oif = dev->ifindex, |
1540 | .daddr = *dest, | 1549 | .daddr = *dest, |
1541 | .saddr = *src, | 1550 | .saddr = *src, |
1542 | }, | 1551 | }, |
1543 | }; | 1552 | }; |
1544 | 1553 | ||
1545 | ipv6_addr_copy(&rdfl.gateway, gateway); | 1554 | ipv6_addr_copy(&rdfl.gateway, gateway); |
1546 | 1555 | ||
1547 | if (rt6_need_strict(dest)) | 1556 | if (rt6_need_strict(dest)) |
1548 | flags |= RT6_LOOKUP_F_IFACE; | 1557 | flags |= RT6_LOOKUP_F_IFACE; |
1549 | 1558 | ||
1550 | return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6, | 1559 | return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6, |
1551 | flags, __ip6_route_redirect); | 1560 | flags, __ip6_route_redirect); |
1552 | } | 1561 | } |
1553 | 1562 | ||
/*
 * Process a validated ICMPv6 redirect for @dest received from @neigh:
 * update the neighbour entry, then clone the current route into a new
 * RTF_DYNAMIC|RTF_CACHE host route whose next hop is the redirecting
 * router, and insert it (replacing an old cached route if present).
 */
void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
		  const struct in6_addr *saddr,
		  struct neighbour *neigh, u8 *lladdr, int on_link)
{
	struct rt6_info *rt, *nrt = NULL;
	struct netevent_redirect netevent;
	struct net *net = dev_net(neigh->dev);

	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);

	if (rt == net->ipv6.ip6_null_entry) {
		if (net_ratelimit())
			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
			       "for redirect target\n");
		goto out;
	}

	/*
	 *	We have finally decided to accept it.
	 */

	neigh_update(neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER))
		     );

	/*
	 * Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->dst);

	/* Duplicate redirect: silently ignore. */
	/* The raw (non-RCU) accessor is sufficient here: the neighbour
	 * pointer is only compared, never dereferenced. */
	if (neigh == dst_get_neighbour_raw(&rt->dst))
		goto out;

	nrt = ip6_rt_copy(rt, dest);
	if (nrt == NULL)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	nrt->rt6i_dst.plen = 128;
	nrt->dst.flags |= DST_HOST;

	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
	/* The new route owns its own reference on the neighbour. */
	dst_set_neighbour(&nrt->dst, neigh_clone(neigh));

	if (ip6_ins_rt(nrt))
		goto out;

	netevent.old = &rt->dst;
	netevent.new = &nrt->dst;
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

	if (rt->rt6i_flags&RTF_CACHE) {
		/* ip6_del_rt() consumes our hold on the old cached route. */
		ip6_del_rt(rt);
		return;
	}

out:
	dst_release(&rt->dst);
}
1622 | 1631 | ||
1623 | /* | 1632 | /* |
1624 | * Handle ICMP "packet too big" messages | 1633 | * Handle ICMP "packet too big" messages |
1625 | * i.e. Path MTU discovery | 1634 | * i.e. Path MTU discovery |
1626 | */ | 1635 | */ |
1627 | 1636 | ||
/*
 * Process one ICMPv6 "Packet Too Big" indication for (daddr, saddr).
 * ifindex == 0 means "look the route up on any interface".  On success
 * the matched (or freshly cloned) cache route carries the new path MTU
 * with an expiry, so PMTU increases can be re-detected later.
 */
static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
			struct net *net, u32 pmtu, int ifindex)
{
	struct rt6_info *rt, *nrt;
	int allfrag = 0;
again:
	/* rt6_lookup() returns a held reference; released at "out". */
	rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
	if (rt == NULL)
		return;

	if (rt6_check_expired(rt)) {
		/* Stale cache entry: delete it and retry the lookup. */
		ip6_del_rt(rt);
		goto again;
	}

	/* Only ever shrink the cached MTU; a larger value is ignored. */
	if (pmtu >= dst_mtu(&rt->dst))
		goto out;

	if (pmtu < IPV6_MIN_MTU) {
		/*
		 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
		 * MTU (1280) and a fragment header should always be included
		 * after a node receiving Too Big message reporting PMTU is
		 * less than the IPv6 Minimum Link MTU.
		 */
		pmtu = IPV6_MIN_MTU;
		allfrag = 1;
	}

	/* New mtu received -> path was valid.
	   They are sent only in response to data packets,
	   so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->dst);

	/* Host route. If it is static, it would be better
	   not to override it, but add new one, so that
	   when cache entry will expire old pmtu
	   would return automatically.
	 */
	if (rt->rt6i_flags & RTF_CACHE) {
		dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
		if (allfrag) {
			u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
			features |= RTAX_FEATURE_ALLFRAG;
			dst_metric_set(&rt->dst, RTAX_FEATURES, features);
		}
		dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
		goto out;
	}

	/* Network route.
	   Two cases are possible:
	   1. It is connected route. Action: COW
	   2. It is gatewayed route or NONEXTHOP route. Action: clone it.
	 */
	/* _raw accessor: the neighbour is only tested for NULL here, no
	 * reference is taken — NOTE(review): presumably safe without
	 * rcu_read_lock() for a pure NULL check; confirm against dst.h. */
	if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
		nrt = rt6_alloc_cow(rt, daddr, saddr);
	else
		nrt = rt6_alloc_clone(rt, daddr);

	if (nrt) {
		dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
		if (allfrag) {
			u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
			features |= RTAX_FEATURE_ALLFRAG;
			dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
		}

		/* According to RFC 1981, detecting PMTU increase shouldn't be
		 * happened within 5 mins, the recommended timer is 10 mins.
		 * Here this route expiration time is set to ip6_rt_mtu_expires
		 * which is 10 mins. After 10 mins the decreased pmtu is expired
		 * and detecting PMTU increase will be automatically happened.
		 */
		dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;

		ip6_ins_rt(nrt);
	}
out:
	dst_release(&rt->dst);
}
1712 | 1721 | ||
/*
 * Entry point for handling a received "Packet Too Big": apply the new
 * path MTU both to the generic route (ifindex 0) and to the route bound
 * to the interface the ICMP arrived on.
 */
void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
			struct net_device *dev, u32 pmtu)
{
	struct net *net = dev_net(dev);

	/*
	 * RFC 1981 states that a node "MUST reduce the size of the packets it
	 * is sending along the path" that caused the Packet Too Big message.
	 * Since it's not possible in the general case to determine which
	 * interface was used to send the original packet, we update the MTU
	 * on the interface that will be used to send future packets. We also
	 * update the MTU on the interface that received the Packet Too Big in
	 * case the original packet was forced out that interface with
	 * SO_BINDTODEVICE or similar. This is the next best thing to the
	 * correct behaviour, which would be to update the MTU on all
	 * interfaces.
	 */
	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
}
1733 | 1742 | ||
1734 | /* | 1743 | /* |
1735 | * Misc support functions | 1744 | * Misc support functions |
1736 | */ | 1745 | */ |
1737 | 1746 | ||
/*
 * Allocate a new rt6_info modelled on @ort but pointing at destination
 * @dest.  Copies handlers, metrics, gateway and source keys; takes a
 * reference on the shared inet6_dev.  RTF_EXPIRES is cleared and the
 * metric zeroed — the copy starts life as a fresh cache candidate.
 * Returns NULL if allocation fails.
 */
static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
				    const struct in6_addr *dest)
{
	struct net *net = dev_net(ort->rt6i_dev);
	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
					    ort->dst.dev, 0);

	if (rt) {
		rt->dst.input = ort->dst.input;
		rt->dst.output = ort->dst.output;

		ipv6_addr_copy(&rt->rt6i_dst.addr, dest);
		rt->rt6i_dst.plen = ort->rt6i_dst.plen;
		dst_copy_metrics(&rt->dst, &ort->dst);
		rt->dst.error = ort->dst.error;
		rt->rt6i_idev = ort->rt6i_idev;
		if (rt->rt6i_idev)
			in6_dev_hold(rt->rt6i_idev);	/* copy shares the idev */
		rt->dst.lastuse = jiffies;
		rt->rt6i_expires = 0;

		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
		rt->rt6i_metric = 0;

#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif
		memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
		rt->rt6i_table = ort->rt6i_table;
	}
	return rt;
}
1771 | 1780 | ||
1772 | #ifdef CONFIG_IPV6_ROUTE_INFO | 1781 | #ifdef CONFIG_IPV6_ROUTE_INFO |
/*
 * Find an RA-learned (RTF_ROUTEINFO) gateway route for prefix/prefixlen
 * via @gwaddr on interface @ifindex in the RT6_TABLE_INFO table.
 * Returns the route with a reference held, or NULL if not found.
 */
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex)
{
	struct fib6_node *fn;
	struct rt6_info *rt = NULL;
	struct fib6_table *table;

	table = fib6_get_table(net, RT6_TABLE_INFO);
	if (table == NULL)
		return NULL;

	/* Walk of fn->leaf requires the table lock. */
	write_lock_bh(&table->tb6_lock);
	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
	if (!fn)
		goto out;

	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_dev->ifindex != ifindex)
			continue;
		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
			continue;
		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
			continue;
		dst_hold(&rt->dst);	/* caller receives a held reference */
		break;
	}
out:
	write_unlock_bh(&table->tb6_lock);
	return rt;
}
1804 | 1813 | ||
/*
 * Install a route learned from an RA Route Information option into
 * RT6_TABLE_INFO, then look it up again to return it with a reference
 * held (NULL if the add failed and nothing matches).
 */
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex,
					   unsigned pref)
{
	struct fib6_config cfg = {
		.fc_table	= RT6_TABLE_INFO,
		.fc_metric	= IP6_RT_PRIO_USER,
		.fc_ifindex	= ifindex,
		.fc_dst_len	= prefixlen,
		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
				  RTF_UP | RTF_PREF(pref),
		.fc_nlinfo.pid = 0,
		.fc_nlinfo.nlh = NULL,
		.fc_nlinfo.nl_net = net,
	};

	ipv6_addr_copy(&cfg.fc_dst, prefix);
	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);

	/* We should treat it as a default route if prefix length is 0. */
	if (!prefixlen)
		cfg.fc_flags |= RTF_DEFAULT;

	ip6_route_add(&cfg);

	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
}
1833 | #endif | 1842 | #endif |
1834 | 1843 | ||
/*
 * Find the RA-learned default route via gateway @addr on @dev in the
 * RT6_TABLE_DFLT table.  Returns it with a reference held, or NULL.
 */
struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
{
	struct rt6_info *rt;
	struct fib6_table *table;

	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
	if (table == NULL)
		return NULL;

	write_lock_bh(&table->tb6_lock);
	for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
		/* Must be addrconf-learned AND a default route on this dev. */
		if (dev == rt->rt6i_dev &&
		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
			break;
	}
	if (rt)
		dst_hold(&rt->dst);	/* hand the caller a held reference */
	write_unlock_bh(&table->tb6_lock);
	return rt;
}
1856 | 1865 | ||
/*
 * Install an RA-learned default route via @gwaddr on @dev (with router
 * preference @pref), then re-look it up so the caller gets the inserted
 * route with a reference held (NULL on failure).
 */
struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
				     struct net_device *dev,
				     unsigned int pref)
{
	struct fib6_config cfg = {
		.fc_table	= RT6_TABLE_DFLT,
		.fc_metric	= IP6_RT_PRIO_USER,
		.fc_ifindex	= dev->ifindex,
		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
		.fc_nlinfo.pid = 0,
		.fc_nlinfo.nlh = NULL,
		.fc_nlinfo.nl_net = dev_net(dev),
	};

	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);

	ip6_route_add(&cfg);

	return rt6_get_dflt_router(gwaddr, dev);
}
1878 | 1887 | ||
/*
 * Delete every addrconf-learned default route in the namespace.
 * ip6_del_rt() needs the table write lock, so we can't delete while
 * holding the read lock: hold the route, drop the lock, delete, and
 * restart the walk because the list may have changed meanwhile.
 */
void rt6_purge_dflt_routers(struct net *net)
{
	struct rt6_info *rt;
	struct fib6_table *table;

	/* NOTE: Keep consistent with rt6_get_dflt_router */
	table = fib6_get_table(net, RT6_TABLE_DFLT);
	if (table == NULL)
		return;

restart:
	read_lock_bh(&table->tb6_lock);
	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
			dst_hold(&rt->dst);	/* keep rt alive across unlock */
			read_unlock_bh(&table->tb6_lock);
			ip6_del_rt(rt);
			goto restart;
		}
	}
	read_unlock_bh(&table->tb6_lock);
}
1901 | 1910 | ||
/*
 * Translate a legacy ioctl struct in6_rtmsg into the internal
 * struct fib6_config used by ip6_route_add()/ip6_route_del().
 * All fields not present in the rtmsg are zeroed; the target table
 * is always RT6_TABLE_MAIN.
 */
static void rtmsg_to_fib6_config(struct net *net,
				 struct in6_rtmsg *rtmsg,
				 struct fib6_config *cfg)
{
	memset(cfg, 0, sizeof(*cfg));

	cfg->fc_table = RT6_TABLE_MAIN;
	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
	cfg->fc_metric = rtmsg->rtmsg_metric;
	cfg->fc_expires = rtmsg->rtmsg_info;
	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
	cfg->fc_src_len = rtmsg->rtmsg_src_len;
	cfg->fc_flags = rtmsg->rtmsg_flags;

	cfg->fc_nlinfo.nl_net = net;

	ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
	ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
	ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
}
1922 | 1931 | ||
/*
 * SIOCADDRT/SIOCDELRT ioctl handler: add or delete an IPv6 route from
 * a userspace struct in6_rtmsg.  Requires CAP_NET_ADMIN; route table
 * updates are serialized under the RTNL.  Returns 0 or a negative errno.
 */
int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct fib6_config cfg;
	struct in6_rtmsg rtmsg;
	int err;

	switch(cmd) {
	case SIOCADDRT:		/* Add a route */
	case SIOCDELRT:		/* Delete a route */
		if (!capable(CAP_NET_ADMIN))
			return -EPERM;
		/* copy_from_user() returns the number of bytes NOT copied,
		 * so any non-zero result means a faulting user pointer. */
		err = copy_from_user(&rtmsg, arg,
				     sizeof(struct in6_rtmsg));
		if (err)
			return -EFAULT;

		rtmsg_to_fib6_config(net, &rtmsg, &cfg);

		rtnl_lock();
		switch (cmd) {
		case SIOCADDRT:
			err = ip6_route_add(&cfg);
			break;
		case SIOCDELRT:
			err = ip6_route_del(&cfg);
			break;
		default:
			err = -EINVAL;
		}
		rtnl_unlock();

		return err;
	}

	return -EINVAL;
}
1959 | 1968 | ||
1960 | /* | 1969 | /* |
1961 | * Drop the packet on the floor | 1970 | * Drop the packet on the floor |
1962 | */ | 1971 | */ |
1963 | 1972 | ||
/*
 * Drop @skb, bump the appropriate no-route SNMP counter, and send an
 * ICMPv6 Destination Unreachable with the given @code back to the
 * sender.  Always returns 0 (packet consumed).
 */
static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
{
	int type;
	struct dst_entry *dst = skb_dst(skb);
	switch (ipstats_mib_noroutes) {
	case IPSTATS_MIB_INNOROUTES:
		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
		if (type == IPV6_ADDR_ANY) {
			/* Unspecified destination: count as address error,
			 * not as "no route". */
			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
				      IPSTATS_MIB_INADDRERRORS);
			break;
		}
		/* FALLTHROUGH */
	case IPSTATS_MIB_OUTNOROUTES:
		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
			      ipstats_mib_noroutes);
		break;
	}
	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
	kfree_skb(skb);
	return 0;
}
1986 | 1995 | ||
/* dst.input handler for the null route: drop with "no route" (input side). */
static int ip6_pkt_discard(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}
1991 | 2000 | ||
/* dst.output handler for the null route: drop with "no route" (output side). */
static int ip6_pkt_discard_out(struct sk_buff *skb)
{
	/* ICMP error generation needs skb->dev set on the output path. */
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
}
1997 | 2006 | ||
1998 | #ifdef CONFIG_IPV6_MULTIPLE_TABLES | 2007 | #ifdef CONFIG_IPV6_MULTIPLE_TABLES |
1999 | 2008 | ||
/* dst.input handler for prohibit routes: drop as administratively prohibited. */
static int ip6_pkt_prohibit(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}
2004 | 2013 | ||
/* dst.output handler for prohibit routes: drop as administratively prohibited. */
static int ip6_pkt_prohibit_out(struct sk_buff *skb)
{
	/* ICMP error generation needs skb->dev set on the output path. */
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
}
2010 | 2019 | ||
2011 | #endif | 2020 | #endif |
2012 | 2021 | ||
2013 | /* | 2022 | /* |
2014 | * Allocate a dst for local (unicast / anycast) address. | 2023 | * Allocate a dst for local (unicast / anycast) address. |
2015 | */ | 2024 | */ |
2016 | 2025 | ||
struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
				    const struct in6_addr *addr,
				    int anycast)
{
	struct net *net = dev_net(idev->dev);
	/* Local/anycast routes are always bound to the loopback device. */
	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
					    net->loopback_dev, 0);
	struct neighbour *neigh;

	if (rt == NULL) {
		if (net_ratelimit())
			pr_warning("IPv6: Maximum number of routes reached,"
				   " consider increasing route/max_size.\n");
		return ERR_PTR(-ENOMEM);
	}

	in6_dev_hold(idev);	/* rt->rt6i_idev keeps a reference */

	rt->dst.flags |= DST_HOST;
	rt->dst.input = ip6_input;
	rt->dst.output = ip6_output;
	rt->rt6i_idev = idev;
	rt->dst.obsolete = -1;

	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
	if (anycast)
		rt->rt6i_flags |= RTF_ANYCAST;
	else
		rt->rt6i_flags |= RTF_LOCAL;
	neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
	if (IS_ERR(neigh)) {
		/* dst_free() tears down the half-built route. */
		dst_free(&rt->dst);

		/* Propagate the error from ndisc_get_neigh(). */
		return ERR_CAST(neigh);
	}
	dst_set_neighbour(&rt->dst, neigh);

	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
	rt->rt6i_dst.plen = 128;	/* host route */
	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);

	atomic_set(&rt->dst.__refcnt, 1);	/* caller owns one reference */

	return rt;
}
2062 | 2071 | ||
2063 | int ip6_route_get_saddr(struct net *net, | 2072 | int ip6_route_get_saddr(struct net *net, |
2064 | struct rt6_info *rt, | 2073 | struct rt6_info *rt, |
2065 | const struct in6_addr *daddr, | 2074 | const struct in6_addr *daddr, |
2066 | unsigned int prefs, | 2075 | unsigned int prefs, |
2067 | struct in6_addr *saddr) | 2076 | struct in6_addr *saddr) |
2068 | { | 2077 | { |
2069 | struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt); | 2078 | struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt); |
2070 | int err = 0; | 2079 | int err = 0; |
2071 | if (rt->rt6i_prefsrc.plen) | 2080 | if (rt->rt6i_prefsrc.plen) |
2072 | ipv6_addr_copy(saddr, &rt->rt6i_prefsrc.addr); | 2081 | ipv6_addr_copy(saddr, &rt->rt6i_prefsrc.addr); |
2073 | else | 2082 | else |
2074 | err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL, | 2083 | err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL, |
2075 | daddr, prefs, saddr); | 2084 | daddr, prefs, saddr); |
2076 | return err; | 2085 | return err; |
2077 | } | 2086 | } |
2078 | 2087 | ||
/* remove deleted ip from prefsrc entries */
struct arg_dev_net_ip {
	struct net_device *dev;	/* restrict to this device; NULL matches any */
	struct net *net;	/* namespace being cleaned */
	struct in6_addr *addr;	/* the address that was removed */
};
2085 | 2094 | ||
2086 | static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg) | 2095 | static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg) |
2087 | { | 2096 | { |
2088 | struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev; | 2097 | struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev; |
2089 | struct net *net = ((struct arg_dev_net_ip *)arg)->net; | 2098 | struct net *net = ((struct arg_dev_net_ip *)arg)->net; |
2090 | struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr; | 2099 | struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr; |
2091 | 2100 | ||
2092 | if (((void *)rt->rt6i_dev == dev || dev == NULL) && | 2101 | if (((void *)rt->rt6i_dev == dev || dev == NULL) && |
2093 | rt != net->ipv6.ip6_null_entry && | 2102 | rt != net->ipv6.ip6_null_entry && |
2094 | ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) { | 2103 | ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) { |
2095 | /* remove prefsrc entry */ | 2104 | /* remove prefsrc entry */ |
2096 | rt->rt6i_prefsrc.plen = 0; | 2105 | rt->rt6i_prefsrc.plen = 0; |
2097 | } | 2106 | } |
2098 | return 0; | 2107 | return 0; |
2099 | } | 2108 | } |
2100 | 2109 | ||
/*
 * Called when address @ifp is removed: scrub it from the prefsrc field
 * of every route in the namespace via fib6_remove_prefsrc().
 */
void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
{
	struct net *net = dev_net(ifp->idev->dev);
	struct arg_dev_net_ip adni = {
		.dev = ifp->idev->dev,
		.net = net,
		.addr = &ifp->addr,
	};
	fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
}
2111 | 2120 | ||
struct arg_dev_net {
	struct net_device *dev;	/* device going down; NULL matches all */
	struct net *net;	/* owning network namespace */
};
2116 | 2125 | ||
2117 | static int fib6_ifdown(struct rt6_info *rt, void *arg) | 2126 | static int fib6_ifdown(struct rt6_info *rt, void *arg) |
2118 | { | 2127 | { |
2119 | const struct arg_dev_net *adn = arg; | 2128 | const struct arg_dev_net *adn = arg; |
2120 | const struct net_device *dev = adn->dev; | 2129 | const struct net_device *dev = adn->dev; |
2121 | 2130 | ||
2122 | if ((rt->rt6i_dev == dev || dev == NULL) && | 2131 | if ((rt->rt6i_dev == dev || dev == NULL) && |
2123 | rt != adn->net->ipv6.ip6_null_entry) { | 2132 | rt != adn->net->ipv6.ip6_null_entry) { |
2124 | RT6_TRACE("deleted by ifdown %p\n", rt); | 2133 | RT6_TRACE("deleted by ifdown %p\n", rt); |
2125 | return -1; | 2134 | return -1; |
2126 | } | 2135 | } |
2127 | return 0; | 2136 | return 0; |
2128 | } | 2137 | } |
2129 | 2138 | ||
2130 | void rt6_ifdown(struct net *net, struct net_device *dev) | 2139 | void rt6_ifdown(struct net *net, struct net_device *dev) |
2131 | { | 2140 | { |
2132 | struct arg_dev_net adn = { | 2141 | struct arg_dev_net adn = { |
2133 | .dev = dev, | 2142 | .dev = dev, |
2134 | .net = net, | 2143 | .net = net, |
2135 | }; | 2144 | }; |
2136 | 2145 | ||
2137 | fib6_clean_all(net, fib6_ifdown, 0, &adn); | 2146 | fib6_clean_all(net, fib6_ifdown, 0, &adn); |
2138 | icmp6_clean_all(fib6_ifdown, &adn); | 2147 | icmp6_clean_all(fib6_ifdown, &adn); |
2139 | } | 2148 | } |
2140 | 2149 | ||
2141 | struct rt6_mtu_change_arg | 2150 | struct rt6_mtu_change_arg |
2142 | { | 2151 | { |
2143 | struct net_device *dev; | 2152 | struct net_device *dev; |
2144 | unsigned mtu; | 2153 | unsigned mtu; |
2145 | }; | 2154 | }; |
2146 | 2155 | ||
2147 | static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) | 2156 | static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) |
2148 | { | 2157 | { |
2149 | struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg; | 2158 | struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg; |
2150 | struct inet6_dev *idev; | 2159 | struct inet6_dev *idev; |
2151 | 2160 | ||
2152 | /* In IPv6 pmtu discovery is not optional, | 2161 | /* In IPv6 pmtu discovery is not optional, |
2153 | so that RTAX_MTU lock cannot disable it. | 2162 | so that RTAX_MTU lock cannot disable it. |
2154 | We still use this lock to block changes | 2163 | We still use this lock to block changes |
2155 | caused by addrconf/ndisc. | 2164 | caused by addrconf/ndisc. |
2156 | */ | 2165 | */ |
2157 | 2166 | ||
2158 | idev = __in6_dev_get(arg->dev); | 2167 | idev = __in6_dev_get(arg->dev); |
2159 | if (idev == NULL) | 2168 | if (idev == NULL) |
2160 | return 0; | 2169 | return 0; |
2161 | 2170 | ||
2162 | /* For administrative MTU increase, there is no way to discover | 2171 | /* For administrative MTU increase, there is no way to discover |
2163 | IPv6 PMTU increase, so PMTU increase should be updated here. | 2172 | IPv6 PMTU increase, so PMTU increase should be updated here. |
2164 | Since RFC 1981 doesn't include administrative MTU increase | 2173 | Since RFC 1981 doesn't include administrative MTU increase |
2165 | update PMTU increase is a MUST. (i.e. jumbo frame) | 2174 | update PMTU increase is a MUST. (i.e. jumbo frame) |
2166 | */ | 2175 | */ |
2167 | /* | 2176 | /* |
2168 | If new MTU is less than route PMTU, this new MTU will be the | 2177 | If new MTU is less than route PMTU, this new MTU will be the |
2169 | lowest MTU in the path, update the route PMTU to reflect PMTU | 2178 | lowest MTU in the path, update the route PMTU to reflect PMTU |
2170 | decreases; if new MTU is greater than route PMTU, and the | 2179 | decreases; if new MTU is greater than route PMTU, and the |
2171 | old MTU is the lowest MTU in the path, update the route PMTU | 2180 | old MTU is the lowest MTU in the path, update the route PMTU |
2172 | to reflect the increase. In this case if the other nodes' MTU | 2181 | to reflect the increase. In this case if the other nodes' MTU |
2173 | also have the lowest MTU, TOO BIG MESSAGE will be lead to | 2182 | also have the lowest MTU, TOO BIG MESSAGE will be lead to |
2174 | PMTU discouvery. | 2183 | PMTU discouvery. |
2175 | */ | 2184 | */ |
2176 | if (rt->rt6i_dev == arg->dev && | 2185 | if (rt->rt6i_dev == arg->dev && |
2177 | !dst_metric_locked(&rt->dst, RTAX_MTU) && | 2186 | !dst_metric_locked(&rt->dst, RTAX_MTU) && |
2178 | (dst_mtu(&rt->dst) >= arg->mtu || | 2187 | (dst_mtu(&rt->dst) >= arg->mtu || |
2179 | (dst_mtu(&rt->dst) < arg->mtu && | 2188 | (dst_mtu(&rt->dst) < arg->mtu && |
2180 | dst_mtu(&rt->dst) == idev->cnf.mtu6))) { | 2189 | dst_mtu(&rt->dst) == idev->cnf.mtu6))) { |
2181 | dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu); | 2190 | dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu); |
2182 | } | 2191 | } |
2183 | return 0; | 2192 | return 0; |
2184 | } | 2193 | } |
2185 | 2194 | ||
2186 | void rt6_mtu_change(struct net_device *dev, unsigned mtu) | 2195 | void rt6_mtu_change(struct net_device *dev, unsigned mtu) |
2187 | { | 2196 | { |
2188 | struct rt6_mtu_change_arg arg = { | 2197 | struct rt6_mtu_change_arg arg = { |
2189 | .dev = dev, | 2198 | .dev = dev, |
2190 | .mtu = mtu, | 2199 | .mtu = mtu, |
2191 | }; | 2200 | }; |
2192 | 2201 | ||
2193 | fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg); | 2202 | fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg); |
2194 | } | 2203 | } |
2195 | 2204 | ||
2196 | static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { | 2205 | static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { |
2197 | [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) }, | 2206 | [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) }, |
2198 | [RTA_OIF] = { .type = NLA_U32 }, | 2207 | [RTA_OIF] = { .type = NLA_U32 }, |
2199 | [RTA_IIF] = { .type = NLA_U32 }, | 2208 | [RTA_IIF] = { .type = NLA_U32 }, |
2200 | [RTA_PRIORITY] = { .type = NLA_U32 }, | 2209 | [RTA_PRIORITY] = { .type = NLA_U32 }, |
2201 | [RTA_METRICS] = { .type = NLA_NESTED }, | 2210 | [RTA_METRICS] = { .type = NLA_NESTED }, |
2202 | }; | 2211 | }; |
2203 | 2212 | ||
2204 | static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, | 2213 | static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, |
2205 | struct fib6_config *cfg) | 2214 | struct fib6_config *cfg) |
2206 | { | 2215 | { |
2207 | struct rtmsg *rtm; | 2216 | struct rtmsg *rtm; |
2208 | struct nlattr *tb[RTA_MAX+1]; | 2217 | struct nlattr *tb[RTA_MAX+1]; |
2209 | int err; | 2218 | int err; |
2210 | 2219 | ||
2211 | err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); | 2220 | err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); |
2212 | if (err < 0) | 2221 | if (err < 0) |
2213 | goto errout; | 2222 | goto errout; |
2214 | 2223 | ||
2215 | err = -EINVAL; | 2224 | err = -EINVAL; |
2216 | rtm = nlmsg_data(nlh); | 2225 | rtm = nlmsg_data(nlh); |
2217 | memset(cfg, 0, sizeof(*cfg)); | 2226 | memset(cfg, 0, sizeof(*cfg)); |
2218 | 2227 | ||
2219 | cfg->fc_table = rtm->rtm_table; | 2228 | cfg->fc_table = rtm->rtm_table; |
2220 | cfg->fc_dst_len = rtm->rtm_dst_len; | 2229 | cfg->fc_dst_len = rtm->rtm_dst_len; |
2221 | cfg->fc_src_len = rtm->rtm_src_len; | 2230 | cfg->fc_src_len = rtm->rtm_src_len; |
2222 | cfg->fc_flags = RTF_UP; | 2231 | cfg->fc_flags = RTF_UP; |
2223 | cfg->fc_protocol = rtm->rtm_protocol; | 2232 | cfg->fc_protocol = rtm->rtm_protocol; |
2224 | 2233 | ||
2225 | if (rtm->rtm_type == RTN_UNREACHABLE) | 2234 | if (rtm->rtm_type == RTN_UNREACHABLE) |
2226 | cfg->fc_flags |= RTF_REJECT; | 2235 | cfg->fc_flags |= RTF_REJECT; |
2227 | 2236 | ||
2228 | if (rtm->rtm_type == RTN_LOCAL) | 2237 | if (rtm->rtm_type == RTN_LOCAL) |
2229 | cfg->fc_flags |= RTF_LOCAL; | 2238 | cfg->fc_flags |= RTF_LOCAL; |
2230 | 2239 | ||
2231 | cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; | 2240 | cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; |
2232 | cfg->fc_nlinfo.nlh = nlh; | 2241 | cfg->fc_nlinfo.nlh = nlh; |
2233 | cfg->fc_nlinfo.nl_net = sock_net(skb->sk); | 2242 | cfg->fc_nlinfo.nl_net = sock_net(skb->sk); |
2234 | 2243 | ||
2235 | if (tb[RTA_GATEWAY]) { | 2244 | if (tb[RTA_GATEWAY]) { |
2236 | nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16); | 2245 | nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16); |
2237 | cfg->fc_flags |= RTF_GATEWAY; | 2246 | cfg->fc_flags |= RTF_GATEWAY; |
2238 | } | 2247 | } |
2239 | 2248 | ||
2240 | if (tb[RTA_DST]) { | 2249 | if (tb[RTA_DST]) { |
2241 | int plen = (rtm->rtm_dst_len + 7) >> 3; | 2250 | int plen = (rtm->rtm_dst_len + 7) >> 3; |
2242 | 2251 | ||
2243 | if (nla_len(tb[RTA_DST]) < plen) | 2252 | if (nla_len(tb[RTA_DST]) < plen) |
2244 | goto errout; | 2253 | goto errout; |
2245 | 2254 | ||
2246 | nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen); | 2255 | nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen); |
2247 | } | 2256 | } |
2248 | 2257 | ||
2249 | if (tb[RTA_SRC]) { | 2258 | if (tb[RTA_SRC]) { |
2250 | int plen = (rtm->rtm_src_len + 7) >> 3; | 2259 | int plen = (rtm->rtm_src_len + 7) >> 3; |
2251 | 2260 | ||
2252 | if (nla_len(tb[RTA_SRC]) < plen) | 2261 | if (nla_len(tb[RTA_SRC]) < plen) |
2253 | goto errout; | 2262 | goto errout; |
2254 | 2263 | ||
2255 | nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen); | 2264 | nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen); |
2256 | } | 2265 | } |
2257 | 2266 | ||
2258 | if (tb[RTA_PREFSRC]) | 2267 | if (tb[RTA_PREFSRC]) |
2259 | nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16); | 2268 | nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16); |
2260 | 2269 | ||
2261 | if (tb[RTA_OIF]) | 2270 | if (tb[RTA_OIF]) |
2262 | cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]); | 2271 | cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]); |
2263 | 2272 | ||
2264 | if (tb[RTA_PRIORITY]) | 2273 | if (tb[RTA_PRIORITY]) |
2265 | cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]); | 2274 | cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]); |
2266 | 2275 | ||
2267 | if (tb[RTA_METRICS]) { | 2276 | if (tb[RTA_METRICS]) { |
2268 | cfg->fc_mx = nla_data(tb[RTA_METRICS]); | 2277 | cfg->fc_mx = nla_data(tb[RTA_METRICS]); |
2269 | cfg->fc_mx_len = nla_len(tb[RTA_METRICS]); | 2278 | cfg->fc_mx_len = nla_len(tb[RTA_METRICS]); |
2270 | } | 2279 | } |
2271 | 2280 | ||
2272 | if (tb[RTA_TABLE]) | 2281 | if (tb[RTA_TABLE]) |
2273 | cfg->fc_table = nla_get_u32(tb[RTA_TABLE]); | 2282 | cfg->fc_table = nla_get_u32(tb[RTA_TABLE]); |
2274 | 2283 | ||
2275 | err = 0; | 2284 | err = 0; |
2276 | errout: | 2285 | errout: |
2277 | return err; | 2286 | return err; |
2278 | } | 2287 | } |
2279 | 2288 | ||
2280 | static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) | 2289 | static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) |
2281 | { | 2290 | { |
2282 | struct fib6_config cfg; | 2291 | struct fib6_config cfg; |
2283 | int err; | 2292 | int err; |
2284 | 2293 | ||
2285 | err = rtm_to_fib6_config(skb, nlh, &cfg); | 2294 | err = rtm_to_fib6_config(skb, nlh, &cfg); |
2286 | if (err < 0) | 2295 | if (err < 0) |
2287 | return err; | 2296 | return err; |
2288 | 2297 | ||
2289 | return ip6_route_del(&cfg); | 2298 | return ip6_route_del(&cfg); |
2290 | } | 2299 | } |
2291 | 2300 | ||
2292 | static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) | 2301 | static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) |
2293 | { | 2302 | { |
2294 | struct fib6_config cfg; | 2303 | struct fib6_config cfg; |
2295 | int err; | 2304 | int err; |
2296 | 2305 | ||
2297 | err = rtm_to_fib6_config(skb, nlh, &cfg); | 2306 | err = rtm_to_fib6_config(skb, nlh, &cfg); |
2298 | if (err < 0) | 2307 | if (err < 0) |
2299 | return err; | 2308 | return err; |
2300 | 2309 | ||
2301 | return ip6_route_add(&cfg); | 2310 | return ip6_route_add(&cfg); |
2302 | } | 2311 | } |
2303 | 2312 | ||
2304 | static inline size_t rt6_nlmsg_size(void) | 2313 | static inline size_t rt6_nlmsg_size(void) |
2305 | { | 2314 | { |
2306 | return NLMSG_ALIGN(sizeof(struct rtmsg)) | 2315 | return NLMSG_ALIGN(sizeof(struct rtmsg)) |
2307 | + nla_total_size(16) /* RTA_SRC */ | 2316 | + nla_total_size(16) /* RTA_SRC */ |
2308 | + nla_total_size(16) /* RTA_DST */ | 2317 | + nla_total_size(16) /* RTA_DST */ |
2309 | + nla_total_size(16) /* RTA_GATEWAY */ | 2318 | + nla_total_size(16) /* RTA_GATEWAY */ |
2310 | + nla_total_size(16) /* RTA_PREFSRC */ | 2319 | + nla_total_size(16) /* RTA_PREFSRC */ |
2311 | + nla_total_size(4) /* RTA_TABLE */ | 2320 | + nla_total_size(4) /* RTA_TABLE */ |
2312 | + nla_total_size(4) /* RTA_IIF */ | 2321 | + nla_total_size(4) /* RTA_IIF */ |
2313 | + nla_total_size(4) /* RTA_OIF */ | 2322 | + nla_total_size(4) /* RTA_OIF */ |
2314 | + nla_total_size(4) /* RTA_PRIORITY */ | 2323 | + nla_total_size(4) /* RTA_PRIORITY */ |
2315 | + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */ | 2324 | + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */ |
2316 | + nla_total_size(sizeof(struct rta_cacheinfo)); | 2325 | + nla_total_size(sizeof(struct rta_cacheinfo)); |
2317 | } | 2326 | } |
2318 | 2327 | ||
2319 | static int rt6_fill_node(struct net *net, | 2328 | static int rt6_fill_node(struct net *net, |
2320 | struct sk_buff *skb, struct rt6_info *rt, | 2329 | struct sk_buff *skb, struct rt6_info *rt, |
2321 | struct in6_addr *dst, struct in6_addr *src, | 2330 | struct in6_addr *dst, struct in6_addr *src, |
2322 | int iif, int type, u32 pid, u32 seq, | 2331 | int iif, int type, u32 pid, u32 seq, |
2323 | int prefix, int nowait, unsigned int flags) | 2332 | int prefix, int nowait, unsigned int flags) |
2324 | { | 2333 | { |
2325 | struct rtmsg *rtm; | 2334 | struct rtmsg *rtm; |
2326 | struct nlmsghdr *nlh; | 2335 | struct nlmsghdr *nlh; |
2327 | long expires; | 2336 | long expires; |
2328 | u32 table; | 2337 | u32 table; |
2338 | struct neighbour *n; | ||
2329 | 2339 | ||
2330 | if (prefix) { /* user wants prefix routes only */ | 2340 | if (prefix) { /* user wants prefix routes only */ |
2331 | if (!(rt->rt6i_flags & RTF_PREFIX_RT)) { | 2341 | if (!(rt->rt6i_flags & RTF_PREFIX_RT)) { |
2332 | /* success since this is not a prefix route */ | 2342 | /* success since this is not a prefix route */ |
2333 | return 1; | 2343 | return 1; |
2334 | } | 2344 | } |
2335 | } | 2345 | } |
2336 | 2346 | ||
2337 | nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags); | 2347 | nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags); |
2338 | if (nlh == NULL) | 2348 | if (nlh == NULL) |
2339 | return -EMSGSIZE; | 2349 | return -EMSGSIZE; |
2340 | 2350 | ||
2341 | rtm = nlmsg_data(nlh); | 2351 | rtm = nlmsg_data(nlh); |
2342 | rtm->rtm_family = AF_INET6; | 2352 | rtm->rtm_family = AF_INET6; |
2343 | rtm->rtm_dst_len = rt->rt6i_dst.plen; | 2353 | rtm->rtm_dst_len = rt->rt6i_dst.plen; |
2344 | rtm->rtm_src_len = rt->rt6i_src.plen; | 2354 | rtm->rtm_src_len = rt->rt6i_src.plen; |
2345 | rtm->rtm_tos = 0; | 2355 | rtm->rtm_tos = 0; |
2346 | if (rt->rt6i_table) | 2356 | if (rt->rt6i_table) |
2347 | table = rt->rt6i_table->tb6_id; | 2357 | table = rt->rt6i_table->tb6_id; |
2348 | else | 2358 | else |
2349 | table = RT6_TABLE_UNSPEC; | 2359 | table = RT6_TABLE_UNSPEC; |
2350 | rtm->rtm_table = table; | 2360 | rtm->rtm_table = table; |
2351 | NLA_PUT_U32(skb, RTA_TABLE, table); | 2361 | NLA_PUT_U32(skb, RTA_TABLE, table); |
2352 | if (rt->rt6i_flags&RTF_REJECT) | 2362 | if (rt->rt6i_flags&RTF_REJECT) |
2353 | rtm->rtm_type = RTN_UNREACHABLE; | 2363 | rtm->rtm_type = RTN_UNREACHABLE; |
2354 | else if (rt->rt6i_flags&RTF_LOCAL) | 2364 | else if (rt->rt6i_flags&RTF_LOCAL) |
2355 | rtm->rtm_type = RTN_LOCAL; | 2365 | rtm->rtm_type = RTN_LOCAL; |
2356 | else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK)) | 2366 | else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK)) |
2357 | rtm->rtm_type = RTN_LOCAL; | 2367 | rtm->rtm_type = RTN_LOCAL; |
2358 | else | 2368 | else |
2359 | rtm->rtm_type = RTN_UNICAST; | 2369 | rtm->rtm_type = RTN_UNICAST; |
2360 | rtm->rtm_flags = 0; | 2370 | rtm->rtm_flags = 0; |
2361 | rtm->rtm_scope = RT_SCOPE_UNIVERSE; | 2371 | rtm->rtm_scope = RT_SCOPE_UNIVERSE; |
2362 | rtm->rtm_protocol = rt->rt6i_protocol; | 2372 | rtm->rtm_protocol = rt->rt6i_protocol; |
2363 | if (rt->rt6i_flags&RTF_DYNAMIC) | 2373 | if (rt->rt6i_flags&RTF_DYNAMIC) |
2364 | rtm->rtm_protocol = RTPROT_REDIRECT; | 2374 | rtm->rtm_protocol = RTPROT_REDIRECT; |
2365 | else if (rt->rt6i_flags & RTF_ADDRCONF) | 2375 | else if (rt->rt6i_flags & RTF_ADDRCONF) |
2366 | rtm->rtm_protocol = RTPROT_KERNEL; | 2376 | rtm->rtm_protocol = RTPROT_KERNEL; |
2367 | else if (rt->rt6i_flags&RTF_DEFAULT) | 2377 | else if (rt->rt6i_flags&RTF_DEFAULT) |
2368 | rtm->rtm_protocol = RTPROT_RA; | 2378 | rtm->rtm_protocol = RTPROT_RA; |
2369 | 2379 | ||
2370 | if (rt->rt6i_flags&RTF_CACHE) | 2380 | if (rt->rt6i_flags&RTF_CACHE) |
2371 | rtm->rtm_flags |= RTM_F_CLONED; | 2381 | rtm->rtm_flags |= RTM_F_CLONED; |
2372 | 2382 | ||
2373 | if (dst) { | 2383 | if (dst) { |
2374 | NLA_PUT(skb, RTA_DST, 16, dst); | 2384 | NLA_PUT(skb, RTA_DST, 16, dst); |
2375 | rtm->rtm_dst_len = 128; | 2385 | rtm->rtm_dst_len = 128; |
2376 | } else if (rtm->rtm_dst_len) | 2386 | } else if (rtm->rtm_dst_len) |
2377 | NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr); | 2387 | NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr); |
2378 | #ifdef CONFIG_IPV6_SUBTREES | 2388 | #ifdef CONFIG_IPV6_SUBTREES |
2379 | if (src) { | 2389 | if (src) { |
2380 | NLA_PUT(skb, RTA_SRC, 16, src); | 2390 | NLA_PUT(skb, RTA_SRC, 16, src); |
2381 | rtm->rtm_src_len = 128; | 2391 | rtm->rtm_src_len = 128; |
2382 | } else if (rtm->rtm_src_len) | 2392 | } else if (rtm->rtm_src_len) |
2383 | NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr); | 2393 | NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr); |
2384 | #endif | 2394 | #endif |
2385 | if (iif) { | 2395 | if (iif) { |
2386 | #ifdef CONFIG_IPV6_MROUTE | 2396 | #ifdef CONFIG_IPV6_MROUTE |
2387 | if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) { | 2397 | if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) { |
2388 | int err = ip6mr_get_route(net, skb, rtm, nowait); | 2398 | int err = ip6mr_get_route(net, skb, rtm, nowait); |
2389 | if (err <= 0) { | 2399 | if (err <= 0) { |
2390 | if (!nowait) { | 2400 | if (!nowait) { |
2391 | if (err == 0) | 2401 | if (err == 0) |
2392 | return 0; | 2402 | return 0; |
2393 | goto nla_put_failure; | 2403 | goto nla_put_failure; |
2394 | } else { | 2404 | } else { |
2395 | if (err == -EMSGSIZE) | 2405 | if (err == -EMSGSIZE) |
2396 | goto nla_put_failure; | 2406 | goto nla_put_failure; |
2397 | } | 2407 | } |
2398 | } | 2408 | } |
2399 | } else | 2409 | } else |
2400 | #endif | 2410 | #endif |
2401 | NLA_PUT_U32(skb, RTA_IIF, iif); | 2411 | NLA_PUT_U32(skb, RTA_IIF, iif); |
2402 | } else if (dst) { | 2412 | } else if (dst) { |
2403 | struct in6_addr saddr_buf; | 2413 | struct in6_addr saddr_buf; |
2404 | if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0) | 2414 | if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0) |
2405 | NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf); | 2415 | NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf); |
2406 | } | 2416 | } |
2407 | 2417 | ||
2408 | if (rt->rt6i_prefsrc.plen) { | 2418 | if (rt->rt6i_prefsrc.plen) { |
2409 | struct in6_addr saddr_buf; | 2419 | struct in6_addr saddr_buf; |
2410 | ipv6_addr_copy(&saddr_buf, &rt->rt6i_prefsrc.addr); | 2420 | ipv6_addr_copy(&saddr_buf, &rt->rt6i_prefsrc.addr); |
2411 | NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf); | 2421 | NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf); |
2412 | } | 2422 | } |
2413 | 2423 | ||
2414 | if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) | 2424 | if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) |
2415 | goto nla_put_failure; | 2425 | goto nla_put_failure; |
2416 | 2426 | ||
2417 | if (dst_get_neighbour(&rt->dst)) | 2427 | rcu_read_lock(); |
2418 | NLA_PUT(skb, RTA_GATEWAY, 16, &dst_get_neighbour(&rt->dst)->primary_key); | 2428 | n = dst_get_neighbour(&rt->dst); |
2429 | if (n) | ||
2430 | NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key); | ||
2431 | rcu_read_unlock(); | ||
2419 | 2432 | ||
2420 | if (rt->dst.dev) | 2433 | if (rt->dst.dev) |
2421 | NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex); | 2434 | NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex); |
2422 | 2435 | ||
2423 | NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric); | 2436 | NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric); |
2424 | 2437 | ||
2425 | if (!(rt->rt6i_flags & RTF_EXPIRES)) | 2438 | if (!(rt->rt6i_flags & RTF_EXPIRES)) |
2426 | expires = 0; | 2439 | expires = 0; |
2427 | else if (rt->rt6i_expires - jiffies < INT_MAX) | 2440 | else if (rt->rt6i_expires - jiffies < INT_MAX) |
2428 | expires = rt->rt6i_expires - jiffies; | 2441 | expires = rt->rt6i_expires - jiffies; |
2429 | else | 2442 | else |
2430 | expires = INT_MAX; | 2443 | expires = INT_MAX; |
2431 | 2444 | ||
2432 | if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0, | 2445 | if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0, |
2433 | expires, rt->dst.error) < 0) | 2446 | expires, rt->dst.error) < 0) |
2434 | goto nla_put_failure; | 2447 | goto nla_put_failure; |
2435 | 2448 | ||
2436 | return nlmsg_end(skb, nlh); | 2449 | return nlmsg_end(skb, nlh); |
2437 | 2450 | ||
2438 | nla_put_failure: | 2451 | nla_put_failure: |
2439 | nlmsg_cancel(skb, nlh); | 2452 | nlmsg_cancel(skb, nlh); |
2440 | return -EMSGSIZE; | 2453 | return -EMSGSIZE; |
2441 | } | 2454 | } |
2442 | 2455 | ||
2443 | int rt6_dump_route(struct rt6_info *rt, void *p_arg) | 2456 | int rt6_dump_route(struct rt6_info *rt, void *p_arg) |
2444 | { | 2457 | { |
2445 | struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg; | 2458 | struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg; |
2446 | int prefix; | 2459 | int prefix; |
2447 | 2460 | ||
2448 | if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) { | 2461 | if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) { |
2449 | struct rtmsg *rtm = nlmsg_data(arg->cb->nlh); | 2462 | struct rtmsg *rtm = nlmsg_data(arg->cb->nlh); |
2450 | prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0; | 2463 | prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0; |
2451 | } else | 2464 | } else |
2452 | prefix = 0; | 2465 | prefix = 0; |
2453 | 2466 | ||
2454 | return rt6_fill_node(arg->net, | 2467 | return rt6_fill_node(arg->net, |
2455 | arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE, | 2468 | arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE, |
2456 | NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq, | 2469 | NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq, |
2457 | prefix, 0, NLM_F_MULTI); | 2470 | prefix, 0, NLM_F_MULTI); |
2458 | } | 2471 | } |
2459 | 2472 | ||
2460 | static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) | 2473 | static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) |
2461 | { | 2474 | { |
2462 | struct net *net = sock_net(in_skb->sk); | 2475 | struct net *net = sock_net(in_skb->sk); |
2463 | struct nlattr *tb[RTA_MAX+1]; | 2476 | struct nlattr *tb[RTA_MAX+1]; |
2464 | struct rt6_info *rt; | 2477 | struct rt6_info *rt; |
2465 | struct sk_buff *skb; | 2478 | struct sk_buff *skb; |
2466 | struct rtmsg *rtm; | 2479 | struct rtmsg *rtm; |
2467 | struct flowi6 fl6; | 2480 | struct flowi6 fl6; |
2468 | int err, iif = 0; | 2481 | int err, iif = 0; |
2469 | 2482 | ||
2470 | err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); | 2483 | err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); |
2471 | if (err < 0) | 2484 | if (err < 0) |
2472 | goto errout; | 2485 | goto errout; |
2473 | 2486 | ||
2474 | err = -EINVAL; | 2487 | err = -EINVAL; |
2475 | memset(&fl6, 0, sizeof(fl6)); | 2488 | memset(&fl6, 0, sizeof(fl6)); |
2476 | 2489 | ||
2477 | if (tb[RTA_SRC]) { | 2490 | if (tb[RTA_SRC]) { |
2478 | if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr)) | 2491 | if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr)) |
2479 | goto errout; | 2492 | goto errout; |
2480 | 2493 | ||
2481 | ipv6_addr_copy(&fl6.saddr, nla_data(tb[RTA_SRC])); | 2494 | ipv6_addr_copy(&fl6.saddr, nla_data(tb[RTA_SRC])); |
2482 | } | 2495 | } |
2483 | 2496 | ||
2484 | if (tb[RTA_DST]) { | 2497 | if (tb[RTA_DST]) { |
2485 | if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr)) | 2498 | if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr)) |
2486 | goto errout; | 2499 | goto errout; |
2487 | 2500 | ||
2488 | ipv6_addr_copy(&fl6.daddr, nla_data(tb[RTA_DST])); | 2501 | ipv6_addr_copy(&fl6.daddr, nla_data(tb[RTA_DST])); |
2489 | } | 2502 | } |
2490 | 2503 | ||
2491 | if (tb[RTA_IIF]) | 2504 | if (tb[RTA_IIF]) |
2492 | iif = nla_get_u32(tb[RTA_IIF]); | 2505 | iif = nla_get_u32(tb[RTA_IIF]); |
2493 | 2506 | ||
2494 | if (tb[RTA_OIF]) | 2507 | if (tb[RTA_OIF]) |
2495 | fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]); | 2508 | fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]); |
2496 | 2509 | ||
2497 | if (iif) { | 2510 | if (iif) { |
2498 | struct net_device *dev; | 2511 | struct net_device *dev; |
2499 | dev = __dev_get_by_index(net, iif); | 2512 | dev = __dev_get_by_index(net, iif); |
2500 | if (!dev) { | 2513 | if (!dev) { |
2501 | err = -ENODEV; | 2514 | err = -ENODEV; |
2502 | goto errout; | 2515 | goto errout; |
2503 | } | 2516 | } |
2504 | } | 2517 | } |
2505 | 2518 | ||
2506 | skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); | 2519 | skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); |
2507 | if (skb == NULL) { | 2520 | if (skb == NULL) { |
2508 | err = -ENOBUFS; | 2521 | err = -ENOBUFS; |
2509 | goto errout; | 2522 | goto errout; |
2510 | } | 2523 | } |
2511 | 2524 | ||
2512 | /* Reserve room for dummy headers, this skb can pass | 2525 | /* Reserve room for dummy headers, this skb can pass |
2513 | through good chunk of routing engine. | 2526 | through good chunk of routing engine. |
2514 | */ | 2527 | */ |
2515 | skb_reset_mac_header(skb); | 2528 | skb_reset_mac_header(skb); |
2516 | skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr)); | 2529 | skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr)); |
2517 | 2530 | ||
2518 | rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6); | 2531 | rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6); |
2519 | skb_dst_set(skb, &rt->dst); | 2532 | skb_dst_set(skb, &rt->dst); |
2520 | 2533 | ||
2521 | err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif, | 2534 | err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif, |
2522 | RTM_NEWROUTE, NETLINK_CB(in_skb).pid, | 2535 | RTM_NEWROUTE, NETLINK_CB(in_skb).pid, |
2523 | nlh->nlmsg_seq, 0, 0, 0); | 2536 | nlh->nlmsg_seq, 0, 0, 0); |
2524 | if (err < 0) { | 2537 | if (err < 0) { |
2525 | kfree_skb(skb); | 2538 | kfree_skb(skb); |
2526 | goto errout; | 2539 | goto errout; |
2527 | } | 2540 | } |
2528 | 2541 | ||
2529 | err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); | 2542 | err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); |
2530 | errout: | 2543 | errout: |
2531 | return err; | 2544 | return err; |
2532 | } | 2545 | } |
2533 | 2546 | ||
2534 | void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) | 2547 | void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) |
2535 | { | 2548 | { |
2536 | struct sk_buff *skb; | 2549 | struct sk_buff *skb; |
2537 | struct net *net = info->nl_net; | 2550 | struct net *net = info->nl_net; |
2538 | u32 seq; | 2551 | u32 seq; |
2539 | int err; | 2552 | int err; |
2540 | 2553 | ||
2541 | err = -ENOBUFS; | 2554 | err = -ENOBUFS; |
2542 | seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0; | 2555 | seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0; |
2543 | 2556 | ||
2544 | skb = nlmsg_new(rt6_nlmsg_size(), gfp_any()); | 2557 | skb = nlmsg_new(rt6_nlmsg_size(), gfp_any()); |
2545 | if (skb == NULL) | 2558 | if (skb == NULL) |
2546 | goto errout; | 2559 | goto errout; |
2547 | 2560 | ||
2548 | err = rt6_fill_node(net, skb, rt, NULL, NULL, 0, | 2561 | err = rt6_fill_node(net, skb, rt, NULL, NULL, 0, |
2549 | event, info->pid, seq, 0, 0, 0); | 2562 | event, info->pid, seq, 0, 0, 0); |
2550 | if (err < 0) { | 2563 | if (err < 0) { |
2551 | /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ | 2564 | /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ |
2552 | WARN_ON(err == -EMSGSIZE); | 2565 | WARN_ON(err == -EMSGSIZE); |
2553 | kfree_skb(skb); | 2566 | kfree_skb(skb); |
2554 | goto errout; | 2567 | goto errout; |
2555 | } | 2568 | } |
2556 | rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE, | 2569 | rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE, |
2557 | info->nlh, gfp_any()); | 2570 | info->nlh, gfp_any()); |
2558 | return; | 2571 | return; |
2559 | errout: | 2572 | errout: |
2560 | if (err < 0) | 2573 | if (err < 0) |
2561 | rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err); | 2574 | rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err); |
2562 | } | 2575 | } |
2563 | 2576 | ||
2564 | static int ip6_route_dev_notify(struct notifier_block *this, | 2577 | static int ip6_route_dev_notify(struct notifier_block *this, |
2565 | unsigned long event, void *data) | 2578 | unsigned long event, void *data) |
2566 | { | 2579 | { |
2567 | struct net_device *dev = (struct net_device *)data; | 2580 | struct net_device *dev = (struct net_device *)data; |
2568 | struct net *net = dev_net(dev); | 2581 | struct net *net = dev_net(dev); |
2569 | 2582 | ||
2570 | if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) { | 2583 | if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) { |
2571 | net->ipv6.ip6_null_entry->dst.dev = dev; | 2584 | net->ipv6.ip6_null_entry->dst.dev = dev; |
2572 | net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev); | 2585 | net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev); |
2573 | #ifdef CONFIG_IPV6_MULTIPLE_TABLES | 2586 | #ifdef CONFIG_IPV6_MULTIPLE_TABLES |
2574 | net->ipv6.ip6_prohibit_entry->dst.dev = dev; | 2587 | net->ipv6.ip6_prohibit_entry->dst.dev = dev; |
2575 | net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev); | 2588 | net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev); |
2576 | net->ipv6.ip6_blk_hole_entry->dst.dev = dev; | 2589 | net->ipv6.ip6_blk_hole_entry->dst.dev = dev; |
2577 | net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev); | 2590 | net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev); |
2578 | #endif | 2591 | #endif |
2579 | } | 2592 | } |
2580 | 2593 | ||
2581 | return NOTIFY_OK; | 2594 | return NOTIFY_OK; |
2582 | } | 2595 | } |
2583 | 2596 | ||
2584 | /* | 2597 | /* |
2585 | * /proc | 2598 | * /proc |
2586 | */ | 2599 | */ |
2587 | 2600 | ||
2588 | #ifdef CONFIG_PROC_FS | 2601 | #ifdef CONFIG_PROC_FS |
2589 | 2602 | ||
2590 | struct rt6_proc_arg | 2603 | struct rt6_proc_arg |
2591 | { | 2604 | { |
2592 | char *buffer; | 2605 | char *buffer; |
2593 | int offset; | 2606 | int offset; |
2594 | int length; | 2607 | int length; |
2595 | int skip; | 2608 | int skip; |
2596 | int len; | 2609 | int len; |
2597 | }; | 2610 | }; |
2598 | 2611 | ||
2599 | static int rt6_info_route(struct rt6_info *rt, void *p_arg) | 2612 | static int rt6_info_route(struct rt6_info *rt, void *p_arg) |
2600 | { | 2613 | { |
2601 | struct seq_file *m = p_arg; | 2614 | struct seq_file *m = p_arg; |
2602 | struct neighbour *n; | 2615 | struct neighbour *n; |
2603 | 2616 | ||
2604 | seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen); | 2617 | seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen); |
2605 | 2618 | ||
2606 | #ifdef CONFIG_IPV6_SUBTREES | 2619 | #ifdef CONFIG_IPV6_SUBTREES |
2607 | seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen); | 2620 | seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen); |
2608 | #else | 2621 | #else |
2609 | seq_puts(m, "00000000000000000000000000000000 00 "); | 2622 | seq_puts(m, "00000000000000000000000000000000 00 "); |
2610 | #endif | 2623 | #endif |
2624 | rcu_read_lock(); | ||
2611 | n = dst_get_neighbour(&rt->dst); | 2625 | n = dst_get_neighbour(&rt->dst); |
2612 | if (n) { | 2626 | if (n) { |
2613 | seq_printf(m, "%pi6", n->primary_key); | 2627 | seq_printf(m, "%pi6", n->primary_key); |
2614 | } else { | 2628 | } else { |
2615 | seq_puts(m, "00000000000000000000000000000000"); | 2629 | seq_puts(m, "00000000000000000000000000000000"); |
2616 | } | 2630 | } |
2631 | rcu_read_unlock(); | ||
2617 | seq_printf(m, " %08x %08x %08x %08x %8s\n", | 2632 | seq_printf(m, " %08x %08x %08x %08x %8s\n", |
2618 | rt->rt6i_metric, atomic_read(&rt->dst.__refcnt), | 2633 | rt->rt6i_metric, atomic_read(&rt->dst.__refcnt), |
2619 | rt->dst.__use, rt->rt6i_flags, | 2634 | rt->dst.__use, rt->rt6i_flags, |
2620 | rt->rt6i_dev ? rt->rt6i_dev->name : ""); | 2635 | rt->rt6i_dev ? rt->rt6i_dev->name : ""); |
2621 | return 0; | 2636 | return 0; |
2622 | } | 2637 | } |
2623 | 2638 | ||
2624 | static int ipv6_route_show(struct seq_file *m, void *v) | 2639 | static int ipv6_route_show(struct seq_file *m, void *v) |
2625 | { | 2640 | { |
2626 | struct net *net = (struct net *)m->private; | 2641 | struct net *net = (struct net *)m->private; |
2627 | fib6_clean_all(net, rt6_info_route, 0, m); | 2642 | fib6_clean_all(net, rt6_info_route, 0, m); |
2628 | return 0; | 2643 | return 0; |
2629 | } | 2644 | } |
2630 | 2645 | ||
2631 | static int ipv6_route_open(struct inode *inode, struct file *file) | 2646 | static int ipv6_route_open(struct inode *inode, struct file *file) |
2632 | { | 2647 | { |
2633 | return single_open_net(inode, file, ipv6_route_show); | 2648 | return single_open_net(inode, file, ipv6_route_show); |
2634 | } | 2649 | } |
2635 | 2650 | ||
2636 | static const struct file_operations ipv6_route_proc_fops = { | 2651 | static const struct file_operations ipv6_route_proc_fops = { |
2637 | .owner = THIS_MODULE, | 2652 | .owner = THIS_MODULE, |
2638 | .open = ipv6_route_open, | 2653 | .open = ipv6_route_open, |
2639 | .read = seq_read, | 2654 | .read = seq_read, |
2640 | .llseek = seq_lseek, | 2655 | .llseek = seq_lseek, |
2641 | .release = single_release_net, | 2656 | .release = single_release_net, |
2642 | }; | 2657 | }; |
2643 | 2658 | ||
2644 | static int rt6_stats_seq_show(struct seq_file *seq, void *v) | 2659 | static int rt6_stats_seq_show(struct seq_file *seq, void *v) |
2645 | { | 2660 | { |
2646 | struct net *net = (struct net *)seq->private; | 2661 | struct net *net = (struct net *)seq->private; |
2647 | seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n", | 2662 | seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n", |
2648 | net->ipv6.rt6_stats->fib_nodes, | 2663 | net->ipv6.rt6_stats->fib_nodes, |
2649 | net->ipv6.rt6_stats->fib_route_nodes, | 2664 | net->ipv6.rt6_stats->fib_route_nodes, |
2650 | net->ipv6.rt6_stats->fib_rt_alloc, | 2665 | net->ipv6.rt6_stats->fib_rt_alloc, |
2651 | net->ipv6.rt6_stats->fib_rt_entries, | 2666 | net->ipv6.rt6_stats->fib_rt_entries, |
2652 | net->ipv6.rt6_stats->fib_rt_cache, | 2667 | net->ipv6.rt6_stats->fib_rt_cache, |
2653 | dst_entries_get_slow(&net->ipv6.ip6_dst_ops), | 2668 | dst_entries_get_slow(&net->ipv6.ip6_dst_ops), |
2654 | net->ipv6.rt6_stats->fib_discarded_routes); | 2669 | net->ipv6.rt6_stats->fib_discarded_routes); |
2655 | 2670 | ||
2656 | return 0; | 2671 | return 0; |
2657 | } | 2672 | } |
2658 | 2673 | ||
2659 | static int rt6_stats_seq_open(struct inode *inode, struct file *file) | 2674 | static int rt6_stats_seq_open(struct inode *inode, struct file *file) |
2660 | { | 2675 | { |
2661 | return single_open_net(inode, file, rt6_stats_seq_show); | 2676 | return single_open_net(inode, file, rt6_stats_seq_show); |
2662 | } | 2677 | } |
2663 | 2678 | ||
2664 | static const struct file_operations rt6_stats_seq_fops = { | 2679 | static const struct file_operations rt6_stats_seq_fops = { |
2665 | .owner = THIS_MODULE, | 2680 | .owner = THIS_MODULE, |
2666 | .open = rt6_stats_seq_open, | 2681 | .open = rt6_stats_seq_open, |
2667 | .read = seq_read, | 2682 | .read = seq_read, |
2668 | .llseek = seq_lseek, | 2683 | .llseek = seq_lseek, |
2669 | .release = single_release_net, | 2684 | .release = single_release_net, |
2670 | }; | 2685 | }; |
2671 | #endif /* CONFIG_PROC_FS */ | 2686 | #endif /* CONFIG_PROC_FS */ |
2672 | 2687 | ||
2673 | #ifdef CONFIG_SYSCTL | 2688 | #ifdef CONFIG_SYSCTL |
2674 | 2689 | ||
2675 | static | 2690 | static |
2676 | int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, | 2691 | int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, |
2677 | void __user *buffer, size_t *lenp, loff_t *ppos) | 2692 | void __user *buffer, size_t *lenp, loff_t *ppos) |
2678 | { | 2693 | { |
2679 | struct net *net; | 2694 | struct net *net; |
2680 | int delay; | 2695 | int delay; |
2681 | if (!write) | 2696 | if (!write) |
2682 | return -EINVAL; | 2697 | return -EINVAL; |
2683 | 2698 | ||
2684 | net = (struct net *)ctl->extra1; | 2699 | net = (struct net *)ctl->extra1; |
2685 | delay = net->ipv6.sysctl.flush_delay; | 2700 | delay = net->ipv6.sysctl.flush_delay; |
2686 | proc_dointvec(ctl, write, buffer, lenp, ppos); | 2701 | proc_dointvec(ctl, write, buffer, lenp, ppos); |
2687 | fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net); | 2702 | fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net); |
2688 | return 0; | 2703 | return 0; |
2689 | } | 2704 | } |
2690 | 2705 | ||
2691 | ctl_table ipv6_route_table_template[] = { | 2706 | ctl_table ipv6_route_table_template[] = { |
2692 | { | 2707 | { |
2693 | .procname = "flush", | 2708 | .procname = "flush", |
2694 | .data = &init_net.ipv6.sysctl.flush_delay, | 2709 | .data = &init_net.ipv6.sysctl.flush_delay, |
2695 | .maxlen = sizeof(int), | 2710 | .maxlen = sizeof(int), |
2696 | .mode = 0200, | 2711 | .mode = 0200, |
2697 | .proc_handler = ipv6_sysctl_rtcache_flush | 2712 | .proc_handler = ipv6_sysctl_rtcache_flush |
2698 | }, | 2713 | }, |
2699 | { | 2714 | { |
2700 | .procname = "gc_thresh", | 2715 | .procname = "gc_thresh", |
2701 | .data = &ip6_dst_ops_template.gc_thresh, | 2716 | .data = &ip6_dst_ops_template.gc_thresh, |
2702 | .maxlen = sizeof(int), | 2717 | .maxlen = sizeof(int), |
2703 | .mode = 0644, | 2718 | .mode = 0644, |
2704 | .proc_handler = proc_dointvec, | 2719 | .proc_handler = proc_dointvec, |
2705 | }, | 2720 | }, |
2706 | { | 2721 | { |
2707 | .procname = "max_size", | 2722 | .procname = "max_size", |
2708 | .data = &init_net.ipv6.sysctl.ip6_rt_max_size, | 2723 | .data = &init_net.ipv6.sysctl.ip6_rt_max_size, |
2709 | .maxlen = sizeof(int), | 2724 | .maxlen = sizeof(int), |
2710 | .mode = 0644, | 2725 | .mode = 0644, |
2711 | .proc_handler = proc_dointvec, | 2726 | .proc_handler = proc_dointvec, |
2712 | }, | 2727 | }, |
2713 | { | 2728 | { |
2714 | .procname = "gc_min_interval", | 2729 | .procname = "gc_min_interval", |
2715 | .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, | 2730 | .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, |
2716 | .maxlen = sizeof(int), | 2731 | .maxlen = sizeof(int), |
2717 | .mode = 0644, | 2732 | .mode = 0644, |
2718 | .proc_handler = proc_dointvec_jiffies, | 2733 | .proc_handler = proc_dointvec_jiffies, |
2719 | }, | 2734 | }, |
2720 | { | 2735 | { |
2721 | .procname = "gc_timeout", | 2736 | .procname = "gc_timeout", |
2722 | .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout, | 2737 | .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout, |
2723 | .maxlen = sizeof(int), | 2738 | .maxlen = sizeof(int), |
2724 | .mode = 0644, | 2739 | .mode = 0644, |
2725 | .proc_handler = proc_dointvec_jiffies, | 2740 | .proc_handler = proc_dointvec_jiffies, |
2726 | }, | 2741 | }, |
2727 | { | 2742 | { |
2728 | .procname = "gc_interval", | 2743 | .procname = "gc_interval", |
2729 | .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval, | 2744 | .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval, |
2730 | .maxlen = sizeof(int), | 2745 | .maxlen = sizeof(int), |
2731 | .mode = 0644, | 2746 | .mode = 0644, |
2732 | .proc_handler = proc_dointvec_jiffies, | 2747 | .proc_handler = proc_dointvec_jiffies, |
2733 | }, | 2748 | }, |
2734 | { | 2749 | { |
2735 | .procname = "gc_elasticity", | 2750 | .procname = "gc_elasticity", |
2736 | .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity, | 2751 | .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity, |
2737 | .maxlen = sizeof(int), | 2752 | .maxlen = sizeof(int), |
2738 | .mode = 0644, | 2753 | .mode = 0644, |
2739 | .proc_handler = proc_dointvec, | 2754 | .proc_handler = proc_dointvec, |
2740 | }, | 2755 | }, |
2741 | { | 2756 | { |
2742 | .procname = "mtu_expires", | 2757 | .procname = "mtu_expires", |
2743 | .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires, | 2758 | .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires, |
2744 | .maxlen = sizeof(int), | 2759 | .maxlen = sizeof(int), |
2745 | .mode = 0644, | 2760 | .mode = 0644, |
2746 | .proc_handler = proc_dointvec_jiffies, | 2761 | .proc_handler = proc_dointvec_jiffies, |
2747 | }, | 2762 | }, |
2748 | { | 2763 | { |
2749 | .procname = "min_adv_mss", | 2764 | .procname = "min_adv_mss", |
2750 | .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss, | 2765 | .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss, |
2751 | .maxlen = sizeof(int), | 2766 | .maxlen = sizeof(int), |
2752 | .mode = 0644, | 2767 | .mode = 0644, |
2753 | .proc_handler = proc_dointvec, | 2768 | .proc_handler = proc_dointvec, |
2754 | }, | 2769 | }, |
2755 | { | 2770 | { |
2756 | .procname = "gc_min_interval_ms", | 2771 | .procname = "gc_min_interval_ms", |
2757 | .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, | 2772 | .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, |
2758 | .maxlen = sizeof(int), | 2773 | .maxlen = sizeof(int), |
2759 | .mode = 0644, | 2774 | .mode = 0644, |
2760 | .proc_handler = proc_dointvec_ms_jiffies, | 2775 | .proc_handler = proc_dointvec_ms_jiffies, |
2761 | }, | 2776 | }, |
2762 | { } | 2777 | { } |
2763 | }; | 2778 | }; |
2764 | 2779 | ||
2765 | struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net) | 2780 | struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net) |
2766 | { | 2781 | { |
2767 | struct ctl_table *table; | 2782 | struct ctl_table *table; |
2768 | 2783 | ||
2769 | table = kmemdup(ipv6_route_table_template, | 2784 | table = kmemdup(ipv6_route_table_template, |
2770 | sizeof(ipv6_route_table_template), | 2785 | sizeof(ipv6_route_table_template), |
2771 | GFP_KERNEL); | 2786 | GFP_KERNEL); |
2772 | 2787 | ||
2773 | if (table) { | 2788 | if (table) { |
2774 | table[0].data = &net->ipv6.sysctl.flush_delay; | 2789 | table[0].data = &net->ipv6.sysctl.flush_delay; |
2775 | table[0].extra1 = net; | 2790 | table[0].extra1 = net; |
2776 | table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh; | 2791 | table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh; |
2777 | table[2].data = &net->ipv6.sysctl.ip6_rt_max_size; | 2792 | table[2].data = &net->ipv6.sysctl.ip6_rt_max_size; |
2778 | table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; | 2793 | table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; |
2779 | table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout; | 2794 | table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout; |
2780 | table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval; | 2795 | table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval; |
2781 | table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity; | 2796 | table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity; |
2782 | table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires; | 2797 | table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires; |
2783 | table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss; | 2798 | table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss; |
2784 | table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; | 2799 | table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; |
2785 | } | 2800 | } |
2786 | 2801 | ||
2787 | return table; | 2802 | return table; |
2788 | } | 2803 | } |
2789 | #endif | 2804 | #endif |
2790 | 2805 | ||
2791 | static int __net_init ip6_route_net_init(struct net *net) | 2806 | static int __net_init ip6_route_net_init(struct net *net) |
2792 | { | 2807 | { |
2793 | int ret = -ENOMEM; | 2808 | int ret = -ENOMEM; |
2794 | 2809 | ||
2795 | memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template, | 2810 | memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template, |
2796 | sizeof(net->ipv6.ip6_dst_ops)); | 2811 | sizeof(net->ipv6.ip6_dst_ops)); |
2797 | 2812 | ||
2798 | if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0) | 2813 | if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0) |
2799 | goto out_ip6_dst_ops; | 2814 | goto out_ip6_dst_ops; |
2800 | 2815 | ||
2801 | net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template, | 2816 | net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template, |
2802 | sizeof(*net->ipv6.ip6_null_entry), | 2817 | sizeof(*net->ipv6.ip6_null_entry), |
2803 | GFP_KERNEL); | 2818 | GFP_KERNEL); |
2804 | if (!net->ipv6.ip6_null_entry) | 2819 | if (!net->ipv6.ip6_null_entry) |
2805 | goto out_ip6_dst_entries; | 2820 | goto out_ip6_dst_entries; |
2806 | net->ipv6.ip6_null_entry->dst.path = | 2821 | net->ipv6.ip6_null_entry->dst.path = |
2807 | (struct dst_entry *)net->ipv6.ip6_null_entry; | 2822 | (struct dst_entry *)net->ipv6.ip6_null_entry; |
2808 | net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops; | 2823 | net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops; |
2809 | dst_init_metrics(&net->ipv6.ip6_null_entry->dst, | 2824 | dst_init_metrics(&net->ipv6.ip6_null_entry->dst, |
2810 | ip6_template_metrics, true); | 2825 | ip6_template_metrics, true); |
2811 | 2826 | ||
2812 | #ifdef CONFIG_IPV6_MULTIPLE_TABLES | 2827 | #ifdef CONFIG_IPV6_MULTIPLE_TABLES |
2813 | net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template, | 2828 | net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template, |
2814 | sizeof(*net->ipv6.ip6_prohibit_entry), | 2829 | sizeof(*net->ipv6.ip6_prohibit_entry), |
2815 | GFP_KERNEL); | 2830 | GFP_KERNEL); |
2816 | if (!net->ipv6.ip6_prohibit_entry) | 2831 | if (!net->ipv6.ip6_prohibit_entry) |
2817 | goto out_ip6_null_entry; | 2832 | goto out_ip6_null_entry; |
2818 | net->ipv6.ip6_prohibit_entry->dst.path = | 2833 | net->ipv6.ip6_prohibit_entry->dst.path = |
2819 | (struct dst_entry *)net->ipv6.ip6_prohibit_entry; | 2834 | (struct dst_entry *)net->ipv6.ip6_prohibit_entry; |
2820 | net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops; | 2835 | net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops; |
2821 | dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst, | 2836 | dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst, |
2822 | ip6_template_metrics, true); | 2837 | ip6_template_metrics, true); |
2823 | 2838 | ||
2824 | net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template, | 2839 | net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template, |
2825 | sizeof(*net->ipv6.ip6_blk_hole_entry), | 2840 | sizeof(*net->ipv6.ip6_blk_hole_entry), |
2826 | GFP_KERNEL); | 2841 | GFP_KERNEL); |
2827 | if (!net->ipv6.ip6_blk_hole_entry) | 2842 | if (!net->ipv6.ip6_blk_hole_entry) |
2828 | goto out_ip6_prohibit_entry; | 2843 | goto out_ip6_prohibit_entry; |
2829 | net->ipv6.ip6_blk_hole_entry->dst.path = | 2844 | net->ipv6.ip6_blk_hole_entry->dst.path = |
2830 | (struct dst_entry *)net->ipv6.ip6_blk_hole_entry; | 2845 | (struct dst_entry *)net->ipv6.ip6_blk_hole_entry; |
2831 | net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops; | 2846 | net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops; |
2832 | dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst, | 2847 | dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst, |
2833 | ip6_template_metrics, true); | 2848 | ip6_template_metrics, true); |
2834 | #endif | 2849 | #endif |
2835 | 2850 | ||
2836 | net->ipv6.sysctl.flush_delay = 0; | 2851 | net->ipv6.sysctl.flush_delay = 0; |
2837 | net->ipv6.sysctl.ip6_rt_max_size = 4096; | 2852 | net->ipv6.sysctl.ip6_rt_max_size = 4096; |
2838 | net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2; | 2853 | net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2; |
2839 | net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ; | 2854 | net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ; |
2840 | net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ; | 2855 | net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ; |
2841 | net->ipv6.sysctl.ip6_rt_gc_elasticity = 9; | 2856 | net->ipv6.sysctl.ip6_rt_gc_elasticity = 9; |
2842 | net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ; | 2857 | net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ; |
2843 | net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40; | 2858 | net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40; |
2844 | 2859 | ||
2845 | #ifdef CONFIG_PROC_FS | 2860 | #ifdef CONFIG_PROC_FS |
2846 | proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops); | 2861 | proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops); |
2847 | proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops); | 2862 | proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops); |
2848 | #endif | 2863 | #endif |
2849 | net->ipv6.ip6_rt_gc_expire = 30*HZ; | 2864 | net->ipv6.ip6_rt_gc_expire = 30*HZ; |
2850 | 2865 | ||
2851 | ret = 0; | 2866 | ret = 0; |
2852 | out: | 2867 | out: |
2853 | return ret; | 2868 | return ret; |
2854 | 2869 | ||
2855 | #ifdef CONFIG_IPV6_MULTIPLE_TABLES | 2870 | #ifdef CONFIG_IPV6_MULTIPLE_TABLES |
2856 | out_ip6_prohibit_entry: | 2871 | out_ip6_prohibit_entry: |
2857 | kfree(net->ipv6.ip6_prohibit_entry); | 2872 | kfree(net->ipv6.ip6_prohibit_entry); |
2858 | out_ip6_null_entry: | 2873 | out_ip6_null_entry: |
2859 | kfree(net->ipv6.ip6_null_entry); | 2874 | kfree(net->ipv6.ip6_null_entry); |
2860 | #endif | 2875 | #endif |
2861 | out_ip6_dst_entries: | 2876 | out_ip6_dst_entries: |
2862 | dst_entries_destroy(&net->ipv6.ip6_dst_ops); | 2877 | dst_entries_destroy(&net->ipv6.ip6_dst_ops); |
2863 | out_ip6_dst_ops: | 2878 | out_ip6_dst_ops: |
2864 | goto out; | 2879 | goto out; |
2865 | } | 2880 | } |
2866 | 2881 | ||
2867 | static void __net_exit ip6_route_net_exit(struct net *net) | 2882 | static void __net_exit ip6_route_net_exit(struct net *net) |
2868 | { | 2883 | { |
2869 | #ifdef CONFIG_PROC_FS | 2884 | #ifdef CONFIG_PROC_FS |
2870 | proc_net_remove(net, "ipv6_route"); | 2885 | proc_net_remove(net, "ipv6_route"); |
2871 | proc_net_remove(net, "rt6_stats"); | 2886 | proc_net_remove(net, "rt6_stats"); |
2872 | #endif | 2887 | #endif |
2873 | kfree(net->ipv6.ip6_null_entry); | 2888 | kfree(net->ipv6.ip6_null_entry); |
2874 | #ifdef CONFIG_IPV6_MULTIPLE_TABLES | 2889 | #ifdef CONFIG_IPV6_MULTIPLE_TABLES |
2875 | kfree(net->ipv6.ip6_prohibit_entry); | 2890 | kfree(net->ipv6.ip6_prohibit_entry); |
2876 | kfree(net->ipv6.ip6_blk_hole_entry); | 2891 | kfree(net->ipv6.ip6_blk_hole_entry); |
2877 | #endif | 2892 | #endif |
2878 | dst_entries_destroy(&net->ipv6.ip6_dst_ops); | 2893 | dst_entries_destroy(&net->ipv6.ip6_dst_ops); |
2879 | } | 2894 | } |
2880 | 2895 | ||
2881 | static struct pernet_operations ip6_route_net_ops = { | 2896 | static struct pernet_operations ip6_route_net_ops = { |
2882 | .init = ip6_route_net_init, | 2897 | .init = ip6_route_net_init, |
2883 | .exit = ip6_route_net_exit, | 2898 | .exit = ip6_route_net_exit, |
2884 | }; | 2899 | }; |
2885 | 2900 | ||
2886 | static struct notifier_block ip6_route_dev_notifier = { | 2901 | static struct notifier_block ip6_route_dev_notifier = { |
2887 | .notifier_call = ip6_route_dev_notify, | 2902 | .notifier_call = ip6_route_dev_notify, |
2888 | .priority = 0, | 2903 | .priority = 0, |
2889 | }; | 2904 | }; |
2890 | 2905 | ||
2891 | int __init ip6_route_init(void) | 2906 | int __init ip6_route_init(void) |
2892 | { | 2907 | { |
2893 | int ret; | 2908 | int ret; |
2894 | 2909 | ||
2895 | ret = -ENOMEM; | 2910 | ret = -ENOMEM; |
2896 | ip6_dst_ops_template.kmem_cachep = | 2911 | ip6_dst_ops_template.kmem_cachep = |
2897 | kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0, | 2912 | kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0, |
2898 | SLAB_HWCACHE_ALIGN, NULL); | 2913 | SLAB_HWCACHE_ALIGN, NULL); |
2899 | if (!ip6_dst_ops_template.kmem_cachep) | 2914 | if (!ip6_dst_ops_template.kmem_cachep) |
2900 | goto out; | 2915 | goto out; |
2901 | 2916 | ||
2902 | ret = dst_entries_init(&ip6_dst_blackhole_ops); | 2917 | ret = dst_entries_init(&ip6_dst_blackhole_ops); |
2903 | if (ret) | 2918 | if (ret) |
2904 | goto out_kmem_cache; | 2919 | goto out_kmem_cache; |
2905 | 2920 | ||
2906 | ret = register_pernet_subsys(&ip6_route_net_ops); | 2921 | ret = register_pernet_subsys(&ip6_route_net_ops); |
2907 | if (ret) | 2922 | if (ret) |
2908 | goto out_dst_entries; | 2923 | goto out_dst_entries; |
2909 | 2924 | ||
2910 | ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; | 2925 | ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; |
2911 | 2926 | ||
2912 | /* Registering of the loopback is done before this portion of code, | 2927 | /* Registering of the loopback is done before this portion of code, |
2913 | * the loopback reference in rt6_info will not be taken, do it | 2928 | * the loopback reference in rt6_info will not be taken, do it |
2914 | * manually for init_net */ | 2929 | * manually for init_net */ |
2915 | init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev; | 2930 | init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev; |
2916 | init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); | 2931 | init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); |
2917 | #ifdef CONFIG_IPV6_MULTIPLE_TABLES | 2932 | #ifdef CONFIG_IPV6_MULTIPLE_TABLES |
2918 | init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev; | 2933 | init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev; |
2919 | init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); | 2934 | init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); |
2920 | init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev; | 2935 | init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev; |
2921 | init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); | 2936 | init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); |
2922 | #endif | 2937 | #endif |
2923 | ret = fib6_init(); | 2938 | ret = fib6_init(); |
2924 | if (ret) | 2939 | if (ret) |
2925 | goto out_register_subsys; | 2940 | goto out_register_subsys; |
2926 | 2941 | ||
2927 | ret = xfrm6_init(); | 2942 | ret = xfrm6_init(); |
2928 | if (ret) | 2943 | if (ret) |
2929 | goto out_fib6_init; | 2944 | goto out_fib6_init; |
2930 | 2945 | ||
2931 | ret = fib6_rules_init(); | 2946 | ret = fib6_rules_init(); |
2932 | if (ret) | 2947 | if (ret) |
2933 | goto xfrm6_init; | 2948 | goto xfrm6_init; |
2934 | 2949 | ||
2935 | ret = -ENOBUFS; | 2950 | ret = -ENOBUFS; |
2936 | if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) || | 2951 | if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) || |
2937 | __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) || | 2952 | __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) || |
2938 | __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL)) | 2953 | __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL)) |
2939 | goto fib6_rules_init; | 2954 | goto fib6_rules_init; |
2940 | 2955 | ||
2941 | ret = register_netdevice_notifier(&ip6_route_dev_notifier); | 2956 | ret = register_netdevice_notifier(&ip6_route_dev_notifier); |
2942 | if (ret) | 2957 | if (ret) |
2943 | goto fib6_rules_init; | 2958 | goto fib6_rules_init; |
2944 | 2959 | ||
2945 | out: | 2960 | out: |
2946 | return ret; | 2961 | return ret; |
2947 | 2962 | ||
2948 | fib6_rules_init: | 2963 | fib6_rules_init: |
2949 | fib6_rules_cleanup(); | 2964 | fib6_rules_cleanup(); |
2950 | xfrm6_init: | 2965 | xfrm6_init: |
2951 | xfrm6_fini(); | 2966 | xfrm6_fini(); |
2952 | out_fib6_init: | 2967 | out_fib6_init: |
2953 | fib6_gc_cleanup(); | 2968 | fib6_gc_cleanup(); |
2954 | out_register_subsys: | 2969 | out_register_subsys: |
2955 | unregister_pernet_subsys(&ip6_route_net_ops); | 2970 | unregister_pernet_subsys(&ip6_route_net_ops); |
2956 | out_dst_entries: | 2971 | out_dst_entries: |
2957 | dst_entries_destroy(&ip6_dst_blackhole_ops); | 2972 | dst_entries_destroy(&ip6_dst_blackhole_ops); |
2958 | out_kmem_cache: | 2973 | out_kmem_cache: |
2959 | kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); | 2974 | kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); |
2960 | goto out; | 2975 | goto out; |
2961 | } | 2976 | } |
2962 | 2977 | ||
2963 | void ip6_route_cleanup(void) | 2978 | void ip6_route_cleanup(void) |
2964 | { | 2979 | { |
2965 | unregister_netdevice_notifier(&ip6_route_dev_notifier); | 2980 | unregister_netdevice_notifier(&ip6_route_dev_notifier); |
2966 | fib6_rules_cleanup(); | 2981 | fib6_rules_cleanup(); |
2967 | xfrm6_fini(); | 2982 | xfrm6_fini(); |
2968 | fib6_gc_cleanup(); | 2983 | fib6_gc_cleanup(); |
2969 | unregister_pernet_subsys(&ip6_route_net_ops); | 2984 | unregister_pernet_subsys(&ip6_route_net_ops); |
2970 | dst_entries_destroy(&ip6_dst_blackhole_ops); | 2985 | dst_entries_destroy(&ip6_dst_blackhole_ops); |
2971 | kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); | 2986 | kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); |
2972 | } | 2987 | } |
2973 | 2988 |
-
mentioned in commit e03315
-
mentioned in commit 94f826
-
mentioned in commit 94f826
-
mentioned in commit 94f826
-
mentioned in commit 94f826
-
mentioned in commit 94f826
-
mentioned in commit 94f826
-
mentioned in commit 94f826
-
mentioned in commit 94f826
-
mentioned in commit 94f826
-
mentioned in commit 94f826
-
mentioned in commit 94f826
-
mentioned in commit 94f826
-
mentioned in commit 94f826
-
mentioned in commit 94f826
-
mentioned in commit 94f826
-
mentioned in commit 94f826
-
mentioned in commit 94f826
-
mentioned in commit 94f826
-
mentioned in commit 94f826
-
mentioned in commit 94f826
-
mentioned in commit 94f826
-
mentioned in commit 94f826
-
mentioned in commit 94f826
-
mentioned in commit 94f826
-
mentioned in commit 94f826
-
mentioned in commit 94f826
-
mentioned in commit 94f826
-
mentioned in commit 94f826
-
mentioned in commit 94f826
-
mentioned in commit 94f826
-
mentioned in commit 94f826
-
mentioned in commit 94f826
-
mentioned in commit 94f826
-
mentioned in commit 94f826
-
mentioned in commit 94f826
-
mentioned in commit 94f826
-
mentioned in commit 94f826
-
mentioned in commit 94f826
-
mentioned in commit 94f826
-
mentioned in commit 94f826
-
mentioned in commit 94f826
-
mentioned in commit 94f826
-
mentioned in commit 94f826
-
mentioned in commit 94f826