Commit 307f2fb95e9b96b3577916e73d92e104f8f26494

Authored by Hannes Frederic Sowa
Committed by David S. Miller
1 parent 9b4fe5fb0b

ipv6: only static routes qualify for equal cost multipathing

Static routes in this case are non-expiring routes which did not get
configured by autoconf or by icmpv6 redirects.

To make sure we actually get an ecmp route while searching for the first
one in this fib6_node's leafs, also make sure it matches the ecmp route
assumptions.

v2:
a) Removed RTF_EXPIRE check in dst.from chain. The check of RTF_ADDRCONF
   already ensures that this route, even if added again without
   RTF_EXPIRES (in case of a RA announcement with infinite timeout),
   does not cause the rt6i_nsiblings logic to go wrong if a later RA
   updates the expiration time.

v3:
a) Allow RTF_EXPIRES routes to enter the ecmp route set. We have to do so,
   because a PMTU event could update the RTF_EXPIRES flag and we would
   not count this route, if another route joins this set. We now filter
   only for RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC, which are flags that
   don't get changed after rt6_info construction.

Cc: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

Showing 1 changed file with 11 additions and 4 deletions Inline Diff

1 /* 1 /*
2 * Linux INET6 implementation 2 * Linux INET6 implementation
3 * Forwarding Information Database 3 * Forwarding Information Database
4 * 4 *
5 * Authors: 5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt> 6 * Pedro Roque <roque@di.fc.ul.pt>
7 * 7 *
8 * This program is free software; you can redistribute it and/or 8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License 9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version 10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version. 11 * 2 of the License, or (at your option) any later version.
12 */ 12 */
13 13
14 /* 14 /*
15 * Changes: 15 * Changes:
16 * Yuji SEKIYA @USAGI: Support default route on router node; 16 * Yuji SEKIYA @USAGI: Support default route on router node;
17 * remove ip6_null_entry from the top of 17 * remove ip6_null_entry from the top of
18 * routing table. 18 * routing table.
19 * Ville Nuorvala: Fixed routing subtrees. 19 * Ville Nuorvala: Fixed routing subtrees.
20 */ 20 */
21 21
22 #define pr_fmt(fmt) "IPv6: " fmt 22 #define pr_fmt(fmt) "IPv6: " fmt
23 23
24 #include <linux/errno.h> 24 #include <linux/errno.h>
25 #include <linux/types.h> 25 #include <linux/types.h>
26 #include <linux/net.h> 26 #include <linux/net.h>
27 #include <linux/route.h> 27 #include <linux/route.h>
28 #include <linux/netdevice.h> 28 #include <linux/netdevice.h>
29 #include <linux/in6.h> 29 #include <linux/in6.h>
30 #include <linux/init.h> 30 #include <linux/init.h>
31 #include <linux/list.h> 31 #include <linux/list.h>
32 #include <linux/slab.h> 32 #include <linux/slab.h>
33 33
34 #include <net/ipv6.h> 34 #include <net/ipv6.h>
35 #include <net/ndisc.h> 35 #include <net/ndisc.h>
36 #include <net/addrconf.h> 36 #include <net/addrconf.h>
37 37
38 #include <net/ip6_fib.h> 38 #include <net/ip6_fib.h>
39 #include <net/ip6_route.h> 39 #include <net/ip6_route.h>
40 40
41 #define RT6_DEBUG 2 41 #define RT6_DEBUG 2
42 42
43 #if RT6_DEBUG >= 3 43 #if RT6_DEBUG >= 3
44 #define RT6_TRACE(x...) pr_debug(x) 44 #define RT6_TRACE(x...) pr_debug(x)
45 #else 45 #else
46 #define RT6_TRACE(x...) do { ; } while (0) 46 #define RT6_TRACE(x...) do { ; } while (0)
47 #endif 47 #endif
48 48
49 static struct kmem_cache * fib6_node_kmem __read_mostly; 49 static struct kmem_cache * fib6_node_kmem __read_mostly;
50 50
51 enum fib_walk_state_t 51 enum fib_walk_state_t
52 { 52 {
53 #ifdef CONFIG_IPV6_SUBTREES 53 #ifdef CONFIG_IPV6_SUBTREES
54 FWS_S, 54 FWS_S,
55 #endif 55 #endif
56 FWS_L, 56 FWS_L,
57 FWS_R, 57 FWS_R,
58 FWS_C, 58 FWS_C,
59 FWS_U 59 FWS_U
60 }; 60 };
61 61
62 struct fib6_cleaner_t 62 struct fib6_cleaner_t
63 { 63 {
64 struct fib6_walker_t w; 64 struct fib6_walker_t w;
65 struct net *net; 65 struct net *net;
66 int (*func)(struct rt6_info *, void *arg); 66 int (*func)(struct rt6_info *, void *arg);
67 void *arg; 67 void *arg;
68 }; 68 };
69 69
70 static DEFINE_RWLOCK(fib6_walker_lock); 70 static DEFINE_RWLOCK(fib6_walker_lock);
71 71
72 #ifdef CONFIG_IPV6_SUBTREES 72 #ifdef CONFIG_IPV6_SUBTREES
73 #define FWS_INIT FWS_S 73 #define FWS_INIT FWS_S
74 #else 74 #else
75 #define FWS_INIT FWS_L 75 #define FWS_INIT FWS_L
76 #endif 76 #endif
77 77
78 static void fib6_prune_clones(struct net *net, struct fib6_node *fn, 78 static void fib6_prune_clones(struct net *net, struct fib6_node *fn,
79 struct rt6_info *rt); 79 struct rt6_info *rt);
80 static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn); 80 static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn);
81 static struct fib6_node *fib6_repair_tree(struct net *net, struct fib6_node *fn); 81 static struct fib6_node *fib6_repair_tree(struct net *net, struct fib6_node *fn);
82 static int fib6_walk(struct fib6_walker_t *w); 82 static int fib6_walk(struct fib6_walker_t *w);
83 static int fib6_walk_continue(struct fib6_walker_t *w); 83 static int fib6_walk_continue(struct fib6_walker_t *w);
84 84
85 /* 85 /*
86 * A routing update causes an increase of the serial number on the 86 * A routing update causes an increase of the serial number on the
87 * affected subtree. This allows for cached routes to be asynchronously 87 * affected subtree. This allows for cached routes to be asynchronously
88 * tested when modifications are made to the destination cache as a 88 * tested when modifications are made to the destination cache as a
89 * result of redirects, path MTU changes, etc. 89 * result of redirects, path MTU changes, etc.
90 */ 90 */
91 91
92 static __u32 rt_sernum; 92 static __u32 rt_sernum;
93 93
94 static void fib6_gc_timer_cb(unsigned long arg); 94 static void fib6_gc_timer_cb(unsigned long arg);
95 95
96 static LIST_HEAD(fib6_walkers); 96 static LIST_HEAD(fib6_walkers);
97 #define FOR_WALKERS(w) list_for_each_entry(w, &fib6_walkers, lh) 97 #define FOR_WALKERS(w) list_for_each_entry(w, &fib6_walkers, lh)
98 98
99 static inline void fib6_walker_link(struct fib6_walker_t *w) 99 static inline void fib6_walker_link(struct fib6_walker_t *w)
100 { 100 {
101 write_lock_bh(&fib6_walker_lock); 101 write_lock_bh(&fib6_walker_lock);
102 list_add(&w->lh, &fib6_walkers); 102 list_add(&w->lh, &fib6_walkers);
103 write_unlock_bh(&fib6_walker_lock); 103 write_unlock_bh(&fib6_walker_lock);
104 } 104 }
105 105
106 static inline void fib6_walker_unlink(struct fib6_walker_t *w) 106 static inline void fib6_walker_unlink(struct fib6_walker_t *w)
107 { 107 {
108 write_lock_bh(&fib6_walker_lock); 108 write_lock_bh(&fib6_walker_lock);
109 list_del(&w->lh); 109 list_del(&w->lh);
110 write_unlock_bh(&fib6_walker_lock); 110 write_unlock_bh(&fib6_walker_lock);
111 } 111 }
112 static __inline__ u32 fib6_new_sernum(void) 112 static __inline__ u32 fib6_new_sernum(void)
113 { 113 {
114 u32 n = ++rt_sernum; 114 u32 n = ++rt_sernum;
115 if ((__s32)n <= 0) 115 if ((__s32)n <= 0)
116 rt_sernum = n = 1; 116 rt_sernum = n = 1;
117 return n; 117 return n;
118 } 118 }
119 119
120 /* 120 /*
121 * Auxiliary address test functions for the radix tree. 121 * Auxiliary address test functions for the radix tree.
122 * 122 *
123 * These assume a 32bit processor (although it will work on 123 * These assume a 32bit processor (although it will work on
124 * 64bit processors) 124 * 64bit processors)
125 */ 125 */
126 126
127 /* 127 /*
128 * test bit 128 * test bit
129 */ 129 */
130 #if defined(__LITTLE_ENDIAN) 130 #if defined(__LITTLE_ENDIAN)
131 # define BITOP_BE32_SWIZZLE (0x1F & ~7) 131 # define BITOP_BE32_SWIZZLE (0x1F & ~7)
132 #else 132 #else
133 # define BITOP_BE32_SWIZZLE 0 133 # define BITOP_BE32_SWIZZLE 0
134 #endif 134 #endif
135 135
136 static __inline__ __be32 addr_bit_set(const void *token, int fn_bit) 136 static __inline__ __be32 addr_bit_set(const void *token, int fn_bit)
137 { 137 {
138 const __be32 *addr = token; 138 const __be32 *addr = token;
139 /* 139 /*
140 * Here, 140 * Here,
141 * 1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f) 141 * 1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)
142 * is optimized version of 142 * is optimized version of
143 * htonl(1 << ((~fn_bit)&0x1F)) 143 * htonl(1 << ((~fn_bit)&0x1F))
144 * See include/asm-generic/bitops/le.h. 144 * See include/asm-generic/bitops/le.h.
145 */ 145 */
146 return (__force __be32)(1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)) & 146 return (__force __be32)(1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)) &
147 addr[fn_bit >> 5]; 147 addr[fn_bit >> 5];
148 } 148 }
149 149
150 static __inline__ struct fib6_node * node_alloc(void) 150 static __inline__ struct fib6_node * node_alloc(void)
151 { 151 {
152 struct fib6_node *fn; 152 struct fib6_node *fn;
153 153
154 fn = kmem_cache_zalloc(fib6_node_kmem, GFP_ATOMIC); 154 fn = kmem_cache_zalloc(fib6_node_kmem, GFP_ATOMIC);
155 155
156 return fn; 156 return fn;
157 } 157 }
158 158
159 static __inline__ void node_free(struct fib6_node * fn) 159 static __inline__ void node_free(struct fib6_node * fn)
160 { 160 {
161 kmem_cache_free(fib6_node_kmem, fn); 161 kmem_cache_free(fib6_node_kmem, fn);
162 } 162 }
163 163
164 static __inline__ void rt6_release(struct rt6_info *rt) 164 static __inline__ void rt6_release(struct rt6_info *rt)
165 { 165 {
166 if (atomic_dec_and_test(&rt->rt6i_ref)) 166 if (atomic_dec_and_test(&rt->rt6i_ref))
167 dst_free(&rt->dst); 167 dst_free(&rt->dst);
168 } 168 }
169 169
170 static void fib6_link_table(struct net *net, struct fib6_table *tb) 170 static void fib6_link_table(struct net *net, struct fib6_table *tb)
171 { 171 {
172 unsigned int h; 172 unsigned int h;
173 173
174 /* 174 /*
175 * Initialize table lock at a single place to give lockdep a key, 175 * Initialize table lock at a single place to give lockdep a key,
176 * tables aren't visible prior to being linked to the list. 176 * tables aren't visible prior to being linked to the list.
177 */ 177 */
178 rwlock_init(&tb->tb6_lock); 178 rwlock_init(&tb->tb6_lock);
179 179
180 h = tb->tb6_id & (FIB6_TABLE_HASHSZ - 1); 180 h = tb->tb6_id & (FIB6_TABLE_HASHSZ - 1);
181 181
182 /* 182 /*
183 * No protection necessary, this is the only list mutation 183 * No protection necessary, this is the only list mutation
184 * operation, tables never disappear once they exist. 184 * operation, tables never disappear once they exist.
185 */ 185 */
186 hlist_add_head_rcu(&tb->tb6_hlist, &net->ipv6.fib_table_hash[h]); 186 hlist_add_head_rcu(&tb->tb6_hlist, &net->ipv6.fib_table_hash[h]);
187 } 187 }
188 188
189 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 189 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
190 190
191 static struct fib6_table *fib6_alloc_table(struct net *net, u32 id) 191 static struct fib6_table *fib6_alloc_table(struct net *net, u32 id)
192 { 192 {
193 struct fib6_table *table; 193 struct fib6_table *table;
194 194
195 table = kzalloc(sizeof(*table), GFP_ATOMIC); 195 table = kzalloc(sizeof(*table), GFP_ATOMIC);
196 if (table) { 196 if (table) {
197 table->tb6_id = id; 197 table->tb6_id = id;
198 table->tb6_root.leaf = net->ipv6.ip6_null_entry; 198 table->tb6_root.leaf = net->ipv6.ip6_null_entry;
199 table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; 199 table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
200 inet_peer_base_init(&table->tb6_peers); 200 inet_peer_base_init(&table->tb6_peers);
201 } 201 }
202 202
203 return table; 203 return table;
204 } 204 }
205 205
206 struct fib6_table *fib6_new_table(struct net *net, u32 id) 206 struct fib6_table *fib6_new_table(struct net *net, u32 id)
207 { 207 {
208 struct fib6_table *tb; 208 struct fib6_table *tb;
209 209
210 if (id == 0) 210 if (id == 0)
211 id = RT6_TABLE_MAIN; 211 id = RT6_TABLE_MAIN;
212 tb = fib6_get_table(net, id); 212 tb = fib6_get_table(net, id);
213 if (tb) 213 if (tb)
214 return tb; 214 return tb;
215 215
216 tb = fib6_alloc_table(net, id); 216 tb = fib6_alloc_table(net, id);
217 if (tb) 217 if (tb)
218 fib6_link_table(net, tb); 218 fib6_link_table(net, tb);
219 219
220 return tb; 220 return tb;
221 } 221 }
222 222
223 struct fib6_table *fib6_get_table(struct net *net, u32 id) 223 struct fib6_table *fib6_get_table(struct net *net, u32 id)
224 { 224 {
225 struct fib6_table *tb; 225 struct fib6_table *tb;
226 struct hlist_head *head; 226 struct hlist_head *head;
227 unsigned int h; 227 unsigned int h;
228 228
229 if (id == 0) 229 if (id == 0)
230 id = RT6_TABLE_MAIN; 230 id = RT6_TABLE_MAIN;
231 h = id & (FIB6_TABLE_HASHSZ - 1); 231 h = id & (FIB6_TABLE_HASHSZ - 1);
232 rcu_read_lock(); 232 rcu_read_lock();
233 head = &net->ipv6.fib_table_hash[h]; 233 head = &net->ipv6.fib_table_hash[h];
234 hlist_for_each_entry_rcu(tb, head, tb6_hlist) { 234 hlist_for_each_entry_rcu(tb, head, tb6_hlist) {
235 if (tb->tb6_id == id) { 235 if (tb->tb6_id == id) {
236 rcu_read_unlock(); 236 rcu_read_unlock();
237 return tb; 237 return tb;
238 } 238 }
239 } 239 }
240 rcu_read_unlock(); 240 rcu_read_unlock();
241 241
242 return NULL; 242 return NULL;
243 } 243 }
244 244
245 static void __net_init fib6_tables_init(struct net *net) 245 static void __net_init fib6_tables_init(struct net *net)
246 { 246 {
247 fib6_link_table(net, net->ipv6.fib6_main_tbl); 247 fib6_link_table(net, net->ipv6.fib6_main_tbl);
248 fib6_link_table(net, net->ipv6.fib6_local_tbl); 248 fib6_link_table(net, net->ipv6.fib6_local_tbl);
249 } 249 }
250 #else 250 #else
251 251
252 struct fib6_table *fib6_new_table(struct net *net, u32 id) 252 struct fib6_table *fib6_new_table(struct net *net, u32 id)
253 { 253 {
254 return fib6_get_table(net, id); 254 return fib6_get_table(net, id);
255 } 255 }
256 256
257 struct fib6_table *fib6_get_table(struct net *net, u32 id) 257 struct fib6_table *fib6_get_table(struct net *net, u32 id)
258 { 258 {
259 return net->ipv6.fib6_main_tbl; 259 return net->ipv6.fib6_main_tbl;
260 } 260 }
261 261
262 struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, 262 struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
263 int flags, pol_lookup_t lookup) 263 int flags, pol_lookup_t lookup)
264 { 264 {
265 return (struct dst_entry *) lookup(net, net->ipv6.fib6_main_tbl, fl6, flags); 265 return (struct dst_entry *) lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
266 } 266 }
267 267
268 static void __net_init fib6_tables_init(struct net *net) 268 static void __net_init fib6_tables_init(struct net *net)
269 { 269 {
270 fib6_link_table(net, net->ipv6.fib6_main_tbl); 270 fib6_link_table(net, net->ipv6.fib6_main_tbl);
271 } 271 }
272 272
273 #endif 273 #endif
274 274
275 static int fib6_dump_node(struct fib6_walker_t *w) 275 static int fib6_dump_node(struct fib6_walker_t *w)
276 { 276 {
277 int res; 277 int res;
278 struct rt6_info *rt; 278 struct rt6_info *rt;
279 279
280 for (rt = w->leaf; rt; rt = rt->dst.rt6_next) { 280 for (rt = w->leaf; rt; rt = rt->dst.rt6_next) {
281 res = rt6_dump_route(rt, w->args); 281 res = rt6_dump_route(rt, w->args);
282 if (res < 0) { 282 if (res < 0) {
283 /* Frame is full, suspend walking */ 283 /* Frame is full, suspend walking */
284 w->leaf = rt; 284 w->leaf = rt;
285 return 1; 285 return 1;
286 } 286 }
287 WARN_ON(res == 0); 287 WARN_ON(res == 0);
288 } 288 }
289 w->leaf = NULL; 289 w->leaf = NULL;
290 return 0; 290 return 0;
291 } 291 }
292 292
293 static void fib6_dump_end(struct netlink_callback *cb) 293 static void fib6_dump_end(struct netlink_callback *cb)
294 { 294 {
295 struct fib6_walker_t *w = (void*)cb->args[2]; 295 struct fib6_walker_t *w = (void*)cb->args[2];
296 296
297 if (w) { 297 if (w) {
298 if (cb->args[4]) { 298 if (cb->args[4]) {
299 cb->args[4] = 0; 299 cb->args[4] = 0;
300 fib6_walker_unlink(w); 300 fib6_walker_unlink(w);
301 } 301 }
302 cb->args[2] = 0; 302 cb->args[2] = 0;
303 kfree(w); 303 kfree(w);
304 } 304 }
305 cb->done = (void*)cb->args[3]; 305 cb->done = (void*)cb->args[3];
306 cb->args[1] = 3; 306 cb->args[1] = 3;
307 } 307 }
308 308
309 static int fib6_dump_done(struct netlink_callback *cb) 309 static int fib6_dump_done(struct netlink_callback *cb)
310 { 310 {
311 fib6_dump_end(cb); 311 fib6_dump_end(cb);
312 return cb->done ? cb->done(cb) : 0; 312 return cb->done ? cb->done(cb) : 0;
313 } 313 }
314 314
315 static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb, 315 static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
316 struct netlink_callback *cb) 316 struct netlink_callback *cb)
317 { 317 {
318 struct fib6_walker_t *w; 318 struct fib6_walker_t *w;
319 int res; 319 int res;
320 320
321 w = (void *)cb->args[2]; 321 w = (void *)cb->args[2];
322 w->root = &table->tb6_root; 322 w->root = &table->tb6_root;
323 323
324 if (cb->args[4] == 0) { 324 if (cb->args[4] == 0) {
325 w->count = 0; 325 w->count = 0;
326 w->skip = 0; 326 w->skip = 0;
327 327
328 read_lock_bh(&table->tb6_lock); 328 read_lock_bh(&table->tb6_lock);
329 res = fib6_walk(w); 329 res = fib6_walk(w);
330 read_unlock_bh(&table->tb6_lock); 330 read_unlock_bh(&table->tb6_lock);
331 if (res > 0) { 331 if (res > 0) {
332 cb->args[4] = 1; 332 cb->args[4] = 1;
333 cb->args[5] = w->root->fn_sernum; 333 cb->args[5] = w->root->fn_sernum;
334 } 334 }
335 } else { 335 } else {
336 if (cb->args[5] != w->root->fn_sernum) { 336 if (cb->args[5] != w->root->fn_sernum) {
337 /* Begin at the root if the tree changed */ 337 /* Begin at the root if the tree changed */
338 cb->args[5] = w->root->fn_sernum; 338 cb->args[5] = w->root->fn_sernum;
339 w->state = FWS_INIT; 339 w->state = FWS_INIT;
340 w->node = w->root; 340 w->node = w->root;
341 w->skip = w->count; 341 w->skip = w->count;
342 } else 342 } else
343 w->skip = 0; 343 w->skip = 0;
344 344
345 read_lock_bh(&table->tb6_lock); 345 read_lock_bh(&table->tb6_lock);
346 res = fib6_walk_continue(w); 346 res = fib6_walk_continue(w);
347 read_unlock_bh(&table->tb6_lock); 347 read_unlock_bh(&table->tb6_lock);
348 if (res <= 0) { 348 if (res <= 0) {
349 fib6_walker_unlink(w); 349 fib6_walker_unlink(w);
350 cb->args[4] = 0; 350 cb->args[4] = 0;
351 } 351 }
352 } 352 }
353 353
354 return res; 354 return res;
355 } 355 }
356 356
357 static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) 357 static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
358 { 358 {
359 struct net *net = sock_net(skb->sk); 359 struct net *net = sock_net(skb->sk);
360 unsigned int h, s_h; 360 unsigned int h, s_h;
361 unsigned int e = 0, s_e; 361 unsigned int e = 0, s_e;
362 struct rt6_rtnl_dump_arg arg; 362 struct rt6_rtnl_dump_arg arg;
363 struct fib6_walker_t *w; 363 struct fib6_walker_t *w;
364 struct fib6_table *tb; 364 struct fib6_table *tb;
365 struct hlist_head *head; 365 struct hlist_head *head;
366 int res = 0; 366 int res = 0;
367 367
368 s_h = cb->args[0]; 368 s_h = cb->args[0];
369 s_e = cb->args[1]; 369 s_e = cb->args[1];
370 370
371 w = (void *)cb->args[2]; 371 w = (void *)cb->args[2];
372 if (!w) { 372 if (!w) {
373 /* New dump: 373 /* New dump:
374 * 374 *
375 * 1. hook callback destructor. 375 * 1. hook callback destructor.
376 */ 376 */
377 cb->args[3] = (long)cb->done; 377 cb->args[3] = (long)cb->done;
378 cb->done = fib6_dump_done; 378 cb->done = fib6_dump_done;
379 379
380 /* 380 /*
381 * 2. allocate and initialize walker. 381 * 2. allocate and initialize walker.
382 */ 382 */
383 w = kzalloc(sizeof(*w), GFP_ATOMIC); 383 w = kzalloc(sizeof(*w), GFP_ATOMIC);
384 if (!w) 384 if (!w)
385 return -ENOMEM; 385 return -ENOMEM;
386 w->func = fib6_dump_node; 386 w->func = fib6_dump_node;
387 cb->args[2] = (long)w; 387 cb->args[2] = (long)w;
388 } 388 }
389 389
390 arg.skb = skb; 390 arg.skb = skb;
391 arg.cb = cb; 391 arg.cb = cb;
392 arg.net = net; 392 arg.net = net;
393 w->args = &arg; 393 w->args = &arg;
394 394
395 rcu_read_lock(); 395 rcu_read_lock();
396 for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) { 396 for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) {
397 e = 0; 397 e = 0;
398 head = &net->ipv6.fib_table_hash[h]; 398 head = &net->ipv6.fib_table_hash[h];
399 hlist_for_each_entry_rcu(tb, head, tb6_hlist) { 399 hlist_for_each_entry_rcu(tb, head, tb6_hlist) {
400 if (e < s_e) 400 if (e < s_e)
401 goto next; 401 goto next;
402 res = fib6_dump_table(tb, skb, cb); 402 res = fib6_dump_table(tb, skb, cb);
403 if (res != 0) 403 if (res != 0)
404 goto out; 404 goto out;
405 next: 405 next:
406 e++; 406 e++;
407 } 407 }
408 } 408 }
409 out: 409 out:
410 rcu_read_unlock(); 410 rcu_read_unlock();
411 cb->args[1] = e; 411 cb->args[1] = e;
412 cb->args[0] = h; 412 cb->args[0] = h;
413 413
414 res = res < 0 ? res : skb->len; 414 res = res < 0 ? res : skb->len;
415 if (res <= 0) 415 if (res <= 0)
416 fib6_dump_end(cb); 416 fib6_dump_end(cb);
417 return res; 417 return res;
418 } 418 }
419 419
420 /* 420 /*
421 * Routing Table 421 * Routing Table
422 * 422 *
423 * return the appropriate node for a routing tree "add" operation 423 * return the appropriate node for a routing tree "add" operation
424 * by either creating and inserting or by returning an existing 424 * by either creating and inserting or by returning an existing
425 * node. 425 * node.
426 */ 426 */
427 427
428 static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr, 428 static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr,
429 int addrlen, int plen, 429 int addrlen, int plen,
430 int offset, int allow_create, 430 int offset, int allow_create,
431 int replace_required) 431 int replace_required)
432 { 432 {
433 struct fib6_node *fn, *in, *ln; 433 struct fib6_node *fn, *in, *ln;
434 struct fib6_node *pn = NULL; 434 struct fib6_node *pn = NULL;
435 struct rt6key *key; 435 struct rt6key *key;
436 int bit; 436 int bit;
437 __be32 dir = 0; 437 __be32 dir = 0;
438 __u32 sernum = fib6_new_sernum(); 438 __u32 sernum = fib6_new_sernum();
439 439
440 RT6_TRACE("fib6_add_1\n"); 440 RT6_TRACE("fib6_add_1\n");
441 441
442 /* insert node in tree */ 442 /* insert node in tree */
443 443
444 fn = root; 444 fn = root;
445 445
446 do { 446 do {
447 key = (struct rt6key *)((u8 *)fn->leaf + offset); 447 key = (struct rt6key *)((u8 *)fn->leaf + offset);
448 448
449 /* 449 /*
450 * Prefix match 450 * Prefix match
451 */ 451 */
452 if (plen < fn->fn_bit || 452 if (plen < fn->fn_bit ||
453 !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit)) { 453 !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit)) {
454 if (!allow_create) { 454 if (!allow_create) {
455 if (replace_required) { 455 if (replace_required) {
456 pr_warn("Can't replace route, no match found\n"); 456 pr_warn("Can't replace route, no match found\n");
457 return ERR_PTR(-ENOENT); 457 return ERR_PTR(-ENOENT);
458 } 458 }
459 pr_warn("NLM_F_CREATE should be set when creating new route\n"); 459 pr_warn("NLM_F_CREATE should be set when creating new route\n");
460 } 460 }
461 goto insert_above; 461 goto insert_above;
462 } 462 }
463 463
464 /* 464 /*
465 * Exact match ? 465 * Exact match ?
466 */ 466 */
467 467
468 if (plen == fn->fn_bit) { 468 if (plen == fn->fn_bit) {
469 /* clean up an intermediate node */ 469 /* clean up an intermediate node */
470 if (!(fn->fn_flags & RTN_RTINFO)) { 470 if (!(fn->fn_flags & RTN_RTINFO)) {
471 rt6_release(fn->leaf); 471 rt6_release(fn->leaf);
472 fn->leaf = NULL; 472 fn->leaf = NULL;
473 } 473 }
474 474
475 fn->fn_sernum = sernum; 475 fn->fn_sernum = sernum;
476 476
477 return fn; 477 return fn;
478 } 478 }
479 479
480 /* 480 /*
481 * We have more bits to go 481 * We have more bits to go
482 */ 482 */
483 483
484 /* Try to walk down on tree. */ 484 /* Try to walk down on tree. */
485 fn->fn_sernum = sernum; 485 fn->fn_sernum = sernum;
486 dir = addr_bit_set(addr, fn->fn_bit); 486 dir = addr_bit_set(addr, fn->fn_bit);
487 pn = fn; 487 pn = fn;
488 fn = dir ? fn->right: fn->left; 488 fn = dir ? fn->right: fn->left;
489 } while (fn); 489 } while (fn);
490 490
491 if (!allow_create) { 491 if (!allow_create) {
492 /* We should not create new node because 492 /* We should not create new node because
493 * NLM_F_REPLACE was specified without NLM_F_CREATE 493 * NLM_F_REPLACE was specified without NLM_F_CREATE
494 * I assume it is safe to require NLM_F_CREATE when 494 * I assume it is safe to require NLM_F_CREATE when
495 * REPLACE flag is used! Later we may want to remove the 495 * REPLACE flag is used! Later we may want to remove the
496 * check for replace_required, because according 496 * check for replace_required, because according
497 * to netlink specification, NLM_F_CREATE 497 * to netlink specification, NLM_F_CREATE
498 * MUST be specified if new route is created. 498 * MUST be specified if new route is created.
499 * That would keep IPv6 consistent with IPv4 499 * That would keep IPv6 consistent with IPv4
500 */ 500 */
501 if (replace_required) { 501 if (replace_required) {
502 pr_warn("Can't replace route, no match found\n"); 502 pr_warn("Can't replace route, no match found\n");
503 return ERR_PTR(-ENOENT); 503 return ERR_PTR(-ENOENT);
504 } 504 }
505 pr_warn("NLM_F_CREATE should be set when creating new route\n"); 505 pr_warn("NLM_F_CREATE should be set when creating new route\n");
506 } 506 }
507 /* 507 /*
508 * We walked to the bottom of tree. 508 * We walked to the bottom of tree.
509 * Create new leaf node without children. 509 * Create new leaf node without children.
510 */ 510 */
511 511
512 ln = node_alloc(); 512 ln = node_alloc();
513 513
514 if (!ln) 514 if (!ln)
515 return ERR_PTR(-ENOMEM); 515 return ERR_PTR(-ENOMEM);
516 ln->fn_bit = plen; 516 ln->fn_bit = plen;
517 517
518 ln->parent = pn; 518 ln->parent = pn;
519 ln->fn_sernum = sernum; 519 ln->fn_sernum = sernum;
520 520
521 if (dir) 521 if (dir)
522 pn->right = ln; 522 pn->right = ln;
523 else 523 else
524 pn->left = ln; 524 pn->left = ln;
525 525
526 return ln; 526 return ln;
527 527
528 528
529 insert_above: 529 insert_above:
530 /* 530 /*
531 * split since we don't have a common prefix anymore or 531 * split since we don't have a common prefix anymore or
532 * we have a less significant route. 532 * we have a less significant route.
533 * we've to insert an intermediate node on the list 533 * we've to insert an intermediate node on the list
534 * this new node will point to the one we need to create 534 * this new node will point to the one we need to create
535 * and the current 535 * and the current
536 */ 536 */
537 537
538 pn = fn->parent; 538 pn = fn->parent;
539 539
540 /* find 1st bit in difference between the 2 addrs. 540 /* find 1st bit in difference between the 2 addrs.
541 541
542 See comment in __ipv6_addr_diff: bit may be an invalid value, 542 See comment in __ipv6_addr_diff: bit may be an invalid value,
543 but if it is >= plen, the value is ignored in any case. 543 but if it is >= plen, the value is ignored in any case.
544 */ 544 */
545 545
546 bit = __ipv6_addr_diff(addr, &key->addr, addrlen); 546 bit = __ipv6_addr_diff(addr, &key->addr, addrlen);
547 547
548 /* 548 /*
549 * (intermediate)[in] 549 * (intermediate)[in]
550 * / \ 550 * / \
551 * (new leaf node)[ln] (old node)[fn] 551 * (new leaf node)[ln] (old node)[fn]
552 */ 552 */
553 if (plen > bit) { 553 if (plen > bit) {
554 in = node_alloc(); 554 in = node_alloc();
555 ln = node_alloc(); 555 ln = node_alloc();
556 556
557 if (!in || !ln) { 557 if (!in || !ln) {
558 if (in) 558 if (in)
559 node_free(in); 559 node_free(in);
560 if (ln) 560 if (ln)
561 node_free(ln); 561 node_free(ln);
562 return ERR_PTR(-ENOMEM); 562 return ERR_PTR(-ENOMEM);
563 } 563 }
564 564
565 /* 565 /*
566 * new intermediate node. 566 * new intermediate node.
567 * RTN_RTINFO will 567 * RTN_RTINFO will
568 * be off since that an address that chooses one of 568 * be off since that an address that chooses one of
569 * the branches would not match less specific routes 569 * the branches would not match less specific routes
570 * in the other branch 570 * in the other branch
571 */ 571 */
572 572
573 in->fn_bit = bit; 573 in->fn_bit = bit;
574 574
575 in->parent = pn; 575 in->parent = pn;
576 in->leaf = fn->leaf; 576 in->leaf = fn->leaf;
577 atomic_inc(&in->leaf->rt6i_ref); 577 atomic_inc(&in->leaf->rt6i_ref);
578 578
579 in->fn_sernum = sernum; 579 in->fn_sernum = sernum;
580 580
581 /* update parent pointer */ 581 /* update parent pointer */
582 if (dir) 582 if (dir)
583 pn->right = in; 583 pn->right = in;
584 else 584 else
585 pn->left = in; 585 pn->left = in;
586 586
587 ln->fn_bit = plen; 587 ln->fn_bit = plen;
588 588
589 ln->parent = in; 589 ln->parent = in;
590 fn->parent = in; 590 fn->parent = in;
591 591
592 ln->fn_sernum = sernum; 592 ln->fn_sernum = sernum;
593 593
594 if (addr_bit_set(addr, bit)) { 594 if (addr_bit_set(addr, bit)) {
595 in->right = ln; 595 in->right = ln;
596 in->left = fn; 596 in->left = fn;
597 } else { 597 } else {
598 in->left = ln; 598 in->left = ln;
599 in->right = fn; 599 in->right = fn;
600 } 600 }
601 } else { /* plen <= bit */ 601 } else { /* plen <= bit */
602 602
603 /* 603 /*
604 * (new leaf node)[ln] 604 * (new leaf node)[ln]
605 * / \ 605 * / \
606 * (old node)[fn] NULL 606 * (old node)[fn] NULL
607 */ 607 */
608 608
609 ln = node_alloc(); 609 ln = node_alloc();
610 610
611 if (!ln) 611 if (!ln)
612 return ERR_PTR(-ENOMEM); 612 return ERR_PTR(-ENOMEM);
613 613
614 ln->fn_bit = plen; 614 ln->fn_bit = plen;
615 615
616 ln->parent = pn; 616 ln->parent = pn;
617 617
618 ln->fn_sernum = sernum; 618 ln->fn_sernum = sernum;
619 619
620 if (dir) 620 if (dir)
621 pn->right = ln; 621 pn->right = ln;
622 else 622 else
623 pn->left = ln; 623 pn->left = ln;
624 624
625 if (addr_bit_set(&key->addr, plen)) 625 if (addr_bit_set(&key->addr, plen))
626 ln->right = fn; 626 ln->right = fn;
627 else 627 else
628 ln->left = fn; 628 ln->left = fn;
629 629
630 fn->parent = ln; 630 fn->parent = ln;
631 } 631 }
632 return ln; 632 return ln;
633 } 633 }
634 634
635 static inline bool rt6_qualify_for_ecmp(struct rt6_info *rt)
636 {
637 return (rt->rt6i_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) ==
638 RTF_GATEWAY;
639 }
640
635 /* 641 /*
636 * Insert routing information in a node. 642 * Insert routing information in a node.
637 */ 643 */
638 644
639 static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, 645 static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
640 struct nl_info *info) 646 struct nl_info *info)
641 { 647 {
642 struct rt6_info *iter = NULL; 648 struct rt6_info *iter = NULL;
643 struct rt6_info **ins; 649 struct rt6_info **ins;
644 int replace = (info->nlh && 650 int replace = (info->nlh &&
645 (info->nlh->nlmsg_flags & NLM_F_REPLACE)); 651 (info->nlh->nlmsg_flags & NLM_F_REPLACE));
646 int add = (!info->nlh || 652 int add = (!info->nlh ||
647 (info->nlh->nlmsg_flags & NLM_F_CREATE)); 653 (info->nlh->nlmsg_flags & NLM_F_CREATE));
648 int found = 0; 654 int found = 0;
655 bool rt_can_ecmp = rt6_qualify_for_ecmp(rt);
649 656
650 ins = &fn->leaf; 657 ins = &fn->leaf;
651 658
652 for (iter = fn->leaf; iter; iter = iter->dst.rt6_next) { 659 for (iter = fn->leaf; iter; iter = iter->dst.rt6_next) {
653 /* 660 /*
654 * Search for duplicates 661 * Search for duplicates
655 */ 662 */
656 663
657 if (iter->rt6i_metric == rt->rt6i_metric) { 664 if (iter->rt6i_metric == rt->rt6i_metric) {
658 /* 665 /*
659 * Same priority level 666 * Same priority level
660 */ 667 */
661 if (info->nlh && 668 if (info->nlh &&
662 (info->nlh->nlmsg_flags & NLM_F_EXCL)) 669 (info->nlh->nlmsg_flags & NLM_F_EXCL))
663 return -EEXIST; 670 return -EEXIST;
664 if (replace) { 671 if (replace) {
665 found++; 672 found++;
666 break; 673 break;
667 } 674 }
668 675
669 if (iter->dst.dev == rt->dst.dev && 676 if (iter->dst.dev == rt->dst.dev &&
670 iter->rt6i_idev == rt->rt6i_idev && 677 iter->rt6i_idev == rt->rt6i_idev &&
671 ipv6_addr_equal(&iter->rt6i_gateway, 678 ipv6_addr_equal(&iter->rt6i_gateway,
672 &rt->rt6i_gateway)) { 679 &rt->rt6i_gateway)) {
673 if (rt->rt6i_nsiblings) 680 if (rt->rt6i_nsiblings)
674 rt->rt6i_nsiblings = 0; 681 rt->rt6i_nsiblings = 0;
675 if (!(iter->rt6i_flags & RTF_EXPIRES)) 682 if (!(iter->rt6i_flags & RTF_EXPIRES))
676 return -EEXIST; 683 return -EEXIST;
677 if (!(rt->rt6i_flags & RTF_EXPIRES)) 684 if (!(rt->rt6i_flags & RTF_EXPIRES))
678 rt6_clean_expires(iter); 685 rt6_clean_expires(iter);
679 else 686 else
680 rt6_set_expires(iter, rt->dst.expires); 687 rt6_set_expires(iter, rt->dst.expires);
681 return -EEXIST; 688 return -EEXIST;
682 } 689 }
683 /* If we have the same destination and the same metric, 690 /* If we have the same destination and the same metric,
684 * but not the same gateway, then the route we try to 691 * but not the same gateway, then the route we try to
685 * add is sibling to this route, increment our counter 692 * add is sibling to this route, increment our counter
686 * of siblings, and later we will add our route to the 693 * of siblings, and later we will add our route to the
687 * list. 694 * list.
688 * Only static routes (which don't have flag 695 * Only static routes (which don't have flag
689 * RTF_EXPIRES) are used for ECMPv6. 696 * RTF_EXPIRES) are used for ECMPv6.
690 * 697 *
691 * To avoid long list, we only had siblings if the 698 * To avoid long list, we only had siblings if the
692 * route have a gateway. 699 * route have a gateway.
693 */ 700 */
694 if (rt->rt6i_flags & RTF_GATEWAY && 701 if (rt_can_ecmp &&
695 !(rt->rt6i_flags & RTF_EXPIRES) && 702 rt6_qualify_for_ecmp(iter))
696 !(iter->rt6i_flags & RTF_EXPIRES))
697 rt->rt6i_nsiblings++; 703 rt->rt6i_nsiblings++;
698 } 704 }
699 705
700 if (iter->rt6i_metric > rt->rt6i_metric) 706 if (iter->rt6i_metric > rt->rt6i_metric)
701 break; 707 break;
702 708
703 ins = &iter->dst.rt6_next; 709 ins = &iter->dst.rt6_next;
704 } 710 }
705 711
706 /* Reset round-robin state, if necessary */ 712 /* Reset round-robin state, if necessary */
707 if (ins == &fn->leaf) 713 if (ins == &fn->leaf)
708 fn->rr_ptr = NULL; 714 fn->rr_ptr = NULL;
709 715
710 /* Link this route to others same route. */ 716 /* Link this route to others same route. */
711 if (rt->rt6i_nsiblings) { 717 if (rt->rt6i_nsiblings) {
712 unsigned int rt6i_nsiblings; 718 unsigned int rt6i_nsiblings;
713 struct rt6_info *sibling, *temp_sibling; 719 struct rt6_info *sibling, *temp_sibling;
714 720
715 /* Find the first route that have the same metric */ 721 /* Find the first route that have the same metric */
716 sibling = fn->leaf; 722 sibling = fn->leaf;
717 while (sibling) { 723 while (sibling) {
718 if (sibling->rt6i_metric == rt->rt6i_metric) { 724 if (sibling->rt6i_metric == rt->rt6i_metric &&
725 rt6_qualify_for_ecmp(sibling)) {
719 list_add_tail(&rt->rt6i_siblings, 726 list_add_tail(&rt->rt6i_siblings,
720 &sibling->rt6i_siblings); 727 &sibling->rt6i_siblings);
721 break; 728 break;
722 } 729 }
723 sibling = sibling->dst.rt6_next; 730 sibling = sibling->dst.rt6_next;
724 } 731 }
725 /* For each sibling in the list, increment the counter of 732 /* For each sibling in the list, increment the counter of
726 * siblings. BUG() if counters does not match, list of siblings 733 * siblings. BUG() if counters does not match, list of siblings
727 * is broken! 734 * is broken!
728 */ 735 */
729 rt6i_nsiblings = 0; 736 rt6i_nsiblings = 0;
730 list_for_each_entry_safe(sibling, temp_sibling, 737 list_for_each_entry_safe(sibling, temp_sibling,
731 &rt->rt6i_siblings, rt6i_siblings) { 738 &rt->rt6i_siblings, rt6i_siblings) {
732 sibling->rt6i_nsiblings++; 739 sibling->rt6i_nsiblings++;
733 BUG_ON(sibling->rt6i_nsiblings != rt->rt6i_nsiblings); 740 BUG_ON(sibling->rt6i_nsiblings != rt->rt6i_nsiblings);
734 rt6i_nsiblings++; 741 rt6i_nsiblings++;
735 } 742 }
736 BUG_ON(rt6i_nsiblings != rt->rt6i_nsiblings); 743 BUG_ON(rt6i_nsiblings != rt->rt6i_nsiblings);
737 } 744 }
738 745
739 /* 746 /*
740 * insert node 747 * insert node
741 */ 748 */
742 if (!replace) { 749 if (!replace) {
743 if (!add) 750 if (!add)
744 pr_warn("NLM_F_CREATE should be set when creating new route\n"); 751 pr_warn("NLM_F_CREATE should be set when creating new route\n");
745 752
746 add: 753 add:
747 rt->dst.rt6_next = iter; 754 rt->dst.rt6_next = iter;
748 *ins = rt; 755 *ins = rt;
749 rt->rt6i_node = fn; 756 rt->rt6i_node = fn;
750 atomic_inc(&rt->rt6i_ref); 757 atomic_inc(&rt->rt6i_ref);
751 inet6_rt_notify(RTM_NEWROUTE, rt, info); 758 inet6_rt_notify(RTM_NEWROUTE, rt, info);
752 info->nl_net->ipv6.rt6_stats->fib_rt_entries++; 759 info->nl_net->ipv6.rt6_stats->fib_rt_entries++;
753 760
754 if (!(fn->fn_flags & RTN_RTINFO)) { 761 if (!(fn->fn_flags & RTN_RTINFO)) {
755 info->nl_net->ipv6.rt6_stats->fib_route_nodes++; 762 info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
756 fn->fn_flags |= RTN_RTINFO; 763 fn->fn_flags |= RTN_RTINFO;
757 } 764 }
758 765
759 } else { 766 } else {
760 if (!found) { 767 if (!found) {
761 if (add) 768 if (add)
762 goto add; 769 goto add;
763 pr_warn("NLM_F_REPLACE set, but no existing node found!\n"); 770 pr_warn("NLM_F_REPLACE set, but no existing node found!\n");
764 return -ENOENT; 771 return -ENOENT;
765 } 772 }
766 *ins = rt; 773 *ins = rt;
767 rt->rt6i_node = fn; 774 rt->rt6i_node = fn;
768 rt->dst.rt6_next = iter->dst.rt6_next; 775 rt->dst.rt6_next = iter->dst.rt6_next;
769 atomic_inc(&rt->rt6i_ref); 776 atomic_inc(&rt->rt6i_ref);
770 inet6_rt_notify(RTM_NEWROUTE, rt, info); 777 inet6_rt_notify(RTM_NEWROUTE, rt, info);
771 rt6_release(iter); 778 rt6_release(iter);
772 if (!(fn->fn_flags & RTN_RTINFO)) { 779 if (!(fn->fn_flags & RTN_RTINFO)) {
773 info->nl_net->ipv6.rt6_stats->fib_route_nodes++; 780 info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
774 fn->fn_flags |= RTN_RTINFO; 781 fn->fn_flags |= RTN_RTINFO;
775 } 782 }
776 } 783 }
777 784
778 return 0; 785 return 0;
779 } 786 }
780 787
781 static __inline__ void fib6_start_gc(struct net *net, struct rt6_info *rt) 788 static __inline__ void fib6_start_gc(struct net *net, struct rt6_info *rt)
782 { 789 {
783 if (!timer_pending(&net->ipv6.ip6_fib_timer) && 790 if (!timer_pending(&net->ipv6.ip6_fib_timer) &&
784 (rt->rt6i_flags & (RTF_EXPIRES | RTF_CACHE))) 791 (rt->rt6i_flags & (RTF_EXPIRES | RTF_CACHE)))
785 mod_timer(&net->ipv6.ip6_fib_timer, 792 mod_timer(&net->ipv6.ip6_fib_timer,
786 jiffies + net->ipv6.sysctl.ip6_rt_gc_interval); 793 jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
787 } 794 }
788 795
789 void fib6_force_start_gc(struct net *net) 796 void fib6_force_start_gc(struct net *net)
790 { 797 {
791 if (!timer_pending(&net->ipv6.ip6_fib_timer)) 798 if (!timer_pending(&net->ipv6.ip6_fib_timer))
792 mod_timer(&net->ipv6.ip6_fib_timer, 799 mod_timer(&net->ipv6.ip6_fib_timer,
793 jiffies + net->ipv6.sysctl.ip6_rt_gc_interval); 800 jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
794 } 801 }
795 802
796 /* 803 /*
797 * Add routing information to the routing tree. 804 * Add routing information to the routing tree.
798 * <destination addr>/<source addr> 805 * <destination addr>/<source addr>
799 * with source addr info in sub-trees 806 * with source addr info in sub-trees
800 */ 807 */
801 808
802 int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) 809 int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
803 { 810 {
804 struct fib6_node *fn, *pn = NULL; 811 struct fib6_node *fn, *pn = NULL;
805 int err = -ENOMEM; 812 int err = -ENOMEM;
806 int allow_create = 1; 813 int allow_create = 1;
807 int replace_required = 0; 814 int replace_required = 0;
808 815
809 if (info->nlh) { 816 if (info->nlh) {
810 if (!(info->nlh->nlmsg_flags & NLM_F_CREATE)) 817 if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
811 allow_create = 0; 818 allow_create = 0;
812 if (info->nlh->nlmsg_flags & NLM_F_REPLACE) 819 if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
813 replace_required = 1; 820 replace_required = 1;
814 } 821 }
815 if (!allow_create && !replace_required) 822 if (!allow_create && !replace_required)
816 pr_warn("RTM_NEWROUTE with no NLM_F_CREATE or NLM_F_REPLACE\n"); 823 pr_warn("RTM_NEWROUTE with no NLM_F_CREATE or NLM_F_REPLACE\n");
817 824
818 fn = fib6_add_1(root, &rt->rt6i_dst.addr, sizeof(struct in6_addr), 825 fn = fib6_add_1(root, &rt->rt6i_dst.addr, sizeof(struct in6_addr),
819 rt->rt6i_dst.plen, offsetof(struct rt6_info, rt6i_dst), 826 rt->rt6i_dst.plen, offsetof(struct rt6_info, rt6i_dst),
820 allow_create, replace_required); 827 allow_create, replace_required);
821 828
822 if (IS_ERR(fn)) { 829 if (IS_ERR(fn)) {
823 err = PTR_ERR(fn); 830 err = PTR_ERR(fn);
824 goto out; 831 goto out;
825 } 832 }
826 833
827 pn = fn; 834 pn = fn;
828 835
829 #ifdef CONFIG_IPV6_SUBTREES 836 #ifdef CONFIG_IPV6_SUBTREES
830 if (rt->rt6i_src.plen) { 837 if (rt->rt6i_src.plen) {
831 struct fib6_node *sn; 838 struct fib6_node *sn;
832 839
833 if (!fn->subtree) { 840 if (!fn->subtree) {
834 struct fib6_node *sfn; 841 struct fib6_node *sfn;
835 842
836 /* 843 /*
837 * Create subtree. 844 * Create subtree.
838 * 845 *
839 * fn[main tree] 846 * fn[main tree]
840 * | 847 * |
841 * sfn[subtree root] 848 * sfn[subtree root]
842 * \ 849 * \
843 * sn[new leaf node] 850 * sn[new leaf node]
844 */ 851 */
845 852
846 /* Create subtree root node */ 853 /* Create subtree root node */
847 sfn = node_alloc(); 854 sfn = node_alloc();
848 if (!sfn) 855 if (!sfn)
849 goto st_failure; 856 goto st_failure;
850 857
851 sfn->leaf = info->nl_net->ipv6.ip6_null_entry; 858 sfn->leaf = info->nl_net->ipv6.ip6_null_entry;
852 atomic_inc(&info->nl_net->ipv6.ip6_null_entry->rt6i_ref); 859 atomic_inc(&info->nl_net->ipv6.ip6_null_entry->rt6i_ref);
853 sfn->fn_flags = RTN_ROOT; 860 sfn->fn_flags = RTN_ROOT;
854 sfn->fn_sernum = fib6_new_sernum(); 861 sfn->fn_sernum = fib6_new_sernum();
855 862
856 /* Now add the first leaf node to new subtree */ 863 /* Now add the first leaf node to new subtree */
857 864
858 sn = fib6_add_1(sfn, &rt->rt6i_src.addr, 865 sn = fib6_add_1(sfn, &rt->rt6i_src.addr,
859 sizeof(struct in6_addr), rt->rt6i_src.plen, 866 sizeof(struct in6_addr), rt->rt6i_src.plen,
860 offsetof(struct rt6_info, rt6i_src), 867 offsetof(struct rt6_info, rt6i_src),
861 allow_create, replace_required); 868 allow_create, replace_required);
862 869
863 if (IS_ERR(sn)) { 870 if (IS_ERR(sn)) {
864 /* If it is failed, discard just allocated 871 /* If it is failed, discard just allocated
865 root, and then (in st_failure) stale node 872 root, and then (in st_failure) stale node
866 in main tree. 873 in main tree.
867 */ 874 */
868 node_free(sfn); 875 node_free(sfn);
869 err = PTR_ERR(sn); 876 err = PTR_ERR(sn);
870 goto st_failure; 877 goto st_failure;
871 } 878 }
872 879
873 /* Now link new subtree to main tree */ 880 /* Now link new subtree to main tree */
874 sfn->parent = fn; 881 sfn->parent = fn;
875 fn->subtree = sfn; 882 fn->subtree = sfn;
876 } else { 883 } else {
877 sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr, 884 sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr,
878 sizeof(struct in6_addr), rt->rt6i_src.plen, 885 sizeof(struct in6_addr), rt->rt6i_src.plen,
879 offsetof(struct rt6_info, rt6i_src), 886 offsetof(struct rt6_info, rt6i_src),
880 allow_create, replace_required); 887 allow_create, replace_required);
881 888
882 if (IS_ERR(sn)) { 889 if (IS_ERR(sn)) {
883 err = PTR_ERR(sn); 890 err = PTR_ERR(sn);
884 goto st_failure; 891 goto st_failure;
885 } 892 }
886 } 893 }
887 894
888 if (!fn->leaf) { 895 if (!fn->leaf) {
889 fn->leaf = rt; 896 fn->leaf = rt;
890 atomic_inc(&rt->rt6i_ref); 897 atomic_inc(&rt->rt6i_ref);
891 } 898 }
892 fn = sn; 899 fn = sn;
893 } 900 }
894 #endif 901 #endif
895 902
896 err = fib6_add_rt2node(fn, rt, info); 903 err = fib6_add_rt2node(fn, rt, info);
897 if (!err) { 904 if (!err) {
898 fib6_start_gc(info->nl_net, rt); 905 fib6_start_gc(info->nl_net, rt);
899 if (!(rt->rt6i_flags & RTF_CACHE)) 906 if (!(rt->rt6i_flags & RTF_CACHE))
900 fib6_prune_clones(info->nl_net, pn, rt); 907 fib6_prune_clones(info->nl_net, pn, rt);
901 } 908 }
902 909
903 out: 910 out:
904 if (err) { 911 if (err) {
905 #ifdef CONFIG_IPV6_SUBTREES 912 #ifdef CONFIG_IPV6_SUBTREES
906 /* 913 /*
907 * If fib6_add_1 has cleared the old leaf pointer in the 914 * If fib6_add_1 has cleared the old leaf pointer in the
908 * super-tree leaf node we have to find a new one for it. 915 * super-tree leaf node we have to find a new one for it.
909 */ 916 */
910 if (pn != fn && pn->leaf == rt) { 917 if (pn != fn && pn->leaf == rt) {
911 pn->leaf = NULL; 918 pn->leaf = NULL;
912 atomic_dec(&rt->rt6i_ref); 919 atomic_dec(&rt->rt6i_ref);
913 } 920 }
914 if (pn != fn && !pn->leaf && !(pn->fn_flags & RTN_RTINFO)) { 921 if (pn != fn && !pn->leaf && !(pn->fn_flags & RTN_RTINFO)) {
915 pn->leaf = fib6_find_prefix(info->nl_net, pn); 922 pn->leaf = fib6_find_prefix(info->nl_net, pn);
916 #if RT6_DEBUG >= 2 923 #if RT6_DEBUG >= 2
917 if (!pn->leaf) { 924 if (!pn->leaf) {
918 WARN_ON(pn->leaf == NULL); 925 WARN_ON(pn->leaf == NULL);
919 pn->leaf = info->nl_net->ipv6.ip6_null_entry; 926 pn->leaf = info->nl_net->ipv6.ip6_null_entry;
920 } 927 }
921 #endif 928 #endif
922 atomic_inc(&pn->leaf->rt6i_ref); 929 atomic_inc(&pn->leaf->rt6i_ref);
923 } 930 }
924 #endif 931 #endif
925 dst_free(&rt->dst); 932 dst_free(&rt->dst);
926 } 933 }
927 return err; 934 return err;
928 935
929 #ifdef CONFIG_IPV6_SUBTREES 936 #ifdef CONFIG_IPV6_SUBTREES
930 /* Subtree creation failed, probably main tree node 937 /* Subtree creation failed, probably main tree node
931 is orphan. If it is, shoot it. 938 is orphan. If it is, shoot it.
932 */ 939 */
933 st_failure: 940 st_failure:
934 if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT))) 941 if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)))
935 fib6_repair_tree(info->nl_net, fn); 942 fib6_repair_tree(info->nl_net, fn);
936 dst_free(&rt->dst); 943 dst_free(&rt->dst);
937 return err; 944 return err;
938 #endif 945 #endif
939 } 946 }
940 947
/*
 *	Routing tree lookup
 *
 *	One lookup_args entry describes one level of the lookup.  The
 *	entries are passed as an array; an entry whose offset is 0 ends it.
 */

struct lookup_args {
	int			offset;		/* byte offset of the rt6key inside rt6_info */
	const struct in6_addr	*addr;		/* address to match at this level */
};
950 957
951 static struct fib6_node * fib6_lookup_1(struct fib6_node *root, 958 static struct fib6_node * fib6_lookup_1(struct fib6_node *root,
952 struct lookup_args *args) 959 struct lookup_args *args)
953 { 960 {
954 struct fib6_node *fn; 961 struct fib6_node *fn;
955 __be32 dir; 962 __be32 dir;
956 963
957 if (unlikely(args->offset == 0)) 964 if (unlikely(args->offset == 0))
958 return NULL; 965 return NULL;
959 966
960 /* 967 /*
961 * Descend on a tree 968 * Descend on a tree
962 */ 969 */
963 970
964 fn = root; 971 fn = root;
965 972
966 for (;;) { 973 for (;;) {
967 struct fib6_node *next; 974 struct fib6_node *next;
968 975
969 dir = addr_bit_set(args->addr, fn->fn_bit); 976 dir = addr_bit_set(args->addr, fn->fn_bit);
970 977
971 next = dir ? fn->right : fn->left; 978 next = dir ? fn->right : fn->left;
972 979
973 if (next) { 980 if (next) {
974 fn = next; 981 fn = next;
975 continue; 982 continue;
976 } 983 }
977 break; 984 break;
978 } 985 }
979 986
980 while (fn) { 987 while (fn) {
981 if (FIB6_SUBTREE(fn) || fn->fn_flags & RTN_RTINFO) { 988 if (FIB6_SUBTREE(fn) || fn->fn_flags & RTN_RTINFO) {
982 struct rt6key *key; 989 struct rt6key *key;
983 990
984 key = (struct rt6key *) ((u8 *) fn->leaf + 991 key = (struct rt6key *) ((u8 *) fn->leaf +
985 args->offset); 992 args->offset);
986 993
987 if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) { 994 if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) {
988 #ifdef CONFIG_IPV6_SUBTREES 995 #ifdef CONFIG_IPV6_SUBTREES
989 if (fn->subtree) 996 if (fn->subtree)
990 fn = fib6_lookup_1(fn->subtree, args + 1); 997 fn = fib6_lookup_1(fn->subtree, args + 1);
991 #endif 998 #endif
992 if (!fn || fn->fn_flags & RTN_RTINFO) 999 if (!fn || fn->fn_flags & RTN_RTINFO)
993 return fn; 1000 return fn;
994 } 1001 }
995 } 1002 }
996 1003
997 if (fn->fn_flags & RTN_ROOT) 1004 if (fn->fn_flags & RTN_ROOT)
998 break; 1005 break;
999 1006
1000 fn = fn->parent; 1007 fn = fn->parent;
1001 } 1008 }
1002 1009
1003 return NULL; 1010 return NULL;
1004 } 1011 }
1005 1012
1006 struct fib6_node * fib6_lookup(struct fib6_node *root, const struct in6_addr *daddr, 1013 struct fib6_node * fib6_lookup(struct fib6_node *root, const struct in6_addr *daddr,
1007 const struct in6_addr *saddr) 1014 const struct in6_addr *saddr)
1008 { 1015 {
1009 struct fib6_node *fn; 1016 struct fib6_node *fn;
1010 struct lookup_args args[] = { 1017 struct lookup_args args[] = {
1011 { 1018 {
1012 .offset = offsetof(struct rt6_info, rt6i_dst), 1019 .offset = offsetof(struct rt6_info, rt6i_dst),
1013 .addr = daddr, 1020 .addr = daddr,
1014 }, 1021 },
1015 #ifdef CONFIG_IPV6_SUBTREES 1022 #ifdef CONFIG_IPV6_SUBTREES
1016 { 1023 {
1017 .offset = offsetof(struct rt6_info, rt6i_src), 1024 .offset = offsetof(struct rt6_info, rt6i_src),
1018 .addr = saddr, 1025 .addr = saddr,
1019 }, 1026 },
1020 #endif 1027 #endif
1021 { 1028 {
1022 .offset = 0, /* sentinel */ 1029 .offset = 0, /* sentinel */
1023 } 1030 }
1024 }; 1031 };
1025 1032
1026 fn = fib6_lookup_1(root, daddr ? args : args + 1); 1033 fn = fib6_lookup_1(root, daddr ? args : args + 1);
1027 if (!fn || fn->fn_flags & RTN_TL_ROOT) 1034 if (!fn || fn->fn_flags & RTN_TL_ROOT)
1028 fn = root; 1035 fn = root;
1029 1036
1030 return fn; 1037 return fn;
1031 } 1038 }
1032 1039
1033 /* 1040 /*
1034 * Get node with specified destination prefix (and source prefix, 1041 * Get node with specified destination prefix (and source prefix,
1035 * if subtrees are used) 1042 * if subtrees are used)
1036 */ 1043 */
1037 1044
1038 1045
1039 static struct fib6_node * fib6_locate_1(struct fib6_node *root, 1046 static struct fib6_node * fib6_locate_1(struct fib6_node *root,
1040 const struct in6_addr *addr, 1047 const struct in6_addr *addr,
1041 int plen, int offset) 1048 int plen, int offset)
1042 { 1049 {
1043 struct fib6_node *fn; 1050 struct fib6_node *fn;
1044 1051
1045 for (fn = root; fn ; ) { 1052 for (fn = root; fn ; ) {
1046 struct rt6key *key = (struct rt6key *)((u8 *)fn->leaf + offset); 1053 struct rt6key *key = (struct rt6key *)((u8 *)fn->leaf + offset);
1047 1054
1048 /* 1055 /*
1049 * Prefix match 1056 * Prefix match
1050 */ 1057 */
1051 if (plen < fn->fn_bit || 1058 if (plen < fn->fn_bit ||
1052 !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit)) 1059 !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit))
1053 return NULL; 1060 return NULL;
1054 1061
1055 if (plen == fn->fn_bit) 1062 if (plen == fn->fn_bit)
1056 return fn; 1063 return fn;
1057 1064
1058 /* 1065 /*
1059 * We have more bits to go 1066 * We have more bits to go
1060 */ 1067 */
1061 if (addr_bit_set(addr, fn->fn_bit)) 1068 if (addr_bit_set(addr, fn->fn_bit))
1062 fn = fn->right; 1069 fn = fn->right;
1063 else 1070 else
1064 fn = fn->left; 1071 fn = fn->left;
1065 } 1072 }
1066 return NULL; 1073 return NULL;
1067 } 1074 }
1068 1075
1069 struct fib6_node * fib6_locate(struct fib6_node *root, 1076 struct fib6_node * fib6_locate(struct fib6_node *root,
1070 const struct in6_addr *daddr, int dst_len, 1077 const struct in6_addr *daddr, int dst_len,
1071 const struct in6_addr *saddr, int src_len) 1078 const struct in6_addr *saddr, int src_len)
1072 { 1079 {
1073 struct fib6_node *fn; 1080 struct fib6_node *fn;
1074 1081
1075 fn = fib6_locate_1(root, daddr, dst_len, 1082 fn = fib6_locate_1(root, daddr, dst_len,
1076 offsetof(struct rt6_info, rt6i_dst)); 1083 offsetof(struct rt6_info, rt6i_dst));
1077 1084
1078 #ifdef CONFIG_IPV6_SUBTREES 1085 #ifdef CONFIG_IPV6_SUBTREES
1079 if (src_len) { 1086 if (src_len) {
1080 WARN_ON(saddr == NULL); 1087 WARN_ON(saddr == NULL);
1081 if (fn && fn->subtree) 1088 if (fn && fn->subtree)
1082 fn = fib6_locate_1(fn->subtree, saddr, src_len, 1089 fn = fib6_locate_1(fn->subtree, saddr, src_len,
1083 offsetof(struct rt6_info, rt6i_src)); 1090 offsetof(struct rt6_info, rt6i_src));
1084 } 1091 }
1085 #endif 1092 #endif
1086 1093
1087 if (fn && fn->fn_flags & RTN_RTINFO) 1094 if (fn && fn->fn_flags & RTN_RTINFO)
1088 return fn; 1095 return fn;
1089 1096
1090 return NULL; 1097 return NULL;
1091 } 1098 }
1092 1099
1093 1100
1094 /* 1101 /*
1095 * Deletion 1102 * Deletion
1096 * 1103 *
1097 */ 1104 */
1098 1105
1099 static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn) 1106 static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn)
1100 { 1107 {
1101 if (fn->fn_flags & RTN_ROOT) 1108 if (fn->fn_flags & RTN_ROOT)
1102 return net->ipv6.ip6_null_entry; 1109 return net->ipv6.ip6_null_entry;
1103 1110
1104 while (fn) { 1111 while (fn) {
1105 if (fn->left) 1112 if (fn->left)
1106 return fn->left->leaf; 1113 return fn->left->leaf;
1107 if (fn->right) 1114 if (fn->right)
1108 return fn->right->leaf; 1115 return fn->right->leaf;
1109 1116
1110 fn = FIB6_SUBTREE(fn); 1117 fn = FIB6_SUBTREE(fn);
1111 } 1118 }
1112 return NULL; 1119 return NULL;
1113 } 1120 }
1114 1121
1115 /* 1122 /*
1116 * Called to trim the tree of intermediate nodes when possible. "fn" 1123 * Called to trim the tree of intermediate nodes when possible. "fn"
1117 * is the node we want to try and remove. 1124 * is the node we want to try and remove.
1118 */ 1125 */
1119 1126
1120 static struct fib6_node *fib6_repair_tree(struct net *net, 1127 static struct fib6_node *fib6_repair_tree(struct net *net,
1121 struct fib6_node *fn) 1128 struct fib6_node *fn)
1122 { 1129 {
1123 int children; 1130 int children;
1124 int nstate; 1131 int nstate;
1125 struct fib6_node *child, *pn; 1132 struct fib6_node *child, *pn;
1126 struct fib6_walker_t *w; 1133 struct fib6_walker_t *w;
1127 int iter = 0; 1134 int iter = 0;
1128 1135
1129 for (;;) { 1136 for (;;) {
1130 RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter); 1137 RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter);
1131 iter++; 1138 iter++;
1132 1139
1133 WARN_ON(fn->fn_flags & RTN_RTINFO); 1140 WARN_ON(fn->fn_flags & RTN_RTINFO);
1134 WARN_ON(fn->fn_flags & RTN_TL_ROOT); 1141 WARN_ON(fn->fn_flags & RTN_TL_ROOT);
1135 WARN_ON(fn->leaf != NULL); 1142 WARN_ON(fn->leaf != NULL);
1136 1143
1137 children = 0; 1144 children = 0;
1138 child = NULL; 1145 child = NULL;
1139 if (fn->right) child = fn->right, children |= 1; 1146 if (fn->right) child = fn->right, children |= 1;
1140 if (fn->left) child = fn->left, children |= 2; 1147 if (fn->left) child = fn->left, children |= 2;
1141 1148
1142 if (children == 3 || FIB6_SUBTREE(fn) 1149 if (children == 3 || FIB6_SUBTREE(fn)
1143 #ifdef CONFIG_IPV6_SUBTREES 1150 #ifdef CONFIG_IPV6_SUBTREES
1144 /* Subtree root (i.e. fn) may have one child */ 1151 /* Subtree root (i.e. fn) may have one child */
1145 || (children && fn->fn_flags & RTN_ROOT) 1152 || (children && fn->fn_flags & RTN_ROOT)
1146 #endif 1153 #endif
1147 ) { 1154 ) {
1148 fn->leaf = fib6_find_prefix(net, fn); 1155 fn->leaf = fib6_find_prefix(net, fn);
1149 #if RT6_DEBUG >= 2 1156 #if RT6_DEBUG >= 2
1150 if (!fn->leaf) { 1157 if (!fn->leaf) {
1151 WARN_ON(!fn->leaf); 1158 WARN_ON(!fn->leaf);
1152 fn->leaf = net->ipv6.ip6_null_entry; 1159 fn->leaf = net->ipv6.ip6_null_entry;
1153 } 1160 }
1154 #endif 1161 #endif
1155 atomic_inc(&fn->leaf->rt6i_ref); 1162 atomic_inc(&fn->leaf->rt6i_ref);
1156 return fn->parent; 1163 return fn->parent;
1157 } 1164 }
1158 1165
1159 pn = fn->parent; 1166 pn = fn->parent;
1160 #ifdef CONFIG_IPV6_SUBTREES 1167 #ifdef CONFIG_IPV6_SUBTREES
1161 if (FIB6_SUBTREE(pn) == fn) { 1168 if (FIB6_SUBTREE(pn) == fn) {
1162 WARN_ON(!(fn->fn_flags & RTN_ROOT)); 1169 WARN_ON(!(fn->fn_flags & RTN_ROOT));
1163 FIB6_SUBTREE(pn) = NULL; 1170 FIB6_SUBTREE(pn) = NULL;
1164 nstate = FWS_L; 1171 nstate = FWS_L;
1165 } else { 1172 } else {
1166 WARN_ON(fn->fn_flags & RTN_ROOT); 1173 WARN_ON(fn->fn_flags & RTN_ROOT);
1167 #endif 1174 #endif
1168 if (pn->right == fn) pn->right = child; 1175 if (pn->right == fn) pn->right = child;
1169 else if (pn->left == fn) pn->left = child; 1176 else if (pn->left == fn) pn->left = child;
1170 #if RT6_DEBUG >= 2 1177 #if RT6_DEBUG >= 2
1171 else 1178 else
1172 WARN_ON(1); 1179 WARN_ON(1);
1173 #endif 1180 #endif
1174 if (child) 1181 if (child)
1175 child->parent = pn; 1182 child->parent = pn;
1176 nstate = FWS_R; 1183 nstate = FWS_R;
1177 #ifdef CONFIG_IPV6_SUBTREES 1184 #ifdef CONFIG_IPV6_SUBTREES
1178 } 1185 }
1179 #endif 1186 #endif
1180 1187
1181 read_lock(&fib6_walker_lock); 1188 read_lock(&fib6_walker_lock);
1182 FOR_WALKERS(w) { 1189 FOR_WALKERS(w) {
1183 if (!child) { 1190 if (!child) {
1184 if (w->root == fn) { 1191 if (w->root == fn) {
1185 w->root = w->node = NULL; 1192 w->root = w->node = NULL;
1186 RT6_TRACE("W %p adjusted by delroot 1\n", w); 1193 RT6_TRACE("W %p adjusted by delroot 1\n", w);
1187 } else if (w->node == fn) { 1194 } else if (w->node == fn) {
1188 RT6_TRACE("W %p adjusted by delnode 1, s=%d/%d\n", w, w->state, nstate); 1195 RT6_TRACE("W %p adjusted by delnode 1, s=%d/%d\n", w, w->state, nstate);
1189 w->node = pn; 1196 w->node = pn;
1190 w->state = nstate; 1197 w->state = nstate;
1191 } 1198 }
1192 } else { 1199 } else {
1193 if (w->root == fn) { 1200 if (w->root == fn) {
1194 w->root = child; 1201 w->root = child;
1195 RT6_TRACE("W %p adjusted by delroot 2\n", w); 1202 RT6_TRACE("W %p adjusted by delroot 2\n", w);
1196 } 1203 }
1197 if (w->node == fn) { 1204 if (w->node == fn) {
1198 w->node = child; 1205 w->node = child;
1199 if (children&2) { 1206 if (children&2) {
1200 RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state); 1207 RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
1201 w->state = w->state>=FWS_R ? FWS_U : FWS_INIT; 1208 w->state = w->state>=FWS_R ? FWS_U : FWS_INIT;
1202 } else { 1209 } else {
1203 RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state); 1210 RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
1204 w->state = w->state>=FWS_C ? FWS_U : FWS_INIT; 1211 w->state = w->state>=FWS_C ? FWS_U : FWS_INIT;
1205 } 1212 }
1206 } 1213 }
1207 } 1214 }
1208 } 1215 }
1209 read_unlock(&fib6_walker_lock); 1216 read_unlock(&fib6_walker_lock);
1210 1217
1211 node_free(fn); 1218 node_free(fn);
1212 if (pn->fn_flags & RTN_RTINFO || FIB6_SUBTREE(pn)) 1219 if (pn->fn_flags & RTN_RTINFO || FIB6_SUBTREE(pn))
1213 return pn; 1220 return pn;
1214 1221
1215 rt6_release(pn->leaf); 1222 rt6_release(pn->leaf);
1216 pn->leaf = NULL; 1223 pn->leaf = NULL;
1217 fn = pn; 1224 fn = pn;
1218 } 1225 }
1219 } 1226 }
1220 1227
1221 static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, 1228 static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
1222 struct nl_info *info) 1229 struct nl_info *info)
1223 { 1230 {
1224 struct fib6_walker_t *w; 1231 struct fib6_walker_t *w;
1225 struct rt6_info *rt = *rtp; 1232 struct rt6_info *rt = *rtp;
1226 struct net *net = info->nl_net; 1233 struct net *net = info->nl_net;
1227 1234
1228 RT6_TRACE("fib6_del_route\n"); 1235 RT6_TRACE("fib6_del_route\n");
1229 1236
1230 /* Unlink it */ 1237 /* Unlink it */
1231 *rtp = rt->dst.rt6_next; 1238 *rtp = rt->dst.rt6_next;
1232 rt->rt6i_node = NULL; 1239 rt->rt6i_node = NULL;
1233 net->ipv6.rt6_stats->fib_rt_entries--; 1240 net->ipv6.rt6_stats->fib_rt_entries--;
1234 net->ipv6.rt6_stats->fib_discarded_routes++; 1241 net->ipv6.rt6_stats->fib_discarded_routes++;
1235 1242
1236 /* Reset round-robin state, if necessary */ 1243 /* Reset round-robin state, if necessary */
1237 if (fn->rr_ptr == rt) 1244 if (fn->rr_ptr == rt)
1238 fn->rr_ptr = NULL; 1245 fn->rr_ptr = NULL;
1239 1246
1240 /* Remove this entry from other siblings */ 1247 /* Remove this entry from other siblings */
1241 if (rt->rt6i_nsiblings) { 1248 if (rt->rt6i_nsiblings) {
1242 struct rt6_info *sibling, *next_sibling; 1249 struct rt6_info *sibling, *next_sibling;
1243 1250
1244 list_for_each_entry_safe(sibling, next_sibling, 1251 list_for_each_entry_safe(sibling, next_sibling,
1245 &rt->rt6i_siblings, rt6i_siblings) 1252 &rt->rt6i_siblings, rt6i_siblings)
1246 sibling->rt6i_nsiblings--; 1253 sibling->rt6i_nsiblings--;
1247 rt->rt6i_nsiblings = 0; 1254 rt->rt6i_nsiblings = 0;
1248 list_del_init(&rt->rt6i_siblings); 1255 list_del_init(&rt->rt6i_siblings);
1249 } 1256 }
1250 1257
1251 /* Adjust walkers */ 1258 /* Adjust walkers */
1252 read_lock(&fib6_walker_lock); 1259 read_lock(&fib6_walker_lock);
1253 FOR_WALKERS(w) { 1260 FOR_WALKERS(w) {
1254 if (w->state == FWS_C && w->leaf == rt) { 1261 if (w->state == FWS_C && w->leaf == rt) {
1255 RT6_TRACE("walker %p adjusted by delroute\n", w); 1262 RT6_TRACE("walker %p adjusted by delroute\n", w);
1256 w->leaf = rt->dst.rt6_next; 1263 w->leaf = rt->dst.rt6_next;
1257 if (!w->leaf) 1264 if (!w->leaf)
1258 w->state = FWS_U; 1265 w->state = FWS_U;
1259 } 1266 }
1260 } 1267 }
1261 read_unlock(&fib6_walker_lock); 1268 read_unlock(&fib6_walker_lock);
1262 1269
1263 rt->dst.rt6_next = NULL; 1270 rt->dst.rt6_next = NULL;
1264 1271
1265 /* If it was last route, expunge its radix tree node */ 1272 /* If it was last route, expunge its radix tree node */
1266 if (!fn->leaf) { 1273 if (!fn->leaf) {
1267 fn->fn_flags &= ~RTN_RTINFO; 1274 fn->fn_flags &= ~RTN_RTINFO;
1268 net->ipv6.rt6_stats->fib_route_nodes--; 1275 net->ipv6.rt6_stats->fib_route_nodes--;
1269 fn = fib6_repair_tree(net, fn); 1276 fn = fib6_repair_tree(net, fn);
1270 } 1277 }
1271 1278
1272 if (atomic_read(&rt->rt6i_ref) != 1) { 1279 if (atomic_read(&rt->rt6i_ref) != 1) {
1273 /* This route is used as dummy address holder in some split 1280 /* This route is used as dummy address holder in some split
1274 * nodes. It is not leaked, but it still holds other resources, 1281 * nodes. It is not leaked, but it still holds other resources,
1275 * which must be released in time. So, scan ascendant nodes 1282 * which must be released in time. So, scan ascendant nodes
1276 * and replace dummy references to this route with references 1283 * and replace dummy references to this route with references
1277 * to still alive ones. 1284 * to still alive ones.
1278 */ 1285 */
1279 while (fn) { 1286 while (fn) {
1280 if (!(fn->fn_flags & RTN_RTINFO) && fn->leaf == rt) { 1287 if (!(fn->fn_flags & RTN_RTINFO) && fn->leaf == rt) {
1281 fn->leaf = fib6_find_prefix(net, fn); 1288 fn->leaf = fib6_find_prefix(net, fn);
1282 atomic_inc(&fn->leaf->rt6i_ref); 1289 atomic_inc(&fn->leaf->rt6i_ref);
1283 rt6_release(rt); 1290 rt6_release(rt);
1284 } 1291 }
1285 fn = fn->parent; 1292 fn = fn->parent;
1286 } 1293 }
1287 /* No more references are possible at this point. */ 1294 /* No more references are possible at this point. */
1288 BUG_ON(atomic_read(&rt->rt6i_ref) != 1); 1295 BUG_ON(atomic_read(&rt->rt6i_ref) != 1);
1289 } 1296 }
1290 1297
1291 inet6_rt_notify(RTM_DELROUTE, rt, info); 1298 inet6_rt_notify(RTM_DELROUTE, rt, info);
1292 rt6_release(rt); 1299 rt6_release(rt);
1293 } 1300 }
1294 1301
1295 int fib6_del(struct rt6_info *rt, struct nl_info *info) 1302 int fib6_del(struct rt6_info *rt, struct nl_info *info)
1296 { 1303 {
1297 struct net *net = info->nl_net; 1304 struct net *net = info->nl_net;
1298 struct fib6_node *fn = rt->rt6i_node; 1305 struct fib6_node *fn = rt->rt6i_node;
1299 struct rt6_info **rtp; 1306 struct rt6_info **rtp;
1300 1307
1301 #if RT6_DEBUG >= 2 1308 #if RT6_DEBUG >= 2
1302 if (rt->dst.obsolete>0) { 1309 if (rt->dst.obsolete>0) {
1303 WARN_ON(fn != NULL); 1310 WARN_ON(fn != NULL);
1304 return -ENOENT; 1311 return -ENOENT;
1305 } 1312 }
1306 #endif 1313 #endif
1307 if (!fn || rt == net->ipv6.ip6_null_entry) 1314 if (!fn || rt == net->ipv6.ip6_null_entry)
1308 return -ENOENT; 1315 return -ENOENT;
1309 1316
1310 WARN_ON(!(fn->fn_flags & RTN_RTINFO)); 1317 WARN_ON(!(fn->fn_flags & RTN_RTINFO));
1311 1318
1312 if (!(rt->rt6i_flags & RTF_CACHE)) { 1319 if (!(rt->rt6i_flags & RTF_CACHE)) {
1313 struct fib6_node *pn = fn; 1320 struct fib6_node *pn = fn;
1314 #ifdef CONFIG_IPV6_SUBTREES 1321 #ifdef CONFIG_IPV6_SUBTREES
1315 /* clones of this route might be in another subtree */ 1322 /* clones of this route might be in another subtree */
1316 if (rt->rt6i_src.plen) { 1323 if (rt->rt6i_src.plen) {
1317 while (!(pn->fn_flags & RTN_ROOT)) 1324 while (!(pn->fn_flags & RTN_ROOT))
1318 pn = pn->parent; 1325 pn = pn->parent;
1319 pn = pn->parent; 1326 pn = pn->parent;
1320 } 1327 }
1321 #endif 1328 #endif
1322 fib6_prune_clones(info->nl_net, pn, rt); 1329 fib6_prune_clones(info->nl_net, pn, rt);
1323 } 1330 }
1324 1331
1325 /* 1332 /*
1326 * Walk the leaf entries looking for ourself 1333 * Walk the leaf entries looking for ourself
1327 */ 1334 */
1328 1335
1329 for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->dst.rt6_next) { 1336 for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->dst.rt6_next) {
1330 if (*rtp == rt) { 1337 if (*rtp == rt) {
1331 fib6_del_route(fn, rtp, info); 1338 fib6_del_route(fn, rtp, info);
1332 return 0; 1339 return 0;
1333 } 1340 }
1334 } 1341 }
1335 return -ENOENT; 1342 return -ENOENT;
1336 } 1343 }
1337 1344
1338 /* 1345 /*
1339 * Tree traversal function. 1346 * Tree traversal function.
1340 * 1347 *
1341 * Certainly, it is not interrupt safe. 1348 * Certainly, it is not interrupt safe.
1342 * However, it is internally reenterable wrt itself and fib6_add/fib6_del. 1349 * However, it is internally reenterable wrt itself and fib6_add/fib6_del.
1343 * It means, that we can modify tree during walking 1350 * It means, that we can modify tree during walking
1344 * and use this function for garbage collection, clone pruning, 1351 * and use this function for garbage collection, clone pruning,
1345 * cleaning tree when a device goes down etc. etc. 1352 * cleaning tree when a device goes down etc. etc.
1346 * 1353 *
1347 * It guarantees that every node will be traversed, 1354 * It guarantees that every node will be traversed,
1348 * and that it will be traversed only once. 1355 * and that it will be traversed only once.
1349 * 1356 *
1350 * Callback function w->func may return: 1357 * Callback function w->func may return:
1351 * 0 -> continue walking. 1358 * 0 -> continue walking.
1352 * positive value -> walking is suspended (used by tree dumps, 1359 * positive value -> walking is suspended (used by tree dumps,
1353 * and probably by gc, if it will be split to several slices) 1360 * and probably by gc, if it will be split to several slices)
1354 * negative value -> terminate walking. 1361 * negative value -> terminate walking.
1355 * 1362 *
1356 * The function itself returns: 1363 * The function itself returns:
1357 * 0 -> walk is complete. 1364 * 0 -> walk is complete.
1358 * >0 -> walk is incomplete (i.e. suspended) 1365 * >0 -> walk is incomplete (i.e. suspended)
1359 * <0 -> walk is terminated by an error. 1366 * <0 -> walk is terminated by an error.
1360 */ 1367 */
1361 1368
1362 static int fib6_walk_continue(struct fib6_walker_t *w) 1369 static int fib6_walk_continue(struct fib6_walker_t *w)
1363 { 1370 {
1364 struct fib6_node *fn, *pn; 1371 struct fib6_node *fn, *pn;
1365 1372
1366 for (;;) { 1373 for (;;) {
1367 fn = w->node; 1374 fn = w->node;
1368 if (!fn) 1375 if (!fn)
1369 return 0; 1376 return 0;
1370 1377
1371 if (w->prune && fn != w->root && 1378 if (w->prune && fn != w->root &&
1372 fn->fn_flags & RTN_RTINFO && w->state < FWS_C) { 1379 fn->fn_flags & RTN_RTINFO && w->state < FWS_C) {
1373 w->state = FWS_C; 1380 w->state = FWS_C;
1374 w->leaf = fn->leaf; 1381 w->leaf = fn->leaf;
1375 } 1382 }
1376 switch (w->state) { 1383 switch (w->state) {
1377 #ifdef CONFIG_IPV6_SUBTREES 1384 #ifdef CONFIG_IPV6_SUBTREES
1378 case FWS_S: 1385 case FWS_S:
1379 if (FIB6_SUBTREE(fn)) { 1386 if (FIB6_SUBTREE(fn)) {
1380 w->node = FIB6_SUBTREE(fn); 1387 w->node = FIB6_SUBTREE(fn);
1381 continue; 1388 continue;
1382 } 1389 }
1383 w->state = FWS_L; 1390 w->state = FWS_L;
1384 #endif 1391 #endif
1385 case FWS_L: 1392 case FWS_L:
1386 if (fn->left) { 1393 if (fn->left) {
1387 w->node = fn->left; 1394 w->node = fn->left;
1388 w->state = FWS_INIT; 1395 w->state = FWS_INIT;
1389 continue; 1396 continue;
1390 } 1397 }
1391 w->state = FWS_R; 1398 w->state = FWS_R;
1392 case FWS_R: 1399 case FWS_R:
1393 if (fn->right) { 1400 if (fn->right) {
1394 w->node = fn->right; 1401 w->node = fn->right;
1395 w->state = FWS_INIT; 1402 w->state = FWS_INIT;
1396 continue; 1403 continue;
1397 } 1404 }
1398 w->state = FWS_C; 1405 w->state = FWS_C;
1399 w->leaf = fn->leaf; 1406 w->leaf = fn->leaf;
1400 case FWS_C: 1407 case FWS_C:
1401 if (w->leaf && fn->fn_flags & RTN_RTINFO) { 1408 if (w->leaf && fn->fn_flags & RTN_RTINFO) {
1402 int err; 1409 int err;
1403 1410
1404 if (w->skip) { 1411 if (w->skip) {
1405 w->skip--; 1412 w->skip--;
1406 continue; 1413 continue;
1407 } 1414 }
1408 1415
1409 err = w->func(w); 1416 err = w->func(w);
1410 if (err) 1417 if (err)
1411 return err; 1418 return err;
1412 1419
1413 w->count++; 1420 w->count++;
1414 continue; 1421 continue;
1415 } 1422 }
1416 w->state = FWS_U; 1423 w->state = FWS_U;
1417 case FWS_U: 1424 case FWS_U:
1418 if (fn == w->root) 1425 if (fn == w->root)
1419 return 0; 1426 return 0;
1420 pn = fn->parent; 1427 pn = fn->parent;
1421 w->node = pn; 1428 w->node = pn;
1422 #ifdef CONFIG_IPV6_SUBTREES 1429 #ifdef CONFIG_IPV6_SUBTREES
1423 if (FIB6_SUBTREE(pn) == fn) { 1430 if (FIB6_SUBTREE(pn) == fn) {
1424 WARN_ON(!(fn->fn_flags & RTN_ROOT)); 1431 WARN_ON(!(fn->fn_flags & RTN_ROOT));
1425 w->state = FWS_L; 1432 w->state = FWS_L;
1426 continue; 1433 continue;
1427 } 1434 }
1428 #endif 1435 #endif
1429 if (pn->left == fn) { 1436 if (pn->left == fn) {
1430 w->state = FWS_R; 1437 w->state = FWS_R;
1431 continue; 1438 continue;
1432 } 1439 }
1433 if (pn->right == fn) { 1440 if (pn->right == fn) {
1434 w->state = FWS_C; 1441 w->state = FWS_C;
1435 w->leaf = w->node->leaf; 1442 w->leaf = w->node->leaf;
1436 continue; 1443 continue;
1437 } 1444 }
1438 #if RT6_DEBUG >= 2 1445 #if RT6_DEBUG >= 2
1439 WARN_ON(1); 1446 WARN_ON(1);
1440 #endif 1447 #endif
1441 } 1448 }
1442 } 1449 }
1443 } 1450 }
1444 1451
1445 static int fib6_walk(struct fib6_walker_t *w) 1452 static int fib6_walk(struct fib6_walker_t *w)
1446 { 1453 {
1447 int res; 1454 int res;
1448 1455
1449 w->state = FWS_INIT; 1456 w->state = FWS_INIT;
1450 w->node = w->root; 1457 w->node = w->root;
1451 1458
1452 fib6_walker_link(w); 1459 fib6_walker_link(w);
1453 res = fib6_walk_continue(w); 1460 res = fib6_walk_continue(w);
1454 if (res <= 0) 1461 if (res <= 0)
1455 fib6_walker_unlink(w); 1462 fib6_walker_unlink(w);
1456 return res; 1463 return res;
1457 } 1464 }
1458 1465
1459 static int fib6_clean_node(struct fib6_walker_t *w) 1466 static int fib6_clean_node(struct fib6_walker_t *w)
1460 { 1467 {
1461 int res; 1468 int res;
1462 struct rt6_info *rt; 1469 struct rt6_info *rt;
1463 struct fib6_cleaner_t *c = container_of(w, struct fib6_cleaner_t, w); 1470 struct fib6_cleaner_t *c = container_of(w, struct fib6_cleaner_t, w);
1464 struct nl_info info = { 1471 struct nl_info info = {
1465 .nl_net = c->net, 1472 .nl_net = c->net,
1466 }; 1473 };
1467 1474
1468 for (rt = w->leaf; rt; rt = rt->dst.rt6_next) { 1475 for (rt = w->leaf; rt; rt = rt->dst.rt6_next) {
1469 res = c->func(rt, c->arg); 1476 res = c->func(rt, c->arg);
1470 if (res < 0) { 1477 if (res < 0) {
1471 w->leaf = rt; 1478 w->leaf = rt;
1472 res = fib6_del(rt, &info); 1479 res = fib6_del(rt, &info);
1473 if (res) { 1480 if (res) {
1474 #if RT6_DEBUG >= 2 1481 #if RT6_DEBUG >= 2
1475 pr_debug("%s: del failed: rt=%p@%p err=%d\n", 1482 pr_debug("%s: del failed: rt=%p@%p err=%d\n",
1476 __func__, rt, rt->rt6i_node, res); 1483 __func__, rt, rt->rt6i_node, res);
1477 #endif 1484 #endif
1478 continue; 1485 continue;
1479 } 1486 }
1480 return 0; 1487 return 0;
1481 } 1488 }
1482 WARN_ON(res != 0); 1489 WARN_ON(res != 0);
1483 } 1490 }
1484 w->leaf = rt; 1491 w->leaf = rt;
1485 return 0; 1492 return 0;
1486 } 1493 }
1487 1494
1488 /* 1495 /*
1489 * Convenient frontend to tree walker. 1496 * Convenient frontend to tree walker.
1490 * 1497 *
1491 * func is called on each route. 1498 * func is called on each route.
1492 * It may return -1 -> delete this route. 1499 * It may return -1 -> delete this route.
1493 * 0 -> continue walking 1500 * 0 -> continue walking
1494 * 1501 *
1495 * prune==1 -> only immediate children of node (certainly, 1502 * prune==1 -> only immediate children of node (certainly,
1496 * ignoring pure split nodes) will be scanned. 1503 * ignoring pure split nodes) will be scanned.
1497 */ 1504 */
1498 1505
1499 static void fib6_clean_tree(struct net *net, struct fib6_node *root, 1506 static void fib6_clean_tree(struct net *net, struct fib6_node *root,
1500 int (*func)(struct rt6_info *, void *arg), 1507 int (*func)(struct rt6_info *, void *arg),
1501 int prune, void *arg) 1508 int prune, void *arg)
1502 { 1509 {
1503 struct fib6_cleaner_t c; 1510 struct fib6_cleaner_t c;
1504 1511
1505 c.w.root = root; 1512 c.w.root = root;
1506 c.w.func = fib6_clean_node; 1513 c.w.func = fib6_clean_node;
1507 c.w.prune = prune; 1514 c.w.prune = prune;
1508 c.w.count = 0; 1515 c.w.count = 0;
1509 c.w.skip = 0; 1516 c.w.skip = 0;
1510 c.func = func; 1517 c.func = func;
1511 c.arg = arg; 1518 c.arg = arg;
1512 c.net = net; 1519 c.net = net;
1513 1520
1514 fib6_walk(&c.w); 1521 fib6_walk(&c.w);
1515 } 1522 }
1516 1523
1517 void fib6_clean_all_ro(struct net *net, int (*func)(struct rt6_info *, void *arg), 1524 void fib6_clean_all_ro(struct net *net, int (*func)(struct rt6_info *, void *arg),
1518 int prune, void *arg) 1525 int prune, void *arg)
1519 { 1526 {
1520 struct fib6_table *table; 1527 struct fib6_table *table;
1521 struct hlist_head *head; 1528 struct hlist_head *head;
1522 unsigned int h; 1529 unsigned int h;
1523 1530
1524 rcu_read_lock(); 1531 rcu_read_lock();
1525 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { 1532 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
1526 head = &net->ipv6.fib_table_hash[h]; 1533 head = &net->ipv6.fib_table_hash[h];
1527 hlist_for_each_entry_rcu(table, head, tb6_hlist) { 1534 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
1528 read_lock_bh(&table->tb6_lock); 1535 read_lock_bh(&table->tb6_lock);
1529 fib6_clean_tree(net, &table->tb6_root, 1536 fib6_clean_tree(net, &table->tb6_root,
1530 func, prune, arg); 1537 func, prune, arg);
1531 read_unlock_bh(&table->tb6_lock); 1538 read_unlock_bh(&table->tb6_lock);
1532 } 1539 }
1533 } 1540 }
1534 rcu_read_unlock(); 1541 rcu_read_unlock();
1535 } 1542 }
1536 void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg), 1543 void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg),
1537 int prune, void *arg) 1544 int prune, void *arg)
1538 { 1545 {
1539 struct fib6_table *table; 1546 struct fib6_table *table;
1540 struct hlist_head *head; 1547 struct hlist_head *head;
1541 unsigned int h; 1548 unsigned int h;
1542 1549
1543 rcu_read_lock(); 1550 rcu_read_lock();
1544 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { 1551 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
1545 head = &net->ipv6.fib_table_hash[h]; 1552 head = &net->ipv6.fib_table_hash[h];
1546 hlist_for_each_entry_rcu(table, head, tb6_hlist) { 1553 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
1547 write_lock_bh(&table->tb6_lock); 1554 write_lock_bh(&table->tb6_lock);
1548 fib6_clean_tree(net, &table->tb6_root, 1555 fib6_clean_tree(net, &table->tb6_root,
1549 func, prune, arg); 1556 func, prune, arg);
1550 write_unlock_bh(&table->tb6_lock); 1557 write_unlock_bh(&table->tb6_lock);
1551 } 1558 }
1552 } 1559 }
1553 rcu_read_unlock(); 1560 rcu_read_unlock();
1554 } 1561 }
1555 1562
1556 static int fib6_prune_clone(struct rt6_info *rt, void *arg) 1563 static int fib6_prune_clone(struct rt6_info *rt, void *arg)
1557 { 1564 {
1558 if (rt->rt6i_flags & RTF_CACHE) { 1565 if (rt->rt6i_flags & RTF_CACHE) {
1559 RT6_TRACE("pruning clone %p\n", rt); 1566 RT6_TRACE("pruning clone %p\n", rt);
1560 return -1; 1567 return -1;
1561 } 1568 }
1562 1569
1563 return 0; 1570 return 0;
1564 } 1571 }
1565 1572
1566 static void fib6_prune_clones(struct net *net, struct fib6_node *fn, 1573 static void fib6_prune_clones(struct net *net, struct fib6_node *fn,
1567 struct rt6_info *rt) 1574 struct rt6_info *rt)
1568 { 1575 {
1569 fib6_clean_tree(net, fn, fib6_prune_clone, 1, rt); 1576 fib6_clean_tree(net, fn, fib6_prune_clone, 1, rt);
1570 } 1577 }
1571 1578
/*
 *	Garbage collection
 */

/* Parameters and result of one garbage-collection pass. */
struct fib6_gc_args {
	int timeout;	/* added to dst.lastuse when aging cached routes */
	int more;	/* count of entries that still need a later pass */
};

/* Set up by fib6_run_gc(), read and updated by fib6_age(). */
static struct fib6_gc_args gc_args;
1581 1588
1582 static int fib6_age(struct rt6_info *rt, void *arg) 1589 static int fib6_age(struct rt6_info *rt, void *arg)
1583 { 1590 {
1584 unsigned long now = jiffies; 1591 unsigned long now = jiffies;
1585 1592
1586 /* 1593 /*
1587 * check addrconf expiration here. 1594 * check addrconf expiration here.
1588 * Routes are expired even if they are in use. 1595 * Routes are expired even if they are in use.
1589 * 1596 *
1590 * Also age clones. Note, that clones are aged out 1597 * Also age clones. Note, that clones are aged out
1591 * only if they are not in use now. 1598 * only if they are not in use now.
1592 */ 1599 */
1593 1600
1594 if (rt->rt6i_flags & RTF_EXPIRES && rt->dst.expires) { 1601 if (rt->rt6i_flags & RTF_EXPIRES && rt->dst.expires) {
1595 if (time_after(now, rt->dst.expires)) { 1602 if (time_after(now, rt->dst.expires)) {
1596 RT6_TRACE("expiring %p\n", rt); 1603 RT6_TRACE("expiring %p\n", rt);
1597 return -1; 1604 return -1;
1598 } 1605 }
1599 gc_args.more++; 1606 gc_args.more++;
1600 } else if (rt->rt6i_flags & RTF_CACHE) { 1607 } else if (rt->rt6i_flags & RTF_CACHE) {
1601 if (atomic_read(&rt->dst.__refcnt) == 0 && 1608 if (atomic_read(&rt->dst.__refcnt) == 0 &&
1602 time_after_eq(now, rt->dst.lastuse + gc_args.timeout)) { 1609 time_after_eq(now, rt->dst.lastuse + gc_args.timeout)) {
1603 RT6_TRACE("aging clone %p\n", rt); 1610 RT6_TRACE("aging clone %p\n", rt);
1604 return -1; 1611 return -1;
1605 } else if (rt->rt6i_flags & RTF_GATEWAY) { 1612 } else if (rt->rt6i_flags & RTF_GATEWAY) {
1606 struct neighbour *neigh; 1613 struct neighbour *neigh;
1607 __u8 neigh_flags = 0; 1614 __u8 neigh_flags = 0;
1608 1615
1609 neigh = dst_neigh_lookup(&rt->dst, &rt->rt6i_gateway); 1616 neigh = dst_neigh_lookup(&rt->dst, &rt->rt6i_gateway);
1610 if (neigh) { 1617 if (neigh) {
1611 neigh_flags = neigh->flags; 1618 neigh_flags = neigh->flags;
1612 neigh_release(neigh); 1619 neigh_release(neigh);
1613 } 1620 }
1614 if (!(neigh_flags & NTF_ROUTER)) { 1621 if (!(neigh_flags & NTF_ROUTER)) {
1615 RT6_TRACE("purging route %p via non-router but gateway\n", 1622 RT6_TRACE("purging route %p via non-router but gateway\n",
1616 rt); 1623 rt);
1617 return -1; 1624 return -1;
1618 } 1625 }
1619 } 1626 }
1620 gc_args.more++; 1627 gc_args.more++;
1621 } 1628 }
1622 1629
1623 return 0; 1630 return 0;
1624 } 1631 }
1625 1632
/* Held by fib6_run_gc() while it fills in gc_args and walks the tables. */
static DEFINE_SPINLOCK(fib6_gc_lock);
1627 1634
1628 void fib6_run_gc(unsigned long expires, struct net *net) 1635 void fib6_run_gc(unsigned long expires, struct net *net)
1629 { 1636 {
1630 if (expires != ~0UL) { 1637 if (expires != ~0UL) {
1631 spin_lock_bh(&fib6_gc_lock); 1638 spin_lock_bh(&fib6_gc_lock);
1632 gc_args.timeout = expires ? (int)expires : 1639 gc_args.timeout = expires ? (int)expires :
1633 net->ipv6.sysctl.ip6_rt_gc_interval; 1640 net->ipv6.sysctl.ip6_rt_gc_interval;
1634 } else { 1641 } else {
1635 if (!spin_trylock_bh(&fib6_gc_lock)) { 1642 if (!spin_trylock_bh(&fib6_gc_lock)) {
1636 mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ); 1643 mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ);
1637 return; 1644 return;
1638 } 1645 }
1639 gc_args.timeout = net->ipv6.sysctl.ip6_rt_gc_interval; 1646 gc_args.timeout = net->ipv6.sysctl.ip6_rt_gc_interval;
1640 } 1647 }
1641 1648
1642 gc_args.more = icmp6_dst_gc(); 1649 gc_args.more = icmp6_dst_gc();
1643 1650
1644 fib6_clean_all(net, fib6_age, 0, NULL); 1651 fib6_clean_all(net, fib6_age, 0, NULL);
1645 1652
1646 if (gc_args.more) 1653 if (gc_args.more)
1647 mod_timer(&net->ipv6.ip6_fib_timer, 1654 mod_timer(&net->ipv6.ip6_fib_timer,
1648 round_jiffies(jiffies 1655 round_jiffies(jiffies
1649 + net->ipv6.sysctl.ip6_rt_gc_interval)); 1656 + net->ipv6.sysctl.ip6_rt_gc_interval));
1650 else 1657 else
1651 del_timer(&net->ipv6.ip6_fib_timer); 1658 del_timer(&net->ipv6.ip6_fib_timer);
1652 spin_unlock_bh(&fib6_gc_lock); 1659 spin_unlock_bh(&fib6_gc_lock);
1653 } 1660 }
1654 1661
/* Timer callback: arg is the struct net pointer registered in fib6_net_init(). */
static void fib6_gc_timer_cb(unsigned long arg)
{
	struct net *net = (struct net *)arg;

	fib6_run_gc(0, net);
}
1659 1666
1660 static int __net_init fib6_net_init(struct net *net) 1667 static int __net_init fib6_net_init(struct net *net)
1661 { 1668 {
1662 size_t size = sizeof(struct hlist_head) * FIB6_TABLE_HASHSZ; 1669 size_t size = sizeof(struct hlist_head) * FIB6_TABLE_HASHSZ;
1663 1670
1664 setup_timer(&net->ipv6.ip6_fib_timer, fib6_gc_timer_cb, (unsigned long)net); 1671 setup_timer(&net->ipv6.ip6_fib_timer, fib6_gc_timer_cb, (unsigned long)net);
1665 1672
1666 net->ipv6.rt6_stats = kzalloc(sizeof(*net->ipv6.rt6_stats), GFP_KERNEL); 1673 net->ipv6.rt6_stats = kzalloc(sizeof(*net->ipv6.rt6_stats), GFP_KERNEL);
1667 if (!net->ipv6.rt6_stats) 1674 if (!net->ipv6.rt6_stats)
1668 goto out_timer; 1675 goto out_timer;
1669 1676
1670 /* Avoid false sharing : Use at least a full cache line */ 1677 /* Avoid false sharing : Use at least a full cache line */
1671 size = max_t(size_t, size, L1_CACHE_BYTES); 1678 size = max_t(size_t, size, L1_CACHE_BYTES);
1672 1679
1673 net->ipv6.fib_table_hash = kzalloc(size, GFP_KERNEL); 1680 net->ipv6.fib_table_hash = kzalloc(size, GFP_KERNEL);
1674 if (!net->ipv6.fib_table_hash) 1681 if (!net->ipv6.fib_table_hash)
1675 goto out_rt6_stats; 1682 goto out_rt6_stats;
1676 1683
1677 net->ipv6.fib6_main_tbl = kzalloc(sizeof(*net->ipv6.fib6_main_tbl), 1684 net->ipv6.fib6_main_tbl = kzalloc(sizeof(*net->ipv6.fib6_main_tbl),
1678 GFP_KERNEL); 1685 GFP_KERNEL);
1679 if (!net->ipv6.fib6_main_tbl) 1686 if (!net->ipv6.fib6_main_tbl)
1680 goto out_fib_table_hash; 1687 goto out_fib_table_hash;
1681 1688
1682 net->ipv6.fib6_main_tbl->tb6_id = RT6_TABLE_MAIN; 1689 net->ipv6.fib6_main_tbl->tb6_id = RT6_TABLE_MAIN;
1683 net->ipv6.fib6_main_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry; 1690 net->ipv6.fib6_main_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry;
1684 net->ipv6.fib6_main_tbl->tb6_root.fn_flags = 1691 net->ipv6.fib6_main_tbl->tb6_root.fn_flags =
1685 RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; 1692 RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
1686 inet_peer_base_init(&net->ipv6.fib6_main_tbl->tb6_peers); 1693 inet_peer_base_init(&net->ipv6.fib6_main_tbl->tb6_peers);
1687 1694
1688 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 1695 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
1689 net->ipv6.fib6_local_tbl = kzalloc(sizeof(*net->ipv6.fib6_local_tbl), 1696 net->ipv6.fib6_local_tbl = kzalloc(sizeof(*net->ipv6.fib6_local_tbl),
1690 GFP_KERNEL); 1697 GFP_KERNEL);
1691 if (!net->ipv6.fib6_local_tbl) 1698 if (!net->ipv6.fib6_local_tbl)
1692 goto out_fib6_main_tbl; 1699 goto out_fib6_main_tbl;
1693 net->ipv6.fib6_local_tbl->tb6_id = RT6_TABLE_LOCAL; 1700 net->ipv6.fib6_local_tbl->tb6_id = RT6_TABLE_LOCAL;
1694 net->ipv6.fib6_local_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry; 1701 net->ipv6.fib6_local_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry;
1695 net->ipv6.fib6_local_tbl->tb6_root.fn_flags = 1702 net->ipv6.fib6_local_tbl->tb6_root.fn_flags =
1696 RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; 1703 RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
1697 inet_peer_base_init(&net->ipv6.fib6_local_tbl->tb6_peers); 1704 inet_peer_base_init(&net->ipv6.fib6_local_tbl->tb6_peers);
1698 #endif 1705 #endif
1699 fib6_tables_init(net); 1706 fib6_tables_init(net);
1700 1707
1701 return 0; 1708 return 0;
1702 1709
1703 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 1710 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
1704 out_fib6_main_tbl: 1711 out_fib6_main_tbl:
1705 kfree(net->ipv6.fib6_main_tbl); 1712 kfree(net->ipv6.fib6_main_tbl);
1706 #endif 1713 #endif
1707 out_fib_table_hash: 1714 out_fib_table_hash:
1708 kfree(net->ipv6.fib_table_hash); 1715 kfree(net->ipv6.fib_table_hash);
1709 out_rt6_stats: 1716 out_rt6_stats:
1710 kfree(net->ipv6.rt6_stats); 1717 kfree(net->ipv6.rt6_stats);
1711 out_timer: 1718 out_timer:
1712 return -ENOMEM; 1719 return -ENOMEM;
1713 } 1720 }
1714 1721
/*
 *	Per-namespace teardown, counterpart of fib6_net_init(): calls
 *	rt6_ifdown() for the namespace, stops the gc timer, then frees the
 *	tables, the table hash and the statistics.
 */
static void fib6_net_exit(struct net *net)
{
	rt6_ifdown(net, NULL);
	/* wait for a running timer callback before freeing what it uses */
	del_timer_sync(&net->ipv6.ip6_fib_timer);

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	inetpeer_invalidate_tree(&net->ipv6.fib6_local_tbl->tb6_peers);
	kfree(net->ipv6.fib6_local_tbl);
#endif
	inetpeer_invalidate_tree(&net->ipv6.fib6_main_tbl->tb6_peers);
	kfree(net->ipv6.fib6_main_tbl);
	kfree(net->ipv6.fib_table_hash);
	kfree(net->ipv6.rt6_stats);
}
1729 1736
/* Per-namespace hooks, registered in fib6_init() and removed in fib6_gc_cleanup(). */
static struct pernet_operations fib6_net_ops = {
	.init = fib6_net_init,
	.exit = fib6_net_exit,
};
1734 1741
1735 int __init fib6_init(void) 1742 int __init fib6_init(void)
1736 { 1743 {
1737 int ret = -ENOMEM; 1744 int ret = -ENOMEM;
1738 1745
1739 fib6_node_kmem = kmem_cache_create("fib6_nodes", 1746 fib6_node_kmem = kmem_cache_create("fib6_nodes",
1740 sizeof(struct fib6_node), 1747 sizeof(struct fib6_node),
1741 0, SLAB_HWCACHE_ALIGN, 1748 0, SLAB_HWCACHE_ALIGN,
1742 NULL); 1749 NULL);
1743 if (!fib6_node_kmem) 1750 if (!fib6_node_kmem)
1744 goto out; 1751 goto out;
1745 1752
1746 ret = register_pernet_subsys(&fib6_net_ops); 1753 ret = register_pernet_subsys(&fib6_net_ops);
1747 if (ret) 1754 if (ret)
1748 goto out_kmem_cache_create; 1755 goto out_kmem_cache_create;
1749 1756
1750 ret = __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib, 1757 ret = __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib,
1751 NULL); 1758 NULL);
1752 if (ret) 1759 if (ret)
1753 goto out_unregister_subsys; 1760 goto out_unregister_subsys;
1754 out: 1761 out:
1755 return ret; 1762 return ret;
1756 1763
1757 out_unregister_subsys: 1764 out_unregister_subsys:
1758 unregister_pernet_subsys(&fib6_net_ops); 1765 unregister_pernet_subsys(&fib6_net_ops);
1759 out_kmem_cache_create: 1766 out_kmem_cache_create:
1760 kmem_cache_destroy(fib6_node_kmem); 1767 kmem_cache_destroy(fib6_node_kmem);
1761 goto out; 1768 goto out;
1762 } 1769 }
1763 1770
/*
 *	Module-level teardown: unregister the per-namespace operations and
 *	destroy the fib6_node slab cache.
 */
void fib6_gc_cleanup(void)
{
	unregister_pernet_subsys(&fib6_net_ops);
	kmem_cache_destroy(fib6_node_kmem);
}