Commit 307f2fb95e9b96b3577916e73d92e104f8f26494
Committed by
David S. Miller
1 parent
9b4fe5fb0b
Exists in
smarc-imx_3.14.28_1.0.0_ga
and in
1 other branch
ipv6: only static routes qualify for equal cost multipathing
Static routes in this case are non-expiring routes which did not get configured by autoconf or by icmpv6 redirects. To make sure we actually get an ecmp route while searching for the first one in this fib6_node's leafs, also make sure it matches the ecmp route assumptions. v2: a) Removed RTF_EXPIRE check in dst.from chain. The check of RTF_ADDRCONF already ensures that this route, even if added again without RTF_EXPIRES (in case of a RA announcement with infinite timeout), does not cause the rt6i_nsiblings logic to go wrong if a later RA updates the expiration time. v3: a) Allow RTF_EXPIRES routes to enter the ecmp route set. We have to do so, because a PMTU event could update the RTF_EXPIRES flag and we would not count this route, if another route joins this set. We now filter only for RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC, which are flags that don't get changed after rt6_info construction. Cc: Nicolas Dichtel <nicolas.dichtel@6wind.com> Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org> Signed-off-by: David S. Miller <davem@davemloft.net>
Showing 1 changed file with 11 additions and 4 deletions Inline Diff
net/ipv6/ip6_fib.c
1 | /* | 1 | /* |
2 | * Linux INET6 implementation | 2 | * Linux INET6 implementation |
3 | * Forwarding Information Database | 3 | * Forwarding Information Database |
4 | * | 4 | * |
5 | * Authors: | 5 | * Authors: |
6 | * Pedro Roque <roque@di.fc.ul.pt> | 6 | * Pedro Roque <roque@di.fc.ul.pt> |
7 | * | 7 | * |
8 | * This program is free software; you can redistribute it and/or | 8 | * This program is free software; you can redistribute it and/or |
9 | * modify it under the terms of the GNU General Public License | 9 | * modify it under the terms of the GNU General Public License |
10 | * as published by the Free Software Foundation; either version | 10 | * as published by the Free Software Foundation; either version |
11 | * 2 of the License, or (at your option) any later version. | 11 | * 2 of the License, or (at your option) any later version. |
12 | */ | 12 | */ |
13 | 13 | ||
14 | /* | 14 | /* |
15 | * Changes: | 15 | * Changes: |
16 | * Yuji SEKIYA @USAGI: Support default route on router node; | 16 | * Yuji SEKIYA @USAGI: Support default route on router node; |
17 | * remove ip6_null_entry from the top of | 17 | * remove ip6_null_entry from the top of |
18 | * routing table. | 18 | * routing table. |
19 | * Ville Nuorvala: Fixed routing subtrees. | 19 | * Ville Nuorvala: Fixed routing subtrees. |
20 | */ | 20 | */ |
21 | 21 | ||
22 | #define pr_fmt(fmt) "IPv6: " fmt | 22 | #define pr_fmt(fmt) "IPv6: " fmt |
23 | 23 | ||
24 | #include <linux/errno.h> | 24 | #include <linux/errno.h> |
25 | #include <linux/types.h> | 25 | #include <linux/types.h> |
26 | #include <linux/net.h> | 26 | #include <linux/net.h> |
27 | #include <linux/route.h> | 27 | #include <linux/route.h> |
28 | #include <linux/netdevice.h> | 28 | #include <linux/netdevice.h> |
29 | #include <linux/in6.h> | 29 | #include <linux/in6.h> |
30 | #include <linux/init.h> | 30 | #include <linux/init.h> |
31 | #include <linux/list.h> | 31 | #include <linux/list.h> |
32 | #include <linux/slab.h> | 32 | #include <linux/slab.h> |
33 | 33 | ||
34 | #include <net/ipv6.h> | 34 | #include <net/ipv6.h> |
35 | #include <net/ndisc.h> | 35 | #include <net/ndisc.h> |
36 | #include <net/addrconf.h> | 36 | #include <net/addrconf.h> |
37 | 37 | ||
38 | #include <net/ip6_fib.h> | 38 | #include <net/ip6_fib.h> |
39 | #include <net/ip6_route.h> | 39 | #include <net/ip6_route.h> |
40 | 40 | ||
41 | #define RT6_DEBUG 2 | 41 | #define RT6_DEBUG 2 |
42 | 42 | ||
43 | #if RT6_DEBUG >= 3 | 43 | #if RT6_DEBUG >= 3 |
44 | #define RT6_TRACE(x...) pr_debug(x) | 44 | #define RT6_TRACE(x...) pr_debug(x) |
45 | #else | 45 | #else |
46 | #define RT6_TRACE(x...) do { ; } while (0) | 46 | #define RT6_TRACE(x...) do { ; } while (0) |
47 | #endif | 47 | #endif |
48 | 48 | ||
49 | static struct kmem_cache * fib6_node_kmem __read_mostly; | 49 | static struct kmem_cache * fib6_node_kmem __read_mostly; |
50 | 50 | ||
/*
 * Values stored in a walker's ->state field.  FWS_S is only present
 * when CONFIG_IPV6_SUBTREES is enabled, in which case it is the first
 * enumerator and shifts the numeric value of the others up by one.
 */
enum fib_walk_state_t {
#ifdef CONFIG_IPV6_SUBTREES
	FWS_S,
#endif
	FWS_L,
	FWS_R,
	FWS_C,
	FWS_U
};
61 | 61 | ||
62 | struct fib6_cleaner_t | 62 | struct fib6_cleaner_t |
63 | { | 63 | { |
64 | struct fib6_walker_t w; | 64 | struct fib6_walker_t w; |
65 | struct net *net; | 65 | struct net *net; |
66 | int (*func)(struct rt6_info *, void *arg); | 66 | int (*func)(struct rt6_info *, void *arg); |
67 | void *arg; | 67 | void *arg; |
68 | }; | 68 | }; |
69 | 69 | ||
70 | static DEFINE_RWLOCK(fib6_walker_lock); | 70 | static DEFINE_RWLOCK(fib6_walker_lock); |
71 | 71 | ||
72 | #ifdef CONFIG_IPV6_SUBTREES | 72 | #ifdef CONFIG_IPV6_SUBTREES |
73 | #define FWS_INIT FWS_S | 73 | #define FWS_INIT FWS_S |
74 | #else | 74 | #else |
75 | #define FWS_INIT FWS_L | 75 | #define FWS_INIT FWS_L |
76 | #endif | 76 | #endif |
77 | 77 | ||
78 | static void fib6_prune_clones(struct net *net, struct fib6_node *fn, | 78 | static void fib6_prune_clones(struct net *net, struct fib6_node *fn, |
79 | struct rt6_info *rt); | 79 | struct rt6_info *rt); |
80 | static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn); | 80 | static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn); |
81 | static struct fib6_node *fib6_repair_tree(struct net *net, struct fib6_node *fn); | 81 | static struct fib6_node *fib6_repair_tree(struct net *net, struct fib6_node *fn); |
82 | static int fib6_walk(struct fib6_walker_t *w); | 82 | static int fib6_walk(struct fib6_walker_t *w); |
83 | static int fib6_walk_continue(struct fib6_walker_t *w); | 83 | static int fib6_walk_continue(struct fib6_walker_t *w); |
84 | 84 | ||
85 | /* | 85 | /* |
86 | * A routing update causes an increase of the serial number on the | 86 | * A routing update causes an increase of the serial number on the |
87 | * affected subtree. This allows for cached routes to be asynchronously | 87 | * affected subtree. This allows for cached routes to be asynchronously |
88 | * tested when modifications are made to the destination cache as a | 88 | * tested when modifications are made to the destination cache as a |
89 | * result of redirects, path MTU changes, etc. | 89 | * result of redirects, path MTU changes, etc. |
90 | */ | 90 | */ |
91 | 91 | ||
92 | static __u32 rt_sernum; | 92 | static __u32 rt_sernum; |
93 | 93 | ||
94 | static void fib6_gc_timer_cb(unsigned long arg); | 94 | static void fib6_gc_timer_cb(unsigned long arg); |
95 | 95 | ||
96 | static LIST_HEAD(fib6_walkers); | 96 | static LIST_HEAD(fib6_walkers); |
97 | #define FOR_WALKERS(w) list_for_each_entry(w, &fib6_walkers, lh) | 97 | #define FOR_WALKERS(w) list_for_each_entry(w, &fib6_walkers, lh) |
98 | 98 | ||
99 | static inline void fib6_walker_link(struct fib6_walker_t *w) | 99 | static inline void fib6_walker_link(struct fib6_walker_t *w) |
100 | { | 100 | { |
101 | write_lock_bh(&fib6_walker_lock); | 101 | write_lock_bh(&fib6_walker_lock); |
102 | list_add(&w->lh, &fib6_walkers); | 102 | list_add(&w->lh, &fib6_walkers); |
103 | write_unlock_bh(&fib6_walker_lock); | 103 | write_unlock_bh(&fib6_walker_lock); |
104 | } | 104 | } |
105 | 105 | ||
106 | static inline void fib6_walker_unlink(struct fib6_walker_t *w) | 106 | static inline void fib6_walker_unlink(struct fib6_walker_t *w) |
107 | { | 107 | { |
108 | write_lock_bh(&fib6_walker_lock); | 108 | write_lock_bh(&fib6_walker_lock); |
109 | list_del(&w->lh); | 109 | list_del(&w->lh); |
110 | write_unlock_bh(&fib6_walker_lock); | 110 | write_unlock_bh(&fib6_walker_lock); |
111 | } | 111 | } |
112 | static __inline__ u32 fib6_new_sernum(void) | 112 | static __inline__ u32 fib6_new_sernum(void) |
113 | { | 113 | { |
114 | u32 n = ++rt_sernum; | 114 | u32 n = ++rt_sernum; |
115 | if ((__s32)n <= 0) | 115 | if ((__s32)n <= 0) |
116 | rt_sernum = n = 1; | 116 | rt_sernum = n = 1; |
117 | return n; | 117 | return n; |
118 | } | 118 | } |
119 | 119 | ||
120 | /* | 120 | /* |
121 | * Auxiliary address test functions for the radix tree. | 121 | * Auxiliary address test functions for the radix tree. |
122 | * | 122 | * |
123 | * These assume a 32bit processor (although it will work on | 123 | * These assume a 32bit processor (although it will work on |
124 | * 64bit processors) | 124 | * 64bit processors) |
125 | */ | 125 | */ |
126 | 126 | ||
127 | /* | 127 | /* |
128 | * test bit | 128 | * test bit |
129 | */ | 129 | */ |
130 | #if defined(__LITTLE_ENDIAN) | 130 | #if defined(__LITTLE_ENDIAN) |
131 | # define BITOP_BE32_SWIZZLE (0x1F & ~7) | 131 | # define BITOP_BE32_SWIZZLE (0x1F & ~7) |
132 | #else | 132 | #else |
133 | # define BITOP_BE32_SWIZZLE 0 | 133 | # define BITOP_BE32_SWIZZLE 0 |
134 | #endif | 134 | #endif |
135 | 135 | ||
136 | static __inline__ __be32 addr_bit_set(const void *token, int fn_bit) | 136 | static __inline__ __be32 addr_bit_set(const void *token, int fn_bit) |
137 | { | 137 | { |
138 | const __be32 *addr = token; | 138 | const __be32 *addr = token; |
139 | /* | 139 | /* |
140 | * Here, | 140 | * Here, |
141 | * 1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f) | 141 | * 1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f) |
142 | * is optimized version of | 142 | * is optimized version of |
143 | * htonl(1 << ((~fn_bit)&0x1F)) | 143 | * htonl(1 << ((~fn_bit)&0x1F)) |
144 | * See include/asm-generic/bitops/le.h. | 144 | * See include/asm-generic/bitops/le.h. |
145 | */ | 145 | */ |
146 | return (__force __be32)(1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)) & | 146 | return (__force __be32)(1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)) & |
147 | addr[fn_bit >> 5]; | 147 | addr[fn_bit >> 5]; |
148 | } | 148 | } |
149 | 149 | ||
150 | static __inline__ struct fib6_node * node_alloc(void) | 150 | static __inline__ struct fib6_node * node_alloc(void) |
151 | { | 151 | { |
152 | struct fib6_node *fn; | 152 | struct fib6_node *fn; |
153 | 153 | ||
154 | fn = kmem_cache_zalloc(fib6_node_kmem, GFP_ATOMIC); | 154 | fn = kmem_cache_zalloc(fib6_node_kmem, GFP_ATOMIC); |
155 | 155 | ||
156 | return fn; | 156 | return fn; |
157 | } | 157 | } |
158 | 158 | ||
159 | static __inline__ void node_free(struct fib6_node * fn) | 159 | static __inline__ void node_free(struct fib6_node * fn) |
160 | { | 160 | { |
161 | kmem_cache_free(fib6_node_kmem, fn); | 161 | kmem_cache_free(fib6_node_kmem, fn); |
162 | } | 162 | } |
163 | 163 | ||
164 | static __inline__ void rt6_release(struct rt6_info *rt) | 164 | static __inline__ void rt6_release(struct rt6_info *rt) |
165 | { | 165 | { |
166 | if (atomic_dec_and_test(&rt->rt6i_ref)) | 166 | if (atomic_dec_and_test(&rt->rt6i_ref)) |
167 | dst_free(&rt->dst); | 167 | dst_free(&rt->dst); |
168 | } | 168 | } |
169 | 169 | ||
170 | static void fib6_link_table(struct net *net, struct fib6_table *tb) | 170 | static void fib6_link_table(struct net *net, struct fib6_table *tb) |
171 | { | 171 | { |
172 | unsigned int h; | 172 | unsigned int h; |
173 | 173 | ||
174 | /* | 174 | /* |
175 | * Initialize table lock at a single place to give lockdep a key, | 175 | * Initialize table lock at a single place to give lockdep a key, |
176 | * tables aren't visible prior to being linked to the list. | 176 | * tables aren't visible prior to being linked to the list. |
177 | */ | 177 | */ |
178 | rwlock_init(&tb->tb6_lock); | 178 | rwlock_init(&tb->tb6_lock); |
179 | 179 | ||
180 | h = tb->tb6_id & (FIB6_TABLE_HASHSZ - 1); | 180 | h = tb->tb6_id & (FIB6_TABLE_HASHSZ - 1); |
181 | 181 | ||
182 | /* | 182 | /* |
183 | * No protection necessary, this is the only list mutatation | 183 | * No protection necessary, this is the only list mutatation |
184 | * operation, tables never disappear once they exist. | 184 | * operation, tables never disappear once they exist. |
185 | */ | 185 | */ |
186 | hlist_add_head_rcu(&tb->tb6_hlist, &net->ipv6.fib_table_hash[h]); | 186 | hlist_add_head_rcu(&tb->tb6_hlist, &net->ipv6.fib_table_hash[h]); |
187 | } | 187 | } |
188 | 188 | ||
189 | #ifdef CONFIG_IPV6_MULTIPLE_TABLES | 189 | #ifdef CONFIG_IPV6_MULTIPLE_TABLES |
190 | 190 | ||
191 | static struct fib6_table *fib6_alloc_table(struct net *net, u32 id) | 191 | static struct fib6_table *fib6_alloc_table(struct net *net, u32 id) |
192 | { | 192 | { |
193 | struct fib6_table *table; | 193 | struct fib6_table *table; |
194 | 194 | ||
195 | table = kzalloc(sizeof(*table), GFP_ATOMIC); | 195 | table = kzalloc(sizeof(*table), GFP_ATOMIC); |
196 | if (table) { | 196 | if (table) { |
197 | table->tb6_id = id; | 197 | table->tb6_id = id; |
198 | table->tb6_root.leaf = net->ipv6.ip6_null_entry; | 198 | table->tb6_root.leaf = net->ipv6.ip6_null_entry; |
199 | table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; | 199 | table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; |
200 | inet_peer_base_init(&table->tb6_peers); | 200 | inet_peer_base_init(&table->tb6_peers); |
201 | } | 201 | } |
202 | 202 | ||
203 | return table; | 203 | return table; |
204 | } | 204 | } |
205 | 205 | ||
206 | struct fib6_table *fib6_new_table(struct net *net, u32 id) | 206 | struct fib6_table *fib6_new_table(struct net *net, u32 id) |
207 | { | 207 | { |
208 | struct fib6_table *tb; | 208 | struct fib6_table *tb; |
209 | 209 | ||
210 | if (id == 0) | 210 | if (id == 0) |
211 | id = RT6_TABLE_MAIN; | 211 | id = RT6_TABLE_MAIN; |
212 | tb = fib6_get_table(net, id); | 212 | tb = fib6_get_table(net, id); |
213 | if (tb) | 213 | if (tb) |
214 | return tb; | 214 | return tb; |
215 | 215 | ||
216 | tb = fib6_alloc_table(net, id); | 216 | tb = fib6_alloc_table(net, id); |
217 | if (tb) | 217 | if (tb) |
218 | fib6_link_table(net, tb); | 218 | fib6_link_table(net, tb); |
219 | 219 | ||
220 | return tb; | 220 | return tb; |
221 | } | 221 | } |
222 | 222 | ||
223 | struct fib6_table *fib6_get_table(struct net *net, u32 id) | 223 | struct fib6_table *fib6_get_table(struct net *net, u32 id) |
224 | { | 224 | { |
225 | struct fib6_table *tb; | 225 | struct fib6_table *tb; |
226 | struct hlist_head *head; | 226 | struct hlist_head *head; |
227 | unsigned int h; | 227 | unsigned int h; |
228 | 228 | ||
229 | if (id == 0) | 229 | if (id == 0) |
230 | id = RT6_TABLE_MAIN; | 230 | id = RT6_TABLE_MAIN; |
231 | h = id & (FIB6_TABLE_HASHSZ - 1); | 231 | h = id & (FIB6_TABLE_HASHSZ - 1); |
232 | rcu_read_lock(); | 232 | rcu_read_lock(); |
233 | head = &net->ipv6.fib_table_hash[h]; | 233 | head = &net->ipv6.fib_table_hash[h]; |
234 | hlist_for_each_entry_rcu(tb, head, tb6_hlist) { | 234 | hlist_for_each_entry_rcu(tb, head, tb6_hlist) { |
235 | if (tb->tb6_id == id) { | 235 | if (tb->tb6_id == id) { |
236 | rcu_read_unlock(); | 236 | rcu_read_unlock(); |
237 | return tb; | 237 | return tb; |
238 | } | 238 | } |
239 | } | 239 | } |
240 | rcu_read_unlock(); | 240 | rcu_read_unlock(); |
241 | 241 | ||
242 | return NULL; | 242 | return NULL; |
243 | } | 243 | } |
244 | 244 | ||
245 | static void __net_init fib6_tables_init(struct net *net) | 245 | static void __net_init fib6_tables_init(struct net *net) |
246 | { | 246 | { |
247 | fib6_link_table(net, net->ipv6.fib6_main_tbl); | 247 | fib6_link_table(net, net->ipv6.fib6_main_tbl); |
248 | fib6_link_table(net, net->ipv6.fib6_local_tbl); | 248 | fib6_link_table(net, net->ipv6.fib6_local_tbl); |
249 | } | 249 | } |
250 | #else | 250 | #else |
251 | 251 | ||
252 | struct fib6_table *fib6_new_table(struct net *net, u32 id) | 252 | struct fib6_table *fib6_new_table(struct net *net, u32 id) |
253 | { | 253 | { |
254 | return fib6_get_table(net, id); | 254 | return fib6_get_table(net, id); |
255 | } | 255 | } |
256 | 256 | ||
257 | struct fib6_table *fib6_get_table(struct net *net, u32 id) | 257 | struct fib6_table *fib6_get_table(struct net *net, u32 id) |
258 | { | 258 | { |
259 | return net->ipv6.fib6_main_tbl; | 259 | return net->ipv6.fib6_main_tbl; |
260 | } | 260 | } |
261 | 261 | ||
262 | struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, | 262 | struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, |
263 | int flags, pol_lookup_t lookup) | 263 | int flags, pol_lookup_t lookup) |
264 | { | 264 | { |
265 | return (struct dst_entry *) lookup(net, net->ipv6.fib6_main_tbl, fl6, flags); | 265 | return (struct dst_entry *) lookup(net, net->ipv6.fib6_main_tbl, fl6, flags); |
266 | } | 266 | } |
267 | 267 | ||
268 | static void __net_init fib6_tables_init(struct net *net) | 268 | static void __net_init fib6_tables_init(struct net *net) |
269 | { | 269 | { |
270 | fib6_link_table(net, net->ipv6.fib6_main_tbl); | 270 | fib6_link_table(net, net->ipv6.fib6_main_tbl); |
271 | } | 271 | } |
272 | 272 | ||
273 | #endif | 273 | #endif |
274 | 274 | ||
275 | static int fib6_dump_node(struct fib6_walker_t *w) | 275 | static int fib6_dump_node(struct fib6_walker_t *w) |
276 | { | 276 | { |
277 | int res; | 277 | int res; |
278 | struct rt6_info *rt; | 278 | struct rt6_info *rt; |
279 | 279 | ||
280 | for (rt = w->leaf; rt; rt = rt->dst.rt6_next) { | 280 | for (rt = w->leaf; rt; rt = rt->dst.rt6_next) { |
281 | res = rt6_dump_route(rt, w->args); | 281 | res = rt6_dump_route(rt, w->args); |
282 | if (res < 0) { | 282 | if (res < 0) { |
283 | /* Frame is full, suspend walking */ | 283 | /* Frame is full, suspend walking */ |
284 | w->leaf = rt; | 284 | w->leaf = rt; |
285 | return 1; | 285 | return 1; |
286 | } | 286 | } |
287 | WARN_ON(res == 0); | 287 | WARN_ON(res == 0); |
288 | } | 288 | } |
289 | w->leaf = NULL; | 289 | w->leaf = NULL; |
290 | return 0; | 290 | return 0; |
291 | } | 291 | } |
292 | 292 | ||
293 | static void fib6_dump_end(struct netlink_callback *cb) | 293 | static void fib6_dump_end(struct netlink_callback *cb) |
294 | { | 294 | { |
295 | struct fib6_walker_t *w = (void*)cb->args[2]; | 295 | struct fib6_walker_t *w = (void*)cb->args[2]; |
296 | 296 | ||
297 | if (w) { | 297 | if (w) { |
298 | if (cb->args[4]) { | 298 | if (cb->args[4]) { |
299 | cb->args[4] = 0; | 299 | cb->args[4] = 0; |
300 | fib6_walker_unlink(w); | 300 | fib6_walker_unlink(w); |
301 | } | 301 | } |
302 | cb->args[2] = 0; | 302 | cb->args[2] = 0; |
303 | kfree(w); | 303 | kfree(w); |
304 | } | 304 | } |
305 | cb->done = (void*)cb->args[3]; | 305 | cb->done = (void*)cb->args[3]; |
306 | cb->args[1] = 3; | 306 | cb->args[1] = 3; |
307 | } | 307 | } |
308 | 308 | ||
309 | static int fib6_dump_done(struct netlink_callback *cb) | 309 | static int fib6_dump_done(struct netlink_callback *cb) |
310 | { | 310 | { |
311 | fib6_dump_end(cb); | 311 | fib6_dump_end(cb); |
312 | return cb->done ? cb->done(cb) : 0; | 312 | return cb->done ? cb->done(cb) : 0; |
313 | } | 313 | } |
314 | 314 | ||
315 | static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb, | 315 | static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb, |
316 | struct netlink_callback *cb) | 316 | struct netlink_callback *cb) |
317 | { | 317 | { |
318 | struct fib6_walker_t *w; | 318 | struct fib6_walker_t *w; |
319 | int res; | 319 | int res; |
320 | 320 | ||
321 | w = (void *)cb->args[2]; | 321 | w = (void *)cb->args[2]; |
322 | w->root = &table->tb6_root; | 322 | w->root = &table->tb6_root; |
323 | 323 | ||
324 | if (cb->args[4] == 0) { | 324 | if (cb->args[4] == 0) { |
325 | w->count = 0; | 325 | w->count = 0; |
326 | w->skip = 0; | 326 | w->skip = 0; |
327 | 327 | ||
328 | read_lock_bh(&table->tb6_lock); | 328 | read_lock_bh(&table->tb6_lock); |
329 | res = fib6_walk(w); | 329 | res = fib6_walk(w); |
330 | read_unlock_bh(&table->tb6_lock); | 330 | read_unlock_bh(&table->tb6_lock); |
331 | if (res > 0) { | 331 | if (res > 0) { |
332 | cb->args[4] = 1; | 332 | cb->args[4] = 1; |
333 | cb->args[5] = w->root->fn_sernum; | 333 | cb->args[5] = w->root->fn_sernum; |
334 | } | 334 | } |
335 | } else { | 335 | } else { |
336 | if (cb->args[5] != w->root->fn_sernum) { | 336 | if (cb->args[5] != w->root->fn_sernum) { |
337 | /* Begin at the root if the tree changed */ | 337 | /* Begin at the root if the tree changed */ |
338 | cb->args[5] = w->root->fn_sernum; | 338 | cb->args[5] = w->root->fn_sernum; |
339 | w->state = FWS_INIT; | 339 | w->state = FWS_INIT; |
340 | w->node = w->root; | 340 | w->node = w->root; |
341 | w->skip = w->count; | 341 | w->skip = w->count; |
342 | } else | 342 | } else |
343 | w->skip = 0; | 343 | w->skip = 0; |
344 | 344 | ||
345 | read_lock_bh(&table->tb6_lock); | 345 | read_lock_bh(&table->tb6_lock); |
346 | res = fib6_walk_continue(w); | 346 | res = fib6_walk_continue(w); |
347 | read_unlock_bh(&table->tb6_lock); | 347 | read_unlock_bh(&table->tb6_lock); |
348 | if (res <= 0) { | 348 | if (res <= 0) { |
349 | fib6_walker_unlink(w); | 349 | fib6_walker_unlink(w); |
350 | cb->args[4] = 0; | 350 | cb->args[4] = 0; |
351 | } | 351 | } |
352 | } | 352 | } |
353 | 353 | ||
354 | return res; | 354 | return res; |
355 | } | 355 | } |
356 | 356 | ||
357 | static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) | 357 | static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) |
358 | { | 358 | { |
359 | struct net *net = sock_net(skb->sk); | 359 | struct net *net = sock_net(skb->sk); |
360 | unsigned int h, s_h; | 360 | unsigned int h, s_h; |
361 | unsigned int e = 0, s_e; | 361 | unsigned int e = 0, s_e; |
362 | struct rt6_rtnl_dump_arg arg; | 362 | struct rt6_rtnl_dump_arg arg; |
363 | struct fib6_walker_t *w; | 363 | struct fib6_walker_t *w; |
364 | struct fib6_table *tb; | 364 | struct fib6_table *tb; |
365 | struct hlist_head *head; | 365 | struct hlist_head *head; |
366 | int res = 0; | 366 | int res = 0; |
367 | 367 | ||
368 | s_h = cb->args[0]; | 368 | s_h = cb->args[0]; |
369 | s_e = cb->args[1]; | 369 | s_e = cb->args[1]; |
370 | 370 | ||
371 | w = (void *)cb->args[2]; | 371 | w = (void *)cb->args[2]; |
372 | if (!w) { | 372 | if (!w) { |
373 | /* New dump: | 373 | /* New dump: |
374 | * | 374 | * |
375 | * 1. hook callback destructor. | 375 | * 1. hook callback destructor. |
376 | */ | 376 | */ |
377 | cb->args[3] = (long)cb->done; | 377 | cb->args[3] = (long)cb->done; |
378 | cb->done = fib6_dump_done; | 378 | cb->done = fib6_dump_done; |
379 | 379 | ||
380 | /* | 380 | /* |
381 | * 2. allocate and initialize walker. | 381 | * 2. allocate and initialize walker. |
382 | */ | 382 | */ |
383 | w = kzalloc(sizeof(*w), GFP_ATOMIC); | 383 | w = kzalloc(sizeof(*w), GFP_ATOMIC); |
384 | if (!w) | 384 | if (!w) |
385 | return -ENOMEM; | 385 | return -ENOMEM; |
386 | w->func = fib6_dump_node; | 386 | w->func = fib6_dump_node; |
387 | cb->args[2] = (long)w; | 387 | cb->args[2] = (long)w; |
388 | } | 388 | } |
389 | 389 | ||
390 | arg.skb = skb; | 390 | arg.skb = skb; |
391 | arg.cb = cb; | 391 | arg.cb = cb; |
392 | arg.net = net; | 392 | arg.net = net; |
393 | w->args = &arg; | 393 | w->args = &arg; |
394 | 394 | ||
395 | rcu_read_lock(); | 395 | rcu_read_lock(); |
396 | for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) { | 396 | for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) { |
397 | e = 0; | 397 | e = 0; |
398 | head = &net->ipv6.fib_table_hash[h]; | 398 | head = &net->ipv6.fib_table_hash[h]; |
399 | hlist_for_each_entry_rcu(tb, head, tb6_hlist) { | 399 | hlist_for_each_entry_rcu(tb, head, tb6_hlist) { |
400 | if (e < s_e) | 400 | if (e < s_e) |
401 | goto next; | 401 | goto next; |
402 | res = fib6_dump_table(tb, skb, cb); | 402 | res = fib6_dump_table(tb, skb, cb); |
403 | if (res != 0) | 403 | if (res != 0) |
404 | goto out; | 404 | goto out; |
405 | next: | 405 | next: |
406 | e++; | 406 | e++; |
407 | } | 407 | } |
408 | } | 408 | } |
409 | out: | 409 | out: |
410 | rcu_read_unlock(); | 410 | rcu_read_unlock(); |
411 | cb->args[1] = e; | 411 | cb->args[1] = e; |
412 | cb->args[0] = h; | 412 | cb->args[0] = h; |
413 | 413 | ||
414 | res = res < 0 ? res : skb->len; | 414 | res = res < 0 ? res : skb->len; |
415 | if (res <= 0) | 415 | if (res <= 0) |
416 | fib6_dump_end(cb); | 416 | fib6_dump_end(cb); |
417 | return res; | 417 | return res; |
418 | } | 418 | } |
419 | 419 | ||
420 | /* | 420 | /* |
421 | * Routing Table | 421 | * Routing Table |
422 | * | 422 | * |
423 | * return the appropriate node for a routing tree "add" operation | 423 | * return the appropriate node for a routing tree "add" operation |
424 | * by either creating and inserting or by returning an existing | 424 | * by either creating and inserting or by returning an existing |
425 | * node. | 425 | * node. |
426 | */ | 426 | */ |
427 | 427 | ||
428 | static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr, | 428 | static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr, |
429 | int addrlen, int plen, | 429 | int addrlen, int plen, |
430 | int offset, int allow_create, | 430 | int offset, int allow_create, |
431 | int replace_required) | 431 | int replace_required) |
432 | { | 432 | { |
433 | struct fib6_node *fn, *in, *ln; | 433 | struct fib6_node *fn, *in, *ln; |
434 | struct fib6_node *pn = NULL; | 434 | struct fib6_node *pn = NULL; |
435 | struct rt6key *key; | 435 | struct rt6key *key; |
436 | int bit; | 436 | int bit; |
437 | __be32 dir = 0; | 437 | __be32 dir = 0; |
438 | __u32 sernum = fib6_new_sernum(); | 438 | __u32 sernum = fib6_new_sernum(); |
439 | 439 | ||
440 | RT6_TRACE("fib6_add_1\n"); | 440 | RT6_TRACE("fib6_add_1\n"); |
441 | 441 | ||
442 | /* insert node in tree */ | 442 | /* insert node in tree */ |
443 | 443 | ||
444 | fn = root; | 444 | fn = root; |
445 | 445 | ||
446 | do { | 446 | do { |
447 | key = (struct rt6key *)((u8 *)fn->leaf + offset); | 447 | key = (struct rt6key *)((u8 *)fn->leaf + offset); |
448 | 448 | ||
449 | /* | 449 | /* |
450 | * Prefix match | 450 | * Prefix match |
451 | */ | 451 | */ |
452 | if (plen < fn->fn_bit || | 452 | if (plen < fn->fn_bit || |
453 | !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit)) { | 453 | !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit)) { |
454 | if (!allow_create) { | 454 | if (!allow_create) { |
455 | if (replace_required) { | 455 | if (replace_required) { |
456 | pr_warn("Can't replace route, no match found\n"); | 456 | pr_warn("Can't replace route, no match found\n"); |
457 | return ERR_PTR(-ENOENT); | 457 | return ERR_PTR(-ENOENT); |
458 | } | 458 | } |
459 | pr_warn("NLM_F_CREATE should be set when creating new route\n"); | 459 | pr_warn("NLM_F_CREATE should be set when creating new route\n"); |
460 | } | 460 | } |
461 | goto insert_above; | 461 | goto insert_above; |
462 | } | 462 | } |
463 | 463 | ||
464 | /* | 464 | /* |
465 | * Exact match ? | 465 | * Exact match ? |
466 | */ | 466 | */ |
467 | 467 | ||
468 | if (plen == fn->fn_bit) { | 468 | if (plen == fn->fn_bit) { |
469 | /* clean up an intermediate node */ | 469 | /* clean up an intermediate node */ |
470 | if (!(fn->fn_flags & RTN_RTINFO)) { | 470 | if (!(fn->fn_flags & RTN_RTINFO)) { |
471 | rt6_release(fn->leaf); | 471 | rt6_release(fn->leaf); |
472 | fn->leaf = NULL; | 472 | fn->leaf = NULL; |
473 | } | 473 | } |
474 | 474 | ||
475 | fn->fn_sernum = sernum; | 475 | fn->fn_sernum = sernum; |
476 | 476 | ||
477 | return fn; | 477 | return fn; |
478 | } | 478 | } |
479 | 479 | ||
480 | /* | 480 | /* |
481 | * We have more bits to go | 481 | * We have more bits to go |
482 | */ | 482 | */ |
483 | 483 | ||
484 | /* Try to walk down on tree. */ | 484 | /* Try to walk down on tree. */ |
485 | fn->fn_sernum = sernum; | 485 | fn->fn_sernum = sernum; |
486 | dir = addr_bit_set(addr, fn->fn_bit); | 486 | dir = addr_bit_set(addr, fn->fn_bit); |
487 | pn = fn; | 487 | pn = fn; |
488 | fn = dir ? fn->right: fn->left; | 488 | fn = dir ? fn->right: fn->left; |
489 | } while (fn); | 489 | } while (fn); |
490 | 490 | ||
491 | if (!allow_create) { | 491 | if (!allow_create) { |
492 | /* We should not create new node because | 492 | /* We should not create new node because |
493 | * NLM_F_REPLACE was specified without NLM_F_CREATE | 493 | * NLM_F_REPLACE was specified without NLM_F_CREATE |
494 | * I assume it is safe to require NLM_F_CREATE when | 494 | * I assume it is safe to require NLM_F_CREATE when |
495 | * REPLACE flag is used! Later we may want to remove the | 495 | * REPLACE flag is used! Later we may want to remove the |
496 | * check for replace_required, because according | 496 | * check for replace_required, because according |
497 | * to netlink specification, NLM_F_CREATE | 497 | * to netlink specification, NLM_F_CREATE |
498 | * MUST be specified if new route is created. | 498 | * MUST be specified if new route is created. |
499 | * That would keep IPv6 consistent with IPv4 | 499 | * That would keep IPv6 consistent with IPv4 |
500 | */ | 500 | */ |
501 | if (replace_required) { | 501 | if (replace_required) { |
502 | pr_warn("Can't replace route, no match found\n"); | 502 | pr_warn("Can't replace route, no match found\n"); |
503 | return ERR_PTR(-ENOENT); | 503 | return ERR_PTR(-ENOENT); |
504 | } | 504 | } |
505 | pr_warn("NLM_F_CREATE should be set when creating new route\n"); | 505 | pr_warn("NLM_F_CREATE should be set when creating new route\n"); |
506 | } | 506 | } |
507 | /* | 507 | /* |
508 | * We walked to the bottom of tree. | 508 | * We walked to the bottom of tree. |
509 | * Create new leaf node without children. | 509 | * Create new leaf node without children. |
510 | */ | 510 | */ |
511 | 511 | ||
512 | ln = node_alloc(); | 512 | ln = node_alloc(); |
513 | 513 | ||
514 | if (!ln) | 514 | if (!ln) |
515 | return ERR_PTR(-ENOMEM); | 515 | return ERR_PTR(-ENOMEM); |
516 | ln->fn_bit = plen; | 516 | ln->fn_bit = plen; |
517 | 517 | ||
518 | ln->parent = pn; | 518 | ln->parent = pn; |
519 | ln->fn_sernum = sernum; | 519 | ln->fn_sernum = sernum; |
520 | 520 | ||
521 | if (dir) | 521 | if (dir) |
522 | pn->right = ln; | 522 | pn->right = ln; |
523 | else | 523 | else |
524 | pn->left = ln; | 524 | pn->left = ln; |
525 | 525 | ||
526 | return ln; | 526 | return ln; |
527 | 527 | ||
528 | 528 | ||
529 | insert_above: | 529 | insert_above: |
530 | /* | 530 | /* |
531 | * split since we don't have a common prefix anymore or | 531 | * split since we don't have a common prefix anymore or |
532 | * we have a less significant route. | 532 | * we have a less significant route. |
533 | * we've to insert an intermediate node on the list | 533 | * we've to insert an intermediate node on the list |
534 | * this new node will point to the one we need to create | 534 | * this new node will point to the one we need to create |
535 | * and the current | 535 | * and the current |
536 | */ | 536 | */ |
537 | 537 | ||
538 | pn = fn->parent; | 538 | pn = fn->parent; |
539 | 539 | ||
540 | /* find 1st bit in difference between the 2 addrs. | 540 | /* find 1st bit in difference between the 2 addrs. |
541 | 541 | ||
542 | See comment in __ipv6_addr_diff: bit may be an invalid value, | 542 | See comment in __ipv6_addr_diff: bit may be an invalid value, |
543 | but if it is >= plen, the value is ignored in any case. | 543 | but if it is >= plen, the value is ignored in any case. |
544 | */ | 544 | */ |
545 | 545 | ||
546 | bit = __ipv6_addr_diff(addr, &key->addr, addrlen); | 546 | bit = __ipv6_addr_diff(addr, &key->addr, addrlen); |
547 | 547 | ||
548 | /* | 548 | /* |
549 | * (intermediate)[in] | 549 | * (intermediate)[in] |
550 | * / \ | 550 | * / \ |
551 | * (new leaf node)[ln] (old node)[fn] | 551 | * (new leaf node)[ln] (old node)[fn] |
552 | */ | 552 | */ |
553 | if (plen > bit) { | 553 | if (plen > bit) { |
554 | in = node_alloc(); | 554 | in = node_alloc(); |
555 | ln = node_alloc(); | 555 | ln = node_alloc(); |
556 | 556 | ||
557 | if (!in || !ln) { | 557 | if (!in || !ln) { |
558 | if (in) | 558 | if (in) |
559 | node_free(in); | 559 | node_free(in); |
560 | if (ln) | 560 | if (ln) |
561 | node_free(ln); | 561 | node_free(ln); |
562 | return ERR_PTR(-ENOMEM); | 562 | return ERR_PTR(-ENOMEM); |
563 | } | 563 | } |
564 | 564 | ||
565 | /* | 565 | /* |
566 | * new intermediate node. | 566 | * new intermediate node. |
567 | * RTN_RTINFO will | 567 | * RTN_RTINFO will |
568 | * be off since that an address that chooses one of | 568 | * be off since that an address that chooses one of |
569 | * the branches would not match less specific routes | 569 | * the branches would not match less specific routes |
570 | * in the other branch | 570 | * in the other branch |
571 | */ | 571 | */ |
572 | 572 | ||
573 | in->fn_bit = bit; | 573 | in->fn_bit = bit; |
574 | 574 | ||
575 | in->parent = pn; | 575 | in->parent = pn; |
576 | in->leaf = fn->leaf; | 576 | in->leaf = fn->leaf; |
577 | atomic_inc(&in->leaf->rt6i_ref); | 577 | atomic_inc(&in->leaf->rt6i_ref); |
578 | 578 | ||
579 | in->fn_sernum = sernum; | 579 | in->fn_sernum = sernum; |
580 | 580 | ||
581 | /* update parent pointer */ | 581 | /* update parent pointer */ |
582 | if (dir) | 582 | if (dir) |
583 | pn->right = in; | 583 | pn->right = in; |
584 | else | 584 | else |
585 | pn->left = in; | 585 | pn->left = in; |
586 | 586 | ||
587 | ln->fn_bit = plen; | 587 | ln->fn_bit = plen; |
588 | 588 | ||
589 | ln->parent = in; | 589 | ln->parent = in; |
590 | fn->parent = in; | 590 | fn->parent = in; |
591 | 591 | ||
592 | ln->fn_sernum = sernum; | 592 | ln->fn_sernum = sernum; |
593 | 593 | ||
594 | if (addr_bit_set(addr, bit)) { | 594 | if (addr_bit_set(addr, bit)) { |
595 | in->right = ln; | 595 | in->right = ln; |
596 | in->left = fn; | 596 | in->left = fn; |
597 | } else { | 597 | } else { |
598 | in->left = ln; | 598 | in->left = ln; |
599 | in->right = fn; | 599 | in->right = fn; |
600 | } | 600 | } |
601 | } else { /* plen <= bit */ | 601 | } else { /* plen <= bit */ |
602 | 602 | ||
603 | /* | 603 | /* |
604 | * (new leaf node)[ln] | 604 | * (new leaf node)[ln] |
605 | * / \ | 605 | * / \ |
606 | * (old node)[fn] NULL | 606 | * (old node)[fn] NULL |
607 | */ | 607 | */ |
608 | 608 | ||
609 | ln = node_alloc(); | 609 | ln = node_alloc(); |
610 | 610 | ||
611 | if (!ln) | 611 | if (!ln) |
612 | return ERR_PTR(-ENOMEM); | 612 | return ERR_PTR(-ENOMEM); |
613 | 613 | ||
614 | ln->fn_bit = plen; | 614 | ln->fn_bit = plen; |
615 | 615 | ||
616 | ln->parent = pn; | 616 | ln->parent = pn; |
617 | 617 | ||
618 | ln->fn_sernum = sernum; | 618 | ln->fn_sernum = sernum; |
619 | 619 | ||
620 | if (dir) | 620 | if (dir) |
621 | pn->right = ln; | 621 | pn->right = ln; |
622 | else | 622 | else |
623 | pn->left = ln; | 623 | pn->left = ln; |
624 | 624 | ||
625 | if (addr_bit_set(&key->addr, plen)) | 625 | if (addr_bit_set(&key->addr, plen)) |
626 | ln->right = fn; | 626 | ln->right = fn; |
627 | else | 627 | else |
628 | ln->left = fn; | 628 | ln->left = fn; |
629 | 629 | ||
630 | fn->parent = ln; | 630 | fn->parent = ln; |
631 | } | 631 | } |
632 | return ln; | 632 | return ln; |
633 | } | 633 | } |
634 | 634 | ||
635 | static inline bool rt6_qualify_for_ecmp(struct rt6_info *rt) | ||
636 | { | ||
637 | return (rt->rt6i_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) == | ||
638 | RTF_GATEWAY; | ||
639 | } | ||
640 | |||
635 | /* | 641 | /* |
636 | * Insert routing information in a node. | 642 | * Insert routing information in a node. |
637 | */ | 643 | */ |
638 | 644 | ||
639 | static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, | 645 | static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, |
640 | struct nl_info *info) | 646 | struct nl_info *info) |
641 | { | 647 | { |
642 | struct rt6_info *iter = NULL; | 648 | struct rt6_info *iter = NULL; |
643 | struct rt6_info **ins; | 649 | struct rt6_info **ins; |
644 | int replace = (info->nlh && | 650 | int replace = (info->nlh && |
645 | (info->nlh->nlmsg_flags & NLM_F_REPLACE)); | 651 | (info->nlh->nlmsg_flags & NLM_F_REPLACE)); |
646 | int add = (!info->nlh || | 652 | int add = (!info->nlh || |
647 | (info->nlh->nlmsg_flags & NLM_F_CREATE)); | 653 | (info->nlh->nlmsg_flags & NLM_F_CREATE)); |
648 | int found = 0; | 654 | int found = 0; |
655 | bool rt_can_ecmp = rt6_qualify_for_ecmp(rt); | ||
649 | 656 | ||
650 | ins = &fn->leaf; | 657 | ins = &fn->leaf; |
651 | 658 | ||
652 | for (iter = fn->leaf; iter; iter = iter->dst.rt6_next) { | 659 | for (iter = fn->leaf; iter; iter = iter->dst.rt6_next) { |
653 | /* | 660 | /* |
654 | * Search for duplicates | 661 | * Search for duplicates |
655 | */ | 662 | */ |
656 | 663 | ||
657 | if (iter->rt6i_metric == rt->rt6i_metric) { | 664 | if (iter->rt6i_metric == rt->rt6i_metric) { |
658 | /* | 665 | /* |
659 | * Same priority level | 666 | * Same priority level |
660 | */ | 667 | */ |
661 | if (info->nlh && | 668 | if (info->nlh && |
662 | (info->nlh->nlmsg_flags & NLM_F_EXCL)) | 669 | (info->nlh->nlmsg_flags & NLM_F_EXCL)) |
663 | return -EEXIST; | 670 | return -EEXIST; |
664 | if (replace) { | 671 | if (replace) { |
665 | found++; | 672 | found++; |
666 | break; | 673 | break; |
667 | } | 674 | } |
668 | 675 | ||
669 | if (iter->dst.dev == rt->dst.dev && | 676 | if (iter->dst.dev == rt->dst.dev && |
670 | iter->rt6i_idev == rt->rt6i_idev && | 677 | iter->rt6i_idev == rt->rt6i_idev && |
671 | ipv6_addr_equal(&iter->rt6i_gateway, | 678 | ipv6_addr_equal(&iter->rt6i_gateway, |
672 | &rt->rt6i_gateway)) { | 679 | &rt->rt6i_gateway)) { |
673 | if (rt->rt6i_nsiblings) | 680 | if (rt->rt6i_nsiblings) |
674 | rt->rt6i_nsiblings = 0; | 681 | rt->rt6i_nsiblings = 0; |
675 | if (!(iter->rt6i_flags & RTF_EXPIRES)) | 682 | if (!(iter->rt6i_flags & RTF_EXPIRES)) |
676 | return -EEXIST; | 683 | return -EEXIST; |
677 | if (!(rt->rt6i_flags & RTF_EXPIRES)) | 684 | if (!(rt->rt6i_flags & RTF_EXPIRES)) |
678 | rt6_clean_expires(iter); | 685 | rt6_clean_expires(iter); |
679 | else | 686 | else |
680 | rt6_set_expires(iter, rt->dst.expires); | 687 | rt6_set_expires(iter, rt->dst.expires); |
681 | return -EEXIST; | 688 | return -EEXIST; |
682 | } | 689 | } |
683 | /* If we have the same destination and the same metric, | 690 | /* If we have the same destination and the same metric, |
684 | * but not the same gateway, then the route we try to | 691 | * but not the same gateway, then the route we try to |
685 | * add is sibling to this route, increment our counter | 692 | * add is sibling to this route, increment our counter |
686 | * of siblings, and later we will add our route to the | 693 | * of siblings, and later we will add our route to the |
687 | * list. | 694 | * list. |
688 | * Only static routes (which don't have flag | 695 | * Only static routes (which don't have flag |
689 | * RTF_EXPIRES) are used for ECMPv6. | 696 | * RTF_EXPIRES) are used for ECMPv6. |
690 | * | 697 | * |
691 | * To avoid long list, we only had siblings if the | 698 | * To avoid long list, we only had siblings if the |
692 | * route have a gateway. | 699 | * route have a gateway. |
693 | */ | 700 | */ |
694 | if (rt->rt6i_flags & RTF_GATEWAY && | 701 | if (rt_can_ecmp && |
695 | !(rt->rt6i_flags & RTF_EXPIRES) && | 702 | rt6_qualify_for_ecmp(iter)) |
696 | !(iter->rt6i_flags & RTF_EXPIRES)) | ||
697 | rt->rt6i_nsiblings++; | 703 | rt->rt6i_nsiblings++; |
698 | } | 704 | } |
699 | 705 | ||
700 | if (iter->rt6i_metric > rt->rt6i_metric) | 706 | if (iter->rt6i_metric > rt->rt6i_metric) |
701 | break; | 707 | break; |
702 | 708 | ||
703 | ins = &iter->dst.rt6_next; | 709 | ins = &iter->dst.rt6_next; |
704 | } | 710 | } |
705 | 711 | ||
706 | /* Reset round-robin state, if necessary */ | 712 | /* Reset round-robin state, if necessary */ |
707 | if (ins == &fn->leaf) | 713 | if (ins == &fn->leaf) |
708 | fn->rr_ptr = NULL; | 714 | fn->rr_ptr = NULL; |
709 | 715 | ||
710 | /* Link this route to others same route. */ | 716 | /* Link this route to others same route. */ |
711 | if (rt->rt6i_nsiblings) { | 717 | if (rt->rt6i_nsiblings) { |
712 | unsigned int rt6i_nsiblings; | 718 | unsigned int rt6i_nsiblings; |
713 | struct rt6_info *sibling, *temp_sibling; | 719 | struct rt6_info *sibling, *temp_sibling; |
714 | 720 | ||
715 | /* Find the first route that have the same metric */ | 721 | /* Find the first route that have the same metric */ |
716 | sibling = fn->leaf; | 722 | sibling = fn->leaf; |
717 | while (sibling) { | 723 | while (sibling) { |
718 | if (sibling->rt6i_metric == rt->rt6i_metric) { | 724 | if (sibling->rt6i_metric == rt->rt6i_metric && |
725 | rt6_qualify_for_ecmp(sibling)) { | ||
719 | list_add_tail(&rt->rt6i_siblings, | 726 | list_add_tail(&rt->rt6i_siblings, |
720 | &sibling->rt6i_siblings); | 727 | &sibling->rt6i_siblings); |
721 | break; | 728 | break; |
722 | } | 729 | } |
723 | sibling = sibling->dst.rt6_next; | 730 | sibling = sibling->dst.rt6_next; |
724 | } | 731 | } |
725 | /* For each sibling in the list, increment the counter of | 732 | /* For each sibling in the list, increment the counter of |
726 | * siblings. BUG() if counters does not match, list of siblings | 733 | * siblings. BUG() if counters does not match, list of siblings |
727 | * is broken! | 734 | * is broken! |
728 | */ | 735 | */ |
729 | rt6i_nsiblings = 0; | 736 | rt6i_nsiblings = 0; |
730 | list_for_each_entry_safe(sibling, temp_sibling, | 737 | list_for_each_entry_safe(sibling, temp_sibling, |
731 | &rt->rt6i_siblings, rt6i_siblings) { | 738 | &rt->rt6i_siblings, rt6i_siblings) { |
732 | sibling->rt6i_nsiblings++; | 739 | sibling->rt6i_nsiblings++; |
733 | BUG_ON(sibling->rt6i_nsiblings != rt->rt6i_nsiblings); | 740 | BUG_ON(sibling->rt6i_nsiblings != rt->rt6i_nsiblings); |
734 | rt6i_nsiblings++; | 741 | rt6i_nsiblings++; |
735 | } | 742 | } |
736 | BUG_ON(rt6i_nsiblings != rt->rt6i_nsiblings); | 743 | BUG_ON(rt6i_nsiblings != rt->rt6i_nsiblings); |
737 | } | 744 | } |
738 | 745 | ||
739 | /* | 746 | /* |
740 | * insert node | 747 | * insert node |
741 | */ | 748 | */ |
742 | if (!replace) { | 749 | if (!replace) { |
743 | if (!add) | 750 | if (!add) |
744 | pr_warn("NLM_F_CREATE should be set when creating new route\n"); | 751 | pr_warn("NLM_F_CREATE should be set when creating new route\n"); |
745 | 752 | ||
746 | add: | 753 | add: |
747 | rt->dst.rt6_next = iter; | 754 | rt->dst.rt6_next = iter; |
748 | *ins = rt; | 755 | *ins = rt; |
749 | rt->rt6i_node = fn; | 756 | rt->rt6i_node = fn; |
750 | atomic_inc(&rt->rt6i_ref); | 757 | atomic_inc(&rt->rt6i_ref); |
751 | inet6_rt_notify(RTM_NEWROUTE, rt, info); | 758 | inet6_rt_notify(RTM_NEWROUTE, rt, info); |
752 | info->nl_net->ipv6.rt6_stats->fib_rt_entries++; | 759 | info->nl_net->ipv6.rt6_stats->fib_rt_entries++; |
753 | 760 | ||
754 | if (!(fn->fn_flags & RTN_RTINFO)) { | 761 | if (!(fn->fn_flags & RTN_RTINFO)) { |
755 | info->nl_net->ipv6.rt6_stats->fib_route_nodes++; | 762 | info->nl_net->ipv6.rt6_stats->fib_route_nodes++; |
756 | fn->fn_flags |= RTN_RTINFO; | 763 | fn->fn_flags |= RTN_RTINFO; |
757 | } | 764 | } |
758 | 765 | ||
759 | } else { | 766 | } else { |
760 | if (!found) { | 767 | if (!found) { |
761 | if (add) | 768 | if (add) |
762 | goto add; | 769 | goto add; |
763 | pr_warn("NLM_F_REPLACE set, but no existing node found!\n"); | 770 | pr_warn("NLM_F_REPLACE set, but no existing node found!\n"); |
764 | return -ENOENT; | 771 | return -ENOENT; |
765 | } | 772 | } |
766 | *ins = rt; | 773 | *ins = rt; |
767 | rt->rt6i_node = fn; | 774 | rt->rt6i_node = fn; |
768 | rt->dst.rt6_next = iter->dst.rt6_next; | 775 | rt->dst.rt6_next = iter->dst.rt6_next; |
769 | atomic_inc(&rt->rt6i_ref); | 776 | atomic_inc(&rt->rt6i_ref); |
770 | inet6_rt_notify(RTM_NEWROUTE, rt, info); | 777 | inet6_rt_notify(RTM_NEWROUTE, rt, info); |
771 | rt6_release(iter); | 778 | rt6_release(iter); |
772 | if (!(fn->fn_flags & RTN_RTINFO)) { | 779 | if (!(fn->fn_flags & RTN_RTINFO)) { |
773 | info->nl_net->ipv6.rt6_stats->fib_route_nodes++; | 780 | info->nl_net->ipv6.rt6_stats->fib_route_nodes++; |
774 | fn->fn_flags |= RTN_RTINFO; | 781 | fn->fn_flags |= RTN_RTINFO; |
775 | } | 782 | } |
776 | } | 783 | } |
777 | 784 | ||
778 | return 0; | 785 | return 0; |
779 | } | 786 | } |
780 | 787 | ||
781 | static __inline__ void fib6_start_gc(struct net *net, struct rt6_info *rt) | 788 | static __inline__ void fib6_start_gc(struct net *net, struct rt6_info *rt) |
782 | { | 789 | { |
783 | if (!timer_pending(&net->ipv6.ip6_fib_timer) && | 790 | if (!timer_pending(&net->ipv6.ip6_fib_timer) && |
784 | (rt->rt6i_flags & (RTF_EXPIRES | RTF_CACHE))) | 791 | (rt->rt6i_flags & (RTF_EXPIRES | RTF_CACHE))) |
785 | mod_timer(&net->ipv6.ip6_fib_timer, | 792 | mod_timer(&net->ipv6.ip6_fib_timer, |
786 | jiffies + net->ipv6.sysctl.ip6_rt_gc_interval); | 793 | jiffies + net->ipv6.sysctl.ip6_rt_gc_interval); |
787 | } | 794 | } |
788 | 795 | ||
789 | void fib6_force_start_gc(struct net *net) | 796 | void fib6_force_start_gc(struct net *net) |
790 | { | 797 | { |
791 | if (!timer_pending(&net->ipv6.ip6_fib_timer)) | 798 | if (!timer_pending(&net->ipv6.ip6_fib_timer)) |
792 | mod_timer(&net->ipv6.ip6_fib_timer, | 799 | mod_timer(&net->ipv6.ip6_fib_timer, |
793 | jiffies + net->ipv6.sysctl.ip6_rt_gc_interval); | 800 | jiffies + net->ipv6.sysctl.ip6_rt_gc_interval); |
794 | } | 801 | } |
795 | 802 | ||
796 | /* | 803 | /* |
797 | * Add routing information to the routing tree. | 804 | * Add routing information to the routing tree. |
798 | * <destination addr>/<source addr> | 805 | * <destination addr>/<source addr> |
799 | * with source addr info in sub-trees | 806 | * with source addr info in sub-trees |
800 | */ | 807 | */ |
801 | 808 | ||
802 | int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) | 809 | int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) |
803 | { | 810 | { |
804 | struct fib6_node *fn, *pn = NULL; | 811 | struct fib6_node *fn, *pn = NULL; |
805 | int err = -ENOMEM; | 812 | int err = -ENOMEM; |
806 | int allow_create = 1; | 813 | int allow_create = 1; |
807 | int replace_required = 0; | 814 | int replace_required = 0; |
808 | 815 | ||
809 | if (info->nlh) { | 816 | if (info->nlh) { |
810 | if (!(info->nlh->nlmsg_flags & NLM_F_CREATE)) | 817 | if (!(info->nlh->nlmsg_flags & NLM_F_CREATE)) |
811 | allow_create = 0; | 818 | allow_create = 0; |
812 | if (info->nlh->nlmsg_flags & NLM_F_REPLACE) | 819 | if (info->nlh->nlmsg_flags & NLM_F_REPLACE) |
813 | replace_required = 1; | 820 | replace_required = 1; |
814 | } | 821 | } |
815 | if (!allow_create && !replace_required) | 822 | if (!allow_create && !replace_required) |
816 | pr_warn("RTM_NEWROUTE with no NLM_F_CREATE or NLM_F_REPLACE\n"); | 823 | pr_warn("RTM_NEWROUTE with no NLM_F_CREATE or NLM_F_REPLACE\n"); |
817 | 824 | ||
818 | fn = fib6_add_1(root, &rt->rt6i_dst.addr, sizeof(struct in6_addr), | 825 | fn = fib6_add_1(root, &rt->rt6i_dst.addr, sizeof(struct in6_addr), |
819 | rt->rt6i_dst.plen, offsetof(struct rt6_info, rt6i_dst), | 826 | rt->rt6i_dst.plen, offsetof(struct rt6_info, rt6i_dst), |
820 | allow_create, replace_required); | 827 | allow_create, replace_required); |
821 | 828 | ||
822 | if (IS_ERR(fn)) { | 829 | if (IS_ERR(fn)) { |
823 | err = PTR_ERR(fn); | 830 | err = PTR_ERR(fn); |
824 | goto out; | 831 | goto out; |
825 | } | 832 | } |
826 | 833 | ||
827 | pn = fn; | 834 | pn = fn; |
828 | 835 | ||
829 | #ifdef CONFIG_IPV6_SUBTREES | 836 | #ifdef CONFIG_IPV6_SUBTREES |
830 | if (rt->rt6i_src.plen) { | 837 | if (rt->rt6i_src.plen) { |
831 | struct fib6_node *sn; | 838 | struct fib6_node *sn; |
832 | 839 | ||
833 | if (!fn->subtree) { | 840 | if (!fn->subtree) { |
834 | struct fib6_node *sfn; | 841 | struct fib6_node *sfn; |
835 | 842 | ||
836 | /* | 843 | /* |
837 | * Create subtree. | 844 | * Create subtree. |
838 | * | 845 | * |
839 | * fn[main tree] | 846 | * fn[main tree] |
840 | * | | 847 | * | |
841 | * sfn[subtree root] | 848 | * sfn[subtree root] |
842 | * \ | 849 | * \ |
843 | * sn[new leaf node] | 850 | * sn[new leaf node] |
844 | */ | 851 | */ |
845 | 852 | ||
846 | /* Create subtree root node */ | 853 | /* Create subtree root node */ |
847 | sfn = node_alloc(); | 854 | sfn = node_alloc(); |
848 | if (!sfn) | 855 | if (!sfn) |
849 | goto st_failure; | 856 | goto st_failure; |
850 | 857 | ||
851 | sfn->leaf = info->nl_net->ipv6.ip6_null_entry; | 858 | sfn->leaf = info->nl_net->ipv6.ip6_null_entry; |
852 | atomic_inc(&info->nl_net->ipv6.ip6_null_entry->rt6i_ref); | 859 | atomic_inc(&info->nl_net->ipv6.ip6_null_entry->rt6i_ref); |
853 | sfn->fn_flags = RTN_ROOT; | 860 | sfn->fn_flags = RTN_ROOT; |
854 | sfn->fn_sernum = fib6_new_sernum(); | 861 | sfn->fn_sernum = fib6_new_sernum(); |
855 | 862 | ||
856 | /* Now add the first leaf node to new subtree */ | 863 | /* Now add the first leaf node to new subtree */ |
857 | 864 | ||
858 | sn = fib6_add_1(sfn, &rt->rt6i_src.addr, | 865 | sn = fib6_add_1(sfn, &rt->rt6i_src.addr, |
859 | sizeof(struct in6_addr), rt->rt6i_src.plen, | 866 | sizeof(struct in6_addr), rt->rt6i_src.plen, |
860 | offsetof(struct rt6_info, rt6i_src), | 867 | offsetof(struct rt6_info, rt6i_src), |
861 | allow_create, replace_required); | 868 | allow_create, replace_required); |
862 | 869 | ||
863 | if (IS_ERR(sn)) { | 870 | if (IS_ERR(sn)) { |
864 | /* If it is failed, discard just allocated | 871 | /* If it is failed, discard just allocated |
865 | root, and then (in st_failure) stale node | 872 | root, and then (in st_failure) stale node |
866 | in main tree. | 873 | in main tree. |
867 | */ | 874 | */ |
868 | node_free(sfn); | 875 | node_free(sfn); |
869 | err = PTR_ERR(sn); | 876 | err = PTR_ERR(sn); |
870 | goto st_failure; | 877 | goto st_failure; |
871 | } | 878 | } |
872 | 879 | ||
873 | /* Now link new subtree to main tree */ | 880 | /* Now link new subtree to main tree */ |
874 | sfn->parent = fn; | 881 | sfn->parent = fn; |
875 | fn->subtree = sfn; | 882 | fn->subtree = sfn; |
876 | } else { | 883 | } else { |
877 | sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr, | 884 | sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr, |
878 | sizeof(struct in6_addr), rt->rt6i_src.plen, | 885 | sizeof(struct in6_addr), rt->rt6i_src.plen, |
879 | offsetof(struct rt6_info, rt6i_src), | 886 | offsetof(struct rt6_info, rt6i_src), |
880 | allow_create, replace_required); | 887 | allow_create, replace_required); |
881 | 888 | ||
882 | if (IS_ERR(sn)) { | 889 | if (IS_ERR(sn)) { |
883 | err = PTR_ERR(sn); | 890 | err = PTR_ERR(sn); |
884 | goto st_failure; | 891 | goto st_failure; |
885 | } | 892 | } |
886 | } | 893 | } |
887 | 894 | ||
888 | if (!fn->leaf) { | 895 | if (!fn->leaf) { |
889 | fn->leaf = rt; | 896 | fn->leaf = rt; |
890 | atomic_inc(&rt->rt6i_ref); | 897 | atomic_inc(&rt->rt6i_ref); |
891 | } | 898 | } |
892 | fn = sn; | 899 | fn = sn; |
893 | } | 900 | } |
894 | #endif | 901 | #endif |
895 | 902 | ||
896 | err = fib6_add_rt2node(fn, rt, info); | 903 | err = fib6_add_rt2node(fn, rt, info); |
897 | if (!err) { | 904 | if (!err) { |
898 | fib6_start_gc(info->nl_net, rt); | 905 | fib6_start_gc(info->nl_net, rt); |
899 | if (!(rt->rt6i_flags & RTF_CACHE)) | 906 | if (!(rt->rt6i_flags & RTF_CACHE)) |
900 | fib6_prune_clones(info->nl_net, pn, rt); | 907 | fib6_prune_clones(info->nl_net, pn, rt); |
901 | } | 908 | } |
902 | 909 | ||
903 | out: | 910 | out: |
904 | if (err) { | 911 | if (err) { |
905 | #ifdef CONFIG_IPV6_SUBTREES | 912 | #ifdef CONFIG_IPV6_SUBTREES |
906 | /* | 913 | /* |
907 | * If fib6_add_1 has cleared the old leaf pointer in the | 914 | * If fib6_add_1 has cleared the old leaf pointer in the |
908 | * super-tree leaf node we have to find a new one for it. | 915 | * super-tree leaf node we have to find a new one for it. |
909 | */ | 916 | */ |
910 | if (pn != fn && pn->leaf == rt) { | 917 | if (pn != fn && pn->leaf == rt) { |
911 | pn->leaf = NULL; | 918 | pn->leaf = NULL; |
912 | atomic_dec(&rt->rt6i_ref); | 919 | atomic_dec(&rt->rt6i_ref); |
913 | } | 920 | } |
914 | if (pn != fn && !pn->leaf && !(pn->fn_flags & RTN_RTINFO)) { | 921 | if (pn != fn && !pn->leaf && !(pn->fn_flags & RTN_RTINFO)) { |
915 | pn->leaf = fib6_find_prefix(info->nl_net, pn); | 922 | pn->leaf = fib6_find_prefix(info->nl_net, pn); |
916 | #if RT6_DEBUG >= 2 | 923 | #if RT6_DEBUG >= 2 |
917 | if (!pn->leaf) { | 924 | if (!pn->leaf) { |
918 | WARN_ON(pn->leaf == NULL); | 925 | WARN_ON(pn->leaf == NULL); |
919 | pn->leaf = info->nl_net->ipv6.ip6_null_entry; | 926 | pn->leaf = info->nl_net->ipv6.ip6_null_entry; |
920 | } | 927 | } |
921 | #endif | 928 | #endif |
922 | atomic_inc(&pn->leaf->rt6i_ref); | 929 | atomic_inc(&pn->leaf->rt6i_ref); |
923 | } | 930 | } |
924 | #endif | 931 | #endif |
925 | dst_free(&rt->dst); | 932 | dst_free(&rt->dst); |
926 | } | 933 | } |
927 | return err; | 934 | return err; |
928 | 935 | ||
929 | #ifdef CONFIG_IPV6_SUBTREES | 936 | #ifdef CONFIG_IPV6_SUBTREES |
930 | /* Subtree creation failed, probably main tree node | 937 | /* Subtree creation failed, probably main tree node |
931 | is orphan. If it is, shoot it. | 938 | is orphan. If it is, shoot it. |
932 | */ | 939 | */ |
933 | st_failure: | 940 | st_failure: |
934 | if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT))) | 941 | if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT))) |
935 | fib6_repair_tree(info->nl_net, fn); | 942 | fib6_repair_tree(info->nl_net, fn); |
936 | dst_free(&rt->dst); | 943 | dst_free(&rt->dst); |
937 | return err; | 944 | return err; |
938 | #endif | 945 | #endif |
939 | } | 946 | } |
940 | 947 | ||
941 | /* | 948 | /* |
942 | * Routing tree lookup | 949 | * Routing tree lookup |
943 | * | 950 | * |
944 | */ | 951 | */ |
945 | 952 | ||
/*
 *	One step of a lookup as consumed by fib6_lookup_1(): which key of
 *	rt6_info to compare and the address to compare it with.  An entry
 *	with offset 0 terminates the argument list.
 */
struct lookup_args {
	int			offset;		/* key offset on rt6_info */
	const struct in6_addr	*addr;		/* search key */
};
950 | 957 | ||
951 | static struct fib6_node * fib6_lookup_1(struct fib6_node *root, | 958 | static struct fib6_node * fib6_lookup_1(struct fib6_node *root, |
952 | struct lookup_args *args) | 959 | struct lookup_args *args) |
953 | { | 960 | { |
954 | struct fib6_node *fn; | 961 | struct fib6_node *fn; |
955 | __be32 dir; | 962 | __be32 dir; |
956 | 963 | ||
957 | if (unlikely(args->offset == 0)) | 964 | if (unlikely(args->offset == 0)) |
958 | return NULL; | 965 | return NULL; |
959 | 966 | ||
960 | /* | 967 | /* |
961 | * Descend on a tree | 968 | * Descend on a tree |
962 | */ | 969 | */ |
963 | 970 | ||
964 | fn = root; | 971 | fn = root; |
965 | 972 | ||
966 | for (;;) { | 973 | for (;;) { |
967 | struct fib6_node *next; | 974 | struct fib6_node *next; |
968 | 975 | ||
969 | dir = addr_bit_set(args->addr, fn->fn_bit); | 976 | dir = addr_bit_set(args->addr, fn->fn_bit); |
970 | 977 | ||
971 | next = dir ? fn->right : fn->left; | 978 | next = dir ? fn->right : fn->left; |
972 | 979 | ||
973 | if (next) { | 980 | if (next) { |
974 | fn = next; | 981 | fn = next; |
975 | continue; | 982 | continue; |
976 | } | 983 | } |
977 | break; | 984 | break; |
978 | } | 985 | } |
979 | 986 | ||
980 | while (fn) { | 987 | while (fn) { |
981 | if (FIB6_SUBTREE(fn) || fn->fn_flags & RTN_RTINFO) { | 988 | if (FIB6_SUBTREE(fn) || fn->fn_flags & RTN_RTINFO) { |
982 | struct rt6key *key; | 989 | struct rt6key *key; |
983 | 990 | ||
984 | key = (struct rt6key *) ((u8 *) fn->leaf + | 991 | key = (struct rt6key *) ((u8 *) fn->leaf + |
985 | args->offset); | 992 | args->offset); |
986 | 993 | ||
987 | if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) { | 994 | if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) { |
988 | #ifdef CONFIG_IPV6_SUBTREES | 995 | #ifdef CONFIG_IPV6_SUBTREES |
989 | if (fn->subtree) | 996 | if (fn->subtree) |
990 | fn = fib6_lookup_1(fn->subtree, args + 1); | 997 | fn = fib6_lookup_1(fn->subtree, args + 1); |
991 | #endif | 998 | #endif |
992 | if (!fn || fn->fn_flags & RTN_RTINFO) | 999 | if (!fn || fn->fn_flags & RTN_RTINFO) |
993 | return fn; | 1000 | return fn; |
994 | } | 1001 | } |
995 | } | 1002 | } |
996 | 1003 | ||
997 | if (fn->fn_flags & RTN_ROOT) | 1004 | if (fn->fn_flags & RTN_ROOT) |
998 | break; | 1005 | break; |
999 | 1006 | ||
1000 | fn = fn->parent; | 1007 | fn = fn->parent; |
1001 | } | 1008 | } |
1002 | 1009 | ||
1003 | return NULL; | 1010 | return NULL; |
1004 | } | 1011 | } |
1005 | 1012 | ||
1006 | struct fib6_node * fib6_lookup(struct fib6_node *root, const struct in6_addr *daddr, | 1013 | struct fib6_node * fib6_lookup(struct fib6_node *root, const struct in6_addr *daddr, |
1007 | const struct in6_addr *saddr) | 1014 | const struct in6_addr *saddr) |
1008 | { | 1015 | { |
1009 | struct fib6_node *fn; | 1016 | struct fib6_node *fn; |
1010 | struct lookup_args args[] = { | 1017 | struct lookup_args args[] = { |
1011 | { | 1018 | { |
1012 | .offset = offsetof(struct rt6_info, rt6i_dst), | 1019 | .offset = offsetof(struct rt6_info, rt6i_dst), |
1013 | .addr = daddr, | 1020 | .addr = daddr, |
1014 | }, | 1021 | }, |
1015 | #ifdef CONFIG_IPV6_SUBTREES | 1022 | #ifdef CONFIG_IPV6_SUBTREES |
1016 | { | 1023 | { |
1017 | .offset = offsetof(struct rt6_info, rt6i_src), | 1024 | .offset = offsetof(struct rt6_info, rt6i_src), |
1018 | .addr = saddr, | 1025 | .addr = saddr, |
1019 | }, | 1026 | }, |
1020 | #endif | 1027 | #endif |
1021 | { | 1028 | { |
1022 | .offset = 0, /* sentinel */ | 1029 | .offset = 0, /* sentinel */ |
1023 | } | 1030 | } |
1024 | }; | 1031 | }; |
1025 | 1032 | ||
1026 | fn = fib6_lookup_1(root, daddr ? args : args + 1); | 1033 | fn = fib6_lookup_1(root, daddr ? args : args + 1); |
1027 | if (!fn || fn->fn_flags & RTN_TL_ROOT) | 1034 | if (!fn || fn->fn_flags & RTN_TL_ROOT) |
1028 | fn = root; | 1035 | fn = root; |
1029 | 1036 | ||
1030 | return fn; | 1037 | return fn; |
1031 | } | 1038 | } |
1032 | 1039 | ||
1033 | /* | 1040 | /* |
1034 | * Get node with specified destination prefix (and source prefix, | 1041 | * Get node with specified destination prefix (and source prefix, |
1035 | * if subtrees are used) | 1042 | * if subtrees are used) |
1036 | */ | 1043 | */ |
1037 | 1044 | ||
1038 | 1045 | ||
1039 | static struct fib6_node * fib6_locate_1(struct fib6_node *root, | 1046 | static struct fib6_node * fib6_locate_1(struct fib6_node *root, |
1040 | const struct in6_addr *addr, | 1047 | const struct in6_addr *addr, |
1041 | int plen, int offset) | 1048 | int plen, int offset) |
1042 | { | 1049 | { |
1043 | struct fib6_node *fn; | 1050 | struct fib6_node *fn; |
1044 | 1051 | ||
1045 | for (fn = root; fn ; ) { | 1052 | for (fn = root; fn ; ) { |
1046 | struct rt6key *key = (struct rt6key *)((u8 *)fn->leaf + offset); | 1053 | struct rt6key *key = (struct rt6key *)((u8 *)fn->leaf + offset); |
1047 | 1054 | ||
1048 | /* | 1055 | /* |
1049 | * Prefix match | 1056 | * Prefix match |
1050 | */ | 1057 | */ |
1051 | if (plen < fn->fn_bit || | 1058 | if (plen < fn->fn_bit || |
1052 | !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit)) | 1059 | !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit)) |
1053 | return NULL; | 1060 | return NULL; |
1054 | 1061 | ||
1055 | if (plen == fn->fn_bit) | 1062 | if (plen == fn->fn_bit) |
1056 | return fn; | 1063 | return fn; |
1057 | 1064 | ||
1058 | /* | 1065 | /* |
1059 | * We have more bits to go | 1066 | * We have more bits to go |
1060 | */ | 1067 | */ |
1061 | if (addr_bit_set(addr, fn->fn_bit)) | 1068 | if (addr_bit_set(addr, fn->fn_bit)) |
1062 | fn = fn->right; | 1069 | fn = fn->right; |
1063 | else | 1070 | else |
1064 | fn = fn->left; | 1071 | fn = fn->left; |
1065 | } | 1072 | } |
1066 | return NULL; | 1073 | return NULL; |
1067 | } | 1074 | } |
1068 | 1075 | ||
1069 | struct fib6_node * fib6_locate(struct fib6_node *root, | 1076 | struct fib6_node * fib6_locate(struct fib6_node *root, |
1070 | const struct in6_addr *daddr, int dst_len, | 1077 | const struct in6_addr *daddr, int dst_len, |
1071 | const struct in6_addr *saddr, int src_len) | 1078 | const struct in6_addr *saddr, int src_len) |
1072 | { | 1079 | { |
1073 | struct fib6_node *fn; | 1080 | struct fib6_node *fn; |
1074 | 1081 | ||
1075 | fn = fib6_locate_1(root, daddr, dst_len, | 1082 | fn = fib6_locate_1(root, daddr, dst_len, |
1076 | offsetof(struct rt6_info, rt6i_dst)); | 1083 | offsetof(struct rt6_info, rt6i_dst)); |
1077 | 1084 | ||
1078 | #ifdef CONFIG_IPV6_SUBTREES | 1085 | #ifdef CONFIG_IPV6_SUBTREES |
1079 | if (src_len) { | 1086 | if (src_len) { |
1080 | WARN_ON(saddr == NULL); | 1087 | WARN_ON(saddr == NULL); |
1081 | if (fn && fn->subtree) | 1088 | if (fn && fn->subtree) |
1082 | fn = fib6_locate_1(fn->subtree, saddr, src_len, | 1089 | fn = fib6_locate_1(fn->subtree, saddr, src_len, |
1083 | offsetof(struct rt6_info, rt6i_src)); | 1090 | offsetof(struct rt6_info, rt6i_src)); |
1084 | } | 1091 | } |
1085 | #endif | 1092 | #endif |
1086 | 1093 | ||
1087 | if (fn && fn->fn_flags & RTN_RTINFO) | 1094 | if (fn && fn->fn_flags & RTN_RTINFO) |
1088 | return fn; | 1095 | return fn; |
1089 | 1096 | ||
1090 | return NULL; | 1097 | return NULL; |
1091 | } | 1098 | } |
1092 | 1099 | ||
1093 | 1100 | ||
1094 | /* | 1101 | /* |
1095 | * Deletion | 1102 | * Deletion |
1096 | * | 1103 | * |
1097 | */ | 1104 | */ |
1098 | 1105 | ||
1099 | static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn) | 1106 | static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn) |
1100 | { | 1107 | { |
1101 | if (fn->fn_flags & RTN_ROOT) | 1108 | if (fn->fn_flags & RTN_ROOT) |
1102 | return net->ipv6.ip6_null_entry; | 1109 | return net->ipv6.ip6_null_entry; |
1103 | 1110 | ||
1104 | while (fn) { | 1111 | while (fn) { |
1105 | if (fn->left) | 1112 | if (fn->left) |
1106 | return fn->left->leaf; | 1113 | return fn->left->leaf; |
1107 | if (fn->right) | 1114 | if (fn->right) |
1108 | return fn->right->leaf; | 1115 | return fn->right->leaf; |
1109 | 1116 | ||
1110 | fn = FIB6_SUBTREE(fn); | 1117 | fn = FIB6_SUBTREE(fn); |
1111 | } | 1118 | } |
1112 | return NULL; | 1119 | return NULL; |
1113 | } | 1120 | } |
1114 | 1121 | ||
1115 | /* | 1122 | /* |
1116 | * Called to trim the tree of intermediate nodes when possible. "fn" | 1123 | * Called to trim the tree of intermediate nodes when possible. "fn" |
1117 | * is the node we want to try and remove. | 1124 | * is the node we want to try and remove. |
1118 | */ | 1125 | */ |
1119 | 1126 | ||
1120 | static struct fib6_node *fib6_repair_tree(struct net *net, | 1127 | static struct fib6_node *fib6_repair_tree(struct net *net, |
1121 | struct fib6_node *fn) | 1128 | struct fib6_node *fn) |
1122 | { | 1129 | { |
1123 | int children; | 1130 | int children; |
1124 | int nstate; | 1131 | int nstate; |
1125 | struct fib6_node *child, *pn; | 1132 | struct fib6_node *child, *pn; |
1126 | struct fib6_walker_t *w; | 1133 | struct fib6_walker_t *w; |
1127 | int iter = 0; | 1134 | int iter = 0; |
1128 | 1135 | ||
1129 | for (;;) { | 1136 | for (;;) { |
1130 | RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter); | 1137 | RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter); |
1131 | iter++; | 1138 | iter++; |
1132 | 1139 | ||
1133 | WARN_ON(fn->fn_flags & RTN_RTINFO); | 1140 | WARN_ON(fn->fn_flags & RTN_RTINFO); |
1134 | WARN_ON(fn->fn_flags & RTN_TL_ROOT); | 1141 | WARN_ON(fn->fn_flags & RTN_TL_ROOT); |
1135 | WARN_ON(fn->leaf != NULL); | 1142 | WARN_ON(fn->leaf != NULL); |
1136 | 1143 | ||
1137 | children = 0; | 1144 | children = 0; |
1138 | child = NULL; | 1145 | child = NULL; |
1139 | if (fn->right) child = fn->right, children |= 1; | 1146 | if (fn->right) child = fn->right, children |= 1; |
1140 | if (fn->left) child = fn->left, children |= 2; | 1147 | if (fn->left) child = fn->left, children |= 2; |
1141 | 1148 | ||
1142 | if (children == 3 || FIB6_SUBTREE(fn) | 1149 | if (children == 3 || FIB6_SUBTREE(fn) |
1143 | #ifdef CONFIG_IPV6_SUBTREES | 1150 | #ifdef CONFIG_IPV6_SUBTREES |
1144 | /* Subtree root (i.e. fn) may have one child */ | 1151 | /* Subtree root (i.e. fn) may have one child */ |
1145 | || (children && fn->fn_flags & RTN_ROOT) | 1152 | || (children && fn->fn_flags & RTN_ROOT) |
1146 | #endif | 1153 | #endif |
1147 | ) { | 1154 | ) { |
1148 | fn->leaf = fib6_find_prefix(net, fn); | 1155 | fn->leaf = fib6_find_prefix(net, fn); |
1149 | #if RT6_DEBUG >= 2 | 1156 | #if RT6_DEBUG >= 2 |
1150 | if (!fn->leaf) { | 1157 | if (!fn->leaf) { |
1151 | WARN_ON(!fn->leaf); | 1158 | WARN_ON(!fn->leaf); |
1152 | fn->leaf = net->ipv6.ip6_null_entry; | 1159 | fn->leaf = net->ipv6.ip6_null_entry; |
1153 | } | 1160 | } |
1154 | #endif | 1161 | #endif |
1155 | atomic_inc(&fn->leaf->rt6i_ref); | 1162 | atomic_inc(&fn->leaf->rt6i_ref); |
1156 | return fn->parent; | 1163 | return fn->parent; |
1157 | } | 1164 | } |
1158 | 1165 | ||
1159 | pn = fn->parent; | 1166 | pn = fn->parent; |
1160 | #ifdef CONFIG_IPV6_SUBTREES | 1167 | #ifdef CONFIG_IPV6_SUBTREES |
1161 | if (FIB6_SUBTREE(pn) == fn) { | 1168 | if (FIB6_SUBTREE(pn) == fn) { |
1162 | WARN_ON(!(fn->fn_flags & RTN_ROOT)); | 1169 | WARN_ON(!(fn->fn_flags & RTN_ROOT)); |
1163 | FIB6_SUBTREE(pn) = NULL; | 1170 | FIB6_SUBTREE(pn) = NULL; |
1164 | nstate = FWS_L; | 1171 | nstate = FWS_L; |
1165 | } else { | 1172 | } else { |
1166 | WARN_ON(fn->fn_flags & RTN_ROOT); | 1173 | WARN_ON(fn->fn_flags & RTN_ROOT); |
1167 | #endif | 1174 | #endif |
1168 | if (pn->right == fn) pn->right = child; | 1175 | if (pn->right == fn) pn->right = child; |
1169 | else if (pn->left == fn) pn->left = child; | 1176 | else if (pn->left == fn) pn->left = child; |
1170 | #if RT6_DEBUG >= 2 | 1177 | #if RT6_DEBUG >= 2 |
1171 | else | 1178 | else |
1172 | WARN_ON(1); | 1179 | WARN_ON(1); |
1173 | #endif | 1180 | #endif |
1174 | if (child) | 1181 | if (child) |
1175 | child->parent = pn; | 1182 | child->parent = pn; |
1176 | nstate = FWS_R; | 1183 | nstate = FWS_R; |
1177 | #ifdef CONFIG_IPV6_SUBTREES | 1184 | #ifdef CONFIG_IPV6_SUBTREES |
1178 | } | 1185 | } |
1179 | #endif | 1186 | #endif |
1180 | 1187 | ||
1181 | read_lock(&fib6_walker_lock); | 1188 | read_lock(&fib6_walker_lock); |
1182 | FOR_WALKERS(w) { | 1189 | FOR_WALKERS(w) { |
1183 | if (!child) { | 1190 | if (!child) { |
1184 | if (w->root == fn) { | 1191 | if (w->root == fn) { |
1185 | w->root = w->node = NULL; | 1192 | w->root = w->node = NULL; |
1186 | RT6_TRACE("W %p adjusted by delroot 1\n", w); | 1193 | RT6_TRACE("W %p adjusted by delroot 1\n", w); |
1187 | } else if (w->node == fn) { | 1194 | } else if (w->node == fn) { |
1188 | RT6_TRACE("W %p adjusted by delnode 1, s=%d/%d\n", w, w->state, nstate); | 1195 | RT6_TRACE("W %p adjusted by delnode 1, s=%d/%d\n", w, w->state, nstate); |
1189 | w->node = pn; | 1196 | w->node = pn; |
1190 | w->state = nstate; | 1197 | w->state = nstate; |
1191 | } | 1198 | } |
1192 | } else { | 1199 | } else { |
1193 | if (w->root == fn) { | 1200 | if (w->root == fn) { |
1194 | w->root = child; | 1201 | w->root = child; |
1195 | RT6_TRACE("W %p adjusted by delroot 2\n", w); | 1202 | RT6_TRACE("W %p adjusted by delroot 2\n", w); |
1196 | } | 1203 | } |
1197 | if (w->node == fn) { | 1204 | if (w->node == fn) { |
1198 | w->node = child; | 1205 | w->node = child; |
1199 | if (children&2) { | 1206 | if (children&2) { |
1200 | RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state); | 1207 | RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state); |
1201 | w->state = w->state>=FWS_R ? FWS_U : FWS_INIT; | 1208 | w->state = w->state>=FWS_R ? FWS_U : FWS_INIT; |
1202 | } else { | 1209 | } else { |
1203 | RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state); | 1210 | RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state); |
1204 | w->state = w->state>=FWS_C ? FWS_U : FWS_INIT; | 1211 | w->state = w->state>=FWS_C ? FWS_U : FWS_INIT; |
1205 | } | 1212 | } |
1206 | } | 1213 | } |
1207 | } | 1214 | } |
1208 | } | 1215 | } |
1209 | read_unlock(&fib6_walker_lock); | 1216 | read_unlock(&fib6_walker_lock); |
1210 | 1217 | ||
1211 | node_free(fn); | 1218 | node_free(fn); |
1212 | if (pn->fn_flags & RTN_RTINFO || FIB6_SUBTREE(pn)) | 1219 | if (pn->fn_flags & RTN_RTINFO || FIB6_SUBTREE(pn)) |
1213 | return pn; | 1220 | return pn; |
1214 | 1221 | ||
1215 | rt6_release(pn->leaf); | 1222 | rt6_release(pn->leaf); |
1216 | pn->leaf = NULL; | 1223 | pn->leaf = NULL; |
1217 | fn = pn; | 1224 | fn = pn; |
1218 | } | 1225 | } |
1219 | } | 1226 | } |
1220 | 1227 | ||
1221 | static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, | 1228 | static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, |
1222 | struct nl_info *info) | 1229 | struct nl_info *info) |
1223 | { | 1230 | { |
1224 | struct fib6_walker_t *w; | 1231 | struct fib6_walker_t *w; |
1225 | struct rt6_info *rt = *rtp; | 1232 | struct rt6_info *rt = *rtp; |
1226 | struct net *net = info->nl_net; | 1233 | struct net *net = info->nl_net; |
1227 | 1234 | ||
1228 | RT6_TRACE("fib6_del_route\n"); | 1235 | RT6_TRACE("fib6_del_route\n"); |
1229 | 1236 | ||
1230 | /* Unlink it */ | 1237 | /* Unlink it */ |
1231 | *rtp = rt->dst.rt6_next; | 1238 | *rtp = rt->dst.rt6_next; |
1232 | rt->rt6i_node = NULL; | 1239 | rt->rt6i_node = NULL; |
1233 | net->ipv6.rt6_stats->fib_rt_entries--; | 1240 | net->ipv6.rt6_stats->fib_rt_entries--; |
1234 | net->ipv6.rt6_stats->fib_discarded_routes++; | 1241 | net->ipv6.rt6_stats->fib_discarded_routes++; |
1235 | 1242 | ||
1236 | /* Reset round-robin state, if necessary */ | 1243 | /* Reset round-robin state, if necessary */ |
1237 | if (fn->rr_ptr == rt) | 1244 | if (fn->rr_ptr == rt) |
1238 | fn->rr_ptr = NULL; | 1245 | fn->rr_ptr = NULL; |
1239 | 1246 | ||
1240 | /* Remove this entry from other siblings */ | 1247 | /* Remove this entry from other siblings */ |
1241 | if (rt->rt6i_nsiblings) { | 1248 | if (rt->rt6i_nsiblings) { |
1242 | struct rt6_info *sibling, *next_sibling; | 1249 | struct rt6_info *sibling, *next_sibling; |
1243 | 1250 | ||
1244 | list_for_each_entry_safe(sibling, next_sibling, | 1251 | list_for_each_entry_safe(sibling, next_sibling, |
1245 | &rt->rt6i_siblings, rt6i_siblings) | 1252 | &rt->rt6i_siblings, rt6i_siblings) |
1246 | sibling->rt6i_nsiblings--; | 1253 | sibling->rt6i_nsiblings--; |
1247 | rt->rt6i_nsiblings = 0; | 1254 | rt->rt6i_nsiblings = 0; |
1248 | list_del_init(&rt->rt6i_siblings); | 1255 | list_del_init(&rt->rt6i_siblings); |
1249 | } | 1256 | } |
1250 | 1257 | ||
1251 | /* Adjust walkers */ | 1258 | /* Adjust walkers */ |
1252 | read_lock(&fib6_walker_lock); | 1259 | read_lock(&fib6_walker_lock); |
1253 | FOR_WALKERS(w) { | 1260 | FOR_WALKERS(w) { |
1254 | if (w->state == FWS_C && w->leaf == rt) { | 1261 | if (w->state == FWS_C && w->leaf == rt) { |
1255 | RT6_TRACE("walker %p adjusted by delroute\n", w); | 1262 | RT6_TRACE("walker %p adjusted by delroute\n", w); |
1256 | w->leaf = rt->dst.rt6_next; | 1263 | w->leaf = rt->dst.rt6_next; |
1257 | if (!w->leaf) | 1264 | if (!w->leaf) |
1258 | w->state = FWS_U; | 1265 | w->state = FWS_U; |
1259 | } | 1266 | } |
1260 | } | 1267 | } |
1261 | read_unlock(&fib6_walker_lock); | 1268 | read_unlock(&fib6_walker_lock); |
1262 | 1269 | ||
1263 | rt->dst.rt6_next = NULL; | 1270 | rt->dst.rt6_next = NULL; |
1264 | 1271 | ||
1265 | /* If it was last route, expunge its radix tree node */ | 1272 | /* If it was last route, expunge its radix tree node */ |
1266 | if (!fn->leaf) { | 1273 | if (!fn->leaf) { |
1267 | fn->fn_flags &= ~RTN_RTINFO; | 1274 | fn->fn_flags &= ~RTN_RTINFO; |
1268 | net->ipv6.rt6_stats->fib_route_nodes--; | 1275 | net->ipv6.rt6_stats->fib_route_nodes--; |
1269 | fn = fib6_repair_tree(net, fn); | 1276 | fn = fib6_repair_tree(net, fn); |
1270 | } | 1277 | } |
1271 | 1278 | ||
1272 | if (atomic_read(&rt->rt6i_ref) != 1) { | 1279 | if (atomic_read(&rt->rt6i_ref) != 1) { |
1273 | /* This route is used as dummy address holder in some split | 1280 | /* This route is used as dummy address holder in some split |
1274 | * nodes. It is not leaked, but it still holds other resources, | 1281 | * nodes. It is not leaked, but it still holds other resources, |
1275 | * which must be released in time. So, scan ascendant nodes | 1282 | * which must be released in time. So, scan ascendant nodes |
1276 | * and replace dummy references to this route with references | 1283 | * and replace dummy references to this route with references |
1277 | * to still alive ones. | 1284 | * to still alive ones. |
1278 | */ | 1285 | */ |
1279 | while (fn) { | 1286 | while (fn) { |
1280 | if (!(fn->fn_flags & RTN_RTINFO) && fn->leaf == rt) { | 1287 | if (!(fn->fn_flags & RTN_RTINFO) && fn->leaf == rt) { |
1281 | fn->leaf = fib6_find_prefix(net, fn); | 1288 | fn->leaf = fib6_find_prefix(net, fn); |
1282 | atomic_inc(&fn->leaf->rt6i_ref); | 1289 | atomic_inc(&fn->leaf->rt6i_ref); |
1283 | rt6_release(rt); | 1290 | rt6_release(rt); |
1284 | } | 1291 | } |
1285 | fn = fn->parent; | 1292 | fn = fn->parent; |
1286 | } | 1293 | } |
1287 | /* No more references are possible at this point. */ | 1294 | /* No more references are possible at this point. */ |
1288 | BUG_ON(atomic_read(&rt->rt6i_ref) != 1); | 1295 | BUG_ON(atomic_read(&rt->rt6i_ref) != 1); |
1289 | } | 1296 | } |
1290 | 1297 | ||
1291 | inet6_rt_notify(RTM_DELROUTE, rt, info); | 1298 | inet6_rt_notify(RTM_DELROUTE, rt, info); |
1292 | rt6_release(rt); | 1299 | rt6_release(rt); |
1293 | } | 1300 | } |
1294 | 1301 | ||
1295 | int fib6_del(struct rt6_info *rt, struct nl_info *info) | 1302 | int fib6_del(struct rt6_info *rt, struct nl_info *info) |
1296 | { | 1303 | { |
1297 | struct net *net = info->nl_net; | 1304 | struct net *net = info->nl_net; |
1298 | struct fib6_node *fn = rt->rt6i_node; | 1305 | struct fib6_node *fn = rt->rt6i_node; |
1299 | struct rt6_info **rtp; | 1306 | struct rt6_info **rtp; |
1300 | 1307 | ||
1301 | #if RT6_DEBUG >= 2 | 1308 | #if RT6_DEBUG >= 2 |
1302 | if (rt->dst.obsolete>0) { | 1309 | if (rt->dst.obsolete>0) { |
1303 | WARN_ON(fn != NULL); | 1310 | WARN_ON(fn != NULL); |
1304 | return -ENOENT; | 1311 | return -ENOENT; |
1305 | } | 1312 | } |
1306 | #endif | 1313 | #endif |
1307 | if (!fn || rt == net->ipv6.ip6_null_entry) | 1314 | if (!fn || rt == net->ipv6.ip6_null_entry) |
1308 | return -ENOENT; | 1315 | return -ENOENT; |
1309 | 1316 | ||
1310 | WARN_ON(!(fn->fn_flags & RTN_RTINFO)); | 1317 | WARN_ON(!(fn->fn_flags & RTN_RTINFO)); |
1311 | 1318 | ||
1312 | if (!(rt->rt6i_flags & RTF_CACHE)) { | 1319 | if (!(rt->rt6i_flags & RTF_CACHE)) { |
1313 | struct fib6_node *pn = fn; | 1320 | struct fib6_node *pn = fn; |
1314 | #ifdef CONFIG_IPV6_SUBTREES | 1321 | #ifdef CONFIG_IPV6_SUBTREES |
1315 | /* clones of this route might be in another subtree */ | 1322 | /* clones of this route might be in another subtree */ |
1316 | if (rt->rt6i_src.plen) { | 1323 | if (rt->rt6i_src.plen) { |
1317 | while (!(pn->fn_flags & RTN_ROOT)) | 1324 | while (!(pn->fn_flags & RTN_ROOT)) |
1318 | pn = pn->parent; | 1325 | pn = pn->parent; |
1319 | pn = pn->parent; | 1326 | pn = pn->parent; |
1320 | } | 1327 | } |
1321 | #endif | 1328 | #endif |
1322 | fib6_prune_clones(info->nl_net, pn, rt); | 1329 | fib6_prune_clones(info->nl_net, pn, rt); |
1323 | } | 1330 | } |
1324 | 1331 | ||
1325 | /* | 1332 | /* |
1326 | * Walk the leaf entries looking for ourself | 1333 | * Walk the leaf entries looking for ourself |
1327 | */ | 1334 | */ |
1328 | 1335 | ||
1329 | for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->dst.rt6_next) { | 1336 | for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->dst.rt6_next) { |
1330 | if (*rtp == rt) { | 1337 | if (*rtp == rt) { |
1331 | fib6_del_route(fn, rtp, info); | 1338 | fib6_del_route(fn, rtp, info); |
1332 | return 0; | 1339 | return 0; |
1333 | } | 1340 | } |
1334 | } | 1341 | } |
1335 | return -ENOENT; | 1342 | return -ENOENT; |
1336 | } | 1343 | } |
1337 | 1344 | ||
1338 | /* | 1345 | /* |
1339 | * Tree traversal function. | 1346 | * Tree traversal function. |
1340 | * | 1347 | * |
1341 | * Certainly, it is not interrupt safe. | 1348 | * Certainly, it is not interrupt safe. |
1342 | * However, it is internally reenterable wrt itself and fib6_add/fib6_del. | 1349 | * However, it is internally reenterable wrt itself and fib6_add/fib6_del. |
1343 | * It means, that we can modify tree during walking | 1350 | * It means, that we can modify tree during walking |
1344 | * and use this function for garbage collection, clone pruning, | 1351 | * and use this function for garbage collection, clone pruning, |
1345 | * cleaning tree when a device goes down etc. etc. | 1352 | * cleaning tree when a device goes down etc. etc. |
1346 | * | 1353 | * |
1347 | * It guarantees that every node will be traversed, | 1354 | * It guarantees that every node will be traversed, |
1348 | * and that it will be traversed only once. | 1355 | * and that it will be traversed only once. |
1349 | * | 1356 | * |
1350 | * Callback function w->func may return: | 1357 | * Callback function w->func may return: |
1351 | * 0 -> continue walking. | 1358 | * 0 -> continue walking. |
1352 | * positive value -> walking is suspended (used by tree dumps, | 1359 | * positive value -> walking is suspended (used by tree dumps, |
1353 | * and probably by gc, if it will be split to several slices) | 1360 | * and probably by gc, if it will be split to several slices) |
1354 | * negative value -> terminate walking. | 1361 | * negative value -> terminate walking. |
1355 | * | 1362 | * |
1356 | * The function itself returns: | 1363 | * The function itself returns: |
1357 | * 0 -> walk is complete. | 1364 | * 0 -> walk is complete. |
1358 | * >0 -> walk is incomplete (i.e. suspended) | 1365 | * >0 -> walk is incomplete (i.e. suspended) |
1359 | * <0 -> walk is terminated by an error. | 1366 | * <0 -> walk is terminated by an error. |
1360 | */ | 1367 | */ |
1361 | 1368 | ||
1362 | static int fib6_walk_continue(struct fib6_walker_t *w) | 1369 | static int fib6_walk_continue(struct fib6_walker_t *w) |
1363 | { | 1370 | { |
1364 | struct fib6_node *fn, *pn; | 1371 | struct fib6_node *fn, *pn; |
1365 | 1372 | ||
1366 | for (;;) { | 1373 | for (;;) { |
1367 | fn = w->node; | 1374 | fn = w->node; |
1368 | if (!fn) | 1375 | if (!fn) |
1369 | return 0; | 1376 | return 0; |
1370 | 1377 | ||
1371 | if (w->prune && fn != w->root && | 1378 | if (w->prune && fn != w->root && |
1372 | fn->fn_flags & RTN_RTINFO && w->state < FWS_C) { | 1379 | fn->fn_flags & RTN_RTINFO && w->state < FWS_C) { |
1373 | w->state = FWS_C; | 1380 | w->state = FWS_C; |
1374 | w->leaf = fn->leaf; | 1381 | w->leaf = fn->leaf; |
1375 | } | 1382 | } |
1376 | switch (w->state) { | 1383 | switch (w->state) { |
1377 | #ifdef CONFIG_IPV6_SUBTREES | 1384 | #ifdef CONFIG_IPV6_SUBTREES |
1378 | case FWS_S: | 1385 | case FWS_S: |
1379 | if (FIB6_SUBTREE(fn)) { | 1386 | if (FIB6_SUBTREE(fn)) { |
1380 | w->node = FIB6_SUBTREE(fn); | 1387 | w->node = FIB6_SUBTREE(fn); |
1381 | continue; | 1388 | continue; |
1382 | } | 1389 | } |
1383 | w->state = FWS_L; | 1390 | w->state = FWS_L; |
1384 | #endif | 1391 | #endif |
1385 | case FWS_L: | 1392 | case FWS_L: |
1386 | if (fn->left) { | 1393 | if (fn->left) { |
1387 | w->node = fn->left; | 1394 | w->node = fn->left; |
1388 | w->state = FWS_INIT; | 1395 | w->state = FWS_INIT; |
1389 | continue; | 1396 | continue; |
1390 | } | 1397 | } |
1391 | w->state = FWS_R; | 1398 | w->state = FWS_R; |
1392 | case FWS_R: | 1399 | case FWS_R: |
1393 | if (fn->right) { | 1400 | if (fn->right) { |
1394 | w->node = fn->right; | 1401 | w->node = fn->right; |
1395 | w->state = FWS_INIT; | 1402 | w->state = FWS_INIT; |
1396 | continue; | 1403 | continue; |
1397 | } | 1404 | } |
1398 | w->state = FWS_C; | 1405 | w->state = FWS_C; |
1399 | w->leaf = fn->leaf; | 1406 | w->leaf = fn->leaf; |
1400 | case FWS_C: | 1407 | case FWS_C: |
1401 | if (w->leaf && fn->fn_flags & RTN_RTINFO) { | 1408 | if (w->leaf && fn->fn_flags & RTN_RTINFO) { |
1402 | int err; | 1409 | int err; |
1403 | 1410 | ||
1404 | if (w->skip) { | 1411 | if (w->skip) { |
1405 | w->skip--; | 1412 | w->skip--; |
1406 | continue; | 1413 | continue; |
1407 | } | 1414 | } |
1408 | 1415 | ||
1409 | err = w->func(w); | 1416 | err = w->func(w); |
1410 | if (err) | 1417 | if (err) |
1411 | return err; | 1418 | return err; |
1412 | 1419 | ||
1413 | w->count++; | 1420 | w->count++; |
1414 | continue; | 1421 | continue; |
1415 | } | 1422 | } |
1416 | w->state = FWS_U; | 1423 | w->state = FWS_U; |
1417 | case FWS_U: | 1424 | case FWS_U: |
1418 | if (fn == w->root) | 1425 | if (fn == w->root) |
1419 | return 0; | 1426 | return 0; |
1420 | pn = fn->parent; | 1427 | pn = fn->parent; |
1421 | w->node = pn; | 1428 | w->node = pn; |
1422 | #ifdef CONFIG_IPV6_SUBTREES | 1429 | #ifdef CONFIG_IPV6_SUBTREES |
1423 | if (FIB6_SUBTREE(pn) == fn) { | 1430 | if (FIB6_SUBTREE(pn) == fn) { |
1424 | WARN_ON(!(fn->fn_flags & RTN_ROOT)); | 1431 | WARN_ON(!(fn->fn_flags & RTN_ROOT)); |
1425 | w->state = FWS_L; | 1432 | w->state = FWS_L; |
1426 | continue; | 1433 | continue; |
1427 | } | 1434 | } |
1428 | #endif | 1435 | #endif |
1429 | if (pn->left == fn) { | 1436 | if (pn->left == fn) { |
1430 | w->state = FWS_R; | 1437 | w->state = FWS_R; |
1431 | continue; | 1438 | continue; |
1432 | } | 1439 | } |
1433 | if (pn->right == fn) { | 1440 | if (pn->right == fn) { |
1434 | w->state = FWS_C; | 1441 | w->state = FWS_C; |
1435 | w->leaf = w->node->leaf; | 1442 | w->leaf = w->node->leaf; |
1436 | continue; | 1443 | continue; |
1437 | } | 1444 | } |
1438 | #if RT6_DEBUG >= 2 | 1445 | #if RT6_DEBUG >= 2 |
1439 | WARN_ON(1); | 1446 | WARN_ON(1); |
1440 | #endif | 1447 | #endif |
1441 | } | 1448 | } |
1442 | } | 1449 | } |
1443 | } | 1450 | } |
1444 | 1451 | ||
1445 | static int fib6_walk(struct fib6_walker_t *w) | 1452 | static int fib6_walk(struct fib6_walker_t *w) |
1446 | { | 1453 | { |
1447 | int res; | 1454 | int res; |
1448 | 1455 | ||
1449 | w->state = FWS_INIT; | 1456 | w->state = FWS_INIT; |
1450 | w->node = w->root; | 1457 | w->node = w->root; |
1451 | 1458 | ||
1452 | fib6_walker_link(w); | 1459 | fib6_walker_link(w); |
1453 | res = fib6_walk_continue(w); | 1460 | res = fib6_walk_continue(w); |
1454 | if (res <= 0) | 1461 | if (res <= 0) |
1455 | fib6_walker_unlink(w); | 1462 | fib6_walker_unlink(w); |
1456 | return res; | 1463 | return res; |
1457 | } | 1464 | } |
1458 | 1465 | ||
1459 | static int fib6_clean_node(struct fib6_walker_t *w) | 1466 | static int fib6_clean_node(struct fib6_walker_t *w) |
1460 | { | 1467 | { |
1461 | int res; | 1468 | int res; |
1462 | struct rt6_info *rt; | 1469 | struct rt6_info *rt; |
1463 | struct fib6_cleaner_t *c = container_of(w, struct fib6_cleaner_t, w); | 1470 | struct fib6_cleaner_t *c = container_of(w, struct fib6_cleaner_t, w); |
1464 | struct nl_info info = { | 1471 | struct nl_info info = { |
1465 | .nl_net = c->net, | 1472 | .nl_net = c->net, |
1466 | }; | 1473 | }; |
1467 | 1474 | ||
1468 | for (rt = w->leaf; rt; rt = rt->dst.rt6_next) { | 1475 | for (rt = w->leaf; rt; rt = rt->dst.rt6_next) { |
1469 | res = c->func(rt, c->arg); | 1476 | res = c->func(rt, c->arg); |
1470 | if (res < 0) { | 1477 | if (res < 0) { |
1471 | w->leaf = rt; | 1478 | w->leaf = rt; |
1472 | res = fib6_del(rt, &info); | 1479 | res = fib6_del(rt, &info); |
1473 | if (res) { | 1480 | if (res) { |
1474 | #if RT6_DEBUG >= 2 | 1481 | #if RT6_DEBUG >= 2 |
1475 | pr_debug("%s: del failed: rt=%p@%p err=%d\n", | 1482 | pr_debug("%s: del failed: rt=%p@%p err=%d\n", |
1476 | __func__, rt, rt->rt6i_node, res); | 1483 | __func__, rt, rt->rt6i_node, res); |
1477 | #endif | 1484 | #endif |
1478 | continue; | 1485 | continue; |
1479 | } | 1486 | } |
1480 | return 0; | 1487 | return 0; |
1481 | } | 1488 | } |
1482 | WARN_ON(res != 0); | 1489 | WARN_ON(res != 0); |
1483 | } | 1490 | } |
1484 | w->leaf = rt; | 1491 | w->leaf = rt; |
1485 | return 0; | 1492 | return 0; |
1486 | } | 1493 | } |
1487 | 1494 | ||
1488 | /* | 1495 | /* |
1489 | * Convenient frontend to tree walker. | 1496 | * Convenient frontend to tree walker. |
1490 | * | 1497 | * |
1491 | * func is called on each route. | 1498 | * func is called on each route. |
1492 | * It may return -1 -> delete this route. | 1499 | * It may return -1 -> delete this route. |
1493 | * 0 -> continue walking | 1500 | * 0 -> continue walking |
1494 | * | 1501 | * |
1495 | * prune==1 -> only immediate children of node (certainly, | 1502 | * prune==1 -> only immediate children of node (certainly, |
1496 | * ignoring pure split nodes) will be scanned. | 1503 | * ignoring pure split nodes) will be scanned. |
1497 | */ | 1504 | */ |
1498 | 1505 | ||
1499 | static void fib6_clean_tree(struct net *net, struct fib6_node *root, | 1506 | static void fib6_clean_tree(struct net *net, struct fib6_node *root, |
1500 | int (*func)(struct rt6_info *, void *arg), | 1507 | int (*func)(struct rt6_info *, void *arg), |
1501 | int prune, void *arg) | 1508 | int prune, void *arg) |
1502 | { | 1509 | { |
1503 | struct fib6_cleaner_t c; | 1510 | struct fib6_cleaner_t c; |
1504 | 1511 | ||
1505 | c.w.root = root; | 1512 | c.w.root = root; |
1506 | c.w.func = fib6_clean_node; | 1513 | c.w.func = fib6_clean_node; |
1507 | c.w.prune = prune; | 1514 | c.w.prune = prune; |
1508 | c.w.count = 0; | 1515 | c.w.count = 0; |
1509 | c.w.skip = 0; | 1516 | c.w.skip = 0; |
1510 | c.func = func; | 1517 | c.func = func; |
1511 | c.arg = arg; | 1518 | c.arg = arg; |
1512 | c.net = net; | 1519 | c.net = net; |
1513 | 1520 | ||
1514 | fib6_walk(&c.w); | 1521 | fib6_walk(&c.w); |
1515 | } | 1522 | } |
1516 | 1523 | ||
1517 | void fib6_clean_all_ro(struct net *net, int (*func)(struct rt6_info *, void *arg), | 1524 | void fib6_clean_all_ro(struct net *net, int (*func)(struct rt6_info *, void *arg), |
1518 | int prune, void *arg) | 1525 | int prune, void *arg) |
1519 | { | 1526 | { |
1520 | struct fib6_table *table; | 1527 | struct fib6_table *table; |
1521 | struct hlist_head *head; | 1528 | struct hlist_head *head; |
1522 | unsigned int h; | 1529 | unsigned int h; |
1523 | 1530 | ||
1524 | rcu_read_lock(); | 1531 | rcu_read_lock(); |
1525 | for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { | 1532 | for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { |
1526 | head = &net->ipv6.fib_table_hash[h]; | 1533 | head = &net->ipv6.fib_table_hash[h]; |
1527 | hlist_for_each_entry_rcu(table, head, tb6_hlist) { | 1534 | hlist_for_each_entry_rcu(table, head, tb6_hlist) { |
1528 | read_lock_bh(&table->tb6_lock); | 1535 | read_lock_bh(&table->tb6_lock); |
1529 | fib6_clean_tree(net, &table->tb6_root, | 1536 | fib6_clean_tree(net, &table->tb6_root, |
1530 | func, prune, arg); | 1537 | func, prune, arg); |
1531 | read_unlock_bh(&table->tb6_lock); | 1538 | read_unlock_bh(&table->tb6_lock); |
1532 | } | 1539 | } |
1533 | } | 1540 | } |
1534 | rcu_read_unlock(); | 1541 | rcu_read_unlock(); |
1535 | } | 1542 | } |
1536 | void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg), | 1543 | void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg), |
1537 | int prune, void *arg) | 1544 | int prune, void *arg) |
1538 | { | 1545 | { |
1539 | struct fib6_table *table; | 1546 | struct fib6_table *table; |
1540 | struct hlist_head *head; | 1547 | struct hlist_head *head; |
1541 | unsigned int h; | 1548 | unsigned int h; |
1542 | 1549 | ||
1543 | rcu_read_lock(); | 1550 | rcu_read_lock(); |
1544 | for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { | 1551 | for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { |
1545 | head = &net->ipv6.fib_table_hash[h]; | 1552 | head = &net->ipv6.fib_table_hash[h]; |
1546 | hlist_for_each_entry_rcu(table, head, tb6_hlist) { | 1553 | hlist_for_each_entry_rcu(table, head, tb6_hlist) { |
1547 | write_lock_bh(&table->tb6_lock); | 1554 | write_lock_bh(&table->tb6_lock); |
1548 | fib6_clean_tree(net, &table->tb6_root, | 1555 | fib6_clean_tree(net, &table->tb6_root, |
1549 | func, prune, arg); | 1556 | func, prune, arg); |
1550 | write_unlock_bh(&table->tb6_lock); | 1557 | write_unlock_bh(&table->tb6_lock); |
1551 | } | 1558 | } |
1552 | } | 1559 | } |
1553 | rcu_read_unlock(); | 1560 | rcu_read_unlock(); |
1554 | } | 1561 | } |
1555 | 1562 | ||
1556 | static int fib6_prune_clone(struct rt6_info *rt, void *arg) | 1563 | static int fib6_prune_clone(struct rt6_info *rt, void *arg) |
1557 | { | 1564 | { |
1558 | if (rt->rt6i_flags & RTF_CACHE) { | 1565 | if (rt->rt6i_flags & RTF_CACHE) { |
1559 | RT6_TRACE("pruning clone %p\n", rt); | 1566 | RT6_TRACE("pruning clone %p\n", rt); |
1560 | return -1; | 1567 | return -1; |
1561 | } | 1568 | } |
1562 | 1569 | ||
1563 | return 0; | 1570 | return 0; |
1564 | } | 1571 | } |
1565 | 1572 | ||
1566 | static void fib6_prune_clones(struct net *net, struct fib6_node *fn, | 1573 | static void fib6_prune_clones(struct net *net, struct fib6_node *fn, |
1567 | struct rt6_info *rt) | 1574 | struct rt6_info *rt) |
1568 | { | 1575 | { |
1569 | fib6_clean_tree(net, fn, fib6_prune_clone, 1, rt); | 1576 | fib6_clean_tree(net, fn, fib6_prune_clone, 1, rt); |
1570 | } | 1577 | } |
1571 | 1578 | ||
1572 | /* | 1579 | /* |
1573 | * Garbage collection | 1580 | * Garbage collection |
1574 | */ | 1581 | */ |
1575 | 1582 | ||
/* State shared between fib6_run_gc() and its per-route callback fib6_age() */
static struct fib6_gc_args {
	int timeout;	/* added to dst.lastuse to decide when an unused clone is aged out */
	int more;	/* number of entries that still need a later gc run */
} gc_args;
1581 | 1588 | ||
1582 | static int fib6_age(struct rt6_info *rt, void *arg) | 1589 | static int fib6_age(struct rt6_info *rt, void *arg) |
1583 | { | 1590 | { |
1584 | unsigned long now = jiffies; | 1591 | unsigned long now = jiffies; |
1585 | 1592 | ||
1586 | /* | 1593 | /* |
1587 | * check addrconf expiration here. | 1594 | * check addrconf expiration here. |
1588 | * Routes are expired even if they are in use. | 1595 | * Routes are expired even if they are in use. |
1589 | * | 1596 | * |
1590 | * Also age clones. Note, that clones are aged out | 1597 | * Also age clones. Note, that clones are aged out |
1591 | * only if they are not in use now. | 1598 | * only if they are not in use now. |
1592 | */ | 1599 | */ |
1593 | 1600 | ||
1594 | if (rt->rt6i_flags & RTF_EXPIRES && rt->dst.expires) { | 1601 | if (rt->rt6i_flags & RTF_EXPIRES && rt->dst.expires) { |
1595 | if (time_after(now, rt->dst.expires)) { | 1602 | if (time_after(now, rt->dst.expires)) { |
1596 | RT6_TRACE("expiring %p\n", rt); | 1603 | RT6_TRACE("expiring %p\n", rt); |
1597 | return -1; | 1604 | return -1; |
1598 | } | 1605 | } |
1599 | gc_args.more++; | 1606 | gc_args.more++; |
1600 | } else if (rt->rt6i_flags & RTF_CACHE) { | 1607 | } else if (rt->rt6i_flags & RTF_CACHE) { |
1601 | if (atomic_read(&rt->dst.__refcnt) == 0 && | 1608 | if (atomic_read(&rt->dst.__refcnt) == 0 && |
1602 | time_after_eq(now, rt->dst.lastuse + gc_args.timeout)) { | 1609 | time_after_eq(now, rt->dst.lastuse + gc_args.timeout)) { |
1603 | RT6_TRACE("aging clone %p\n", rt); | 1610 | RT6_TRACE("aging clone %p\n", rt); |
1604 | return -1; | 1611 | return -1; |
1605 | } else if (rt->rt6i_flags & RTF_GATEWAY) { | 1612 | } else if (rt->rt6i_flags & RTF_GATEWAY) { |
1606 | struct neighbour *neigh; | 1613 | struct neighbour *neigh; |
1607 | __u8 neigh_flags = 0; | 1614 | __u8 neigh_flags = 0; |
1608 | 1615 | ||
1609 | neigh = dst_neigh_lookup(&rt->dst, &rt->rt6i_gateway); | 1616 | neigh = dst_neigh_lookup(&rt->dst, &rt->rt6i_gateway); |
1610 | if (neigh) { | 1617 | if (neigh) { |
1611 | neigh_flags = neigh->flags; | 1618 | neigh_flags = neigh->flags; |
1612 | neigh_release(neigh); | 1619 | neigh_release(neigh); |
1613 | } | 1620 | } |
1614 | if (!(neigh_flags & NTF_ROUTER)) { | 1621 | if (!(neigh_flags & NTF_ROUTER)) { |
1615 | RT6_TRACE("purging route %p via non-router but gateway\n", | 1622 | RT6_TRACE("purging route %p via non-router but gateway\n", |
1616 | rt); | 1623 | rt); |
1617 | return -1; | 1624 | return -1; |
1618 | } | 1625 | } |
1619 | } | 1626 | } |
1620 | gc_args.more++; | 1627 | gc_args.more++; |
1621 | } | 1628 | } |
1622 | 1629 | ||
1623 | return 0; | 1630 | return 0; |
1624 | } | 1631 | } |
1625 | 1632 | ||
1626 | static DEFINE_SPINLOCK(fib6_gc_lock); | 1633 | static DEFINE_SPINLOCK(fib6_gc_lock); |
1627 | 1634 | ||
1628 | void fib6_run_gc(unsigned long expires, struct net *net) | 1635 | void fib6_run_gc(unsigned long expires, struct net *net) |
1629 | { | 1636 | { |
1630 | if (expires != ~0UL) { | 1637 | if (expires != ~0UL) { |
1631 | spin_lock_bh(&fib6_gc_lock); | 1638 | spin_lock_bh(&fib6_gc_lock); |
1632 | gc_args.timeout = expires ? (int)expires : | 1639 | gc_args.timeout = expires ? (int)expires : |
1633 | net->ipv6.sysctl.ip6_rt_gc_interval; | 1640 | net->ipv6.sysctl.ip6_rt_gc_interval; |
1634 | } else { | 1641 | } else { |
1635 | if (!spin_trylock_bh(&fib6_gc_lock)) { | 1642 | if (!spin_trylock_bh(&fib6_gc_lock)) { |
1636 | mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ); | 1643 | mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ); |
1637 | return; | 1644 | return; |
1638 | } | 1645 | } |
1639 | gc_args.timeout = net->ipv6.sysctl.ip6_rt_gc_interval; | 1646 | gc_args.timeout = net->ipv6.sysctl.ip6_rt_gc_interval; |
1640 | } | 1647 | } |
1641 | 1648 | ||
1642 | gc_args.more = icmp6_dst_gc(); | 1649 | gc_args.more = icmp6_dst_gc(); |
1643 | 1650 | ||
1644 | fib6_clean_all(net, fib6_age, 0, NULL); | 1651 | fib6_clean_all(net, fib6_age, 0, NULL); |
1645 | 1652 | ||
1646 | if (gc_args.more) | 1653 | if (gc_args.more) |
1647 | mod_timer(&net->ipv6.ip6_fib_timer, | 1654 | mod_timer(&net->ipv6.ip6_fib_timer, |
1648 | round_jiffies(jiffies | 1655 | round_jiffies(jiffies |
1649 | + net->ipv6.sysctl.ip6_rt_gc_interval)); | 1656 | + net->ipv6.sysctl.ip6_rt_gc_interval)); |
1650 | else | 1657 | else |
1651 | del_timer(&net->ipv6.ip6_fib_timer); | 1658 | del_timer(&net->ipv6.ip6_fib_timer); |
1652 | spin_unlock_bh(&fib6_gc_lock); | 1659 | spin_unlock_bh(&fib6_gc_lock); |
1653 | } | 1660 | } |
1654 | 1661 | ||
/*
 * Timer callback of ip6_fib_timer: arg carries the struct net pointer
 * the timer belongs to.  Runs a gc pass with the default timeout.
 */
static void fib6_gc_timer_cb(unsigned long arg)
{
	struct net *net = (struct net *)arg;

	fib6_run_gc(0, net);
}
1659 | 1666 | ||
1660 | static int __net_init fib6_net_init(struct net *net) | 1667 | static int __net_init fib6_net_init(struct net *net) |
1661 | { | 1668 | { |
1662 | size_t size = sizeof(struct hlist_head) * FIB6_TABLE_HASHSZ; | 1669 | size_t size = sizeof(struct hlist_head) * FIB6_TABLE_HASHSZ; |
1663 | 1670 | ||
1664 | setup_timer(&net->ipv6.ip6_fib_timer, fib6_gc_timer_cb, (unsigned long)net); | 1671 | setup_timer(&net->ipv6.ip6_fib_timer, fib6_gc_timer_cb, (unsigned long)net); |
1665 | 1672 | ||
1666 | net->ipv6.rt6_stats = kzalloc(sizeof(*net->ipv6.rt6_stats), GFP_KERNEL); | 1673 | net->ipv6.rt6_stats = kzalloc(sizeof(*net->ipv6.rt6_stats), GFP_KERNEL); |
1667 | if (!net->ipv6.rt6_stats) | 1674 | if (!net->ipv6.rt6_stats) |
1668 | goto out_timer; | 1675 | goto out_timer; |
1669 | 1676 | ||
1670 | /* Avoid false sharing : Use at least a full cache line */ | 1677 | /* Avoid false sharing : Use at least a full cache line */ |
1671 | size = max_t(size_t, size, L1_CACHE_BYTES); | 1678 | size = max_t(size_t, size, L1_CACHE_BYTES); |
1672 | 1679 | ||
1673 | net->ipv6.fib_table_hash = kzalloc(size, GFP_KERNEL); | 1680 | net->ipv6.fib_table_hash = kzalloc(size, GFP_KERNEL); |
1674 | if (!net->ipv6.fib_table_hash) | 1681 | if (!net->ipv6.fib_table_hash) |
1675 | goto out_rt6_stats; | 1682 | goto out_rt6_stats; |
1676 | 1683 | ||
1677 | net->ipv6.fib6_main_tbl = kzalloc(sizeof(*net->ipv6.fib6_main_tbl), | 1684 | net->ipv6.fib6_main_tbl = kzalloc(sizeof(*net->ipv6.fib6_main_tbl), |
1678 | GFP_KERNEL); | 1685 | GFP_KERNEL); |
1679 | if (!net->ipv6.fib6_main_tbl) | 1686 | if (!net->ipv6.fib6_main_tbl) |
1680 | goto out_fib_table_hash; | 1687 | goto out_fib_table_hash; |
1681 | 1688 | ||
1682 | net->ipv6.fib6_main_tbl->tb6_id = RT6_TABLE_MAIN; | 1689 | net->ipv6.fib6_main_tbl->tb6_id = RT6_TABLE_MAIN; |
1683 | net->ipv6.fib6_main_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry; | 1690 | net->ipv6.fib6_main_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry; |
1684 | net->ipv6.fib6_main_tbl->tb6_root.fn_flags = | 1691 | net->ipv6.fib6_main_tbl->tb6_root.fn_flags = |
1685 | RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; | 1692 | RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; |
1686 | inet_peer_base_init(&net->ipv6.fib6_main_tbl->tb6_peers); | 1693 | inet_peer_base_init(&net->ipv6.fib6_main_tbl->tb6_peers); |
1687 | 1694 | ||
1688 | #ifdef CONFIG_IPV6_MULTIPLE_TABLES | 1695 | #ifdef CONFIG_IPV6_MULTIPLE_TABLES |
1689 | net->ipv6.fib6_local_tbl = kzalloc(sizeof(*net->ipv6.fib6_local_tbl), | 1696 | net->ipv6.fib6_local_tbl = kzalloc(sizeof(*net->ipv6.fib6_local_tbl), |
1690 | GFP_KERNEL); | 1697 | GFP_KERNEL); |
1691 | if (!net->ipv6.fib6_local_tbl) | 1698 | if (!net->ipv6.fib6_local_tbl) |
1692 | goto out_fib6_main_tbl; | 1699 | goto out_fib6_main_tbl; |
1693 | net->ipv6.fib6_local_tbl->tb6_id = RT6_TABLE_LOCAL; | 1700 | net->ipv6.fib6_local_tbl->tb6_id = RT6_TABLE_LOCAL; |
1694 | net->ipv6.fib6_local_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry; | 1701 | net->ipv6.fib6_local_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry; |
1695 | net->ipv6.fib6_local_tbl->tb6_root.fn_flags = | 1702 | net->ipv6.fib6_local_tbl->tb6_root.fn_flags = |
1696 | RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; | 1703 | RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; |
1697 | inet_peer_base_init(&net->ipv6.fib6_local_tbl->tb6_peers); | 1704 | inet_peer_base_init(&net->ipv6.fib6_local_tbl->tb6_peers); |
1698 | #endif | 1705 | #endif |
1699 | fib6_tables_init(net); | 1706 | fib6_tables_init(net); |
1700 | 1707 | ||
1701 | return 0; | 1708 | return 0; |
1702 | 1709 | ||
1703 | #ifdef CONFIG_IPV6_MULTIPLE_TABLES | 1710 | #ifdef CONFIG_IPV6_MULTIPLE_TABLES |
1704 | out_fib6_main_tbl: | 1711 | out_fib6_main_tbl: |
1705 | kfree(net->ipv6.fib6_main_tbl); | 1712 | kfree(net->ipv6.fib6_main_tbl); |
1706 | #endif | 1713 | #endif |
1707 | out_fib_table_hash: | 1714 | out_fib_table_hash: |
1708 | kfree(net->ipv6.fib_table_hash); | 1715 | kfree(net->ipv6.fib_table_hash); |
1709 | out_rt6_stats: | 1716 | out_rt6_stats: |
1710 | kfree(net->ipv6.rt6_stats); | 1717 | kfree(net->ipv6.rt6_stats); |
1711 | out_timer: | 1718 | out_timer: |
1712 | return -ENOMEM; | 1719 | return -ENOMEM; |
1713 | } | 1720 | } |
1714 | 1721 | ||
1715 | static void fib6_net_exit(struct net *net) | 1722 | static void fib6_net_exit(struct net *net) |
1716 | { | 1723 | { |
1717 | rt6_ifdown(net, NULL); | 1724 | rt6_ifdown(net, NULL); |
1718 | del_timer_sync(&net->ipv6.ip6_fib_timer); | 1725 | del_timer_sync(&net->ipv6.ip6_fib_timer); |
1719 | 1726 | ||
1720 | #ifdef CONFIG_IPV6_MULTIPLE_TABLES | 1727 | #ifdef CONFIG_IPV6_MULTIPLE_TABLES |
1721 | inetpeer_invalidate_tree(&net->ipv6.fib6_local_tbl->tb6_peers); | 1728 | inetpeer_invalidate_tree(&net->ipv6.fib6_local_tbl->tb6_peers); |
1722 | kfree(net->ipv6.fib6_local_tbl); | 1729 | kfree(net->ipv6.fib6_local_tbl); |
1723 | #endif | 1730 | #endif |
1724 | inetpeer_invalidate_tree(&net->ipv6.fib6_main_tbl->tb6_peers); | 1731 | inetpeer_invalidate_tree(&net->ipv6.fib6_main_tbl->tb6_peers); |
1725 | kfree(net->ipv6.fib6_main_tbl); | 1732 | kfree(net->ipv6.fib6_main_tbl); |
1726 | kfree(net->ipv6.fib_table_hash); | 1733 | kfree(net->ipv6.fib_table_hash); |
1727 | kfree(net->ipv6.rt6_stats); | 1734 | kfree(net->ipv6.rt6_stats); |
1728 | } | 1735 | } |
1729 | 1736 | ||
1730 | static struct pernet_operations fib6_net_ops = { | 1737 | static struct pernet_operations fib6_net_ops = { |
1731 | .init = fib6_net_init, | 1738 | .init = fib6_net_init, |
1732 | .exit = fib6_net_exit, | 1739 | .exit = fib6_net_exit, |
1733 | }; | 1740 | }; |
1734 | 1741 | ||
1735 | int __init fib6_init(void) | 1742 | int __init fib6_init(void) |
1736 | { | 1743 | { |
1737 | int ret = -ENOMEM; | 1744 | int ret = -ENOMEM; |
1738 | 1745 | ||
1739 | fib6_node_kmem = kmem_cache_create("fib6_nodes", | 1746 | fib6_node_kmem = kmem_cache_create("fib6_nodes", |
1740 | sizeof(struct fib6_node), | 1747 | sizeof(struct fib6_node), |
1741 | 0, SLAB_HWCACHE_ALIGN, | 1748 | 0, SLAB_HWCACHE_ALIGN, |
1742 | NULL); | 1749 | NULL); |
1743 | if (!fib6_node_kmem) | 1750 | if (!fib6_node_kmem) |
1744 | goto out; | 1751 | goto out; |
1745 | 1752 | ||
1746 | ret = register_pernet_subsys(&fib6_net_ops); | 1753 | ret = register_pernet_subsys(&fib6_net_ops); |
1747 | if (ret) | 1754 | if (ret) |
1748 | goto out_kmem_cache_create; | 1755 | goto out_kmem_cache_create; |
1749 | 1756 | ||
1750 | ret = __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib, | 1757 | ret = __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib, |
1751 | NULL); | 1758 | NULL); |
1752 | if (ret) | 1759 | if (ret) |
1753 | goto out_unregister_subsys; | 1760 | goto out_unregister_subsys; |
1754 | out: | 1761 | out: |
1755 | return ret; | 1762 | return ret; |
1756 | 1763 | ||
1757 | out_unregister_subsys: | 1764 | out_unregister_subsys: |
1758 | unregister_pernet_subsys(&fib6_net_ops); | 1765 | unregister_pernet_subsys(&fib6_net_ops); |
1759 | out_kmem_cache_create: | 1766 | out_kmem_cache_create: |
1760 | kmem_cache_destroy(fib6_node_kmem); | 1767 | kmem_cache_destroy(fib6_node_kmem); |
1761 | goto out; | 1768 | goto out; |
1762 | } | 1769 | } |
1763 | 1770 | ||
1764 | void fib6_gc_cleanup(void) | 1771 | void fib6_gc_cleanup(void) |
1765 | { | 1772 | { |
1766 | unregister_pernet_subsys(&fib6_net_ops); | 1773 | unregister_pernet_subsys(&fib6_net_ops); |
1767 | kmem_cache_destroy(fib6_node_kmem); | 1774 | kmem_cache_destroy(fib6_node_kmem); |
1768 | } | 1775 | } |