Blame view

net/ipv4/inetpeer.c 16.7 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
  /*
   *		INETPEER - A storage for permanent information about peers
   *
   *  This source is covered by the GNU GPL, the same as all kernel sources.
   *
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
6
7
8
9
10
11
12
13
14
   *  Authors:	Andrey V. Savochkin <saw@msu.ru>
   */
  
  #include <linux/module.h>
  #include <linux/types.h>
  #include <linux/slab.h>
  #include <linux/interrupt.h>
  #include <linux/spinlock.h>
  #include <linux/random.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
15
16
17
18
19
  #include <linux/timer.h>
  #include <linux/time.h>
  #include <linux/kernel.h>
  #include <linux/mm.h>
  #include <linux/net.h>
5faa5df1f   Steffen Klassert   inetpeer: Invalid...
20
  #include <linux/workqueue.h>
20380731b   Arnaldo Carvalho de Melo   [NET]: Fix sparse...
21
  #include <net/ip.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
22
  #include <net/inetpeer.h>
6e5714eaf   David S. Miller   net: Compute prot...
23
  #include <net/secure_seq.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
24
25
26
27
28
  
  /*
   *  Theory of operations.
   *  We keep one entry for each peer IP address.  The nodes contains long-living
   *  information about the peer which doesn't depend on routes.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
29
   *
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
30
31
32
33
34
35
36
37
38
39
40
41
42
   *  Nodes are removed only when reference counter goes to 0.
   *  When it's happened the node may be removed when a sufficient amount of
   *  time has been passed since its last use.  The less-recently-used entry can
   *  also be removed if the pool is overloaded i.e. if the total amount of
   *  entries is greater-or-equal than the threshold.
   *
   *  Node pool is organised as an AVL tree.
   *  Such an implementation has been chosen not just for fun.  It's a way to
   *  prevent easy and efficient DoS attacks by creating hash collisions.  A huge
   *  amount of long living nodes in a single hash slot would significantly delay
   *  lookups performed with disabled BHs.
   *
   *  Serialisation issues.
aa1039e73   Eric Dumazet   inetpeer: RCU con...
43
44
   *  1.  Nodes may appear in the tree only with the pool lock held.
   *  2.  Nodes may disappear from the tree only with the pool lock held
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
45
   *      AND reference count being 0.
4b9d9be83   Eric Dumazet   inetpeer: remove ...
46
47
   *  3.  Global variable peer_total is modified under the pool lock.
   *  4.  struct inet_peer fields modification:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
48
   *		avl_left, avl_right, avl_parent, avl_height: pool lock
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
49
50
   *		refcnt: atomically against modifications on other CPU;
   *		   usually under some other lock to prevent node disappearing
582a72da9   David S. Miller   inetpeer: Introdu...
51
   *		daddr: unchangeable
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
52
   */
e18b890bb   Christoph Lameter   [PATCH] slab: rem...
53
  static struct kmem_cache *peer_cachep __read_mostly;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
54

5faa5df1f   Steffen Klassert   inetpeer: Invalid...
55
56
57
58
  static LIST_HEAD(gc_list);
  static const int gc_delay = 60 * HZ;
  static struct delayed_work gc_work;
  static DEFINE_SPINLOCK(gc_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
59
  #define node_height(x) x->avl_height
d6cc1d642   Eric Dumazet   inetpeer: various...
60
61
  
  #define peer_avl_empty ((struct inet_peer *)&peer_fake_node)
b914c4ea9   Eric Dumazet   inetpeer: __rcu a...
62
  #define peer_avl_empty_rcu ((struct inet_peer __rcu __force *)&peer_fake_node)
d6cc1d642   Eric Dumazet   inetpeer: various...
63
  static const struct inet_peer peer_fake_node = {
b914c4ea9   Eric Dumazet   inetpeer: __rcu a...
64
65
  	.avl_left	= peer_avl_empty_rcu,
  	.avl_right	= peer_avl_empty_rcu,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
66
67
  	.avl_height	= 0
  };
d6cc1d642   Eric Dumazet   inetpeer: various...
68

c3426b471   David S. Miller   inet: Initialize ...
69
70
71
72
  void inet_peer_base_init(struct inet_peer_base *bp)
  {
  	bp->root = peer_avl_empty_rcu;
  	seqlock_init(&bp->lock);
b48c80ece   David S. Miller   inet: Add family ...
73
  	bp->flush_seq = ~0U;
c3426b471   David S. Miller   inet: Initialize ...
74
75
76
  	bp->total = 0;
  }
  EXPORT_SYMBOL_GPL(inet_peer_base_init);
021e92991   David S. Miller   inetpeer: Add v6 ...
77

b48c80ece   David S. Miller   inet: Add family ...
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
  static atomic_t v4_seq = ATOMIC_INIT(0);
  static atomic_t v6_seq = ATOMIC_INIT(0);
  
  static atomic_t *inetpeer_seq_ptr(int family)
  {
  	return (family == AF_INET ? &v4_seq : &v6_seq);
  }
  
  static inline void flush_check(struct inet_peer_base *base, int family)
  {
  	atomic_t *fp = inetpeer_seq_ptr(family);
  
  	if (unlikely(base->flush_seq != atomic_read(fp))) {
  		inetpeer_invalidate_tree(base);
  		base->flush_seq = atomic_read(fp);
  	}
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
95
  #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
96
  /* Exported for sysctl_net_ipv4.  */
243bbcaa0   Eric Dumazet   [IPV4]: Optimize ...
97
  int inet_peer_threshold __read_mostly = 65536 + 128;	/* start to throw entries more
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
98
  					 * aggressively at this stage */
243bbcaa0   Eric Dumazet   [IPV4]: Optimize ...
99
100
  int inet_peer_minttl __read_mostly = 120 * HZ;	/* TTL under high load: 120 sec */
  int inet_peer_maxttl __read_mostly = 10 * 60 * HZ;	/* usual time to live: 10 min */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
101

5faa5df1f   Steffen Klassert   inetpeer: Invalid...
102
103
  static void inetpeer_gc_worker(struct work_struct *work)
  {
da5573746   Eric Dumazet   inetpeer: inetpee...
104
  	struct inet_peer *p, *n, *c;
851bdd11c   xiao jin   inetpeer_gc_worke...
105
  	struct list_head list;
5faa5df1f   Steffen Klassert   inetpeer: Invalid...
106
107
108
109
110
111
112
113
114
  
  	spin_lock_bh(&gc_lock);
  	list_replace_init(&gc_list, &list);
  	spin_unlock_bh(&gc_lock);
  
  	if (list_empty(&list))
  		return;
  
  	list_for_each_entry_safe(p, n, &list, gc_list) {
da5573746   Eric Dumazet   inetpeer: inetpee...
115
  		if (need_resched())
5faa5df1f   Steffen Klassert   inetpeer: Invalid...
116
  			cond_resched();
da5573746   Eric Dumazet   inetpeer: inetpee...
117
118
119
120
  		c = rcu_dereference_protected(p->avl_left, 1);
  		if (c != peer_avl_empty) {
  			list_add_tail(&c->gc_list, &list);
  			p->avl_left = peer_avl_empty_rcu;
5faa5df1f   Steffen Klassert   inetpeer: Invalid...
121
  		}
da5573746   Eric Dumazet   inetpeer: inetpee...
122
123
124
125
  		c = rcu_dereference_protected(p->avl_right, 1);
  		if (c != peer_avl_empty) {
  			list_add_tail(&c->gc_list, &list);
  			p->avl_right = peer_avl_empty_rcu;
5faa5df1f   Steffen Klassert   inetpeer: Invalid...
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
  		}
  
  		n = list_entry(p->gc_list.next, struct inet_peer, gc_list);
  
  		if (!atomic_read(&p->refcnt)) {
  			list_del(&p->gc_list);
  			kmem_cache_free(peer_cachep, p);
  		}
  	}
  
  	if (list_empty(&list))
  		return;
  
  	spin_lock_bh(&gc_lock);
  	list_splice(&list, &gc_list);
  	spin_unlock_bh(&gc_lock);
  
  	schedule_delayed_work(&gc_work, gc_delay);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
  
  /* Called from ip_output.c:ip_init  */
  void __init inet_initpeers(void)
  {
  	struct sysinfo si;
  
  	/* Use the straight interface to information about memory. */
  	si_meminfo(&si);
  	/* The values below were suggested by Alexey Kuznetsov
  	 * <kuznet@ms2.inr.ac.ru>.  I don't have any opinion about the values
  	 * myself.  --SAW
  	 */
  	if (si.totalram <= (32768*1024)/PAGE_SIZE)
  		inet_peer_threshold >>= 1; /* max pool size about 1MB on IA32 */
  	if (si.totalram <= (16384*1024)/PAGE_SIZE)
  		inet_peer_threshold >>= 1; /* about 512KB */
  	if (si.totalram <= (8192*1024)/PAGE_SIZE)
  		inet_peer_threshold >>= 2; /* about 128KB */
  
  	peer_cachep = kmem_cache_create("inet_peer_cache",
  			sizeof(struct inet_peer),
317fe0e6c   Eric Dumazet   inetpeer: restore...
166
  			0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
20c2df83d   Paul Mundt   mm: Remove slab d...
167
  			NULL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
168

203b42f73   Tejun Heo   workqueue: make d...
169
  	INIT_DEFERRABLE_WORK(&gc_work, inetpeer_gc_worker);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
170
  }
8790ca172   David S. Miller   inetpeer: Kill us...
171
172
  static int addr_compare(const struct inetpeer_addr *a,
  			const struct inetpeer_addr *b)
026630450   David S. Miller   inetpeer: Abstrac...
173
174
175
176
  {
  	int i, n = (a->family == AF_INET ? 1 : 4);
  
  	for (i = 0; i < n; i++) {
7a71ed899   David S. Miller   inetpeer: Abstrac...
177
  		if (a->addr.a6[i] == b->addr.a6[i])
026630450   David S. Miller   inetpeer: Abstrac...
178
  			continue;
747465ef7   Eric Dumazet   net: fix some spa...
179
  		if ((__force u32)a->addr.a6[i] < (__force u32)b->addr.a6[i])
026630450   David S. Miller   inetpeer: Abstrac...
180
181
182
183
184
185
  			return -1;
  		return 1;
  	}
  
  	return 0;
  }
65e8354ec   Eric Dumazet   inetpeer: seqlock...
186
187
  #define rcu_deref_locked(X, BASE)				\
  	rcu_dereference_protected(X, lockdep_is_held(&(BASE)->lock.lock))
243bbcaa0   Eric Dumazet   [IPV4]: Optimize ...
188
189
  /*
   * Called with local BH disabled and the pool lock held.
243bbcaa0   Eric Dumazet   [IPV4]: Optimize ...
190
   */
98158f5a8   David S. Miller   inetpeer: Abstrac...
191
  #define lookup(_daddr, _stack, _base)				\
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
192
  ({								\
b914c4ea9   Eric Dumazet   inetpeer: __rcu a...
193
194
  	struct inet_peer *u;					\
  	struct inet_peer __rcu **v;				\
aa1039e73   Eric Dumazet   inetpeer: RCU con...
195
196
  								\
  	stackptr = _stack;					\
98158f5a8   David S. Miller   inetpeer: Abstrac...
197
  	*stackptr++ = &_base->root;				\
65e8354ec   Eric Dumazet   inetpeer: seqlock...
198
  	for (u = rcu_deref_locked(_base->root, _base);		\
0c9a67d2e   Weilong Chen   ipv4: fix checkpa...
199
  	     u != peer_avl_empty;) {				\
026630450   David S. Miller   inetpeer: Abstrac...
200
201
  		int cmp = addr_compare(_daddr, &u->daddr);	\
  		if (cmp == 0)					\
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
202
  			break;					\
026630450   David S. Miller   inetpeer: Abstrac...
203
  		if (cmp == -1)					\
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
204
205
206
  			v = &u->avl_left;			\
  		else						\
  			v = &u->avl_right;			\
aa1039e73   Eric Dumazet   inetpeer: RCU con...
207
  		*stackptr++ = v;				\
65e8354ec   Eric Dumazet   inetpeer: seqlock...
208
  		u = rcu_deref_locked(*v, _base);		\
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
209
210
211
  	}							\
  	u;							\
  })
aa1039e73   Eric Dumazet   inetpeer: RCU con...
212
  /*
7b46ac4e7   David S. Miller   inetpeer: Don't d...
213
   * Called with rcu_read_lock()
aa1039e73   Eric Dumazet   inetpeer: RCU con...
214
215
216
217
218
   * Because we hold no lock against a writer, its quite possible we fall
   * in an endless loop.
   * But every pointer we follow is guaranteed to be valid thanks to RCU.
   * We exit from this function if number of links exceeds PEER_MAXDEPTH
   */
7b46ac4e7   David S. Miller   inetpeer: Don't d...
219
  static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr,
4b9d9be83   Eric Dumazet   inetpeer: remove ...
220
  				    struct inet_peer_base *base)
aa1039e73   Eric Dumazet   inetpeer: RCU con...
221
  {
7b46ac4e7   David S. Miller   inetpeer: Don't d...
222
  	struct inet_peer *u = rcu_dereference(base->root);
aa1039e73   Eric Dumazet   inetpeer: RCU con...
223
224
225
  	int count = 0;
  
  	while (u != peer_avl_empty) {
026630450   David S. Miller   inetpeer: Abstrac...
226
227
  		int cmp = addr_compare(daddr, &u->daddr);
  		if (cmp == 0) {
5f2f89209   Eric Dumazet   inetpeer: do not ...
228
  			/* Before taking a reference, check if this entry was
4b9d9be83   Eric Dumazet   inetpeer: remove ...
229
  			 * deleted (refcnt=-1)
5f2f89209   Eric Dumazet   inetpeer: do not ...
230
  			 */
4b9d9be83   Eric Dumazet   inetpeer: remove ...
231
  			if (!atomic_add_unless(&u->refcnt, 1, -1))
aa1039e73   Eric Dumazet   inetpeer: RCU con...
232
233
234
  				u = NULL;
  			return u;
  		}
026630450   David S. Miller   inetpeer: Abstrac...
235
  		if (cmp == -1)
7b46ac4e7   David S. Miller   inetpeer: Don't d...
236
  			u = rcu_dereference(u->avl_left);
aa1039e73   Eric Dumazet   inetpeer: RCU con...
237
  		else
7b46ac4e7   David S. Miller   inetpeer: Don't d...
238
  			u = rcu_dereference(u->avl_right);
aa1039e73   Eric Dumazet   inetpeer: RCU con...
239
240
241
242
243
244
245
  		if (unlikely(++count == PEER_MAXDEPTH))
  			break;
  	}
  	return NULL;
  }
  
  /* Called with local BH disabled and the pool lock held. */
98158f5a8   David S. Miller   inetpeer: Abstrac...
246
  #define lookup_rightempty(start, base)				\
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
247
  ({								\
b914c4ea9   Eric Dumazet   inetpeer: __rcu a...
248
249
  	struct inet_peer *u;					\
  	struct inet_peer __rcu **v;				\
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
250
251
  	*stackptr++ = &start->avl_left;				\
  	v = &start->avl_left;					\
65e8354ec   Eric Dumazet   inetpeer: seqlock...
252
  	for (u = rcu_deref_locked(*v, base);			\
0c9a67d2e   Weilong Chen   ipv4: fix checkpa...
253
  	     u->avl_right != peer_avl_empty_rcu;) {		\
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
254
255
  		v = &u->avl_right;				\
  		*stackptr++ = v;				\
65e8354ec   Eric Dumazet   inetpeer: seqlock...
256
  		u = rcu_deref_locked(*v, base);			\
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
257
258
259
  	}							\
  	u;							\
  })
aa1039e73   Eric Dumazet   inetpeer: RCU con...
260
  /* Called with local BH disabled and the pool lock held.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
261
   * Variable names are the proof of operation correctness.
aa1039e73   Eric Dumazet   inetpeer: RCU con...
262
263
   * Look into mm/map_avl.c for more detail description of the ideas.
   */
b914c4ea9   Eric Dumazet   inetpeer: __rcu a...
264
  static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
98158f5a8   David S. Miller   inetpeer: Abstrac...
265
266
  			       struct inet_peer __rcu ***stackend,
  			       struct inet_peer_base *base)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
267
  {
b914c4ea9   Eric Dumazet   inetpeer: __rcu a...
268
269
  	struct inet_peer __rcu **nodep;
  	struct inet_peer *node, *l, *r;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
270
271
272
273
  	int lh, rh;
  
  	while (stackend > stack) {
  		nodep = *--stackend;
65e8354ec   Eric Dumazet   inetpeer: seqlock...
274
275
276
  		node = rcu_deref_locked(*nodep, base);
  		l = rcu_deref_locked(node->avl_left, base);
  		r = rcu_deref_locked(node->avl_right, base);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
277
278
279
280
281
  		lh = node_height(l);
  		rh = node_height(r);
  		if (lh > rh + 1) { /* l: RH+2 */
  			struct inet_peer *ll, *lr, *lrl, *lrr;
  			int lrh;
65e8354ec   Eric Dumazet   inetpeer: seqlock...
282
283
  			ll = rcu_deref_locked(l->avl_left, base);
  			lr = rcu_deref_locked(l->avl_right, base);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
284
285
  			lrh = node_height(lr);
  			if (lrh <= node_height(ll)) {	/* ll: RH+1 */
b914c4ea9   Eric Dumazet   inetpeer: __rcu a...
286
287
  				RCU_INIT_POINTER(node->avl_left, lr);	/* lr: RH or RH+1 */
  				RCU_INIT_POINTER(node->avl_right, r);	/* r: RH */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
288
  				node->avl_height = lrh + 1; /* RH+1 or RH+2 */
b914c4ea9   Eric Dumazet   inetpeer: __rcu a...
289
290
  				RCU_INIT_POINTER(l->avl_left, ll);       /* ll: RH+1 */
  				RCU_INIT_POINTER(l->avl_right, node);	/* node: RH+1 or RH+2 */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
291
  				l->avl_height = node->avl_height + 1;
b914c4ea9   Eric Dumazet   inetpeer: __rcu a...
292
  				RCU_INIT_POINTER(*nodep, l);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
293
  			} else { /* ll: RH, lr: RH+1 */
65e8354ec   Eric Dumazet   inetpeer: seqlock...
294
295
  				lrl = rcu_deref_locked(lr->avl_left, base);/* lrl: RH or RH-1 */
  				lrr = rcu_deref_locked(lr->avl_right, base);/* lrr: RH or RH-1 */
b914c4ea9   Eric Dumazet   inetpeer: __rcu a...
296
297
  				RCU_INIT_POINTER(node->avl_left, lrr);	/* lrr: RH or RH-1 */
  				RCU_INIT_POINTER(node->avl_right, r);	/* r: RH */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
298
  				node->avl_height = rh + 1; /* node: RH+1 */
b914c4ea9   Eric Dumazet   inetpeer: __rcu a...
299
300
  				RCU_INIT_POINTER(l->avl_left, ll);	/* ll: RH */
  				RCU_INIT_POINTER(l->avl_right, lrl);	/* lrl: RH or RH-1 */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
301
  				l->avl_height = rh + 1;	/* l: RH+1 */
b914c4ea9   Eric Dumazet   inetpeer: __rcu a...
302
303
  				RCU_INIT_POINTER(lr->avl_left, l);	/* l: RH+1 */
  				RCU_INIT_POINTER(lr->avl_right, node);	/* node: RH+1 */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
304
  				lr->avl_height = rh + 2;
b914c4ea9   Eric Dumazet   inetpeer: __rcu a...
305
  				RCU_INIT_POINTER(*nodep, lr);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
306
307
308
309
  			}
  		} else if (rh > lh + 1) { /* r: LH+2 */
  			struct inet_peer *rr, *rl, *rlr, *rll;
  			int rlh;
65e8354ec   Eric Dumazet   inetpeer: seqlock...
310
311
  			rr = rcu_deref_locked(r->avl_right, base);
  			rl = rcu_deref_locked(r->avl_left, base);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
312
313
  			rlh = node_height(rl);
  			if (rlh <= node_height(rr)) {	/* rr: LH+1 */
b914c4ea9   Eric Dumazet   inetpeer: __rcu a...
314
315
  				RCU_INIT_POINTER(node->avl_right, rl);	/* rl: LH or LH+1 */
  				RCU_INIT_POINTER(node->avl_left, l);	/* l: LH */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
316
  				node->avl_height = rlh + 1; /* LH+1 or LH+2 */
b914c4ea9   Eric Dumazet   inetpeer: __rcu a...
317
318
  				RCU_INIT_POINTER(r->avl_right, rr);	/* rr: LH+1 */
  				RCU_INIT_POINTER(r->avl_left, node);	/* node: LH+1 or LH+2 */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
319
  				r->avl_height = node->avl_height + 1;
b914c4ea9   Eric Dumazet   inetpeer: __rcu a...
320
  				RCU_INIT_POINTER(*nodep, r);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
321
  			} else { /* rr: RH, rl: RH+1 */
65e8354ec   Eric Dumazet   inetpeer: seqlock...
322
323
  				rlr = rcu_deref_locked(rl->avl_right, base);/* rlr: LH or LH-1 */
  				rll = rcu_deref_locked(rl->avl_left, base);/* rll: LH or LH-1 */
b914c4ea9   Eric Dumazet   inetpeer: __rcu a...
324
325
  				RCU_INIT_POINTER(node->avl_right, rll);	/* rll: LH or LH-1 */
  				RCU_INIT_POINTER(node->avl_left, l);	/* l: LH */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
326
  				node->avl_height = lh + 1; /* node: LH+1 */
b914c4ea9   Eric Dumazet   inetpeer: __rcu a...
327
328
  				RCU_INIT_POINTER(r->avl_right, rr);	/* rr: LH */
  				RCU_INIT_POINTER(r->avl_left, rlr);	/* rlr: LH or LH-1 */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
329
  				r->avl_height = lh + 1;	/* r: LH+1 */
b914c4ea9   Eric Dumazet   inetpeer: __rcu a...
330
331
  				RCU_INIT_POINTER(rl->avl_right, r);	/* r: LH+1 */
  				RCU_INIT_POINTER(rl->avl_left, node);	/* node: LH+1 */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
332
  				rl->avl_height = lh + 2;
b914c4ea9   Eric Dumazet   inetpeer: __rcu a...
333
  				RCU_INIT_POINTER(*nodep, rl);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
334
335
336
337
338
339
  			}
  		} else {
  			node->avl_height = (lh > rh ? lh : rh) + 1;
  		}
  	}
  }
aa1039e73   Eric Dumazet   inetpeer: RCU con...
340
  /* Called with local BH disabled and the pool lock held. */
98158f5a8   David S. Miller   inetpeer: Abstrac...
341
  #define link_to_pool(n, base)					\
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
342
343
  do {								\
  	n->avl_height = 1;					\
b914c4ea9   Eric Dumazet   inetpeer: __rcu a...
344
345
346
347
  	n->avl_left = peer_avl_empty_rcu;			\
  	n->avl_right = peer_avl_empty_rcu;			\
  	/* lockless readers can catch us now */			\
  	rcu_assign_pointer(**--stackptr, n);			\
98158f5a8   David S. Miller   inetpeer: Abstrac...
348
  	peer_avl_rebalance(stack, stackptr, base);		\
d6cc1d642   Eric Dumazet   inetpeer: various...
349
  } while (0)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
350

aa1039e73   Eric Dumazet   inetpeer: RCU con...
351
352
353
354
  static void inetpeer_free_rcu(struct rcu_head *head)
  {
  	kmem_cache_free(peer_cachep, container_of(head, struct inet_peer, rcu));
  }
66944e1c5   Eric Dumazet   inetpeer: reduce ...
355
356
  static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base,
  			     struct inet_peer __rcu **stack[PEER_MAXDEPTH])
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
357
  {
4b9d9be83   Eric Dumazet   inetpeer: remove ...
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
  	struct inet_peer __rcu ***stackptr, ***delp;
  
  	if (lookup(&p->daddr, stack, base) != p)
  		BUG();
  	delp = stackptr - 1; /* *delp[0] == p */
  	if (p->avl_left == peer_avl_empty_rcu) {
  		*delp[0] = p->avl_right;
  		--stackptr;
  	} else {
  		/* look for a node to insert instead of p */
  		struct inet_peer *t;
  		t = lookup_rightempty(p, base);
  		BUG_ON(rcu_deref_locked(*stackptr[-1], base) != t);
  		**--stackptr = t->avl_left;
  		/* t is removed, t->daddr > x->daddr for any
  		 * x in p->avl_left subtree.
  		 * Put t in the old place of p. */
  		RCU_INIT_POINTER(*delp[0], t);
  		t->avl_left = p->avl_left;
  		t->avl_right = p->avl_right;
  		t->avl_height = p->avl_height;
  		BUG_ON(delp[1] != &p->avl_left);
  		delp[1] = &t->avl_left; /* was &p->avl_left */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
381
  	}
4b9d9be83   Eric Dumazet   inetpeer: remove ...
382
383
384
  	peer_avl_rebalance(stack, stackptr, base);
  	base->total--;
  	call_rcu(&p->rcu, inetpeer_free_rcu);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
385
  }
4b9d9be83   Eric Dumazet   inetpeer: remove ...
386
387
388
389
  /* perform garbage collect on all items stacked during a lookup */
  static int inet_peer_gc(struct inet_peer_base *base,
  			struct inet_peer __rcu **stack[PEER_MAXDEPTH],
  			struct inet_peer __rcu ***stackptr)
98158f5a8   David S. Miller   inetpeer: Abstrac...
390
  {
4b9d9be83   Eric Dumazet   inetpeer: remove ...
391
392
393
  	struct inet_peer *p, *gchead = NULL;
  	__u32 delta, ttl;
  	int cnt = 0;
d71209ded   Pavel Emelyanov   [INET]: Use list_...
394

4b9d9be83   Eric Dumazet   inetpeer: remove ...
395
396
397
398
399
400
401
402
403
404
  	if (base->total >= inet_peer_threshold)
  		ttl = 0; /* be aggressive */
  	else
  		ttl = inet_peer_maxttl
  				- (inet_peer_maxttl - inet_peer_minttl) / HZ *
  					base->total / inet_peer_threshold * HZ;
  	stackptr--; /* last stack slot is peer_avl_empty */
  	while (stackptr > stack) {
  		stackptr--;
  		p = rcu_deref_locked(**stackptr, base);
6d1a3e042   Eric Dumazet   inetpeer: kill in...
405
406
407
408
409
410
411
412
  		if (atomic_read(&p->refcnt) == 0) {
  			smp_rmb();
  			delta = (__u32)jiffies - p->dtime;
  			if (delta >= ttl &&
  			    atomic_cmpxchg(&p->refcnt, 0, -1) == 0) {
  				p->gc_next = gchead;
  				gchead = p;
  			}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
413
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
414
  	}
4b9d9be83   Eric Dumazet   inetpeer: remove ...
415
416
417
418
419
420
  	while ((p = gchead) != NULL) {
  		gchead = p->gc_next;
  		cnt++;
  		unlink_from_pool(p, base, stack);
  	}
  	return cnt;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
421
  }
c0efc887d   David S. Miller   inet: Pass inetpe...
422
  struct inet_peer *inet_getpeer(struct inet_peer_base *base,
c8a627ed0   Gao feng   inetpeer: add nam...
423
424
  			       const struct inetpeer_addr *daddr,
  			       int create)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
425
  {
b914c4ea9   Eric Dumazet   inetpeer: __rcu a...
426
  	struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr;
98158f5a8   David S. Miller   inetpeer: Abstrac...
427
  	struct inet_peer *p;
65e8354ec   Eric Dumazet   inetpeer: seqlock...
428
  	unsigned int sequence;
4b9d9be83   Eric Dumazet   inetpeer: remove ...
429
  	int invalidated, gccnt = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
430

b48c80ece   David S. Miller   inet: Add family ...
431
  	flush_check(base, daddr->family);
4b9d9be83   Eric Dumazet   inetpeer: remove ...
432
  	/* Attempt a lockless lookup first.
aa1039e73   Eric Dumazet   inetpeer: RCU con...
433
434
  	 * Because of a concurrent writer, we might not find an existing entry.
  	 */
7b46ac4e7   David S. Miller   inetpeer: Don't d...
435
  	rcu_read_lock();
65e8354ec   Eric Dumazet   inetpeer: seqlock...
436
  	sequence = read_seqbegin(&base->lock);
4b9d9be83   Eric Dumazet   inetpeer: remove ...
437
  	p = lookup_rcu(daddr, base);
65e8354ec   Eric Dumazet   inetpeer: seqlock...
438
  	invalidated = read_seqretry(&base->lock, sequence);
7b46ac4e7   David S. Miller   inetpeer: Don't d...
439
  	rcu_read_unlock();
aa1039e73   Eric Dumazet   inetpeer: RCU con...
440

4b9d9be83   Eric Dumazet   inetpeer: remove ...
441
  	if (p)
aa1039e73   Eric Dumazet   inetpeer: RCU con...
442
  		return p;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
443

65e8354ec   Eric Dumazet   inetpeer: seqlock...
444
445
446
  	/* If no writer did a change during our lookup, we can return early. */
  	if (!create && !invalidated)
  		return NULL;
aa1039e73   Eric Dumazet   inetpeer: RCU con...
447
448
449
  	/* retry an exact lookup, taking the lock before.
  	 * At least, nodes should be hot in our cache.
  	 */
65e8354ec   Eric Dumazet   inetpeer: seqlock...
450
  	write_seqlock_bh(&base->lock);
4b9d9be83   Eric Dumazet   inetpeer: remove ...
451
  relookup:
026630450   David S. Miller   inetpeer: Abstrac...
452
  	p = lookup(daddr, stack, base);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
453
  	if (p != peer_avl_empty) {
4b9d9be83   Eric Dumazet   inetpeer: remove ...
454
  		atomic_inc(&p->refcnt);
65e8354ec   Eric Dumazet   inetpeer: seqlock...
455
  		write_sequnlock_bh(&base->lock);
4b9d9be83   Eric Dumazet   inetpeer: remove ...
456
457
458
459
460
461
  		return p;
  	}
  	if (!gccnt) {
  		gccnt = inet_peer_gc(base, stack, stackptr);
  		if (gccnt && create)
  			goto relookup;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
462
  	}
aa1039e73   Eric Dumazet   inetpeer: RCU con...
463
464
  	p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL;
  	if (p) {
b534ecf1c   David S. Miller   inetpeer: Make in...
465
  		p->daddr = *daddr;
aa1039e73   Eric Dumazet   inetpeer: RCU con...
466
467
  		atomic_set(&p->refcnt, 1);
  		atomic_set(&p->rid, 0);
144001bdd   David S. Miller   inetpeer: Mark me...
468
  		p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
92d868292   David S. Miller   inetpeer: Move IC...
469
  		p->rate_tokens = 0;
bc9259a8b   Nicolas Dichtel   inetpeer: fix tok...
470
471
472
473
  		/* 60*HZ is arbitrary, but chosen enough high so that the first
  		 * calculation of tokens is at its maximum.
  		 */
  		p->rate_last = jiffies - 60*HZ;
5faa5df1f   Steffen Klassert   inetpeer: Invalid...
474
  		INIT_LIST_HEAD(&p->gc_list);
aa1039e73   Eric Dumazet   inetpeer: RCU con...
475
476
  
  		/* Link the node. */
98158f5a8   David S. Miller   inetpeer: Abstrac...
477
478
  		link_to_pool(p, base);
  		base->total++;
aa1039e73   Eric Dumazet   inetpeer: RCU con...
479
  	}
65e8354ec   Eric Dumazet   inetpeer: seqlock...
480
  	write_sequnlock_bh(&base->lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
481

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
482
483
  	return p;
  }
b34193638   David S. Miller   ipv6: Add infrast...
484
  EXPORT_SYMBOL_GPL(inet_getpeer);
98158f5a8   David S. Miller   inetpeer: Abstrac...
485

4663afe2c   Eric Dumazet   [NET]: reduce siz...
486
487
  void inet_putpeer(struct inet_peer *p)
  {
4b9d9be83   Eric Dumazet   inetpeer: remove ...
488
  	p->dtime = (__u32)jiffies;
4e857c58e   Peter Zijlstra   arch: Mass conver...
489
  	smp_mb__before_atomic();
4b9d9be83   Eric Dumazet   inetpeer: remove ...
490
  	atomic_dec(&p->refcnt);
4663afe2c   Eric Dumazet   [NET]: reduce siz...
491
  }
b34193638   David S. Miller   ipv6: Add infrast...
492
  EXPORT_SYMBOL_GPL(inet_putpeer);
92d868292   David S. Miller   inetpeer: Move IC...
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
  
  /*
   *	Check transmit rate limitation for given message.
   *	The rate information is held in the inet_peer entries now.
   *	This function is generic and could be used for other purposes
   *	too. It uses a Token bucket filter as suggested by Alexey Kuznetsov.
   *
   *	Note that the same inet_peer fields are modified by functions in
   *	route.c too, but these work for packet destinations while xrlim_allow
   *	works for icmp destinations. This means the rate limiting information
   *	for one "ip object" is shared - and these ICMPs are twice limited:
   *	by source and by destination.
   *
   *	RFC 1812: 4.3.2.8 SHOULD be able to limit error message rate
   *			  SHOULD allow setting of rate limits
   *
   * 	Shared between ICMPv4 and ICMPv6.
   */
  #define XRLIM_BURST_FACTOR 6
  bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout)
  {
  	unsigned long now, token;
  	bool rc = false;
  
  	if (!peer)
  		return true;
  
  	token = peer->rate_tokens;
  	now = jiffies;
  	token += now - peer->rate_last;
  	peer->rate_last = now;
  	if (token > XRLIM_BURST_FACTOR * timeout)
  		token = XRLIM_BURST_FACTOR * timeout;
  	if (token >= timeout) {
  		token -= timeout;
  		rc = true;
  	}
  	peer->rate_tokens = token;
  	return rc;
  }
  EXPORT_SYMBOL(inet_peer_xrlim_allow);
5faa5df1f   Steffen Klassert   inetpeer: Invalid...
534

55432d2b5   Eric Dumazet   inetpeer: fix a r...
535
536
537
538
539
540
541
542
543
544
  static void inetpeer_inval_rcu(struct rcu_head *head)
  {
  	struct inet_peer *p = container_of(head, struct inet_peer, gc_rcu);
  
  	spin_lock_bh(&gc_lock);
  	list_add_tail(&p->gc_list, &gc_list);
  	spin_unlock_bh(&gc_lock);
  
  	schedule_delayed_work(&gc_work, gc_delay);
  }
56a6b248e   David S. Miller   inet: Consolidate...
545
  void inetpeer_invalidate_tree(struct inet_peer_base *base)
5faa5df1f   Steffen Klassert   inetpeer: Invalid...
546
  {
da5573746   Eric Dumazet   inetpeer: inetpee...
547
  	struct inet_peer *root;
5faa5df1f   Steffen Klassert   inetpeer: Invalid...
548
549
  
  	write_seqlock_bh(&base->lock);
da5573746   Eric Dumazet   inetpeer: inetpee...
550
551
552
  	root = rcu_deref_locked(base->root, base);
  	if (root != peer_avl_empty) {
  		base->root = peer_avl_empty_rcu;
5faa5df1f   Steffen Klassert   inetpeer: Invalid...
553
  		base->total = 0;
da5573746   Eric Dumazet   inetpeer: inetpee...
554
  		call_rcu(&root->gc_rcu, inetpeer_inval_rcu);
5faa5df1f   Steffen Klassert   inetpeer: Invalid...
555
  	}
5faa5df1f   Steffen Klassert   inetpeer: Invalid...
556
557
558
  	write_sequnlock_bh(&base->lock);
  }
  EXPORT_SYMBOL(inetpeer_invalidate_tree);