net/ipv4/inetpeer.c

  /*
   *		INETPEER - A storage for permanent information about peers
   *
   *  This source is covered by the GNU GPL, the same as all kernel sources.
   *
   *  Authors:	Andrey V. Savochkin <saw@msu.ru>
   */
  
  #include <linux/module.h>
  #include <linux/types.h>
  #include <linux/slab.h>
  #include <linux/interrupt.h>
  #include <linux/spinlock.h>
  #include <linux/random.h>
  #include <linux/timer.h>
  #include <linux/time.h>
  #include <linux/kernel.h>
  #include <linux/mm.h>
  #include <linux/net.h>
  #include <linux/workqueue.h>
  #include <net/ip.h>
  #include <net/inetpeer.h>
  #include <net/secure_seq.h>
  
  /*
   *  Theory of operations.
 *  We keep one entry for each peer IP address.  Each node contains long-living
 *  information about the peer which doesn't depend on routes.
   *
 *  Nodes are removed only when their reference counter goes to 0.
 *  When that happens, the node may be removed if a sufficient amount of
 *  time has passed since its last use.  The less-recently-used entry can
 *  also be removed if the pool is overloaded, i.e. if the total number of
 *  entries is greater than or equal to the threshold.
 *
 *  The node pool is organised as an AVL tree.
 *  Such an implementation has been chosen not just for fun.  It's a way to
 *  prevent easy and efficient DoS attacks by creating hash collisions.  A huge
 *  number of long-living nodes in a single hash slot would significantly delay
 *  lookups performed with BHs disabled.
 *
 *  Serialisation issues.
   *  1.  Nodes may appear in the tree only with the pool lock held.
   *  2.  Nodes may disappear from the tree only with the pool lock held
   *      AND reference count being 0.
   *  3.  Global variable peer_total is modified under the pool lock.
   *  4.  struct inet_peer fields modification:
   *		avl_left, avl_right, avl_parent, avl_height: pool lock
 *		refcnt: atomic against modifications on other CPUs;
 *		   usually under some other lock to prevent the node from disappearing
   *		daddr: unchangeable
   */
  static struct kmem_cache *peer_cachep __read_mostly;

  static LIST_HEAD(gc_list);
  static const int gc_delay = 60 * HZ;
  static struct delayed_work gc_work;
  static DEFINE_SPINLOCK(gc_lock);
  #define node_height(x) x->avl_height
  
  #define peer_avl_empty ((struct inet_peer *)&peer_fake_node)
  #define peer_avl_empty_rcu ((struct inet_peer __rcu __force *)&peer_fake_node)
  static const struct inet_peer peer_fake_node = {
  	.avl_left	= peer_avl_empty_rcu,
  	.avl_right	= peer_avl_empty_rcu,
  	.avl_height	= 0
  };

  void inet_peer_base_init(struct inet_peer_base *bp)
  {
  	bp->root = peer_avl_empty_rcu;
  	seqlock_init(&bp->lock);
  	bp->total = 0;
  }
  EXPORT_SYMBOL_GPL(inet_peer_base_init);
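
/* Illustrative example, not part of the original file: a hypothetical owner
 * structure.  In practice an inet_peer_base is embedded in longer-lived state
 * (e.g. per-netns data) and initialised once before any lookups run on it.
 */
struct example_peer_state {
	struct inet_peer_base	base;
};

static void example_peer_state_init(struct example_peer_state *s)
{
	inet_peer_base_init(&s->base);
}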

  #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */
  /* Exported for sysctl_net_ipv4.  */
  int inet_peer_threshold __read_mostly = 65536 + 128;	/* start to throw entries more
  					 * aggressively at this stage */
  int inet_peer_minttl __read_mostly = 120 * HZ;	/* TTL under high load: 120 sec */
  int inet_peer_maxttl __read_mostly = 10 * 60 * HZ;	/* usual time to live: 10 min */

  static void inetpeer_gc_worker(struct work_struct *work)
  {
  	struct inet_peer *p, *n, *c;
  	struct list_head list;
  
  	spin_lock_bh(&gc_lock);
  	list_replace_init(&gc_list, &list);
  	spin_unlock_bh(&gc_lock);
  
  	if (list_empty(&list))
  		return;
  
  	list_for_each_entry_safe(p, n, &list, gc_list) {
  		if (need_resched())
  			cond_resched();
  		c = rcu_dereference_protected(p->avl_left, 1);
  		if (c != peer_avl_empty) {
  			list_add_tail(&c->gc_list, &list);
  			p->avl_left = peer_avl_empty_rcu;
  		}
  		c = rcu_dereference_protected(p->avl_right, 1);
  		if (c != peer_avl_empty) {
  			list_add_tail(&c->gc_list, &list);
  			p->avl_right = peer_avl_empty_rcu;
  		}
  
  		n = list_entry(p->gc_list.next, struct inet_peer, gc_list);
  
  		if (!atomic_read(&p->refcnt)) {
  			list_del(&p->gc_list);
  			kmem_cache_free(peer_cachep, p);
  		}
  	}
  
  	if (list_empty(&list))
  		return;
  
  	spin_lock_bh(&gc_lock);
  	list_splice(&list, &gc_list);
  	spin_unlock_bh(&gc_lock);
  
  	schedule_delayed_work(&gc_work, gc_delay);
  }
  
  /* Called from ip_output.c:ip_init  */
  void __init inet_initpeers(void)
  {
  	struct sysinfo si;
  
  	/* Use the straight interface to information about memory. */
  	si_meminfo(&si);
  	/* The values below were suggested by Alexey Kuznetsov
  	 * <kuznet@ms2.inr.ac.ru>.  I don't have any opinion about the values
  	 * myself.  --SAW
  	 */
  	if (si.totalram <= (32768*1024)/PAGE_SIZE)
  		inet_peer_threshold >>= 1; /* max pool size about 1MB on IA32 */
  	if (si.totalram <= (16384*1024)/PAGE_SIZE)
  		inet_peer_threshold >>= 1; /* about 512KB */
  	if (si.totalram <= (8192*1024)/PAGE_SIZE)
  		inet_peer_threshold >>= 2; /* about 128KB */
  
  	peer_cachep = kmem_cache_create("inet_peer_cache",
  			sizeof(struct inet_peer),
  			0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
  			NULL);

  	INIT_DEFERRABLE_WORK(&gc_work, inetpeer_gc_worker);
  }
  #define rcu_deref_locked(X, BASE)				\
  	rcu_dereference_protected(X, lockdep_is_held(&(BASE)->lock.lock))
  /*
   * Called with local BH disabled and the pool lock held.
   */
  #define lookup(_daddr, _stack, _base)				\
  ({								\
  	struct inet_peer *u;					\
  	struct inet_peer __rcu **v;				\
  								\
  	stackptr = _stack;					\
  	*stackptr++ = &_base->root;				\
  	for (u = rcu_deref_locked(_base->root, _base);		\
  	     u != peer_avl_empty;) {				\
  		int cmp = inetpeer_addr_cmp(_daddr, &u->daddr);	\
  		if (cmp == 0)					\
  			break;					\
  		if (cmp == -1)					\
  			v = &u->avl_left;			\
  		else						\
  			v = &u->avl_right;			\
  		*stackptr++ = v;				\
  		u = rcu_deref_locked(*v, _base);		\
  	}							\
  	u;							\
  })
  /*
   * Called with rcu_read_lock()
 * Because we hold no lock against a writer, it's quite possible we fall
 * into an endless loop.
 * But every pointer we follow is guaranteed to be valid thanks to RCU.
 * We exit from this function if the number of links exceeds PEER_MAXDEPTH.
   */
  static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr,
  				    struct inet_peer_base *base)
  {
  	struct inet_peer *u = rcu_dereference(base->root);
  	int count = 0;
  
  	while (u != peer_avl_empty) {
  		int cmp = inetpeer_addr_cmp(daddr, &u->daddr);
  		if (cmp == 0) {
  			/* Before taking a reference, check if this entry was
  			 * deleted (refcnt=-1)
  			 */
  			if (!atomic_add_unless(&u->refcnt, 1, -1))
  				u = NULL;
  			return u;
  		}
  		if (cmp == -1)
  			u = rcu_dereference(u->avl_left);
  		else
  			u = rcu_dereference(u->avl_right);
  		if (unlikely(++count == PEER_MAXDEPTH))
  			break;
  	}
  	return NULL;
  }
  
  /* Called with local BH disabled and the pool lock held. */
  #define lookup_rightempty(start, base)				\
  ({								\
  	struct inet_peer *u;					\
  	struct inet_peer __rcu **v;				\
  	*stackptr++ = &start->avl_left;				\
  	v = &start->avl_left;					\
  	for (u = rcu_deref_locked(*v, base);			\
  	     u->avl_right != peer_avl_empty_rcu;) {		\
  		v = &u->avl_right;				\
  		*stackptr++ = v;				\
  		u = rcu_deref_locked(*v, base);			\
  	}							\
  	u;							\
  })
  /* Called with local BH disabled and the pool lock held.
   * Variable names are the proof of operation correctness.
 *  Look into mm/map_avl.c for a more detailed description of the ideas.
   */
  static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
  			       struct inet_peer __rcu ***stackend,
  			       struct inet_peer_base *base)
  {
  	struct inet_peer __rcu **nodep;
  	struct inet_peer *node, *l, *r;
  	int lh, rh;
  
  	while (stackend > stack) {
  		nodep = *--stackend;
  		node = rcu_deref_locked(*nodep, base);
  		l = rcu_deref_locked(node->avl_left, base);
  		r = rcu_deref_locked(node->avl_right, base);
  		lh = node_height(l);
  		rh = node_height(r);
  		if (lh > rh + 1) { /* l: RH+2 */
  			struct inet_peer *ll, *lr, *lrl, *lrr;
  			int lrh;
  			ll = rcu_deref_locked(l->avl_left, base);
  			lr = rcu_deref_locked(l->avl_right, base);
  			lrh = node_height(lr);
  			if (lrh <= node_height(ll)) {	/* ll: RH+1 */
  				RCU_INIT_POINTER(node->avl_left, lr);	/* lr: RH or RH+1 */
  				RCU_INIT_POINTER(node->avl_right, r);	/* r: RH */
  				node->avl_height = lrh + 1; /* RH+1 or RH+2 */
  				RCU_INIT_POINTER(l->avl_left, ll);       /* ll: RH+1 */
  				RCU_INIT_POINTER(l->avl_right, node);	/* node: RH+1 or RH+2 */
  				l->avl_height = node->avl_height + 1;
  				RCU_INIT_POINTER(*nodep, l);
  			} else { /* ll: RH, lr: RH+1 */
  				lrl = rcu_deref_locked(lr->avl_left, base);/* lrl: RH or RH-1 */
  				lrr = rcu_deref_locked(lr->avl_right, base);/* lrr: RH or RH-1 */
  				RCU_INIT_POINTER(node->avl_left, lrr);	/* lrr: RH or RH-1 */
  				RCU_INIT_POINTER(node->avl_right, r);	/* r: RH */
  				node->avl_height = rh + 1; /* node: RH+1 */
  				RCU_INIT_POINTER(l->avl_left, ll);	/* ll: RH */
  				RCU_INIT_POINTER(l->avl_right, lrl);	/* lrl: RH or RH-1 */
  				l->avl_height = rh + 1;	/* l: RH+1 */
  				RCU_INIT_POINTER(lr->avl_left, l);	/* l: RH+1 */
  				RCU_INIT_POINTER(lr->avl_right, node);	/* node: RH+1 */
  				lr->avl_height = rh + 2;
  				RCU_INIT_POINTER(*nodep, lr);
  			}
  		} else if (rh > lh + 1) { /* r: LH+2 */
  			struct inet_peer *rr, *rl, *rlr, *rll;
  			int rlh;
  			rr = rcu_deref_locked(r->avl_right, base);
  			rl = rcu_deref_locked(r->avl_left, base);
  			rlh = node_height(rl);
  			if (rlh <= node_height(rr)) {	/* rr: LH+1 */
  				RCU_INIT_POINTER(node->avl_right, rl);	/* rl: LH or LH+1 */
  				RCU_INIT_POINTER(node->avl_left, l);	/* l: LH */
  				node->avl_height = rlh + 1; /* LH+1 or LH+2 */
  				RCU_INIT_POINTER(r->avl_right, rr);	/* rr: LH+1 */
  				RCU_INIT_POINTER(r->avl_left, node);	/* node: LH+1 or LH+2 */
  				r->avl_height = node->avl_height + 1;
  				RCU_INIT_POINTER(*nodep, r);
			} else { /* rr: LH, rl: LH+1 */
  				rlr = rcu_deref_locked(rl->avl_right, base);/* rlr: LH or LH-1 */
  				rll = rcu_deref_locked(rl->avl_left, base);/* rll: LH or LH-1 */
  				RCU_INIT_POINTER(node->avl_right, rll);	/* rll: LH or LH-1 */
  				RCU_INIT_POINTER(node->avl_left, l);	/* l: LH */
  				node->avl_height = lh + 1; /* node: LH+1 */
  				RCU_INIT_POINTER(r->avl_right, rr);	/* rr: LH */
  				RCU_INIT_POINTER(r->avl_left, rlr);	/* rlr: LH or LH-1 */
  				r->avl_height = lh + 1;	/* r: LH+1 */
  				RCU_INIT_POINTER(rl->avl_right, r);	/* r: LH+1 */
  				RCU_INIT_POINTER(rl->avl_left, node);	/* node: LH+1 */
  				rl->avl_height = lh + 2;
  				RCU_INIT_POINTER(*nodep, rl);
  			}
  		} else {
  			node->avl_height = (lh > rh ? lh : rh) + 1;
  		}
  	}
  }
  /* Called with local BH disabled and the pool lock held. */
  #define link_to_pool(n, base)					\
  do {								\
  	n->avl_height = 1;					\
  	n->avl_left = peer_avl_empty_rcu;			\
  	n->avl_right = peer_avl_empty_rcu;			\
  	/* lockless readers can catch us now */			\
  	rcu_assign_pointer(**--stackptr, n);			\
  	peer_avl_rebalance(stack, stackptr, base);		\
  } while (0)

  static void inetpeer_free_rcu(struct rcu_head *head)
  {
  	kmem_cache_free(peer_cachep, container_of(head, struct inet_peer, rcu));
  }
  static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base,
  			     struct inet_peer __rcu **stack[PEER_MAXDEPTH])
  {
  	struct inet_peer __rcu ***stackptr, ***delp;
  
  	if (lookup(&p->daddr, stack, base) != p)
  		BUG();
  	delp = stackptr - 1; /* *delp[0] == p */
  	if (p->avl_left == peer_avl_empty_rcu) {
  		*delp[0] = p->avl_right;
  		--stackptr;
  	} else {
  		/* look for a node to insert instead of p */
  		struct inet_peer *t;
  		t = lookup_rightempty(p, base);
  		BUG_ON(rcu_deref_locked(*stackptr[-1], base) != t);
  		**--stackptr = t->avl_left;
  		/* t is removed, t->daddr > x->daddr for any
  		 * x in p->avl_left subtree.
  		 * Put t in the old place of p. */
  		RCU_INIT_POINTER(*delp[0], t);
  		t->avl_left = p->avl_left;
  		t->avl_right = p->avl_right;
  		t->avl_height = p->avl_height;
  		BUG_ON(delp[1] != &p->avl_left);
  		delp[1] = &t->avl_left; /* was &p->avl_left */
  	}
  	peer_avl_rebalance(stack, stackptr, base);
  	base->total--;
  	call_rcu(&p->rcu, inetpeer_free_rcu);
  }
  /* perform garbage collect on all items stacked during a lookup */
  static int inet_peer_gc(struct inet_peer_base *base,
  			struct inet_peer __rcu **stack[PEER_MAXDEPTH],
  			struct inet_peer __rcu ***stackptr)
  {
  	struct inet_peer *p, *gchead = NULL;
  	__u32 delta, ttl;
  	int cnt = 0;

	/* TTL scales linearly from inet_peer_maxttl for an empty pool down to
	 * inet_peer_minttl as base->total approaches inet_peer_threshold.
	 */
	if (base->total >= inet_peer_threshold)
		ttl = 0; /* be aggressive */
	else
		ttl = inet_peer_maxttl
				- (inet_peer_maxttl - inet_peer_minttl) / HZ *
					base->total / inet_peer_threshold * HZ;
  	stackptr--; /* last stack slot is peer_avl_empty */
  	while (stackptr > stack) {
  		stackptr--;
  		p = rcu_deref_locked(**stackptr, base);
  		if (atomic_read(&p->refcnt) == 0) {
  			smp_rmb();
  			delta = (__u32)jiffies - p->dtime;
  			if (delta >= ttl &&
  			    atomic_cmpxchg(&p->refcnt, 0, -1) == 0) {
  				p->gc_next = gchead;
  				gchead = p;
  			}
  		}
  	}
  	while ((p = gchead) != NULL) {
  		gchead = p->gc_next;
  		cnt++;
  		unlink_from_pool(p, base, stack);
  	}
  	return cnt;
  }
  struct inet_peer *inet_getpeer(struct inet_peer_base *base,
  			       const struct inetpeer_addr *daddr,
  			       int create)
  {
  	struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr;
  	struct inet_peer *p;
  	unsigned int sequence;
  	int invalidated, gccnt = 0;

  	/* Attempt a lockless lookup first.
  	 * Because of a concurrent writer, we might not find an existing entry.
  	 */
  	rcu_read_lock();
  	sequence = read_seqbegin(&base->lock);
  	p = lookup_rcu(daddr, base);
  	invalidated = read_seqretry(&base->lock, sequence);
  	rcu_read_unlock();

  	if (p)
  		return p;

  	/* If no writer did a change during our lookup, we can return early. */
  	if (!create && !invalidated)
  		return NULL;
	/* retry an exact lookup, this time taking the lock first.
	 * At least the nodes should be hot in our cache.
	 */
  	write_seqlock_bh(&base->lock);
  relookup:
  	p = lookup(daddr, stack, base);
  	if (p != peer_avl_empty) {
  		atomic_inc(&p->refcnt);
  		write_sequnlock_bh(&base->lock);
  		return p;
  	}
  	if (!gccnt) {
  		gccnt = inet_peer_gc(base, stack, stackptr);
  		if (gccnt && create)
  			goto relookup;
  	}
  	p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL;
  	if (p) {
  		p->daddr = *daddr;
  		atomic_set(&p->refcnt, 1);
  		atomic_set(&p->rid, 0);
  		p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
  		p->rate_tokens = 0;
		/* 60*HZ is arbitrary, but chosen high enough so that the first
		 * calculation of tokens is at its maximum.
		 */
  		p->rate_last = jiffies - 60*HZ;
  		INIT_LIST_HEAD(&p->gc_list);
  
  		/* Link the node. */
  		link_to_pool(p, base);
  		base->total++;
  	}
  	write_sequnlock_bh(&base->lock);

  	return p;
  }
  EXPORT_SYMBOL_GPL(inet_getpeer);

  void inet_putpeer(struct inet_peer *p)
  {
  	p->dtime = (__u32)jiffies;
  	smp_mb__before_atomic();
  	atomic_dec(&p->refcnt);
  }
  EXPORT_SYMBOL_GPL(inet_putpeer);
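
/* Illustrative example, not part of the original file: a hypothetical caller
 * showing the reference-count discipline of the API above.  It assumes the
 * caller already has an initialised struct inet_peer_base and a filled-in
 * struct inetpeer_addr; every successful inet_getpeer() is paired with an
 * inet_putpeer().
 */
static void example_touch_peer(struct inet_peer_base *base,
			       const struct inetpeer_addr *daddr)
{
	struct inet_peer *peer;

	/* create=1: allocate and link a new node if none exists yet */
	peer = inet_getpeer(base, daddr, 1);
	if (!peer)
		return;		/* not found, or allocation failed */

	/* ... read or update long-lived state here, e.g. peer->metrics ... */

	inet_putpeer(peer);	/* drop the reference taken by inet_getpeer() */
}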
  
  /*
   *	Check transmit rate limitation for given message.
   *	The rate information is held in the inet_peer entries now.
   *	This function is generic and could be used for other purposes
   *	too. It uses a Token bucket filter as suggested by Alexey Kuznetsov.
   *
   *	Note that the same inet_peer fields are modified by functions in
   *	route.c too, but these work for packet destinations while xrlim_allow
   *	works for icmp destinations. This means the rate limiting information
   *	for one "ip object" is shared - and these ICMPs are twice limited:
   *	by source and by destination.
   *
   *	RFC 1812: 4.3.2.8 SHOULD be able to limit error message rate
   *			  SHOULD allow setting of rate limits
   *
   * 	Shared between ICMPv4 and ICMPv6.
   */
  #define XRLIM_BURST_FACTOR 6
  bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout)
  {
  	unsigned long now, token;
  	bool rc = false;
  
  	if (!peer)
  		return true;
  
  	token = peer->rate_tokens;
  	now = jiffies;
  	token += now - peer->rate_last;
  	peer->rate_last = now;
  	if (token > XRLIM_BURST_FACTOR * timeout)
  		token = XRLIM_BURST_FACTOR * timeout;
  	if (token >= timeout) {
  		token -= timeout;
  		rc = true;
  	}
  	peer->rate_tokens = token;
  	return rc;
  }
  EXPORT_SYMBOL(inet_peer_xrlim_allow);
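
/* Illustrative example, not part of the original file: a hypothetical caller
 * that rate-limits replies to one destination with the token bucket above.
 * "timeout" is the minimum spacing between allowed messages, in jiffies; the
 * real ICMP code derives it from a sysctl.
 */
static bool example_reply_allowed(struct inet_peer_base *base,
				  const struct inetpeer_addr *daddr,
				  int timeout)
{
	struct inet_peer *peer;
	bool allowed;

	peer = inet_getpeer(base, daddr, 1);
	/* inet_peer_xrlim_allow() treats a NULL peer as "always allowed" */
	allowed = inet_peer_xrlim_allow(peer, timeout);
	if (peer)
		inet_putpeer(peer);
	return allowed;
}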

  static void inetpeer_inval_rcu(struct rcu_head *head)
  {
  	struct inet_peer *p = container_of(head, struct inet_peer, gc_rcu);
  
  	spin_lock_bh(&gc_lock);
  	list_add_tail(&p->gc_list, &gc_list);
  	spin_unlock_bh(&gc_lock);
  
  	schedule_delayed_work(&gc_work, gc_delay);
  }
  void inetpeer_invalidate_tree(struct inet_peer_base *base)
  {
  	struct inet_peer *root;
  
  	write_seqlock_bh(&base->lock);
  	root = rcu_deref_locked(base->root, base);
  	if (root != peer_avl_empty) {
  		base->root = peer_avl_empty_rcu;
  		base->total = 0;
  		call_rcu(&root->gc_rcu, inetpeer_inval_rcu);
  	}
  	write_sequnlock_bh(&base->lock);
  }
  EXPORT_SYMBOL(inetpeer_invalidate_tree);