Blame view

net/unix/garbage.c 8.9 KB
a85036f66   Thomas Gleixner   treewide: Replace...
1
  // SPDX-License-Identifier: GPL-2.0-or-later
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2
3
4
5
6
  /*
   * NET3:	Garbage Collector For AF_UNIX sockets
   *
   * Garbage Collector:
   *	Copyright (C) Barak A. Pearlmutter.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
   *
   * Chopped about by Alan Cox 22/3/96 to make it fit the AF_UNIX socket problem.
   * If it doesn't work blame me, it worked when Barak sent it.
   *
   * Assumptions:
   *
   *  - object w/ a bit
   *  - free list
   *
   * Current optimizations:
   *
   *  - explicit stack instead of recursion
   *  - tail recurse on first born instead of immediate push/pop
   *  - we gather the stuff that should not be killed into tree
   *    and stack is just a path from root to the current pointer.
   *
   *  Future optimizations:
   *
   *  - don't just push entire root set; process in place
   *
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
   *  Fixes:
   *	Alan Cox	07 Sept	1997	Vmalloc internal stack as needed.
   *					Cope with changing max_files.
   *	Al Viro		11 Oct 1998
   *		Graph may have cycles. That is, we can send the descriptor
   *		of foo to bar and vice versa. Current code chokes on that.
   *		Fix: move SCM_RIGHTS ones into the separate list and then
   *		skb_free() them all instead of doing explicit fput's.
   *		Another problem: since fput() may block somebody may
   *		create a new unix_socket when we are in the middle of sweep
   *		phase. Fix: revert the logic wrt MARKED. Mark everything
   *		upon the beginning and unmark non-junk ones.
   *
   *		[12 Oct 1998] AAARGH! New code purges all SCM_RIGHTS
   *		sent to connect()'ed but still not accept()'ed sockets.
   *		Fixed. Old code had slightly different problem here:
   *		extra fput() in situation when we passed the descriptor via
   *		such socket and closed it (descriptor). That would happen on
   *		each unix_gc() until the accept(). Since the struct file in
   *		question would go to the free list and might be reused...
   *		That might be the reason of random oopses on filp_close()
   *		in unrelated processes.
   *
   *	AV		28 Feb 1999
   *		Kill the explicit allocation of stack. Now we keep the tree
   *		with root in dummy + pointer (gc_current) to one of the nodes.
   *		Stack is represented as path from gc_current to dummy. Unmark
   *		now means "add to tree". Push == "make it a son of gc_current".
   *		Pop == "move gc_current to parent". We keep only pointers to
   *		parents (->gc_tree).
   *	AV		1 Mar 1999
   *		Damn. Added missing check for ->dead in listen queues scanning.
   *
1fd05ba5a   Miklos Szeredi   [AF_UNIX]: Rewrit...
60
61
62
63
   *	Miklos Szeredi 25 Jun 2007
   *		Reimplement with a cycle collecting algorithm. This should
   *		solve several problems with the previous code, like being racy
   *		wrt receive and holding up unrelated socket operations.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
64
   */
ac7bfa62f   YOSHIFUJI Hideaki   [NET] UNIX: Fix w...
65

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
66
  #include <linux/kernel.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
67
68
69
70
71
  #include <linux/string.h>
  #include <linux/socket.h>
  #include <linux/un.h>
  #include <linux/net.h>
  #include <linux/fs.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
72
73
74
75
  #include <linux/skbuff.h>
  #include <linux/netdevice.h>
  #include <linux/file.h>
  #include <linux/proc_fs.h>
4a3e2f711   Arjan van de Ven   [NET] sem2mutex: ...
76
  #include <linux/mutex.h>
5f23b7349   dann frazier   net: Fix soft loc...
77
  #include <linux/wait.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
78
79
80
81
  
  #include <net/sock.h>
  #include <net/af_unix.h>
  #include <net/scm.h>
c752f0739   Arnaldo Carvalho de Melo   [TCP]: Move the t...
82
  #include <net/tcp_states.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
83

f4e65870e   Jens Axboe   net: split out fu...
84
  #include "scm.h"
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
85
  /* Internal data structures and random procedures: */
1fd05ba5a   Miklos Szeredi   [AF_UNIX]: Rewrit...
86
  static LIST_HEAD(gc_candidates);
5f23b7349   dann frazier   net: Fix soft loc...
87
  static DECLARE_WAIT_QUEUE_HEAD(unix_gc_wait);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
88

5c80f1ae9   Pavel Emelyanov   [AF_UNIX]: Conver...
89
  static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
1fd05ba5a   Miklos Szeredi   [AF_UNIX]: Rewrit...
90
  			  struct sk_buff_head *hitlist)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
91
  {
1fd05ba5a   Miklos Szeredi   [AF_UNIX]: Rewrit...
92
93
94
95
  	struct sk_buff *skb;
  	struct sk_buff *next;
  
  	spin_lock(&x->sk_receive_queue.lock);
a2f3be17c   Ilpo Järvinen   unix/garbage: kil...
96
  	skb_queue_walk_safe(&x->sk_receive_queue, skb, next) {
d1ab39f17   Jason Eastman   net: unix: garbag...
97
  		/* Do we have file descriptors ? */
1fd05ba5a   Miklos Szeredi   [AF_UNIX]: Rewrit...
98
99
  		if (UNIXCB(skb).fp) {
  			bool hit = false;
d1ab39f17   Jason Eastman   net: unix: garbag...
100
  			/* Process the descriptors of this socket */
1fd05ba5a   Miklos Szeredi   [AF_UNIX]: Rewrit...
101
102
  			int nfd = UNIXCB(skb).fp->count;
  			struct file **fp = UNIXCB(skb).fp->fp;
d1ab39f17   Jason Eastman   net: unix: garbag...
103

1fd05ba5a   Miklos Szeredi   [AF_UNIX]: Rewrit...
104
  			while (nfd--) {
d1ab39f17   Jason Eastman   net: unix: garbag...
105
  				/* Get the socket the fd matches if it indeed does so */
1fd05ba5a   Miklos Szeredi   [AF_UNIX]: Rewrit...
106
  				struct sock *sk = unix_get_socket(*fp++);
d1ab39f17   Jason Eastman   net: unix: garbag...
107

5c80f1ae9   Pavel Emelyanov   [AF_UNIX]: Conver...
108
  				if (sk) {
6209344f5   Miklos Szeredi   net: unix: fix in...
109
  					struct unix_sock *u = unix_sk(sk);
d1ab39f17   Jason Eastman   net: unix: garbag...
110
  					/* Ignore non-candidates, they could
6209344f5   Miklos Szeredi   net: unix: fix in...
111
112
113
  					 * have been added to the queues after
  					 * starting the garbage collection
  					 */
60bc851ae   Eric Dumazet   af_unix: fix a fa...
114
  					if (test_bit(UNIX_GC_CANDIDATE, &u->gc_flags)) {
6209344f5   Miklos Szeredi   net: unix: fix in...
115
  						hit = true;
d1ab39f17   Jason Eastman   net: unix: garbag...
116

6209344f5   Miklos Szeredi   net: unix: fix in...
117
118
  						func(u);
  					}
1fd05ba5a   Miklos Szeredi   [AF_UNIX]: Rewrit...
119
120
121
122
123
124
125
126
127
  				}
  			}
  			if (hit && hitlist != NULL) {
  				__skb_unlink(skb, &x->sk_receive_queue);
  				__skb_queue_tail(hitlist, skb);
  			}
  		}
  	}
  	spin_unlock(&x->sk_receive_queue.lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
128
  }
5c80f1ae9   Pavel Emelyanov   [AF_UNIX]: Conver...
129
  static void scan_children(struct sock *x, void (*func)(struct unix_sock *),
1fd05ba5a   Miklos Szeredi   [AF_UNIX]: Rewrit...
130
  			  struct sk_buff_head *hitlist)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
131
  {
d1ab39f17   Jason Eastman   net: unix: garbag...
132
  	if (x->sk_state != TCP_LISTEN) {
1fd05ba5a   Miklos Szeredi   [AF_UNIX]: Rewrit...
133
  		scan_inflight(x, func, hitlist);
d1ab39f17   Jason Eastman   net: unix: garbag...
134
  	} else {
1fd05ba5a   Miklos Szeredi   [AF_UNIX]: Rewrit...
135
136
137
138
  		struct sk_buff *skb;
  		struct sk_buff *next;
  		struct unix_sock *u;
  		LIST_HEAD(embryos);
d1ab39f17   Jason Eastman   net: unix: garbag...
139
  		/* For a listening socket collect the queued embryos
1fd05ba5a   Miklos Szeredi   [AF_UNIX]: Rewrit...
140
141
142
  		 * and perform a scan on them as well.
  		 */
  		spin_lock(&x->sk_receive_queue.lock);
a2f3be17c   Ilpo Järvinen   unix/garbage: kil...
143
  		skb_queue_walk_safe(&x->sk_receive_queue, skb, next) {
1fd05ba5a   Miklos Szeredi   [AF_UNIX]: Rewrit...
144
  			u = unix_sk(skb->sk);
d1ab39f17   Jason Eastman   net: unix: garbag...
145
  			/* An embryo cannot be in-flight, so it's safe
1fd05ba5a   Miklos Szeredi   [AF_UNIX]: Rewrit...
146
147
148
149
150
151
152
153
154
155
156
157
158
  			 * to use the list link.
  			 */
  			BUG_ON(!list_empty(&u->link));
  			list_add_tail(&u->link, &embryos);
  		}
  		spin_unlock(&x->sk_receive_queue.lock);
  
  		while (!list_empty(&embryos)) {
  			u = list_entry(embryos.next, struct unix_sock, link);
  			scan_inflight(&u->sk, func, hitlist);
  			list_del_init(&u->link);
  		}
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
159
  }
5c80f1ae9   Pavel Emelyanov   [AF_UNIX]: Conver...
160
  static void dec_inflight(struct unix_sock *usk)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
161
  {
516e0cc56   Al Viro   [PATCH] f_count m...
162
  	atomic_long_dec(&usk->inflight);
1fd05ba5a   Miklos Szeredi   [AF_UNIX]: Rewrit...
163
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
164

5c80f1ae9   Pavel Emelyanov   [AF_UNIX]: Conver...
165
  static void inc_inflight(struct unix_sock *usk)
1fd05ba5a   Miklos Szeredi   [AF_UNIX]: Rewrit...
166
  {
516e0cc56   Al Viro   [PATCH] f_count m...
167
  	atomic_long_inc(&usk->inflight);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
168
  }
5c80f1ae9   Pavel Emelyanov   [AF_UNIX]: Conver...
169
  static void inc_inflight_move_tail(struct unix_sock *u)
1fd05ba5a   Miklos Szeredi   [AF_UNIX]: Rewrit...
170
  {
516e0cc56   Al Viro   [PATCH] f_count m...
171
  	atomic_long_inc(&u->inflight);
d1ab39f17   Jason Eastman   net: unix: garbag...
172
  	/* If this still might be part of a cycle, move it to the end
6209344f5   Miklos Szeredi   net: unix: fix in...
173
174
  	 * of the list, so that it's checked even if it was already
  	 * passed over
1fd05ba5a   Miklos Szeredi   [AF_UNIX]: Rewrit...
175
  	 */
60bc851ae   Eric Dumazet   af_unix: fix a fa...
176
  	if (test_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags))
1fd05ba5a   Miklos Szeredi   [AF_UNIX]: Rewrit...
177
178
  		list_move_tail(&u->link, &gc_candidates);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
179

505e907db   Fabian Frederick   af_unix: remove 0...
180
  static bool gc_in_progress;
9915672d4   Eric Dumazet   af_unix: limit un...
181
  #define UNIX_INFLIGHT_TRIGGER_GC 16000
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
182

5f23b7349   dann frazier   net: Fix soft loc...
183
  void wait_for_unix_gc(void)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
184
  {
d1ab39f17   Jason Eastman   net: unix: garbag...
185
  	/* If number of inflight sockets is insane,
9915672d4   Eric Dumazet   af_unix: limit un...
186
187
188
189
  	 * force a garbage collect right now.
  	 */
  	if (unix_tot_inflight > UNIX_INFLIGHT_TRIGGER_GC && !gc_in_progress)
  		unix_gc();
5f23b7349   dann frazier   net: Fix soft loc...
190
191
  	wait_event(unix_gc_wait, gc_in_progress == false);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
192

5f23b7349   dann frazier   net: Fix soft loc...
193
194
195
  /* The external entry point: unix_gc() */
  void unix_gc(void)
  {
1fd05ba5a   Miklos Szeredi   [AF_UNIX]: Rewrit...
196
197
198
199
  	struct unix_sock *u;
  	struct unix_sock *next;
  	struct sk_buff_head hitlist;
  	struct list_head cursor;
6209344f5   Miklos Szeredi   net: unix: fix in...
200
  	LIST_HEAD(not_cycle_list);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
201

1fd05ba5a   Miklos Szeredi   [AF_UNIX]: Rewrit...
202
  	spin_lock(&unix_gc_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
203

1fd05ba5a   Miklos Szeredi   [AF_UNIX]: Rewrit...
204
205
206
  	/* Avoid a recursive GC. */
  	if (gc_in_progress)
  		goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
207

1fd05ba5a   Miklos Szeredi   [AF_UNIX]: Rewrit...
208
  	gc_in_progress = true;
d1ab39f17   Jason Eastman   net: unix: garbag...
209
  	/* First, select candidates for garbage collection.  Only
1fd05ba5a   Miklos Szeredi   [AF_UNIX]: Rewrit...
210
211
212
213
214
  	 * in-flight sockets are considered, and from those only ones
  	 * which don't have any external reference.
  	 *
  	 * Holding unix_gc_lock will protect these candidates from
  	 * being detached, and hence from gaining an external
6209344f5   Miklos Szeredi   net: unix: fix in...
215
216
217
218
219
220
221
222
  	 * reference.  Since there are no possible receivers, all
  	 * buffers currently on the candidates' queues stay there
  	 * during the garbage collection.
  	 *
  	 * We also know that no new candidate can be added onto the
  	 * receive queues.  Other, non candidate sockets _can_ be
  	 * added to queue, so we must make sure only to touch
  	 * candidates.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
223
  	 */
1fd05ba5a   Miklos Szeredi   [AF_UNIX]: Rewrit...
224
  	list_for_each_entry_safe(u, next, &gc_inflight_list, link) {
516e0cc56   Al Viro   [PATCH] f_count m...
225
226
  		long total_refs;
  		long inflight_refs;
1fd05ba5a   Miklos Szeredi   [AF_UNIX]: Rewrit...
227
228
  
  		total_refs = file_count(u->sk.sk_socket->file);
516e0cc56   Al Viro   [PATCH] f_count m...
229
  		inflight_refs = atomic_long_read(&u->inflight);
1fd05ba5a   Miklos Szeredi   [AF_UNIX]: Rewrit...
230
231
232
233
234
  
  		BUG_ON(inflight_refs < 1);
  		BUG_ON(total_refs < inflight_refs);
  		if (total_refs == inflight_refs) {
  			list_move_tail(&u->link, &gc_candidates);
60bc851ae   Eric Dumazet   af_unix: fix a fa...
235
236
  			__set_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
  			__set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
1fd05ba5a   Miklos Szeredi   [AF_UNIX]: Rewrit...
237
238
  		}
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
239

d1ab39f17   Jason Eastman   net: unix: garbag...
240
  	/* Now remove all internal in-flight reference to children of
1fd05ba5a   Miklos Szeredi   [AF_UNIX]: Rewrit...
241
  	 * the candidates.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
242
  	 */
1fd05ba5a   Miklos Szeredi   [AF_UNIX]: Rewrit...
243
244
  	list_for_each_entry(u, &gc_candidates, link)
  		scan_children(&u->sk, dec_inflight, NULL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
245

d1ab39f17   Jason Eastman   net: unix: garbag...
246
  	/* Restore the references for children of all candidates,
1fd05ba5a   Miklos Szeredi   [AF_UNIX]: Rewrit...
247
248
249
250
251
  	 * which have remaining references.  Do this recursively, so
  	 * only those remain, which form cyclic references.
  	 *
  	 * Use a "cursor" link, to make the list traversal safe, even
  	 * though elements might be moved about.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
252
  	 */
1fd05ba5a   Miklos Szeredi   [AF_UNIX]: Rewrit...
253
254
255
  	list_add(&cursor, &gc_candidates);
  	while (cursor.next != &gc_candidates) {
  		u = list_entry(cursor.next, struct unix_sock, link);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
256

1fd05ba5a   Miklos Szeredi   [AF_UNIX]: Rewrit...
257
258
  		/* Move cursor to after the current position. */
  		list_move(&cursor, &u->link);
ac7bfa62f   YOSHIFUJI Hideaki   [NET] UNIX: Fix w...
259

516e0cc56   Al Viro   [PATCH] f_count m...
260
  		if (atomic_long_read(&u->inflight) > 0) {
6209344f5   Miklos Szeredi   net: unix: fix in...
261
  			list_move_tail(&u->link, &not_cycle_list);
60bc851ae   Eric Dumazet   af_unix: fix a fa...
262
  			__clear_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
1fd05ba5a   Miklos Szeredi   [AF_UNIX]: Rewrit...
263
  			scan_children(&u->sk, inc_inflight_move_tail, NULL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
264
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
265
  	}
1fd05ba5a   Miklos Szeredi   [AF_UNIX]: Rewrit...
266
  	list_del(&cursor);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
267

7df9c2462   Andrey Ulanov   net: unix: proper...
268
269
270
271
272
273
274
  	/* Now gc_candidates contains only garbage.  Restore original
  	 * inflight counters for these as well, and remove the skbuffs
  	 * which are creating the cycle(s).
  	 */
  	skb_queue_head_init(&hitlist);
  	list_for_each_entry(u, &gc_candidates, link)
  		scan_children(&u->sk, inc_inflight, &hitlist);
d1ab39f17   Jason Eastman   net: unix: garbag...
275
  	/* not_cycle_list contains those sockets which do not make up a
6209344f5   Miklos Szeredi   net: unix: fix in...
276
277
278
279
  	 * cycle.  Restore these to the inflight list.
  	 */
  	while (!list_empty(&not_cycle_list)) {
  		u = list_entry(not_cycle_list.next, struct unix_sock, link);
60bc851ae   Eric Dumazet   af_unix: fix a fa...
280
  		__clear_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
6209344f5   Miklos Szeredi   net: unix: fix in...
281
282
  		list_move_tail(&u->link, &gc_inflight_list);
  	}
1fd05ba5a   Miklos Szeredi   [AF_UNIX]: Rewrit...
283
  	spin_unlock(&unix_gc_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
284

1fd05ba5a   Miklos Szeredi   [AF_UNIX]: Rewrit...
285
286
  	/* Here we are. Hitlist is filled. Die. */
  	__skb_queue_purge(&hitlist);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
287

1fd05ba5a   Miklos Szeredi   [AF_UNIX]: Rewrit...
288
  	spin_lock(&unix_gc_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
289

1fd05ba5a   Miklos Szeredi   [AF_UNIX]: Rewrit...
290
291
292
  	/* All candidates should have been detached by now. */
  	BUG_ON(!list_empty(&gc_candidates));
  	gc_in_progress = false;
5f23b7349   dann frazier   net: Fix soft loc...
293
  	wake_up(&unix_gc_wait);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
294

1fd05ba5a   Miklos Szeredi   [AF_UNIX]: Rewrit...
295
296
   out:
  	spin_unlock(&unix_gc_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
297
  }