Blame view

ipc/sem.c 54.1 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
  /*
   * linux/ipc/sem.c
   * Copyright (C) 1992 Krishna Balasubramanian
   * Copyright (C) 1995 Eric Schenk, Bruno Haible
   *
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
6
7
8
   * /proc/sysvipc/sem support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
   *
   * SMP-threaded, sysctl's added
624dffcbc   Christian Kujau   correct email add...
9
   * (c) 1999 Manfred Spraul <manfred@colorfullife.com>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
10
   * Enforced range limit on SEM_UNDO
046c68842   Alan Cox   mm: update my add...
11
   * (c) 2001 Red Hat Inc
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
12
13
   * Lockless wakeup
   * (c) 2003 Manfred Spraul <manfred@colorfullife.com>
c5cf6359a   Manfred Spraul   ipc/sem.c: update...
14
15
   * Further wakeup optimizations, documentation
   * (c) 2010 Manfred Spraul <manfred@colorfullife.com>
073115d6b   Steve Grubb   [PATCH] Rework of...
16
17
18
   *
   * support for audit of ipc object properties and permission changes
   * Dustin Kirkland <dustin.kirkland@us.ibm.com>
e38935341   Kirill Korotaev   [PATCH] IPC names...
19
20
21
22
   *
   * namespaces support
   * OpenVZ, SWsoft Inc.
   * Pavel Emelianov <xemul@openvz.org>
c5cf6359a   Manfred Spraul   ipc/sem.c: update...
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
   *
   * Implementation notes: (May 2010)
   * This file implements System V semaphores.
   *
   * User space visible behavior:
   * - FIFO ordering for semop() operations (just FIFO, not starvation
   *   protection)
   * - multiple semaphore operations that alter the same semaphore in
   *   one semop() are handled.
   * - sem_ctime (time of last semctl()) is updated in the IPC_SET, SETVAL and
   *   SETALL calls.
   * - two Linux specific semctl() commands: SEM_STAT, SEM_INFO.
   * - undo adjustments at process exit are limited to 0..SEMVMX.
   * - namespaces are supported.
   * - SEMMSL, SEMMNS, SEMOPM and SEMMNI can be configured at runtime by writing
   *   to /proc/sys/kernel/sem.
   * - statistics about the usage are reported in /proc/sysvipc/sem.
   *
   * Internals:
   * - scalability:
   *   - all global variables are read-mostly.
   *   - semop() calls and semctl(RMID) are synchronized by RCU.
   *   - most operations do write operations (actually: spin_lock calls) to
   *     the per-semaphore array structure.
   *   Thus: Perfect SMP scaling between independent semaphore arrays.
   *         If multiple semaphores in one array are used, then cache line
   *         thrashing on the semaphore array spinlock will limit the scaling.
   * - semncnt and semzcnt are calculated on demand in count_semncnt() and
   *   count_semzcnt()
   * - the task that performs a successful semop() scans the list of all
   *   sleeping tasks and completes any pending operations that can be fulfilled.
   *   Semaphores are actively given to waiting tasks (necessary for FIFO).
   *   (see update_queue())
   * - To improve the scalability, the actual wake-up calls are performed after
   *   dropping all locks. (see wake_up_sem_queue_prepare(),
   *   wake_up_sem_queue_do())
   * - All work is done by the waker, the woken up task does not have to do
   *   anything - not even acquiring a lock or dropping a refcount.
   * - A woken up task may not even touch the semaphore array anymore, it may
   *   have been destroyed already by a semctl(RMID).
   * - The synchronizations between wake-ups due to a timeout/signal and a
   *   wake-up due to a completed semaphore operation is achieved by using an
   *   intermediate state (IN_WAKEUP).
   * - UNDO values are stored in an array (one per process and per
   *   semaphore array, lazily allocated). For backwards compatibility, multiple
   *   modes for the UNDO variables are supported (per process, per thread)
   *   (see copy_semundo, CLONE_SYSVSEM)
   * - There are two lists of the pending operations: a per-array list
   *   and per-semaphore list (stored in the array). This allows to achieve FIFO
   *   ordering without always scanning all pending operations.
   *   The worst-case behavior is nevertheless O(N^2) for N wakeups.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
74
   */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
75
76
77
78
79
  #include <linux/slab.h>
  #include <linux/spinlock.h>
  #include <linux/init.h>
  #include <linux/proc_fs.h>
  #include <linux/time.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
80
81
82
  #include <linux/security.h>
  #include <linux/syscalls.h>
  #include <linux/audit.h>
c59ede7b7   Randy.Dunlap   [PATCH] move capa...
83
  #include <linux/capability.h>
19b4946ca   Mike Waychison   [PATCH] ipc: conv...
84
  #include <linux/seq_file.h>
3e148c799   Nadia Derbey   fix idr_find() lo...
85
  #include <linux/rwsem.h>
e38935341   Kirill Korotaev   [PATCH] IPC names...
86
  #include <linux/nsproxy.h>
ae5e1b22f   Pavel Emelyanov   namespaces: move ...
87
  #include <linux/ipc_namespace.h>
5f921ae96   Ingo Molnar   [PATCH] sem2mutex...
88

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
89
90
  #include <asm/uaccess.h>
  #include "util.h"
e57940d71   Manfred Spraul   ipc/sem.c: remove...
91
92
93
94
  /* One semaphore structure for each semaphore in the system. */
  struct sem {
  	int	semval;		/* current value */
  	int	sempid;		/* pid of last operation */
6062a8dc0   Rik van Riel   ipc,sem: fine gra...
95
  	spinlock_t	lock;	/* spinlock for fine-grained semtimedop */
ab63bc97f   Manfred Spraul   ipc/sem: separate...
96
97
98
99
  	struct list_head pending_alter; /* pending single-sop operations */
  					/* that alter the semaphore */
  	struct list_head pending_const; /* pending single-sop operations */
  					/* that do not alter the semaphore*/
bf6830ad6   Manfred Spraul   ipc/sem.c: replac...
100
  	time_t	sem_otime;	/* candidate for sem_otime */
0824e44c3   Manfred Spraul   ipc/sem.c: cachel...
101
  } ____cacheline_aligned_in_smp;
e57940d71   Manfred Spraul   ipc/sem.c: remove...
102
103
104
  
  /* One queue for each sleeping process in the system. */
  struct sem_queue {
e57940d71   Manfred Spraul   ipc/sem.c: remove...
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
  	struct list_head	list;	 /* queue of pending operations */
  	struct task_struct	*sleeper; /* this process */
  	struct sem_undo		*undo;	 /* undo structure */
  	int			pid;	 /* process id of requesting process */
  	int			status;	 /* completion status of operation */
  	struct sembuf		*sops;	 /* array of pending operations */
  	int			nsops;	 /* number of operations */
  	int			alter;	 /* does *sops alter the array? */
  };
  
  /* Each task has a list of undo requests. They are executed automatically
   * when the process exits.
   *
   * One sem_undo carries the adjustments for a single semaphore array
   * (semid). It sits on two lists at once: the per-process list (list_proc)
   * and the per-array list (list_id).
   */
  struct sem_undo {
  	struct list_head	list_proc;	/* per-process list:
  						 * all undos from one process
  						 * rcu protected */
  	struct rcu_head		rcu;		/* rcu struct for sem_undo */
  	struct sem_undo_list	*ulp;		/* back ptr to sem_undo_list */
  	struct list_head	list_id;	/* per semaphore array list:
  						 * all undos for one array */
  	int			semid;		/* semaphore set identifier */
  	short			*semadj;	/* array of adjustments */
  						/* one per semaphore */
  };
  
  /* sem_undo_list controls shared access to the list of sem_undo structures
   * that may be shared among all tasks of a CLONE_SYSVSEM task group.
   */
  struct sem_undo_list {
  	atomic_t		refcnt;		/* presumably the number of users
  						 * sharing this list - confirm
  						 * against copy_semundo() */
  	spinlock_t		lock;		/* presumably serializes updates
  						 * of list_proc - confirm */
  	struct list_head	list_proc;	/* head for sem_undo.list_proc */
  };
ed2ddbf88   Pierre Peiffer   IPC: make struct ...
139
  #define sem_ids(ns)	((ns)->ids[IPC_SEM_IDS])
e38935341   Kirill Korotaev   [PATCH] IPC names...
140

1b531f213   Nadia Derbey   ipc: remove unnee...
141
  /*
   * Check @semid against the kern_ipc_perm embedded in @sma.
   * The argument is parenthesized so that expressions such as "arr + 1"
   * expand correctly.
   */
  #define sem_checkid(sma, semid)	ipc_checkid(&(sma)->sem_perm, semid)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
142

7748dbfaa   Nadia Derbey   ipc: unify the sy...
143
  static int newary(struct ipc_namespace *, struct ipc_params *);
01b8b07a5   Pierre Peiffer   IPC: consolidate ...
144
  static void freeary(struct ipc_namespace *, struct kern_ipc_perm *);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
145
  #ifdef CONFIG_PROC_FS
19b4946ca   Mike Waychison   [PATCH] ipc: conv...
146
  static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
147
148
149
150
151
152
  #endif
  
  #define SEMMSL_FAST	256 /* 512 bytes on stack */
  #define SEMOPM_FAST	64  /* ~ 372 bytes on stack */
  
  /*
b56e88e25   Manfred Spraul   ipc/sem.c: rename...
153
   * Locking:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
154
   *	sem_undo.id_next,
b56e88e25   Manfred Spraul   ipc/sem.c: rename...
155
   *	sem_array.complex_count,
ab63bc97f   Manfred Spraul   ipc/sem: separate...
156
   *	sem_array.pending{_alter,_const},
b56e88e25   Manfred Spraul   ipc/sem.c: rename...
157
   *	sem_array.sem_undo: global sem_lock() for read/write
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
158
159
   *	sem_undo.proc_next: only "current" is allowed to read/write that field.
   *	
b56e88e25   Manfred Spraul   ipc/sem.c: rename...
160
161
   *	sem_array.sem_base[i].pending_{const,alter}:
   *		global or semaphore sem_lock() for read/write
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
162
   */
e38935341   Kirill Korotaev   [PATCH] IPC names...
163
164
165
166
  #define sc_semmsl	sem_ctls[0]
  #define sc_semmns	sem_ctls[1]
  #define sc_semopm	sem_ctls[2]
  #define sc_semmni	sem_ctls[3]
ed2ddbf88   Pierre Peiffer   IPC: make struct ...
167
  void sem_init_ns(struct ipc_namespace *ns)
e38935341   Kirill Korotaev   [PATCH] IPC names...
168
  {
e38935341   Kirill Korotaev   [PATCH] IPC names...
169
170
171
172
173
  	ns->sc_semmsl = SEMMSL;
  	ns->sc_semmns = SEMMNS;
  	ns->sc_semopm = SEMOPM;
  	ns->sc_semmni = SEMMNI;
  	ns->used_sems = 0;
ed2ddbf88   Pierre Peiffer   IPC: make struct ...
174
  	ipc_init_ids(&ns->ids[IPC_SEM_IDS]);
e38935341   Kirill Korotaev   [PATCH] IPC names...
175
  }
ae5e1b22f   Pavel Emelyanov   namespaces: move ...
176
  #ifdef CONFIG_IPC_NS
  /*
   * Tear down the semaphore part of an ipc namespace: pass every array
   * to freeary() via free_ipcs(), then destroy the idr of the id table.
   */
  void sem_exit_ns(struct ipc_namespace *ns)
  {
  	free_ipcs(ns, &sem_ids(ns), freeary);
  	idr_destroy(&sem_ids(ns).ipcs_idr);
  }
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
183
184
185
  
  /*
   * Boot-time initialization: set up the initial ipc namespace and
   * register the "sysvipc/sem" proc interface.
   *
   * The second argument is the column header line for that interface.
   * It must end in an escaped "\n"; a raw line break inside the literal
   * is not valid C.
   */
  void __init sem_init(void)
  {
  	sem_init_ns(&init_ipc_ns);
  	ipc_init_proc_interface("sysvipc/sem",
  				"       key      semid perms      nsems   uid   gid  cuid  cgid      otime      ctime\n",
  				IPC_SEM_IDS, sysvipc_sem_proc_show);
  }
e5639c528   Manfred Spraul   ipc/sem.c: always...
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
  /**
   * unmerge_queues - switch back to per-semaphore queues, if possible
   * @sma: semaphore array
   *
   * Does nothing while complex_count is non-zero. Otherwise every entry
   * on the array-wide pending_alter list is appended to the pending_alter
   * list of the semaphore named by its first sop, and the array-wide
   * list is reset to empty.
   *
   * Must be called prior to dropping the global semaphore array lock.
   */
  static void unmerge_queues(struct sem_array *sma)
  {
  	struct sem_queue *entry, *next;

  	/* complex operations still around: stay in merged mode */
  	if (sma->complex_count != 0)
  		return;

  	/* Simple mode again: hand each entry back to its semaphore. */
  	list_for_each_entry_safe(entry, next, &sma->pending_alter, list) {
  		struct sem *owner = sma->sem_base + entry->sops[0].sem_num;

  		list_add_tail(&entry->list, &owner->pending_alter);
  	}
  	/* The entries were re-linked without unlinking; reset the old head. */
  	INIT_LIST_HEAD(&sma->pending_alter);
  }
  
  /**
   * merge_queues - collect the per-semaphore queues in the global queue
   * @sma: semaphore array
   *
   * Splices the pending_alter list of every semaphore of @sma into the
   * array-wide pending_alter list, leaving the per-semaphore lists empty.
   * This is needed for FIFO ordering of the pending single-sop
   * operations when a multi-semop operation must sleep.
   * Only the alter operations are moved; the const operations stay.
   */
  static void merge_queues(struct sem_array *sma)
  {
  	int idx;

  	for (idx = 0; idx < sma->sem_nsems; idx++)
  		list_splice_init(&sma->sem_base[idx].pending_alter,
  				 &sma->pending_alter);
  }
e84ca3337   Davidlohr Bueso   ipc: fix race wit...
238
239
240
241
242
243
244
245
  /*
   * RCU callback for a semaphore array: release the security data of
   * the array first, then pass the rcu head on to ipc_rcu_free().
   */
  static void sem_rcu_free(struct rcu_head *head)
  {
  	struct ipc_rcu *hdr;
  	struct sem_array *sma;

  	hdr = container_of(head, struct ipc_rcu, rcu);
  	sma = ipc_rcu_to_struct(hdr);

  	security_sem_free(sma);
  	ipc_rcu_free(head);
  }
3e148c799   Nadia Derbey   fix idr_find() lo...
246
  /*
184076a9f   Manfred Spraul   ipc/sem.c: fix ra...
247
248
249
250
   * Wait until all currently ongoing simple ops have completed.
   * Caller must own sem_perm.lock.
   * New simple ops cannot start, because simple ops first check
   * that sem_perm.lock is free.
901f6fedc   Manfred Spraul   ipc/sem.c: optimi...
251
   * that a) sem_perm.lock is free and b) complex_count is 0.
184076a9f   Manfred Spraul   ipc/sem.c: fix ra...
252
253
254
255
256
   */
  static void sem_wait_array(struct sem_array *sma)
  {
  	int i;
  	struct sem *sem;
901f6fedc   Manfred Spraul   ipc/sem.c: optimi...
257
258
259
260
261
262
  	if (sma->complex_count)  {
  		/* The thread that increased sma->complex_count waited on
  		 * all sem->lock locks. Thus we don't need to wait again.
  		 */
  		return;
  	}
184076a9f   Manfred Spraul   ipc/sem.c: fix ra...
263
264
265
266
267
268
269
  	for (i = 0; i < sma->sem_nsems; i++) {
  		sem = sma->sem_base + i;
  		spin_unlock_wait(&sem->lock);
  	}
  }
  
  /*
6062a8dc0   Rik van Riel   ipc,sem: fine gra...
270
271
272
273
274
   * If the request contains only one semaphore operation, and there are
   * no complex transactions pending, lock only the semaphore involved.
   * Otherwise, lock the entire semaphore array, since we either have
   * multiple semaphores in our own semops, or we need to look at
   * semaphores from other pending complex operations.
6062a8dc0   Rik van Riel   ipc,sem: fine gra...
275
276
277
278
   */
  static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
  			      int nsops)
  {
184076a9f   Manfred Spraul   ipc/sem.c: fix ra...
279
  	struct sem *sem;
6062a8dc0   Rik van Riel   ipc,sem: fine gra...
280

184076a9f   Manfred Spraul   ipc/sem.c: fix ra...
281
282
283
  	if (nsops != 1) {
  		/* Complex operation - acquire a full lock */
  		ipc_lock_object(&sma->sem_perm);
6062a8dc0   Rik van Riel   ipc,sem: fine gra...
284

184076a9f   Manfred Spraul   ipc/sem.c: fix ra...
285
286
  		/* And wait until all simple ops that are processed
  		 * right now have dropped their locks.
6062a8dc0   Rik van Riel   ipc,sem: fine gra...
287
  		 */
184076a9f   Manfred Spraul   ipc/sem.c: fix ra...
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
  		sem_wait_array(sma);
  		return -1;
  	}
  
  	/*
  	 * Only one semaphore affected - try to optimize locking.
  	 * The rules are:
  	 * - optimized locking is possible if no complex operation
  	 *   is either enqueued or processed right now.
  	 * - The test for enqueued complex ops is simple:
  	 *      sma->complex_count != 0
  	 * - Testing for complex ops that are processed right now is
  	 *   a bit more difficult. Complex ops acquire the full lock
  	 *   and first wait that the running simple ops have completed.
  	 *   (see above)
  	 *   Thus: If we own a simple lock and the global lock is free
  	 *	and complex_count is now 0, then it will stay 0 and
  	 *	thus just locking sem->lock is sufficient.
  	 */
  	sem = sma->sem_base + sops->sem_num;
6062a8dc0   Rik van Riel   ipc,sem: fine gra...
308

184076a9f   Manfred Spraul   ipc/sem.c: fix ra...
309
  	if (sma->complex_count == 0) {
6062a8dc0   Rik van Riel   ipc,sem: fine gra...
310
  		/*
184076a9f   Manfred Spraul   ipc/sem.c: fix ra...
311
312
  		 * It appears that no complex operation is around.
  		 * Acquire the per-semaphore lock.
6062a8dc0   Rik van Riel   ipc,sem: fine gra...
313
  		 */
184076a9f   Manfred Spraul   ipc/sem.c: fix ra...
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
  		spin_lock(&sem->lock);
  
  		/* Then check that the global lock is free */
  		if (!spin_is_locked(&sma->sem_perm.lock)) {
  			/* spin_is_locked() is not a memory barrier */
  			smp_mb();
  
  			/* Now repeat the test of complex_count:
  			 * It can't change anymore until we drop sem->lock.
  			 * Thus: if is now 0, then it will stay 0.
  			 */
  			if (sma->complex_count == 0) {
  				/* fast path successful! */
  				return sops->sem_num;
  			}
6062a8dc0   Rik van Riel   ipc,sem: fine gra...
329
  		}
184076a9f   Manfred Spraul   ipc/sem.c: fix ra...
330
331
332
333
334
  		spin_unlock(&sem->lock);
  	}
  
  	/* slow path: acquire the full lock */
  	ipc_lock_object(&sma->sem_perm);
6062a8dc0   Rik van Riel   ipc,sem: fine gra...
335

184076a9f   Manfred Spraul   ipc/sem.c: fix ra...
336
337
338
339
340
341
342
343
  	if (sma->complex_count == 0) {
  		/* False alarm:
  		 * There is no complex operation, thus we can switch
  		 * back to the fast path.
  		 */
  		spin_lock(&sem->lock);
  		ipc_unlock_object(&sma->sem_perm);
  		return sops->sem_num;
6062a8dc0   Rik van Riel   ipc,sem: fine gra...
344
  	} else {
184076a9f   Manfred Spraul   ipc/sem.c: fix ra...
345
346
  		/* Not a false alarm, thus complete the sequence for a
  		 * full lock.
6062a8dc0   Rik van Riel   ipc,sem: fine gra...
347
  		 */
184076a9f   Manfred Spraul   ipc/sem.c: fix ra...
348
349
  		sem_wait_array(sma);
  		return -1;
6062a8dc0   Rik van Riel   ipc,sem: fine gra...
350
  	}
6062a8dc0   Rik van Riel   ipc,sem: fine gra...
351
352
353
354
355
  }
  
  /*
   * Release whatever sem_lock() acquired.
   * @locknum is the value sem_lock() returned: a semaphore index for a
   * per-semaphore lock, or -1 for the array-wide lock. In the latter case
   * the queues are unmerged (if possible) before the lock is dropped.
   */
  static inline void sem_unlock(struct sem_array *sma, int locknum)
  {
  	if (locknum != -1) {
  		spin_unlock(&sma->sem_base[locknum].lock);
  		return;
  	}

  	unmerge_queues(sma);
  	ipc_unlock_object(&sma->sem_perm);
  }
  
  /*
33b746698   Davidlohr Bueso   ipc: rename ids->...
365
   * sem_lock_(check_) routines are called in the paths where the rwsem
3e148c799   Nadia Derbey   fix idr_find() lo...
366
   * is not held.
321310ced   Linus Torvalds   ipc: move sem_obt...
367
368
   *
   * The caller holds the RCU read lock.
3e148c799   Nadia Derbey   fix idr_find() lo...
369
   */
6062a8dc0   Rik van Riel   ipc,sem: fine gra...
370
371
  static inline struct sem_array *sem_obtain_lock(struct ipc_namespace *ns,
  			int id, struct sembuf *sops, int nsops, int *locknum)
023a53557   Nadia Derbey   ipc: integrate ip...
372
  {
c460b662d   Rik van Riel   ipc,sem: open cod...
373
374
  	struct kern_ipc_perm *ipcp;
  	struct sem_array *sma;
03f02c765   Nadia Derbey   Storing ipcs into...
375

c460b662d   Rik van Riel   ipc,sem: open cod...
376
  	ipcp = ipc_obtain_object(&sem_ids(ns), id);
321310ced   Linus Torvalds   ipc: move sem_obt...
377
378
  	if (IS_ERR(ipcp))
  		return ERR_CAST(ipcp);
b1ed88b47   Pierre Peiffer   IPC: fix error ch...
379

6062a8dc0   Rik van Riel   ipc,sem: fine gra...
380
381
  	sma = container_of(ipcp, struct sem_array, sem_perm);
  	*locknum = sem_lock(sma, sops, nsops);
c460b662d   Rik van Riel   ipc,sem: open cod...
382
383
384
385
386
387
  
  	/* ipc_rmid() may have already freed the ID while sem_lock
  	 * was spinning: verify that the structure is still valid
  	 */
  	if (!ipcp->deleted)
  		return container_of(ipcp, struct sem_array, sem_perm);
6062a8dc0   Rik van Riel   ipc,sem: fine gra...
388
  	sem_unlock(sma, *locknum);
321310ced   Linus Torvalds   ipc: move sem_obt...
389
  	return ERR_PTR(-EINVAL);
023a53557   Nadia Derbey   ipc: integrate ip...
390
  }
16df3674e   Davidlohr Bueso   ipc,sem: do not h...
391
392
393
394
395
396
397
398
399
  /*
   * Look up @id with ipc_obtain_object() and return the enclosing
   * sem_array; a lookup error is passed through as an ERR_PTR.
   * No lock is taken here.
   */
  static inline struct sem_array *sem_obtain_object(struct ipc_namespace *ns, int id)
  {
  	struct kern_ipc_perm *ipcp;

  	ipcp = ipc_obtain_object(&sem_ids(ns), id);
  	if (!IS_ERR(ipcp))
  		return container_of(ipcp, struct sem_array, sem_perm);

  	return ERR_CAST(ipcp);
  }
16df3674e   Davidlohr Bueso   ipc,sem: do not h...
400
401
402
403
404
405
406
  static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns,
  							int id)
  {
  	struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&sem_ids(ns), id);
  
  	if (IS_ERR(ipcp))
  		return ERR_CAST(ipcp);
b1ed88b47   Pierre Peiffer   IPC: fix error ch...
407

03f02c765   Nadia Derbey   Storing ipcs into...
408
  	return container_of(ipcp, struct sem_array, sem_perm);
023a53557   Nadia Derbey   ipc: integrate ip...
409
  }
6ff379721   Pierre Peiffer   IPC/semaphores: c...
410
411
  static inline void sem_lock_and_putref(struct sem_array *sma)
  {
6062a8dc0   Rik van Riel   ipc,sem: fine gra...
412
  	sem_lock(sma, NULL, -1);
e84ca3337   Davidlohr Bueso   ipc: fix race wit...
413
  	ipc_rcu_putref(sma, ipc_rcu_free);
6ff379721   Pierre Peiffer   IPC/semaphores: c...
414
  }
7ca7e564e   Nadia Derbey   ipc: store ipcs i...
415
416
417
418
  /*
   * Pass the kern_ipc_perm of @s to ipc_rmid() for the semaphore ids
   * of namespace @ns.
   */
  static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
  {
  	struct kern_ipc_perm *perm = &s->sem_perm;

  	ipc_rmid(&sem_ids(ns), perm);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
419
420
421
422
423
  /*
   * Lockless wakeup algorithm:
   * Without the check/retry algorithm a lockless wakeup is possible:
   * - queue.status is initialized to -EINTR before blocking.
   * - wakeup is performed by
ab63bc97f   Manfred Spraul   ipc/sem: separate...
424
   *	* unlinking the queue entry from the pending list
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
425
426
427
428
429
430
431
432
433
   *	* setting queue.status to IN_WAKEUP
   *	  This is the notification for the blocked thread that a
   *	  result value is imminent.
   *	* call wake_up_process
   *	* set queue.status to the final value.
   * - the previously blocked thread checks queue.status:
   *   	* if it's IN_WAKEUP, then it must wait until the value changes
   *   	* if it's not -EINTR, then the operation was completed by
   *   	  update_queue. semtimedop can return queue.status without
5f921ae96   Ingo Molnar   [PATCH] sem2mutex...
434
   *   	  performing any operation on the sem array.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
   *   	* otherwise it must acquire the spinlock and check what's up.
   *
   * The two-stage algorithm is necessary to protect against the following
   * races:
   * - if queue.status is set after wake_up_process, then the woken up idle
   *   thread could race forward and try (and fail) to acquire sma->lock
   *   before update_queue had a chance to set queue.status
   * - if queue.status is written before wake_up_process and if the
   *   blocked process is woken up by a signal between writing
   *   queue.status and the wake_up_process, then the woken up
   *   process could return from semtimedop and die by calling
   *   sys_exit before wake_up_process is called. Then wake_up_process
   *   will oops, because the task structure is already invalid.
   *   (yes, this happened on s390 with sysv msg).
   *
   */
  #define IN_WAKEUP	1
f4566f048   Nadia Derbey   ipc: fix wrong co...
452
453
454
455
456
  /**
   * newary - Create a new semaphore set
   * @ns: namespace
   * @params: ptr to the structure that contains key, semflg and nsems
   *
33b746698   Davidlohr Bueso   ipc: rename ids->...
457
   * Called with sem_ids.rwsem held (as a writer)
f4566f048   Nadia Derbey   ipc: fix wrong co...
458
   */
7748dbfaa   Nadia Derbey   ipc: unify the sy...
459
  static int newary(struct ipc_namespace *ns, struct ipc_params *params)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
460
461
462
463
464
  {
  	int id;
  	int retval;
  	struct sem_array *sma;
  	int size;
7748dbfaa   Nadia Derbey   ipc: unify the sy...
465
466
467
  	key_t key = params->key;
  	int nsems = params->u.nsems;
  	int semflg = params->flg;
b97e820ff   Manfred Spraul   ipc/sem.c: add a ...
468
  	int i;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
469
470
471
  
  	if (!nsems)
  		return -EINVAL;
e38935341   Kirill Korotaev   [PATCH] IPC names...
472
  	if (ns->used_sems + nsems > ns->sc_semmns)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
  		return -ENOSPC;
  
  	size = sizeof (*sma) + nsems * sizeof (struct sem);
  	sma = ipc_rcu_alloc(size);
  	if (!sma) {
  		return -ENOMEM;
  	}
  	memset (sma, 0, size);
  
  	sma->sem_perm.mode = (semflg & S_IRWXUGO);
  	sma->sem_perm.key = key;
  
  	sma->sem_perm.security = NULL;
  	retval = security_sem_alloc(sma);
  	if (retval) {
e84ca3337   Davidlohr Bueso   ipc: fix race wit...
488
  		ipc_rcu_putref(sma, ipc_rcu_free);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
489
490
  		return retval;
  	}
e38935341   Kirill Korotaev   [PATCH] IPC names...
491
  	id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);
283bb7fad   Pierre Peiffer   IPC: fix error ca...
492
  	if (id < 0) {
e84ca3337   Davidlohr Bueso   ipc: fix race wit...
493
  		ipc_rcu_putref(sma, sem_rcu_free);
283bb7fad   Pierre Peiffer   IPC: fix error ca...
494
  		return id;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
495
  	}
e38935341   Kirill Korotaev   [PATCH] IPC names...
496
  	ns->used_sems += nsems;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
497
498
  
  	sma->sem_base = (struct sem *) &sma[1];
b97e820ff   Manfred Spraul   ipc/sem.c: add a ...
499

6062a8dc0   Rik van Riel   ipc,sem: fine gra...
500
  	for (i = 0; i < nsems; i++) {
ab63bc97f   Manfred Spraul   ipc/sem: separate...
501
502
  		INIT_LIST_HEAD(&sma->sem_base[i].pending_alter);
  		INIT_LIST_HEAD(&sma->sem_base[i].pending_const);
6062a8dc0   Rik van Riel   ipc,sem: fine gra...
503
504
  		spin_lock_init(&sma->sem_base[i].lock);
  	}
b97e820ff   Manfred Spraul   ipc/sem.c: add a ...
505
506
  
  	sma->complex_count = 0;
ab63bc97f   Manfred Spraul   ipc/sem: separate...
507
508
  	INIT_LIST_HEAD(&sma->pending_alter);
  	INIT_LIST_HEAD(&sma->pending_const);
4daa28f6d   Manfred Spraul   ipc/sem.c: conver...
509
  	INIT_LIST_HEAD(&sma->list_id);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
510
511
  	sma->sem_nsems = nsems;
  	sma->sem_ctime = get_seconds();
6062a8dc0   Rik van Riel   ipc,sem: fine gra...
512
  	sem_unlock(sma, -1);
6d49dab8a   Linus Torvalds   ipc: move rcu_rea...
513
  	rcu_read_unlock();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
514

7ca7e564e   Nadia Derbey   ipc: store ipcs i...
515
  	return sma->sem_perm.id;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
516
  }
7748dbfaa   Nadia Derbey   ipc: unify the sy...
517

f4566f048   Nadia Derbey   ipc: fix wrong co...
518
  /*
33b746698   Davidlohr Bueso   ipc: rename ids->...
519
   * Called with sem_ids.rwsem and ipcp locked.
f4566f048   Nadia Derbey   ipc: fix wrong co...
520
   */
03f02c765   Nadia Derbey   Storing ipcs into...
521
  static inline int sem_security(struct kern_ipc_perm *ipcp, int semflg)
7748dbfaa   Nadia Derbey   ipc: unify the sy...
522
  {
03f02c765   Nadia Derbey   Storing ipcs into...
523
524
525
526
  	struct sem_array *sma;
  
  	sma = container_of(ipcp, struct sem_array, sem_perm);
  	return security_sem_associate(sma, semflg);
7748dbfaa   Nadia Derbey   ipc: unify the sy...
527
  }
f4566f048   Nadia Derbey   ipc: fix wrong co...
528
  /*
33b746698   Davidlohr Bueso   ipc: rename ids->...
529
   * Called with sem_ids.rwsem and ipcp locked.
f4566f048   Nadia Derbey   ipc: fix wrong co...
530
   */
03f02c765   Nadia Derbey   Storing ipcs into...
531
532
  static inline int sem_more_checks(struct kern_ipc_perm *ipcp,
  				struct ipc_params *params)
7748dbfaa   Nadia Derbey   ipc: unify the sy...
533
  {
03f02c765   Nadia Derbey   Storing ipcs into...
534
535
536
537
  	struct sem_array *sma;
  
  	sma = container_of(ipcp, struct sem_array, sem_perm);
  	if (params->u.nsems > sma->sem_nsems)
7748dbfaa   Nadia Derbey   ipc: unify the sy...
538
539
540
541
  		return -EINVAL;
  
  	return 0;
  }
d5460c997   Heiko Carstens   [CVE-2009-0029] S...
542
  /*
   * semget() system call: reject an nsems outside 0..sc_semmsl of the
   * caller's ipc namespace with -EINVAL, otherwise let ipcget() do the
   * work, using the semaphore-specific callbacks and parameters.
   */
  SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg)
  {
  	struct ipc_namespace *ns = current->nsproxy->ipc_ns;
  	struct ipc_params params;
  	struct ipc_ops ops;

  	if (nsems < 0 || nsems > ns->sc_semmsl)
  		return -EINVAL;

  	params.key = key;
  	params.flg = semflg;
  	params.u.nsems = nsems;

  	ops.getnew = newary;
  	ops.associate = sem_security;
  	ops.more_checks = sem_more_checks;

  	return ipcget(ns, &sem_ids(ns), &ops, &params);
  }
b56e88e25   Manfred Spraul   ipc/sem.c: rename...
563
564
565
566
567
568
569
570
571
572
  /** perform_atomic_semop - Perform (if possible) a semaphore operation
   * @sma: semaphore array
   * @sops: array with operations that should be checked
   * @nsems: number of sops
   * @un: undo array
   * @pid: pid that did the change
   *
   * Returns 0 if the operation was possible.
   * Returns 1 if the operation is impossible, the caller must sleep.
   * Negative values are error codes.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
573
   */
b56e88e25   Manfred Spraul   ipc/sem.c: rename...
574
  static int perform_atomic_semop(struct sem_array *sma, struct sembuf *sops,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
  			     int nsops, struct sem_undo *un, int pid)
  {
  	int result, sem_op;
  	struct sembuf *sop;
  	struct sem * curr;
  
  	for (sop = sops; sop < sops + nsops; sop++) {
  		curr = sma->sem_base + sop->sem_num;
  		sem_op = sop->sem_op;
  		result = curr->semval;
    
  		if (!sem_op && result)
  			goto would_block;
  
  		result += sem_op;
  		if (result < 0)
  			goto would_block;
  		if (result > SEMVMX)
  			goto out_of_range;
  		if (sop->sem_flg & SEM_UNDO) {
  			int undo = un->semadj[sop->sem_num] - sem_op;
  			/*
  	 		 *	Exceeding the undo range is an error.
  			 */
  			if (undo < (-SEMAEM - 1) || undo > SEMAEM)
  				goto out_of_range;
  		}
  		curr->semval = result;
  	}
  
  	sop--;
  	while (sop >= sops) {
  		sma->sem_base[sop->sem_num].sempid = pid;
  		if (sop->sem_flg & SEM_UNDO)
  			un->semadj[sop->sem_num] -= sop->sem_op;
  		sop--;
  	}
  	
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
  	return 0;
  
  out_of_range:
  	result = -ERANGE;
  	goto undo;
  
  would_block:
  	if (sop->sem_flg & IPC_NOWAIT)
  		result = -EAGAIN;
  	else
  		result = 1;
  
  undo:
  	sop--;
  	while (sop >= sops) {
  		sma->sem_base[sop->sem_num].semval -= sop->sem_op;
  		sop--;
  	}
  
  	return result;
  }
0a2b9d4c7   Manfred Spraul   ipc/sem.c: move w...
634
635
636
637
638
  /** wake_up_sem_queue_prepare(q, error): Prepare wake-up
   * @q: queue entry that must be signaled
   * @error: Error value for the signal
   *
   * Prepare the wake-up of the queue entry q.
d4212093d   Nick Piggin   ipc/sem.c: sem pr...
639
   */
0a2b9d4c7   Manfred Spraul   ipc/sem.c: move w...
640
641
  static void wake_up_sem_queue_prepare(struct list_head *pt,
  				struct sem_queue *q, int error)
d4212093d   Nick Piggin   ipc/sem.c: sem pr...
642
  {
0a2b9d4c7   Manfred Spraul   ipc/sem.c: move w...
643
644
645
646
647
648
649
  	if (list_empty(pt)) {
  		/*
  		 * Hold preempt off so that we don't get preempted and have the
  		 * wakee busy-wait until we're scheduled back on.
  		 */
  		preempt_disable();
  	}
d4212093d   Nick Piggin   ipc/sem.c: sem pr...
650
  	q->status = IN_WAKEUP;
0a2b9d4c7   Manfred Spraul   ipc/sem.c: move w...
651
  	q->pid = error;
9f1bc2c90   Rik van Riel   ipc,sem: have onl...
652
  	list_add_tail(&q->list, pt);
0a2b9d4c7   Manfred Spraul   ipc/sem.c: move w...
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
  }
  
  /**
   * wake_up_sem_queue_do(pt) - do the actual wake-up
   * @pt: list of tasks to be woken up
   *
   * Do the actual wake-up.
   * The function is called without any locks held, thus the semaphore array
   * could be destroyed already and the tasks can disappear as soon as the
   * status is set to the actual return code.
   */
  static void wake_up_sem_queue_do(struct list_head *pt)
  {
  	struct sem_queue *q, *t;
  	int did_something;
  
  	did_something = !list_empty(pt);
9f1bc2c90   Rik van Riel   ipc,sem: have onl...
670
  	list_for_each_entry_safe(q, t, pt, list) {
0a2b9d4c7   Manfred Spraul   ipc/sem.c: move w...
671
672
673
674
675
676
677
  		wake_up_process(q->sleeper);
  		/* q can disappear immediately after writing q->status. */
  		smp_wmb();
  		q->status = q->pid;
  	}
  	if (did_something)
  		preempt_enable();
d4212093d   Nick Piggin   ipc/sem.c: sem pr...
678
  }
b97e820ff   Manfred Spraul   ipc/sem.c: add a ...
679
680
681
  static void unlink_queue(struct sem_array *sma, struct sem_queue *q)
  {
  	list_del(&q->list);
9f1bc2c90   Rik van Riel   ipc,sem: have onl...
682
  	if (q->nsops > 1)
b97e820ff   Manfred Spraul   ipc/sem.c: add a ...
683
684
  		sma->complex_count--;
  }
fd5db4225   Manfred Spraul   ipc/sem.c: optimi...
685
686
687
688
689
690
691
  /** check_restart(sma, q)
   * @sma: semaphore array
   * @q: the operation that just completed
   *
   * update_queue is O(N^2) when it restarts scanning the whole queue of
   * waiting operations. Therefore this function checks if the restart is
   * really necessary. It is called after a previously waiting operation
ab63bc97f   Manfred Spraul   ipc/sem: separate...
692
693
   * modified the array.
   * Note that wait-for-zero operations are handled without restart.
fd5db4225   Manfred Spraul   ipc/sem.c: optimi...
694
695
696
   */
  static int check_restart(struct sem_array *sma, struct sem_queue *q)
  {
ab63bc97f   Manfred Spraul   ipc/sem: separate...
697
698
  	/* pending complex alter operations are too difficult to analyse */
  	if (!list_empty(&sma->pending_alter))
fd5db4225   Manfred Spraul   ipc/sem.c: optimi...
699
700
701
702
703
  		return 1;
  
  	/* we were a sleeping complex operation. Too difficult */
  	if (q->nsops > 1)
  		return 1;
ab63bc97f   Manfred Spraul   ipc/sem: separate...
704
705
706
707
708
709
710
711
712
713
714
715
716
  	/* It is impossible that someone waits for the new value:
  	 * - complex operations always restart.
  	 * - wait-for-zero are handled seperately.
  	 * - q is a previously sleeping simple operation that
  	 *   altered the array. It must be a decrement, because
  	 *   simple increments never sleep.
  	 * - If there are older (higher priority) decrements
  	 *   in the queue, then they have observed the original
  	 *   semval value and couldn't proceed. The operation
  	 *   decremented to value - thus they won't proceed either.
  	 */
  	return 0;
  }
fd5db4225   Manfred Spraul   ipc/sem.c: optimi...
717

ab63bc97f   Manfred Spraul   ipc/sem: separate...
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
  /**
   * wake_const_ops(sma, semnum, pt) - Wake up non-alter tasks
   * @sma: semaphore array.
   * @semnum: semaphore that was modified.
   * @pt: list head for the tasks that must be woken up.
   *
   * wake_const_ops must be called after a semaphore in a semaphore array
   * was set to 0. If complex const operations are pending, wake_const_ops must
   * be called with semnum = -1, as well as with the number of each modified
   * semaphore.
   * The tasks that must be woken up are added to @pt. The return code
   * is stored in q->pid.
   * The function returns 1 if at least one operation was completed successfully.
   */
  static int wake_const_ops(struct sem_array *sma, int semnum,
  				struct list_head *pt)
  {
  	struct sem_queue *q;
  	struct list_head *walk;
  	struct list_head *pending_list;
  	int semop_completed = 0;
  
  	if (semnum == -1)
  		pending_list = &sma->pending_const;
  	else
  		pending_list = &sma->sem_base[semnum].pending_const;
fd5db4225   Manfred Spraul   ipc/sem.c: optimi...
744

ab63bc97f   Manfred Spraul   ipc/sem: separate...
745
746
747
748
749
750
  	walk = pending_list->next;
  	while (walk != pending_list) {
  		int error;
  
  		q = container_of(walk, struct sem_queue, list);
  		walk = walk->next;
b56e88e25   Manfred Spraul   ipc/sem.c: rename...
751
752
  		error = perform_atomic_semop(sma, q->sops, q->nsops,
  						 q->undo, q->pid);
ab63bc97f   Manfred Spraul   ipc/sem: separate...
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
  
  		if (error <= 0) {
  			/* operation completed, remove from queue & wakeup */
  
  			unlink_queue(sma, q);
  
  			wake_up_sem_queue_prepare(pt, q, error);
  			if (error == 0)
  				semop_completed = 1;
  		}
  	}
  	return semop_completed;
  }
  
  /**
   * do_smart_wakeup_zero(sma, sops, nsops, pt) - wakeup all wait for zero tasks
   * @sma: semaphore array
   * @sops: operations that were performed, may be NULL
   * @nsops: number of operations
   * @pt: list head of the tasks that must be woken up.
   *
   * do_smart_wakeup_zero() checks all required queues for wait-for-zero
   * operations, based on the actual changes that were performed on the
   * semaphore array.
   * The function returns 1 if at least one operation was completed successfully.
   */
  static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops,
  					int nsops, struct list_head *pt)
  {
  	/*
  	 * With sops, only the semaphores named there are candidates.
  	 * No sops means the modified semaphores are not known: assume all
  	 * were changed and look at every semaphore of the array.
  	 */
  	int count = sops ? nsops : sma->sem_nsems;
  	int completed = 0;
  	int saw_zero = 0;
  	int i;
  
  	/* first: the per-semaphore queues */
  	for (i = 0; i < count; i++) {
  		int num = sops ? sops[i].sem_num : i;
  
  		if (sma->sem_base[num].semval != 0)
  			continue;
  
  		saw_zero = 1;
  		completed |= wake_const_ops(sma, num, pt);
  	}
  
  	/*
  	 * If one of the checked semaphores is 0,
  	 * then check the global queue, too.
  	 */
  	if (saw_zero)
  		completed |= wake_const_ops(sma, -1, pt);
  
  	return completed;
  }
636c6be82   Manfred Spraul   ipc/sem.c: optimi...
817
818
819
820
821
  
  /**
   * update_queue(sma, semnum): Look for tasks that can be completed.
   * @sma: semaphore array.
   * @semnum: semaphore that was modified.
0a2b9d4c7   Manfred Spraul   ipc/sem.c: move w...
822
   * @pt: list head for the tasks that must be woken up.
636c6be82   Manfred Spraul   ipc/sem.c: optimi...
823
824
   *
   * update_queue must be called after a semaphore in a semaphore array
9f1bc2c90   Rik van Riel   ipc,sem: have onl...
825
826
827
   * was modified. If multiple semaphores were modified, update_queue must
   * be called with semnum = -1, as well as with the number of each modified
   * semaphore.
0a2b9d4c7   Manfred Spraul   ipc/sem.c: move w...
828
829
   * The tasks that must be woken up are added to @pt. The return code
   * is stored in q->pid.
ab63bc97f   Manfred Spraul   ipc/sem: separate...
830
831
   * The function internally checks if const operations can now succeed.
   *
0a2b9d4c7   Manfred Spraul   ipc/sem.c: move w...
832
   * The function return 1 if at least one semop was completed successfully.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
833
   */
0a2b9d4c7   Manfred Spraul   ipc/sem.c: move w...
834
  static int update_queue(struct sem_array *sma, int semnum, struct list_head *pt)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
835
  {
636c6be82   Manfred Spraul   ipc/sem.c: optimi...
836
837
838
  	struct sem_queue *q;
  	struct list_head *walk;
  	struct list_head *pending_list;
0a2b9d4c7   Manfred Spraul   ipc/sem.c: move w...
839
  	int semop_completed = 0;
636c6be82   Manfred Spraul   ipc/sem.c: optimi...
840

9f1bc2c90   Rik van Riel   ipc,sem: have onl...
841
  	if (semnum == -1)
ab63bc97f   Manfred Spraul   ipc/sem: separate...
842
  		pending_list = &sma->pending_alter;
9f1bc2c90   Rik van Riel   ipc,sem: have onl...
843
  	else
ab63bc97f   Manfred Spraul   ipc/sem: separate...
844
  		pending_list = &sma->sem_base[semnum].pending_alter;
9cad200c7   Nick Piggin   ipc/sem.c: sem us...
845
846
  
  again:
636c6be82   Manfred Spraul   ipc/sem.c: optimi...
847
848
  	walk = pending_list->next;
  	while (walk != pending_list) {
fd5db4225   Manfred Spraul   ipc/sem.c: optimi...
849
  		int error, restart;
636c6be82   Manfred Spraul   ipc/sem.c: optimi...
850

9f1bc2c90   Rik van Riel   ipc,sem: have onl...
851
  		q = container_of(walk, struct sem_queue, list);
636c6be82   Manfred Spraul   ipc/sem.c: optimi...
852
  		walk = walk->next;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
853

d987f8b21   Manfred Spraul   ipc/sem.c: optimi...
854
855
  		/* If we are scanning the single sop, per-semaphore list of
  		 * one semaphore and that semaphore is 0, then it is not
ab63bc97f   Manfred Spraul   ipc/sem: separate...
856
  		 * necessary to scan further: simple increments
d987f8b21   Manfred Spraul   ipc/sem.c: optimi...
857
858
859
860
  		 * that affect only one entry succeed immediately and cannot
  		 * be in the  per semaphore pending queue, and decrements
  		 * cannot be successful if the value is already 0.
  		 */
ab63bc97f   Manfred Spraul   ipc/sem: separate...
861
  		if (semnum != -1 && sma->sem_base[semnum].semval == 0)
d987f8b21   Manfred Spraul   ipc/sem.c: optimi...
862
  			break;
b56e88e25   Manfred Spraul   ipc/sem.c: rename...
863
  		error = perform_atomic_semop(sma, q->sops, q->nsops,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
864
865
866
  					 q->undo, q->pid);
  
  		/* Does q->sleeper still need to sleep? */
9cad200c7   Nick Piggin   ipc/sem.c: sem us...
867
868
  		if (error > 0)
  			continue;
b97e820ff   Manfred Spraul   ipc/sem.c: add a ...
869
  		unlink_queue(sma, q);
9cad200c7   Nick Piggin   ipc/sem.c: sem us...
870

0a2b9d4c7   Manfred Spraul   ipc/sem.c: move w...
871
  		if (error) {
fd5db4225   Manfred Spraul   ipc/sem.c: optimi...
872
  			restart = 0;
0a2b9d4c7   Manfred Spraul   ipc/sem.c: move w...
873
874
  		} else {
  			semop_completed = 1;
ab63bc97f   Manfred Spraul   ipc/sem: separate...
875
  			do_smart_wakeup_zero(sma, q->sops, q->nsops, pt);
fd5db4225   Manfred Spraul   ipc/sem.c: optimi...
876
  			restart = check_restart(sma, q);
0a2b9d4c7   Manfred Spraul   ipc/sem.c: move w...
877
  		}
fd5db4225   Manfred Spraul   ipc/sem.c: optimi...
878

0a2b9d4c7   Manfred Spraul   ipc/sem.c: move w...
879
  		wake_up_sem_queue_prepare(pt, q, error);
fd5db4225   Manfred Spraul   ipc/sem.c: optimi...
880
  		if (restart)
9cad200c7   Nick Piggin   ipc/sem.c: sem us...
881
  			goto again;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
882
  	}
0a2b9d4c7   Manfred Spraul   ipc/sem.c: move w...
883
  	return semop_completed;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
884
  }
0a2b9d4c7   Manfred Spraul   ipc/sem.c: move w...
885
  /**
   * set_semotime(sma, sops) - set sem_otime
   * @sma: semaphore array
   * @sops: operations that modified the array, may be NULL
   *
   * sem_otime is replicated to avoid cache line thrashing.
   * This function sets one instance to the current time: the one of the
   * semaphore named by the first operation, or semaphore 0 if @sops is NULL.
   */
  static void set_semotime(struct sem_array *sma, struct sembuf *sops)
  {
  	int idx = sops ? sops[0].sem_num : 0;
  
  	sma->sem_base[idx].sem_otime = get_seconds();
  }
  
  /**
0a2b9d4c7   Manfred Spraul   ipc/sem.c: move w...
904
   * do_smart_update(sma, sops, nsops, otime, pt) - optimized update_queue
fd5db4225   Manfred Spraul   ipc/sem.c: optimi...
905
906
907
   * @sma: semaphore array
   * @sops: operations that were performed
   * @nsops: number of operations
0a2b9d4c7   Manfred Spraul   ipc/sem.c: move w...
908
909
   * @otime: force setting otime
   * @pt: list head of the tasks that must be woken up.
fd5db4225   Manfred Spraul   ipc/sem.c: optimi...
910
   *
ab63bc97f   Manfred Spraul   ipc/sem: separate...
911
912
   * do_smart_update() does the required calls to update_queue and wakeup_zero,
   * based on the actual changes that were performed on the semaphore array.
0a2b9d4c7   Manfred Spraul   ipc/sem.c: move w...
913
914
915
   * Note that the function does not do the actual wake-up: the caller is
   * responsible for calling wake_up_sem_queue_do(@pt).
   * It is safe to perform this call after dropping all locks.
fd5db4225   Manfred Spraul   ipc/sem.c: optimi...
916
   */
0a2b9d4c7   Manfred Spraul   ipc/sem.c: move w...
917
918
  static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsops,
  			int otime, struct list_head *pt)
fd5db4225   Manfred Spraul   ipc/sem.c: optimi...
919
920
  {
  	int i;
ab63bc97f   Manfred Spraul   ipc/sem: separate...
921
  	otime |= do_smart_wakeup_zero(sma, sops, nsops, pt);
e5639c528   Manfred Spraul   ipc/sem.c: always...
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
  	if (!list_empty(&sma->pending_alter)) {
  		/* semaphore array uses the global queue - just process it. */
  		otime |= update_queue(sma, -1, pt);
  	} else {
  		if (!sops) {
  			/*
  			 * No sops, thus the modified semaphores are not
  			 * known. Check all.
  			 */
  			for (i = 0; i < sma->sem_nsems; i++)
  				otime |= update_queue(sma, i, pt);
  		} else {
  			/*
  			 * Check the semaphores that were increased:
  			 * - No complex ops, thus all sleeping ops are
  			 *   decrease.
  			 * - if we decreased the value, then any sleeping
  			 *   semaphore ops wont be able to run: If the
  			 *   previous value was too small, then the new
  			 *   value will be too small, too.
  			 */
  			for (i = 0; i < nsops; i++) {
  				if (sops[i].sem_op > 0) {
  					otime |= update_queue(sma,
  							sops[i].sem_num, pt);
  				}
ab465df9d   Manfred Spraul   ipc/sem.c: Fix mi...
948
  			}
9f1bc2c90   Rik van Riel   ipc,sem: have onl...
949
  		}
fd5db4225   Manfred Spraul   ipc/sem.c: optimi...
950
  	}
e556ea019   Manfred Spraul   ipc/sem.c: update...
951
952
  	if (otime)
  		set_semotime(sma, sops);
fd5db4225   Manfred Spraul   ipc/sem.c: optimi...
953
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
  /* The following counts are associated to each semaphore:
   *   semncnt        number of tasks waiting on semval being nonzero
   *   semzcnt        number of tasks waiting on semval being zero
   * This model assumes that a task waits on exactly one semaphore.
   * Since semaphore operations are to be performed atomically, tasks actually
   * wait on a whole sequence of semaphores simultaneously.
   * The counts we return here are a rough approximation, but still
   * warrant that semncnt+semzcnt>0 if the task is on the pending queue.
   */
  /*
   * Count the queued operations that would sleep waiting for semaphore
   * @semnum to increase: decrements without IPC_NOWAIT, both on the
   * per-semaphore queue and on the array-wide queue.
   */
  static int count_semncnt(struct sem_array *sma, ushort semnum)
  {
  	struct sem_queue *q;
  	int waiters = 0;
  
  	/* per-semaphore queue: only the first sembuf is inspected */
  	list_for_each_entry(q, &sma->sem_base[semnum].pending_alter, list) {
  		struct sembuf *op = q->sops;
  
  		BUG_ON(op->sem_num != semnum);
  		if (op->sem_op >= 0)
  			continue;
  		if (op->sem_flg & IPC_NOWAIT)
  			continue;
  		waiters++;
  	}
  
  	/* array-wide queue: every sembuf of every entry is inspected */
  	list_for_each_entry(q, &sma->pending_alter, list) {
  		int i;
  
  		for (i = 0; i < q->nsops; i++) {
  			struct sembuf *op = &q->sops[i];
  
  			if (op->sem_num == semnum && op->sem_op < 0 &&
  			    !(op->sem_flg & IPC_NOWAIT))
  				waiters++;
  		}
  	}
  
  	return waiters;
  }
a1193f8ec   Manfred Spraul   ipc/sem.c: conver...
987

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
988
989
990
991
992
993
  /*
   * Count the queued operations that would sleep waiting for semaphore
   * @semnum to become zero: wait-for-zero operations without IPC_NOWAIT,
   * both on the per-semaphore queue and on the array-wide queue.
   */
  static int count_semzcnt(struct sem_array *sma, ushort semnum)
  {
  	struct sem_queue *q;
  	int waiters = 0;
  
  	/* per-semaphore queue: only the first sembuf is inspected */
  	list_for_each_entry(q, &sma->sem_base[semnum].pending_const, list) {
  		struct sembuf *op = q->sops;
  
  		BUG_ON(op->sem_num != semnum);
  		if (op->sem_op != 0)
  			continue;
  		if (op->sem_flg & IPC_NOWAIT)
  			continue;
  		waiters++;
  	}
  
  	/* array-wide queue: every sembuf of every entry is inspected */
  	list_for_each_entry(q, &sma->pending_const, list) {
  		int i;
  
  		for (i = 0; i < q->nsops; i++) {
  			struct sembuf *op = &q->sops[i];
  
  			if (op->sem_num == semnum && op->sem_op == 0 &&
  			    !(op->sem_flg & IPC_NOWAIT))
  				waiters++;
  		}
  	}
  
  	return waiters;
  }
33b746698   Davidlohr Bueso   ipc: rename ids->...
1012
1013
  /* Free a semaphore set. freeary() is called with sem_ids.rwsem locked
   * as a writer and the spinlock for this semaphore set hold. sem_ids.rwsem
3e148c799   Nadia Derbey   fix idr_find() lo...
1014
   * remains locked on exit.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1015
   */
01b8b07a5   Pierre Peiffer   IPC: consolidate ...
1016
  static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1017
  {
380af1b33   Manfred Spraul   ipc/sem.c: rewrit...
1018
1019
  	struct sem_undo *un, *tu;
  	struct sem_queue *q, *tq;
01b8b07a5   Pierre Peiffer   IPC: consolidate ...
1020
  	struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm);
0a2b9d4c7   Manfred Spraul   ipc/sem.c: move w...
1021
  	struct list_head tasks;
9f1bc2c90   Rik van Riel   ipc,sem: have onl...
1022
  	int i;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1023

380af1b33   Manfred Spraul   ipc/sem.c: rewrit...
1024
  	/* Free the existing undo structures for this semaphore set.  */
115d40dbe   Davidlohr Bueso   ipc: close open c...
1025
  	ipc_assert_locked_object(&sma->sem_perm);
380af1b33   Manfred Spraul   ipc/sem.c: rewrit...
1026
1027
1028
  	list_for_each_entry_safe(un, tu, &sma->list_id, list_id) {
  		list_del(&un->list_id);
  		spin_lock(&un->ulp->lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1029
  		un->semid = -1;
380af1b33   Manfred Spraul   ipc/sem.c: rewrit...
1030
1031
  		list_del_rcu(&un->list_proc);
  		spin_unlock(&un->ulp->lock);
693a8b6ee   Lai Jiangshan   ipc,rcu: Convert ...
1032
  		kfree_rcu(un, rcu);
380af1b33   Manfred Spraul   ipc/sem.c: rewrit...
1033
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1034
1035
  
  	/* Wake up all pending processes and let them fail with EIDRM. */
0a2b9d4c7   Manfred Spraul   ipc/sem.c: move w...
1036
  	INIT_LIST_HEAD(&tasks);
ab63bc97f   Manfred Spraul   ipc/sem: separate...
1037
1038
1039
1040
1041
1042
  	list_for_each_entry_safe(q, tq, &sma->pending_const, list) {
  		unlink_queue(sma, q);
  		wake_up_sem_queue_prepare(&tasks, q, -EIDRM);
  	}
  
  	list_for_each_entry_safe(q, tq, &sma->pending_alter, list) {
b97e820ff   Manfred Spraul   ipc/sem.c: add a ...
1043
  		unlink_queue(sma, q);
0a2b9d4c7   Manfred Spraul   ipc/sem.c: move w...
1044
  		wake_up_sem_queue_prepare(&tasks, q, -EIDRM);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1045
  	}
9f1bc2c90   Rik van Riel   ipc,sem: have onl...
1046
1047
  	for (i = 0; i < sma->sem_nsems; i++) {
  		struct sem *sem = sma->sem_base + i;
ab63bc97f   Manfred Spraul   ipc/sem: separate...
1048
1049
1050
1051
1052
  		list_for_each_entry_safe(q, tq, &sem->pending_const, list) {
  			unlink_queue(sma, q);
  			wake_up_sem_queue_prepare(&tasks, q, -EIDRM);
  		}
  		list_for_each_entry_safe(q, tq, &sem->pending_alter, list) {
9f1bc2c90   Rik van Riel   ipc,sem: have onl...
1053
1054
1055
1056
  			unlink_queue(sma, q);
  			wake_up_sem_queue_prepare(&tasks, q, -EIDRM);
  		}
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1057

7ca7e564e   Nadia Derbey   ipc: store ipcs i...
1058
1059
  	/* Remove the semaphore set from the IDR */
  	sem_rmid(ns, sma);
6062a8dc0   Rik van Riel   ipc,sem: fine gra...
1060
  	sem_unlock(sma, -1);
6d49dab8a   Linus Torvalds   ipc: move rcu_rea...
1061
  	rcu_read_unlock();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1062

0a2b9d4c7   Manfred Spraul   ipc/sem.c: move w...
1063
  	wake_up_sem_queue_do(&tasks);
e38935341   Kirill Korotaev   [PATCH] IPC names...
1064
  	ns->used_sems -= sma->sem_nsems;
e84ca3337   Davidlohr Bueso   ipc: fix race wit...
1065
  	ipc_rcu_putref(sma, sem_rcu_free);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
  }
  
  /*
   * Copy a semid64_ds to user space in the layout selected by @version:
   * IPC_64 copies @in unchanged, IPC_OLD converts it to a zero-initialised
   * struct semid_ds first. Returns the result of copy_to_user(), or -EINVAL
   * (converted to unsigned long) for any other @version.
   */
  static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version)
  {
  	if (version == IPC_64)
  		return copy_to_user(buf, in, sizeof(*in));
  
  	if (version == IPC_OLD) {
  		struct semid_ds out;
  
  		/* zero first so that unset fields and padding are not leaked */
  		memset(&out, 0, sizeof(out));
  
  		ipc64_perm_to_ipc_perm(&in->sem_perm, &out.sem_perm);
  
  		out.sem_otime	= in->sem_otime;
  		out.sem_ctime	= in->sem_ctime;
  		out.sem_nsems	= in->sem_nsems;
  
  		return copy_to_user(buf, &out, sizeof(out));
  	}
  
  	return -EINVAL;
  }
bf6830ad6   Manfred Spraul   ipc/sem.c: replac...
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
  /*
   * sem_otime is kept per semaphore; the value reported for the whole array
   * is the most recent of the per-semaphore instances.
   */
  static time_t get_semotime(struct sem_array *sma)
  {
  	time_t latest = sma->sem_base[0].sem_otime;
  	int idx;
  
  	for (idx = 1; idx < sma->sem_nsems; idx++) {
  		/* read each instance exactly once */
  		time_t cand = sma->sem_base[idx].sem_otime;
  
  		latest = (cand > latest) ? cand : latest;
  	}
  
  	return latest;
  }
4b9fcb0ec   Pierre Peiffer   IPC/semaphores: c...
1103
  /*
   * IPC_INFO / SEM_INFO: fill a struct seminfo from the namespace limits and
   * copy it to @p. Returns the highest in-use id (0 if there is none) or a
   * negative error code.
   */
  static int semctl_nolock_info(struct ipc_namespace *ns, int cmd,
  			      void __user *p)
  {
  	struct seminfo seminfo;
  	int max_id;
  	int err;
  
  	err = security_sem_semctl(NULL, cmd);
  	if (err)
  		return err;
  
  	memset(&seminfo, 0, sizeof(seminfo));
  	seminfo.semmni = ns->sc_semmni;
  	seminfo.semmns = ns->sc_semmns;
  	seminfo.semmsl = ns->sc_semmsl;
  	seminfo.semopm = ns->sc_semopm;
  	seminfo.semvmx = SEMVMX;
  	seminfo.semmnu = SEMMNU;
  	seminfo.semmap = SEMMAP;
  	seminfo.semume = SEMUME;
  
  	down_read(&sem_ids(ns).rwsem);
  	if (cmd == SEM_INFO) {
  		/* SEM_INFO reports current usage in these two fields */
  		seminfo.semusz = sem_ids(ns).in_use;
  		seminfo.semaem = ns->used_sems;
  	} else {
  		seminfo.semusz = SEMUSZ;
  		seminfo.semaem = SEMAEM;
  	}
  	max_id = ipc_get_maxid(&sem_ids(ns));
  	up_read(&sem_ids(ns).rwsem);
  
  	if (copy_to_user(p, &seminfo, sizeof(struct seminfo)))
  		return -EFAULT;
  
  	return (max_id < 0) ? 0 : max_id;
  }
  
  /*
   * IPC_STAT / SEM_STAT: snapshot the array's semid64_ds under RCU and copy
   * it to @p in the layout selected by @version. Returns 0 for IPC_STAT, the
   * array's id for SEM_STAT, or a negative error code.
   */
  static int semctl_nolock_stat(struct ipc_namespace *ns, int semid,
  			      int cmd, int version, void __user *p)
  {
  	struct semid64_ds tbuf;
  	struct sem_array *sma;
  	int id = 0;
  	int err;
  
  	memset(&tbuf, 0, sizeof(tbuf));
  
  	rcu_read_lock();
  	if (cmd == SEM_STAT)
  		sma = sem_obtain_object(ns, semid);
  	else
  		sma = sem_obtain_object_check(ns, semid);
  
  	if (IS_ERR(sma)) {
  		err = PTR_ERR(sma);
  		goto out_unlock;
  	}
  
  	/* only SEM_STAT hands the id of the array back to the caller */
  	if (cmd == SEM_STAT)
  		id = sma->sem_perm.id;
  
  	err = -EACCES;
  	if (ipcperms(ns, &sma->sem_perm, S_IRUGO))
  		goto out_unlock;
  
  	err = security_sem_semctl(sma, cmd);
  	if (err)
  		goto out_unlock;
  
  	kernel_to_ipc64_perm(&sma->sem_perm, &tbuf.sem_perm);
  	tbuf.sem_otime = get_semotime(sma);
  	tbuf.sem_ctime = sma->sem_ctime;
  	tbuf.sem_nsems = sma->sem_nsems;
  	rcu_read_unlock();
  
  	/* the copy to user space is done outside the RCU section */
  	if (copy_semid_to_user(p, &tbuf, version))
  		return -EFAULT;
  	return id;
  
  out_unlock:
  	rcu_read_unlock();
  	return err;
  }
  
  /*
   * semctl() commands that do not take the semaphore array lock:
   * IPC_INFO, SEM_INFO, IPC_STAT and SEM_STAT. Any other @cmd yields -EINVAL.
   */
  static int semctl_nolock(struct ipc_namespace *ns, int semid,
  			 int cmd, int version, void __user *p)
  {
  	switch (cmd) {
  	case IPC_INFO:
  	case SEM_INFO:
  		return semctl_nolock_info(ns, cmd, p);
  	case IPC_STAT:
  	case SEM_STAT:
  		return semctl_nolock_stat(ns, semid, cmd, version, p);
  	default:
  		return -EINVAL;
  	}
  }
e1fd1f490   Al Viro   get rid of union ...
1190
1191
1192
1193
1194
1195
1196
  static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum,
  		unsigned long arg)
  {
  	struct sem_undo *un;
  	struct sem_array *sma;
  	struct sem* curr;
  	int err;
e1fd1f490   Al Viro   get rid of union ...
1197
1198
1199
1200
1201
1202
1203
1204
1205
  	struct list_head tasks;
  	int val;
  #if defined(CONFIG_64BIT) && defined(__BIG_ENDIAN)
  	/* big-endian 64bit */
  	val = arg >> 32;
  #else
  	/* 32bit or little-endian 64bit */
  	val = arg;
  #endif
6062a8dc0   Rik van Riel   ipc,sem: fine gra...
1206
1207
  	if (val > SEMVMX || val < 0)
  		return -ERANGE;
e1fd1f490   Al Viro   get rid of union ...
1208
1209
  
  	INIT_LIST_HEAD(&tasks);
e1fd1f490   Al Viro   get rid of union ...
1210

6062a8dc0   Rik van Riel   ipc,sem: fine gra...
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
  	rcu_read_lock();
  	sma = sem_obtain_object_check(ns, semid);
  	if (IS_ERR(sma)) {
  		rcu_read_unlock();
  		return PTR_ERR(sma);
  	}
  
  	if (semnum < 0 || semnum >= sma->sem_nsems) {
  		rcu_read_unlock();
  		return -EINVAL;
  	}
  
  
  	if (ipcperms(ns, &sma->sem_perm, S_IWUGO)) {
  		rcu_read_unlock();
  		return -EACCES;
  	}
e1fd1f490   Al Viro   get rid of union ...
1228
1229
  
  	err = security_sem_semctl(sma, SETVAL);
6062a8dc0   Rik van Riel   ipc,sem: fine gra...
1230
1231
1232
1233
  	if (err) {
  		rcu_read_unlock();
  		return -EACCES;
  	}
e1fd1f490   Al Viro   get rid of union ...
1234

6062a8dc0   Rik van Riel   ipc,sem: fine gra...
1235
  	sem_lock(sma, NULL, -1);
e1fd1f490   Al Viro   get rid of union ...
1236
1237
  
  	curr = &sma->sem_base[semnum];
115d40dbe   Davidlohr Bueso   ipc: close open c...
1238
  	ipc_assert_locked_object(&sma->sem_perm);
e1fd1f490   Al Viro   get rid of union ...
1239
1240
1241
1242
1243
1244
1245
1246
  	list_for_each_entry(un, &sma->list_id, list_id)
  		un->semadj[semnum] = 0;
  
  	curr->semval = val;
  	curr->sempid = task_tgid_vnr(current);
  	sma->sem_ctime = get_seconds();
  	/* maybe some queued-up processes were waiting for this */
  	do_smart_update(sma, NULL, 0, 0, &tasks);
6062a8dc0   Rik van Riel   ipc,sem: fine gra...
1247
  	sem_unlock(sma, -1);
6d49dab8a   Linus Torvalds   ipc: move rcu_rea...
1248
  	rcu_read_unlock();
e1fd1f490   Al Viro   get rid of union ...
1249
  	wake_up_sem_queue_do(&tasks);
6062a8dc0   Rik van Riel   ipc,sem: fine gra...
1250
  	return 0;
e1fd1f490   Al Viro   get rid of union ...
1251
  }
e38935341   Kirill Korotaev   [PATCH] IPC names...
1252
  /*
   * semctl_main - GETALL, SETALL, GETVAL, GETPID, GETNCNT and GETZCNT
   * @ns: ipc namespace used to look up @semid
   * @semid: id of the semaphore array
   * @semnum: semaphore index, only used by the single-semaphore commands
   * @cmd: the semctl command
   * @p: user buffer holding one ushort per semaphore (GETALL / SETALL)
   *
   * Returns the queried value for GETVAL/GETPID/GETNCNT/GETZCNT, 0 for a
   * successful GETALL/SETALL, or a negative errno.
   *
   * The array is looked up under rcu_read_lock() only, so an IPC_RMID may
   * run between the lookup and the moment sem_lock() is obtained.  Every
   * time the array lock is taken, sem_perm.deleted is therefore tested
   * before the array content is used, and -EIDRM is returned if it is set.
   */
  static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
  		int cmd, void __user *p)
  {
  	struct sem_array *sma;
  	struct sem *curr;
  	int err, nsems;
  	ushort fast_sem_io[SEMMSL_FAST];
  	ushort *sem_io = fast_sem_io;
  	struct list_head tasks;

  	INIT_LIST_HEAD(&tasks);

  	rcu_read_lock();
  	sma = sem_obtain_object_check(ns, semid);
  	if (IS_ERR(sma)) {
  		rcu_read_unlock();
  		return PTR_ERR(sma);
  	}

  	nsems = sma->sem_nsems;

  	/* SETALL modifies the array, every other command only reads it */
  	err = -EACCES;
  	if (ipcperms(ns, &sma->sem_perm, cmd == SETALL ? S_IWUGO : S_IRUGO))
  		goto out_rcu_wakeup;

  	err = security_sem_semctl(sma, cmd);
  	if (err)
  		goto out_rcu_wakeup;

  	err = -EACCES;
  	switch (cmd) {
  	case GETALL:
  	{
  		ushort __user *array = p;
  		int i;

  		sem_lock(sma, NULL, -1);
  		/* The array may have been removed while we waited for the lock */
  		if (sma->sem_perm.deleted) {
  			err = -EIDRM;
  			goto out_unlock;
  		}
  		if (nsems > SEMMSL_FAST) {
  			/*
  			 * The on-stack buffer is too small.  Pin the array
  			 * with a reference, drop all locks for the
  			 * allocation, then re-lock and re-validate.
  			 */
  			if (!ipc_rcu_getref(sma)) {
  				err = -EIDRM;
  				goto out_unlock;
  			}
  			sem_unlock(sma, -1);
  			rcu_read_unlock();
  			sem_io = ipc_alloc(sizeof(ushort)*nsems);
  			if (sem_io == NULL) {
  				ipc_rcu_putref(sma, ipc_rcu_free);
  				return -ENOMEM;
  			}

  			rcu_read_lock();
  			sem_lock_and_putref(sma);
  			if (sma->sem_perm.deleted) {
  				err = -EIDRM;
  				goto out_unlock;
  			}
  		}
  		for (i = 0; i < sma->sem_nsems; i++)
  			sem_io[i] = sma->sem_base[i].semval;
  		sem_unlock(sma, -1);
  		rcu_read_unlock();
  		/* Copy to user space only after all locks are dropped */
  		err = 0;
  		if (copy_to_user(array, sem_io, nsems*sizeof(ushort)))
  			err = -EFAULT;
  		goto out_free;
  	}
  	case SETALL:
  	{
  		int i;
  		struct sem_undo *un;

  		/* Pin the array, then leave the rcu section for the user copy */
  		if (!ipc_rcu_getref(sma)) {
  			err = -EIDRM;
  			goto out_rcu_wakeup;
  		}
  		rcu_read_unlock();

  		if (nsems > SEMMSL_FAST) {
  			sem_io = ipc_alloc(sizeof(ushort)*nsems);
  			if (sem_io == NULL) {
  				ipc_rcu_putref(sma, ipc_rcu_free);
  				return -ENOMEM;
  			}
  		}

  		if (copy_from_user(sem_io, p, nsems*sizeof(ushort))) {
  			ipc_rcu_putref(sma, ipc_rcu_free);
  			err = -EFAULT;
  			goto out_free;
  		}

  		/* Reject the whole request if any value exceeds SEMVMX */
  		for (i = 0; i < nsems; i++) {
  			if (sem_io[i] > SEMVMX) {
  				ipc_rcu_putref(sma, ipc_rcu_free);
  				err = -ERANGE;
  				goto out_free;
  			}
  		}
  		rcu_read_lock();
  		sem_lock_and_putref(sma);
  		if (sma->sem_perm.deleted) {
  			err = -EIDRM;
  			goto out_unlock;
  		}

  		for (i = 0; i < nsems; i++)
  			sma->sem_base[i].semval = sem_io[i];

  		/* All pending undo adjustments of this array are reset */
  		ipc_assert_locked_object(&sma->sem_perm);
  		list_for_each_entry(un, &sma->list_id, list_id) {
  			for (i = 0; i < nsems; i++)
  				un->semadj[i] = 0;
  		}
  		sma->sem_ctime = get_seconds();
  		/* maybe some queued-up processes were waiting for this */
  		do_smart_update(sma, NULL, 0, 0, &tasks);
  		err = 0;
  		goto out_unlock;
  	}
  	/* GETVAL, GETPID, GETNCNT, GETZCNT: fall-through */
  	}
  	err = -EINVAL;
  	if (semnum < 0 || semnum >= nsems)
  		goto out_rcu_wakeup;

  	sem_lock(sma, NULL, -1);
  	/* Same RMID race as above: validate the array once it is locked */
  	if (sma->sem_perm.deleted) {
  		err = -EIDRM;
  		goto out_unlock;
  	}
  	curr = &sma->sem_base[semnum];

  	/* Any command not listed below leaves err at -EINVAL */
  	switch (cmd) {
  	case GETVAL:
  		err = curr->semval;
  		goto out_unlock;
  	case GETPID:
  		err = curr->sempid;
  		goto out_unlock;
  	case GETNCNT:
  		err = count_semncnt(sma, semnum);
  		goto out_unlock;
  	case GETZCNT:
  		err = count_semzcnt(sma, semnum);
  		goto out_unlock;
  	}

  out_unlock:
  	sem_unlock(sma, -1);
  out_rcu_wakeup:
  	rcu_read_unlock();
  	/* Wake the tasks collected by do_smart_update(), outside the locks */
  	wake_up_sem_queue_do(&tasks);
  out_free:
  	if (sem_io != fast_sem_io)
  		ipc_free(sem_io, sizeof(ushort)*nsems);
  	return err;
  }
016d7132f   Pierre Peiffer   IPC: get rid of t...
1406
1407
  /*
   * Fetch a semid descriptor from user space into the 64-bit layout.
   *
   * IPC_64:  @buf is copied into @out as is.
   * IPC_OLD: @buf is read as a struct semid_ds and only the uid, gid and
   *          mode of sem_perm are transferred to @out.
   *
   * Returns 0 on success, -EFAULT if the user copy fails and -EINVAL for
   * any other @version.  Note that the return type is unsigned long, so
   * callers should only test the result for zero / non-zero.
   */
  static inline unsigned long
  copy_semid_from_user(struct semid64_ds *out, void __user *buf, int version)
  {
  	struct semid_ds legacy;

  	if (version == IPC_64)
  		return copy_from_user(out, buf, sizeof(*out)) ? -EFAULT : 0;

  	if (version != IPC_OLD)
  		return -EINVAL;

  	if (copy_from_user(&legacy, buf, sizeof(legacy)))
  		return -EFAULT;

  	out->sem_perm.uid	= legacy.sem_perm.uid;
  	out->sem_perm.gid	= legacy.sem_perm.gid;
  	out->sem_perm.mode	= legacy.sem_perm.mode;

  	return 0;
  }
522bb2a2b   Pierre Peiffer   IPC/semaphores: m...
1430
  /*
33b746698   Davidlohr Bueso   ipc: rename ids->...
1431
   * This function handles some semctl commands which require the rwsem
522bb2a2b   Pierre Peiffer   IPC/semaphores: m...
1432
   * to be held in write mode.
33b746698   Davidlohr Bueso   ipc: rename ids->...
1433
   * NOTE: no locks must be held, the rwsem is taken inside this function.
522bb2a2b   Pierre Peiffer   IPC/semaphores: m...
1434
   */
21a4826a7   Pierre Peiffer   IPC/semaphores: r...
1435
  static int semctl_down(struct ipc_namespace *ns, int semid,
e1fd1f490   Al Viro   get rid of union ...
1436
  		       int cmd, int version, void __user *p)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1437
1438
1439
  {
  	struct sem_array *sma;
  	int err;
016d7132f   Pierre Peiffer   IPC: get rid of t...
1440
  	struct semid64_ds semid64;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1441
1442
1443
  	struct kern_ipc_perm *ipcp;
  
  	if(cmd == IPC_SET) {
e1fd1f490   Al Viro   get rid of union ...
1444
  		if (copy_semid_from_user(&semid64, p, version))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1445
  			return -EFAULT;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1446
  	}
073115d6b   Steve Grubb   [PATCH] Rework of...
1447

33b746698   Davidlohr Bueso   ipc: rename ids->...
1448
  	down_write(&sem_ids(ns).rwsem);
ac9bc6e39   Davidlohr Bueso   ipc: move locking...
1449
  	rcu_read_lock();
16df3674e   Davidlohr Bueso   ipc,sem: do not h...
1450
1451
  	ipcp = ipcctl_pre_down_nolock(ns, &sem_ids(ns), semid, cmd,
  				      &semid64.sem_perm, 0);
ac9bc6e39   Davidlohr Bueso   ipc: move locking...
1452
1453
  	if (IS_ERR(ipcp)) {
  		err = PTR_ERR(ipcp);
ac9bc6e39   Davidlohr Bueso   ipc: move locking...
1454
1455
  		goto out_unlock1;
  	}
073115d6b   Steve Grubb   [PATCH] Rework of...
1456

a5f75e7f2   Pierre Peiffer   IPC: consolidate ...
1457
  	sma = container_of(ipcp, struct sem_array, sem_perm);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1458
1459
  
  	err = security_sem_semctl(sma, cmd);
ac9bc6e39   Davidlohr Bueso   ipc: move locking...
1460
1461
  	if (err)
  		goto out_unlock1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1462

ac9bc6e39   Davidlohr Bueso   ipc: move locking...
1463
  	switch (cmd) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1464
  	case IPC_RMID:
6062a8dc0   Rik van Riel   ipc,sem: fine gra...
1465
  		sem_lock(sma, NULL, -1);
ac9bc6e39   Davidlohr Bueso   ipc: move locking...
1466
  		/* freeary unlocks the ipc object and rcu */
01b8b07a5   Pierre Peiffer   IPC: consolidate ...
1467
  		freeary(ns, ipcp);
522bb2a2b   Pierre Peiffer   IPC/semaphores: m...
1468
  		goto out_up;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1469
  	case IPC_SET:
6062a8dc0   Rik van Riel   ipc,sem: fine gra...
1470
  		sem_lock(sma, NULL, -1);
1efdb69b0   Eric W. Biederman   userns: Convert i...
1471
1472
  		err = ipc_update_perm(&semid64.sem_perm, ipcp);
  		if (err)
ac9bc6e39   Davidlohr Bueso   ipc: move locking...
1473
  			goto out_unlock0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1474
  		sma->sem_ctime = get_seconds();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1475
1476
  		break;
  	default:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1477
  		err = -EINVAL;
ac9bc6e39   Davidlohr Bueso   ipc: move locking...
1478
  		goto out_unlock1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1479
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1480

ac9bc6e39   Davidlohr Bueso   ipc: move locking...
1481
  out_unlock0:
6062a8dc0   Rik van Riel   ipc,sem: fine gra...
1482
  	sem_unlock(sma, -1);
ac9bc6e39   Davidlohr Bueso   ipc: move locking...
1483
  out_unlock1:
6d49dab8a   Linus Torvalds   ipc: move rcu_rea...
1484
  	rcu_read_unlock();
522bb2a2b   Pierre Peiffer   IPC/semaphores: m...
1485
  out_up:
33b746698   Davidlohr Bueso   ipc: rename ids->...
1486
  	up_write(&sem_ids(ns).rwsem);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1487
1488
  	return err;
  }
e1fd1f490   Al Viro   get rid of union ...
1489
  /*
   * semctl() entry point: validate @semid, strip the version flag from
   * @cmd and hand the request to the helper that handles that command.
   * @arg is passed on as a user pointer, except for SETVAL which receives
   * the raw value.  Unknown commands yield -EINVAL.
   */
  SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, unsigned long, arg)
  {
  	void __user *p = (void __user *)arg;
  	struct ipc_namespace *ns;
  	int version;

  	if (semid < 0)
  		return -EINVAL;

  	/* ipc_parse_version() may rewrite cmd, so run it before the switch */
  	version = ipc_parse_version(&cmd);
  	ns = current->nsproxy->ipc_ns;

  	switch (cmd) {
  	case IPC_INFO:
  	case SEM_INFO:
  	case IPC_STAT:
  	case SEM_STAT:
  		return semctl_nolock(ns, semid, cmd, version, p);

  	case GETALL:
  	case GETVAL:
  	case GETPID:
  	case GETNCNT:
  	case GETZCNT:
  	case SETALL:
  		return semctl_main(ns, semid, semnum, cmd, p);

  	case SETVAL:
  		return semctl_setval(ns, semid, semnum, arg);

  	case IPC_RMID:
  	case IPC_SET:
  		return semctl_down(ns, semid, cmd, version, p);

  	default:
  		return -EINVAL;
  	}
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
  /* If the task doesn't already have a undo_list, then allocate one
   * here.  We guarantee there is only one thread using this undo list,
   * and current is THE ONE
   *
   * If this allocation and assignment succeeds, but later
   * portions of this code fail, there is no need to free the sem_undo_list.
   * Just let it stay associated with the task, and it'll be freed later
   * at exit time.
   *
   * This can block, so callers must hold no locks.
   */
  static inline int get_undo_list(struct sem_undo_list **undo_listp)
  {
  	struct sem_undo_list *undo_list;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1537
1538
1539
  
  	undo_list = current->sysvsem.undo_list;
  	if (!undo_list) {
2453a3062   Matt Helsley   [PATCH] ipc: repl...
1540
  		undo_list = kzalloc(sizeof(*undo_list), GFP_KERNEL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1541
1542
  		if (undo_list == NULL)
  			return -ENOMEM;
00a5dfdb9   Ingo Molnar   [PATCH] Fix semun...
1543
  		spin_lock_init(&undo_list->lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1544
  		atomic_set(&undo_list->refcnt, 1);
4daa28f6d   Manfred Spraul   ipc/sem.c: conver...
1545
  		INIT_LIST_HEAD(&undo_list->list_proc);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1546
1547
1548
1549
1550
  		current->sysvsem.undo_list = undo_list;
  	}
  	*undo_listp = undo_list;
  	return 0;
  }
bf17bb717   Nick Piggin   ipc/sem.c: sem op...
1551
  static struct sem_undo *__lookup_undo(struct sem_undo_list *ulp, int semid)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1552
  {
bf17bb717   Nick Piggin   ipc/sem.c: sem op...
1553
  	struct sem_undo *un;
4daa28f6d   Manfred Spraul   ipc/sem.c: conver...
1554

bf17bb717   Nick Piggin   ipc/sem.c: sem op...
1555
1556
1557
  	list_for_each_entry_rcu(un, &ulp->list_proc, list_proc) {
  		if (un->semid == semid)
  			return un;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1558
  	}
4daa28f6d   Manfred Spraul   ipc/sem.c: conver...
1559
  	return NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1560
  }
bf17bb717   Nick Piggin   ipc/sem.c: sem op...
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
  static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid)
  {
  	struct sem_undo *un;
  
    	assert_spin_locked(&ulp->lock);
  
  	un = __lookup_undo(ulp, semid);
  	if (un) {
  		list_del_rcu(&un->list_proc);
  		list_add_rcu(&un->list_proc, &ulp->list_proc);
  	}
  	return un;
  }
4daa28f6d   Manfred Spraul   ipc/sem.c: conver...
1574
1575
1576
1577
1578
1579
1580
1581
  /**
   * find_alloc_undo - Lookup (and if not present create) undo array
   * @ns: namespace
   * @semid: semaphore array id
   *
   * The function looks up (and if not present creates) the undo structure.
   * The size of the undo structure depends on the size of the semaphore
   * array, thus the alloc path is not that straightforward.
380af1b33   Manfred Spraul   ipc/sem.c: rewrit...
1582
1583
   * Lifetime-rules: sem_undo is rcu-protected, on success, the function
   * performs a rcu_read_lock().
4daa28f6d   Manfred Spraul   ipc/sem.c: conver...
1584
1585
   */
  static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1586
1587
1588
1589
  {
  	struct sem_array *sma;
  	struct sem_undo_list *ulp;
  	struct sem_undo *un, *new;
6062a8dc0   Rik van Riel   ipc,sem: fine gra...
1590
  	int nsems, error;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1591
1592
1593
1594
  
  	error = get_undo_list(&ulp);
  	if (error)
  		return ERR_PTR(error);
380af1b33   Manfred Spraul   ipc/sem.c: rewrit...
1595
  	rcu_read_lock();
c530c6ac7   Pierre Peiffer   IPC: cleanup some...
1596
  	spin_lock(&ulp->lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1597
  	un = lookup_undo(ulp, semid);
c530c6ac7   Pierre Peiffer   IPC: cleanup some...
1598
  	spin_unlock(&ulp->lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1599
1600
1601
1602
  	if (likely(un!=NULL))
  		goto out;
  
  	/* no undo structure around - allocate one. */
4daa28f6d   Manfred Spraul   ipc/sem.c: conver...
1603
  	/* step 1: figure out the size of the semaphore array */
16df3674e   Davidlohr Bueso   ipc,sem: do not h...
1604
1605
1606
  	sma = sem_obtain_object_check(ns, semid);
  	if (IS_ERR(sma)) {
  		rcu_read_unlock();
4de85cd6d   Julia Lawall   ipc/sem.c: use ER...
1607
  		return ERR_CAST(sma);
16df3674e   Davidlohr Bueso   ipc,sem: do not h...
1608
  	}
023a53557   Nadia Derbey   ipc: integrate ip...
1609

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1610
  	nsems = sma->sem_nsems;
6062a8dc0   Rik van Riel   ipc,sem: fine gra...
1611
1612
1613
1614
1615
  	if (!ipc_rcu_getref(sma)) {
  		rcu_read_unlock();
  		un = ERR_PTR(-EIDRM);
  		goto out;
  	}
16df3674e   Davidlohr Bueso   ipc,sem: do not h...
1616
  	rcu_read_unlock();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1617

4daa28f6d   Manfred Spraul   ipc/sem.c: conver...
1618
  	/* step 2: allocate new undo structure */
4668edc33   Burman Yan   [PATCH] kernel co...
1619
  	new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1620
  	if (!new) {
e84ca3337   Davidlohr Bueso   ipc: fix race wit...
1621
  		ipc_rcu_putref(sma, ipc_rcu_free);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1622
1623
  		return ERR_PTR(-ENOMEM);
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1624

380af1b33   Manfred Spraul   ipc/sem.c: rewrit...
1625
  	/* step 3: Acquire the lock on semaphore array */
4091fd942   Linus Torvalds   ipc: move the rcu...
1626
  	rcu_read_lock();
6ff379721   Pierre Peiffer   IPC/semaphores: c...
1627
  	sem_lock_and_putref(sma);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1628
  	if (sma->sem_perm.deleted) {
6062a8dc0   Rik van Riel   ipc,sem: fine gra...
1629
  		sem_unlock(sma, -1);
6d49dab8a   Linus Torvalds   ipc: move rcu_rea...
1630
  		rcu_read_unlock();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1631
1632
1633
1634
  		kfree(new);
  		un = ERR_PTR(-EIDRM);
  		goto out;
  	}
380af1b33   Manfred Spraul   ipc/sem.c: rewrit...
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
  	spin_lock(&ulp->lock);
  
  	/*
  	 * step 4: check for races: did someone else allocate the undo struct?
  	 */
  	un = lookup_undo(ulp, semid);
  	if (un) {
  		kfree(new);
  		goto success;
  	}
4daa28f6d   Manfred Spraul   ipc/sem.c: conver...
1645
1646
  	/* step 5: initialize & link new undo structure */
  	new->semadj = (short *) &new[1];
380af1b33   Manfred Spraul   ipc/sem.c: rewrit...
1647
  	new->ulp = ulp;
4daa28f6d   Manfred Spraul   ipc/sem.c: conver...
1648
1649
  	new->semid = semid;
  	assert_spin_locked(&ulp->lock);
380af1b33   Manfred Spraul   ipc/sem.c: rewrit...
1650
  	list_add_rcu(&new->list_proc, &ulp->list_proc);
115d40dbe   Davidlohr Bueso   ipc: close open c...
1651
  	ipc_assert_locked_object(&sma->sem_perm);
4daa28f6d   Manfred Spraul   ipc/sem.c: conver...
1652
  	list_add(&new->list_id, &sma->list_id);
380af1b33   Manfred Spraul   ipc/sem.c: rewrit...
1653
  	un = new;
4daa28f6d   Manfred Spraul   ipc/sem.c: conver...
1654

380af1b33   Manfred Spraul   ipc/sem.c: rewrit...
1655
  success:
c530c6ac7   Pierre Peiffer   IPC: cleanup some...
1656
  	spin_unlock(&ulp->lock);
6062a8dc0   Rik van Riel   ipc,sem: fine gra...
1657
  	sem_unlock(sma, -1);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1658
1659
1660
  out:
  	return un;
  }
c61284e99   Manfred Spraul   ipc/sem.c: bugfix...
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
  
  /**
   * get_queue_result - Retrieve the result code from sem_queue
   * @q: Pointer to queue structure
   *
   * Read the return code stored in q->status.  While the value is
   * IN_WAKEUP the final result has not been written yet, so the function
   * spins until it is replaced.  This may happen if a task is woken up by
   * an unrelated event (e.g. signal) and in parallel the task is woken up
   * by another task because it got the requested semaphores.
   *
   * The function can be called with or without holding the semaphore spinlock.
   */
  static int get_queue_result(struct sem_queue *q)
  {
  	int status;

  	for (;;) {
  		status = q->status;
  		if (likely(status != IN_WAKEUP))
  			break;
  		cpu_relax();
  	}

  	return status;
  }
d5460c997   Heiko Carstens   [CVE-2009-0029] S...
1686
1687
  /*
   * semtimedop() - perform @nsops semaphore operations on array @semid,
   * optionally bounded by @timeout.
   *
   * The operations are copied from @tsops, validated, and attempted with
   * perform_atomic_semop().  If its result is positive the task is queued
   * on the array and sleeps until it is woken with a final status, a
   * signal is pending, or the timeout expires (-EAGAIN).
   *
   * Returns 0 on success or a negative errno.  -EIDRM is returned when the
   * array is found to be removed once its lock has been taken.
   */
  SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
  		unsigned, nsops, const struct timespec __user *, timeout)
  {
  	int error = -EINVAL;
  	struct sem_array *sma;
  	struct sembuf fast_sops[SEMOPM_FAST];
  	struct sembuf *sops = fast_sops, *sop;
  	struct sem_undo *un;
  	int undos = 0, alter = 0, max, locknum;
  	struct sem_queue queue;
  	unsigned long jiffies_left = 0;
  	struct ipc_namespace *ns;
  	struct list_head tasks;

  	ns = current->nsproxy->ipc_ns;

  	if (nsops < 1 || semid < 0)
  		return -EINVAL;
  	if (nsops > ns->sc_semopm)
  		return -E2BIG;
  	/* Small requests use the on-stack buffer, larger ones are allocated */
  	if (nsops > SEMOPM_FAST) {
  		sops = kmalloc(sizeof(*sops)*nsops, GFP_KERNEL);
  		if (sops == NULL)
  			return -ENOMEM;
  	}
  	if (copy_from_user(sops, tsops, nsops * sizeof(*tsops))) {
  		error = -EFAULT;
  		goto out_free;
  	}
  	if (timeout) {
  		struct timespec _timeout;
  		if (copy_from_user(&_timeout, timeout, sizeof(*timeout))) {
  			error = -EFAULT;
  			goto out_free;
  		}
  		if (_timeout.tv_sec < 0 || _timeout.tv_nsec < 0 ||
  			_timeout.tv_nsec >= 1000000000L) {
  			error = -EINVAL;
  			goto out_free;
  		}
  		jiffies_left = timespec_to_jiffies(&_timeout);
  	}
  	/*
  	 * Single pass over the operations: highest semaphore index used,
  	 * whether any operation requests SEM_UNDO, and whether any operation
  	 * alters a value (sem_op != 0).
  	 */
  	max = 0;
  	for (sop = sops; sop < sops + nsops; sop++) {
  		if (sop->sem_num >= max)
  			max = sop->sem_num;
  		if (sop->sem_flg & SEM_UNDO)
  			undos = 1;
  		if (sop->sem_op != 0)
  			alter = 1;
  	}

  	INIT_LIST_HEAD(&tasks);

  	if (undos) {
  		/* On success, find_alloc_undo takes the rcu_read_lock */
  		un = find_alloc_undo(ns, semid);
  		if (IS_ERR(un)) {
  			error = PTR_ERR(un);
  			goto out_free;
  		}
  	} else {
  		un = NULL;
  		rcu_read_lock();
  	}

  	sma = sem_obtain_object_check(ns, semid);
  	if (IS_ERR(sma)) {
  		rcu_read_unlock();
  		error = PTR_ERR(sma);
  		goto out_free;
  	}

  	error = -EFBIG;
  	if (max >= sma->sem_nsems)
  		goto out_rcu_wakeup;

  	error = -EACCES;
  	if (ipcperms(ns, &sma->sem_perm, alter ? S_IWUGO : S_IRUGO))
  		goto out_rcu_wakeup;

  	error = security_sem_semop(sma, sops, nsops, alter);
  	if (error)
  		goto out_rcu_wakeup;

  	error = -EIDRM;
  	locknum = sem_lock(sma, sops, nsops);
  	/*
  	 * The array was only looked up under rcu, so an IPC_RMID may have
  	 * removed it before the lock was obtained.  Do not operate on a
  	 * removed array.
  	 */
  	if (sma->sem_perm.deleted)
  		goto out_unlock_free;
  	/*
  	 * semid identifiers are not unique - find_alloc_undo may have
  	 * allocated an undo structure, it was invalidated by an RMID
  	 * and now a new array has received the same id. Check and fail.
  	 * This case can be detected checking un->semid. The existence of
  	 * "un" itself is guaranteed by rcu.
  	 */
  	if (un && un->semid == -1)
  		goto out_unlock_free;

  	error = perform_atomic_semop(sma, sops, nsops, un,
  					task_tgid_vnr(current));
  	if (error == 0) {
  		/* If the operation was successful, then do
  		 * the required updates.
  		 */
  		if (alter)
  			do_smart_update(sma, sops, nsops, 1, &tasks);
  		else
  			set_semotime(sma, sops);
  	}
  	if (error <= 0)
  		goto out_unlock_free;

  	/* We need to sleep on this operation, so we put the current
  	 * task into the pending queue and go to sleep.
  	 */

  	queue.sops = sops;
  	queue.nsops = nsops;
  	queue.undo = un;
  	queue.pid = task_tgid_vnr(current);
  	queue.alter = alter;

  	if (nsops == 1) {
  		struct sem *curr;
  		curr = &sma->sem_base[sops->sem_num];

  		if (alter) {
  			if (sma->complex_count) {
  				list_add_tail(&queue.list,
  						&sma->pending_alter);
  			} else {

  				list_add_tail(&queue.list,
  						&curr->pending_alter);
  			}
  		} else {
  			list_add_tail(&queue.list, &curr->pending_const);
  		}
  	} else {
  		if (!sma->complex_count)
  			merge_queues(sma);

  		if (alter)
  			list_add_tail(&queue.list, &sma->pending_alter);
  		else
  			list_add_tail(&queue.list, &sma->pending_const);

  		sma->complex_count++;
  	}

  	/* -EINTR means "nobody has completed this request yet" */
  	queue.status = -EINTR;
  	queue.sleeper = current;

  sleep_again:
  	current->state = TASK_INTERRUPTIBLE;
  	sem_unlock(sma, locknum);
  	rcu_read_unlock();

  	if (timeout)
  		jiffies_left = schedule_timeout(jiffies_left);
  	else
  		schedule();

  	error = get_queue_result(&queue);

  	if (error != -EINTR) {
  		/* fast path: update_queue already obtained all requested
  		 * resources.
  		 * Perform a smp_mb(): User space could assume that semop()
  		 * is a memory barrier: Without the mb(), the cpu could
  		 * speculatively read in user space stale data that was
  		 * overwritten by the previous owner of the semaphore.
  		 */
  		smp_mb();

  		goto out_free;
  	}

  	rcu_read_lock();
  	sma = sem_obtain_lock(ns, semid, sops, nsops, &locknum);

  	/*
  	 * Wait until it's guaranteed that no wakeup_sem_queue_do() is ongoing.
  	 */
  	error = get_queue_result(&queue);

  	/*
  	 * Array removed? If yes, leave without sem_unlock().
  	 */
  	if (IS_ERR(sma)) {
  		rcu_read_unlock();
  		goto out_free;
  	}


  	/*
  	 * If queue.status != -EINTR we are woken up by another process.
  	 * Leave without unlink_queue(), but with sem_unlock().
  	 */

  	if (error != -EINTR) {
  		goto out_unlock_free;
  	}

  	/*
  	 * If an interrupt occurred we have to clean up the queue
  	 */
  	if (timeout && jiffies_left == 0)
  		error = -EAGAIN;

  	/*
  	 * If the wakeup was spurious, just retry
  	 */
  	if (error == -EINTR && !signal_pending(current))
  		goto sleep_again;

  	unlink_queue(sma, &queue);

  out_unlock_free:
  	sem_unlock(sma, locknum);
  out_rcu_wakeup:
  	rcu_read_unlock();
  	/* Wake the tasks collected by do_smart_update(), outside the locks */
  	wake_up_sem_queue_do(&tasks);
  out_free:
  	if (sops != fast_sops)
  		kfree(sops);
  	return error;
  }
d5460c997   Heiko Carstens   [CVE-2009-0029] S...
1902
1903
  /*
   * semop() system call entry point.
   *
   * Hands semid, tsops and nsops straight to sys_semtimedop() and passes
   * NULL as the fourth argument; the result of that call is returned
   * unchanged.
   */
  SYSCALL_DEFINE3(semop, int, semid, struct sembuf __user *, tsops,
  		unsigned, nsops)
  {
  	return sys_semtimedop(semid, tsops, nsops, NULL);
  }
  
  /* If CLONE_SYSVSEM is set, establish sharing of SEM_UNDO state between
   * parent and child tasks.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
   */
  
  int copy_semundo(unsigned long clone_flags, struct task_struct *tsk)
  {
  	struct sem_undo_list *undo_list;
  	int error;
  
  	if (clone_flags & CLONE_SYSVSEM) {
  		error = get_undo_list(&undo_list);
  		if (error)
  			return error;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
  		atomic_inc(&undo_list->refcnt);
  		tsk->sysvsem.undo_list = undo_list;
  	} else 
  		tsk->sysvsem.undo_list = NULL;
  
  	return 0;
  }
  
  /*
   * add semadj values to semaphores, free undo structures.
   * undo structures are not freed when semaphore arrays are destroyed
   * so some of them may be out of date.
   * IMPLEMENTATION NOTE: There is some confusion over whether the
   * set of adjustments that needs to be done should be done in an atomic
   * manner or not. That is, if we are attempting to decrement the semval
   * should we queue up and wait until we can do so legally?
   * The original implementation attempted to do this (queue and wait).
   * The current implementation does not do so. The POSIX standard
   * and SVID should be consulted to determine what behavior is mandated.
   */
  void exit_sem(struct task_struct *tsk)
  {
4daa28f6d   Manfred Spraul   ipc/sem.c: conver...
1943
  	struct sem_undo_list *ulp;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1944

4daa28f6d   Manfred Spraul   ipc/sem.c: conver...
1945
1946
  	ulp = tsk->sysvsem.undo_list;
  	if (!ulp)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1947
  		return;
9edff4ab1   Manfred Spraul   ipc: sysvsem: imp...
1948
  	tsk->sysvsem.undo_list = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1949

4daa28f6d   Manfred Spraul   ipc/sem.c: conver...
1950
  	if (!atomic_dec_and_test(&ulp->refcnt))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1951
  		return;
380af1b33   Manfred Spraul   ipc/sem.c: rewrit...
1952
  	for (;;) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1953
  		struct sem_array *sma;
380af1b33   Manfred Spraul   ipc/sem.c: rewrit...
1954
  		struct sem_undo *un;
0a2b9d4c7   Manfred Spraul   ipc/sem.c: move w...
1955
  		struct list_head tasks;
6062a8dc0   Rik van Riel   ipc,sem: fine gra...
1956
  		int semid, i;
4daa28f6d   Manfred Spraul   ipc/sem.c: conver...
1957

380af1b33   Manfred Spraul   ipc/sem.c: rewrit...
1958
  		rcu_read_lock();
05725f7eb   Jiri Pirko   rculist: use list...
1959
1960
  		un = list_entry_rcu(ulp->list_proc.next,
  				    struct sem_undo, list_proc);
380af1b33   Manfred Spraul   ipc/sem.c: rewrit...
1961
1962
1963
1964
  		if (&un->list_proc == &ulp->list_proc)
  			semid = -1;
  		 else
  			semid = un->semid;
4daa28f6d   Manfred Spraul   ipc/sem.c: conver...
1965

6062a8dc0   Rik van Riel   ipc,sem: fine gra...
1966
1967
  		if (semid == -1) {
  			rcu_read_unlock();
380af1b33   Manfred Spraul   ipc/sem.c: rewrit...
1968
  			break;
6062a8dc0   Rik van Riel   ipc,sem: fine gra...
1969
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1970

6062a8dc0   Rik van Riel   ipc,sem: fine gra...
1971
  		sma = sem_obtain_object_check(tsk->nsproxy->ipc_ns, un->semid);
380af1b33   Manfred Spraul   ipc/sem.c: rewrit...
1972
  		/* exit_sem raced with IPC_RMID, nothing to do */
6062a8dc0   Rik van Riel   ipc,sem: fine gra...
1973
1974
  		if (IS_ERR(sma)) {
  			rcu_read_unlock();
380af1b33   Manfred Spraul   ipc/sem.c: rewrit...
1975
  			continue;
6062a8dc0   Rik van Riel   ipc,sem: fine gra...
1976
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1977

6062a8dc0   Rik van Riel   ipc,sem: fine gra...
1978
  		sem_lock(sma, NULL, -1);
bf17bb717   Nick Piggin   ipc/sem.c: sem op...
1979
  		un = __lookup_undo(ulp, semid);
380af1b33   Manfred Spraul   ipc/sem.c: rewrit...
1980
1981
1982
1983
  		if (un == NULL) {
  			/* exit_sem raced with IPC_RMID+semget() that created
  			 * exactly the same semid. Nothing to do.
  			 */
6062a8dc0   Rik van Riel   ipc,sem: fine gra...
1984
  			sem_unlock(sma, -1);
6d49dab8a   Linus Torvalds   ipc: move rcu_rea...
1985
  			rcu_read_unlock();
380af1b33   Manfred Spraul   ipc/sem.c: rewrit...
1986
1987
1988
1989
  			continue;
  		}
  
  		/* remove un from the linked lists */
115d40dbe   Davidlohr Bueso   ipc: close open c...
1990
  		ipc_assert_locked_object(&sma->sem_perm);
4daa28f6d   Manfred Spraul   ipc/sem.c: conver...
1991
  		list_del(&un->list_id);
380af1b33   Manfred Spraul   ipc/sem.c: rewrit...
1992
1993
1994
  		spin_lock(&ulp->lock);
  		list_del_rcu(&un->list_proc);
  		spin_unlock(&ulp->lock);
4daa28f6d   Manfred Spraul   ipc/sem.c: conver...
1995
1996
  		/* perform adjustments registered in un */
  		for (i = 0; i < sma->sem_nsems; i++) {
5f921ae96   Ingo Molnar   [PATCH] sem2mutex...
1997
  			struct sem * semaphore = &sma->sem_base[i];
4daa28f6d   Manfred Spraul   ipc/sem.c: conver...
1998
1999
  			if (un->semadj[i]) {
  				semaphore->semval += un->semadj[i];
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
  				/*
  				 * Range checks of the new semaphore value,
  				 * not defined by sus:
  				 * - Some unices ignore the undo entirely
  				 *   (e.g. HP UX 11i 11.22, Tru64 V5.1)
  				 * - some cap the value (e.g. FreeBSD caps
  				 *   at 0, but doesn't enforce SEMVMX)
  				 *
  				 * Linux caps the semaphore value, both at 0
  				 * and at SEMVMX.
  				 *
  				 * 	Manfred <manfred@colorfullife.com>
  				 */
5f921ae96   Ingo Molnar   [PATCH] sem2mutex...
2013
2014
2015
2016
  				if (semaphore->semval < 0)
  					semaphore->semval = 0;
  				if (semaphore->semval > SEMVMX)
  					semaphore->semval = SEMVMX;
b488893a3   Pavel Emelyanov   pid namespaces: c...
2017
  				semaphore->sempid = task_tgid_vnr(current);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2018
2019
  			}
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2020
  		/* maybe some queued-up processes were waiting for this */
0a2b9d4c7   Manfred Spraul   ipc/sem.c: move w...
2021
2022
  		INIT_LIST_HEAD(&tasks);
  		do_smart_update(sma, NULL, 0, 1, &tasks);
6062a8dc0   Rik van Riel   ipc,sem: fine gra...
2023
  		sem_unlock(sma, -1);
6d49dab8a   Linus Torvalds   ipc: move rcu_rea...
2024
  		rcu_read_unlock();
0a2b9d4c7   Manfred Spraul   ipc/sem.c: move w...
2025
  		wake_up_sem_queue_do(&tasks);
380af1b33   Manfred Spraul   ipc/sem.c: rewrit...
2026

693a8b6ee   Lai Jiangshan   ipc,rcu: Convert ...
2027
  		kfree_rcu(un, rcu);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2028
  	}
4daa28f6d   Manfred Spraul   ipc/sem.c: conver...
2029
  	kfree(ulp);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2030
2031
2032
  }
  
  #ifdef CONFIG_PROC_FS
19b4946ca   Mike Waychison   [PATCH] ipc: conv...
2033
  static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2034
  {
1efdb69b0   Eric W. Biederman   userns: Convert i...
2035
  	struct user_namespace *user_ns = seq_user_ns(s);
19b4946ca   Mike Waychison   [PATCH] ipc: conv...
2036
  	struct sem_array *sma = it;
bf6830ad6   Manfred Spraul   ipc/sem.c: replac...
2037
  	time_t sem_otime;
83aeb6e34   Manfred Spraul   ipc/sem.c: synchr...
2038
2039
2040
2041
2042
2043
2044
  	/*
  	 * The proc interface isn't aware of sem_lock(), it calls
  	 * ipc_lock_object() directly (in sysvipc_find_ipc).
  	 * In order to stay compatible with sem_lock(), we must wait until
  	 * all simple semop() calls have left their critical regions.
  	 */
  	sem_wait_array(sma);
bf6830ad6   Manfred Spraul   ipc/sem.c: replac...
2045
  	sem_otime = get_semotime(sma);
19b4946ca   Mike Waychison   [PATCH] ipc: conv...
2046
2047
  
  	return seq_printf(s,
b97e820ff   Manfred Spraul   ipc/sem.c: add a ...
2048
2049
  			  "%10d %10d  %4o %10u %5u %5u %5u %5u %10lu %10lu
  ",
19b4946ca   Mike Waychison   [PATCH] ipc: conv...
2050
  			  sma->sem_perm.key,
7ca7e564e   Nadia Derbey   ipc: store ipcs i...
2051
  			  sma->sem_perm.id,
19b4946ca   Mike Waychison   [PATCH] ipc: conv...
2052
2053
  			  sma->sem_perm.mode,
  			  sma->sem_nsems,
1efdb69b0   Eric W. Biederman   userns: Convert i...
2054
2055
2056
2057
  			  from_kuid_munged(user_ns, sma->sem_perm.uid),
  			  from_kgid_munged(user_ns, sma->sem_perm.gid),
  			  from_kuid_munged(user_ns, sma->sem_perm.cuid),
  			  from_kgid_munged(user_ns, sma->sem_perm.cgid),
bf6830ad6   Manfred Spraul   ipc/sem.c: replac...
2058
  			  sem_otime,
19b4946ca   Mike Waychison   [PATCH] ipc: conv...
2059
  			  sma->sem_ctime);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2060
2061
  }
  #endif