ipc/sem.c
// SPDX-License-Identifier: GPL-2.0
/*
 * linux/ipc/sem.c
 * Copyright (C) 1992 Krishna Balasubramanian
 * Copyright (C) 1995 Eric Schenk, Bruno Haible
 *
 * /proc/sysvipc/sem support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
 *
 * SMP-threaded, sysctl's added
 * (c) 1999 Manfred Spraul <manfred@colorfullife.com>
 * Enforced range limit on SEM_UNDO
 * (c) 2001 Red Hat Inc
 * Lockless wakeup
 * (c) 2003 Manfred Spraul <manfred@colorfullife.com>
 * (c) 2016 Davidlohr Bueso <dave@stgolabs.net>
 * Further wakeup optimizations, documentation
 * (c) 2010 Manfred Spraul <manfred@colorfullife.com>
 *
 * support for audit of ipc object properties and permission changes
 * Dustin Kirkland <dustin.kirkland@us.ibm.com>
 *
 * namespaces support
 * OpenVZ, SWsoft Inc.
 * Pavel Emelianov <xemul@openvz.org>
 *
 * Implementation notes: (May 2010)
 * This file implements System V semaphores.
 *
 * User space visible behavior:
 * - FIFO ordering for semop() operations (just FIFO, not starvation
 *   protection)
 * - multiple semaphore operations that alter the same semaphore in
 *   one semop() are handled.
 * - sem_ctime (time of last semctl()) is updated in the IPC_SET, SETVAL and
 *   SETALL calls.
 * - two Linux specific semctl() commands: SEM_STAT, SEM_INFO.
 * - undo adjustments at process exit are limited to 0..SEMVMX.
 * - namespaces are supported.
 * - SEMMSL, SEMMNS, SEMOPM and SEMMNI can be configured at runtime by writing
 *   to /proc/sys/kernel/sem.
 * - statistics about the usage are reported in /proc/sysvipc/sem.
 *
 * Internals:
 * - scalability:
 *   - all global variables are read-mostly.
 *   - semop() calls and semctl(RMID) are synchronized by RCU.
 *   - most operations do write operations (actually: spin_lock calls) to
 *     the per-semaphore array structure.
 *     Thus: Perfect SMP scaling between independent semaphore arrays.
 *     If multiple semaphores in one array are used, then cache line
 *     thrashing on the semaphore array spinlock will limit the scaling.
 * - semncnt and semzcnt are calculated on demand in count_semcnt()
 * - the task that performs a successful semop() scans the list of all
 *   sleeping tasks and completes any pending operations that can be fulfilled.
 *   Semaphores are actively given to waiting tasks (necessary for FIFO).
 *   (see update_queue())
 * - To improve the scalability, the actual wake-up calls are performed after
 *   dropping all locks. (see wake_up_sem_queue_prepare())
 * - All work is done by the waker, the woken up task does not have to do
 *   anything - not even acquiring a lock or dropping a refcount.
 * - A woken up task may not even touch the semaphore array anymore, it may
 *   have been destroyed already by a semctl(RMID).
 * - UNDO values are stored in an array (one per process and per
 *   semaphore array, lazily allocated). For backwards compatibility, multiple
 *   modes for the UNDO variables are supported (per process, per thread)
 *   (see copy_semundo, CLONE_SYSVSEM)
 * - There are two lists of the pending operations: a per-array list
 *   and per-semaphore list (stored in the array). This allows to achieve FIFO
 *   ordering without always scanning all pending operations.
 *   The worst-case behavior is nevertheless O(N^2) for N wakeups.
 */
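
/*
 * Illustrative userspace sketch (editorial addition, not part of the kernel
 * source): the FIFO and SEM_UNDO behavior documented above, as seen through
 * the syscall interface. Assumes Linux with <sys/sem.h>; key/values arbitrary.
 */
#if 0
#include <sys/ipc.h>
#include <sys/sem.h>

int main(void)
{
        /* one semaphore, created with read/write permissions */
        int semid = semget(IPC_PRIVATE, 1, IPC_CREAT | 0600);
        struct sembuf up   = { .sem_num = 0, .sem_op = 1,  .sem_flg = SEM_UNDO };
        struct sembuf down = { .sem_num = 0, .sem_op = -1, .sem_flg = SEM_UNDO };

        semop(semid, &up, 1);   /* semval: 0 -> 1, increments never sleep */
        semop(semid, &down, 1); /* semval: 1 -> 0; would sleep (FIFO) at 0 */

        /* SEM_UNDO adjustments are rolled back automatically at exit */
        semctl(semid, 0, IPC_RMID);
        return 0;
}
#endif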

#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/time.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/audit.h>
#include <linux/capability.h>
#include <linux/seq_file.h>
#include <linux/rwsem.h>
#include <linux/nsproxy.h>
#include <linux/ipc_namespace.h>
#include <linux/sched/wake_q.h>

#include <linux/uaccess.h>
#include "util.h"

/* One queue for each sleeping process in the system. */
struct sem_queue {
        struct list_head        list;      /* queue of pending operations */
        struct task_struct      *sleeper;  /* this process */
        struct sem_undo         *undo;     /* undo structure */
        int                     pid;       /* process id of requesting process */
        int                     status;    /* completion status of operation */
        struct sembuf           *sops;     /* array of pending operations */
        struct sembuf           *blocking; /* the operation that blocked */
        int                     nsops;     /* number of operations */
        bool                    alter;     /* does *sops alter the array? */
        bool                    dupsop;    /* sops on more than one sem_num */
};

/* Each task has a list of undo requests. They are executed automatically
 * when the process exits.
 */
struct sem_undo {
        struct list_head        list_proc;      /* per-process list:
                                                 * all undos from one process
                                                 * rcu protected */
        struct rcu_head         rcu;            /* rcu struct for sem_undo */
        struct sem_undo_list    *ulp;           /* back ptr to sem_undo_list */
        struct list_head        list_id;        /* per semaphore array list:
                                                 * all undos for one array */
        int                     semid;          /* semaphore set identifier */
        short                   *semadj;        /* array of adjustments */
                                                /* one per semaphore */
};

/* sem_undo_list controls shared access to the list of sem_undo structures
 * that may be shared among all tasks of a CLONE_SYSVSEM task group.
 */
struct sem_undo_list {
        refcount_t              refcnt;
        spinlock_t              lock;
        struct list_head        list_proc;
};

#define sem_ids(ns)     ((ns)->ids[IPC_SEM_IDS])

static int newary(struct ipc_namespace *, struct ipc_params *);
static void freeary(struct ipc_namespace *, struct kern_ipc_perm *);
#ifdef CONFIG_PROC_FS
static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
#endif

#define SEMMSL_FAST     256 /* 512 bytes on stack */
#define SEMOPM_FAST     64  /* ~ 372 bytes on stack */

/*
 * Switching from the mode suitable for simple ops
 * to the mode for complex ops is costly. Therefore:
 * use some hysteresis
 */
#define USE_GLOBAL_LOCK_HYSTERESIS      10

/*
 * Locking:
 * a) global sem_lock() for read/write
 *      sem_undo.id_next,
 *      sem_array.complex_count,
 *      sem_array.pending{_alter,_const},
 *      sem_array.sem_undo
 *
 * b) global or semaphore sem_lock() for read/write:
 *      sem_array.sems[i].pending_{const,alter}:
 *
 * c) special:
 *      sem_undo_list.list_proc:
 *      * undo_list->lock for write
 *      * rcu for read
 *      use_global_lock:
 *      * global sem_lock() for write
 *      * either local or global sem_lock() for read.
 *
 * Memory ordering:
 * Most ordering is enforced by using spin_lock() and spin_unlock().
 * The special case is use_global_lock:
 * Setting it from non-zero to 0 is a RELEASE, this is ensured by
 * using smp_store_release().
 * Testing if it is non-zero is an ACQUIRE, this is ensured by using
 * smp_load_acquire().
 * Setting it from 0 to non-zero must be ordered with regard to
 * this smp_load_acquire(), this is guaranteed because the smp_load_acquire()
 * is inside a spin_lock() and after a write from 0 to non-zero a
 * spin_lock()+spin_unlock() is done.
 */
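
/*
 * Distilled sketch of the acquire/release pairing described above (editorial
 * illustration, not part of the kernel source; both paths are implemented
 * later in this file in sem_lock() and complexmode_tryleave()).
 */
#if 0
/* simple-op fast path (reader of use_global_lock) */
spin_lock(&sem->lock);
if (!smp_load_acquire(&sma->use_global_lock))
        return sops->sem_num;   /* ACQUIRE: complex-op writes are visible */
spin_unlock(&sem->lock);        /* else fall back to the global lock */

/* complex-op exit path (writer of use_global_lock) */
if (sma->use_global_lock == 1)
        smp_store_release(&sma->use_global_lock, 0); /* RELEASE: publish writes */
#endif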

#define sc_semmsl       sem_ctls[0]
#define sc_semmns       sem_ctls[1]
#define sc_semopm       sem_ctls[2]
#define sc_semmni       sem_ctls[3]

int sem_init_ns(struct ipc_namespace *ns)
{
        ns->sc_semmsl = SEMMSL;
        ns->sc_semmns = SEMMNS;
        ns->sc_semopm = SEMOPM;
        ns->sc_semmni = SEMMNI;
        ns->used_sems = 0;
        return ipc_init_ids(&ns->ids[IPC_SEM_IDS]);
}

#ifdef CONFIG_IPC_NS
void sem_exit_ns(struct ipc_namespace *ns)
{
        free_ipcs(ns, &sem_ids(ns), freeary);
        idr_destroy(&ns->ids[IPC_SEM_IDS].ipcs_idr);
        rhashtable_destroy(&ns->ids[IPC_SEM_IDS].key_ht);
}
#endif

int __init sem_init(void)
{
        const int err = sem_init_ns(&init_ipc_ns);

        ipc_init_proc_interface("sysvipc/sem",
                                "       key      semid perms      nsems   uid   gid  cuid  cgid      otime      ctime\n",
                                IPC_SEM_IDS, sysvipc_sem_proc_show);
        return err;
}
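
/*
 * Illustrative userspace sketch (editorial addition, not part of the kernel
 * source): the four tunables initialized above are exported through
 * /proc/sys/kernel/sem in the fixed order SEMMSL SEMMNS SEMOPM SEMMNI.
 */
#if 0
#include <stdio.h>

int main(void)
{
        int semmsl, semmns, semopm, semmni;
        FILE *f = fopen("/proc/sys/kernel/sem", "r");

        if (f && fscanf(f, "%d %d %d %d",
                        &semmsl, &semmns, &semopm, &semmni) == 4)
                printf("semmsl=%d semmns=%d semopm=%d semmni=%d\n",
                       semmsl, semmns, semopm, semmni);
        if (f)
                fclose(f);
        return 0;
}
#endif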

/**
 * unmerge_queues - unmerge queues, if possible.
 * @sma: semaphore array
 *
 * The function unmerges the wait queues if complex_count is 0.
 * It must be called prior to dropping the global semaphore array lock.
 */
static void unmerge_queues(struct sem_array *sma)
{
        struct sem_queue *q, *tq;

        /* complex operations still around? */
        if (sma->complex_count)
                return;
        /*
         * We will switch back to simple mode.
         * Move all pending operation back into the per-semaphore
         * queues.
         */
        list_for_each_entry_safe(q, tq, &sma->pending_alter, list) {
                struct sem *curr;
                curr = &sma->sems[q->sops[0].sem_num];

                list_add_tail(&q->list, &curr->pending_alter);
        }
        INIT_LIST_HEAD(&sma->pending_alter);
}

/**
 * merge_queues - merge single semop queues into global queue
 * @sma: semaphore array
 *
 * This function merges all per-semaphore queues into the global queue.
 * It is necessary to achieve FIFO ordering for the pending single-sop
 * operations when a multi-semop operation must sleep.
 * Only the alter operations must be moved, the const operations can stay.
 */
static void merge_queues(struct sem_array *sma)
{
        int i;

        for (i = 0; i < sma->sem_nsems; i++) {
                struct sem *sem = &sma->sems[i];

                list_splice_init(&sem->pending_alter, &sma->pending_alter);
        }
}

static void sem_rcu_free(struct rcu_head *head)
{
        struct kern_ipc_perm *p = container_of(head, struct kern_ipc_perm, rcu);
        struct sem_array *sma = container_of(p, struct sem_array, sem_perm);

        security_sem_free(sma);
        kvfree(sma);
}

/*
 * Enter the mode suitable for non-simple operations:
 * Caller must own sem_perm.lock.
 */
static void complexmode_enter(struct sem_array *sma)
{
        int i;
        struct sem *sem;

        if (sma->use_global_lock > 0) {
                /*
                 * We are already in global lock mode.
                 * Nothing to do, just reset the
                 * counter until we return to simple mode.
                 */
                sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS;
                return;
        }
        sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS;

        for (i = 0; i < sma->sem_nsems; i++) {
                sem = &sma->sems[i];
                spin_lock(&sem->lock);
                spin_unlock(&sem->lock);
        }
}

/*
 * Try to leave the mode that disallows simple operations:
 * Caller must own sem_perm.lock.
 */
static void complexmode_tryleave(struct sem_array *sma)
{
        if (sma->complex_count) {
                /* Complex ops are sleeping.
                 * We must stay in complex mode
                 */
                return;
        }
        if (sma->use_global_lock == 1) {
                /*
                 * Immediately after setting use_global_lock to 0,
                 * a simple op can start. Thus: all memory writes
                 * performed by the current operation must be visible
                 * before we set use_global_lock to 0.
                 */
                smp_store_release(&sma->use_global_lock, 0);
        } else {
                sma->use_global_lock--;
        }
}

#define SEM_GLOBAL_LOCK (-1)
/*
 * If the request contains only one semaphore operation, and there are
 * no complex transactions pending, lock only the semaphore involved.
 * Otherwise, lock the entire semaphore array, since we either have
 * multiple semaphores in our own semops, or we need to look at
 * semaphores from other pending complex operations.
 */
static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
                           int nsops)
{
        struct sem *sem;

        if (nsops != 1) {
                /* Complex operation - acquire a full lock */
                ipc_lock_object(&sma->sem_perm);

                /* Prevent parallel simple ops */
                complexmode_enter(sma);
                return SEM_GLOBAL_LOCK;
        }

        /*
         * Only one semaphore affected - try to optimize locking.
         * Optimized locking is possible if no complex operation
         * is either enqueued or processed right now.
         *
         * Both facts are tracked by use_global_lock.
         */
        sem = &sma->sems[sops->sem_num];

        /*
         * Initial check for use_global_lock. Just an optimization,
         * no locking, no memory barrier.
         */
        if (!sma->use_global_lock) {
                /*
                 * It appears that no complex operation is around.
                 * Acquire the per-semaphore lock.
                 */
                spin_lock(&sem->lock);

                /* pairs with smp_store_release() */
                if (!smp_load_acquire(&sma->use_global_lock)) {
                        /* fast path successful! */
                        return sops->sem_num;
                }
                spin_unlock(&sem->lock);
        }

        /* slow path: acquire the full lock */
        ipc_lock_object(&sma->sem_perm);

        if (sma->use_global_lock == 0) {
                /*
                 * The use_global_lock mode ended while we waited for
                 * sma->sem_perm.lock. Thus we must switch to locking
                 * with sem->lock.
                 * Unlike in the fast path, there is no need to recheck
                 * sma->use_global_lock after we have acquired sem->lock:
                 * We own sma->sem_perm.lock, thus use_global_lock cannot
                 * change.
                 */
                spin_lock(&sem->lock);

                ipc_unlock_object(&sma->sem_perm);
                return sops->sem_num;
        } else {
                /*
                 * Not a false alarm, thus continue to use the global lock
                 * mode. No need for complexmode_enter(), this was done by
                 * the caller that has set use_global_lock to non-zero.
                 */
                return SEM_GLOBAL_LOCK;
        }
}

static inline void sem_unlock(struct sem_array *sma, int locknum)
{
        if (locknum == SEM_GLOBAL_LOCK) {
                unmerge_queues(sma);
                complexmode_tryleave(sma);
                ipc_unlock_object(&sma->sem_perm);
        } else {
                struct sem *sem = &sma->sems[locknum];
                spin_unlock(&sem->lock);
        }
}

/*
 * sem_lock_(check_) routines are called in the paths where the rwsem
 * is not held.
 *
 * The caller holds the RCU read lock.
 */
static inline struct sem_array *sem_obtain_object(struct ipc_namespace *ns, int id)
{
        struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&sem_ids(ns), id);

        if (IS_ERR(ipcp))
                return ERR_CAST(ipcp);

        return container_of(ipcp, struct sem_array, sem_perm);
}

static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns,
                                                        int id)
{
        struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&sem_ids(ns), id);

        if (IS_ERR(ipcp))
                return ERR_CAST(ipcp);

        return container_of(ipcp, struct sem_array, sem_perm);
}

static inline void sem_lock_and_putref(struct sem_array *sma)
{
        sem_lock(sma, NULL, -1);
        ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
}

static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
{
        ipc_rmid(&sem_ids(ns), &s->sem_perm);
}

static struct sem_array *sem_alloc(size_t nsems)
{
        struct sem_array *sma;
        size_t size;

        if (nsems > (INT_MAX - sizeof(*sma)) / sizeof(sma->sems[0]))
                return NULL;

        size = sizeof(*sma) + nsems * sizeof(sma->sems[0]);
        sma = kvmalloc(size, GFP_KERNEL);
        if (unlikely(!sma))
                return NULL;

        memset(sma, 0, size);

        return sma;
}

/**
 * newary - Create a new semaphore set
 * @ns: namespace
 * @params: ptr to the structure that contains key, semflg and nsems
 *
 * Called with sem_ids.rwsem held (as a writer)
 */
static int newary(struct ipc_namespace *ns, struct ipc_params *params)
{
        int retval;
        struct sem_array *sma;
        key_t key = params->key;
        int nsems = params->u.nsems;
        int semflg = params->flg;
        int i;

        if (!nsems)
                return -EINVAL;
        if (ns->used_sems + nsems > ns->sc_semmns)
                return -ENOSPC;

        sma = sem_alloc(nsems);
        if (!sma)
                return -ENOMEM;

        sma->sem_perm.mode = (semflg & S_IRWXUGO);
        sma->sem_perm.key = key;

        sma->sem_perm.security = NULL;
        retval = security_sem_alloc(sma);
        if (retval) {
                kvfree(sma);
                return retval;
        }

        for (i = 0; i < nsems; i++) {
                INIT_LIST_HEAD(&sma->sems[i].pending_alter);
                INIT_LIST_HEAD(&sma->sems[i].pending_const);
                spin_lock_init(&sma->sems[i].lock);
        }

        sma->complex_count = 0;
        sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS;
        INIT_LIST_HEAD(&sma->pending_alter);
        INIT_LIST_HEAD(&sma->pending_const);
        INIT_LIST_HEAD(&sma->list_id);
        sma->sem_nsems = nsems;
        sma->sem_ctime = ktime_get_real_seconds();

        retval = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);
        if (retval < 0) {
                call_rcu(&sma->sem_perm.rcu, sem_rcu_free);
                return retval;
        }
        ns->used_sems += nsems;

        sem_unlock(sma, -1);
        rcu_read_unlock();

        return sma->sem_perm.id;
}

/*
 * Called with sem_ids.rwsem and ipcp locked.
 */
static inline int sem_security(struct kern_ipc_perm *ipcp, int semflg)
{
        struct sem_array *sma;

        sma = container_of(ipcp, struct sem_array, sem_perm);
        return security_sem_associate(sma, semflg);
}

/*
 * Called with sem_ids.rwsem and ipcp locked.
 */
static inline int sem_more_checks(struct kern_ipc_perm *ipcp,
                                  struct ipc_params *params)
{
        struct sem_array *sma;

        sma = container_of(ipcp, struct sem_array, sem_perm);
        if (params->u.nsems > sma->sem_nsems)
                return -EINVAL;

        return 0;
}

SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg)
{
        struct ipc_namespace *ns;
        static const struct ipc_ops sem_ops = {
                .getnew = newary,
                .associate = sem_security,
                .more_checks = sem_more_checks,
        };
        struct ipc_params sem_params;

        ns = current->nsproxy->ipc_ns;

        if (nsems < 0 || nsems > ns->sc_semmsl)
                return -EINVAL;

        sem_params.key = key;
        sem_params.flg = semflg;
        sem_params.u.nsems = nsems;

        return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params);
}
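
/*
 * Illustrative userspace sketch (editorial addition, not part of the kernel
 * source): sem_more_checks() above allows opening an existing set with a
 * smaller nsems, but asking for more semaphores than the set holds fails.
 */
#if 0
#include <errno.h>
#include <sys/ipc.h>
#include <sys/sem.h>

void semget_example(key_t key)
{
        int id = semget(key, 4, IPC_CREAT | 0600);      /* create with 4 sems */

        semget(key, 2, 0600);   /* ok: 2 <= 4, returns the same id */
        if (semget(key, 8, 0600) < 0 && errno == EINVAL)
                ;               /* 8 > 4: rejected by sem_more_checks() */
        (void)id;
}
#endif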

/**
 * perform_atomic_semop[_slow] - Attempt to perform semaphore
 *                               operations on a given array.
 * @sma: semaphore array
 * @q: struct sem_queue that describes the operation
 *
 * Whether the caller blocks depends on the value indicated by the
 * semaphore operation (sem_op):
 *  (1) >0 never blocks.
 *  (2) 0 (wait-for-zero operation): blocks if semval is non-zero.
 *  (3) <0 blocks if it would decrement semval to a value smaller than zero.
 *
 * Returns 0 if the operation was possible.
 * Returns 1 if the operation is impossible, the caller must sleep.
 * Returns <0 for error codes.
 */
static int perform_atomic_semop_slow(struct sem_array *sma, struct sem_queue *q)
{
        int result, sem_op, nsops, pid;
        struct sembuf *sop;
        struct sem *curr;
        struct sembuf *sops;
        struct sem_undo *un;

        sops = q->sops;
        nsops = q->nsops;
        un = q->undo;

        for (sop = sops; sop < sops + nsops; sop++) {
                curr = &sma->sems[sop->sem_num];
                sem_op = sop->sem_op;
                result = curr->semval;

                if (!sem_op && result)
                        goto would_block;

                result += sem_op;
                if (result < 0)
                        goto would_block;
                if (result > SEMVMX)
                        goto out_of_range;

                if (sop->sem_flg & SEM_UNDO) {
                        int undo = un->semadj[sop->sem_num] - sem_op;
                        /* Exceeding the undo range is an error. */
                        if (undo < (-SEMAEM - 1) || undo > SEMAEM)
                                goto out_of_range;
                        un->semadj[sop->sem_num] = undo;
                }

                curr->semval = result;
        }

        sop--;
        pid = q->pid;
        while (sop >= sops) {
                sma->sems[sop->sem_num].sempid = pid;
                sop--;
        }

        return 0;

out_of_range:
        result = -ERANGE;
        goto undo;

would_block:
        q->blocking = sop;

        if (sop->sem_flg & IPC_NOWAIT)
                result = -EAGAIN;
        else
                result = 1;

undo:
        sop--;
        while (sop >= sops) {
                sem_op = sop->sem_op;
                sma->sems[sop->sem_num].semval -= sem_op;
                if (sop->sem_flg & SEM_UNDO)
                        un->semadj[sop->sem_num] += sem_op;
                sop--;
        }

        return result;
}

static int perform_atomic_semop(struct sem_array *sma, struct sem_queue *q)
{
        int result, sem_op, nsops;
        struct sembuf *sop;
        struct sem *curr;
        struct sembuf *sops;
        struct sem_undo *un;

        sops = q->sops;
        nsops = q->nsops;
        un = q->undo;

        if (unlikely(q->dupsop))
                return perform_atomic_semop_slow(sma, q);

        /*
         * We scan the semaphore set twice, first to ensure that the entire
         * operation can succeed, therefore avoiding any pointless writes
         * to shared memory and having to undo such changes in order to block
         * until the operations can go through.
         */
        for (sop = sops; sop < sops + nsops; sop++) {
                curr = &sma->sems[sop->sem_num];
                sem_op = sop->sem_op;
                result = curr->semval;

                if (!sem_op && result)
                        goto would_block; /* wait-for-zero */

                result += sem_op;
                if (result < 0)
                        goto would_block;

                if (result > SEMVMX)
                        return -ERANGE;

                if (sop->sem_flg & SEM_UNDO) {
                        int undo = un->semadj[sop->sem_num] - sem_op;

                        /* Exceeding the undo range is an error. */
                        if (undo < (-SEMAEM - 1) || undo > SEMAEM)
                                return -ERANGE;
                }
        }

        for (sop = sops; sop < sops + nsops; sop++) {
                curr = &sma->sems[sop->sem_num];
                sem_op = sop->sem_op;
                result = curr->semval;

                if (sop->sem_flg & SEM_UNDO) {
                        int undo = un->semadj[sop->sem_num] - sem_op;

                        un->semadj[sop->sem_num] = undo;
                }
                curr->semval += sem_op;
                curr->sempid = q->pid;
        }

        return 0;

would_block:
        q->blocking = sop;
        return sop->sem_flg & IPC_NOWAIT ? -EAGAIN : 1;
}
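
/*
 * Editorial illustration (not part of the kernel source): an operation array
 * that, as the dupsop flag appears intended, forces the _slow variant above -
 * both entries touch sem_num 0, so the two-pass validate-then-apply scan
 * cannot evaluate them independently.
 */
#if 0
struct sembuf dup[2] = {
        { .sem_num = 0, .sem_op = 1,  .sem_flg = 0 },   /* raise semval */
        { .sem_num = 0, .sem_op = -2, .sem_flg = 0 },   /* depends on the +1 */
};
/* semop(semid, dup, 2) would take perform_atomic_semop_slow() */
#endif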

static inline void wake_up_sem_queue_prepare(struct sem_queue *q, int error,
                                             struct wake_q_head *wake_q)
{
        wake_q_add(wake_q, q->sleeper);
        /*
         * Rely on the above implicit barrier, such that we can
         * ensure that we hold reference to the task before setting
         * q->status. Otherwise we could race with do_exit if the
         * task is awoken by an external event before calling
         * wake_up_process().
         */
        WRITE_ONCE(q->status, error);
}

static void unlink_queue(struct sem_array *sma, struct sem_queue *q)
{
        list_del(&q->list);
        if (q->nsops > 1)
                sma->complex_count--;
}

/** check_restart(sma, q)
 * @sma: semaphore array
 * @q: the operation that just completed
 *
 * update_queue is O(N^2) when it restarts scanning the whole queue of
 * waiting operations. Therefore this function checks if the restart is
 * really necessary. It is called after a previously waiting operation
 * modified the array.
 * Note that wait-for-zero operations are handled without restart.
 */
static inline int check_restart(struct sem_array *sma, struct sem_queue *q)
{
        /* pending complex alter operations are too difficult to analyse */
        if (!list_empty(&sma->pending_alter))
                return 1;

        /* we were a sleeping complex operation. Too difficult */
        if (q->nsops > 1)
                return 1;

        /* It is impossible that someone waits for the new value:
         * - complex operations always restart.
         * - wait-for-zero are handled separately.
         * - q is a previously sleeping simple operation that
         *   altered the array. It must be a decrement, because
         *   simple increments never sleep.
         * - If there are older (higher priority) decrements
         *   in the queue, then they have observed the original
         *   semval value and couldn't proceed. The operation
         *   decremented the value - thus they won't proceed either.
         */
        return 0;
}

/**
 * wake_const_ops - wake up non-alter tasks
 * @sma: semaphore array.
 * @semnum: semaphore that was modified.
 * @wake_q: lockless wake-queue head.
 *
 * wake_const_ops must be called after a semaphore in a semaphore array
 * was set to 0. If complex const operations are pending, wake_const_ops must
 * be called with semnum = -1, as well as with the number of each modified
 * semaphore.
 * The tasks that must be woken up are added to @wake_q. The return code
 * is stored in q->pid.
 * The function returns 1 if at least one operation was completed successfully.
 */
static int wake_const_ops(struct sem_array *sma, int semnum,
                          struct wake_q_head *wake_q)
{
        struct sem_queue *q, *tmp;
        struct list_head *pending_list;
        int semop_completed = 0;

        if (semnum == -1)
                pending_list = &sma->pending_const;
        else
                pending_list = &sma->sems[semnum].pending_const;

        list_for_each_entry_safe(q, tmp, pending_list, list) {
                int error = perform_atomic_semop(sma, q);

                if (error > 0)
                        continue;
                /* operation completed, remove from queue & wakeup */
                unlink_queue(sma, q);

                wake_up_sem_queue_prepare(q, error, wake_q);
                if (error == 0)
                        semop_completed = 1;
        }

        return semop_completed;
}

/**
 * do_smart_wakeup_zero - wakeup all wait for zero tasks
 * @sma: semaphore array
 * @sops: operations that were performed
 * @nsops: number of operations
 * @wake_q: lockless wake-queue head
 *
 * Checks all required queues for wait-for-zero operations, based
 * on the actual changes that were performed on the semaphore array.
 * The function returns 1 if at least one operation was completed successfully.
 */
static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops,
                                int nsops, struct wake_q_head *wake_q)
{
        int i;
        int semop_completed = 0;
        int got_zero = 0;

        /* first: the per-semaphore queues, if known */
        if (sops) {
                for (i = 0; i < nsops; i++) {
                        int num = sops[i].sem_num;

                        if (sma->sems[num].semval == 0) {
                                got_zero = 1;
                                semop_completed |= wake_const_ops(sma, num, wake_q);
                        }
                }
        } else {
                /*
                 * No sops means modified semaphores not known.
                 * Assume all were changed.
                 */
                for (i = 0; i < sma->sem_nsems; i++) {
                        if (sma->sems[i].semval == 0) {
                                got_zero = 1;
                                semop_completed |= wake_const_ops(sma, i, wake_q);
                        }
                }
        }
        /*
         * If one of the modified semaphores got 0,
         * then check the global queue, too.
         */
        if (got_zero)
                semop_completed |= wake_const_ops(sma, -1, wake_q);

        return semop_completed;
}

/**
 * update_queue - look for tasks that can be completed.
 * @sma: semaphore array.
 * @semnum: semaphore that was modified.
 * @wake_q: lockless wake-queue head.
 *
 * update_queue must be called after a semaphore in a semaphore array
 * was modified. If multiple semaphores were modified, update_queue must
 * be called with semnum = -1, as well as with the number of each modified
 * semaphore.
 * The tasks that must be woken up are added to @wake_q. The return code
 * is stored in q->pid.
 * The function internally checks if const operations can now succeed.
 *
 * The function returns 1 if at least one semop was completed successfully.
 */
static int update_queue(struct sem_array *sma, int semnum, struct wake_q_head *wake_q)
{
        struct sem_queue *q, *tmp;
        struct list_head *pending_list;
        int semop_completed = 0;

        if (semnum == -1)
                pending_list = &sma->pending_alter;
        else
                pending_list = &sma->sems[semnum].pending_alter;

again:
        list_for_each_entry_safe(q, tmp, pending_list, list) {
                int error, restart;

                /* If we are scanning the single sop, per-semaphore list of
                 * one semaphore and that semaphore is 0, then it is not
                 * necessary to scan further: simple increments
                 * that affect only one entry succeed immediately and cannot
                 * be in the per semaphore pending queue, and decrements
                 * cannot be successful if the value is already 0.
                 */
                if (semnum != -1 && sma->sems[semnum].semval == 0)
                        break;

                error = perform_atomic_semop(sma, q);

                /* Does q->sleeper still need to sleep? */
                if (error > 0)
                        continue;

                unlink_queue(sma, q);

                if (error) {
                        restart = 0;
                } else {
                        semop_completed = 1;
                        do_smart_wakeup_zero(sma, q->sops, q->nsops, wake_q);
                        restart = check_restart(sma, q);
                }

                wake_up_sem_queue_prepare(q, error, wake_q);
                if (restart)
                        goto again;
        }
        return semop_completed;
}

/**
 * set_semotime - set sem_otime
 * @sma: semaphore array
 * @sops: operations that modified the array, may be NULL
 *
 * sem_otime is replicated to avoid cache line thrashing.
 * This function sets one instance to the current time.
 */
static void set_semotime(struct sem_array *sma, struct sembuf *sops)
{
        if (sops == NULL) {
                sma->sems[0].sem_otime = get_seconds();
        } else {
                sma->sems[sops[0].sem_num].sem_otime =
                                                get_seconds();
        }
}

/**
 * do_smart_update - optimized update_queue
 * @sma: semaphore array
 * @sops: operations that were performed
 * @nsops: number of operations
 * @otime: force setting otime
 * @wake_q: lockless wake-queue head
 *
 * do_smart_update() does the required calls to update_queue and wakeup_zero,
 * based on the actual changes that were performed on the semaphore array.
 * Note that the function does not do the actual wake-up: the caller is
 * responsible for calling wake_up_q().
 * It is safe to perform this call after dropping all locks.
 */
static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsops,
                            int otime, struct wake_q_head *wake_q)
{
        int i;

        otime |= do_smart_wakeup_zero(sma, sops, nsops, wake_q);

        if (!list_empty(&sma->pending_alter)) {
                /* semaphore array uses the global queue - just process it. */
                otime |= update_queue(sma, -1, wake_q);
        } else {
                if (!sops) {
                        /*
                         * No sops, thus the modified semaphores are not
                         * known. Check all.
                         */
                        for (i = 0; i < sma->sem_nsems; i++)
                                otime |= update_queue(sma, i, wake_q);
                } else {
                        /*
                         * Check the semaphores that were increased:
                         * - No complex ops, thus all sleeping ops are
                         *   decrements.
                         * - if we decreased the value, then any sleeping
                         *   semaphore ops won't be able to run: If the
                         *   previous value was too small, then the new
                         *   value will be too small, too.
                         */
                        for (i = 0; i < nsops; i++) {
                                if (sops[i].sem_op > 0) {
                                        otime |= update_queue(sma,
                                                              sops[i].sem_num, wake_q);
                                }
                        }
                }
        }
        if (otime)
                set_semotime(sma, sops);
}

/*
 * check_qop: Test if a queued operation sleeps on the semaphore semnum
 */
static int check_qop(struct sem_array *sma, int semnum,
                     struct sem_queue *q, bool count_zero)
{
        struct sembuf *sop = q->blocking;

        /*
         * Linux always (since 0.99.10) reported a task as sleeping on all
         * semaphores. This violates SUS, therefore it was changed to the
         * standard compliant behavior.
         * Give the administrators a chance to notice that an application
         * might misbehave because it relies on the Linux behavior.
         */
        pr_info_once("semctl(GETNCNT/GETZCNT) is since 3.16 Single Unix Specification compliant.\n"
                        "The task %s (%d) triggered the difference, watch for misbehavior.\n",
                        current->comm, task_pid_nr(current));

        if (sop->sem_num != semnum)
                return 0;

        if (count_zero && sop->sem_op == 0)
                return 1;
        if (!count_zero && sop->sem_op < 0)
                return 1;

        return 0;
}

/* The following counts are associated to each semaphore:
 *   semncnt        number of tasks waiting on semval being nonzero
 *   semzcnt        number of tasks waiting on semval being zero
 *
 * Per definition, a task waits only on the semaphore of the first semop
 * that cannot proceed, even if additional operations would block, too.
 */
static int count_semcnt(struct sem_array *sma, ushort semnum,
                        bool count_zero)
{
        struct list_head *l;
        struct sem_queue *q;
        int semcnt;

        semcnt = 0;
        /* First: check the simple operations. They are easy to evaluate */
        if (count_zero)
                l = &sma->sems[semnum].pending_const;
        else
                l = &sma->sems[semnum].pending_alter;

        list_for_each_entry(q, l, list) {
                /* all tasks on a per-semaphore list sleep on exactly
                 * that semaphore
                 */
                semcnt++;
        }

        /* Then: check the complex operations. */
        list_for_each_entry(q, &sma->pending_alter, list) {
                semcnt += check_qop(sma, semnum, q, count_zero);
        }
        if (count_zero) {
                list_for_each_entry(q, &sma->pending_const, list) {
                        semcnt += check_qop(sma, semnum, q, count_zero);
                }
        }
        return semcnt;
}
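
/*
 * Illustrative userspace view (editorial addition, not part of the kernel
 * source) of the counts computed above: a task blocked in a decrement is
 * counted by GETNCNT, a task blocked waiting for zero by GETZCNT.
 */
#if 0
int ncnt = semctl(semid, 0, GETNCNT);   /* waiting for semval to increase */
int zcnt = semctl(semid, 0, GETZCNT);   /* waiting for semval to become 0 */
#endif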

/* Free a semaphore set. freeary() is called with sem_ids.rwsem locked
 * as a writer and the spinlock for this semaphore set held. sem_ids.rwsem
 * remains locked on exit.
 */
static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
{
        struct sem_undo *un, *tu;
        struct sem_queue *q, *tq;
        struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm);
        int i;
        DEFINE_WAKE_Q(wake_q);

        /* Free the existing undo structures for this semaphore set. */
        ipc_assert_locked_object(&sma->sem_perm);
        list_for_each_entry_safe(un, tu, &sma->list_id, list_id) {
                list_del(&un->list_id);
                spin_lock(&un->ulp->lock);
                un->semid = -1;
                list_del_rcu(&un->list_proc);
                spin_unlock(&un->ulp->lock);
                kfree_rcu(un, rcu);
        }

        /* Wake up all pending processes and let them fail with EIDRM. */
        list_for_each_entry_safe(q, tq, &sma->pending_const, list) {
                unlink_queue(sma, q);
                wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
        }

        list_for_each_entry_safe(q, tq, &sma->pending_alter, list) {
                unlink_queue(sma, q);
                wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
        }
        for (i = 0; i < sma->sem_nsems; i++) {
                struct sem *sem = &sma->sems[i];
                list_for_each_entry_safe(q, tq, &sem->pending_const, list) {
                        unlink_queue(sma, q);
                        wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
                }
                list_for_each_entry_safe(q, tq, &sem->pending_alter, list) {
                        unlink_queue(sma, q);
                        wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
                }
        }

        /* Remove the semaphore set from the IDR */
        sem_rmid(ns, sma);
        sem_unlock(sma, -1);
        rcu_read_unlock();

        wake_up_q(&wake_q);
        ns->used_sems -= sma->sem_nsems;
        ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
}

static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in,
                                        int version)
{
        switch (version) {
        case IPC_64:
                return copy_to_user(buf, in, sizeof(*in));
        case IPC_OLD:
        {
                struct semid_ds out;

                memset(&out, 0, sizeof(out));

                ipc64_perm_to_ipc_perm(&in->sem_perm, &out.sem_perm);

                out.sem_otime   = in->sem_otime;
                out.sem_ctime   = in->sem_ctime;
                out.sem_nsems   = in->sem_nsems;

                return copy_to_user(buf, &out, sizeof(out));
        }
        default:
                return -EINVAL;
        }
}

static time64_t get_semotime(struct sem_array *sma)
{
        int i;
        time64_t res;

        res = sma->sems[0].sem_otime;
        for (i = 1; i < sma->sem_nsems; i++) {
                time64_t to = sma->sems[i].sem_otime;

                if (to > res)
                        res = to;
        }
        return res;
}

static int semctl_stat(struct ipc_namespace *ns, int semid,
                       int cmd, struct semid64_ds *semid64)
{
        struct sem_array *sma;
        int id = 0;
        int err;

        memset(semid64, 0, sizeof(*semid64));

        rcu_read_lock();
        if (cmd == SEM_STAT) {
                sma = sem_obtain_object(ns, semid);
                if (IS_ERR(sma)) {
                        err = PTR_ERR(sma);
                        goto out_unlock;
                }
                id = sma->sem_perm.id;
        } else {
                sma = sem_obtain_object_check(ns, semid);
                if (IS_ERR(sma)) {
                        err = PTR_ERR(sma);
                        goto out_unlock;
                }
        }

        err = -EACCES;
        if (ipcperms(ns, &sma->sem_perm, S_IRUGO))
                goto out_unlock;

        err = security_sem_semctl(sma, cmd);
        if (err)
                goto out_unlock;

        kernel_to_ipc64_perm(&sma->sem_perm, &semid64->sem_perm);
        semid64->sem_otime = get_semotime(sma);
        semid64->sem_ctime = sma->sem_ctime;
        semid64->sem_nsems = sma->sem_nsems;
        rcu_read_unlock();
        return id;

out_unlock:
        rcu_read_unlock();
        return err;
}
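
/*
 * Illustrative userspace sketch (editorial addition, not part of the kernel
 * source): IPC_STAT consumes the semid64_ds prepared above; note that on
 * Linux the caller must define union semun itself.
 */
#if 0
#include <sys/ipc.h>
#include <sys/sem.h>

void stat_example(int semid)
{
        struct semid_ds ds;
        union semun {
                int val;
                struct semid_ds *buf;
                unsigned short *array;
        } arg = { .buf = &ds };

        if (semctl(semid, 0, IPC_STAT, arg) == 0) {
                /* ds.sem_nsems, ds.sem_otime and ds.sem_ctime are now valid */
        }
}
#endif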

static int semctl_info(struct ipc_namespace *ns, int semid,
                       int cmd, void __user *p)
{
        struct seminfo seminfo;
        int max_id;
        int err;

        err = security_sem_semctl(NULL, cmd);
        if (err)
                return err;

        memset(&seminfo, 0, sizeof(seminfo));
        seminfo.semmni = ns->sc_semmni;
        seminfo.semmns = ns->sc_semmns;
        seminfo.semmsl = ns->sc_semmsl;
        seminfo.semopm = ns->sc_semopm;
        seminfo.semvmx = SEMVMX;
        seminfo.semmnu = SEMMNU;
        seminfo.semmap = SEMMAP;
        seminfo.semume = SEMUME;
        down_read(&sem_ids(ns).rwsem);
        if (cmd == SEM_INFO) {
                seminfo.semusz = sem_ids(ns).in_use;
                seminfo.semaem = ns->used_sems;
        } else {
                seminfo.semusz = SEMUSZ;
                seminfo.semaem = SEMAEM;
        }
        max_id = ipc_get_maxid(&sem_ids(ns));
        up_read(&sem_ids(ns).rwsem);
        if (copy_to_user(p, &seminfo, sizeof(struct seminfo)))
                return -EFAULT;
        return (max_id < 0) ? 0 : max_id;
}

static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum,
                         int val)
{
        struct sem_undo *un;
        struct sem_array *sma;
        struct sem *curr;
        int err;
        DEFINE_WAKE_Q(wake_q);

        if (val > SEMVMX || val < 0)
                return -ERANGE;

        rcu_read_lock();
        sma = sem_obtain_object_check(ns, semid);
        if (IS_ERR(sma)) {
                rcu_read_unlock();
                return PTR_ERR(sma);
        }

        if (semnum < 0 || semnum >= sma->sem_nsems) {
                rcu_read_unlock();
                return -EINVAL;
        }

        if (ipcperms(ns, &sma->sem_perm, S_IWUGO)) {
                rcu_read_unlock();
                return -EACCES;
        }

        err = security_sem_semctl(sma, SETVAL);
        if (err) {
                rcu_read_unlock();
                return -EACCES;
        }

        sem_lock(sma, NULL, -1);

        if (!ipc_valid_object(&sma->sem_perm)) {
                sem_unlock(sma, -1);
                rcu_read_unlock();
                return -EIDRM;
        }

        curr = &sma->sems[semnum];

        ipc_assert_locked_object(&sma->sem_perm);
        list_for_each_entry(un, &sma->list_id, list_id)
                un->semadj[semnum] = 0;

        curr->semval = val;
        curr->sempid = task_tgid_vnr(current);
        sma->sem_ctime = ktime_get_real_seconds();
        /* maybe some queued-up processes were waiting for this */
        do_smart_update(sma, NULL, 0, 0, &wake_q);
        sem_unlock(sma, -1);
        rcu_read_unlock();
        wake_up_q(&wake_q);
        return 0;
}
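
/*
 * Illustrative userspace sketch (editorial addition, not part of the kernel
 * source): SETVAL as handled above clears all undo adjustments for the
 * semaphore and wakes any waiter that the new value satisfies.
 */
#if 0
#include <sys/ipc.h>
#include <sys/sem.h>

void setval_example(int semid)
{
        union semun {
                int val;
                struct semid_ds *buf;
                unsigned short *array;
        } arg = { .val = 1 };

        semctl(semid, 0, SETVAL, arg);  /* semval = 1, sempid = this task */
        /* a task sleeping in semop() on a -1 op can now proceed */
}
#endif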

static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
                       int cmd, void __user *p)
{
        struct sem_array *sma;
        struct sem *curr;
        int err, nsems;
        ushort fast_sem_io[SEMMSL_FAST];
        ushort *sem_io = fast_sem_io;
        DEFINE_WAKE_Q(wake_q);

        rcu_read_lock();
        sma = sem_obtain_object_check(ns, semid);
        if (IS_ERR(sma)) {
                rcu_read_unlock();
                return PTR_ERR(sma);
        }

        nsems = sma->sem_nsems;

        err = -EACCES;
        if (ipcperms(ns, &sma->sem_perm, cmd == SETALL ? S_IWUGO : S_IRUGO))
                goto out_rcu_wakeup;

        err = security_sem_semctl(sma, cmd);
        if (err)
                goto out_rcu_wakeup;

        err = -EACCES;
        switch (cmd) {
        case GETALL:
        {
                ushort __user *array = p;
                int i;

                sem_lock(sma, NULL, -1);
                if (!ipc_valid_object(&sma->sem_perm)) {
                        err = -EIDRM;
                        goto out_unlock;
                }
                if (nsems > SEMMSL_FAST) {
                        if (!ipc_rcu_getref(&sma->sem_perm)) {
                                err = -EIDRM;
                                goto out_unlock;
                        }
                        sem_unlock(sma, -1);
                        rcu_read_unlock();
                        sem_io = kvmalloc_array(nsems, sizeof(ushort),
                                                GFP_KERNEL);
                        if (sem_io == NULL) {
                                ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
                                return -ENOMEM;
                        }

                        rcu_read_lock();
                        sem_lock_and_putref(sma);
                        if (!ipc_valid_object(&sma->sem_perm)) {
                                err = -EIDRM;
                                goto out_unlock;
                        }
                }
                for (i = 0; i < sma->sem_nsems; i++)
                        sem_io[i] = sma->sems[i].semval;
                sem_unlock(sma, -1);
                rcu_read_unlock();
                err = 0;
                if (copy_to_user(array, sem_io, nsems*sizeof(ushort)))
                        err = -EFAULT;
                goto out_free;
        }
        case SETALL:
        {
                int i;
                struct sem_undo *un;

                if (!ipc_rcu_getref(&sma->sem_perm)) {
                        err = -EIDRM;
                        goto out_rcu_wakeup;
                }
                rcu_read_unlock();

                if (nsems > SEMMSL_FAST) {
                        sem_io = kvmalloc_array(nsems, sizeof(ushort),
                                                GFP_KERNEL);
                        if (sem_io == NULL) {
                                ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
                                return -ENOMEM;
                        }
                }

                if (copy_from_user(sem_io, p, nsems*sizeof(ushort))) {
                        ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
                        err = -EFAULT;
                        goto out_free;
                }

                for (i = 0; i < nsems; i++) {
                        if (sem_io[i] > SEMVMX) {
                                ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
                                err = -ERANGE;
                                goto out_free;
                        }
                }
                rcu_read_lock();
                sem_lock_and_putref(sma);
                if (!ipc_valid_object(&sma->sem_perm)) {
                        err = -EIDRM;
                        goto out_unlock;
                }

                for (i = 0; i < nsems; i++) {
                        sma->sems[i].semval = sem_io[i];
                        sma->sems[i].sempid = task_tgid_vnr(current);
                }

                ipc_assert_locked_object(&sma->sem_perm);
                list_for_each_entry(un, &sma->list_id, list_id) {
                        for (i = 0; i < nsems; i++)
                                un->semadj[i] = 0;
                }
                sma->sem_ctime = ktime_get_real_seconds();
                /* maybe some queued-up processes were waiting for this */
                do_smart_update(sma, NULL, 0, 0, &wake_q);
                err = 0;
                goto out_unlock;
        }
        /* GETVAL, GETPID, GETNCNT, GETZCNT: fall-through */
        }
        err = -EINVAL;
        if (semnum < 0 || semnum >= nsems)
                goto out_rcu_wakeup;

        sem_lock(sma, NULL, -1);
        if (!ipc_valid_object(&sma->sem_perm)) {
                err = -EIDRM;
                goto out_unlock;
        }
        curr = &sma->sems[semnum];

        switch (cmd) {
        case GETVAL:
                err = curr->semval;
                goto out_unlock;
        case GETPID:
                err = curr->sempid;
                goto out_unlock;
        case GETNCNT:
                err = count_semcnt(sma, semnum, 0);
                goto out_unlock;
        case GETZCNT:
                err = count_semcnt(sma, semnum, 1);
                goto out_unlock;
        }

out_unlock:
        sem_unlock(sma, -1);
out_rcu_wakeup:
        rcu_read_unlock();
        wake_up_q(&wake_q);
out_free:
        if (sem_io != fast_sem_io)
                kvfree(sem_io);
        return err;
}

static inline unsigned long
copy_semid_from_user(struct semid64_ds *out, void __user *buf, int version)
{
        switch (version) {
        case IPC_64:
                if (copy_from_user(out, buf, sizeof(*out)))
                        return -EFAULT;
                return 0;
        case IPC_OLD:
        {
                struct semid_ds tbuf_old;

                if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
                        return -EFAULT;

                out->sem_perm.uid       = tbuf_old.sem_perm.uid;
                out->sem_perm.gid       = tbuf_old.sem_perm.gid;
                out->sem_perm.mode      = tbuf_old.sem_perm.mode;

                return 0;
        }
        default:
                return -EINVAL;
        }
}

/*
 * This function handles some semctl commands which require the rwsem
 * to be held in write mode.
 * NOTE: no locks must be held, the rwsem is taken inside this function.
 */
static int semctl_down(struct ipc_namespace *ns, int semid,
                       int cmd, struct semid64_ds *semid64)
{
        struct sem_array *sma;
        int err;
        struct kern_ipc_perm *ipcp;

        down_write(&sem_ids(ns).rwsem);
        rcu_read_lock();

        ipcp = ipcctl_pre_down_nolock(ns, &sem_ids(ns), semid, cmd,
                                      &semid64->sem_perm, 0);
        if (IS_ERR(ipcp)) {
                err = PTR_ERR(ipcp);
                goto out_unlock1;
        }

        sma = container_of(ipcp, struct sem_array, sem_perm);

        err = security_sem_semctl(sma, cmd);
        if (err)
                goto out_unlock1;

        switch (cmd) {
        case IPC_RMID:
                sem_lock(sma, NULL, -1);
                /* freeary unlocks the ipc object and rcu */
                freeary(ns, ipcp);
                goto out_up;
        case IPC_SET:
                sem_lock(sma, NULL, -1);
                err = ipc_update_perm(&semid64->sem_perm, ipcp);
                if (err)
                        goto out_unlock0;
                sma->sem_ctime = ktime_get_real_seconds();
                break;
        default:
                err = -EINVAL;
                goto out_unlock1;
        }

out_unlock0:
        sem_unlock(sma, -1);
out_unlock1:
        rcu_read_unlock();
out_up:
        up_write(&sem_ids(ns).rwsem);
        return err;
}
SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, unsigned long, arg)
{
	int version;
	struct ipc_namespace *ns;
	void __user *p = (void __user *)arg;
	struct semid64_ds semid64;
	int err;

	if (semid < 0)
		return -EINVAL;

	version = ipc_parse_version(&cmd);
	ns = current->nsproxy->ipc_ns;

	switch (cmd) {
	case IPC_INFO:
	case SEM_INFO:
		return semctl_info(ns, semid, cmd, p);
	case IPC_STAT:
	case SEM_STAT:
		err = semctl_stat(ns, semid, cmd, &semid64);
		if (err < 0)
			return err;
		if (copy_semid_to_user(p, &semid64, version))
			err = -EFAULT;
		return err;
	case GETALL:
	case GETVAL:
	case GETPID:
	case GETNCNT:
	case GETZCNT:
	case SETALL:
		return semctl_main(ns, semid, semnum, cmd, p);
	case SETVAL: {
		int val;
#if defined(CONFIG_64BIT) && defined(__BIG_ENDIAN)
		/* big-endian 64bit */
		val = arg >> 32;
#else
		/* 32bit or little-endian 64bit */
		val = arg;
#endif
		return semctl_setval(ns, semid, semnum, val);
	}
	case IPC_SET:
		if (copy_semid_from_user(&semid64, p, version))
			return -EFAULT;
		/* fall through */
	case IPC_RMID:
		return semctl_down(ns, semid, cmd, &semid64);
	default:
		return -EINVAL;
	}
}
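
/*
 * Note on the SETVAL decoding above (the userspace view is hedged,
 * since the union layout is defined by the caller, not by the kernel):
 * glibc passes the optional fourth semctl(2) argument as a union semun
 * by value, and for SETVAL only the int member is meaningful:
 *
 *	union semun arg;
 *	arg.val = 1;
 *	semctl(semid, 0, SETVAL, arg);
 *
 * The kernel receives that union as an unsigned long. On a 64-bit
 * big-endian machine the 32-bit int occupies the upper half of the
 * word, hence "val = arg >> 32"; on 32-bit and on little-endian 64-bit
 * machines the plain truncation "val = arg" yields the same int.
 */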
#ifdef CONFIG_COMPAT

struct compat_semid_ds {
	struct compat_ipc_perm sem_perm;
	compat_time_t sem_otime;
	compat_time_t sem_ctime;
	compat_uptr_t sem_base;
	compat_uptr_t sem_pending;
	compat_uptr_t sem_pending_last;
	compat_uptr_t undo;
	unsigned short sem_nsems;
};

static int copy_compat_semid_from_user(struct semid64_ds *out, void __user *buf,
					int version)
{
	memset(out, 0, sizeof(*out));
	if (version == IPC_64) {
		struct compat_semid64_ds *p = buf;
		return get_compat_ipc64_perm(&out->sem_perm, &p->sem_perm);
	} else {
		struct compat_semid_ds *p = buf;
		return get_compat_ipc_perm(&out->sem_perm, &p->sem_perm);
	}
}

static int copy_compat_semid_to_user(void __user *buf, struct semid64_ds *in,
					int version)
{
	if (version == IPC_64) {
		struct compat_semid64_ds v;
		memset(&v, 0, sizeof(v));
		to_compat_ipc64_perm(&v.sem_perm, &in->sem_perm);
		v.sem_otime = in->sem_otime;
		v.sem_ctime = in->sem_ctime;
		v.sem_nsems = in->sem_nsems;
		return copy_to_user(buf, &v, sizeof(v));
	} else {
		struct compat_semid_ds v;
		memset(&v, 0, sizeof(v));
		to_compat_ipc_perm(&v.sem_perm, &in->sem_perm);
		v.sem_otime = in->sem_otime;
		v.sem_ctime = in->sem_ctime;
		v.sem_nsems = in->sem_nsems;
		return copy_to_user(buf, &v, sizeof(v));
	}
}

COMPAT_SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, int, arg)
{
	void __user *p = compat_ptr(arg);
	struct ipc_namespace *ns;
	struct semid64_ds semid64;
	int version = compat_ipc_parse_version(&cmd);
	int err;

	ns = current->nsproxy->ipc_ns;

	if (semid < 0)
		return -EINVAL;

	switch (cmd & (~IPC_64)) {
	case IPC_INFO:
	case SEM_INFO:
		return semctl_info(ns, semid, cmd, p);
	case IPC_STAT:
	case SEM_STAT:
		err = semctl_stat(ns, semid, cmd, &semid64);
		if (err < 0)
			return err;
		if (copy_compat_semid_to_user(p, &semid64, version))
			err = -EFAULT;
		return err;
	case GETVAL:
	case GETPID:
	case GETNCNT:
	case GETZCNT:
	case GETALL:
	case SETALL:
		return semctl_main(ns, semid, semnum, cmd, p);
	case SETVAL:
		return semctl_setval(ns, semid, semnum, arg);
	case IPC_SET:
		if (copy_compat_semid_from_user(&semid64, p, version))
			return -EFAULT;
		/* fall through */
	case IPC_RMID:
		return semctl_down(ns, semid, cmd, &semid64);
	default:
		return -EINVAL;
	}
}
#endif
/* If the task doesn't already have an undo_list, then allocate one
 * here.  We guarantee there is only one thread using this undo list,
 * and current is THE ONE.
 *
 * If this allocation and assignment succeeds, but later
 * portions of this code fail, there is no need to free the sem_undo_list.
 * Just let it stay associated with the task, and it'll be freed later
 * at exit time.
 *
 * This can block, so callers must hold no locks.
 */
static inline int get_undo_list(struct sem_undo_list **undo_listp)
{
	struct sem_undo_list *undo_list;

	undo_list = current->sysvsem.undo_list;
	if (!undo_list) {
		undo_list = kzalloc(sizeof(*undo_list), GFP_KERNEL);
		if (undo_list == NULL)
			return -ENOMEM;
		spin_lock_init(&undo_list->lock);
		refcount_set(&undo_list->refcnt, 1);
		INIT_LIST_HEAD(&undo_list->list_proc);

		current->sysvsem.undo_list = undo_list;
	}
	*undo_listp = undo_list;
	return 0;
}
static struct sem_undo *__lookup_undo(struct sem_undo_list *ulp, int semid)
{
	struct sem_undo *un;

	list_for_each_entry_rcu(un, &ulp->list_proc, list_proc) {
		if (un->semid == semid)
			return un;
	}
	return NULL;
}

static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid)
{
	struct sem_undo *un;

	assert_spin_locked(&ulp->lock);

	un = __lookup_undo(ulp, semid);
	if (un) {
		/* Move the hit to the front of the list, so that repeated
		 * operations on the same semaphore array stay cheap.
		 */
		list_del_rcu(&un->list_proc);
		list_add_rcu(&un->list_proc, &ulp->list_proc);
	}
	return un;
}
/**
 * find_alloc_undo - lookup (and if not present create) undo array
 * @ns: namespace
 * @semid: semaphore array id
 *
 * The function looks up (and if not present creates) the undo structure.
 * The size of the undo structure depends on the size of the semaphore
 * array, thus the alloc path is not that straightforward.
 *
 * Lifetime-rules: sem_undo is rcu-protected; on success, the function
 * performs a rcu_read_lock().
 */
static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
{
	struct sem_array *sma;
	struct sem_undo_list *ulp;
	struct sem_undo *un, *new;
	int nsems, error;

	error = get_undo_list(&ulp);
	if (error)
		return ERR_PTR(error);

	rcu_read_lock();
	spin_lock(&ulp->lock);
	un = lookup_undo(ulp, semid);
	spin_unlock(&ulp->lock);
	if (likely(un != NULL))
		goto out;

	/* no undo structure around - allocate one. */
	/* step 1: figure out the size of the semaphore array */
	sma = sem_obtain_object_check(ns, semid);
	if (IS_ERR(sma)) {
		rcu_read_unlock();
		return ERR_CAST(sma);
	}

	nsems = sma->sem_nsems;
	if (!ipc_rcu_getref(&sma->sem_perm)) {
		rcu_read_unlock();
		un = ERR_PTR(-EIDRM);
		goto out;
	}
	rcu_read_unlock();

	/* step 2: allocate new undo structure */
	new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
	if (!new) {
		ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
		return ERR_PTR(-ENOMEM);
	}

	/* step 3: Acquire the lock on the semaphore array */
	rcu_read_lock();
	sem_lock_and_putref(sma);
	if (!ipc_valid_object(&sma->sem_perm)) {
		sem_unlock(sma, -1);
		rcu_read_unlock();
		kfree(new);
		un = ERR_PTR(-EIDRM);
		goto out;
	}
	spin_lock(&ulp->lock);

	/*
	 * step 4: check for races: did someone else allocate the undo struct?
	 */
	un = lookup_undo(ulp, semid);
	if (un) {
		kfree(new);
		goto success;
	}
	/* step 5: initialize & link new undo structure */
	new->semadj = (short *) &new[1];
	new->ulp = ulp;
	new->semid = semid;
	assert_spin_locked(&ulp->lock);
	list_add_rcu(&new->list_proc, &ulp->list_proc);
	ipc_assert_locked_object(&sma->sem_perm);
	list_add(&new->list_id, &sma->list_id);
	un = new;

success:
	spin_unlock(&ulp->lock);
	sem_unlock(sma, -1);
out:
	return un;
}
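
/*
 * Illustrative user space sketch (not part of this file's build): the
 * undo machinery above is only exercised when a caller sets SEM_UNDO:
 *
 *	struct sembuf sop = {
 *		.sem_num = 0,
 *		.sem_op  = -1,		take the semaphore
 *		.sem_flg = SEM_UNDO,	record a +1 adjustment in semadj
 *	};
 *	semop(semid, &sop, 1);
 *
 * The first such semop() in a process reaches find_alloc_undo(), which
 * allocates one sem_undo per (process, semaphore array) pair;
 * exit_sem() later replays the accumulated semadj values.
 */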
static long do_semtimedop(int semid, struct sembuf __user *tsops,
		unsigned nsops, const struct timespec64 *timeout)
{
	int error = -EINVAL;
	struct sem_array *sma;
	struct sembuf fast_sops[SEMOPM_FAST];
	struct sembuf *sops = fast_sops, *sop;
	struct sem_undo *un;
	int max, locknum;
	bool undos = false, alter = false, dupsop = false;
	struct sem_queue queue;
	unsigned long dup = 0, jiffies_left = 0;
	struct ipc_namespace *ns;

	ns = current->nsproxy->ipc_ns;

	if (nsops < 1 || semid < 0)
		return -EINVAL;
	if (nsops > ns->sc_semopm)
		return -E2BIG;
	if (nsops > SEMOPM_FAST) {
		sops = kvmalloc(sizeof(*sops)*nsops, GFP_KERNEL);
		if (sops == NULL)
			return -ENOMEM;
	}

	if (copy_from_user(sops, tsops, nsops * sizeof(*tsops))) {
		error = -EFAULT;
		goto out_free;
	}

	if (timeout) {
		if (timeout->tv_sec < 0 || timeout->tv_nsec < 0 ||
			timeout->tv_nsec >= 1000000000L) {
			error = -EINVAL;
			goto out_free;
		}
		jiffies_left = timespec64_to_jiffies(timeout);
	}

	max = 0;
	for (sop = sops; sop < sops + nsops; sop++) {
		unsigned long mask = 1ULL << ((sop->sem_num) % BITS_PER_LONG);

		if (sop->sem_num >= max)
			max = sop->sem_num;
		if (sop->sem_flg & SEM_UNDO)
			undos = true;
		if (dup & mask) {
			/*
			 * There was a previous alter access that appears
			 * to have accessed the same semaphore, thus use
			 * the dupsop logic. "appears", because the detection
			 * can only check % BITS_PER_LONG.
			 */
			dupsop = true;
		}
		if (sop->sem_op != 0) {
			alter = true;
			dup |= mask;
		}
	}

	if (undos) {
		/* On success, find_alloc_undo takes the rcu_read_lock */
		un = find_alloc_undo(ns, semid);
		if (IS_ERR(un)) {
			error = PTR_ERR(un);
			goto out_free;
		}
	} else {
		un = NULL;
		rcu_read_lock();
	}

	sma = sem_obtain_object_check(ns, semid);
	if (IS_ERR(sma)) {
		rcu_read_unlock();
		error = PTR_ERR(sma);
		goto out_free;
	}

	error = -EFBIG;
	if (max >= sma->sem_nsems) {
		rcu_read_unlock();
		goto out_free;
	}

	error = -EACCES;
	if (ipcperms(ns, &sma->sem_perm, alter ? S_IWUGO : S_IRUGO)) {
		rcu_read_unlock();
		goto out_free;
	}

	error = security_sem_semop(sma, sops, nsops, alter);
	if (error) {
		rcu_read_unlock();
		goto out_free;
	}

	error = -EIDRM;
	locknum = sem_lock(sma, sops, nsops);
	/*
	 * We eventually might perform the following check in a lockless
	 * fashion, considering ipc_valid_object() locking constraints.
	 * If nsops == 1 and there is no contention for sem_perm.lock, then
	 * only a per-semaphore lock is held and it's OK to proceed with the
	 * check below. More details on the fine grained locking scheme
	 * entangled here, and why it's RMID race safe, can be found in the
	 * comments at sem_lock().
	 */
	if (!ipc_valid_object(&sma->sem_perm))
		goto out_unlock_free;
	/*
	 * semid identifiers are not unique - find_alloc_undo may have
	 * allocated an undo structure, it was invalidated by an RMID
	 * and now a new array with the same id was created. Check and fail.
	 * This case can be detected checking un->semid. The existence of
	 * "un" itself is guaranteed by rcu.
	 */
	if (un && un->semid == -1)
		goto out_unlock_free;

	queue.sops = sops;
	queue.nsops = nsops;
	queue.undo = un;
	queue.pid = task_tgid_vnr(current);
	queue.alter = alter;
	queue.dupsop = dupsop;

	error = perform_atomic_semop(sma, &queue);
	if (error == 0) { /* non-blocking successful path */
		DEFINE_WAKE_Q(wake_q);

		/*
		 * If the operation was successful, then do
		 * the required updates.
		 */
		if (alter)
			do_smart_update(sma, sops, nsops, 1, &wake_q);
		else
			set_semotime(sma, sops);

		sem_unlock(sma, locknum);
		rcu_read_unlock();
		wake_up_q(&wake_q);

		goto out_free;
	}
	if (error < 0) /* non-blocking error path */
		goto out_unlock_free;

	/*
	 * We need to sleep on this operation, so we put the current
	 * task into the pending queue and go to sleep.
	 */
	if (nsops == 1) {
		struct sem *curr;
		curr = &sma->sems[sops->sem_num];

		if (alter) {
			if (sma->complex_count) {
				list_add_tail(&queue.list,
						&sma->pending_alter);
			} else {
				list_add_tail(&queue.list,
						&curr->pending_alter);
			}
		} else {
			list_add_tail(&queue.list, &curr->pending_const);
		}
	} else {
		if (!sma->complex_count)
			merge_queues(sma);

		if (alter)
			list_add_tail(&queue.list, &sma->pending_alter);
		else
			list_add_tail(&queue.list, &sma->pending_const);

		sma->complex_count++;
	}

	do {
		WRITE_ONCE(queue.status, -EINTR);
		queue.sleeper = current;

		__set_current_state(TASK_INTERRUPTIBLE);
		sem_unlock(sma, locknum);
		rcu_read_unlock();

		if (timeout)
			jiffies_left = schedule_timeout(jiffies_left);
		else
			schedule();

		/*
		 * fastpath: the semop has completed, either successfully or
		 * not. Which of the two is, from the syscall pov, quite
		 * irrelevant to us at this point; we're done.
		 *
		 * We _do_ care, nonetheless, about being awoken by a signal or
		 * spuriously. The queue.status is checked again in the
		 * slowpath (aka after taking sem_lock), such that we can detect
		 * scenarios where we were awakened externally, during the
		 * window between wake_q_add() and wake_up_q().
		 */
		error = READ_ONCE(queue.status);
		if (error != -EINTR) {
			/*
			 * User space could assume that semop() is a memory
			 * barrier: Without the mb(), the cpu could
			 * speculatively read in userspace stale data that was
			 * overwritten by the previous owner of the semaphore.
			 */
			smp_mb();
			goto out_free;
		}

		rcu_read_lock();
		locknum = sem_lock(sma, sops, nsops);

		if (!ipc_valid_object(&sma->sem_perm))
			goto out_unlock_free;

		error = READ_ONCE(queue.status);

		/*
		 * If queue.status != -EINTR we are woken up by another process.
		 * Leave without unlink_queue(), but with sem_unlock().
		 */
		if (error != -EINTR)
			goto out_unlock_free;

		/*
		 * If an interrupt occurred we have to clean up the queue.
		 */
		if (timeout && jiffies_left == 0)
			error = -EAGAIN;
	} while (error == -EINTR && !signal_pending(current)); /* spurious */

	unlink_queue(sma, &queue);

out_unlock_free:
	sem_unlock(sma, locknum);
	rcu_read_unlock();
out_free:
	if (sops != fast_sops)
		kvfree(sops);
	return error;
}
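
/*
 * Illustrative user space sketch (not part of this file's build): the
 * blocking path of do_semtimedop() is what a timed P() operation rides
 * on:
 *
 *	struct sembuf sop = { .sem_num = 0, .sem_op = -1, .sem_flg = 0 };
 *	struct timespec ts = { .tv_sec = 5, .tv_nsec = 0 };
 *
 *	if (semtimedop(semid, &sop, 1, &ts) == -1 && errno == EAGAIN)
 *		the 5s budget was consumed: the -EAGAIN set above
 *
 * The timeout is converted to jiffies once, up front, and the remaining
 * budget (jiffies_left) is carried across spurious wakeups by the
 * do/while loop above.
 */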
SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
		unsigned, nsops, const struct timespec __user *, timeout)
{
	if (timeout) {
		struct timespec64 ts;

		if (get_timespec64(&ts, timeout))
			return -EFAULT;
		return do_semtimedop(semid, tsops, nsops, &ts);
	}
	return do_semtimedop(semid, tsops, nsops, NULL);
}

#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsems,
		       unsigned, nsops,
		       const struct compat_timespec __user *, timeout)
{
	if (timeout) {
		struct timespec64 ts;

		if (compat_get_timespec64(&ts, timeout))
			return -EFAULT;
		return do_semtimedop(semid, tsems, nsops, &ts);
	}
	return do_semtimedop(semid, tsems, nsops, NULL);
}
#endif
SYSCALL_DEFINE3(semop, int, semid, struct sembuf __user *, tsops,
		unsigned, nsops)
{
	return do_semtimedop(semid, tsops, nsops, NULL);
}

/* If CLONE_SYSVSEM is set, establish sharing of SEM_UNDO state between
 * parent and child tasks.
 */
int copy_semundo(unsigned long clone_flags, struct task_struct *tsk)
{
	struct sem_undo_list *undo_list;
	int error;

	if (clone_flags & CLONE_SYSVSEM) {
		error = get_undo_list(&undo_list);
		if (error)
			return error;
		refcount_inc(&undo_list->refcnt);
		tsk->sysvsem.undo_list = undo_list;
	} else
		tsk->sysvsem.undo_list = NULL;

	return 0;
}
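
/*
 * Illustrative sketch (not part of this file's build): copy_semundo()
 * gives clone(2) callers the choice of sharing undo state. A task
 * created with
 *
 *	clone(fn, stack, CLONE_VM | CLONE_SYSVSEM | SIGCHLD, arg);
 *
 * shares the parent's sem_undo_list (refcount_inc above), so SEM_UNDO
 * adjustments made by either task are replayed only once, when the
 * last user exits. Without CLONE_SYSVSEM the child starts with a NULL
 * undo_list and builds its own lazily via get_undo_list().
 */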
/*
 * add semadj values to semaphores, free undo structures.
 * undo structures are not freed when semaphore arrays are destroyed
 * so some of them may be out of date.
 * IMPLEMENTATION NOTE: There is some confusion over whether the
 * set of adjustments that needs to be done should be done in an atomic
 * manner or not. That is, if we are attempting to decrement the semval
 * should we queue up and wait until we can do so legally?
 * The original implementation attempted to do this (queue and wait).
 * The current implementation does not do so. The POSIX standard
 * and SVID should be consulted to determine what behavior is mandated.
 */
void exit_sem(struct task_struct *tsk)
{
	struct sem_undo_list *ulp;

	ulp = tsk->sysvsem.undo_list;
	if (!ulp)
		return;
	tsk->sysvsem.undo_list = NULL;

	if (!refcount_dec_and_test(&ulp->refcnt))
		return;

	for (;;) {
		struct sem_array *sma;
		struct sem_undo *un;
		int semid, i;
		DEFINE_WAKE_Q(wake_q);

		cond_resched();

		rcu_read_lock();
		un = list_entry_rcu(ulp->list_proc.next,
				    struct sem_undo, list_proc);
		if (&un->list_proc == &ulp->list_proc) {
			/*
			 * We must wait for freeary() before freeing this ulp,
			 * in case we raced with the last sem_undo. There is
			 * a small possibility where we exit while freeary()
			 * didn't finish unlocking sem_undo_list.
			 */
			spin_lock(&ulp->lock);
			spin_unlock(&ulp->lock);
			rcu_read_unlock();
			break;
		}
		spin_lock(&ulp->lock);
		semid = un->semid;
		spin_unlock(&ulp->lock);

		/* exit_sem raced with IPC_RMID, nothing to do */
		if (semid == -1) {
			rcu_read_unlock();
			continue;
		}

		sma = sem_obtain_object_check(tsk->nsproxy->ipc_ns, semid);
		/* exit_sem raced with IPC_RMID, nothing to do */
		if (IS_ERR(sma)) {
			rcu_read_unlock();
			continue;
		}

		sem_lock(sma, NULL, -1);
		/* exit_sem raced with IPC_RMID, nothing to do */
		if (!ipc_valid_object(&sma->sem_perm)) {
			sem_unlock(sma, -1);
			rcu_read_unlock();
			continue;
		}
		un = __lookup_undo(ulp, semid);
		if (un == NULL) {
			/* exit_sem raced with IPC_RMID+semget() that created
			 * exactly the same semid. Nothing to do.
			 */
			sem_unlock(sma, -1);
			rcu_read_unlock();
			continue;
		}

		/* remove un from the linked lists */
		ipc_assert_locked_object(&sma->sem_perm);
		list_del(&un->list_id);

		/* we are the last process using this ulp, acquiring ulp->lock
		 * isn't required. Besides that, we are also protected against
		 * IPC_RMID as we hold the sma->sem_perm lock now.
		 */
		list_del_rcu(&un->list_proc);

		/* perform adjustments registered in un */
		for (i = 0; i < sma->sem_nsems; i++) {
			struct sem *semaphore = &sma->sems[i];

			if (un->semadj[i]) {
				semaphore->semval += un->semadj[i];
				/*
				 * Range checks of the new semaphore value,
				 * not defined by sus:
				 * - Some unices ignore the undo entirely
				 *   (e.g. HP UX 11i 11.22, Tru64 V5.1)
				 * - some cap the value (e.g. FreeBSD caps
				 *   at 0, but doesn't enforce SEMVMX)
				 *
				 * Linux caps the semaphore value, both at 0
				 * and at SEMVMX.
				 *
				 *	Manfred <manfred@colorfullife.com>
				 */
				if (semaphore->semval < 0)
					semaphore->semval = 0;
				if (semaphore->semval > SEMVMX)
					semaphore->semval = SEMVMX;
				semaphore->sempid = task_tgid_vnr(current);
			}
		}

		/* maybe some queued-up processes were waiting for this */
		do_smart_update(sma, NULL, 0, 1, &wake_q);
		sem_unlock(sma, -1);
		rcu_read_unlock();
		wake_up_q(&wake_q);

		kfree_rcu(un, rcu);
	}
	kfree(ulp);
}
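
/*
 * Worked example for the replay loop above (the values are
 * illustrative): a task running with SEM_UNDO does sem_op = -2 on a
 * semaphore whose semval is 3, so semval becomes 1 and semadj becomes
 * +2. If the task then dies without a matching V(), exit_sem()
 * computes
 *
 *	semval = 1 + semadj(+2) = 3
 *
 * restoring the pre-crash value. The clamping to [0, SEMVMX] only
 * matters when other tasks moved semval in the meantime.
 */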
#ifdef CONFIG_PROC_FS
static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
{
	struct user_namespace *user_ns = seq_user_ns(s);
	struct kern_ipc_perm *ipcp = it;
	struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm);
	time64_t sem_otime;

	/*
	 * The proc interface isn't aware of sem_lock(), it calls
	 * ipc_lock_object() directly (in sysvipc_find_ipc).
	 * In order to stay compatible with sem_lock(), we must
	 * enter / leave complex_mode.
	 */
	complexmode_enter(sma);

	sem_otime = get_semotime(sma);

	/* key semid perms nsems uid gid cuid cgid sem_otime sem_ctime */
	seq_printf(s,
		   "%10d %10d  %4o %10u %5u %5u %5u %5u %10llu %10llu\n",
		   sma->sem_perm.key,
		   sma->sem_perm.id,
		   sma->sem_perm.mode,
		   sma->sem_nsems,
		   from_kuid_munged(user_ns, sma->sem_perm.uid),
		   from_kgid_munged(user_ns, sma->sem_perm.gid),
		   from_kuid_munged(user_ns, sma->sem_perm.cuid),
		   from_kgid_munged(user_ns, sma->sem_perm.cgid),
		   sem_otime,
		   sma->sem_ctime);

	complexmode_tryleave(sma);

	return 0;
}
#endif